#!/usr/bin/perl
use strict;
use HTML::Parser;
# Convert($what)
#
# Returns the first argument wrapped in a pair of single quotes.
# Any additional arguments are ignored.
sub Convert
{
    my $text = shift;
    return join('', q{'}, $text, q{'});
}
# Fixerizer($content)
#
# Returns a copy of the HTML in $content in which the contents of every
# <td> cell have been passed through Convert().  Everything outside a
# cell (including the tags themselves) is copied through unchanged.
#
# A cell's contents run from the end of its <td> start tag up to the next
# </td>, </tr> or </table> end tag, or up to the next <td> start tag when
# the author never closed the previous cell.  A cell that is still open
# when the document ends is copied through without conversion.
#
# An undefined $content is treated as the empty string.
#
# NOTE(review): the offsets reported by HTML::Parser are used directly as
# substr() positions; confirm they agree for decoded (wide-character) input.
sub Fixerizer
{
    my ($content) = @_;
    $content = '' unless defined $content;

    my $fixed_content = '';

    # Offset of the first character of $content not yet emitted.
    my $pos = 0;

    # True while $pos marks the start of a cell whose contents are
    # still waiting to be converted.
    my $in_cell = 0;

    # End tags that terminate an open cell.  'tr' and 'table' are here
    # for lazy HTML that omits the </td>.
    my %closes_cell = map { $_ => 1 } qw(td tr table);

    # Start-tag handler: only <td> is of interest.
    my $TagStart = sub
    {
        my ($tagname, $offset, $length) = @_;
        return unless $tagname eq 'td';

        if ($in_cell)
        {
            # Lazy HTML: a new <td> while the previous cell is still
            # open implicitly closes it, so convert what it held.
            $fixed_content .=
                Convert(substr($content, $pos, $offset - $pos));
            $pos = $offset;
        }

        # Copy any untouched HTML up to and including this tag, then
        # remember that the cell's contents begin right after it.
        my $tag_end = $offset + $length;
        $fixed_content .= substr($content, $pos, $tag_end - $pos);
        $pos     = $tag_end;
        $in_cell = 1;
        return;
    };

    # End-tag handler: closes the current cell, if there is one.
    my $TagEnd = sub
    {
        my ($tagname, $offset, $length) = @_;
        return unless $in_cell && $closes_cell{$tagname};

        # Emit the converted cell contents followed by the end tag itself.
        $fixed_content .=
            Convert(substr($content, $pos, $offset - $pos));
        $fixed_content .= substr($content, $offset, $length);

        $pos     = $offset + $length;
        $in_cell = 0;
        return;
    };

    # Build a parser with the handlers above hooked in.
    my $hp = HTML::Parser->new(
        api_version => 3,
        start_h     => [ $TagStart, 'tagname,offset,length' ],
        end_h       => [ $TagEnd,   'tagname,offset,length' ],
    );

    $hp->parse($content);
    $hp->eof;    # signal end of document so nothing is left buffered

    # Copy whatever follows the last handled tag.  $pos is already the
    # first unemitted offset, so no adjustment is needed.
    $fixed_content .= substr($content, $pos)
        if $pos < length $content;

    return $fixed_content;
}