#!/usr/bin/perl
use strict;
use warnings;

use HTML::Parser;

# Convert($what)
#
# Transformation applied to the raw text found inside each table cell.
# This sample implementation simply wraps the text in single quotes, so
# "My Friend" becomes "'My Friend'".
sub Convert {
    my ($what) = @_;
    return "'$what'";
}

# Fixerizer($content)
#
# Returns a copy of the HTML document $content in which everything between
# a <td> start tag and the tag that closes the cell has been passed through
# Convert().  All other parts of the document are copied through verbatim.
#
# A cell is considered closed by </td>, </tr> or </table>, or by the next
# <td> start tag, so lazy markup such as '<td>foo</tr>' or '<td>a<td>b' is
# still handled.  A cell that is still open when the document ends is
# copied through unconverted.  Tables nested inside a cell are not treated
# specially: the inner <td> is taken to close the outer cell.
#
# An undefined $content is treated as an empty document; the return value
# is always a defined string.
#
# NOTE(review): the parser's 'offset'/'length' values are used directly as
# substr() positions.  HTML::Parser documents them as byte positions, so
# this presumably expects $content to be a byte string (or plain ASCII) --
# confirm before feeding it decoded Unicode text.
sub Fixerizer {
    my ($content) = @_;
    $content = '' unless defined $content;

    my $fixed_content = '';

    # Offset of the first character of $content that has not yet been
    # appended to $fixed_content.
    my $copied_to = 0;

    # True while we are between a <td> and the tag that closes it.
    my $in_cell = 0;

    # End tags that terminate an open cell.
    my %closes_cell = map { $_ => 1 } qw(td tr table);

    # Emits the pending cell text through Convert(), followed by the tag
    # (at $offset, $length characters long) that terminated the cell.
    my $flush_cell = sub {
        my ($offset, $length) = @_;
        $fixed_content .= Convert(
            substr($content, $copied_to, $offset - $copied_to)
        );
        $fixed_content .= substr($content, $offset, $length);
        $copied_to = $offset + $length;
        return;
    };

    # Start-tag handler: only <td> is of interest.
    my $on_start = sub {
        my ($tagname, $offset, $length) = @_;
        return unless $tagname eq 'td';

        if ($in_cell) {
            # Previous cell was never closed; this <td> closes it.
            $flush_cell->($offset, $length);
        }
        else {
            # Copy all untouched HTML up to and including this tag.
            my $tag_end = $offset + $length;
            $fixed_content
                .= substr($content, $copied_to, $tag_end - $copied_to);
            $copied_to = $tag_end;
        }
        $in_cell = 1;
        return;
    };

    # End-tag handler: close out the current cell, if there is one.
    my $on_end = sub {
        my ($tagname, $offset, $length) = @_;
        return unless $in_cell && $closes_cell{$tagname};

        $flush_cell->($offset, $length);
        $in_cell = 0;
        return;
    };

    my $hp = HTML::Parser->new(
        api_version => 3,
        start_h     => [ $on_start, 'tagname,offset,length' ],
        end_h       => [ $on_end,   'tagname,offset,length' ],
    );

    $hp->parse($content);
    $hp->eof;    # tell the parser the document is complete

    # Whatever follows the last handled tag is copied through untouched.
    # $copied_to already points at the first uncopied character, so no
    # further adjustment is needed.
    $fixed_content .= substr($content, $copied_to)
        if $copied_to < length $content;

    return $fixed_content;
}

####

# When run as a script (rather than loaded with require/do), convert the
# file named on the command line, defaulting to "test.html", and print the
# result to standard output.
unless (caller) {
    my $path = @ARGV ? $ARGV[0] : 'test.html';

    open my $fh, '<', $path
        or die "Cannot open '$path': $!\n";
    my $html = do { local $/; <$fh> };    # slurp the whole file
    close $fh;

    print Fixerizer($html);
}

1;

####
My Friend My Other Friend
My Friend
##
##
'My Friend' 'My Other Friend'
'My Friend'