#!/usr/bin/perl
#
# To make this "go", you would want to use it like:
#
#     print Fixerizer(open (TEST, "test.html") && join ('', <TEST>));
#
# Please excuse this one line hack. A real program would be much more
# careful, for example:
#
#     open my $fh, '<', 'test.html' or die "test.html: $!";
#     print Fixerizer(do { local $/; <$fh> });

use strict;
use warnings;

use HTML::Parser;

# Convert($what)
#
# Transforms the text found inside one table cell and returns the
# replacement text. This version only wraps the text in single quotes;
# replace the body to suit your application.
sub Convert {
    my ($what) = @_;

    return "'$what'";
}

# Fixerizer($content)
#
# Takes an HTML document as a single string and returns a copy in which
# the contents of every <TD> cell have been passed through Convert().
# Everything outside the cells (including the tags themselves) is copied
# through unchanged.
#
# A cell is considered closed by </TD>, </TR> or </TABLE> (so lazy HTML
# such as '<TR><TD></TR>' works), or by the next <TD>. A cell that is
# still open when the document ends is copied through unconverted.
# Nested tables inside a cell are NOT handled specially.
#
# Returns '' for undefined or empty input.
sub Fixerizer {
    my ($content) = @_;

    # Nothing to do, and avoids handing undef to the parser.
    return '' if !defined $content || $content eq '';

    my $fixed_content = '';
    my $copied_to     = 0;    # offset of the first char not yet emitted
    my $in_cell       = 0;    # true between a <TD> and whatever closes it

    # NOTE(review): HTML::Parser documents 'offset'/'length' as byte
    # positions; confirm behaviour if $content is a decoded Unicode string.

    # Emit the pending cell text, i.e. everything from $copied_to up to
    # (but not including) $offset, after running it through Convert().
    my $flush_cell = sub {
        my ($offset) = @_;

        $fixed_content
            .= Convert(substr($content, $copied_to, $offset - $copied_to));
        $copied_to = $offset;
        $in_cell   = 0;
        return;
    };

    # Handles the opening of tags: <TD>
    my $tag_start = sub {
        my ($tagname, $offset, $length) = @_;

        return if $tagname ne 'td';

        # Lazy HTML ('<TD>a<TD>b'): a new cell implicitly closes the
        # previous one, so convert what we have collected so far.
        $flush_cell->($offset) if $in_cell;

        # Copy any other HTML up to and including this tag verbatim,
        # then start collecting cell content right after it.
        my $after_tag = $offset + $length;
        $fixed_content
            .= substr($content, $copied_to, $after_tag - $copied_to);
        $copied_to = $after_tag;
        $in_cell   = 1;
        return;
    };

    # Handles the closing of tags: </TD>, and also </TR> and </TABLE>
    # for busted HTML which never closes its cells.
    my $tag_end = sub {
        my ($tagname, $offset, $length) = @_;

        return if !$in_cell;
        return
            if $tagname ne 'td'
            && $tagname ne 'tr'
            && $tagname ne 'table';

        # Add in the modified content, then the closing tag itself.
        $flush_cell->($offset);
        $fixed_content .= substr($content, $offset, $length);
        $copied_to = $offset + $length;
        return;
    };

    # Whip up a new HTML::Parser object with the above handlers hooked in.
    my $hp = HTML::Parser->new(
        api_version => 3,
        start_h     => [ $tag_start, 'tagname,offset,length' ],
        end_h       => [ $tag_end,   'tagname,offset,length' ],
    );

    $hp->parse($content);
    $hp->eof;    # signal end of document so nothing stays buffered

    # Don't forget any dangling HTML after the last thing we emitted.
    # Start exactly at $copied_to: starting one past it would silently
    # drop a character.
    $fixed_content .= substr($content, $copied_to)
        if $copied_to < length $content;

    # Ship back the modified version.
    return $fixed_content;
}
Given this input:<TABLE BORDER=0> <TR> <TD ALIGN=left>My Friend</TD> <TD ALIGN=up>My Other Friend</TD> </TR> <TR> <TD ALIGN=left>My Friend</TD> </TR> </TABLE>
The output, shown after this paragraph, simply has single quotes around whatever is in each cell, which isn't very daring or bold. Convert() can be customized to suit your particular application.<TABLE BORDER=0> <TR> <TD ALIGN=left>'My Friend'</TD> <TD ALIGN=up>'My Other Friend'</TD> </TR> <TR> <TD ALIGN=left>'My Friend'</TD> </TR> </TABLE>
In reply to Re: Using HTML::Parser to edit files in place
by tadman
in thread Using HTML::Parser to edit files in place
by markjugg
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |