use strict;
use warnings;

use HTML::TokeParser::Simple;

# Build an empty text token.  cleanup_html() substitutes it for tags that
# should vanish from the output, so every element of the output list still
# answers the token predicates (is_tag, is_text, ...) and renders as ''.
# The empty prototype is kept so existing bareword calls keep parsing the
# same way.
sub dummy () {
    return HTML::TokeParser::Simple::Token::Text->new([ T => '' ]);
}

# cleanup_html($html)
#
# Tokenises $html and returns the re-serialised markup after these passes:
#
#   * <font> start tags lose face="Verdana" and size="1".  A <font> left
#     with no attributes is dropped together with its matching </font>.
#   * A <br> is moved in front of the run of start tags (other than <p>)
#     that immediately precedes it in the output.
#   * At an end tag other than </p>, the output is walked backwards: each
#     <br> met on the way is moved behind the end tag, and if the matching
#     start tag is reached the whole collected run, end tag included, is
#     discarded.  Whitespace-only text does not stop the walk; text
#     containing a non-space character does.
sub cleanup_html {
    my ($html) = @_;

    my $parser = HTML::TokeParser::Simple->new(string => $html);
    my @out;          # tokens emitted so far, in output order
    my @font_kept;    # one flag per open <font>: true if the tag was kept

    while (my $token = $parser->get_token) {
        if ($token->is_start_tag('font')) {
            if (($token->get_attr('face') || '') eq 'Verdana') {
                $token->delete_attr('face');
            }
            if (($token->get_attr('size') || '') eq '1') {
                $token->delete_attr('size');
            }
            if (%{ $token->get_attr }) {
                push @font_kept, 1;
            }
            else {
                push @font_kept, 0;
                $token = dummy();
            }
        }
        elsif ($token->is_end_tag('font')) {
            # Drop the </font> when its <font> was dropped, or when there
            # is no open <font> on record at all.
            $token = dummy() unless pop @font_kept;
        }

        my @append = ($token);

        if ($token->is_tag('br')) {
            @append = ();
            while (my $prev = pop @out) {
                # Test the popped token, not the <br> itself: a preceding
                # <p> must stay where it is.
                if ($prev->is_start_tag and $prev->get_tag ne 'p') {
                    unshift @append, $prev;
                }
                else {
                    push @out, $prev;
                    last;
                }
            }
            unshift @append, $token;
        }
        elsif ($token->is_end_tag and $token->get_tag ne 'p') {
            my $tag = $token->get_tag;
            while (my $prev = pop @out) {
                unshift @append, $prev;
                if ($prev->is_text) {
                    last if $prev->as_is =~ /\S/;
                }
                elsif ($prev->is_tag('br')) {
                    # Move the <br> from the front to behind the end tag.
                    shift @append;
                    push @append, $prev;
                }
                elsif ($prev->is_start_tag($tag)) {
                    # Reached the matching start tag: drop the whole run.
                    @append = ();
                    last;
                }
                elsif (@out and $out[-1]->is_tag) {
                    # @out may just have been emptied by the pop above, so
                    # check before dereferencing its last element.
                    last;
                }
            }
        }

        push @out, @append;
    }

    return join '', map { $_->as_is } @out;
}

# NOTE(review): the sample markup looks like it was lost from this copy and
# only newlines remain -- confirm and restore the intended test input.
my $html = "\n\n
";
print cleanup_html($html);