wfsp has asked for the wisdom of the Perl Monks concerning the following question:

Removing font tags using, for example, HTML::TokeParser::Simple is quite straightforward:
#!/usr/bin/perl
# Strip every <font> start and end tag from an HTML fragment while passing
# all other markup and text through unchanged, using the token stream from
# HTML::TokeParser::Simple.
use warnings;
use strict;

use HTML::TokeParser::Simple;

# Slurp the sample document from the __DATA__ section in a single read.
my $html = do { local $/; <DATA> };

my $p = HTML::TokeParser::Simple->new(\$html);

# Echo each token exactly as it appeared in the input, skipping font tags.
while ( my $t = $p->get_token ) {
    next if $t->is_tag(q{font});
    print $t->as_is;
}

__DATA__
<p><font face="Verdana">one <a href="link.html">two</a> three</font></p>
<p>one <a href="link.html">two</a> three</p>
How would you go about this using HTML::TreeBuilder? The closest I've got replaces everything between font tags with content (text) zapping any tags.
#!/usr/bin/perl
# Attempt to drop a <font> element with HTML::TreeBuilder.  This version
# intentionally shows the problem being asked about: the font element is
# replaced by its flattened text, so any markup nested inside it is lost.
use warnings;
use strict;

use HTML::TreeBuilder;

# Slurp the sample document from the __DATA__ section in a single read.
my $html = do { local $/; <DATA> };

my $h    = HTML::TreeBuilder->new_from_content($html);
my $para = $h->look_down( _tag => q{p} );
my $font = $para->look_down( _tag => q{font} );

# as_text returns only the text content, so the nested <a> element does not
# survive the replacement.
$font->replace_with( $font->as_text );

print $para->as_HTML;

# Free the tree explicitly; needed on HTML::TreeBuilder releases that do not
# use weak references, harmless otherwise.
$h->delete;

__DATA__
<p><font face="Verdana">one <a href="link.html">two</a> three</font></p>
<p>one two three

Replies are listed 'Best First'.
Re: Removing font tags using HTML::TreeBuilder
by Anonymous Monk on Oct 27, 2008 at 09:54 UTC
    Don't do that ;)
    #!/usr/bin/perl --
# Remove <font> elements from a parsed HTML tree while keeping their
# children (text and nested elements) in place.  The tree is printed before
# and after the change.  Replacement happens inside the look_down callback,
# i.e. while the tree is still being traversed.
use warnings;
use strict;

use HTML::TreeBuilder;

my $html = <<'__HTML__';
<p>and a <font face="Verdana">one <a href="link.html">two</a> three</font> four</p>
<p>another |<font name="one">one</font>| and another |<font name="two"> one </font>|</p>
__HTML__

{
    my $h = HTML::TreeBuilder->new_from_content($html);

    # Arguments to as_HTML: entities to escape, indent string, and an empty
    # hash of optional end tags so that every end tag is emitted.
    print $h->as_HTML( '<>&', ' ', {} ), "\n";

    $h->look_down(
        _tag => q{font},
        sub {
            # replace_with_content takes no arguments: it puts the element's
            # own content in the element's place within its parent.
            $_[0]->replace_with_content;

            # Return false so look_down keeps searching instead of
            # collecting this node as a match.
            return;
        },
    );

    print $h->as_HTML( '<>&', ' ', {} ), "\n";

    # Free the tree explicitly; needed on HTML::TreeBuilder releases that do
    # not use weak references, harmless otherwise.
    $h->delete;
}

__END__
<html>
 <head>
 </head>
 <body>
  <p>and a <font face="Verdana">one <a href="link.html">two</a> three</font> four</p>
  <p>another |<font name="one">one</font>| and another |<font name="two"> one </font>|</p>
 </body>
</html>

<html>
 <head>
 </head>
 <body>
  <p>and a one <a href="link.html">two</a> three four</p>
  <p>another |one| and another | one |</p>
 </body>
</html>
      Bah, that won't handle nested font tags properly; this is better:
      #!/usr/bin/perl --
# Remove <font> elements, including nested ones, from a parsed HTML tree
# while keeping their children (text and nested elements) in place.  The
# tree is printed before and after the change.
use warnings;
use strict;

use HTML::TreeBuilder;

my $html = <<'__HTML__';
<p>and a <font face="Verdana">one <a href="link.html">two</a> three</font> four</p>
<p>another |<font name="one">one</font>| and another |<font name="two"> one </font>|</p>
<p>6 nested font tags
<font>1<font>2<font>3<font>4<font>5<font>6</font>5</font>4</font>3</font>2</font>1</font>
</p>
__HTML__

{
    my $h = HTML::TreeBuilder->new_from_content($html);

    # Arguments to as_HTML: entities to escape, indent string, and an empty
    # hash of optional end tags so that every end tag is emitted.
    print $h->as_HTML( '<>&', ' ', {} ), "\n";

    # The full list of font elements is gathered before the loop body runs,
    # so the tree is not modified while look_down is still walking it.
    for my $font ( $h->look_down( _tag => q{font} ) ) {

        # replace_with_content takes no arguments: it puts the element's
        # own content in the element's place within its parent.
        $font->replace_with_content;
    }

    print $h->as_HTML( '<>&', ' ', {} ), "\n";

    # Free the tree explicitly; needed on HTML::TreeBuilder releases that do
    # not use weak references, harmless otherwise.
    $h->delete;
}

__END__
<html>
 <head>
 </head>
 <body>
  <p>and a <font face="Verdana">one <a href="link.html">two</a> three</font> four</p>
  <p>another |<font name="one">one</font>| and another |<font name="two"> one </font>|</p>
  <p>6 nested font tags <font>1<font>2<font>3<font>4<font>5<font>6</font>5</font>4</font>3</font>2</font>1</font></p>
 </body>
</html>

<html>
 <head>
 </head>
 <body>
  <p>and a one <a href="link.html">two</a> three four</p>
  <p>another |one| and another | one |</p>
  <p>6 nested font tags 12345654321</p>
 </body>
</html>