use strict;
use warnings;

use HTML::TokeParser::Simple;

# remove_final_blockquotes($html_string)
#
# Returns a copy of $html_string with the blockquote(s) at the very end of
# the document removed.
#
# The HTML is tokenised and copied token by token.  Everything from an
# opening <blockquote> up to its matching </blockquote> (nesting is
# counted) is held back in a pending buffer.  The buffer is only released
# into the result once another token is seen outside any blockquote, so:
#
#   * a blockquote followed by further content is kept untouched;
#   * a blockquote (or a run of directly adjacent blockquotes) that ends
#     the document is dropped;
#   * a blockquote that is never closed is dropped together with
#     everything after its opening tag.
#
# Whitespace at the very end of the input is stripped first, so a trailing
# newline after the last </blockquote> does not count as "further content".
#
# Returns the empty string when $html_string is undefined.
sub remove_final_blockquotes {
    my $html_string = shift;

    return '' unless defined $html_string;

    # Strip whitespace at the end of the *string* only.  \z anchors to the
    # true end of the string; using $ with /m and /g would also strip the
    # end of every internal line (e.g. inside <pre> blocks).
    $html_string =~ s/\s+\z//;

    my $parser = HTML::TokeParser::Simple->new(string => $html_string);
    $parser->unbroken_text(1);

    my $depth   = 0;     # current <blockquote> nesting level
    my $pending = '';    # blockquote markup not yet known to be non-final
    my $kept    = '';    # output accumulated so far

    while (my $token = $parser->get_token) {
        $depth++ if $token->is_start_tag('blockquote');

        if ($depth == 0) {
            # We are outside any blockquote, so whatever was held back
            # is not the final blockquote after all: release it.
            $kept .= $pending . $token->as_is;
            $pending = '';
        }
        else {
            $pending .= $token->as_is;
        }

        # Never let a stray </blockquote> push the depth below zero;
        # a negative depth would send all later content into the pending
        # buffer and silently discard it.
        if ($token->is_end_tag('blockquote') && $depth > 0) {
            $depth--;
        }
    }

    # Anything still pending is the trailing blockquote: discard it.
    return $kept;
}
Also, comments and criticism are greatly appreciated!
In reply to Re^3: Using Perl to snip the end off of HTML
by eastcoastcoder
in thread Using Perl to snip the end off of HTML
by eastcoastcoder
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |