in reply to huge multiline regex

This uses HTML::TokeParser::Simple. It finds the text within the second font tag inside each blockquote.

#!/usr/bin/perl
# Print the text found inside the second <font> element of each <blockquote>
# in the HTML document embedded after __DATA__.
use strict;
use warnings;
use HTML::TokeParser::Simple;

# Slurp the whole embedded document in a single read.
my $html = do { local $/; <DATA> };

my $parser = HTML::TokeParser::Simple->new(\$html);

my @content;              # one entry per captured <font> element
my $in_blockquote = 0;    # true while inside a <blockquote> still being searched
my $font_count    = 0;    # <font> start tags seen in the current blockquote
my $capture_depth = 0;    # > 0 while inside the second <font>; counts nested <font>s
my $captured      = '';   # text gathered so far for the <font> being captured

TOKEN:
while (my $token = $parser->get_token) {
    if ($token->is_start_tag('blockquote')) {
        $in_blockquote = 1;
        next TOKEN;
    }

    if ($token->is_end_tag('blockquote')) {
        # Reset here so a blockquote with fewer than two <font> tags cannot
        # leak its count into the next one. An unclosed <font> is flushed
        # rather than silently dropped.
        push @content, $captured if $capture_depth;
        ($in_blockquote, $font_count, $capture_depth, $captured) = (0, 0, 0, '');
        next TOKEN;
    }

    next TOKEN unless $in_blockquote;

    if ($capture_depth) {
        # Keep collecting until the matching </font>, so text that follows
        # nested markup (<b>, <i>, a nested <font>, ...) is not lost.
        if ($token->is_text) {
            $captured .= $token->as_is;
        }
        elsif ($token->is_start_tag('font')) {
            $capture_depth++;
        }
        elsif ($token->is_end_tag('font')) {
            $capture_depth--;
            if ($capture_depth == 0) {
                push @content, $captured;
                # Done with this blockquote: ignore the rest of it.
                ($in_blockquote, $font_count, $captured) = (0, 0, '');
            }
        }
        next TOKEN;
    }

    if ($token->is_start_tag('font')) {
        $font_count++;
        $capture_depth = 1 if $font_count == 2;
    }
}

# The document ended while still inside the second <font>: keep what we have.
push @content, $captured if $capture_depth;

print "@content";

__DATA__
<blockquote>
<DIV class=code_box>
<DIV class=code_box_header>
<font size="2" face="Verdana, Arial, Helvetica, sans-serif"> code </font>
</DIV>
<font size="2" face="Verdana, Arial, Helvetica, sans-serif"> all other text and stuff goes here </font>
</DIV>
</blockquote>
<blockquote>
<DIV class=code_box>
<DIV class=code_box_header>
<font size="2" face="Verdana, Arial, Helvetica, sans-serif"> code </font>
</DIV>
<font size="2" face="Verdana, Arial, Helvetica, sans-serif"> all other text and stuff goes here </font>
</DIV>
</blockquote>