Re: huge multiline regex

This uses HTML::TokeParser::Simple. It finds the text inside the second font tag of each blockquote.

#!/usr/bin/perl

# Print the text found inside the second <font> element of each
# <blockquote> in the HTML stored after __DATA__.

use strict;
use warnings;
use HTML::TokeParser::Simple;

# Slurp the whole DATA section in a single read.
my $html = do { local $/; <DATA> };

my $p = HTML::TokeParser::Simple->new(\$html);

my @content;
my $in_bq      = 0;  # non-zero while inside a <blockquote> still being searched
my $font_tag   = 0;  # <font> start tags counted in the current blockquote
my $font_depth = 0;  # nesting depth inside the second <font>; 0 = not capturing

TOKEN:
while (my $t = $p->get_token) {

  if ($t->is_start_tag('blockquote')) {
    $in_bq++;
    next TOKEN;
  }

  next TOKEN unless $in_bq;

  # A blockquote that closes before its second <font> was finished must not
  # leak its font count into the next blockquote.
  if ($t->is_end_tag('blockquote')) {
    ($in_bq, $font_tag, $font_depth) = (0, 0, 0);
    next TOKEN;
  }

  if ($t->is_start_tag('font')) {
    if ($font_depth) {
      # A <font> nested inside the one being captured: track depth only, so
      # that the matching </font> (not an inner one) ends the capture.
      $font_depth++;
    }
    elsif (++$font_tag == 2) {
      $font_depth = 1;
    }
    next TOKEN;
  }

  next TOKEN unless $font_depth;

  if ($t->is_end_tag('font')) {
    # The second <font> is closed: this blockquote is done, so skip ahead
    # to the next <blockquote> start tag.
    ($in_bq, $font_tag) = (0, 0) if --$font_depth == 0;
    next TOKEN;
  }

  # Keep every text chunk inside the target <font>, including text that
  # follows or sits inside nested markup such as <b>...</b>.
  push @content, $t->as_is if $t->is_text;
}

print "@content";

  
__DATA__
<blockquote>
  <DIV class=code_box>
    <DIV class=code_box_header>
      <font size="2" face="Verdana, Arial, Helvetica, sans-serif">
        code
      </font>
    </DIV>
    <font size="2" face="Verdana, Arial, Helvetica, sans-serif">
      all other text and stuff goes here
    </font>
  </DIV>
</blockquote>

<blockquote>
  <DIV class=code_box>
    <DIV class=code_box_header>
      <font size="2" face="Verdana, Arial, Helvetica, sans-serif">
        code
      </font>
    </DIV>
    <font size="2" face="Verdana, Arial, Helvetica, sans-serif">
      all other text and stuff goes here
    </font>
  </DIV>
</blockquote>
[download]

Comment on Re: huge multiline regex Download Code