# Substitution over $data built around the literal "href": every match is
# replaced by $1 . decode_entities($2) . $4 (the /e flag evaluates the
# replacement as Perl code). Capture 3 is the optional "&...;" entity directly
# after the leading "&" of capture 2; the pattern uses \3 to find where that
# same entity occurs again.
# Flags: /g replaces all matches, /s lets "." match newlines, /i ignores case.
# decode_entities is not defined in the visible source -- presumably imported
# from HTML::Entities; TODO confirm.
# NOTE(review): as written the pattern has an unmatched ")" after "href\s*"
# (capture 1 is already closed before the "*"), so it will not compile. It
# looks like text between a "<" and the following ">" was stripped from the
# start of the pattern -- restore it from the original before using this.
$data =~ s{(](?!href))*href\s*)(&(&[^;]+;)?(?:.(?!\3))+(?:\3)?)([^>]+>)} {$1.decode_entities($2).$4}gsei; ####

####
# HTML::Token wraps a parsed tag token and reduces it to a comparable string
# identifier: the tag name followed by its attribute names in sorted order.
#
# The wrapped token must respond to return_tag and return_attrseq (the tokens
# wrapped elsewhere in this file come from HTML::TokeParser::Simple->get_tag).
#
# The package lives in its own block so that the package declaration and the
# strict/warnings pragmas do not leak into the code that follows.
{
    package HTML::Token;
    use strict;
    use warnings;

    # new($token) -> HTML::Token
    # Wraps $token. The identifier is computed once up front and discarded,
    # purely so that a token lacking the required accessor methods dies here
    # rather than later when the identifier is first needed.
    sub new {
        my ($class, $token) = @_;
        my $self = bless { token => $token }, $class;
        $self->identifier;    # fail fast on unusable tokens; result not cached
        return $self;
    }

    # identifier() -> string
    # Returns "tag attr1 attr2 ..." with the attribute names sorted, or just
    # "tag" when there are no attributes. An empty attribute list and a
    # missing (non-array) attribute sequence both yield the bare tag name, so
    # the same attribute-less tag always maps to the same identifier (no
    # trailing space).
    sub identifier {
        my $self       = shift;
        my $token      = $self->{token};
        my $attributes = $token->return_attrseq;
        my $tag        = $token->return_tag;

        return $tag
            unless ( ref($attributes) eq 'ARRAY' ) && @{$attributes};

        return join ' ', $tag, sort @{$attributes};
    }
}
#### my $html = <This is a test

so what??? END_HTML my $parser = HTML::TokeParser::Simple->new(\$html); my @tokens = (); while (my $token = $parser->get_tag) { push @tokens => HTML::Token->new($token); } push @tokens => (qw[* . *]); # make the (p) tag zero or more, followed by anything #### use Token::Regex; my $regex = Token::Regex->new('HTML::Token'); $regex->parse(\@tokens); #### my $tokens = html_tokens(<This is html

so what???

so what???

so what???

and this is okay

END_HTML if ($regex->match($tokens)) { print "Yes\n"; } else { print "No\n"; } $tokens = html_tokens(<so what???

so what???

so what???

and this is okay

END_HTML if ($regex->match($tokens)) { print "Yes\n"; } else { print "No\n"; }

# html_tokens($html) -> array reference
# Feeds the given HTML string to HTML::TokeParser::Simple and wraps every tag
# token that get_tag returns in an HTML::Token, preserving the order in which
# the parser yields them.
sub html_tokens {
    my ($markup) = @_;

    my $tag_stream = HTML::TokeParser::Simple->new(\$markup);

    my @wrapped;
    while ( my $tag = $tag_stream->get_tag ) {
        push @wrapped, HTML::Token->new($tag);
    }

    return \@wrapped;
}