# Check an HTML body for bogus tags.
#
# Every start and end tag in $body is pulled out with HTML::PullParser and
# looked up in %HTML::Tagset::isKnown; tag names not found there are collected.
#
# Arguments:
#   $body - the HTML text to scan.
#
# Returns "Bogus tags <names>\n" (names sorted, space separated) when more
# than 10 distinct unknown tag names were seen, otherwise the empty string.
sub check_bogus_html_tags {
    my ($body) = @_;

    use HTML::Tagset;
    use HTML::PullParser;

    # Nothing to parse means nothing to report.
    return "" unless defined $body && length $body;

    my $p = HTML::PullParser->new(
        doc   => \$body,
        start => '"S", tagname',
        end   => '"E", tagname',
    );

    # Keyed by tag name, so a tag repeated many times (or seen as both a
    # start and an end tag) still counts as one distinct bogus tag.
    my %seen;
    while (my $token = $p->get_token()) {
        # Each token is [ "S" or "E", tagname ]; only the name matters here.
        my (undef, $tag) = @$token;
        $seen{$tag}++ unless $HTML::Tagset::isKnown{$tag};
    }

    my $reason = "";
    $reason = "Bogus tags " . join(" ", sort keys %seen) . "\n"
        if keys(%seen) > 10;

    # Return explicitly: without this the sub's value would be whatever the
    # trailing conditional statement happened to evaluate to.
    return $reason;
}

####
# decode the possibly encoded body, either
# from MIME-multipart message or from message body
$body = unpack_mail_body($mail);

# body is HTML
# Check the HTML for bad dtds etc.
{
    # Count the matches in list context. A scalar-context m//g only yields
    # true/false, so comparing its result against 5 could never succeed.
    my $inline_dtd_count = () = $body =~ m#<\s*!\s*[a-z]{1,5}\s*>#mg;
    $part_reason .= "wrong inline dtd\n" if $inline_dtd_count > 5;
}
$part_reason .= check_bogus_html_tags($body);