# Scan an HTML body for tag names that HTML::Tagset does not list as known.
#
# Arguments:
#   $body - the HTML text to scan.
#
# Returns:
#   "Bogus tags <sorted unknown tag names>\n" when more than 10 distinct
#   unknown tag names occur in start or end tags; the empty string otherwise.
#   The result is always a plain string, so it can be appended to a reason
#   buffer unconditionally.
sub check_bogus_html_tags {
    my ($body) = @_;

    use HTML::Tagset;
    use HTML::PullParser;

    # More than this many distinct unknown tag names is reported.
    my $max_unknown_tags = 10;

    # Nothing to parse means nothing to report; this also keeps an undefined
    # body away from the parser.
    return "" unless defined $body && length $body;

    # Only start and end tag events are requested; each token is
    # [ "S" | "E", tagname ].
    my $p = HTML::PullParser->new(
        doc   => \$body,
        start => '"S", tagname',
        end   => '"E", tagname',
    );

    my %seen;
    while (my $token = $p->get_token()) {
        # The event type is irrelevant here, only the tag name matters.
        my (undef, $tag) = @$token;
        $seen{$tag}++
            unless $HTML::Tagset::isKnown{$tag};
    }

    # Return explicitly instead of relying on the value of the last
    # evaluated statement.
    return "" unless scalar(keys %seen) > $max_unknown_tags;
    return "Bogus tags " . join(" ", sort keys %seen) . "\n";
}
####
# Decode the possibly encoded body, taken either from a MIME-multipart
# message or from the plain message body.
$body = unpack_mail_body($mail);
# The body is treated as HTML from here on.
# Check the HTML for bad inline DTD-like constructs ("<!xxxxx>").
# The matches must be counted in list context: a scalar-context m//g only
# yields true/false, which can never exceed 5. Assigning the match list to
# () and reading that assignment in scalar context gives the match count.
my $inline_dtd_count = () = $body =~ m#<\s*!\s*[a-z]{1,5}\s*>#mg;
$part_reason .= "wrong inline dtd\n"
    if $inline_dtd_count > 5;
# Append the bogus-tag finding (empty when nothing was found).
$part_reason .= check_bogus_html_tags($body);