# _is_html_clean( $html )
#
# Heuristic check that every opening tag in $html has a matching closing tag.
#
# Parameters:
#   $html - string of HTML markup; undef is treated as an empty string.
#           The argument is only read, never modified.
#
# Returns:
#   1 if, for every tag name not listed in the "unbalanced" table, the number
#     of opening tags equals the number of closing tags;
#   0 as soon as a tag with a non-zero open/close balance is found.
#
# Notes:
#   * Tag names are compared case-insensitively.
#   * Only counts are compared, not nesting order, so "</b><b>" is reported
#     as clean.
sub _is_html_clean {
    # Tags that may legitimately appear without a closing counterpart.
    # Kept in a state variable so the hash is built only once.
    state $is_unbalanced = {
        area       => 1,
        base       => 1,
        basefont   => 1,
        bgsound    => 1,
        br         => 1,
        col        => 1,
        colgroup   => 1,
        embed      => 1,
        frame      => 1,
        hr         => 1,
        img        => 1,
        input      => 1,
        isindex    => 1,
        li         => 1,
        link       => 1,
        marquee    => 1,
        meta       => 1,
        p          => 1,
        '!doctype' => 1,
    };

    # Work on a private copy: @_ aliases the caller's variables, so editing
    # $_[0] in place would rewrite the caller's markup (or die when the
    # argument is a read-only literal).
    my $html = $_[0] // '';

    # Remove commented sections first so markup inside comments is ignored.
    # /s lets a comment span several lines.
    $html =~ s/<!--.*?-->//gs;

    # Remove self-closing tags. The negated class keeps each match inside a
    # single tag, so every occurrence is removed, not just the last per line.
    $html =~ s{<[^<>]+/>}{}g;

    # Collect tag names (with the leading "/" for closing tags). The name
    # ends at the first whitespace character or ">", so a tag whose
    # attributes start on the next line is still recognised.
    my @tags = ( $html =~ m{<([^\s>]+)[\s>]}g );

    # Net open/close balance per lower-cased tag name.
    my %balance_of;
    for my $tag (@tags) {
        my $name = lc $tag;
        if ( $name =~ s{\A/}{} ) {    # closing tag: drop the "/" and decrement
            $balance_of{$name}--;
        }
        else {
            $balance_of{$name}++;
        }
    }

    for my $name ( keys %balance_of ) {
        next if $is_unbalanced->{$name};    # closing tag is optional
        return 0 if $balance_of{$name} != 0;    # unpaired tag found
    }

    return 1;    # everything is balanced
}