use strict;
use warnings;
use HTML::TokeParser::Simple;
# Open the input document. The one-argument constructor hands the file name to
# HTML::TokeParser, which returns undef (reason in $!) when the file cannot be
# opened, so stop here with a clear message instead of failing later on an
# undefined parser.
my $parser = HTML::TokeParser::Simple->new('tricky.html')
    or die "Cannot open 'tricky.html': $!";
# Tags that are dropped from the output entirely (looked up by tag name in the
# main loop). Every listed name maps to a true value.
my %skip = map { $_ => 1 } qw(
    u
    b
    i
    em
    big
    img
    strong
);
# CSS fragments appended to the style attribute of matching start tags.
# The key "h" stands for every heading level (the main loop folds h1..h6 to "h").
# Paragraphs and list items share the same declarations.
my $paragraph_css
    = ';text-indent: 10px; word-spacing: 10px; letter-spacing: 2px; color: black';
my %modify = (
    h => ';text-indent: 10px; word-spacing: 30px; letter-spacing: 3px; color: black',
    ( map { $_ => $paragraph_css } qw(p li) ),
);
# Walk the document one token at a time, rewriting or dropping tags as
# configured in %skip and %modify, and echo every kept token to STDOUT.
while (my $token = $parser->get_token) {
    # Force a white page background through an inline style.
    # NOTE: this replaces any style attribute already present on <body> and
    # does not touch a legacy bgcolor attribute.
    if ($token->is_start_tag('body')) {
        $token->set_attr(style => 'background-color: white');
    }

    # Drop tags listed in %skip. Non-tag tokens never match, so the content
    # between a skipped start/end pair is still printed below.
    next if $token->is_tag and $skip{$token->return_tag};

    # Append our CSS to the start tags named in %modify.
    if ($token->is_start_tag) {
        my $candidate = $token->return_tag;

        # Fold h1..h6 onto the single "h" key; anchored so only an exact
        # heading tag name is rewritten.
        $candidate =~ s/\Ah[1-6]\z/h/i;

        if (my $add_css = $modify{$candidate}) {
            my $style = $token->return_attr->{style};

            if (defined $style and $style =~ /\S/) {
                # Existing declarations: the fragment's leading ';' acts as
                # the separator.
                $style .= $add_css;
            }
            else {
                # No usable style yet: drop the leading separator so the
                # attribute does not start with a stray ';'.
                ($style = $add_css) =~ s/\A;\s*//;
            }

            # Pass exactly one name/value pair. set_attr is documented as
            # taking either that or a hash reference, not a flattened hash,
            # so this guarantees the style attribute is the one being set.
            $token->set_attr(style => $style);
        }
    }

    # just print to STDOUT ... change to fit your needs
    print $token->as_is;
}