####
####
text
####
blah blah
####
use HTML::Parser;
use warnings;
use strict;
# Build an event-driven parser that re-emits the document on STDOUT.
# Start tags, end tags and comments go through dedicated handlers; every
# other event falls through to default_h, which prints its source text
# unchanged.
my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [\&start, 'tagname, attr, attrseq, text'],
    end_h       => [\&end,   'tagname, text'],
    comment_h   => [\&comm,  'text'],
    default_h   => [sub { print shift }, 'text'],
);
$p->unbroken_text(1);

# State shared by the handlers below. Declared before parsing starts so the
# declaration visibly precedes the code that drives the handlers.
#   $pending - true once a start tag has been seen and not yet flushed by end()
#   $link    - text produced by transform_comments(), waiting to be printed
my ($pending, $link);

# The file to rewrite is the first command-line argument. Without this check
# an absent argument would be passed to open() as undef and silently yield
# empty output.
my $file = shift;
defined $file
    or die "Usage: $0 FILE\n";

# parse_file() returns undef (with $! set) when the file cannot be opened;
# report that instead of exiting silently with no output.
defined $p->parse_file($file)
    or die "Cannot parse '$file': $!\n";
# Start-tag handler: rewrites attribute values that contain '!' and prints
# the rebuilt opening tag.
#   $tag     - tag name
#   $attr    - hash ref of attribute name => value (modified in place)
#   $attrseq - array ref of attribute names in their original order
# The fourth argument supplied by the parser (the tag's source text) is not
# used. Side effects: may overwrite the file-level $link and always
# increments the file-level $pending.
sub start {
    my ($tag, $attr, $attrseq) = @_;

    # Pass every value containing '!' through transform_comments(); the
    # second value it returns replaces whatever $link held before.
    foreach my $name (keys %$attr) {
        next unless $attr->{$name} =~ /\!/;
        ($attr->{$name}, $link) = transform_comments($attr->{$name});
    }

    $pending++;

    # Rebuild the attribute list in source order. A bare '/' entry is
    # emitted as-is rather than as a name="value" pair.
    # NOTE(review): values are placed inside double quotes without any
    # escaping -- confirm they can never contain a '"' character.
    my @pieces;
    foreach my $name (@$attrseq) {
        push @pieces, $name eq '/' ? $name : qq($name="$attr->{$name}");
    }
    my $attr_text = join ' ', @pieces;

    print "<$tag", $attr_text ? " $attr_text>" : '>';
}
# End-tag handler: echoes the closing tag and, when a start tag has been
# seen since the last flush, prints the deferred link text and resets the
# shared state.
#   $tag  - tag name (unused)
#   $text - source text of the end tag, printed unchanged
sub end {
    my ($tag, $text) = @_;
    print $text;
    if ($pending) {
        # $link is still undef when nothing has assigned it yet (e.g. the
        # first plain element in a document); skip the print in that case
        # instead of triggering an "uninitialized value" warning.
        print $link if defined $link;
        $pending = $link = '';
    }
}
# Comment handler: runs the comment's source text through
# transform_comments(), prints the first value it returns, and stores the
# second in the file-level $link. That link text is printed immediately
# unless $pending is set, in which case it is left for end() to print.
sub comm {
    my ($raw_comment) = @_;
    (my $rewritten, $link) = transform_comments($raw_comment);
    print $rewritten;
    print $link unless $pending;
}
sub transform_comments {
my $str = shift;
if ($str =~ /(\S+) --!>([^<]+?)[^<]+?