);
exit;
# processHTML($html)
#
# Re-emits an HTML document token by token (via HTML::TokeParser) and wraps
# every case-insensitive occurrence of "perl" found in *text* tokens in
# <B>...</B>. Highlighting only starts once the closing </title> tag has
# been seen, so the document title passes through unchanged. Start tags,
# end tags, comments, declarations and processing instructions are copied
# verbatim from their original source text, which means attribute values
# (link targets, for instance) are never touched.
#
# Parameters:
#   $_[0] - the HTML document as a string; it is handed to the tokenizer by
#           reference and is not modified.
# Returns:
#   The rewritten HTML as a string ('' when the document yields no tokens).
sub processHTML {
    my $parser        = HTML::TokeParser->new(\$_[0]);
    my $output        = '';
    my $title_pending = 1;    # true until the </title> end tag has been emitted

    while (my $token = $parser->get_token) {
        # The first element is the token type; the layout of the remaining
        # fields depends on that type.
        my ($type, @fields) = @{$token};

        if ($type eq 'S') {
            # start tag: ($tag, $attr, $attrseq, $text)
            $output .= $fields[3];
        }
        elsif ($type eq 'T') {
            # text: ($text, $is_data)
            my $text = $fields[0];
            $text =~ s{(perl)}{<B>$1</B>}ig unless $title_pending;
            $output .= $text;
        }
        elsif ($type eq 'C' or $type eq 'D') {
            # comment / declaration: ($text)
            $output .= $fields[0];
        }
        elsif ($type eq 'E' or $type eq 'PI') {
            # end tag: ($tag, $text); processing instruction: ($token0, $text)
            # Only a real end tag may switch highlighting on; a processing
            # instruction that happens to be named "title" must not.
            $title_pending = 0 if $type eq 'E' and $fields[0] eq 'title';
            $output .= $fields[1];
        }
    }

    return $output;
}
# processHTML2($html)
#
# Event-driven variant of the "highlight perl" rewrite, built on
# HTML::Parser (API version 3). Two handlers are installed:
#
#   * default - receives every event that has no dedicated handler and
#               copies its source text through verbatim. It also watches
#               for the </title> end tag, after which highlighting begins.
#   * text    - receives text events; once the title has been closed, every
#               case-insensitive occurrence of "perl" is wrapped in
#               <B>...</B> before the text is appended to the output.
#
# The default handler could alternatively be registered with the argspec
# 'text' alone and detect the closing title tag by matching the raw markup
# instead of inspecting the event name and tag.
#
# Parameters:
#   $_[0] - the HTML document as a string.
# Returns:
#   The rewritten HTML as a string ('' if no event produced any text).
sub processHTML2 {
    my $parser        = HTML::Parser->new(api_version => 3);
    my $output        = '';
    my $title_pending = 1;    # true until the </title> end tag has been seen

    $parser->handler(
        default => sub {
            my ($text, $event, $tag) = @_;
            $output .= $text;
            # With the 'tag' argspec, end tags arrive prefixed with "/".
            $title_pending = 0 if $event eq 'end' and $tag eq '/title';
            return;
        },
        'text,event,tag',
    );

    $parser->handler(
        text => sub {
            my ($text) = @_;    # work on a copy rather than the parser's argument
            $text =~ s{(perl)}{<B>$1</B>}ig unless $title_pending;
            $output .= $text;
            return;
        },
        'text',
    );

    $parser->parse($_[0]);
    # Signal end of document so text still buffered by the parser is flushed
    # to the handlers; without this, trailing text can be lost.
    $parser->eof;

    return $output;
}
__END__
This title contains Perl but does not get changed.
This is some text containing the term 'perl'.
- Unix
- Perl
- Linux
Notice how the term perl in the following link doesn't change, but the text does.
Perlmonks.org