#!/usr/bin/perl -w
# boldemhtml.pl
#
# Demonstrates two ways of wrapping every occurrence of "perl" in <B>...</B>
# inside the *text* of an HTML document, while leaving the markup itself
# (tags, attribute values, comments, declarations) and everything up to and
# including the closing </title> tag untouched.  The sample document is read
# from the DATA section at the end of this file.
use strict;
use warnings;
use HTML::Parser;
use HTML::TokeParser;

# Slurp the sample document once and hand the same string to both
# implementations.  "local" keeps the change to $/ confined to this block.
my $html = do { local $/; <DATA> };

print processHTML($html);
print 'x' x 30, " HERE GO a little faster version \n";
print processHTML2($html);
exit;

# processHTML($html)
#
# Token-by-token implementation built on HTML::TokeParser.
#
#   $html - the HTML document as a single string.
#
# Returns the document re-assembled from its tokens, with "perl"
# (case-insensitive) wrapped in <B>...</B> in every text token that follows
# the closing </title> tag.  Returns '' when $html is undefined.
sub processHTML {
    my ($html) = @_;
    return '' unless defined $html;

    my $tp       = HTML::TokeParser->new(\$html);
    my $return   = '';
    my $SENTINEL = 1;    # stays true until </title> has been seen

    while (my $token = $tp->get_token) {
        # First element is the token type; the remaining fields depend on it
        # (see the get_token token layouts in the HTML::TokeParser docs).
        my ($ttype, @field) = @{$token};

        if ($ttype eq 'S') {                         # start tag
            $return .= $field[3];                    # original tag source
        }
        elsif ($ttype eq 'T') {                      # text
            my $text = $field[0];
            $text =~ s{(perl)}{<B>$1</B>}ig unless $SENTINEL;
            $return .= $text;
        }
        elsif ($ttype eq 'C' or $ttype eq 'D') {     # comment / declaration
            $return .= $field[0];
        }
        elsif ($ttype eq 'E' or $ttype eq 'PI') {    # end tag / processing instruction
            $SENTINEL = 0 if $ttype eq 'E' and $field[0] eq 'title';
            $return .= $field[1];                    # original source text
        }
    }

    return $return;
}

# processHTML2($html)
#
# Event-driven implementation built directly on HTML::Parser (API version 3).
#
#   $html - the HTML document as a single string.
#
# Returns the same result as processHTML(): the document with "perl" wrapped
# in <B>...</B> in all text after </title>.  Returns '' when $html is
# undefined.
sub processHTML2 {
    my ($html) = @_;
    return '' unless defined $html;

    my $SENTINEL = 1;    # stays true until </title> has been seen
    my $return   = '';
    my $p        = HTML::Parser->new(api_version => 3);

    # Everything that is not plain text is copied through verbatim; the only
    # extra job is to notice the end of the title.
    $p->handler(
        default => sub {
            my ($text, $event, $tag) = @_;
            # Guard against events that carry no source text.
            $return .= $text if defined $text;
            $SENTINEL = 0 if $event eq 'end' and $tag eq '/title';
            return;
        },
        'text,event,tag'
    );

    # The default handler could also be rewritten to look only at the raw
    # source text:
    #
    #   $p->handler(default => sub {
    #           $return .= $_[0];
    #           $SENTINEL = 0 if $_[0] =~ m{</title>}i;
    #           return;
    #       }, 'text');
    #
    # That version needs nothing but the 'text' argspec.

    # Plain text is where the highlighting happens.
    $p->handler(
        text => sub {
            my ($text) = @_;
            $text =~ s{(perl)}{<B>$1</B>}ig unless $SENTINEL;
            $return .= $text;
            return;
        },
        'text'
    );

    $p->parse($html);
    $p->eof;    # flush any text the parser is still buffering

    return $return;
}

__END__
<title>This title contains Perl but does not get changed.</title>

This is some text containing the term 'perl'.

<ol>
  <li>Unix</li>
  <li>Perl</li>
  <li>Linux</li>
</ol>

Notice how the term perl in the following link doesn't change, but the text does. <a href="http://www.perlmonks.org/">Perlmonks.org</a>