use strict;
use File::Find;
use HTML::TokeParser;
# Suffix appended to an original file's name to form its backup name.
my $bak_ext = '.bak';

# Top of the directory tree that gets scanned.
my $root_dir = '/temp';

# Walk the tree below $root_dir, calling wanted() once per entry found.
find( { wanted => \&wanted }, $root_dir );
# Callback for File::Find::find(): rewrites one HTML file, removing every
# "AA." substring from the raw text of its <meta> start tags.
#
# Input:   $_ holds the name of the current entry (set by File::Find, which
#          by default has already chdir()'d into the entry's directory).
# Effect:  plain files whose name ends in .htm/.html are renamed to
#          "<name>$bak_ext" and a filtered copy is written under the
#          original name; everything else is reported as skipped.
# Dies:    if the rename fails, the backup cannot be parsed, or the new
#          file cannot be opened or closed.
#
# NOTE: an existing backup with the same name is overwritten by rename().
sub wanted {
    # Copy $_ right away so nothing called below can clobber the name.
    my $name = $_;

    # Anchor the extension so backups such as "page.html.bak" left by an
    # earlier run are not processed again, and require a plain file so a
    # directory that happens to be called "foo.html" is left alone.
    unless ( -f $name and $name =~ /\.html?\z/i ) {
        print "Skipping $name\n";
        return;
    }

    print "Processing $name\n";

    my $bak = $name . $bak_ext;
    rename $name, $bak or die "Cannot rename $name to $bak: $!";

    # new() returns undef when the file cannot be opened; check it before
    # creating the output file so a failure does not leave an empty one.
    my $p = HTML::TokeParser->new($bak)
        or die "Cannot open $bak for parsing: $!";

    # Three-arg open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle is scoped to this call.
    open my $out, '>', $name or die "Cannot open $name for writing: $!";

    while ( my $token = $p->get_token ) {
        # Index of the token's raw text: element 1 for text tokens ('T'),
        # the last element for every other token type.
        my $text_index = $token->[0] eq 'T' ? 1 : -1;

        # Only <meta> start tags are altered; all other tokens are
        # copied through unchanged.
        if ( $token->[0] eq 'S' and $token->[1] eq 'meta' ) {
            $token->[$text_index] =~ s/AA\.//g;
        }

        print {$out} $token->[$text_index];
    }

    # Buffered write errors only surface at close time.
    close $out or die "Cannot close $name: $!";

    return;
}