use strict;
use warnings;
use File::Find;
use HTML::TokeParser;

# Suffix appended to an original file's name when it is kept as a backup.
my $bak_ext = '.bak';

# Top of the directory tree that is searched for HTML files.
my $root_dir = '/temp';

# File::Find walks the tree and calls wanted() once for every entry it finds.
find(\&wanted, $root_dir);

# wanted()
#
# File::Find callback.  On entry $_ holds the name of the current directory
# entry.  Plain files whose name ends in .htm or .html (case-insensitive)
# are rewritten in place:
#
#   1. the original is renamed to "<name>.bak";
#   2. the backup is tokenised with HTML::TokeParser;
#   3. every token's raw text is written to a fresh "<name>", with each
#      occurrence of "AA." removed from <meta ...> start tags only.
#
# Everything else is reported as skipped.  Takes no arguments and returns
# nothing; dies with a descriptive message if the rename, the parse or any
# write step fails.
sub wanted {
    my $name = $_;

    # Only plain files are candidates: a directory that happens to be called
    # "foo.html" must not be renamed and overwritten.
    unless ( -f $name and $name =~ /\.html?$/i ) {
        print "Skipping $name\n";
        return;
    }

    print "Processing $name\n";
    my $new = $name;
    my $bak = $name . $bak_ext;
    rename $name, $bak or die "Cannot rename $name to $bak: $!";

    # Three-argument open with a lexical handle: the filename is never
    # interpreted as part of the mode, and the handle is not a package global.
    open my $out, '>', $new or die "Cannot open $new for writing: $!";

    # new() is the class's constructor; given a filename it opens that file
    # for parsing.  It returns undef (with the reason in $!) on failure.
    my $p = HTML::TokeParser->new($bak)
        or die "Cannot open $bak for parsing: $!";

    # get_token returns one array reference per piece of the document and
    # undef at the end.  Element 0 is the token type:
    #   ["S",  $tag, $attr, $attrseq, $text]   start tag
    #   ["E",  $tag, $text]                    end tag
    #   ["T",  $text, $is_data]                plain text
    #   ["C",  $text]                          comment
    #   ["D",  $text]                          declaration
    #   ["PI", $token0, $text]                 processing instruction
    while ( my $token = $p->get_token ) {

        # The original source text of a token is its last element (index -1)
        # for every type except "T", where a flag follows it and the text
        # sits at index 1.
        my $text_index = $token->[0] eq 'T' ? 1 : -1;

        # Only start tags named "meta" are edited; the substitution works on
        # the tag's raw text, so its original spacing and quoting survive.
        if ( $token->[0] eq 'S' and $token->[1] eq 'meta' ) {
            $token->[$text_index] =~ s/AA\.//g;
        }

        # Re-emit the (possibly modified) raw text so the output is the input
        # document with only the meta tags changed.
        print {$out} $token->[$text_index]
            or die "Cannot write to $new: $!";
    }

    # Buffered write errors only surface at close, so check it.
    close $out or die "Cannot close $new: $!";
    return;
}
####
<
input
type="hidden"
name="weird indenting is legal"
value=???
>
####
28: my $text_index = $token->[0] eq 'T' ? 1 : -1;
####
["S", $tag, $attr, $attrseq, $text]
["E", $tag, $text]
["T", $text, $is_data]
["C", $text]
["D", $text]
["PI", $token0, $text]
####
[
'S',
'meta',
{
'content' => 'Web data ',
'name' => 'doc'
},
[
'name',
'content'
],
''
];
####
32: if ( $token->[0] eq 'S' and $token->[1] eq 'meta' ) {
33: $token->[ $text_index ] =~ s/AA\.//g;
34: }