#!perl
# Posted by poj.
#
# Walks $dir for HTML files and writes one CSV row per file to $dfile:
# the file name, the selected <head> values extracted with
# HTML::HeadParser, and every text snippet (up to 25 characters of
# context either side) that contains one of the search words.
use strict;
use warnings;
use File::Find;
use HTTP::Headers;
use HTML::HeadParser;
use Text::CSV;

# config
my $dfile = 'all_tags.csv';
my $dir   = 'Test';
my @TAGS  = (
    'Content-Base',
    'Title',
    'X-Meta-author',
    'X-Meta-description',
    'X-Meta-keywords',
    'X-Meta-name',
);

# match words
my @WORDS = qw(
    press founder professor Dr. Ph.D M.D called receives joins timing
    find two self bottom true amazing forget night next day
);
my $words = join '|', map { quotemeta } @WORDS;

# The flags must live on the qr// itself: a compiled qr// object keeps its
# own flags when it is interpolated into another pattern, so flags given
# only on the enclosing match never reach it.  Because /x makes whitespace
# insignificant, the word delimiters are spelled out as lookarounds; \b is
# not used because it would fail after the trailing dot of 'Dr.'.
my $regex = qr/
    .{0,25}
    (?<!\w) (?:$words) (?!\w)
    .{0,25}
/isx;

# output
# binary => 1 lets fields contain non-ASCII characters and embedded
# newlines; without it print() fails on such fields and the row is lost.
my $csv = Text::CSV->new({ binary => 1, eol => $/ })
    or die "Cannot create Text::CSV object: " . Text::CSV->error_diag;
open my $fh1, ">:encoding(utf8)", $dfile or die "Error opening $dfile: $!";
write_row(['Search Words', @WORDS]);                 # header
write_row(['Filename', @TAGS, 'Search Results']);    # header

# input
find({ wanted => \&HTML_Files, no_chdir => 1 }, $dir);
close $fh1 or die "Error closing $dfile: $!";
exit;

# Writes one row to the output CSV and dies if Text::CSV rejects it, so a
# failed row can never be dropped silently.
sub write_row {
    my ($fields) = @_;
    $csv->print($fh1, $fields)
        or die "Error writing $dfile: " . $csv->error_diag;
    return;
}

# File::Find callback.  With no_chdir => 1, $_ holds the same full path as
# $File::Find::name.  Only plain files ending in .htm or .html (any case)
# are parsed; a directory that happens to carry such a name is skipped.
sub HTML_Files {
    return unless /\.html?\z/i;
    return unless -f $File::Find::name;
    parse_HTML_Header($File::Find::name);
    return;
}

# Reads one HTML file, collects the search-word snippets and the configured
# header values, and appends them to the CSV as a single row.
#   $ifile - path of the HTML file to process
# Dies if the file cannot be opened or the row cannot be written.
sub parse_HTML_Header {
    my $ifile = shift;
    print "parsing $ifile\n";

    # NOTE(review): the file is read as raw bytes while the CSV is written
    # through an encoding layer; non-ASCII input may need decoding first -
    # confirm the encoding of the input files.
    open my $fh0, '<', $ifile or die "Error opening $ifile: $!\n";
    my $text = do { local $/; <$fh0> };
    close $fh0;

    my @matches = $text =~ /($regex)/g;
    #print join "\n",@matches;

    my $h = HTTP::Headers->new;
    my $p = HTML::HeadParser->new($h);
    $p->parse($text);
    $p->eof;    # flush any text still buffered by the parser

    # Scalar-context header() call, as in the original; an absent header
    # becomes '' but a defined value such as '0' is kept.
    my @cols = map {
        my $value = $h->header($_);
        defined $value ? $value : '';
    } @TAGS;

    write_row([$ifile, @cols, @matches]);
    return;
}
In reply to Re^2: Perl Array Question, combining HTML::HeadParser and regex
by poj
in thread Perl Array Question, combining HTML::HeadParser and regex
by Anonymous Monk
| For: | Use: |
|------|------|
| `&` | `&amp;` |
| `<` | `&lt;` |
| `>` | `&gt;` |
| `[` | `&#91;` |
| `]` | `&#93;` |