# Scan every .htm/.html file below $dir for a keyword and write each
# occurrence, together with a little surrounding context, to $dfile.
#
# Output format: one snippet per match, written verbatim and followed by a
# newline.  The context may itself contain line breaks, because the search
# lets "." match newlines.
use strict;
use warnings;

use File::Find;

# NOTE(review): these three modules are not used by the active code path.
# They belong to a <head>-metadata-to-CSV export that is currently disabled;
# the imports are left in place so nothing that relies on them breaks.
use HTTP::Headers;
use HTML::HeadParser;
use Text::CSV;

# --- Settings ---------------------------------------------------------------
my $dfile         = 'all_tags.csv';  # output file, truncated on every run
my $dir           = 'Test';          # root of the directory tree to scan
my $KEYWORD       = 'CEO';           # literal text, matched case-insensitively
my $CONTEXT_WIDTH = 25;              # max characters kept on each side of a hit

# --- Main -------------------------------------------------------------------
open my $fh1, '>:encoding(utf8)', $dfile
    or die "Error opening $dfile: $!";

# no_chdir => 1 keeps the working directory fixed, so the paths handed to the
# callback stay valid relative to where the script was started.
find({ wanted => \&HTML_Files, no_chdir => 1 }, $dir);

# Buffered write errors only surface at close, so this check matters.
close $fh1 or die "Error closing $dfile: $!";
exit;

# --- Subroutines ------------------------------------------------------------

# Placeholder File::Find callback; intentionally does nothing.
sub listfiles {
    return;
}

# File::Find "wanted" callback.
#
# Passes every regular file whose path ends in .htm or .html on to
# Parse_HTML_Header.  With no_chdir => 1, $_ holds the same full path as
# $File::Find::name, so the pattern is tested against the full path.
sub HTML_Files {
    return unless /\.html?$/;

    # Skip directories (or other non-files) that merely look like HTML files.
    return unless -f $File::Find::name;

    Parse_HTML_Header($File::Find::name);
    return;
}

# Search one file for $KEYWORD and append every hit to the output file.
#
# Parameters:
#   $ifile - path of the file to scan; defaults to $File::Find::name when
#            omitted, so the sub still works when called without arguments
#            from inside a File::Find callback.
#
# Side effects:
#   - prints the path being processed to STDOUT (one line per file);
#   - writes each matching snippet plus "\n" to the shared output handle.
#
# Dies if the file cannot be opened or if writing to the output file fails.
sub Parse_HTML_Header {
    my ($ifile) = @_;
    $ifile = $File::Find::name unless defined $ifile;

    print "$ifile\n";

    my $text = _slurp_file($ifile);

    for my $snippet (_keyword_contexts($text, $KEYWORD, $CONTEXT_WIDTH)) {
        print {$fh1} $snippet, "\n"
            or die "Error writing to $dfile: $!";
    }
    return;
}

# Return the entire content of $path as one string.
#
# The file is read without an encoding layer, i.e. as raw bytes.
# Dies if the file cannot be opened.
sub _slurp_file {
    my ($path) = @_;

    open my $in, '<', $path or die "Error opening $path: $!\n";

    # "local" restores the record separator on scope exit; assigning to $/
    # directly would switch every later read in the program to slurp mode.
    my $content = do { local $/; <$in> };
    close $in;

    # A failed read yields undef; normalise so callers can match safely.
    return defined $content ? $content : '';
}

# Return every occurrence of $needle in $text, each with up to $width
# characters of leading and trailing context.
#
# $needle is treated as literal text (regex metacharacters are escaped) and
# matched case-insensitively.  Matches are non-overlapping and returned in
# order of appearance; an empty list is returned when there are none.
sub _keyword_contexts {
    my ($text, $needle, $width) = @_;

    my $pattern = qr/ ( .{0,$width} \Q$needle\E .{0,$width} ) /isx;

    # In list context a /g match returns the captured group of every match.
    my @snippets = $text =~ /$pattern/g;
    return @snippets;
}