#### Filename Content-Base Title X-Meta-author X-Meta-description X-Meta-keywords X-Meta-name Test/1.html Aberdeen%20Animal%20trait%20analysis , News Search | Ask.com #### my $string = quotemeta 'CEO'; while ( $text =~ m/ ( .{0,25} $string.{0,25} ) /gisx ) { print $fh1 $1, ","; } #### Test/Ames_Animal trait analysis.html.result.txt_parsed_for_news.txt.html Ames Animal trait analysis , News Search | Ask.com Test/Ames_Biobank.html.result.txt_parsed_for_clinic.txt.html both adults and infants. Dr. Kocher has requested who hrough a separate study. Dr. Lazaridis' samples alon colon and rectal cancer. Dr. Nelson has requested sto rointestinal microbiome. Dr. Nelson and her colleague in a new research study. Dr. Ames is recruiting parti ers.

In addition Dr. Thibodeau has expanded t sh; who have PKD.

Dr. Harris' goal is to bette h another study.

Dr. Heit has also asked for ients who've had a clot. Dr. Heit's goal is to identi To study microvesicles Dr. Jayachandran is requesti pice caregivers.

Dr. Kaur is researching whet 18">Nilufer Taner M.D. Ph.D. is studying geneti 0027660">Janet E. Olson Ph.D. and Awards ## my $string = quotemeta 'CEO'; while ( $text =~ m/ ( .{0,25} $string.{0,25} ) /gisx ) { print $fh1 $1, ","; } ####
#!perl
# Walk $dir for HTML files and write one CSV row per file to $dfile:
# the file name, the <head> values listed in @TAGS, and - for every
# keyword in @TAGS2 - the text surrounding each occurrence of that
# keyword (up to 25 characters on either side).
use strict;
use warnings;
use File::Find;
use HTTP::Headers;
use HTML::HeadParser;
use Text::CSV;

# config
my $dfile = 'all_tags.csv';
my $dir   = 'Test';

# Header fields, as named by HTML::HeadParser, copied into the CSV.
my @TAGS = (
    'Content-Base',
    'Title',
    'X-Meta-author',
    'X-Meta-description',
    'X-Meta-keywords',
    'X-Meta-name',
);

# Keywords whose surrounding text is extracted from each file.
# Each entry becomes one CSV column, so the list holds no duplicates.
my @TAGS2 = (
    'CEO',
    'founder',
    'professor',
    'Dr.',
    'Ph.D',
    'M.D.',
    'company called',
    'startup called',
    'joins',
    'receives funding',
    'SBIR',
    'receiving the grant',
    'seed investment',
    'seed fund',
    'appointed',
    'chosen',
    'secures',
    'award',
    'awarded',
);

# One precompiled pattern per keyword. quotemeta keeps '.' and spaces
# literal (an escaped space still matches a space under /x); /s lets the
# 25-character window run across line breaks.
my %context_re_for;
for my $keyword (@TAGS2) {
    my $quoted = quotemeta $keyword;
    $context_re_for{$keyword} = qr/ ( .{0,25} $quoted .{0,25} ) /isx;
}

# output
# binary => 1 so that fields holding non-ASCII or control characters are
# quoted and written instead of being rejected.
my $csv = Text::CSV->new({ binary => 1, eol => $/ })
    or die "Cannot create Text::CSV object: " . Text::CSV->error_diag;
open my $fh1, ">:encoding(utf8)", $dfile
    or die "Error opening $dfile: $!";

# header row: file name, parsed <head> fields, then one column per keyword
$csv->print($fh1, ['Filename', @TAGS, @TAGS2])
    or die "Error writing header to $dfile: " . $csv->error_diag;

# input
find({ wanted => \&HTML_Files, no_chdir => 1 }, $dir);
close $fh1 or die "Error closing $dfile: $!";
exit;

# File::Find callback. With no_chdir => 1, $_ holds the same path as
# $File::Find::name, so the file test and the regex both see the full path.
# Only plain files ending in .htm or .html are parsed.
sub HTML_Files {
    return unless -f $_ && /\.html?$/;
    parse_HTML_Header($File::Find::name);
    return;
}

# Return every context snippet for $keyword found in $text, joined with
# ' | '. Runs of whitespace inside a snippet are collapsed to one space so
# that a cell never spans several lines. Returns '' when nothing matches.
sub _keyword_contexts {
    my ($text, $keyword) = @_;
    my $re   = $context_re_for{$keyword};
    my @hits = $text =~ /$re/g;
    s/\s+/ /g for @hits;
    return join ' | ', @hits;
}

# Parse one HTML file and append its row to the CSV.
#   $ifile - path of the file to read
# Dies if the file cannot be opened or the row cannot be written.
sub parse_HTML_Header {
    my $ifile = shift;
    print "parsing $ifile\n";

    # NOTE(review): the file is read as raw bytes while the CSV is written
    # through a UTF-8 layer; if the pages are UTF-8 the input handle
    # probably needs a matching decoding layer - confirm against real data.
    open my $fh0, '<', $ifile or die "Error opening $ifile: $!\n";
    my $text = do { local $/; <$fh0> };
    close $fh0;

    my $h = HTTP::Headers->new;
    my $p = HTML::HeadParser->new($h);
    $p->parse($text);

    # scalar() yields exactly one value per tag: undef when the header is
    # absent, the values joined with ', ' when it is repeated. In list
    # context header() returns zero or several values, which would shift
    # the following columns.
    my @cols  = map { scalar $h->header($_) } @TAGS;
    my @cols2 = map { _keyword_contexts($text, $_) } @TAGS2;

    $csv->print($fh1, [$ifile, @cols, @cols2])
        or die "Error writing row for $ifile: " . $csv->error_diag;
    return;
}