Re: Parsing web content with File::Find, HTML::HeadParser and using a regex

I think your problem is here: you are outputting the same line once for each header tag.

 for ($h->header_field_names){
   $csv->print ($fh1, [map {$File::Find::name, $h->header($_), "=CH
+AR(13)"} @fields]);
.
.
[download]

Here's a cleaned-up version of your script (untested):

#!perl
use strict;
use warnings;
use File::Find;
use HTTP::Headers;
use HTML::HeadParser;
use Text::CSV;

# config
my $dfile  = 'all_tags.csv';   # CSV report written by this script
my $dir    = 'Test';           # root directory searched for HTML files
# Header fields extracted from every file; one CSV column per entry.
my @TAGS = ('Content-Base', 'Title', 
            'X-Meta-author', 'X-Meta-description', 
            'X-Meta-keywords', 'X-Meta-name',);

# output
# binary => 1 is required for fields that contain embedded newlines or
# non-ASCII bytes (typical for meta descriptions); without it print()
# fails on such rows.
my $csv = Text::CSV->new({binary => 1, eol => $/})
    or die "Cannot create Text::CSV object: " . Text::CSV->error_diag();
open my $fh1, ">:encoding(utf8)", $dfile 
    or die "Error opening $dfile: $!";
$csv->print($fh1,['Filename',@TAGS]) # header
    or die "Error writing header to $dfile: " . $csv->error_diag();

# input
# no_chdir => 1 keeps the working directory fixed, so the paths handed to
# the callback can be opened as-is.
find ({wanted =>\&HTML_Files, no_chdir => 1}, $dir);
# Buffered write errors surface at close, so check it.
close $fh1 or die "Error closing $dfile: $!";
exit;

# File::Find "wanted" callback: invoked once per entry found under the
# search root, with the current entry in $_.  Regular files whose name
# ends in .htm or .html are handed to parse_HTML_Header(); everything
# else is ignored.
sub HTML_Files {
  return unless /\.html?\z/;  # \z (not $) so a trailing newline cannot match
  return unless -f $_;        # skip directories etc. that merely end in .html
  parse_HTML_Header($File::Find::name);
}

# Parse the <head> section of one HTML file and append the results to the
# report.
#
# Argument:
#   $ifile - path of the HTML file to read.
#
# Uses the file-level $csv, $fh1 and @TAGS.
#
# Output written to $fh1:
#   - one CSV row: the file name followed by one value per entry of @TAGS
#     (empty when the header is absent);
#   - then one raw line (not CSV-formatted) per occurrence of 'CEO',
#     case-insensitive, with up to 25 characters of context on each side.
#
# Dies if the input file cannot be opened; warns if the CSV row cannot be
# written.
sub parse_HTML_Header {

  my $ifile = shift;
  print "parsing $ifile\n";
  
  # Slurp the whole file: $/ is undefined only inside the do block.
  open my $fh0, '<', $ifile or die "Error opening $ifile: $!\n";
  my $text = do{ local $/; <$fh0> };
  close $fh0;

  # HTML::HeadParser stores what it finds in the HTTP::Headers object.
  my $h = HTTP::Headers->new;
  my $p = HTML::HeadParser->new($h);
  $p->parse($text);
   
  # Force scalar context: in list context header() returns nothing for a
  # missing field and several values for a repeated one, which would shift
  # the remaining columns out of line with the header row.
  my @cols = map{ scalar $h->header($_) }@TAGS;
  $csv->print($fh1, [$ifile,@cols])
    or warn "Error writing CSV row for $ifile: " . $csv->error_diag() . "\n";
     
  # Keyword-in-context search; /s lets the context span line breaks, so a
  # printed match may itself contain newlines.
  my $string = quotemeta 'CEO';
  while ( $text =~ m/ ( .{0,25} $string.{0,25} ) /gisx ) {    
    print $fh1 $1,"\n";
  }
}
[download]

Did you solve this HTML::HeadParser challenges ?

poj

Comment on Re: Parsing web content with File::Find, HTML::HeadParser and using a regex Select or Download Code