Here is the program from your original post, unchanged except for numerous print statements.
#!/usr/bin/perl
# match5.pl perl match5.pl Test the entire program.
# From http://www.perlmonks.org/?node_id=1166649
use strict;
use warnings;
# ---------------------------------------------------------------------------
# Title matcher with trace output.
#
# Reads every line of the file "csv2" and remembers its title field. Then,
# for every line of the file "csv1", it collects the distinct words of that
# line's title (minus a small list of exception words) and compares them
# against each csv2 title. A csv2 line is reported when at least $desired
# distinct csv1 title words occur in its title, or when a single word occurs
# at least $desired times in both titles. After the matching csv2 lines, the
# csv1 line itself is printed.
#
# All prints marked "This code is for testing." are trace output only.
# ---------------------------------------------------------------------------

# Print the file name and line number of the caller (trace aid).
sub trace_location {
    my (undef, $file, $line) = caller;
    print "File: ", $file, " Line: ", $line, "\n";
    return;
}

# Extract the title from one CSV line: the text captured between the comma
# that ends the leading field(s) and the next comma (normally the second
# field). Returns undef when the line does not match that layout.
sub extract_title {
    my ($line) = @_;
    my ($title) = $line =~ /^.+?,\s*([^,]+?),/;
    return $title;
}

trace_location();                          # This code is for testing.
print 'The program has started.', "\n";    # This code is for testing.

# Map each complete csv2 line to its title.
my %csv2hash = ();

open my $csv2_fh, '<', 'csv2' or die "Cannot open csv2: $!";
while (my $line = <$csv2_fh>) {
    chomp $line;
    my $title = extract_title($line);
    next unless defined $title;            # skip lines without a title field
    $csv2hash{$line} = $title;
}
close $csv2_fh;

# Words that must never count towards a match.
my $exception_re = qr/^(?:rare|vol|volume|issue|double|magazine|mag)$/i;

# Number of matching words (or repetitions of one word) required.
my $desired = 5;

open my $csv1_fh, '<', 'csv1' or die "Cannot open csv1: $!";
while (my $line = <$csv1_fh>) {
    chomp $line;
    my $title = extract_title($line);
    next unless defined $title;            # skip lines without a title field

    # Collect the unique words of the title; split ' ' ignores leading
    # whitespace, so no empty word can be produced.
    my %words;
    $words{$_}++ for split ' ', $title;

    # Drop the exception words which shouldn't be matched.
    my @titlewords = grep { $_ !~ $exception_re } keys %words;
    trace_location();                                  # This code is for testing.
    print '@new: ', join(", ", @titlewords), "\n";     # This code is for testing.

    my $matched = 0;

    foreach my $csv2 (keys %csv2hash) {
        trace_location();                              # This code is for testing.
        print 'xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $csv2',
              ' (keys %csv2hash) { outer loop xxxxxxxxxxxxxxxxxxxxxxxx',
              "\n";                                    # This code is for testing.
        my $count = 0;
        my $value = $csv2hash{$csv2};
        trace_location();                              # This code is for testing.
        print '$value: ', $value, "\n";                # This code is for testing.

        foreach my $word (@titlewords) {
            trace_location();                          # This code is for testing.
            print 'xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach ',
                  'my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx',
                  "\n";                                # This code is for testing.

            # \Q...\E makes the word match literally, so words containing
            # regex metacharacters (e.g. "C++") cannot break the pattern.
            my $word_re = qr/\b\Q$word\E\b/i;

            my @matches = ( $value =~ /$word_re/g );
            trace_location();                              # This code is for testing.
            print '@matches: ', join(", ", @matches), "\n"; # This code is for testing.
            my $numIncsv2 = scalar(@matches);
            trace_location();                              # This code is for testing.
            print '$numIncsv2: ', $numIncsv2, "\n";        # This code is for testing.

            @matches = ( $title =~ /$word_re/g );
            trace_location();                              # This code is for testing.
            print '@matches: ', join(", ", @matches), "\n"; # This code is for testing.
            my $numIncsv1 = scalar(@matches);
            trace_location();                              # This code is for testing.
            print '$numIncsv1: ', $numIncsv1, "\n";        # This code is for testing.

            # One point per distinct csv1 word found in the csv2 title.
            ++$count if $numIncsv2 > 0;
            trace_location();                              # This code is for testing.
            print '$count: ', $count, "\n";                # This code is for testing.

            if ($count >= $desired
                || ($numIncsv1 >= $desired && $numIncsv2 >= $desired)) {
                # Force the threshold test after the loop to succeed.
                $count = $desired + 1;
                trace_location();                          # This code is for testing.
                print '$count: ', $count, "\n";            # This code is for testing.
                last;
            }
        }

        if ($count >= $desired) {
            trace_location();                          # This code is for testing.
            print "$csv2\n";
            ++$matched;
        }
    }

    trace_location();                                  # This code is for testing.
    print "$line\n\n" if $matched;
}
close $csv1_fh;

trace_location();                        # This code is for testing.
print 'The program has ended.', "\n";    # This code is for testing.
__END__
Here is the input file named csv2.
12278788, TV & SATELLITE WEEK 11 MAY GILLIAN ANDERSON DOCTOR WHO NOT RADIO TIMES , http://www.example.co.uk, 12
Here is the input file named csv1.
2523021356, RARE TV RADIO TIMES MAGAZINE DOCTOR WHO THE THREE 3 DOCTORS DR JON PERTWEE, http://www.example.co.uk, 12
Here is the output.
Feel free to ask further questions.