in reply to Re^14: Addional "year" matching functionality in word matching script
in thread Addional "year" matching functionality in word matching script
Here is the program from your original post, unchanged except for numerous print statements.
#!/usr/bin/perl
# match5.pl  perl match5.pl  Test the entire program.
# From http://www.perlmonks.org/?node_id=1166649
# For every csv1 row, print each csv2 row whose title contains at least $desired of
# its distinct, non-excluded title words, then the csv1 row itself if any matched.
# The trace prints report __LINE__: adding or removing lines shifts those numbers.
use strict;
use warnings;
print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
print 'The program has started.', "\n";                 # This code is for testing.

# Load csv2 and map each full row to its title (the field after the first comma).
open my $csv2_fh, '<', 'csv2' or die "Cannot open csv2: $!";
my @csv2 = <$csv2_fh>;
close $csv2_fh;
my %csv2hash = ();
for (@csv2) {
    chomp;
    my ($title) = $_ =~ /^.+?,\s*([^,]+?),/;    # match the title
    next unless defined $title;                 # skip blank or malformed rows
    $csv2hash{$_} = $title;
}
open my $csv1_fh, '<', 'csv1' or die "Cannot open csv1: $!";
while (<$csv1_fh>) {
    chomp;
    my ($title) = $_ =~ /^.+?,\s*([^,]+?),/;    # match the title
    next unless defined $title;                 # skip blank or malformed rows
    my %words;
    $words{$_}++ for split /\s+/, $title;       # collect unique words
    my @titlewords = keys(%words);
    my @new;
    # add exception words which shouldn't be matched
    foreach my $t (@titlewords) {
        push(@new, $t) if $t !~ /^(rare|vol|volume|issue|double|magazine|mag)$/i;
    }
    print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
    print '@new: ', join(", ", @new), "\n";                 # This code is for testing.
    @titlewords = @new;
    my $desired = 5;    # minimum number of shared words for a match
    my $matched = 0;    # number of csv2 rows matched by this csv1 row
    foreach my $csv2 (keys %csv2hash) {
        print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
        print 'xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $csv2 (keys %csv2hash) { outer loop xxxxxxxxxxxxxxxxxxxxxxxx', "\n";    # This code is for testing.
        my $count = 0;
        my $value = $csv2hash{$csv2};
        print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
        print '$value: ', $value, "\n";                         # This code is for testing.
        foreach my $word (@titlewords) {
            print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
            print 'xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx', "\n";    # This code is for testing.
            my @matches = ( $value =~ /\b\Q$word\E\b/ig );    # \Q..\E: match the word literally
            print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
            print '@matches: ', join(", ", @matches), "\n";         # This code is for testing.
            my $numIncsv2 = scalar(@matches);                 # occurrences in the csv2 title
            print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
            print '$numIncsv2: ', $numIncsv2, "\n";                 # This code is for testing.
            @matches = ( $title =~ /\b\Q$word\E\b/ig );
            print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
            print '@matches: ', join(", ", @matches), "\n";         # This code is for testing.
            my $numIncsv1 = scalar(@matches);                 # occurrences in the csv1 title
            print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
            print '$numIncsv1: ', $numIncsv1, "\n";                 # This code is for testing.
            ++$count if $value =~ /\b\Q$word\E\b/i;           # one point per distinct shared word
            print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
            print '$count: ', $count, "\n";                         # This code is for testing.
            if ($count >= $desired || ($numIncsv1 >= $desired && $numIncsv2 >= $desired)) {
                $count = $desired+1;    # also reached when one word repeats $desired times in both titles
                print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
                print '$count: ', $count, "\n";                         # This code is for testing.
                last;                   # threshold reached; no need to test further words
            }
        }
        if ($count >= $desired) {
            print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
            print "$csv2\n";
            ++$matched;
        }
    }
    print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
    print "$_\n\n" if $matched;    # $_ is still the current csv1 row
}
close $csv1_fh;

print "File: ", __FILE__, " Line: ", __LINE__, "\n";    # This code is for testing.
print 'The program has ended.', "\n";                   # This code is for testing.
__END__
Here is the input file named csv2.
12278788, TV & SATELLITE WEEK 11 MAY GILLIAN ANDERSON DOCTOR WHO NOT RADIO TIMES , http://www.example.co.uk, 12
Here is the input file named csv1.
2523021356, RARE TV RADIO TIMES MAGAZINE DOCTOR WHO THE THREE 3 DOCTORS DR JON PERTWEE, http://www.example.co.uk, 12
Here is the output.
File: match5.pl Line: 9
The program has started.
File: match5.pl Line: 36
@new: JON, DOCTORS, RADIO, THREE, THE, 3, DR, DOCTOR, PERTWEE, WHO, TIMES, TV
File: match5.pl Line: 42
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $csv2 (keys %csv2hash) { outer loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 46
$value: TV & SATELLITE WEEK 11 MAY GILLIAN ANDERSON DOCTOR WHO NOT RADIO TIMES 
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: 
File: match5.pl Line: 55
$numIncsv2: 0
File: match5.pl Line: 58
@matches: JON
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 0
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: 
File: match5.pl Line: 55
$numIncsv2: 0
File: match5.pl Line: 58
@matches: DOCTORS
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 0
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: RADIO
File: match5.pl Line: 55
$numIncsv2: 1
File: match5.pl Line: 58
@matches: RADIO
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 1
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: 
File: match5.pl Line: 55
$numIncsv2: 0
File: match5.pl Line: 58
@matches: THREE
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 1
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: 
File: match5.pl Line: 55
$numIncsv2: 0
File: match5.pl Line: 58
@matches: THE
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 1
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: 
File: match5.pl Line: 55
$numIncsv2: 0
File: match5.pl Line: 58
@matches: 3
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 1
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: 
File: match5.pl Line: 55
$numIncsv2: 0
File: match5.pl Line: 58
@matches: DR
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 1
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: DOCTOR
File: match5.pl Line: 55
$numIncsv2: 1
File: match5.pl Line: 58
@matches: DOCTOR
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 2
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: 
File: match5.pl Line: 55
$numIncsv2: 0
File: match5.pl Line: 58
@matches: PERTWEE
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 2
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: WHO
File: match5.pl Line: 55
$numIncsv2: 1
File: match5.pl Line: 58
@matches: WHO
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 3
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: TIMES
File: match5.pl Line: 55
$numIncsv2: 1
File: match5.pl Line: 58
@matches: TIMES
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 4
File: match5.pl Line: 49
xxxxxxxxxxxxxxxxxxxxxxxx At the top of the foreach my $word (@titlewords) { inner loop xxxxxxxxxxxxxxxxxxxxxxxx
File: match5.pl Line: 52
@matches: TV
File: match5.pl Line: 55
$numIncsv2: 1
File: match5.pl Line: 58
@matches: TV
File: match5.pl Line: 61
$numIncsv1: 1
File: match5.pl Line: 64
$count: 5
File: match5.pl Line: 68
$count: 6
File: match5.pl Line: 74
12278788, TV & SATELLITE WEEK 11 MAY GILLIAN ANDERSON DOCTOR WHO NOT RADIO TIMES , http://www.example.co.uk, 12
File: match5.pl Line: 79
2523021356, RARE TV RADIO TIMES MAGAZINE DOCTOR WHO THE THREE 3 DOCTORS DR JON PERTWEE, http://www.example.co.uk, 12

File: match5.pl Line: 84
The program has ended.
Feel free to ask further questions.
In Section
Seekers of Perl Wisdom