in reply to Extracting common words
After reworking your code slightly to use "built in" files, the results are as expected:
#!/usr/bin/perl
use strict;
use warnings;

=pod

Removed original file name code to make sample self contained.

    my $f1 = shift;
    my $f2 = shift;

    if (! defined($f1) or ! defined($f2)) {
        die "Need two text file names as arguments. \n";
    }

=cut

my $file1Content = <<CONTENT;
red green blue red orange
CONTENT

my $file2Content = <<CONTENT;
yellow orange red grey purple
CONTENT

# "Built in" files: open() on a scalar reference reads from the string, so
# the sample needs no external files.  The opens are still checked because
# an in-memory open can fail too.
open my $file1, '<', \$file1Content
    or die "Can't open in-memory file 1: $!\n";
open my $file2, '<', \$file2Content
    or die "Can't open in-memory file 2: $!\n";

my @common = common_words($file1, $file2);

print $_, "\n\n" for @common;
printf "Found %1.0f words in common\n", scalar @common;

# common_words($fh1, $fh2)
#
# Reads both handles to end of file and returns, in list context, the
# alphabetically sorted words that occur in both inputs.  Words are compared
# after normalised_words() has cleaned them up, so case and punctuation do
# not matter.
sub common_words {
    my ($fh1, $fh2) = @_;

    # Every word of the first input starts with a count of 1.
    my %results;
    while (my $line = <$fh1>) {
        $results{$_} = 1 for normalised_words($line);
    }

    # Bump the count once per distinct word of the second input.  Counting
    # only the first sighting means a repeated word can neither inflate the
    # count nor reset it, so a count above 1 always means "in both inputs".
    my %seenInFile2;
    while (my $line = <$fh2>) {
        for my $word (normalised_words($line)) {
            next if $seenInFile2{$word}++;
            $results{$word}++;
        }
    }

    # Sort by name so the order is the same on every run.
    my @inBoth = sort grep { $results{$_} > 1 } keys %results;
    return @inBoth;
}

# normalised_words($line)
#
# Splits one line of text into lower-cased words.  Punctuation acts as a
# separator, anything that is not an ASCII letter or digit is removed, and
# tokens that end up empty are dropped.  Both inputs go through this one
# routine so the same text always yields the same words.
sub normalised_words {
    my ($line) = @_;

    $line =~ s/[[:punct:]]/ /g;

    my @words;
    # split ' ' splits on any run of whitespace and ignores leading
    # whitespace, so no empty leading field is produced.
    for my $word (split ' ', $line) {
        $word =~ s/[^A-Za-z0-9]//g;
        push @words, lc $word if length $word;
    }

    return @words;
}
Prints:
orange

red

Found 2 words in common
Maybe you can provide "file contents" that fail in the way you didn't describe?
Of course, the code can be cleaned up a little:
#!/usr/bin/perl
use strict;
use warnings;

my $file1Content = <<CONTENT;
red green blue red orange
CONTENT

my $file2Content = <<CONTENT;
yellow orange red grey purple
CONTENT

# open() on a scalar reference gives an in-memory file.  Check the result:
# if the open fails, the read loops would otherwise just see no lines.
open my $file1, '<', \$file1Content
    or die "Can't open in-memory file 1: $!\n";
open my $file2, '<', \$file2Content
    or die "Can't open in-memory file 2: $!\n";

my @common = common_words($file1, $file2);

print "$_\n\n" for @common;
printf "Found %1.0f words in common\n", scalar @common;

# common_words($fh1, $fh2)
#
# Reads both handles to end of file and returns, in list context, the
# sorted list of distinct words that appear in both inputs.
sub common_words {
    my ($fh1, $fh2) = @_;

    # Set of words seen in the first input.
    my %group1;
    while (my $line = <$fh1>) {
        $group1{$_} = 1 for words_in($line);
    }

    # Set of second-input words that are also in the first input; using a
    # hash collapses repeats.
    my %common;
    while (my $line = <$fh2>) {
        $common{$_} = 1 for grep { exists $group1{$_} } words_in($line);
    }

    my @sorted = sort keys %common;
    return @sorted;
}

# words_in($line)
#
# Returns the lower-cased words of one line.  Any run of non-word characters
# or digits separates words; the empty leading field that split produces
# when the line starts with a separator is discarded.
sub words_in {
    my ($line) = @_;

    return map { lc } grep { length } split /[\W\d]+/, $line;
}
|
|---|