Ok, now try this with grep added
#!/usr/bin/perl
# Word-count benchmark.
#
# Reads two text files (default 'ficc.txt' and 'fic.txt', or the first two
# command-line arguments), and for each one counts lines, whitespace-separated
# lowercase words, and distinct words. While reading the second file, every
# line is additionally filtered with grep against the word table built from
# the first file, so the cost of that lookup is included in the timing.
# Prints a summary and the elapsed wall-clock time in whole seconds.
use strict;
use warnings;

my $t0 = time;
my $file1 = $ARGV[0] || 'ficc.txt';
my $file2 = $ARGV[1] || 'fic.txt';

# --- Pass 1: build the word table for the first file ---------------------
my %uniq1;
my $count1 = 0;
my $words1 = 0;
open my $fh1, '<', $file1 or die "$file1 : $!";
while (my $line = <$fh1>) {
    # split ' ' skips leading whitespace; split /\s+/ would yield an empty
    # first field on indented lines and count it as a word.
    my @words = split ' ', lc $line;
    ++$uniq1{$_} for @words;
    $words1 += @words;
    ++$count1;
}
close $fh1;
my $unique1 = keys %uniq1;

# --- Pass 2: same counts for the second file, plus the grep lookup -------
my %uniq2;
my $count2 = 0;
my $words2 = 0;
open my $fh2, '<', $file2 or die "$file2 : $!";
while (my $line = <$fh2>) {
    my @words = split ' ', lc $line;
    ++$uniq2{$_} for @words;
    $words2 += @words;
    ++$count2;
    # Words of this line that also occur in file 1. The result is not used
    # further; it is computed only so the benchmark measures the lookup.
    my @match = grep { $uniq1{$_} } @words;
}
close $fh2;    # the original closed the first file's handle here by mistake
my $unique2 = keys %uniq2;

my $dur = time - $t0;
print "
File1 : $count1 lines $words1 words $unique1 unique in $file1
File2 : $count2 lines $words2 words $unique2 unique in $file2
Time : $dur seconds\n";
These are the results on my i5-2500K:
File1 : 4000000 lines 7998273 words 6379952 unique in ficc1.txt
File2 : 4000000 lines 11999843 words 9364684 unique in fic1.txt
Time : 37 seconds
poj |