#! perl
use strict;
use warnings;

use Benchmark qw(cmpthese);

# Benchmark of several ways to remove the members of one list (@array2)
# from another list (@array1).  Each candidate below is a zero-argument
# sub that works on the shared file-level test data, so that cmpthese()
# can time them against each other.

# intersect(@list1, @list2)
#
# Returns (in hash-key order) every value that was counted exactly once
# across the two arrays taken together.  Despite the name this is NOT an
# intersection: when neither array repeats a value it is the symmetric
# difference of the two arrays.  A value repeated inside a single array is
# counted more than once and is therefore dropped, even if the other array
# does not contain it.
#
# The (\@\@) prototype makes Perl pass the two named arrays by reference,
# so call sites write intersect(@a, @b) and the arrays are not flattened.
sub intersect (\@\@) {
    my ($aryRef1, $aryRef2) = @_;

    my %count;
    $count{$_}++ foreach @$aryRef1, @$aryRef2;

    return grep { $count{$_} == 1 } keys %count;
}

# Test data: @array1 is the list to be filtered, @array2 the values to
# remove.  1..100 appear twice in @array2, so the removal list itself
# contains duplicates.
my @array1 = (1 .. 28000);
my @array2 = (1 .. 1000, 1 .. 100);

# Hash keyed by @array1 (all values undef), built once up front so the
# "prehash" candidates can copy it instead of rebuilding it on every call.
my %hashBase;
@hashBase{@array1} = undef;

# Candidate: the counting approach implemented by intersect().
sub faq {
    my @dup = intersect(@array1, @array2);
}

# Candidate: build a hash keyed by @array1, then delete the @array2 keys
# with one hash-slice delete.  @dup receives the values returned by delete
# (not the surviving keys); the assignment is kept because it is part of
# the work being timed.
sub hash {
    my %hashAry;
    @hashAry{@array1} = undef;
    my @dup = delete @hashAry{@array2};
}

# Candidate: as hash(), but start from a copy of the prebuilt %hashBase.
sub prehash {
    my %hashAry = %hashBase;
    my @dup = delete @hashAry{@array2};
}

# Candidate: as hash(), but also return the surviving keys, i.e. the
# values of @array1 that are not in @array2 (in hash-key order).
sub hash_lret {
    my %hashAry;
    @hashAry{@array1} = undef;
    my @dup = delete @hashAry{@array2};
    return keys %hashAry;
}

# Candidate: as prehash(), but also return the surviving keys.
sub prehash_lret {
    my %hashAry = %hashBase;
    my @dup = delete @hashAry{@array2};
    return keys %hashAry;
}

# Candidate: build a lookup hash of the values to reject, then grep
# @array1 against it.  Unlike the hash-key candidates this keeps the
# original order of @array1.
sub grepped {
    my %reject;
    $reject{$_} = 1 for @array2;
    my @clean = grep { !$reject{$_} } @array1;
}

# Time every candidate for 100 iterations and print the comparison chart.
sub run_benchmark {
    cmpthese(
        100,
        {
            faq          => \&faq,
            hash         => \&hash,
            prehash      => \&prehash,
            hash_lret    => \&hash_lret,
            prehash_lret => \&prehash_lret,
            grepped      => \&grepped,
        }
    );
    return;
}

# Run the benchmark only when this file is executed as a script, so the
# candidate subs can be loaded (for example by a test) without paying for
# the full timing run.
run_benchmark() unless caller;

1;    # true value so the file can also be loaded with require

# Benchmark output from a recorded run is kept below.
__END__
Benchmark: timing 100 iterations of faq, grepped, hash, hash_lret, prehash, prehash_lret...
         faq: 35 wallclock secs (34.09 usr +  0.02 sys = 34.11 CPU) @  2.93/s (n=100)
     grepped: 11 wallclock secs ( 9.91 usr +  0.00 sys =  9.91 CPU) @ 10.09/s (n=100)
        hash: 10 wallclock secs (10.27 usr +  0.00 sys = 10.27 CPU) @  9.74/s (n=100)
   hash_lret: 10 wallclock secs (10.23 usr +  0.00 sys = 10.23 CPU) @  9.77/s (n=100)
     prehash: 16 wallclock secs (15.13 usr +  0.00 sys = 15.13 CPU) @  6.61/s (n=100)
prehash_lret: 16 wallclock secs (15.17 usr +  0.00 sys = 15.17 CPU) @  6.59/s (n=100)

               Rate   faq prehash_lret prehash  hash hash_lret grepped
faq          2.93/s    --         -56%    -56%  -70%      -70%    -71%
prehash_lret 6.59/s  125%           --     -0%  -32%      -33%    -35%
prehash      6.61/s  126%           0%      --  -32%      -32%    -35%
hash         9.74/s  232%          48%     47%    --       -0%     -4%
hash_lret    9.77/s  233%          48%     48%    0%        --     -3%
grepped      10.1/s  244%          53%     53%    4%        3%      --