It would be interesting to benchmark the alternatives...
What do you think of
@h{<QA>}=undef;
? It still uses more memory than iteration, since it builds a (potentially large) temporary list of all the lines to use as the slice's keys, but on the flip side it's likely faster than the iteration approach for sane-sized files, I think...
Edit: benchmarks:
Re-Edit: benchmarks for different array sizes, and added "undef @h{@data}", thanks [id://hdb]:
These benchmarks are in-memory only - no file access involved, since I wasn't sure how to make sure caching didn't impact the results without creating a whole bunch of temp files and making sure the disk cache was trashed.
They're also just for the "load the data into the hash" micro-step, not the whole "compute the difference" operation.
#!/usr/bin/perl
use strict;
use warnings;
use Benchmark qw(:all :hireswallclock);
# Compare several ways of loading a list of keys into a hash, for a range of
# input sizes.  Only the "populate the hash" step is timed; @data is built
# once per size, outside the timed subs.
for my $array_size (10, 100, 1_000, 10_000, 100_000) {

    # Exactly $array_size random keys.  (The range must start at 1:
    # 0 .. $array_size would yield $array_size + 1 elements and the header
    # printed below would under-report the real size by one.)
    my @data = map { rand() } 1 .. $array_size;

    print "========== ARRAY SIZE: $array_size\n";

    cmpthese(
        -1,    # negative count: run each variant for at least 1 CPU second
        {
            # Copy @data into a temporary array first, then slice-assign
            # each key to itself.
            '@tmp=@arr' => sub {
                my @arr = @data;
                my %h;
                @h{@arr} = @arr;
            },

            # Same temporary copy, but every value ends up undef.
            '@tmp=undef' => sub {
                my @arr = @data;
                my %h;
                @h{@arr} = undef;
            },

            # Plain iteration, storing 1 for every key.
            'loop=1' => sub {
                my %h;
                for my $key (@data) {
                    $h{$key} = 1;
                }
            },

            # Plain iteration, storing undef for every key.
            'loop=undef' => sub {
                my %h;
                for my $key (@data) {
                    $h{$key} = undef;
                }
            },

            # Slice assignment without the temporary copy.  NOTE: the
            # right-hand list has one element, so only the FIRST key gets 1;
            # all remaining keys are created with undef values.
            '@h{@data}=1' => sub {
                my %h;
                @h{@data} = 1;
            },

            # Slice assignment without the temporary copy; all values undef.
            '@h{@data}=undef' => sub {
                my %h;
                @h{@data} = undef;
            },

            # undef applied directly to the slice.
            # NOTE(review): perlfunc documents undef for scalars, arrays,
            # hashes, subs and typeglobs only; confirm that this form really
            # creates all the keys on the perl version in use.
            'undef @h{@data}' => sub {
                my %h;
                undef @h{@data};
            },
        }
    );
}
Results:
========== ARRAY SIZE: 10
Rate @tmp=@arr @tmp=undef loop=1 loop=undef @h{@data}=1 @h{@data}=undef undef @h{@data}
@tmp=@arr 49764/s -- -33% -54% -55% -64% -66% -67%
@tmp=undef 74467/s 50% -- -31% -33% -46% -48% -51%
loop=1 107884/s 117% 45% -- -3% -22% -25% -28%
loop=undef 111002/s 123% 49% 3% -- -20% -23% -26%
@h{@data}=1 138688/s 179% 86% 29% 25% -- -4% -8%
@h{@data}=undef 144398/s 190% 94% 34% 30% 4% -- -4%
undef @h{@data} 150886/s 203% 103% 40% 36% 9% 4% --
========== ARRAY SIZE: 100
Rate @tmp=@arr @tmp=undef loop=1 loop=undef @h{@data}=undef @h{@data}=1 undef @h{@data}
@tmp=@arr 5566/s -- -36% -57% -57% -65% -66% -66%
@tmp=undef 8660/s 56% -- -33% -34% -45% -46% -47%
loop=1 12992/s 133% 50% -- -0% -18% -20% -20%
loop=undef 13035/s 134% 51% 0% -- -17% -19% -20%
@h{@data}=undef 15754/s 183% 82% 21% 21% -- -3% -3%
@h{@data}=1 16162/s 190% 87% 24% 24% 3% -- -0%
undef @h{@data} 16214/s 191% 87% 25% 24% 3% 0% --
========== ARRAY SIZE: 1000
Rate @tmp=@arr @tmp=undef loop=1 loop=undef @h{@data}=undef undef @h{@data} @h{@data}=1
@tmp=@arr 534/s -- -38% -59% -59% -66% -67% -67%
@tmp=undef 860/s 61% -- -34% -35% -45% -46% -46%
loop=1 1309/s 145% 52% -- -1% -16% -18% -18%
loop=undef 1316/s 147% 53% 1% -- -15% -18% -18%
@h{@data}=undef 1555/s 191% 81% 19% 18% -- -3% -3%
undef @h{@data} 1601/s 200% 86% 22% 22% 3% -- 0%
@h{@data}=1 1601/s 200% 86% 22% 22% 3% 0% --
========== ARRAY SIZE: 10000
Rate @tmp=@arr @tmp=undef loop=1 loop=undef @h{@data}=1 undef @h{@data} @h{@data}=undef
@tmp=@arr 51.5/s -- -33% -57% -60% -64% -65% -66%
@tmp=undef 77.3/s 50% -- -36% -40% -45% -48% -49%
loop=1 121/s 135% 56% -- -6% -14% -19% -20%
loop=undef 128/s 149% 66% 6% -- -9% -14% -15%
@h{@data}=1 141/s 175% 83% 17% 10% -- -5% -6%
undef @h{@data} 149/s 189% 93% 23% 16% 5% -- -1%
@h{@data}=undef 151/s 193% 95% 25% 18% 7% 1% --
========== ARRAY SIZE: 100000
Rate @tmp=@arr @tmp=undef loop=undef loop=1 @h{@data}=1 @h{@data}=undef undef @h{@data}
@tmp=@arr 3.72/s -- -35% -61% -62% -67% -68% -69%
@tmp=undef 5.74/s 54% -- -39% -41% -49% -51% -51%
loop=undef 9.42/s 153% 64% -- -4% -17% -20% -20%
loop=1 9.80/s 164% 71% 4% -- -13% -16% -17%
@h{@data}=1 11.3/s 204% 97% 20% 15% -- -4% -4%
@h{@data}=undef 11.7/s 216% 104% 25% 20% 4% -- -1%
undef @h{@data} 11.8/s 218% 106% 26% 21% 5% 1% --
|