#!perl -w
use strict;
use 5.010;
use Benchmark qw(:all);
use File::Map 'map_file';
# Path of the scratch file: the script's own path with ".testdata" appended.
my $testfile= "$0.testdata";
# 20 million repetitions of the ten digits (200_000_000 characters);
# exactly one character in ten is a '0'.
my $data= '0123456789' x 20e6;
# Create the test file. Because it has only just been written, it is most
# likely still hot in the OS cache when the file-based variants read it back.
open my $fh, '>', $testfile
    or die "Couldn't create '$testfile': $!";
print {$fh} $data
    or die "Couldn't write to '$testfile': $!";
# Close explicitly and check the result: buffered write errors (e.g. a full
# disk) only surface at close time, and a short file would silently skew
# every file-based timing.
close $fh
    or die "Couldn't close '$testfile': $!";
# Baseline variant: count the '0' characters in the in-memory string $data.
# Takes no arguments (anything passed is ignored) and returns the count.
sub tr_in_memory {
    # tr maps '0' onto itself, so it is used here purely for its return
    # value, the number of matching characters.
    my $count = ($data =~ tr[0][0]);
    return $count;
}
# Memory-mapped variant: map the test file into a scalar with File::Map and
# count the '0' characters in it.
# Takes no arguments (anything passed is ignored) and returns the count.
sub tr_map_file {
    # The mapping is created anew on every call, so the cost of setting it
    # up is part of what gets timed.
    map_file my $content, $testfile;
    my $count = ($content =~ tr[0][0]);
    return $count;
}
# Buffered-read variant: count the '0' characters in the test file while
# reading it in records of 10_000 bytes.
# Takes no arguments and returns the total count.
# Dies if the test file cannot be opened.
sub tr_via_readline_10_000 {
    my $total_filtered = 0;
    # Fail loudly: an unopened handle would otherwise just time an empty loop.
    open my $cgs, '<', $testfile
        or die "Couldn't read '$testfile': $!";
    # A reference to an integer in $/ switches readline from line mode to
    # fixed-size record mode.
    local $/ = \10_000; # blocksize
    # Read into a lexical so that the global $_ is not clobbered.
    while (my $chunk = <$cgs>) {
        $total_filtered += ($chunk =~ tr/0/0/);
    }
    return $total_filtered;
}
# Buffered-read variant: count the '0' characters in the test file while
# reading it in records of 100_000 bytes.
# Takes no arguments and returns the total count.
# Dies if the test file cannot be opened.
sub tr_via_readline_100_000 {
    my $total_filtered = 0;
    # Fail loudly: an unopened handle would otherwise just time an empty loop.
    open my $cgs, '<', $testfile
        or die "Couldn't read '$testfile': $!";
    # A reference to an integer in $/ switches readline from line mode to
    # fixed-size record mode.
    local $/ = \100_000; # blocksize
    # Read into a lexical so that the global $_ is not clobbered.
    while (my $chunk = <$cgs>) {
        $total_filtered += ($chunk =~ tr/0/0/);
    }
    return $total_filtered;
}
# Buffered-read variant: count the '0' characters in the test file while
# reading it in records of 1_000_000 bytes.
# Takes no arguments and returns the total count.
# Dies if the test file cannot be opened.
sub tr_via_readline_1_000_000 {
    my $total_filtered = 0;
    # Fail loudly: an unopened handle would otherwise just time an empty loop.
    open my $cgs, '<', $testfile
        or die "Couldn't read '$testfile': $!";
    # A reference to an integer in $/ switches readline from line mode to
    # fixed-size record mode.
    local $/ = \1_000_000; # blocksize
    # Read into a lexical so that the global $_ is not clobbered.
    while (my $chunk = <$cgs>) {
        $total_filtered += ($chunk =~ tr/0/0/);
    }
    return $total_filtered;
}
# Report the size of the dataset, then run every variant 30 times and print
# Benchmark's comparison table.
printf "Running with a dataset of %d\n", length $data;
cmpthese( 30, {
    'tr_map_file'          => \&tr_map_file,
    'tr_in_memory'         => \&tr_in_memory,
    'tr_via_readline 10k'  => \&tr_via_readline_10_000,
    'tr_via_readline 100k' => \&tr_via_readline_100_000,
    'tr_via_readline 1m'   => \&tr_via_readline_1_000_000,
});
# The scratch file is recreated on every run, so do not leave it lying
# around next to the script. Failure to remove it only deserves a warning,
# because the measurements have already been printed.
unlink $testfile
    or warn "Couldn't remove '$testfile': $!";
####
X:\>perl -w tmp.pl
Running with a dataset of 200000000
Rate tr_via_readline 10k tr_via_readline 100k tr_via_readline 1m tr_map_file tr_in_memory
tr_via_readline 10k 2.18/s -- -15% -17% -71% -76%
tr_via_readline 100k 2.55/s 17% -- -2% -66% -72%
tr_via_readline 1m 2.61/s 20% 2% -- -66% -72%
tr_map_file 7.60/s 249% 198% 191% -- -18%
tr_in_memory 9.25/s 325% 263% 254% 22% --
####
s/iter tr_via_readline 10k tr_via_readline 100k tr_via_readline 1m tr_map_file tr_in_memory
tr_via_readline 10k 4.61 -- -15% -17% -65% -77%
tr_via_readline 100k 3.91 18% -- -2% -59% -72%
tr_via_readline 1m 3.83 20% 2% -- -58% -72%
tr_map_file 1.60 189% 145% 140% -- -32%
tr_in_memory 1.08 327% 262% 255% 48% --