Update: Added results from a 4.0 GHz machine running CentOS 7.3.
The script may be useful in the future for comparing with later versions of Perl.
timethese 200000, { Big => sub { big_look($big_ref) }, # $cells->{ ($p->[1] << 32) | ($ +p->[0] & 0xFFFFFFFF) } Kgb => sub { kgb_look($kgb_ref) }, # $cells->{ $str } # optimized Lan => sub { lan_look($lan_ref) }, # $cells->{ "@$p" } Mat => sub { mat_look($mat_ref) }, # $cells->{ $_->[0] }{ $_->[1] } Pak => sub { pak_look($pak_ref) }, # $cells->{ pack "ii", $p->[0], +$p->[1] } St2 => sub { st2_look($st2_ref) }, # $cells->{ join(':', @$p) } St3 => sub { st3_look($st3_ref) }, # $cells->{ $str } # optimized Str => sub { str_look($str_ref) }, # $cells->{ $p->[0] .':'. $p->[1 +] } };
So, here is the test.pl script. Basically, the OP script with extra solutions by various monks in this thread.
use strict; # use warnings; use feature qw(state); use Benchmark qw(timethese); use Data::Dump qw(pp); my @points = ( [ 0, 0 ], [ -1, -2 ], [ 1, 2 ], [ -1, 2 ], [ 1, -2 ], [ 0, 1 ], [ 1, 0 ], [ -1, 0 ], [ 0, -1 ], [ 2147483647, 2147483647 ], [ 2147483647, -2147483647 ], [ -2147483647, 2147483647 ], [ -2147483647, -2147483647 ], [ -1, 2147483647 ], [ 2147483647, 1 ], [ -2, 2147483646 ], [ 1, -2147483647 ], [ 1234561, 1234562 ], [ 1234563, -1234564 ], [ -1234565, 1234566 ], [ -1234567, -1234568 ], [ 10, 11 ], [ 11, 12 ], [ 12, 13 ], [ 13, 14 ], [ 14, 15 ], [ 15, 16 ], [ 16, 17 ], [ 17, 18 ], [ 18, 19 ], [ 19, 20 ], [ 1001, 1002 ], [ 1003, 1004 ], [ 1005, 1006 ], [ 1007, 1008 ], [ 1009, 1010 ], [ 1011, 1012 ], [ 1013, 1014 ], [ 1015, 1016 ], [ 1017, 1018 ], [ 1019, 1020 ], [ -1001, -1002 ], [ -1003, -1004 ], [ -1005, -1006 ], [ -1007, -1008 ], [ -1009, -1010 ], [ -1011, -1012 ], [ -1013, -1014 ], [ -1015, -1016 ], [ -1017, -1018 ], [ -1019, -1020 ], [ 99910, 99911 ], [ 99911, 99912 ], [ 99912, 99913 ], [ 99913, 99914 ], [ 99914, 99915 ], [ 99915, 99916 ], [ 99916, 99917 ], [ 99917, 99918 ], [ 99918, 99919 ], [ 99919, 99920 ], [ -99910, -99911 ], [ -99911, -99912 ], [ -99912, -99913 ], [ -99913, -99914 ], [ -99914, -99915 ], [ -99915, -99916 ], [ -99916, -99917 ], [ -99917, -99918 ], [ -99918, -99919 ], [ -99919, -99920 ], [ 1099910, 1099911 ], [ 1099911, 1099912 ], [ 1099912, 1099913 ], [ 1099913, 1099914 ], [ 1099914, 1099915 ], [ 1099915, 1099916 ], [ 1099916, 1099917 ], [ 1099917, 1099918 ], [ 1099918, 1099919 ], [ 1099919, 1099920 ], [ -1099910, -1099911 ], [ -1099911, -1099912 ], [ -1099912, -1099913 ], [ -1099913, -1099914 ], [ -1099914, -1099915 ], [ -1099915, -1099916 ], [ -1099916, -1099917 ], [ -1099917, -1099918 ], [ -1099918, -1099919 ], [ -1099919, -1099920 ], [ 91099910, 91099911 ], [ 91099911, 91099912 ], [ 91099912, 91099913 ], [ 91099913, 91099914 ], [ 91099914, 91099915 ], [ 91099915, 91099916 ], [ 91099916, 91099917 ], [ 91099917, 91099918 ], [ 91099918, 91099919 ], [ 91099919, 91099920 ], [ -91099910, -91099911 ], [ -91099911, -91099912 ], [ -91099912, -91099913 ], [ -91099913, -91099914 ], [ -91099914, -91099915 ], [ -91099915, -91099916 ], [ -91099916, -91099917 ], [ -91099917, -91099918 ], [ -91099918, -91099919 ], [ -91099919, -91099920 ], [ 491099910, 491099911 ], [ 491099911, 491099912 ], [ 491099912, 491099913 ], [ 491099913, 491099914 ], [ 491099914, 491099915 ], [ 491099915, 491099916 ], [ 491099916, 491099917 ], [ 491099917, 491099918 ], [ 491099918, 491099919 ], [ 491099919, 491099920 ], [ -491099910, -491099911 ], [ -491099911, -491099912 ], [ -491099912, -491099913 ], [ -491099913, -491099914 ], [ -491099914, -491099915 ], [ -491099915, -491099916 ], [ -491099916, -491099917 ], [ -491099917, -491099918 ], [ -491099918, -491099919 ], [ -491099919, -491099920 ], ); my $npoints = @points; sub str_hash { # print "string_hash---------------\n"; my %cells; # insert the points into the hash for my $p (@points) { my $h = $p->[0] . ':' . $p->[1]; my ( $x, $y ) = split ':', $h; # print "x='$p->[0]' y='$p->[1]' h='$h' (x='$x' y='$y')\n"; if ($x != $p->[0]) { die; } if ($y != $p->[1]) { die; } $cells{$h} = undef; # ++$cells{$h}; } scalar(keys %cells) == $npoints or die; # lookup each points in the hash for my $p (@points) { my $h = $p->[0] . ':' . $p->[1]; exists $cells{$h} or die; } exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub big_hash { # print "bigint_hash---------------\n"; my %cells; # insert the points into the hash for my $p (@points) { my $h = ($p->[1] << 32) | ($p->[0] & 0xFFFFFFFF); my $x = $h & 0x00000000FFFFFFFF; my $y = ($h & 0xFFFFFFFF00000000) >> 32; if ($x >> 31) { $x -= 0xFFFFFFFF + 1 } if ($y >> 31) { $y -= 0xFFFFFFFF + 1 } # print "x='$p->[0]' y='$p->[1]' h='$h' (x='$x' y='$y')\n"; if ($x != $p->[0]) { die; } if ($y != $p->[1]) { die; } $cells{$h} = undef; # ++$cells{$h}; } scalar(keys %cells) == $npoints or die; # lookup each point in the hash for my $p (@points) { my $h = ($p->[1] << 32) | ($p->[0] & 0xFFFFFFFF); exists $cells{$h} or die; } exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub pak_hash { # print "unpack_hash---------------\n"; my %cells; # insert the points into the hash for my $p (@points) { my $h = pack "ii", $p->[0], $p->[1]; my ( $x, $y ) = unpack "ii", $h; # print "x='$p->[0]' y='$p->[1]' h='$h' (x='$x' y='$y')\n"; if ($x != $p->[0]) { die; } if ($y != $p->[1]) { die; } $cells{$h} = undef; # ++$cells{$h}; } scalar(keys %cells) == $npoints or die; # lookup each point in the hash for my $p (@points) { my $h = pack "ii", $p->[0], $p->[1]; exists $cells{$h} or die; } exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub str_look { my $cells = shift; for my $p (@points) { # my $h = $p->[0] . ':' . $p->[1]; exists $cells->{$p->[0] . ':' . $p->[1]} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub big_look { my $cells = shift; for my $p (@points) { # my $h = ($p->[1] << 32) | ($p->[0] & 0xFFFFFFFF); exists $cells->{($p->[1] << 32) | ($p->[0] & 0xFFFFFFFF)} or die +; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub pak_look { my $cells = shift; for my $p (@points) { # my $h = pack "ii", $p->[0], $p->[1]; exists $cells->{pack "ii", $p->[0], $p->[1]} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub st2_look { my $cells = shift; for my $p (@points) { exists $cells->{ join(':',@$p) } or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub st3_look { my $cells = shift; state $points_str = [ map { join ':', @{$_} } @points ]; for my $p (@{ $points_str }) { exists $cells->{$p} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub mat_hash { my %cells; $cells{$_->[0]}{$_->[1]} = undef for @points; my $ncells = 0; $ncells += keys %{$cells{$_}} for keys %cells; $ncells == $npoints or die; exists $cells{$_->[0]}{$_->[1]} or die for @points; exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub mat_look { my $cells = shift; for my $p (@points) { exists $cells->{$p->[0]} or die; exists $cells->{$p->[0]}{$p->[1]} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub lan_hash { # print "string_hash---------------\n"; my %cells; # insert the points into the hash for my $p (@points) { my $h = $p->[0] . ' ' . $p->[1]; my ( $x, $y ) = split ' ', $h; # print "x='$p->[0]' y='$p->[1]' h='$h' (x='$x' y='$y')\n"; if ($x != $p->[0]) { die; } if ($y != $p->[1]) { die; } $cells{$h} = undef; # ++$cells{$h}; } scalar(keys %cells) == $npoints or die; # lookup each points in the hash for my $p (@points) { my $h = $p->[0] . ' ' . $p->[1]; exists $cells{$h} or die; } exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub lan_look { my $cells = shift; for my $p (@points) { exists $cells->{ "@$p" } or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub kgb_hash { my %cells = map { pp($_) => undef } @points; \%cells; } sub kgb_look { my $cells = shift; state $points_str = [ map { pp($_) } @points ]; for my $p (@{ $points_str }) { exists $cells->{$p} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } my $big_ref = big_hash(); my $kgb_ref = kgb_hash(); my $lan_ref = lan_hash(); my $mat_ref = mat_hash(); my $pak_ref = pak_hash(); my $st2_ref = str_hash(); my $st3_ref = str_hash(); my $str_ref = str_hash(); timethese 200000, { Big => sub { big_look($big_ref) }, # $cells->{ ($p->[1] << 32) | ($ +p->[0] & 0xFFFFFFFF) } Kgb => sub { kgb_look($kgb_ref) }, # $cells->{ $str } # optimized Lan => sub { lan_look($lan_ref) }, # $cells->{ "@$p" } Mat => sub { mat_look($mat_ref) }, # $cells->{ $_->[0] }{ $_->[1] } Pak => sub { pak_look($pak_ref) }, # $cells->{ pack "ii", $p->[0], +$p->[1] } St2 => sub { st2_look($st2_ref) }, # $cells->{ join(':', @$p) } St3 => sub { st3_look($st3_ref) }, # $cells->{ $str } # optimized Str => sub { str_look($str_ref) }, # $cells->{ $p->[0] .':'. $p->[1 +] } };
Perhaps, future processors may reach 5 GHz. The following was captured on a CentOS 7.3 machine running at 4.0 GHz. These days, processors are equipped with Turbo Boost allowing up to 4.0 GHz for single task.
The native Perl on CentOS 7.3 is v5.16.3. I've gone ahead and compiled Perl v5.26.0 for comparison.
config_args='-Dprefix=/opt/perl-5.26.0 -sder -Dusethreads -Accflags=-m +sse4.2'
$ /usr/bin/perl test.pl Benchmark: timing 200000 iterations of Big, Kgb, Lan, Mat, Pak, St2, S +t3, Str... Big: 6 wallclock secs ( 5.39 usr + 0.0 sys = 5.39 CPU) @ 37105.75/s +(n=200000) Kgb: 2 wallclock secs ( 1.90 usr + 0.0 sys = 1.90 CPU) @ 105263.16/s +(n=200000) Lan: 4 wallclock secs ( 3.94 usr + 0.0 sys = 3.94 CPU) @ 50761.42/s +(n=200000) Mat: 6 wallclock secs ( 5.51 usr + 0.0 sys = 5.51 CPU) @ 36297.64/s +(n=200000) Pak: 5 wallclock secs ( 4.61 usr + 0.0 sys = 4.61 CPU) @ 43383.95/s +(n=200000) St2: 4 wallclock secs ( 3.59 usr + 0.0 sys = 3.59 CPU) @ 55710.31/s +(n=200000) St3: 2 wallclock secs ( 1.78 usr + 0.0 sys = 1.78 CPU) @ 112359.55/s +(n=200000) Str: 5 wallclock secs ( 4.41 usr + 0.0 sys = 4.41 CPU) @ 45351.47/s +(n=200000) $ /opt/perl-5.26.0/bin/perl test.pl Benchmark: timing 200000 iterations of Big, Kgb, Lan, Mat, Pak, St2, S +t3, Str... Big: 4 wallclock secs ( 4.15 usr + 0.0 sys = 4.15 CPU) @ 48192.77/s +(n=200000) Kgb: 2 wallclock secs ( 1.63 usr + 0.0 sys = 1.63 CPU) @ 122699.39/s +(n=200000) Lan: 3 wallclock secs ( 3.26 usr + 0.0 sys = 3.26 CPU) @ 61349.69/s +(n=200000) Mat: 5 wallclock secs ( 5.14 usr + 0.0 sys = 5.14 CPU) @ 38910.51/s +(n=200000) Pak: 4 wallclock secs ( 4.07 usr + 0.0 sys = 4.07 CPU) @ 49140.05/s +(n=200000) St2: 4 wallclock secs ( 3.25 usr + 0.0 sys = 3.25 CPU) @ 61538.46/s +(n=200000) St3: 1 wallclock secs ( 1.55 usr + 0.0 sys = 1.55 CPU) @ 129032.26/s +(n=200000) Str: 4 wallclock secs ( 3.48 usr + 0.0 sys = 3.48 CPU) @ 57471.26/s +(n=200000)
Regards, Mario
In reply to Re^2: Fastest way to lookup a point in a set (added test script)
by marioroy
in thread Fastest way to lookup a point in a set
by eyepopslikeamosquito
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |