Update: Added results from a 4.0 GHz machine running CentOS 7.3.

The script may be useful in the future for comparing with later versions of Perl.

timethese 200000, { Big => sub { big_look($big_ref) }, # $cells->{ ($p->[1] << 32) | ($ +p->[0] & 0xFFFFFFFF) } Kgb => sub { kgb_look($kgb_ref) }, # $cells->{ $str } # optimized Lan => sub { lan_look($lan_ref) }, # $cells->{ "@$p" } Mat => sub { mat_look($mat_ref) }, # $cells->{ $_->[0] }{ $_->[1] } Pak => sub { pak_look($pak_ref) }, # $cells->{ pack "ii", $p->[0], +$p->[1] } St2 => sub { st2_look($st2_ref) }, # $cells->{ join(':', @$p) } St3 => sub { st3_look($st3_ref) }, # $cells->{ $str } # optimized Str => sub { str_look($str_ref) }, # $cells->{ $p->[0] .':'. $p->[1 +] } };

So, here is the test.pl script. Basically, the OP script with extra solutions by various monks in this thread.

use strict; # use warnings; use feature qw(state); use Benchmark qw(timethese); use Data::Dump qw(pp); my @points = ( [ 0, 0 ], [ -1, -2 ], [ 1, 2 ], [ -1, 2 ], [ 1, -2 ], [ 0, 1 ], [ 1, 0 ], [ -1, 0 ], [ 0, -1 ], [ 2147483647, 2147483647 ], [ 2147483647, -2147483647 ], [ -2147483647, 2147483647 ], [ -2147483647, -2147483647 ], [ -1, 2147483647 ], [ 2147483647, 1 ], [ -2, 2147483646 ], [ 1, -2147483647 ], [ 1234561, 1234562 ], [ 1234563, -1234564 ], [ -1234565, 1234566 ], [ -1234567, -1234568 ], [ 10, 11 ], [ 11, 12 ], [ 12, 13 ], [ 13, 14 ], [ 14, 15 ], [ 15, 16 ], [ 16, 17 ], [ 17, 18 ], [ 18, 19 ], [ 19, 20 ], [ 1001, 1002 ], [ 1003, 1004 ], [ 1005, 1006 ], [ 1007, 1008 ], [ 1009, 1010 ], [ 1011, 1012 ], [ 1013, 1014 ], [ 1015, 1016 ], [ 1017, 1018 ], [ 1019, 1020 ], [ -1001, -1002 ], [ -1003, -1004 ], [ -1005, -1006 ], [ -1007, -1008 ], [ -1009, -1010 ], [ -1011, -1012 ], [ -1013, -1014 ], [ -1015, -1016 ], [ -1017, -1018 ], [ -1019, -1020 ], [ 99910, 99911 ], [ 99911, 99912 ], [ 99912, 99913 ], [ 99913, 99914 ], [ 99914, 99915 ], [ 99915, 99916 ], [ 99916, 99917 ], [ 99917, 99918 ], [ 99918, 99919 ], [ 99919, 99920 ], [ -99910, -99911 ], [ -99911, -99912 ], [ -99912, -99913 ], [ -99913, -99914 ], [ -99914, -99915 ], [ -99915, -99916 ], [ -99916, -99917 ], [ -99917, -99918 ], [ -99918, -99919 ], [ -99919, -99920 ], [ 1099910, 1099911 ], [ 1099911, 1099912 ], [ 1099912, 1099913 ], [ 1099913, 1099914 ], [ 1099914, 1099915 ], [ 1099915, 1099916 ], [ 1099916, 1099917 ], [ 1099917, 1099918 ], [ 1099918, 1099919 ], [ 1099919, 1099920 ], [ -1099910, -1099911 ], [ -1099911, -1099912 ], [ -1099912, -1099913 ], [ -1099913, -1099914 ], [ -1099914, -1099915 ], [ -1099915, -1099916 ], [ -1099916, -1099917 ], [ -1099917, -1099918 ], [ -1099918, -1099919 ], [ -1099919, -1099920 ], [ 91099910, 91099911 ], [ 91099911, 91099912 ], [ 91099912, 91099913 ], [ 91099913, 91099914 ], [ 91099914, 91099915 ], [ 91099915, 91099916 ], [ 91099916, 91099917 ], [ 91099917, 91099918 ], [ 91099918, 91099919 ], [ 91099919, 91099920 ], [ -91099910, -91099911 ], [ -91099911, -91099912 ], [ -91099912, -91099913 ], [ -91099913, -91099914 ], [ -91099914, -91099915 ], [ -91099915, -91099916 ], [ -91099916, -91099917 ], [ -91099917, -91099918 ], [ -91099918, -91099919 ], [ -91099919, -91099920 ], [ 491099910, 491099911 ], [ 491099911, 491099912 ], [ 491099912, 491099913 ], [ 491099913, 491099914 ], [ 491099914, 491099915 ], [ 491099915, 491099916 ], [ 491099916, 491099917 ], [ 491099917, 491099918 ], [ 491099918, 491099919 ], [ 491099919, 491099920 ], [ -491099910, -491099911 ], [ -491099911, -491099912 ], [ -491099912, -491099913 ], [ -491099913, -491099914 ], [ -491099914, -491099915 ], [ -491099915, -491099916 ], [ -491099916, -491099917 ], [ -491099917, -491099918 ], [ -491099918, -491099919 ], [ -491099919, -491099920 ], ); my $npoints = @points; sub str_hash { # print "string_hash---------------\n"; my %cells; # insert the points into the hash for my $p (@points) { my $h = $p->[0] . ':' . $p->[1]; my ( $x, $y ) = split ':', $h; # print "x='$p->[0]' y='$p->[1]' h='$h' (x='$x' y='$y')\n"; if ($x != $p->[0]) { die; } if ($y != $p->[1]) { die; } $cells{$h} = undef; # ++$cells{$h}; } scalar(keys %cells) == $npoints or die; # lookup each points in the hash for my $p (@points) { my $h = $p->[0] . ':' . $p->[1]; exists $cells{$h} or die; } exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub big_hash { # print "bigint_hash---------------\n"; my %cells; # insert the points into the hash for my $p (@points) { my $h = ($p->[1] << 32) | ($p->[0] & 0xFFFFFFFF); my $x = $h & 0x00000000FFFFFFFF; my $y = ($h & 0xFFFFFFFF00000000) >> 32; if ($x >> 31) { $x -= 0xFFFFFFFF + 1 } if ($y >> 31) { $y -= 0xFFFFFFFF + 1 } # print "x='$p->[0]' y='$p->[1]' h='$h' (x='$x' y='$y')\n"; if ($x != $p->[0]) { die; } if ($y != $p->[1]) { die; } $cells{$h} = undef; # ++$cells{$h}; } scalar(keys %cells) == $npoints or die; # lookup each point in the hash for my $p (@points) { my $h = ($p->[1] << 32) | ($p->[0] & 0xFFFFFFFF); exists $cells{$h} or die; } exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub pak_hash { # print "unpack_hash---------------\n"; my %cells; # insert the points into the hash for my $p (@points) { my $h = pack "ii", $p->[0], $p->[1]; my ( $x, $y ) = unpack "ii", $h; # print "x='$p->[0]' y='$p->[1]' h='$h' (x='$x' y='$y')\n"; if ($x != $p->[0]) { die; } if ($y != $p->[1]) { die; } $cells{$h} = undef; # ++$cells{$h}; } scalar(keys %cells) == $npoints or die; # lookup each point in the hash for my $p (@points) { my $h = pack "ii", $p->[0], $p->[1]; exists $cells{$h} or die; } exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub str_look { my $cells = shift; for my $p (@points) { # my $h = $p->[0] . ':' . $p->[1]; exists $cells->{$p->[0] . ':' . $p->[1]} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub big_look { my $cells = shift; for my $p (@points) { # my $h = ($p->[1] << 32) | ($p->[0] & 0xFFFFFFFF); exists $cells->{($p->[1] << 32) | ($p->[0] & 0xFFFFFFFF)} or die +; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub pak_look { my $cells = shift; for my $p (@points) { # my $h = pack "ii", $p->[0], $p->[1]; exists $cells->{pack "ii", $p->[0], $p->[1]} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub st2_look { my $cells = shift; for my $p (@points) { exists $cells->{ join(':',@$p) } or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub st3_look { my $cells = shift; state $points_str = [ map { join ':', @{$_} } @points ]; for my $p (@{ $points_str }) { exists $cells->{$p} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub mat_hash { my %cells; $cells{$_->[0]}{$_->[1]} = undef for @points; my $ncells = 0; $ncells += keys %{$cells{$_}} for keys %cells; $ncells == $npoints or die; exists $cells{$_->[0]}{$_->[1]} or die for @points; exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub mat_look { my $cells = shift; for my $p (@points) { exists $cells->{$p->[0]} or die; exists $cells->{$p->[0]}{$p->[1]} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub lan_hash { # print "string_hash---------------\n"; my %cells; # insert the points into the hash for my $p (@points) { my $h = $p->[0] . ' ' . $p->[1]; my ( $x, $y ) = split ' ', $h; # print "x='$p->[0]' y='$p->[1]' h='$h' (x='$x' y='$y')\n"; if ($x != $p->[0]) { die; } if ($y != $p->[1]) { die; } $cells{$h} = undef; # ++$cells{$h}; } scalar(keys %cells) == $npoints or die; # lookup each points in the hash for my $p (@points) { my $h = $p->[0] . ' ' . $p->[1]; exists $cells{$h} or die; } exists $cells{'notfound'} and die; exists $cells{'notfound2'} and die; exists $cells{'notfound3'} and die; return \%cells; } sub lan_look { my $cells = shift; for my $p (@points) { exists $cells->{ "@$p" } or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } sub kgb_hash { my %cells = map { pp($_) => undef } @points; \%cells; } sub kgb_look { my $cells = shift; state $points_str = [ map { pp($_) } @points ]; for my $p (@{ $points_str }) { exists $cells->{$p} or die; } exists $cells->{'notfound'} and die; exists $cells->{'notfound2'} and die; exists $cells->{'notfound3'} and die; } my $big_ref = big_hash(); my $kgb_ref = kgb_hash(); my $lan_ref = lan_hash(); my $mat_ref = mat_hash(); my $pak_ref = pak_hash(); my $st2_ref = str_hash(); my $st3_ref = str_hash(); my $str_ref = str_hash(); timethese 200000, { Big => sub { big_look($big_ref) }, # $cells->{ ($p->[1] << 32) | ($ +p->[0] & 0xFFFFFFFF) } Kgb => sub { kgb_look($kgb_ref) }, # $cells->{ $str } # optimized Lan => sub { lan_look($lan_ref) }, # $cells->{ "@$p" } Mat => sub { mat_look($mat_ref) }, # $cells->{ $_->[0] }{ $_->[1] } Pak => sub { pak_look($pak_ref) }, # $cells->{ pack "ii", $p->[0], +$p->[1] } St2 => sub { st2_look($st2_ref) }, # $cells->{ join(':', @$p) } St3 => sub { st3_look($st3_ref) }, # $cells->{ $str } # optimized Str => sub { str_look($str_ref) }, # $cells->{ $p->[0] .':'. $p->[1 +] } };

Perhaps, future processors may reach 5 GHz. The following was captured on a CentOS 7.3 machine running at 4.0 GHz. These days, processors are equipped with Turbo Boost allowing up to 4.0 GHz for single task.

The native Perl on CentOS 7.3 is v5.16.3. I've gone ahead and compiled Perl v5.26.0 for comparison.

config_args='-Dprefix=/opt/perl-5.26.0 -sder -Dusethreads -Accflags=-m +sse4.2'
$ /usr/bin/perl test.pl Benchmark: timing 200000 iterations of Big, Kgb, Lan, Mat, Pak, St2, S +t3, Str... Big: 6 wallclock secs ( 5.39 usr + 0.0 sys = 5.39 CPU) @ 37105.75/s +(n=200000) Kgb: 2 wallclock secs ( 1.90 usr + 0.0 sys = 1.90 CPU) @ 105263.16/s +(n=200000) Lan: 4 wallclock secs ( 3.94 usr + 0.0 sys = 3.94 CPU) @ 50761.42/s +(n=200000) Mat: 6 wallclock secs ( 5.51 usr + 0.0 sys = 5.51 CPU) @ 36297.64/s +(n=200000) Pak: 5 wallclock secs ( 4.61 usr + 0.0 sys = 4.61 CPU) @ 43383.95/s +(n=200000) St2: 4 wallclock secs ( 3.59 usr + 0.0 sys = 3.59 CPU) @ 55710.31/s +(n=200000) St3: 2 wallclock secs ( 1.78 usr + 0.0 sys = 1.78 CPU) @ 112359.55/s +(n=200000) Str: 5 wallclock secs ( 4.41 usr + 0.0 sys = 4.41 CPU) @ 45351.47/s +(n=200000) $ /opt/perl-5.26.0/bin/perl test.pl Benchmark: timing 200000 iterations of Big, Kgb, Lan, Mat, Pak, St2, S +t3, Str... Big: 4 wallclock secs ( 4.15 usr + 0.0 sys = 4.15 CPU) @ 48192.77/s +(n=200000) Kgb: 2 wallclock secs ( 1.63 usr + 0.0 sys = 1.63 CPU) @ 122699.39/s +(n=200000) Lan: 3 wallclock secs ( 3.26 usr + 0.0 sys = 3.26 CPU) @ 61349.69/s +(n=200000) Mat: 5 wallclock secs ( 5.14 usr + 0.0 sys = 5.14 CPU) @ 38910.51/s +(n=200000) Pak: 4 wallclock secs ( 4.07 usr + 0.0 sys = 4.07 CPU) @ 49140.05/s +(n=200000) St2: 4 wallclock secs ( 3.25 usr + 0.0 sys = 3.25 CPU) @ 61538.46/s +(n=200000) St3: 1 wallclock secs ( 1.55 usr + 0.0 sys = 1.55 CPU) @ 129032.26/s +(n=200000) Str: 4 wallclock secs ( 3.48 usr + 0.0 sys = 3.48 CPU) @ 57471.26/s +(n=200000)

Regards, Mario


In reply to Re^2: Fastest way to lookup a point in a set (added test script) by marioroy
in thread Fastest way to lookup a point in a set by eyepopslikeamosquito

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.