#!/usr/bin/perl use strict; use warnings FATAL => 'all'; use Benchmark 'cmpthese'; use Bit::Vector; #use Data::Dump 'pp'; use constant { MAX_NUM => 1_000_000, SEARCH_SIZE => 400_000, FIND_SIZE => 100, }; srand 1; my @searched = map { int(rand MAX_NUM) } 1 .. SEARCH_SIZE; my @to_find = map { int(rand MAX_NUM) } 1 .. FIND_SIZE; my %jobs = ( baxy => sub { my @present; for my $number (@to_find) { my @a = grep { $_ == $number } @searched; push @present, $number if @a; } return 0+@present; }, bin_chop => sub { my @present; my @sorted = sort {$a<=>$b} @searched; for my $number (@to_find) { push @present, $number unless bin_chop(\@sorted, $number) == -1; } return 0+@present; }, hash => sub { my @present; my %hash; undef @hash{@searched}; for my $number (@to_find) { push @present, $number if exists $hash{$number}; } return 0+@present; }, citromatic => sub { my @present; my $ids = 1_000_000; # last id my $bin = 0; substr($bin, $_, 1, pack ("c",0)) for (0..$ids); # Create the index substr($bin, $_, 1, pack ("c",1)) for @searched; # Search $ids from file 2 for my $id2 (@to_find) { push @present, $id2 if (unpack "c",substr ($bin,$id2,1)) == 1; } return 0+@present; }, vec => sub { my @present; my $vec=""; vec($vec, $_, 1) = 1 for @searched; @present = grep { vec $vec, $_, 1 } @to_find; return 0+@present; }, bit_vec1 => sub { my @present; my $vector = Bit::Vector->new(1000001); map { $vector->Bit_On($_) } @searched; for (@to_find) { push @present, $_, if $vector->bit_test($_); } return 0+@present; }, array_slice => sub { my @present; my @index; @index[@searched] = (1) x @searched; for (@to_find) { push @present, $_ if $index[$_]; } return 0+@present; }, array_for => sub { my @present; my @index; $index[$_] = 1 for @searched; for (@to_find) { push @present, $_ if $index[$_]; } return 0+@present; }, bit_vec2 => sub { my $vec1 = Bit::Vector->new(1000001); $vec1->Index_List_Store(@searched); my $vec2 = Bit::Vector->new(1000001); $vec2->Index_List_Store(@to_find); my $vec3 = Bit::Vector->new(1000001); $vec3->And($vec1, $vec2); my @present = $vec3->Index_List_Read(); return 0+@present; }, ); print "$_: ", $jobs{$_}->(), "\n" for sort keys %jobs; #exit; cmpthese(-20, \%jobs); sub bin_chop { my ($search, $number) = @_; my ($left, $right) = (0, $#$search); while ($right >= $left) { my $mid = int(($left+$right) / 2); if ($number < $search->[$mid]) { $right = $mid-1 } elsif ($number > $search->[$mid]) { $left = $mid+1 } else { return $mid } } return -1; }