in reply to Re^5: fast+generic interface for range-based indexed retrieval
in thread fast+generic interface for range-based indexed retrieval
Per a private request, here is the proof-of-concept code used to test the idea above, with the addition of some comments:
#! perl -slw use strict; use Time::HiRes qw[ time ]; use Math::Random::MT qw[ rand ]; use constant MAXVALUE => 10_000; our $N ||= 100e6; $N /= MAXVALUE; my @lookup; for( 0 .. MAXVALUE -1 ) { $lookup[ $_ ] .= pack 'V*', sort{ $a<=>$b } map rand( 10e6 ), 1 .. + $N; } sub intAt{ my( $bucket, $pos ) = @_; return 0 unless defined $lookup[ $bucket ] and length( $lookup[ $b +ucket ] ); unpack 'V', substr $lookup[ $bucket ], $pos*4, 4; } sub iterateFromTo { my( $start, $end ) = @_; ## Extract the bucket and scaled integer for the start value my( $startBkt, $startVal ) = ( int( $start ), ( $start - int( $start ) ) * 10e6 ); ## Skip forward to find the first bucket that ## contains a value greater |= to the start value (if ness.) ++$startBkt if length( $lookup[ $startBkt ] ) and intAt( $startBkt, int( length( $lookup[ $startBkt ] ) /4 + ) -1 ) < $startVal; ## And bottle out if we moved passed the end value before finding +one return if ( intAt( $startBkt, 0 ) / 10e6 + $startBkt ) > $end; ## We've now located the first bucket so do a binary search to loc +ated ## The lowest value greater than or equal to the start value my( $lo, $hi ) = ( 0, int( length( $lookup[ $startBkt ] ) / 4 ) ); my $p; while( $lo < $hi ) { $p = $lo + int( ( $hi - $lo ) / 2 ); my $int = intAt( $startBkt, $p ); if( $int < $startVal ) { $lo = $p +1; } elsif( $int > $startVal ) { $hi = $p - 1; } else { last; } } ## and return an iterator that will emit that first value ## and all subsequent values until we reach the end value. return sub { my $int; do { ## move to the next bucket when $p moves ## past the end of the current one ++$startBkt, $p = 0 if $p > ( length( $lookup[ $startBkt +] ) / 4 ); ## Return undef if we've moved past the last bucket return if $startBkt >= MAXVALUE; ## Grab the integer at $p (and post increment) $int = intAt( $startBkt, $p++ ) } until $int; ## until we find the next value ## Reconsitute the float from the bucket index and scaled inte +ger my $float = $startBkt + ( $int / 10e6 ); ## undef if we've moved past the end return if $float > $end; ## else return the current float. return $float; }; } while( 1 ) { printf "min max: "; chomp( my $input = <STDIN> ); last unless length $input; my( $min, $max ) = split ' ', $input; my $iter = iterateFromTo( $min, $max ); print "nothing in that range", next unless $iter; my $start = time; my $n = 0; ++$n while defined( $_ = $iter->() ); my $stop = time; printf "$n values [$min < N < $max] located in %f seconds (%f)/sec +)\n", $stop - $start, ( $stop - $start ) / $n; }
|
|---|