in reply to Seeking a fast sum_of_ranks_between function
Update: Greatly simplified the else case.
Try this:
#! perl -slw use strict; use Data::Dump qw[ pp ]; use List::Util qw[ sum ]; sub rankSums { my( $aRef, $bRef ) = @_; my( $aSum, $bSum ) = (0) x 2; my( $a, $b ) = (0) x 2; my $rank = 1; while( $a < @$aRef && $b < @$bRef ) { if( $aRef->[ $a ] < $bRef->[ $b ] ) { $aSum += $rank++; ++$a; } elsif( $aRef->[ $a ] > $bRef->[ $b ] ) { $bSum += $rank++; ++$b } else { $aSum += ( $rank * 2 + 1 ) / 2; $bSum += ( $rank * 2 + 1 ) / 2; $rank += 2; ++$a, ++$b; } } $aSum += $rank++ while $a++ < @{ $aRef }; $bSum += $rank++ while $b++ < @{ $bRef }; return $aSum, $bSum; } my @a = split ' ', <DATA>; my @b = split ' ', <DATA>; my( $aSum, $bSum ) = rankSums( \@a, \@b ); print "asum:$aSum bSum:$bSum"; #__DATA__ #1 3 5 7 9 #2 4 6 8 10 #__DATA__ #1 2 3 4 5 6 7 8 9 10 #3.14 4.25 5.36 6.47 7.58 __DATA__ 1 2 3 4 5 3 3.14 4 4
It makes a single pass over the data, and does no copying or sorting or memory allocation, so it should be considerably faster than the current method, but full testing & benchmarking it is your task :)
A further simplified version of the function that runs a tad quicker and has been tested with 1000 runs of 1e6 x 1e6 random integers:
sub rankSums3 { my( $aRef, $bRef ) = @_; my( $aSum, $bSum ) = (0) x 2; my( $a, $b ) = (0) x 2; my $rank = 1; while( $a < @$aRef && $b < @$bRef ) { $aSum += $rank++, ++$a, next if $aRef->[ $a ] < $bRef->[ $b ]; $bSum += $rank++, ++$b, next if $aRef->[ $a ] > $bRef->[ $b ]; $aSum += ( $rank * 2 + 1 ) / 2; $bSum += ( $rank * 2 + 1 ) / 2; $rank += 2; ++$a, ++$b; } $aSum += $rank++ while $a++ < @{ $aRef }; $bSum += $rank++ while $b++ < @{ $bRef }; return $aSum, $bSum; }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Seeking a fast sum_of_ranks_between function (1e6 x 1e6 in 1/5th sec.)
by msh210 (Monk) on Oct 14, 2015 at 14:20 UTC | |
by BrowserUk (Patriarch) on Oct 14, 2015 at 14:52 UTC | |
by msh210 (Monk) on Oct 14, 2015 at 15:33 UTC | |
by toolic (Bishop) on Oct 14, 2015 at 15:45 UTC | |
by msh210 (Monk) on Oct 14, 2015 at 15:56 UTC | |
|