See Tuning Algorithm::Diff
The problem with that benchmark is it only tests the call to the functions; not the subsequent accessing of the returned information.
Accessing the bits from Pure Perl will kill the performance stony dead.
Update: Added another few tests, including a bitvector version to the benchmark:
#! perl -slw use strict; use Config; use Inline C => Config => BUILD_NOISY => 1; use Inline C => <<'END_C', NAME => 'diffBench', CLEAN_AFTER_BUILD =>0 +; SV *diffAoA( U32 n ) { AV *av = newAV(); U32 i; for( i = 0; i < n/2; ++i ) { AV *av2 = newAV(); av_push( av2, newSViv( i*2 ) ); av_push( av2, newSViv( i*2+1 ) ); av_push( av, (SV*)av2 ); } return newRV_noinc( (SV*)av ); } SV *diffPacked( U32 n ) { U32 *diffs = malloc( sizeof( U32 ) * n ); SV *packed; U32 i; for( i = 0; i < n; ++i ) { diffs[ i ] = i; } packed = newSVpv( (char *)diffs, sizeof( U32 ) * n ); free( diffs ); return packed; } void diffPackedList( U32 n ) { inline_stack_vars; U32 i; inline_stack_reset; for( i = 0; i < n/2; ++i ) { U32 a[ 2 ] = { i*2, i*2+1 }; inline_stack_push( sv_2mortal( newSVpv( (char*)a, 8 ) ) ); } inline_stack_done; inline_stack_return( n/2 ); return; } SV *diff2dString( U32 n ) { SV *diffs = newSVpv( "", 0 ); U32 i; for( i = 0; i < n/2; ++i ) { sv_catpvf( diffs, "%u:%u ", i*2, i*2+1 ); } return diffs; } void diff1dList( U32 n ) { inline_stack_vars; U32 i; inline_stack_reset; for( i = 0; i < n/2; ++i ) { inline_stack_push( sv_2mortal( newSVpvf( "%u:%u", i*2, i*2+1 ) + ) ); } inline_stack_done; inline_stack_return( n/2 ); return; } void diffList( U32 n ) { inline_stack_vars; U32 i; inline_stack_reset; for( i = 0; i < n; ++i ) { inline_stack_push( sv_2mortal( newSViv( i ) ) ); } inline_stack_done; inline_stack_return( n ); return; } void diffLoA( U32 n ) { inline_stack_vars; U32 i; inline_stack_reset; for( i = 0; i < n/2; ++i ) { AV *av2 = newAV(); av_push( av2, newSViv( i*2 ) ); av_push( av2, newSViv( i*2+1 ) ); inline_stack_push( newRV_noinc( (SV*)av2 ) ); } inline_stack_done; inline_stack_return( n/2 ); return; } SV * diffBits( U32 n ) { unsigned __int64 bits = 0; U32 i; for( i = 0; i < n/2; ++i ) { bits |= ( 1ull << ( i*2) ); bits |= ~( 1ull << ( i*2+1 ) ); } return newSViv( bits ); } END_C use Data::Dump qw[ pp ]; use Benchmark qw[ cmpthese ]; our $N //= 10; cmpthese -1, { AoA => q[ my $AoA = diffAoA( $N ); # pp $AoA; for my $pair ( @{ $AoA } ) { my( $x, $y ) = @{ $pair }; } ], packed => q[ my $packed = diffPacked( $N ); # pp $packed; while( length( $packed ) ) { my( $x, $y ) = unpack 'VV', substr( $packed, 0, 8, '' ); } ], packedList => q[ for my $pair ( diffPackedList( $N ) ) { my( $x, $y ) = unpack 'VV', $pair; } ], twoDel => q[ my $string2d = diff2dString( $N ); # pp $string2d; for my $pair ( split ' ', $string2d ) { my( $x, $y ) = split ':', $pair; } ], oneDelList => q[ for my $pair ( diff1dList( $N ) ) { my( $x, $y ) = split ':', $pair; } ], list => q[ my @array = diffList( $N ); # pp \@array; while( @array ) { my( $x, $y ) = ( shift @array, shift @array ); } ], LoA => q[ my @array = diffLoA( $N ); # pp \@array; for my $pair ( @array ) { my( $x, $y ) = @{ $pair }; } ], bits => q[ my $bits = diffBits( $N ); for( my $i =0; $i < 64; ++$i ) { my( $x, $y ) = ( ( $bits & ( 1 << ( $i*2 ) ) ) >> ( $i *2 +), ( $bits & ( 1 << ( $i*2+1) ) ) >> ( $i*2+1 ) ); } ], }; __END__ C:\test>diffBench.pl -N=10 Rate bits twoDel oneDelList AoA LoA packedList pa +cked list bits 12197/s -- -81% -84% -89% -90% -90% +-91% -93% twoDel 65723/s 439% -- -13% -42% -46% -47% +-51% -61% oneDelList 75443/s 519% 15% -- -33% -37% -39% +-44% -55% AoA 113180/s 828% 72% 50% -- -6% -9% +-16% -32% LoA 120618/s 889% 84% 60% 7% -- -3% +-11% -28% packedList 123986/s 917% 89% 64% 10% 3% -- + -8% -26% packed 135458/s 1011% 106% 80% 20% 12% 9% + -- -19% list 167440/s 1273% 155% 122% 48% 39% 35% + 24% -- C:\test>diffBench.pl -N=64 Rate twoDel bits oneDelList packedList AoA LoA pa +cked list twoDel 11273/s -- -13% -13% -53% -54% -55% +-61% -65% bits 12979/s 15% -- -0% -46% -47% -48% +-55% -60% oneDelList 13013/s 15% 0% -- -46% -47% -48% +-55% -60% packedList 24162/s 114% 86% 86% -- -1% -3% +-17% -25% AoA 24506/s 117% 89% 88% 1% -- -2% +-16% -24% LoA 25004/s 122% 93% 92% 3% 2% -- +-14% -23% packed 29133/s 158% 124% 124% 21% 19% 17% + -- -10% list 32367/s 187% 149% 149% 34% 32% 29% + 11% --
In reply to Re^7: Faster creation of Arrays in XS?
by BrowserUk
in thread Faster creation of Arrays in XS?
by wollmers
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |