I know I'm late to this thread, but I wouldn't have bothered
posting if I weren't going to win ;-)
I wrote the thing in assembler (for IA32, works
on gnu/gcc and on win32/msvc) ==> by_asm
Results:
[Athlon64/3200+ Win32/588/MsVC6]
split1 1.37/s
ikegami1 30.6/s
substr1 31.4/s
avar2 572/s
mrm_6 603/s
corion 630/s
avar2_pos 745/s
ikegami2 814/s
ikegami3 988/s
bart 995/s
ikegami4 1378/s
mrm_3 1596/s
mrm_1 1746/s
moritz 1759/s
mrm_4 2007/s
mrm_5 2695/s
by_asm 7736/s
[AthlonXP/2500+ Linux/588/gcc4]
split1 3.25/s
ikegami1 19.3/s
substr1 22.2/s
avar2 437/s
mrm_6 468/s
corion 483/s
ikegami2 500/s
ikegami3 567/s
bart 577/s
avar2_pos 605/s
ikegami4 1102/s
mrm_3 1129/s
mrm_1 1527/s
moritz 1562/s
mrm_4 1683/s
mrm_5 1818/s
by_asm 5618/s
[Core2q Q6600@3GHz Linux/588/gcc4]
split1 7.62/s
substr1 48.3/s
ikegami1 70.1/s
mrm_6 1129/s
avar2 1481/s
corion 1627/s
avar2_pos 2535/s
mrm_3 2575/s
ikegami2 3158/s
moritz 3188/s
mrm_1 3338/s
bart 3511/s
ikegami3 3519/s
ikegami4 3676/s
mrm_5 3791/s
mrm_4 4278/s
by_asm 4524/s
Note how the Core 2 starts to be
bound by memory bandwidth only!
Here we go (by_asm is at the end) ==>
------------------------------------------------
use 5.8.0;
use strict;
use warnings FATAL => 'all';
use Benchmark qw( cmpthese );
use Test::More;
# ====================
# Benchmark inputs, 100_000 characters each. $s1 is generated with a minimum
# character code of 0 (so it may contain NULs); $s2 is generated with a
# minimum character code of 1 (so it contains none).
my $s1 = do_rand(0, 100_000);
my $s2 = do_rand(1, 100_000);
# Name => code ref table shared by the correctness check and the benchmark.
# Every entry copies $s1 into a fresh $s3, hands that copy (as an lvalue, so
# the implementation can edit it in place through @_) together with $s2 to
# the implementation, and returns the edited copy.
my $subs = {
split1 => sub { split1 ( my $s3 = $s1, $s2 ); $s3 },
substr1 => sub { substr1 ( my $s3 = $s1, $s2 ); $s3 },
moritz => sub { moritz ( my $s3 = $s1, $s2 ); $s3 },
corion => sub { corion ( my $s3 = $s1, $s2 ); $s3 },
ikegami1 => sub { ikegami1 ( my $s3 = $s1, $s2 ); $s3 },
ikegami2 => sub { ikegami2 ( my $s3 = $s1, $s2 ); $s3 },
bart => sub { bart ( my $s3 = $s1, $s2 ); $s3 },
ikegami3 => sub { ikegami3 ( my $s3 = $s1, $s2 ); $s3 },
ikegami4 => sub { ikegami4 ( my $s3 = $s1, $s2 ); $s3 },
avar2 => sub { avar2 ( my $s3 = $s1, $s2 ); $s3 },
avar2_pos => sub { avar2_pos ( my $s3 = $s1, $s2 ); $s3 },
mrm_1 => sub { mrm_1 ( my $s3 = $s1, $s2 ); $s3 },
mrm_3 => sub { mrm_3 ( my $s3 = $s1, $s2 ); $s3 },
mrm_4 => sub { mrm_4 ( my $s3 = $s1, $s2 ); $s3 },
mrm_5 => sub { mrm_5 ( my $s3 = $s1, $s2 ); $s3 },
mrm_6 => sub { mrm_6 ( my $s3 = $s1, $s2 ); $s3 },
by_asm => sub { by_asm ( my $s3 = $s1, $s2 ); $s3 },
};
# Correctness check: one test per implementation. Whichever implementation
# the hash iteration visits first supplies the reference result; every other
# implementation must produce exactly the same string.
{
plan 'tests' => scalar keys %{$subs};
my $expected;
foreach my $subname ( keys %{$subs} ) {
my $sub = $subs->{$subname};
if ( defined $expected ) {
is( $sub->(), $expected, "$subname gets same value" );
}
else {
# First implementation visited: its output becomes the reference.
$expected = $sub->();
ok( defined $expected, "$subname gets some value" );
}
}
print("done.\n");
}
# Timing run; in Benchmark a negative count means "run each entry for at
# least that many CPU seconds".
cmpthese( -3, $subs );
# ====================
# split1( target, filler )
# Explodes both strings into per-character lists, overwrites every NUL
# element of the target list with the filler element at the same index, and
# stores the re-joined list back into the caller's first argument.
sub split1 {
    our $s1;
    our $s2;
    local *s1 = \$_[0];    # alias the caller's target string
    local *s2 = \$_[1];    # alias the caller's filler string

    my @target_chars = split //, $s1;
    my @filler_chars = split //, $s2;

    for my $i ( 0 .. $#target_chars ) {
        next unless $target_chars[$i] eq chr(0);
        $target_chars[$i] = $filler_chars[$i];
    }

    $s1 = join '', @target_chars;
}
# substr1( target, filler )
# Visits every character position of the target and, wherever it holds a NUL,
# overwrites it in place with the filler character at the same position.
# The target is modified through the @_ alias.
sub substr1 {
    our $s1; local *s1 = \$_[0];
    our $s2; local *s2 = \$_[1];
    # Valid character indexes are 0 .. length-1; the previous upper bound of
    # length($s1) probed one position past the end on every call.
    for my $idx ( 0 .. length($s1) - 1 ) {
        if ( substr( $s1, $idx, 1 ) eq chr(0) ) {
            substr( $s1, $idx, 1 ) = substr( $s2, $idx, 1 );
        }
    }
}
# moritz( target, filler )
# Jumps from NUL to NUL in the target with index() and patches each one in
# place with the filler character at the same offset. The search resumes at
# the offset just patched, so the loop only moves on when the character
# copied from the filler is not itself a NUL.
sub moritz {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];

    my $nul_at = index $s1, "\000", 0;
    while ( $nul_at > -1 ) {
        substr( $s1, $nul_at, 1 ) = substr( $s2, $nul_at, 1 );
        $nul_at = index $s1, "\000", $nul_at;
    }
}
# ikegami1( target, filler )
# Builds a mask from the target with a regex substitution (every non-NUL
# character becomes \xFF, NULs stay NUL), then combines the two strings with
# string bitwise operators: target where the mask is set, filler elsewhere.
sub ikegami1 {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];

    my $mask = $s1;
    $mask =~ s/[^\x00]/\xFF/g;

    my $kept   = $s1 & $mask;     # target characters that were not NUL
    my $filled = $s2 & ~$mask;    # filler characters at the NUL positions
    $s1 = $kept | $filled;
}
# ikegami2( target, filler )
# Same mask-and-merge approach as a regex-built mask, except the mask is
# produced with a complemented transliteration (everything that is not NUL
# becomes \xFF).
sub ikegami2 {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];

    my $mask = $s1;
    $mask =~ tr/\x00/\xFF/c;

    my $kept   = $s1 & $mask;     # target characters that were not NUL
    my $filled = $s2 & ~$mask;    # filler characters at the NUL positions
    $s1 = $kept | $filled;
}
# bart( target, filler )
# Builds a \xFF-for-non-NUL mask with tr///c and merges with the XOR trick:
# XOR target with filler, keep only the masked positions, XOR with filler
# again. Masked positions end up as the target character, the rest as the
# filler character.
sub bart {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];

    my $mask = $s1;
    $mask =~ tr/\x00/\xFF/c;

    my $diff = $s1 ^ $s2;
    $diff = $diff & $mask;
    $s1 = $diff ^ $s2;
}
# ikegami3( target, filler )
# The XOR / mask / XOR merge, compiled under "use bytes". The result is
# assigned back to the target as a new string.
sub ikegami3 {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];
    use bytes;

    my $mask = $s1;
    $mask =~ tr/\x00/\xFF/c;

    my $diff = $s1 ^ $s2;
    $diff = $diff & $mask;
    $s1 = $diff ^ $s2;
}
# ikegami4( target, filler )
# The XOR / mask / XOR merge under "use bytes", performed with assignment
# operators directly on the target instead of building one combined
# expression.
sub ikegami4 {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];
    use bytes;

    my $keep_mask = $s1;
    $keep_mask =~ tr/\x00/\xFF/c;

    $s1 ^= $s2;           # difference between target and filler
    $s1 &= $keep_mask;    # drop the difference at the NUL positions
    $s1 ^= $s2;           # target where kept, filler where dropped
}
# corion( target, filler )
# Splits the target on NUL (keeping trailing empty fields) and glues the
# fields back together, inserting between consecutive fields the filler
# character that sits at the offset of the NUL that separated them.
# The target is modified through the @_ alias.
#
# Only the gaps *between* fields get a filler character. The previous version
# also emitted one after the final field, which appended a stray character
# whenever the filler was longer than the target.
sub corion {
    our $s1; local *s1 = \$_[0];
    our $s2; local *s2 = \$_[1];

    my @fields = split /\0/, $s1, -1;
    # split() returns an empty list for an empty target; treat that as a
    # single empty trailing field.
    my $last = @fields ? pop @fields : "";

    my $ofs = 0;
    my @pieces;
    for my $field (@fields) {
        $ofs += length $field;                            # offset of the NUL after this field
        push @pieces, $field, substr( $s2, $ofs++, 1 );   # field + its replacement char
    }

    $s1 = join "", @pieces, $last;
}
# avar2( target, filler )
# A single global substitution under "use bytes": each captured NUL is
# replaced by the filler character found one position before the end offset
# of the current match.
sub avar2 {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];
    use bytes;

    $s1 =~ s{(\0)}{
        my $match_end = $+[0];
        substr $s2, $match_end - 1, 1;
    }eg;
}
# avar2_pos( target, filler )
# A single global substitution under "use bytes": each NUL is replaced by the
# filler character at the offset that pos() reports for the target while the
# replacement expression is being evaluated.
sub avar2_pos {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];
    use bytes;

    $s1 =~ s{\0}{
        my $offset = pos($s1);
        substr $s2, $offset, 1;
    }eg;
}
# mrm_1( target, filler )
# The index()/lvalue-substr() loop compiled under "use bytes": find the next
# NUL at or after the current offset, overwrite it with the filler character
# at that offset, repeat until index() reports no more NULs.
sub mrm_1 {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];
    use bytes;

    my $cursor = 0;
    while (1) {
        $cursor = index $s1, "\x00", $cursor;
        last if $cursor <= -1;
        substr( $s1, $cursor, 1 ) = substr( $s2, $cursor, 1 );
    }
}
# mrm_3( target, filler )
# Two-pass variant of the index() approach: first collect the offsets of all
# NULs in the target, then overwrite each collected offset with the filler
# character at the same offset. The target is modified through the @_ alias.
sub mrm_3 {
    our $s1; local *s1 = \$_[0];
    our $s2; local *s2 = \$_[1];
    # from moritz's, builds a separate list of zeros
    use bytes;
    my @zeros;
    # Start at -1 so the first search begins at offset 0. Starting at 0
    # together with the "+1" below skipped a NUL in the very first position.
    my $pos = -1;
    # ikegami: the +1 is necessary, because nothing is replaced during this
    # pass and the search would otherwise find the same NUL forever.
    while ( -1 < ( $pos = index $s1, "\x00", $pos + 1 ) ) {
        push @zeros, $pos;
    }
    for my $zero_at (@zeros) {
        substr( $s1, $zero_at, 1 ) = substr( $s2, $zero_at, 1 );
    }
}
# mrm_4( target, filler )
# from [bart]'s vec(): locate each NUL with index() under "use bytes" and
# patch it through an 8-bit vec() lvalue, using ||= so the filler byte is
# stored only where the target byte is zero.
sub mrm_4 {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];
    use bytes;

    my $byte_at = index $s1, "\x00", 0;
    while ( $byte_at > -1 ) {
        vec( $s1, $byte_at, 8 ) ||= vec( $s2, $byte_at, 8 );
        $byte_at = index $s1, "\x00", $byte_at;
    }
}
# mrm_5( target, filler )
# from moritz's, seeing if four-arg substr() is faster or slower than
# lvalue substr(): find each NUL with index() under "use bytes" and replace
# it via the four-argument form of substr(). The target is modified through
# the @_ alias.
# (The original comment had been split across two lines by a line-wrap, which
# left part of it outside the comment and broke compilation.)
sub mrm_5 {
    our $s1; local *s1 = \$_[0];
    our $s2; local *s2 = \$_[1];
    use bytes;
    my $pos = 0;
    # The search resumes at the offset just replaced, so it advances only
    # when the character copied from the filler is not itself a NUL.
    while ( -1 < ( $pos = index $s1, "\x00", $pos ) ) {
        substr( $s1, $pos, 1, substr( $s2, $pos, 1 ) );
    }
}
# mrm_6( target, filler )
# Same loop as the four-argument substr() variant, but instead of enabling
# the bytes pragma it loads the module without importing and calls
# bytes::index() and bytes::substr() by their fully qualified names.
sub mrm_6 {
    our $s1;
    our $s2;
    local *s1 = \$_[0];
    local *s2 = \$_[1];
    use bytes ();

    my $cursor = 0;
    while (1) {
        $cursor = bytes::index( $s1, "\x00", $cursor );
        last if $cursor <= -1;
        my $fill = bytes::substr( $s2, $cursor, 1 );
        bytes::substr( $s1, $cursor, 1, $fill );
    }
}
# by_asm( has_zeros, no_zeros )
# C function with 32-bit x86 inline assembly (MSVC and GCC flavours), bound
# into Perl through Inline::C. It scans the buffer of has_zeros for NUL bytes
# with "repne scasb" and overwrites each one in place with the byte at the
# same offset in no_zeros. Croaks when no_zeros is shorter than has_zeros.
#
# Changes against the original listing:
#   * the declaration line that had been split by a line-wrap ("ds" / "+tlen")
#     is rejoined so the C compiles;
#   * the target buffer is fetched with SvPV_force, because it is written to
#     directly (a plain SvPV pointer may be shared with another scalar);
#   * an empty target returns early - the scan loop would otherwise read and
#     write the byte in front of both buffers;
#   * the GCC asm declares the registers it modifies (edi, ecx as read/write
#     operands; eax, edx, flags and memory as clobbers) and uses local
#     numeric labels instead of global ones.
# The string is qq{}, so "\\n\\t" reaches the C compiler as "\n\t".
use Inline C => qq{
// ==> inline
void by_asm(SV* has_zeros, SV* no_zeros)
{
STRLEN spacer1, srclen, dstlen, remaining;
char *src=SvPV(no_zeros, srclen), *spacer2=0, *dst=SvPV_force(has_zeros, dstlen);
if( srclen < dstlen ) croak("block length mismatch!");
/* nothing to scan; the loop below assumes at least one byte */
if( dstlen == 0 ) return;
/* scan counter for the GCC branch; dstlen itself stays untouched because
   the loop re-reads it to turn the remaining count into an offset */
remaining = dstlen;
#ifdef _MSC_VER
_asm mov edi, dst
_asm mov esi, src
_asm mov ecx, dstlen
_asm xor eax, eax
_asm cld
start:
_asm repne scasb
_asm jne done
_asm mov edx, dstlen
_asm sub edx, ecx
_asm mov ah, byte ptr [-1+esi+edx]
_asm mov byte ptr [-1+edi], ah
_asm jmp start
done: ;
#else
__asm__ __volatile__(
"xorl %%eax, %%eax \\n\\t"
"cld \\n\\t"
"1: \\n\\t"
"repne \\n\\t"
"scasb \\n\\t"
"jne 2f \\n\\t"
"movl %[l], %%edx \\n\\t"
"subl %%ecx, %%edx \\n\\t"
"movb -1(%%esi,%%edx), %%ah \\n\\t"
"movb %%ah, -1(%%edi) \\n\\t"
"jmp 1b \\n\\t"
"2: \\n\\t"
: "+D"(dst), "+c"(remaining)
: "S"(src), [l]"m"(dstlen)
: "eax", "edx", "cc", "memory"
);
#endif
}
// <== inline
};
# ====================
# do_rand( $min, $len )
# Returns a string of $len random characters whose character codes start at
# $min (each one is chr( rand(255 - $min) + $min )).
# When $min is 0 the string is regenerated until it contains at least one
# NUL character.
#
# The retry test used to be /\X00/. In a regex \X matches an extended
# grapheme cluster, so that pattern looked for "any character followed by the
# text 00" rather than for a NUL; the check now looks for "\x00" itself.
sub do_rand {
    my $min = shift;
    my $len = shift;
    while (1) {
        my $n = "";
        for ( 1 .. $len ) {
            $n .= chr( rand( 255 - $min ) + $min );
        }
        # An empty string can never contain a NUL, so do not retry for it.
        next if $min == 0 && $len > 0 && index( $n, "\x00" ) < 0;
        return $n;
    }
}
------------------------------------------------
Too late for the meter of beer I guess ...
Mirco
|