($a & $b_mask) eq ($b & $a_mask) #### # a and mask[b] eq b and mask[a] ($_[0][1] & $_[1][2]) eq ($_[1][1] & $_[0][2]); #### Benchmark: timing 25000 iterations of cclever3, clever2, clever3, csimple3, simple3... cclever3: 1 wallclock secs ( 0.95 usr + 0.00 sys = 0.95 CPU) @ 26315.79/s (n=25000) clever2: 10 wallclock secs ( 8.73 usr + 0.02 sys = 8.75 CPU) @ 2857.14/s (n=25000) clever3: 1 wallclock secs ( 1.15 usr + 0.00 sys = 1.15 CPU) @ 21739.13/s (n=25000) csimple3: 1 wallclock secs ( 1.04 usr + -0.01 sys = 1.03 CPU) @ 24271.84/s (n=25000) simple3: 5 wallclock secs ( 3.97 usr + 0.01 sys = 3.98 CPU) @ 6281.41/s (n=25000) #### #!/usr/bin/perl use Benchmark; # Remove the following paragraph and the benchmarks for csimple3 and # cclever3 if you don't have Inline::C. use Inline C => 'DATA', VERSION => 0.0, NAME => 'SimpleTest', OPTIMIZE => '-O3'; Inline->init; sub simple3 { my($a,$b)=@_; my(@seen); return undef if (length($a) != length($b)); foreach my $i (0..length($a)) { my($ac,$bc)=(substr($a,$i,1),substr($b,$i,1)); if ($ac eq $bc) { ; # Do nothing } elsif ($ac eq '_') { return undef if (++$seen[$bc] > 1); } elsif ($bc eq '_') { return undef if (++$seen[$ac] > 1); } else { return undef } } 1; } # Represent each string as two strings and two masks. sub clever2 { # Data transformations; could be done beforehand in linear time. # Underscores become \0 for easier masking and comparison (my $a = $_[0]) =~ tr/_/\0/; (my $b = $_[1]) =~ tr/_/\0/; # Compute a "transposed" string showing the position of each character, # leaving \0 if it doesn't appear. my($a3,$b3)=("\0"x10,"\0"x10); foreach my $i (0..(length($a)-1)) { my $c = substr($a,$i,1); next if $c eq "\0"; substr($a3,$c,1)=$i; } foreach my $i (0..(length($b)-1)) { my $c = substr($b,$i,1); next if $c eq "\0"; substr($b3,$c,1)=$i; } # Concatenate together my $a_new = $a . $a3; my $b_new = $b . $b3; # Compute the mask (my $a_mask = $a_new) =~ tr/\0/\xff/c; (my $b_mask = $b_new) =~ tr/\0/\xff/c; # Comparisons; must be done for each comparison. return (($a_new & $b_mask) eq ($b_new & $a_mask)); } # Same as clever2, but with data structure precomputed. sub clever3 { # a and mask[b] eq b and mask[a] ($_[0][1] & $_[1][2]) eq ($_[1][1] & $_[0][2]); } # Precompute the data structure for clever3 sub clever3_xform { # Underscore to \0 for easier masking and comparison (my $a = $_[0]) =~ tr/_/\0/; # Compute a "transposed" string showing the position of each character, # leaving \0 if it doesn't appear. my($a3)="\0"x10; foreach my $i (0..(length($a)-1)) { my $c = substr($a,$i,1); next if $c eq "\0"; substr($a3,$c,1)=$i; } # Concatenate my $a_new = $a . $a3; # Calculate the mask (my $a_mask = $a_new) =~ tr/\0/\xff/c; # Return an arrayref; this is the data structure we'll use in clever3. return [$_[0],$a_new,$a_mask]; } my @tests = (qw/ _8__3__19 48____7__ _8__3__19 4_2___7__ _8__3__19 4_8___7__ __8_3__19 48____7__ __8_3__19 84____7__ _8__3__19 48_____7_ / ); sub run_tests { my($func,$verbose,@tests)=@_; my ($s1, $s2); while (defined($s1 = shift(@tests))) { $s2 = shift(@tests); my $result = $func->($s1, $s2); if ($verbose) { if (ref($s1) && ref($s2)) { $s1 = $s1->[0]; $s2 = $s2->[0]; } print "$s1\n$s2: ",$result?"compatible":"not compatible","\n"; } } } my @tests_clever3 = map { clever3_xform($_) } @tests; run_tests(\&clever3, 1, @tests_clever3); timethese(25_000, { simple3 => sub { run_tests(\&simple3, 0, @tests) }, csimple3 => sub { run_tests(\&csimple3, 0, @tests) }, clever2 => sub { run_tests(\&clever2, 0, @tests) }, clever3 => sub { run_tests(\&clever3, 0, @tests_clever3) }, cclever3 => sub { run_tests(\&cclever3, 0, @tests_clever3) }, }); __DATA__ __C__ int csimple3(const char *a, const char *b) { int i; int l; unsigned char seen[256]; memset(seen,0,256); if ((l=strlen(a)) != strlen(b)) return 0; for(i=0;i 1) return 0; } else if (b[i] == '_') { if (++seen[a[i]] > 1) return 0; } else return 0; } return 1; } int cclever3(SV *a, SV *b) { AV *a_arr, *b_arr; SV **tmp; char *a_val, *a_mask, *b_val, *b_mask; int i; /* First get the arrays from the references */ if (!SvROK(a) || !SvROK(b)) croak("a or b not arrayrefs!"); a_arr = (AV*)SvRV(a); b_arr = (AV*)SvRV(b); /* Now pull out the data */ if ( (tmp = av_fetch(a_arr, 1, 0)) == NULL) croak("a[1] is undef"); if ((a_val = SvPV(*tmp, PL_na)) == NULL) croak("a[1] contains NULL pointer?"); if ( (tmp = av_fetch(a_arr, 2, 0)) == NULL) croak("a[2] is undef"); if ((a_mask = SvPV(*tmp, PL_na)) == NULL) croak("a[2] contains NULL pointer?"); if ( (tmp = av_fetch(b_arr, 1, 0)) == NULL) croak("b[1] is undef"); if ((b_val = SvPV(*tmp, PL_na)) == NULL) croak("b[1] contains NULL pointer?"); if ( (tmp = av_fetch(b_arr, 2, 0)) == NULL) croak("b[2] is undef"); if ((b_mask = SvPV(*tmp, PL_na)) == NULL) croak("b[2] contains NULL pointer?"); /* OK, finally we have all of the data! */ for(i=0;i<20;i++) { if ((a_val[i] & b_mask[i]) != (b_val[i] & a_mask[i])) return 0; } return 1; }