in reply to An efficient way to gather a common portion of several strings' beginnings
Here's my solution (probably not the best, but at least O(n)), along with testing of all the others except atcroft's, which has many failures. (These failures may have a simple fix, but I've no time right now.)
File common_base_string_1.pl:
Output:use 5.010; use warnings; use strict; use List::Util qw(minstr maxstr); use Data::Dump qw(pp); use Test::More # tests => ?? + 1 # Test::NoWarnings adds 1 test 'no_plan' ; use Test::NoWarnings; use constant STR => qw(abcdefXyzzy abcdefFoobar abcdefWibble abcdef +Zot); FUNT: # functions under test for my $func_name (qw( common_base_anomalousmonk common_base_browseruk common_base_hippo common_base_grandfather ), # qw(common_base_atcroft), ) { note qq{\n---- testing $func_name() ---- \n\n}; *common_base = do { no strict 'refs'; *$func_name }; VECTOR: for my $ar_vector ( q{empty list: degenerate case returns undef in scalar context}, [ undef ], q{all these should have ZERO length common base}, [ '', '' ], [ '', '', '' ], [ '', 'x', 'y' ], [ '', 'abc', 'xyz +' ], [ '', STR, '' ], [ '', STR, 'x' ], [ '', '', STR ], [ '', 'x', STR ], [ '', STR, '', STR ], q{all these should have NON-ZERO length common base}, [ 'a', 'a' ], [ 'a', 'a', 'a' ], [ 'abcdef', STR ], q{strings containing nulls}, [ qq{\000}, qq{\000} ], [ qq{\000}, qq{\000}, qq{\000} ], [ qq{\000}, qq{\000}, qq{\000\000} ], [ qq{\000}, qq{\000\000}, qq{\000} ], [ qq{\000}, qq{\000}, qq{\000\000}, qq{\000\000\000} ], [ qq{\000}, qq{\000\000\000}, qq{\000\000}, qq{\000} ], [ qq{\000z}, qq{\000z} ], [ qq{\000}, qq{\000x}, qq{\000y} ], [ qq{\000z\000}, qq{\000z\000} ], [ qq{\000z\000}, qq{\000z\000y\000}, qq{\000z\000x\000} ], [ qq{\000}, qq{\000x\000}, qq{\000y\000} ], [ qq{\000\000}, qq{\000\000}, qq{\000\000\000} ], # [ qq{}, qq{}, qq{} ], ) { if (not ref $ar_vector) { note $ar_vector; next VECTOR; } my ($expected_common_base, @strings) = @$ar_vector; # my $cmnt_strings = join q{', '}, @strings; # $cmnt_strings = qq{'$cmnt_strings'} if length $cmnt_strings; # my $cmnt_expected = $expected_common_base // 'undef'; # $cmnt_expected = qq{'$cmnt_expected'} if defined $expected_co +mmon_base; is common_base(@strings), $expected_common_base, # qq{($cmnt_strings) -> $cmnt_expected} sprintf '%s -> %s', pp(@strings), pp($expected_common_base) ; } # end for VECTOR } # end for FUNT # functions under test ############################################# sub common_base_anomalousmonk { return unless @_; # degenerate case: empty list my $min = minstr @_; my $max = maxstr @_; return ($min ^ $max) =~ m{ [^\x00] }xms ? substr $min, 0, $-[0] : $min ; } sub common_base_browseruk { my (@strings) = @_; # additional: deal with degenerate and corner cases return unless @strings; return $strings[0] unless @strings > 1; # match m[(^\0+)] changed to m[(^\0*)] to avoid assigning # undef to $common if there is no common base substring; # empty string assigned in this case. # my( $mask ) = ( $strings[ 0 ] ^ $strings[ 1 ] ) =~ m[(^\0+)]; my( $mask ) = ( $strings[ 0 ] ^ $strings[ 1 ] ) =~ m[(^\0*)]; my $common = substr $strings[ 0 ], 0, length $mask; for my $i ( 2 .. $#strings ) { if( substr( $strings[ $i ], 0, length $common ) ne $common ) { # ( $mask ) = ( $strings[ 0 ] ^ $strings[ $i ] ) =~ m[(^\0+) +]; ( $mask ) = ( $strings[ 0 ] ^ $strings[ $i ] ) =~ m[(^\0*) +]; $common = substr $strings[ 0 ], 0, length $mask; } } return $common; } sub common_base_hippo { # additional: deal with degenerate and corner cases return unless @_; return $_[0] unless @_ > 1; my @strings = sort @_; return ($strings[0] ^ $strings[-1]) =~ m{ [^\x00] }xms ? substr $strings[0], 0, $-[0] : $strings[0] ; } sub common_base_grandfather { my (@strings) = @_; # additional: deal with degenerate and corner cases return unless @strings; return $strings[0] unless @strings > 1; my $common = $strings[0]; # match m/^\0+/ changed to m/^\0*/ to avoid assigning # undef to $common if there is no common base substring; # empty string assigned in this case. for my $str (@strings[1 .. $#strings]) { # ($common ^ $str) =~ m/^\0+/; # no common base substring: fail ($common ^ $str) =~ m/^\0*/; $common = substr $str, 0, $+[0] if $+[0] < length $common; } return $common; } sub common_base_atcroft { my (@string) = @_; # additional: deal with degenerate and corner cases return unless @string; return $string[0] unless @string > 1; my $common = shift @string; while ( my $teststr = shift @string and length $common ) { # if ( $common = substr( $teststr, 0, length $common ) { if ( $common = substr $teststr, 0, length $common ) { next; } my $flag = length $common; while ( $flag ) { # if ( $common eq substr( $teststr, 0, $flag ) { if ( $common eq substr $teststr, 0, $flag ) { $flag = 0; } else { $flag--; $common = substr( $common, 0, $flag ); } } } return $common; }
c:\@Work\Perl\monks\igoryonya>perl common_base_string_1.pl # # ---- testing common_base_anomalousmonk() ---- # # empty list: degenerate case returns undef in scalar context ok 1 - () -> undef # all these should have ZERO length common base ok 2 - "" -> "" ok 3 - ("", "") -> "" ok 4 - ("x", "y") -> "" ok 5 - ("abc", "xyz") -> "" ok 6 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot", "" +) -> "" ok 7 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot", "x +") -> "" ok 8 - ("", "abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot" +) -> "" ok 9 - ("x", "abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot +") -> "" ok 10 - ( # "abcdefXyzzy", # "abcdefFoobar", # "abcdefWibble", # "abcdefZot", # "", # "abcdefXyzzy", # "abcdefFoobar", # "abcdefWibble", # "abcdefZot", # ) -> "" # all these should have NON-ZERO length common base ok 11 - "a" -> "a" ok 12 - ("a", "a") -> "a" ok 13 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot") - +> "abcdef" # strings containing nulls ok 14 - "\0" -> "\0" ok 15 - ("\0", "\0") -> "\0" ok 16 - ("\0", "\0\0") -> "\0" ok 17 - ("\0\0", "\0") -> "\0" ok 18 - ("\0", "\0\0", "\0\0\0") -> "\0" ok 19 - ("\0\0\0", "\0\0", "\0") -> "\0" ok 20 - "\0z" -> "\0z" ok 21 - ("\0x", "\0y") -> "\0" ok 22 - "\0z\0" -> "\0z\0" ok 23 - ("\0z\0y\0", "\0z\0x\0") -> "\0z\0" ok 24 - ("\0x\0", "\0y\0") -> "\0" ok 25 - ("\0\0", "\0\0\0") -> "\0\0" # # ---- testing common_base_browseruk() ---- # # empty list: degenerate case returns undef in scalar context ok 26 - () -> undef # all these should have ZERO length common base ok 27 - "" -> "" ok 28 - ("", "") -> "" ok 29 - ("x", "y") -> "" ok 30 - ("abc", "xyz") -> "" ok 31 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot", " +") -> "" ok 32 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot", " +x") -> "" ok 33 - ("", "abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot +") -> "" ok 34 - ("x", "abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZo +t") -> "" ok 35 - ( # "abcdefXyzzy", # "abcdefFoobar", # "abcdefWibble", # "abcdefZot", # "", # "abcdefXyzzy", # "abcdefFoobar", # "abcdefWibble", # "abcdefZot", # ) -> "" # all these should have NON-ZERO length common base ok 36 - "a" -> "a" ok 37 - ("a", "a") -> "a" ok 38 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot") - +> "abcdef" # strings containing nulls ok 39 - "\0" -> "\0" ok 40 - ("\0", "\0") -> "\0" ok 41 - ("\0", "\0\0") -> "\0" not ok 42 - ("\0\0", "\0") -> "\0" # Failed test '("\0\0", "\0") -> "\0"' # at common_base_string_1.pl line 102. # got: ' ' # expected: ' ' ok 43 - ("\0", "\0\0", "\0\0\0") -> "\0" not ok 44 - ("\0\0\0", "\0\0", "\0") -> "\0" # Failed test '("\0\0\0", "\0\0", "\0") -> "\0"' # at common_base_string_1.pl line 102. # got: ' ' # expected: ' ' ok 45 - "\0z" -> "\0z" ok 46 - ("\0x", "\0y") -> "\0" ok 47 - "\0z\0" -> "\0z\0" ok 48 - ("\0z\0y\0", "\0z\0x\0") -> "\0z\0" ok 49 - ("\0x\0", "\0y\0") -> "\0" ok 50 - ("\0\0", "\0\0\0") -> "\0\0" # # ---- testing common_base_hippo() ---- # # empty list: degenerate case returns undef in scalar context ok 51 - () -> undef # all these should have ZERO length common base ok 52 - "" -> "" ok 53 - ("", "") -> "" ok 54 - ("x", "y") -> "" ok 55 - ("abc", "xyz") -> "" ok 56 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot", " +") -> "" ok 57 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot", " +x") -> "" ok 58 - ("", "abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot +") -> "" ok 59 - ("x", "abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZo +t") -> "" ok 60 - ( # "abcdefXyzzy", # "abcdefFoobar", # "abcdefWibble", # "abcdefZot", # "", # "abcdefXyzzy", # "abcdefFoobar", # "abcdefWibble", # "abcdefZot", # ) -> "" # all these should have NON-ZERO length common base ok 61 - "a" -> "a" ok 62 - ("a", "a") -> "a" ok 63 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot") - +> "abcdef" # strings containing nulls ok 64 - "\0" -> "\0" ok 65 - ("\0", "\0") -> "\0" ok 66 - ("\0", "\0\0") -> "\0" ok 67 - ("\0\0", "\0") -> "\0" ok 68 - ("\0", "\0\0", "\0\0\0") -> "\0" ok 69 - ("\0\0\0", "\0\0", "\0") -> "\0" ok 70 - "\0z" -> "\0z" ok 71 - ("\0x", "\0y") -> "\0" ok 72 - "\0z\0" -> "\0z\0" ok 73 - ("\0z\0y\0", "\0z\0x\0") -> "\0z\0" ok 74 - ("\0x\0", "\0y\0") -> "\0" ok 75 - ("\0\0", "\0\0\0") -> "\0\0" # # ---- testing common_base_grandfather() ---- # # empty list: degenerate case returns undef in scalar context ok 76 - () -> undef # all these should have ZERO length common base ok 77 - "" -> "" ok 78 - ("", "") -> "" ok 79 - ("x", "y") -> "" ok 80 - ("abc", "xyz") -> "" ok 81 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot", " +") -> "" ok 82 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot", " +x") -> "" ok 83 - ("", "abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot +") -> "" ok 84 - ("x", "abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZo +t") -> "" ok 85 - ( # "abcdefXyzzy", # "abcdefFoobar", # "abcdefWibble", # "abcdefZot", # "", # "abcdefXyzzy", # "abcdefFoobar", # "abcdefWibble", # "abcdefZot", # ) -> "" # all these should have NON-ZERO length common base ok 86 - "a" -> "a" ok 87 - ("a", "a") -> "a" ok 88 - ("abcdefXyzzy", "abcdefFoobar", "abcdefWibble", "abcdefZot") - +> "abcdef" # strings containing nulls ok 89 - "\0" -> "\0" ok 90 - ("\0", "\0") -> "\0" ok 91 - ("\0", "\0\0") -> "\0" not ok 92 - ("\0\0", "\0") -> "\0" # Failed test '("\0\0", "\0") -> "\0"' # at common_base_string_1.pl line 102. # got: ' ' # expected: ' ' ok 93 - ("\0", "\0\0", "\0\0\0") -> "\0" not ok 94 - ("\0\0\0", "\0\0", "\0") -> "\0" # Failed test '("\0\0\0", "\0\0", "\0") -> "\0"' # at common_base_string_1.pl line 102. # got: ' ' # expected: ' ' ok 95 - "\0z" -> "\0z" ok 96 - ("\0x", "\0y") -> "\0" ok 97 - "\0z\0" -> "\0z\0" ok 98 - ("\0z\0y\0", "\0z\0x\0") -> "\0z\0" ok 99 - ("\0x\0", "\0y\0") -> "\0" ok 100 - ("\0\0", "\0\0\0") -> "\0\0" ok 101 - no warnings 1..101 # Looks like you failed 4 tests of 101.
Give a man a fish: <%-{-{-{-<
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: An efficient way to gather a common portion of several strings' beginnings
by james28909 (Deacon) on Nov 15, 2015 at 20:49 UTC |