in reply to finding longest common substring
I'm sure that this approach could be made much faster and clearer.
#!/usr/bin/perl use warnings; use strict; use Data::Dumper; for ([ qw(fooabc123 fooabc321 foobca232) ], [ qw(abcfoo123 bcafoo321 foo123abc) ], [ qw(foo bor boz bzo) ]) { print Dumper($_); print find_lcs(@{ $_ }), "\n"; print "---\n"; } sub find_lcs { my @index_info; foreach (@_) { my $string = $_; push @index_info, [ \$string, [ 0..length($string) ], ]; } my ($length, @pos) = _find_lcs(0, @index_info); return $length ? substr(${$index_info[0]->[0]}, $pos[0], $length) : undef; } # Explanation of the datastructure. @index_info is an # array of refs like [$string_ref, [@positions]] where # $string_ref is a reference to a string (so copying # it around doesn't mean copying the string) and # @positions are the starting positions in the string # which so far have a common start. # # We will add a third element which is a hash of next # chars pointing to which positions follow that common # path. # # The eventual return will be the length farther that # we follow, followed by an array of starting positions, # one from each string, that goes to that depth. sub _find_lcs { my ($offset, @index_info) = @_; my $last_chars; foreach my $data (@index_info) { my %chars; foreach my $pos (@{$data->[1]}) { my $char = substr(${$data->[0]}, $pos + $offset, 1); # Filter out end of string and ones that can't match next unless length($char); next if $last_chars and not exists $last_chars->{$char}; push @{$chars{$char}}, $pos; } # EXIT: Break out early if we have run out of common chars return 0 unless %chars; $last_chars = $data->[2] = \%chars; } my $best_length = 0; my @best_pos; foreach my $char (keys %$last_chars) { my @index_info_for_char = map { [ $_->[0], $_->[2]->{$char} ] } @index_info; my ($length, @pos) = _find_lcs($offset + 1, @index_info_for_char); # I'm only interested if this is an improvement next if $length < $best_length; $best_length = $length + 1; if (0 == $length++) { @best_pos = map {$_->[1]->[0]} @index_info_for_char; } else { @best_pos = @pos; } } return $best_length, @best_pos; }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Re: finding longest common substring
by BrowserUk (Patriarch) on Nov 20, 2003 at 05:39 UTC | |
by tilly (Archbishop) on Nov 20, 2003 at 06:49 UTC |