http://qs1969.pair.com?node_id=290101


in reply to Re: Re: LCCS time complexity
in thread LCCS time complexity

Well actually dominus wrote the module, bikenomad maintains it and merlyn brought it to my attention. Glad it works better. Might as well make it a module....

package String::LCSS::Fast; use 5.006; use strict; use warnings; use Algorithm::Diff qw(traverse_sequences); require Exporter; use vars qw( @ISA @EXPORT_OK $VERSION ); our @ISA = qw(Exporter); @EXPORT_OK = qw( LCSS CSS CSS_Sorted ); $VERSION = '0.01'; sub _tokenize { [split //, $_[0]] } sub CSS { my $is_array = ref $_[0] eq 'ARRAY' ? 1 : 0; my $sort = $_[2]; my ( $seq1, $seq2, @match, $from_match ); my $i = 0; if ( $is_array ) { $seq1 = $_[0]; $seq2 = $_[1]; traverse_sequences( $seq1, $seq2, { MATCH => sub { push @{$match[$i]}, $seq1->[$_[0]]; $from_m +atch = 1 }, DISCARD_A => sub { do{$i++; $from_match = 0} if $from_matc +h }, DISCARD_B => sub { do{$i++; $from_match = 0} if $from_matc +h }, }); } else { $seq1 = _tokenize($_[0]); $seq2 = _tokenize($_[1]); traverse_sequences( $seq1, $seq2, { MATCH => sub { $match[$i] .= $seq1->[$_[0]]; $from_match = + 1 }, DISCARD_A => sub { do{$i++; $from_match = 0} if $from_matc +h }, DISCARD_B => sub { do{$i++; $from_match = 0} if $from_matc +h }, }); } return \@match; } sub CSS_Sorted { my $match = CSS(@_); if ( ref $_[0] eq 'ARRAY' ) { @$match = map{$_->[0]}sort{$b->[1]<=>$a->[1]}map{[$_,scalar(@$_ +)]}@$match } else { @$match = map{$_->[0]}sort{$b->[1]<=>$a->[1]}map{[$_,length($_) +]}@$match } return $match; } sub LCSS { my $is_array = ref $_[0] eq 'ARRAY' ? 1 : 0; my $css = CSS(@_); my $index; my $length = 0; if ( $is_array ) { for( my $i = 0; $i < @$css; $i++ ) { next unless @{$css->[$i]}>$length; $index = $i; $length = @{$css->[$i]}; } } else { for( my $i = 0; $i < @$css; $i++ ) { next unless length($css->[$i])>$length; $index = $i; $length = length($css->[$i]); } } return $css->[$index]; } 1; __END__ =head1 NAME String::LCSS::Fast - Perl extension for getting the Longest Common Sub +String =head1 SYNOPSIS use String::LCSS::Fast qw( LCSS CSS CSS_Sorted ); =head1 DESCRIPTION This module uses Algoritm::Diff to implement LCSS and is orders of mag +nitude faster than String::LCSS. =head1 METHODS =head2 LCSS Returns the longest common sub string. If there may be more than one a +nd it matters use CSS instead. my $lcss_ary_ref = LCSS( \@SEQ1, \@SEQ2 ); # ref to array my $lcss_string = LCSS( $STR1, $STR2 ); # string =head2 CSS Returns all the common sub strings, unsorted. my $css_ary_ref = CSS( \@SEQ1, \@SEQ2 ); # ref to array of arrays my $css_str_ref = CSS( $STR1, $STR2 ); # ref to array of string +s =head2 CSS_Sorted Returns all the common sub strings, sorted from longest to shortest my $css_ary_ref = CSS( \@SEQ1, \@SEQ2 ); # ref to array of arrays my $css_str_ref = CSS( $STR1, $STR2 ); # ref to array of string +s =head1 EXPORT None by default. =head1 AUTHOR Dr James Freeman <james.freeman@id3.org.uk> =head1 SEE ALSO L<perl>. =cut

cheers

tachyon

s&&rsenoyhcatreve&&&s&n.+t&"$'$`$\"$\&"&ee&&y&srve&&d&&print