aseee has asked for the wisdom of the Perl Monks concerning the following question:
Hi to all Monks,
I am trying to find the common patterns in different strings. I used the KMP algorithm. It works best for a single string but show almost nothing for the same string in next iterator of loop, Here is my code
use CGI qw(:standard); use strict; print header; BEGIN{ push @INC,'C:/src/String-1.5','C:/src/bioperl-live','C:/src/ens +embl/modules','C:/src/ensembl-compara/modules','C:/src/ensembl-variat +ion/modules','C:/src/ensembl-functgenomics/modules','C:/src/DBD-mysql +-4.021';}; use strict; use warnings; use string; my $T_one='GAATTCACGGATAGCCWGGTACGATGATAGATGAATTCGACTAGAATGCCWGGAAGAAT +gGAATTC'; my $Ta='GAATTCACGGATAGCCWGGTACGATGATAGATGAATTCGACTAGAATGCCWGGAAGAATgGA +ATTC'; my $rr='GAATTCACGGATAGCCWGGTACGATGATAGATGAATTCGACTAGAATGCCWGGAAGAATgGA +ATTC'; my @P=('GAATTC','CCWGG'); my @next; my @loc; my $m; my $l=0; my $lt;my $T; my @text=(0); my @tt=($T_one,$Ta,$rr); for(my $i=0;$i<3;$i++){ $T=$tt[$i]; @loc=(); print "<\br>"; @text=(); my $str = new String($T); # USE OF STRING MODULE TO FETCH +THE SEQUENCE print "length of gene sequence array ", $lt= $str->length +, "\n"; # LENGTH OF THE SEQUENCE $lt= $str->length; my $z=0; # FOR ARRAY INCREMENT for(my $k=0; $k<$lt;$k++){ $text[$z]=$str->charAt($k); #ASSIGNING THE ARRAY +OBJECT VALUES TO AN ARRAY $z++; } #END OF FOR LOOP foreach my $P(@P){ print knuth_morris_pratt($T,$P); print"\n"; } @loc = sort {$a <=> $b} @loc; print"</br>";print "@loc";print "</br>"; my $i=0; for(my $k=0;$k<$lt;$k++){ print $i; if ($k == $loc[$i]){ print "<span style=background-color:re +d;>" . $text[$k] . "</span>"; $i++; } else { print $text[$k]; } } # end of $k for loop #print"\n";print "@loc";print "<\n>"; # FOR ARRAY INCREMENT print "</br>"; } #end of $T for each loop sub knuth_morris_pratt_next { my ( $P ) = @_; # The pattern. use integer; $m=length $P; my ($i, $j ) = ( 0, -1 ); for ($next[0] = -1; $i < $m; ) { # Note that this while() is skipped during the first for() pas +s. while ( $j > -1 && substr( $P, $i, 1 ) ne substr( $P, $j, 1 ) ) { $j = $next[ $j ]; } $i++; $j++; $next[ $i ] = substr( $P, $j, 1 ) eq substr( $P, $i, 1 ) ? $next[ $j ] : $j; } return ( $m, @next ); # Length of pattern and prefix function. } ####################################### sub knuth_morris_pratt { my ( $T, $P ) = @_; # Text and pattern. use integer; my( $m, @next) = knuth_morris_pratt_next( $P ); my ( $n, $i, $j ) = ( length($T), 0, 0 ); while ( $i < $n ) { while ( $j > -1 && substr( $P, $j, 1 ) ne substr( $T, $i, 1 ) +) { $j = $next[ $j ]; } $i++; $j++; if($j >= $m){ my $a=$i-$j; # $j=$j-1;print "\n"; $j=$next[$j]; for(my $z=0;$z<$m;$z++){ $loc[$l]=$a; $l++; $a++; } } } return ; # Mismatch. } #################################
I am new here so apology for any mistake. I am a biologist doing a project in perl.Please Monks help me
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Pattern Searching
by marquezc329 (Scribe) on Nov 10, 2012 at 07:31 UTC | |
by aseee (Novice) on Nov 10, 2012 at 10:09 UTC | |
by space_monk (Chaplain) on Nov 10, 2012 at 10:09 UTC | |
by aseee (Novice) on Nov 10, 2012 at 10:47 UTC | |
by marquezc329 (Scribe) on Nov 10, 2012 at 11:20 UTC | |
|
Re: Pattern Searching
by grondilu (Friar) on Nov 10, 2012 at 12:18 UTC | |
by Anonymous Monk on Nov 10, 2012 at 17:31 UTC | |
by aseee (Novice) on Nov 10, 2012 at 18:01 UTC | |
by Anonymous Monk on Nov 11, 2012 at 08:20 UTC | |
|
Re: Pattern Searching
by CountZero (Bishop) on Nov 10, 2012 at 21:19 UTC | |
by Anonymous Monk on Nov 12, 2012 at 15:34 UTC | |
by choroba (Cardinal) on Nov 12, 2012 at 15:35 UTC |