#!/usr/bin/perl #ENTERING A MOTIF TO BE SEARCHED FROM USER INPUT print "Enter a pattern to be searched:"; $seq =<STDIN>; chomp($seq); @seqss = split ('',$seq); $m=scalar @seqss; #FINDING OUT THE FIRST AND THE LAST CHARACTER OF THE MOTIF $firstch=$seqss[0]; $lastch=$seqss[$m-1]; #ASSIGNING KEY VALUES TO MOTIFS my @unique = (); my %seen = (); @pats=reverse @seqss; foreach my $elem ( @pats ) { next if $seen{ $elem }++; push @uni, $elem; @unique= reverse @uni; } $cc=1; foreach (@uni) { $count{$_} = $cc; $cc++; } $zen = $m+1; for(my $i=0;$i<scalar @unique;$i++) { $mcut=scalar @unique; $m=$mcut-$i; %num=("$unique[$i]"=>"$m"); while (($key1, $val1) = each(%num)) { push(@key,$key1); push(@val,$val1); } } #OPENING A DATABASE (A TEXT FILE WHERE STRINGS OF ALPHABETS SAVED) open (PIR,'/home/httpd/heidi/fasta/pir/heidi_pir/pirdb.txt'); $count=0; while (<PIR>) { if (/^ENTRY/) {$entry = $_;} elsif (/^>gi/) {$gi = $_;} elsif(/^TITLE/) {$title = (s/ /\n\t\t /g,$_);} elsif(/^ORGANISM/) {$org = (s/ /\n\t\t /g,$_);} elsif(/^ACCESSIONS/) {$acc = $_;} else { @array2 = $_; } #what i need is only this from the database (ie., the string where + i have to match the motif or pattern) if (defined $array2[0]) { @onlyseq = split('',$array2[0]); } @array2=(); #ASSIGNING KEY VALUES FOR THE STRING INCLUDING MOTIFS(VALUES) foreach $_(@onlyseq) { if($count{$_} != $zen) { if (defined $count{$_}) { push(@a,$count{$_}); push(@b,$_); #print @a; #print @b; } else { push(@a,$zen); push(@b,$_); #print @a; #print @b; } } } #SEARCHING FOR MOTIFS IN THE STRING<< THIS IS WHERE I AM STUCK WIT +H>> $m=scalar @seqss; for(my $i=$m;$i<=scalar @a;$i++) { #COMPARING THE LAST AND THE FIRST CHARACTER if(($lastch eq $b[$i]) && ($firstch eq $b[$i-($m-1)])) { #I WANT TO COMPARE THE INBETWEEN CHARACTERS HERE ITSELF.(( +( WHERE I NEED YOUR HELP))) #for(my $j=($i-($m-1));$j<=$i;$j++) #{ # push(@fnum,$a[$j]); # push(@flet,$b[$j]); #} #$t=$i+1; #$i=$t+$i-1; } } $m=scalar @seqss; while (@flet) { push(@words, join('', splice(@flet, 0, $m))); } #print "@words\n"; foreach (@words) { next unless $_ =~ /$seq/; print("$_\n"); $count++; } @onlyseq=(); @a=(); @b=(); @flet=(); @fnum=(); @words=(); } print "\nThe number of patterns found in PIR database : $count\n";
ENTRY CCHU #type complete TITLE cytochrome c [validated] - human ORGANISM #formal_name Homo sapiens #common_name man ACCESSIONS A31764; A05676; I55192; A00001 MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIWGEDTLMEYLE +NPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE ENTRY CCCZ #type complete TITLE cytochrome c - chimpanzee (tentative sequence) ORGANISM #formal_name Pan troglodytes #common_name chimpanzee ACCESSIONS A00002 GDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIWGEDTLMEYLEN +PKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE ENTRY CCMQR #type complete TITLE cytochrome c - rhesus macaque (tentative sequence) ORGANISM #formal_name Macaca mulatta #common_name rhesus macaq +ue ACCESSIONS A00003 GDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGITWGEDTLMEYLEN +PKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE ENTRY CCMKP #type complete TITLE cytochrome c - spider monkey ORGANISM #formal_name Ateles sp. #common_name spider monkey ACCESSIONS A00004 GDVFKGKRIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQASGFTYTEANKNKGIIWGEDTLMEYLEN +PKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE
Edited by planetscape - added readmore tags
In reply to here is my program for string matching in a database (Was: Re: doubt in string matching.)
by heidi
in thread doubt in string matching.
by heidi
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |