#!/usr/bin/perl #ENTERING A MOTIF TO BE SEARCHED FROM USER INPUT print "Enter a pattern to be searched:"; $seq =; chomp($seq); @seqss = split ('',$seq); $m=scalar @seqss; #FINDING OUT THE FIRST AND THE LAST CHARACTER OF THE MOTIF $firstch=$seqss[0]; $lastch=$seqss[$m-1]; #ASSIGNING KEY VALUES TO MOTIFS my @unique = (); my %seen = (); @pats=reverse @seqss; foreach my $elem ( @pats ) { next if $seen{ $elem }++; push @uni, $elem; @unique= reverse @uni; } $cc=1; foreach (@uni) { $count{$_} = $cc; $cc++; } $zen = $m+1; for(my $i=0;$i"$m"); while (($key1, $val1) = each(%num)) { push(@key,$key1); push(@val,$val1); } } #OPENING A DATABASE (A TEXT FILE WHERE STRINGS OF ALPHABETS SAVED) open (PIR,'/home/httpd/heidi/fasta/pir/heidi_pir/pirdb.txt'); $count=0; while () { if (/^ENTRY/) {$entry = $_;} elsif (/^>gi/) {$gi = $_;} elsif(/^TITLE/) {$title = (s/ /\n\t\t /g,$_);} elsif(/^ORGANISM/) {$org = (s/ /\n\t\t /g,$_);} elsif(/^ACCESSIONS/) {$acc = $_;} else { @array2 = $_; } #what i need is only this from the database (ie., the string where i have to match the motif or pattern) if (defined $array2[0]) { @onlyseq = split('',$array2[0]); } @array2=(); #ASSIGNING KEY VALUES FOR THE STRING INCLUDING MOTIFS(VALUES) foreach $_(@onlyseq) { if($count{$_} != $zen) { if (defined $count{$_}) { push(@a,$count{$_}); push(@b,$_); #print @a; #print @b; } else { push(@a,$zen); push(@b,$_); #print @a; #print @b; } } } #SEARCHING FOR MOTIFS IN THE STRING<< THIS IS WHERE I AM STUCK WITH>> $m=scalar @seqss; for(my $i=$m;$i<=scalar @a;$i++) { #COMPARING THE LAST AND THE FIRST CHARACTER if(($lastch eq $b[$i]) && ($firstch eq $b[$i-($m-1)])) { #I WANT TO COMPARE THE INBETWEEN CHARACTERS HERE ITSELF.((( WHERE I NEED YOUR HELP))) #for(my $j=($i-($m-1));$j<=$i;$j++) #{ # push(@fnum,$a[$j]); # push(@flet,$b[$j]); #} #$t=$i+1; #$i=$t+$i-1; } } $m=scalar @seqss; while (@flet) { push(@words, join('', splice(@flet, 0, $m))); } #print "@words\n"; foreach (@words) { next unless $_ =~ /$seq/; print("$_\n"); $count++; } @onlyseq=(); @a=(); @b=(); @flet=(); @fnum=(); @words=(); } print "\nThe number of patterns found in PIR database : $count\n"; #### ENTRY CCHU #type complete TITLE cytochrome c [validated] - human ORGANISM #formal_name Homo sapiens #common_name man ACCESSIONS A31764; A05676; I55192; A00001 MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE ENTRY CCCZ #type complete TITLE cytochrome c - chimpanzee (tentative sequence) ORGANISM #formal_name Pan troglodytes #common_name chimpanzee ACCESSIONS A00002 GDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE ENTRY CCMQR #type complete TITLE cytochrome c - rhesus macaque (tentative sequence) ORGANISM #formal_name Macaca mulatta #common_name rhesus macaque ACCESSIONS A00003 GDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGITWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE ENTRY CCMKP #type complete TITLE cytochrome c - spider monkey ORGANISM #formal_name Ateles sp. #common_name spider monkey ACCESSIONS A00004 GDVFKGKRIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQASGFTYTEANKNKGIIWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE