#!/gpsr/local/bin/perl
#
# algpred.pl - command-line driver for the AlgPred allergen predictors.
#
# Usage:   ./algpred.pl -i input_file -o output_file
#   -i   input file holding a single sequence in FASTA format
#   -o   plain-text report to write
#
# Steps performed:
#   1. Read the sequence, refuse multi-sequence files, keep only the
#      residues ACDEFGHIKLMNPQRSTVWXY (after upper-casing).
#   2. Create a per-job working directory /gpsr/temp/algpred<jobnumber> and
#      write input.txt / blast_input.txt into it for the external tools.
#   3. Run each enabled approach (1..6) and append its verdict to the text
#      report (-o) and to <workdir>/delout.html.
#
# External programs used: map_IgE.pl, map_IgE_hyb.pl, comp.pl, comp_hyb.pl,
# dipep.pl (under $homedir), mast, svm5_classify and blastpgp.
#
# NOTE(review): the copy of this script that was reviewed had lost the HTML
# tags inside the delout.html strings (and the <FH> readline operators).
# The visible text of those strings is kept as found; restore the markup
# from the upstream source if the HTML report matters.
#
# NOTE(review): the working directory is never removed by this script;
# confirm whether something else cleans /gpsr/temp.

use strict;
use warnings;
use Getopt::Std;

my %opt;
getopts('i:o:', \%opt);
my $infile  = defined $opt{i} ? $opt{i} : "";
my $outfile = defined $opt{o} ? $opt{o} : "";

if ($infile eq "" or $outfile eq "") {
    print "\t./algpred.pl -i input_file -o output_file\n\n";
    print "\t-i:\tinput file in fasta format\n";
    print "\t-o:\toutput file\n";
    print "\n\tExample: ./algpred.pl -i /gpsr/examples/example.fasta -o out.algpred\n";
    exit;
}

#-----------------------------------------------
# Fixed configuration
#-----------------------------------------------
my $homedir      = "/gpsr/standalone/algpred/progs";
my $modeldir     = "/gpsr/models/algpred";
my $blastdatadir = "/gpsr/data/blastdata/algpred";
my $tmproot      = "/gpsr/temp";
my $seqname      = "sequence";
my @approaches   = (1 .. 6);

# Predictive values per SVM score bin: [ exclusive lower bound, PPV, NPV ].
# Rows are scanned top-down and the first row whose bound is below the score
# wins; the final row (bound undef) catches every remaining score.
# NOTE(review): the original conditions left scores of exactly -0.6 and
# scores in [-1, -0.8] without any bin (reported as 0% / 0%). The -0.6 edge
# is now inclusive like every other bin, and the lowest bin is extended to
# cover everything <= -0.8 -- confirm against the published table.
my @comp_pv_table = (
    [  0.8, 85.64, 67.96 ],
    [  0.6, 87.05, 71.53 ],
    [  0.4, 81.83, 74.03 ],
    [  0.2, 74.81, 76.94 ],
    [  0.0, 70.05, 80.74 ],
    [ -0.2, 64.55, 86.61 ],
    [ -0.4, 47.13, 89.71 ],
    [ -0.6, 18.21, 71.24 ],
    [ -0.8, 22.82, 92.94 ],
    [ undef, 15.19, 94.18 ],
);
my @dipep_pv_table = (
    [  0.8, 100.00, 59.74 ],
    [  0.6, 82.97, 62.40 ],
    [  0.4, 86.55, 66.47 ],
    [  0.2, 85.88, 72.01 ],
    [  0.0, 74.14, 79.04 ],
    [ -0.2, 63.10, 85.56 ],
    [ -0.4, 39.40, 89.34 ],
    [ -0.6, 27.66, 92.40 ],
    [ -0.8, 13.26, 74.19 ],
    [ undef, 8.69, 75.22 ],
);

#-----------------------------------------------
# Read the FASTA input (pure Perl: the user-supplied path never reaches a shell)
#-----------------------------------------------
open(my $fasta_fh, '<', $infile)
    or die "Cannot read input file '$infile': $!\n";
my $header_count = 0;
my $seq          = "";
while (my $row = <$fasta_fh>) {
    if ($row =~ /^>/) {
        $header_count++;
        next;
    }
    $seq .= $row;
}
close $fasta_fh;

if ($header_count > 1) {
    print "\tDon't Enter multiple sequences... Enter single sequence fasta file\n";
    exit;
}
$seq =~ s/\s+//g;

#-----------------------------------------------
# Per-job working directory
#-----------------------------------------------
# The helper scripts only receive the job number, so the directory name must
# stay /gpsr/temp/algpred<jobnumber>. Job numbers are drawn from 10001..99999
# and a number whose directory already exists is never reused.
my ($jobnumber, $tmpdir);
for my $attempt (1 .. 100) {
    my $candidate_id  = 10001 + int(rand(89999));
    my $candidate_dir = "$tmproot/algpred$candidate_id";
    if (mkdir($candidate_dir, 0777)) {
        ($jobnumber, $tmpdir) = ($candidate_id, $candidate_dir);
        last;
    }
    die "Cannot create working directory $candidate_dir: $!\n"
        unless $!{EEXIST};
}
die "Could not find an unused working directory under $tmproot\n"
    unless defined $tmpdir;
# World-writable as before (mkdir's mode is filtered by the umask).
chmod(0777, $tmpdir) or warn "algpred: chmod 777 $tmpdir failed: $!\n";

open(my $out_main, '>', $outfile)
    or die "Cannot write output file '$outfile': $!\n";
open(my $out_html, '>', "$tmpdir/delout.html")
    or die "Cannot write $tmpdir/delout.html: $!\n";

#--------------------Sequence Detection------------------------------
if ($seq eq "") {
    # Previously a silent exit that left an empty report behind.
    warn "algpred: no input sequence is detected in '$infile'\n";
    close $out_html;
    close $out_main;
    exit;
}

#--------------------Sequence clean-up--------------------------------
$seq =~ tr/a-z/A-Z/;
$seq =~ s/[^ACDEFGHIKLMNPQRSTVWXY]//g;
my $seq_length = length($seq);

####------------warning part for less than 11 amino acids--------------
if ($seq_length < 11) {
    print {$out_html} "\n\nWARNING:</BLINK>Sequence length too small to be analysed! The Resuts may be artifact. Please enter longer sequence.\n</b>";
    print {$out_main} "Sequence length too small to be analysed! The Resuts may be artifact. Please enter longer sequence.\n";
    close $out_html;
    close $out_main;
    exit;
}

####--------------report header----------------------------------------
my $timestamp = localtime(time());
print {$out_html} "\n";
print {$out_html} "\n\nName of sequence $seqname\nLength of Sequence $seq_length\nPreicted On$timestamp\n";
print {$out_main} "Name of sequence:\t\t$seqname\n";
print {$out_main} "Length of Sequence:\t\t$seq_length\nPreicted On:\t\t$timestamp\n";

#-------------------------------------------------------------------
# Input files for the external tools
#-------------------------------------------------------------------
write_text_file("$tmpdir/input.txt",       "$seq\n");
write_text_file("$tmpdir/blast_input.txt", "> Protein\n$seq");

#-------------------------------------------------------------------
# ANALYZE
#-------------------------------------------------------------------
for my $approach (@approaches) {

    #-----------approach 1: IgE epitope mapping---------------------
    if ($approach == 1) {
        run_command("$homedir/map_IgE.pl $jobnumber");
        my $epitope_flag = leading_number(read_first_line("$tmpdir/pid.txt"));

        if (defined $epitope_flag && $epitope_flag > 0) {
            print {$out_html} "\nPredicted protein by mapping of IgE epitope and PID  \n\n";
            print {$out_main} "Predicted protein by mapping of IgE epitope and PID\n";
            print {$out_html} "     ALLERGEN and contains IgE epitope";
            print {$out_main} "ALLERGEN and contains IgE epitope\n";
            print {$out_main} "Mapping of IgE Epitopes\n";
            print {$out_main} "$seq\n";

            # map_out columns used: 0 epitope, 1 matched segment, 3 epitope
            # length, 4 PID, 5 zero-based offset in the query.
            my @hits;
            if (open(my $map_fh, '<', "$tmpdir/map_out")) {
                while (my $row = <$map_fh>) {
                    chomp $row;
                    my @field = split /\s+/, $row;
                    next if @field < 6;    # blank or truncated line
                    push @hits, {
                        epitope => $field[0],
                        matched => $field[1],
                        length  => $field[3],
                        pid     => $field[4],
                        offset  => $field[5],
                    };
                }
                close $map_fh;
            }
            else {
                warn "algpred: cannot read $tmpdir/map_out: $!\n";
            }

            # One dash-padded track per epitope, each on its own line so it
            # lines up under the sequence printed above (they used to be
            # concatenated on a single line).
            my @tracks;
            for my $hit (@hits) {
                my $lead_len  = $hit->{offset};
                my $trail_len = $seq_length - ($hit->{offset} + $hit->{length});
                $lead_len  = 0 if $lead_len < 0;
                $trail_len = 0 if $trail_len < 0;
                push @tracks,
                    sprintf("%s%-*s%s",
                        '-' x $lead_len, $hit->{length}, $hit->{epitope},
                        '-' x $trail_len);
            }
            print {$out_main} join("\n", @tracks), "\n\n";

            print {$out_main} "Full information\n";
            print {$out_html} sprintf("%-22s %-22s %-12s %-8s\n\n",
                "IgE epitope", "Sequence matched", "position", "PID");
            print {$out_main} sprintf("%-22s %-22s %-12s %-8s \n",
                "IgE epitope", "Sequence matched", "position", "PID");
            for my $hit (@hits) {
                my @cells = ($hit->{epitope}, $hit->{matched},
                             $hit->{offset} + 1, $hit->{pid});    # 1-based position
                print {$out_html} sprintf("%-22s %-22s %-12s %-8s\n\n", @cells);
                print {$out_main} sprintf("%-22s %-22s %-12s %-8s \n",  @cells);
            }
        }
        else {
            print {$out_html} "\n \n\nPrediction by mapping of IgE epitope  \n\n";
            print {$out_main} "Prediction by mapping of IgE epitope\n";
            print {$out_html} "The protein sequence does not contain experimentally proven IgE epitope";
            print {$out_main} "The protein sequence does not contain experimentally proven IgE epitope\n";
        }
    }

    #-----------approach 2: MEME/MAST motifs------------------------
    elsif ($approach == 2) {
        # "2>&1" (not "2>1", which dropped stderr into a file named "1").
        run_command("/gpsr/local/bin/mast $modeldir/meme_out_set1 $tmpdir/blast_input.txt -ev 1 -o $tmpdir/mast.out >/dev/null 2>&1");
        my $mast_hits = count_mast_hits("$tmpdir/mast.out/mast.txt");

        print {$out_html} "\n \n\nMAST RESULT   \n\n";
        print {$out_main} "MAST RESULT\n";
        if ($mast_hits == 0) {
            print {$out_main} "MAST Results : No Hits found\tNON ALLERGEN\n";
        }
        else {
            print {$out_main} "MAST Results : Hits found\tALLERGEN\n";
        }
    }

    #-----------approach 3: SVM, amino acid composition-------------
    elsif ($approach == 3) {
        run_command("$homedir/comp.pl $jobnumber $tmpdir");
        run_command("/gpsr/local/bin/svm5_classify $tmpdir/binary1.txt $modeldir/model_comp $tmpdir/output1 >$tmpdir/out1.algpred");
        my $score_text = read_first_line("$tmpdir/output1");
        my $score      = leading_number($score_text);

        print {$out_html} "\n \n\nPrediction by SVM method based on amino acid composition  \n\n";
        print {$out_main} "Prediction by SVM method based on amino acid composition\n";
        if (!defined $score) {
            warn "algpred: no SVM score in $tmpdir/output1; composition prediction skipped\n";
            next;
        }
        my ($ppv, $npv) = predictive_values($score, \@comp_pv_table);

        if ($score >= 0.4) {
            print {$out_html} "     Potential ALLERGEN       \n\n  Score= ${score_text}     [Threshold= -0.4]\n\n  Positive Predictive Value= ${ppv}%    Negative Predictive Value= ${npv}%";
            print {$out_main} "Potential ALLERGEN\tScore= ${score_text}\t[Threshold= -0.4]\tPositive Predictive Value= ${ppv}\tNegative Predictive Value= ${npv}\n";
        }
        elsif ($score >= -0.4) {
            print {$out_html} "     ALLERGEN       \n\n  Score= ${score_text}     [Threshold= -0.4]\n\n  Positive Predictive Value= ${ppv}%    Negative Predictive Value= ${npv}%";
            print {$out_main} "ALLERGEN\tScore= ${score_text}\t[Threshold= -0.4]\tPositive Predictive Value= ${ppv}\tNegative Predictive Value= ${npv}\n";
        }
        else {
            print {$out_html} "     NON ALLERGEN       \n\n  Score=${score_text}     [Threshold=-0.4]\n\n  Positive Predictive Value=${ppv}%     Negative Predictive Value=${npv}%";
            print {$out_main} "NON ALLERGEN\tScore=${score_text}\t[Threshold=-0.4]\tPositive Predictive Value=${ppv}\tNegative Predictive Value=${npv}\n";
        }
    }

    #-----------approach 4: SVM, dipeptide composition--------------
    elsif ($approach == 4) {
        run_command("$homedir/dipep.pl $jobnumber $tmpdir");
        run_command("/gpsr/local/bin/svm5_classify $tmpdir/binary.txt $modeldir/model_dipep $tmpdir/output2 >$tmpdir/out2.algpred");
        my $score_text = read_first_line("$tmpdir/output2");
        my $score      = leading_number($score_text);

        print {$out_html} "\n \n\nPrediction based on SVM method based on dipeptide composition   \n\n";
        print {$out_main} "Prediction based on SVM method based on dipeptide composition\n";
        if (!defined $score) {
            warn "algpred: no SVM score in $tmpdir/output2; dipeptide prediction skipped\n";
            next;
        }
        my ($ppv, $npv) = predictive_values($score, \@dipep_pv_table);

        my $verdict = $score >= 0.2  ? "Potential ALLERGEN"
                    : $score >= -0.2 ? "ALLERGEN"
                    :                  "NON ALLERGEN";
        print {$out_html} "     $verdict       \n\n  Score= ${score_text}     [Threshold= -0.2]\n\n  Positive Predictive Value=${ppv}%     Negative Predictive Value=${npv}%";
        print {$out_main} "$verdict\tScore= ${score_text}\t[Threshold= -0.2]\tPositive Predictive Value=${ppv}\tNegative Predictive Value=${npv}\n";
    }

    #-----------approach 5: BLAST against ARPs----------------------
    elsif ($approach == 5) {
        run_command("/gpsr/software/blastpr/blastpgp -e 0.001 -j 1 -d $blastdatadir/ARP_database -i $tmpdir/blast_input.txt -o $tmpdir/blast.out");
        my ($blast_hits, $subject) = parse_blast_report("$tmpdir/blast.out");

        print {$out_html} "\n \n\nBlast RESULT   \n\n";
        print {$out_main} "BLAST RESULT\n";
        if ($blast_hits == 0) {
            print {$out_html} "BLAST Results of ARPS : No Hits found\n\n\n     NON ALLERGEN    ";
            print {$out_main} "BLAST Results of ARPS : No Hits found\tNON ALLERGEN\n";
        }
        else {
            print {$out_html} "Hits found with ARPs database: $subject\n\n\n     ALLERGEN    ";
            print {$out_main} "Hits found with ARPs database: $subject\tALLERGEN\n";
        }
    }

    #-----------approach 6: hybrid approach-------------------------
    elsif ($approach == 6) {
        print {$out_html} "\n \n\n" . (" " x 40) . "Prediction by Hybrid Approach" . (" " x 40) . "\n\n";
        print {$out_main} "Prediction by Hybrid Approach\n";

        #-------IgE method--------------
        run_command("$homedir/map_IgE_hyb.pl $jobnumber");
        my $epitope_flag = leading_number(read_first_line("$tmpdir/pid_hyb.txt"));
        if (defined $epitope_flag && $epitope_flag > 0) {
            print {$out_html} "     ALLERGEN (By IgE method)       \n\n";
            print {$out_main} "ALLERGEN (By IgE method)\n";
        }

        #------------ARP BLAST---------
        run_command("/gpsr/software/blastpr/blastpgp -e 0.001 -j 1 -d $blastdatadir/ARP_database -i $tmpdir/blast_input.txt -o $tmpdir/blast_hyb.out");
        my ($blast_hits) = parse_blast_report("$tmpdir/blast_hyb.out");
        if ($blast_hits > 0) {
            print {$out_html} " ALLERGEN (By ARPs BLAST method)   \n\n";
            print {$out_main} "ALLERGEN (By ARPs BLAST method)\n";
        }

        #---------svmcomp-------------
        run_command("$homedir/comp_hyb.pl $jobnumber $tmpdir");
        run_command("/gpsr/local/bin/svm5_classify $tmpdir/binary1_hyb.txt $modeldir/model_comp $tmpdir/output1_hyb >$tmpdir/out1_hyb.algpred");
        my $score = leading_number(read_first_line("$tmpdir/output1_hyb"));
        if (!defined $score) {
            warn "algpred: no SVM score in $tmpdir/output1_hyb; hybrid SVM verdict skipped\n";
        }
        elsif ($score >= -0.4) {
            print {$out_html} "     ALLERGEN       \n\n ";
            print {$out_main} "ALLERGEN\n";
        }
        else {
            print {$out_html} "     NON ALLERGEN       ";
            print {$out_main} "NON ALLERGEN\n";
        }
    }
}

#----------------------end section---------------------------
close $out_html or warn "algpred: error closing $tmpdir/delout.html: $!\n";
close $out_main or die "Error writing output file '$outfile': $!\n";

# Kept from the original: remove a stray file named "1" in the install
# directory (the kind of file a "2>1" shell redirection leaves behind).
if (-e "/gpsr/standalone/algpred/1") {
    unlink "/gpsr/standalone/algpred/1";
}

#-------------------------------------------------------------------
# Helpers
#-------------------------------------------------------------------

# Run a shell command line; warn (but carry on) when it does not exit cleanly.
# Only fixed paths, the numeric job id and the working directory are ever
# interpolated into these command lines.
sub run_command {
    my ($command) = @_;
    my $status = system($command);
    warn "algpred: command failed (status $status): $command\n" if $status != 0;
    return $status;
}

# Write $content to $path, replacing any existing file; dies on failure.
sub write_text_file {
    my ($path, $content) = @_;
    open(my $fh, '>', $path) or die "Cannot write $path: $!\n";
    print {$fh} $content;
    close $fh or die "Cannot finish writing $path: $!\n";
    return;
}

# Return the first line of $path without trailing whitespace, or nothing
# (undef / empty list) when the file is missing or empty.
sub read_first_line {
    my ($path) = @_;
    open(my $fh, '<', $path) or return;
    my $text = <$fh>;
    close $fh;
    return unless defined $text;
    $text =~ s/\s+\z//;
    return $text;
}

# Return the number that $text starts with (the prefix Perl's own numeric
# conversion would use), or nothing when $text is undefined or not numeric.
sub leading_number {
    my ($text) = @_;
    return unless defined $text;
    if ($text =~ /\A\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)/) {
        return $1 + 0;
    }
    return;
}

# Look up (PPV, NPV) for $score in a table of [lower bound, PPV, NPV] rows.
sub predictive_values {
    my ($score, $table) = @_;
    for my $row (@$table) {
        my ($floor, $ppv, $npv) = @$row;
        return ($ppv, $npv) if !defined $floor || $score > $floor;
    }
    return (0, 0);
}

# Count the sequences listed under the "SEQUENCE NAME  DESCRIPTION" heading
# of a MAST text report: the line after the heading is skipped and the lines
# up to the next blank line are counted. Returns 0 if the file is unreadable.
sub count_mast_hits {
    my ($path) = @_;
    my $hits = 0;
    open(my $fh, '<', $path) or return 0;
    while (my $row = <$fh>) {
        next unless $row =~ /^SEQUENCE NAME\s+DESCRIPTION/;
        my $underline = <$fh>;    # line directly below the heading
        while (defined(my $entry = <$fh>)) {
            last if $entry =~ /^\s*$/;
            $hits++;
        }
    }
    close $fh;
    return $hits;
}

# Scan a blastpgp report. Returns (number of lines announcing "significant
# alignments:", third space-separated field of the last "Sbjct: " line, or ""
# when there is none). An unreadable report counts as no hits.
sub parse_blast_report {
    my ($path) = @_;
    my $announcements = 0;
    my $subject       = "";
    open(my $fh, '<', $path) or return (0, "");
    while (my $row = <$fh>) {
        chomp $row;
        if ($row =~ /significant/ && $row =~ /alignments:/) {
            $announcements++;
        }
        elsif ($row =~ /Sbjct: /) {
            my @field = split / +/, $row;
            $subject = defined $field[2] ? $field[2] : "";
        }
    }
    close $fh;
    return ($announcements, $subject);
}