#!/usr/bin/perl
#
# Find open reading frames (ORFs) in reading frame 1 of every sequence in a
# FASTA file and print the protein translation of each ORF, one per line.
#
# Usage: perl <this script> <sequences.fasta>
#
# An ORF runs from the first in-frame start codon up to and including the
# next in-frame stop codon, so every printed translation ends in '_'.
# Start codons that lie inside an ORF already being read are not reported
# as separate ORFs, and a start codon with no downstream in-frame stop
# codon produces no output.

use strict;
use warnings;

use Bio::SearchIO;
use Bio::Seq;
use Bio::SeqIO;

# Codon (upper case) => one-letter amino acid; '_' marks a stop codon.
my %genetic_code = (
    'TCA' => 'S',    # Serine
    'TCC' => 'S',    # Serine
    'TCG' => 'S',    # Serine
    'TCT' => 'S',    # Serine
    'TTC' => 'F',    # Phenylalanine
    'TTT' => 'F',    # Phenylalanine
    'TTA' => 'L',    # Leucine
    'TTG' => 'L',    # Leucine
    'TAC' => 'Y',    # Tyrosine
    'TAT' => 'Y',    # Tyrosine
    'TAA' => '_',    # Stop
    'TAG' => '_',    # Stop
    'TGC' => 'C',    # Cysteine
    'TGT' => 'C',    # Cysteine
    'TGA' => '_',    # Stop
    'TGG' => 'W',    # Tryptophan
    'CTA' => 'L',    # Leucine
    'CTC' => 'L',    # Leucine
    'CTG' => 'L',    # Leucine
    'CTT' => 'L',    # Leucine
    'CCA' => 'P',    # Proline
    'CCC' => 'P',    # Proline
    'CCG' => 'P',    # Proline
    'CCT' => 'P',    # Proline
    'CAC' => 'H',    # Histidine
    'CAT' => 'H',    # Histidine
    'CAA' => 'Q',    # Glutamine
    'CAG' => 'Q',    # Glutamine
    'CGA' => 'R',    # Arginine
    'CGC' => 'R',    # Arginine
    'CGG' => 'R',    # Arginine
    'CGT' => 'R',    # Arginine
    'ATA' => 'I',    # Isoleucine
    'ATC' => 'I',    # Isoleucine
    'ATT' => 'I',    # Isoleucine
    'ATG' => 'M',    # Methionine
    'ACA' => 'T',    # Threonine
    'ACC' => 'T',    # Threonine
    'ACG' => 'T',    # Threonine
    'ACT' => 'T',    # Threonine
    'AAC' => 'N',    # Asparagine
    'AAT' => 'N',    # Asparagine
    'AAA' => 'K',    # Lysine
    'AAG' => 'K',    # Lysine
    'AGC' => 'S',    # Serine
    'AGT' => 'S',    # Serine
    'AGA' => 'R',    # Arginine
    'AGG' => 'R',    # Arginine
    'GTA' => 'V',    # Valine
    'GTC' => 'V',    # Valine
    'GTG' => 'V',    # Valine
    'GTT' => 'V',    # Valine
    'GCA' => 'A',    # Alanine
    'GCC' => 'A',    # Alanine
    'GCG' => 'A',    # Alanine
    'GCT' => 'A',    # Alanine
    'GAC' => 'D',    # Aspartic Acid
    'GAT' => 'D',    # Aspartic Acid
    'GAA' => 'E',    # Glutamic Acid
    'GAG' => 'E',    # Glutamic Acid
    'GGA' => 'G',    # Glycine
    'GGC' => 'G',    # Glycine
    'GGG' => 'G',    # Glycine
    'GGT' => 'G',    # Glycine
);

# Codons accepted as the beginning of an ORF.
# NOTE(review): CTA is kept because the original search pattern listed it;
# it does not look like a usual initiation codon -- confirm whether another
# codon was intended.
my %is_start_codon = map { $_ => 1 } qw(ATG TTG CTG ATT CTA GTG);

# Codons that terminate an ORF.
my %is_stop_codon = map { $_ => 1 } qw(TAG TAA TGA);

# find_orfs($seq, $frame)
#
# Scan $seq codon by codon and return the list of ORF nucleotide strings
# found in one forward reading frame.
#
#   $seq   - nucleotide sequence; matching is case-insensitive because the
#            sequence is upper-cased before scanning.
#   $frame - offset of the first codon: 0, 1 or 2 (default 0, i.e. reading
#            frame 1).
#
# Returns the ORFs in 5'->3' order, upper-cased, each including its stop
# codon. Returns an empty list when $seq is undefined, empty, or contains
# no complete ORF. Dies if $frame is not 0, 1 or 2.
sub find_orfs {
    my ($seq, $frame) = @_;

    $frame = 0 unless defined $frame;
    die "frame must be 0, 1 or 2\n" unless $frame =~ /\A[012]\z/;
    return () unless defined $seq && length $seq;

    $seq = uc $seq;

    my @orfs;
    my $orf_start;    # offset of the open ORF's start codon; undef outside an ORF

    # Step through the frame three bases at a time rather than regex-scanning
    # the whole string: a non-overlapping m//g scan can consume part of an
    # in-frame codon while matching an out-of-frame one, and so miss it.
    for (my $pos = $frame; $pos + 3 <= length $seq; $pos += 3) {
        my $codon = substr $seq, $pos, 3;

        if (!defined $orf_start) {
            $orf_start = $pos if $is_start_codon{$codon};
        }
        elsif ($is_stop_codon{$codon}) {
            # The length is start..end of the stop codon, not the remainder
            # of the sequence.
            push @orfs, substr($seq, $orf_start, $pos + 3 - $orf_start);
            undef $orf_start;
        }
    }

    return @orfs;
}

# translate($dna)
#
# Translate a nucleotide string into a one-letter protein string using
# %genetic_code. Codons are read from offset 0 in steps of three; trailing
# bases that do not fill a codon are ignored. Any triplet that is not in
# the table (for example one containing N) becomes 'X'. Lookup is
# case-insensitive. An undefined or empty input yields ''.
sub translate {
    my ($dna) = @_;

    return '' unless defined $dna;
    $dna = uc $dna;

    my @residues;
    for (my $pos = 0; $pos + 3 <= length $dna; $pos += 3) {
        my $codon = substr $dna, $pos, 3;
        push @residues,
            exists $genetic_code{$codon} ? $genetic_code{$codon} : 'X';
    }

    return join '', @residues;
}

# main($fasta_path)
#
# Read every record of the FASTA file at $fasta_path, and for each one
# print the translation of every ORF found in reading frame 1, one
# translation per line. Dies if no path is given or the file does not
# exist.
sub main {
    my ($fasta_path) = @_;

    die "Usage: $0 <sequences.fasta>\n" unless defined $fasta_path;
    die "$fasta_path not found\n"       unless -e $fasta_path;

    my $seqio = Bio::SeqIO->new('-format' => 'fasta', -file => $fasta_path);

    while (my $seqobj = $seqio->next_seq) {
        my $seq = $seqobj->seq();
        next unless defined $seq;

        for my $orf (find_orfs($seq, 0)) {
            print translate($orf), "\n";
        }
    }

    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without touching @ARGV.
main(@ARGV) unless caller;