sub proteintrans {
my ($dna) = @_;
my $protein = '';
# split amino acids into 3 with for loop and concatenate
for (my $i = 0; $i < (length($dna) - 2) ; $i +=3) {
$protein .= codons(substr($dna, $i, 3));
}
if ($protein =~ m/_/) {
# if string $protein matches stop codon '_'
# find match with index()
# form a substring before stop codon with substr()
# return new substring protein_
$index = index($protein, '_');
$protein_ = substr($protein, 0, $index);
return $protein_;
}
else {
# else return protein unmodified
return $protein;
}
}
####
sub proteintrans {
my ($dna) = @_;
# split amino acids into 3 with for loop and concatenate
my $protein = '';
for(my $i = 0; $i < (length($dna) - 2) ; $i +=3) {
++$cnt;
my $codon = codons(substr($dna, $i, 3));
# Return our protein if we hit our end marker
return $protein if $codon eq '_';
$protein .= $codon;
}
# else return protein (rest of $dna)
return $protein;
}
####
#!/usr/bin/perl -w
use strict;
use warnings;
# DNA to scan...
my $s1 = 'AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG';
my %genetic_code = (
'GCA'=>'A', 'GCC'=>'A', 'GCG'=>'A', 'GCT'=>'A',# Alanine
'AGA'=>'R', 'AGG'=>'R', 'CGA'=>'R', 'CGC'=>'R',# Arginine
'CGG'=>'R', 'CGT'=>'R', # Arginine
'AAC'=>'N', 'AAT'=>'N', # Asparagine
'GAC'=>'D', 'GAT'=>'D', # Aspartic Acid
'TGC'=>'C', 'TGT'=>'C', # Cysteine
'CAA'=>'Q', 'CAG'=>'Q', 'GAA'=>'E', 'GAG'=>'E',# Glutamate
'GGA'=>'G', 'GGC'=>'G', 'GGG'=>'G', 'GGT'=>'G',# Glycine
'CAC'=>'H', 'CAT'=>'H', # Histidine
'ATA'=>'I', 'ATC'=>'I', 'ATT'=>'I', # Isoleucine
'CTA'=>'L', 'CTC'=>'L', 'CTG'=>'L', 'CTT'=>'L',# Leucine
'TTA'=>'L', 'TTG'=>'L', # Leucine
'AAA'=>'K', 'AAG'=>'K', # Lysine
'ATG'=>'M', # Methionine
'CCA'=>'P', 'CCC'=>'P', 'CCG'=>'P', 'CCT'=>'P',# Phynyalanine
'TTC'=>'F', 'TTT'=>'F', # Phynyalanine
'AGC'=>'S', 'AGT'=>'S', 'TCA'=>'S', 'TCC'=>'S',# Serine
'TCG'=>'S', 'TCT'=>'S', # Serine
'ACA'=>'T', 'ACC'=>'T', 'ACG'=>'T', 'ACT'=>'T',# Serine
'TGG'=>'W', # Tryptophan
'TAC'=>'Y', 'TAT'=>'Y', # Tyrosine
'GTA'=>'V', 'GTC'=>'V', 'GTG'=>'V', 'GTT'=>'V',# Valine
'TAA'=>'_', 'TAG'=>'_', 'TGA'=>'_', # exit
);
print "DNA: $s1\n";
my $start = 'M';
my $end = '_';
my $idx = -1;
while (my $prefix = substr($s1, ++$idx, 3)) {
last if length $prefix < 3;
# Check for the start indicator.
next unless codons($prefix) eq 'M';
my $peptide = proteintrans($end, substr($s1, $idx));
next if !defined $peptide;
print "\n\nIDX $idx: $peptide\n";
dbg_dump($s1, $idx, $peptide);
}
sub dbg_dump {
my ($dna, $idx, $peptide) = @_;
print $dna, "\n", " " x ($idx-1), '<';
print join(" ", split //, $peptide), ">\n";
}
sub proteintrans {
my ($end, $dna) = @_;
my $protein = '';
for (my $i; $i