#!/usr/bin/perl -w use strict; use warnings; # DNA to scan... my $s1 = 'AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG'; my %genetic_code = ( 'GCA'=>'A', 'GCC'=>'A', 'GCG'=>'A', 'GCT'=>'A',# Alanine 'AGA'=>'R', 'AGG'=>'R', 'CGA'=>'R', 'CGC'=>'R',# Arginine 'CGG'=>'R', 'CGT'=>'R', # Arginine 'AAC'=>'N', 'AAT'=>'N', # Asparagine 'GAC'=>'D', 'GAT'=>'D', # Aspartic Acid 'TGC'=>'C', 'TGT'=>'C', # Cysteine 'CAA'=>'Q', 'CAG'=>'Q', 'GAA'=>'E', 'GAG'=>'E',# Glutamate 'GGA'=>'G', 'GGC'=>'G', 'GGG'=>'G', 'GGT'=>'G',# Glycine 'CAC'=>'H', 'CAT'=>'H', # Histidine 'ATA'=>'I', 'ATC'=>'I', 'ATT'=>'I', # Isoleucine 'CTA'=>'L', 'CTC'=>'L', 'CTG'=>'L', 'CTT'=>'L',# Leucine 'TTA'=>'L', 'TTG'=>'L', # Leucine 'AAA'=>'K', 'AAG'=>'K', # Lysine 'ATG'=>'M', # Methionine 'CCA'=>'P', 'CCC'=>'P', 'CCG'=>'P', 'CCT'=>'P',# Phynyalanine 'TTC'=>'F', 'TTT'=>'F', # Phynyalanine 'AGC'=>'S', 'AGT'=>'S', 'TCA'=>'S', 'TCC'=>'S',# Serine 'TCG'=>'S', 'TCT'=>'S', # Serine 'ACA'=>'T', 'ACC'=>'T', 'ACG'=>'T', 'ACT'=>'T',# Serine 'TGG'=>'W', # Tryptophan 'TAC'=>'Y', 'TAT'=>'Y', # Tyrosine 'GTA'=>'V', 'GTC'=>'V', 'GTG'=>'V', 'GTT'=>'V',# Valine 'TAA'=>'_', 'TAG'=>'_', 'TGA'=>'_', # exit ); print "DNA: $s1\n"; my $start = 'M'; my $end = '_'; my $idx = -1; while (my $prefix = substr($s1, ++$idx, 3)) { last if length $prefix < 3; # Check for the start indicator. next unless codons($prefix) eq 'M'; my $peptide = proteintrans($end, substr($s1, $idx)); next if !defined $peptide; print "\n\nIDX $idx: $peptide\n"; dbg_dump($s1, $idx, $peptide); } sub dbg_dump { my ($dna, $idx, $peptide) = @_; print $dna, "\n", " " x ($idx-1), '<'; print join(" ", split //, $peptide), ">\n"; } sub proteintrans { my ($end, $dna) = @_; my $protein = ''; for (my $i; $i