# "$ _" is really "$_", and change the qq to a double-quote
#
# Annotated walk-through of a "DNA helix" obfuscation.  The snippets below
# are separated by "####" lines: first the decoder itself, then the same
# statements again, one at a time, with explanations.
#
# NOTE(review): the helix text is abbreviated here ("and so on"), so the
# decoding steps run but presumably do not reproduce the hidden program.
$_ = "CG
T--A
A---T
A----T
C----G
T----A
A---T
and so on
";

# Nucleotide letter -> base-4 digit (A=0, C=1, G=2, T=3), stored in %_.
@_{A => C => G => T => } = 0..3;

# Step 1: collapse every helix line into a single base-4 digit.
s|.*(\w).*(\w).*\n|$_{$-++ / 9 % 2 ? $2:$ 1}|gex;

# Step 2: pack each run of four base-4 digits into one character.
s|(.)(.)(.)(.)|chr (64*$1 + 16*$2 + 4*$3 + $4)|gex;

# Run the decoded text held in $_.  The explicit ";" keeps this eval from
# taking the first statement of the next snippet as its operand.
# NOTE(review): string eval -- acceptable only because $_ is built from the
# literal above, never from external input.
eval;

####

# The fat comma quotes the bareword on its left, so this hash slice ...
@_{A => C => G => T => } = 0..3;
# is really...
@_{'A', 'C', 'G', 'T'} = 0..3;

####

# Step 1 again, spelled out.  The /x flag allows whitespace and comments in
# the pattern; the /e flag makes the replacement a Perl expression.
s|
    .*      # greedily match
    (\w)    # match first letter, and store into $1
    .*      # greedily match
    (\w)    # match last letter, and store into $2
    .*\n    # eat up remainder of line
|
    # this expression maps the relevant character to its Base4 digit from
    # the %_ hash. The $- is used as a line counter (it defaults to 0). When
    # the DNA strands flip positions, this continues decoding on the correct
    # strand (see physi's comment for a visual representation of this)
    #
    # Lines 0-8 take the first letter, lines 9-17 the last letter, and so
    # on, alternating every nine lines.  "$ 1" is written with a space, as
    # in the decoder; the note at the top of the file covers the same
    # spelling for "$ _".
    $_{$-++ / 9 % 2 ? $2:$ 1}
|gex;

####

# Step 2 again, spelled out.
s|
    # store next four characters into $1,$2,$3, and $4
    (.)(.)(.)(.)
|
    # replace with a Base4-to-ASCII conversion of those characters
    # (four base-4 digits, most significant first, form one character code)
    chr (64*$1 + 16*$2 + 4*$3 + $4)
|gex;

####

use strict;

# Radix used by the encoding: one nucleotide letter per base-4 digit.
my $BASE = 4;

# Each nucleotide paired with its partner on the opposite strand
# (A <-> T, C <-> G).
my %NUC_PAIRS = ( A => T => C => G => G => C => T => A => );

# Base-4 digit -> nucleotide letter; the inverse of the %_ table above.
my @DIGIT_TO_NUC = qw( A C G T );

# NOTE(review): the source is cut off in the middle of the next statement.
# It is kept verbatim below, commented out, until the rest is recovered:
#
#   my $FMT_DNA = <