# "$ _" is really "$_", and change the qq to a double-quote
# Abbreviated listing of the obfuscated decoder.  Each line of the string
# below supplies one base-4 digit (A=0, C=1, G=2, T=3), every four digits
# become one character, and the decoded text is finally run with eval.
# Lines whose selected letter is not a key of %_ (the "and / so / on"
# filler) look up a missing key and therefore contribute nothing.
$_ =
"CG
T--A
A---T
A----T
C----G
T----A
A---T
and
so
on
";
# Hash slice on %_: each bareword is quoted by the "=>" that follows it,
# so this stores A => 0, C => 1, G => 2, T => 3.
@_{A => C => G => T => } = 0..3;
# Replace every whole line by the digit of one of its two captured letters.
# $- (0 by default) counts lines; "$-++ / 9 % 2" switches between $1 and $2
# every nine lines.  "$ 1" is simply $1 with a space after the sigil.
s|.*(\w).*(\w).*\n|$_{$-++ / 9 % 2 ? $2:$ 1}|gex;
# Pack each group of four base-4 digits into one character
# (place values 64, 16, 4, 1).  A trailing group shorter than four
# characters does not match and is left in place.
s|(.)(.)(.)(.)|chr (64*$1 + 16*$2 + 4*$3 + $4)|gex;
# Run the decoded text (eval with no argument evaluates $_).  The explicit
# semicolon ends the statement here, so eval cannot pick up whatever
# follows it in the file as its operand.
eval;
####
# Hash-slice assignment on %_.  Every bareword below is followed by "=>",
# which quotes it, so the obfuscated spelling...
@_{A => C => G => T => } = 0..3;
# is really...
@_{'A', 'C', 'G', 'T'} = 0..3;
# Either form leaves $_{A} == 0, $_{C} == 1, $_{G} == 2 and $_{T} == 3.
##
##
# First pass over $_: replace every whole line (newline included) by one
# base-4 digit.  On the DNA rows, which hold exactly two letters each, $1
# is the left letter and $2 is the right letter.
s|
.* # greedily match
(\w) # match first letter, and store into $1
.* # greedily match
(\w) # match last letter, and store into $2
.*\n # eat up remainder of line
|
# This expression maps the selected letter to its base-4 digit via the %_
# hash.  $- is used as a line counter (it defaults to 0): "$-++ / 9 % 2"
# is 0 for the first nine lines, 1 for the next nine, and so on, so the
# letter comes from $1 for nine lines and then from $2 for nine lines.
# That way, when the DNA strands flip positions, decoding continues on
# the correct strand (see physi's comment for a visual representation of
# this).  "$ 1" is simply $1 written with a space after the sigil.
$_{$-++ / 9 % 2 ? $2:$ 1}
|gex;
##
##
# Second pass over $_: pack each run of four base-4 digits into a single
# character.  A trailing run of fewer than four characters does not match
# and is left unchanged.
s|
# store next four characters into $1,$2,$3, and $4
(.)(.)(.)(.)
|
# Replace with a base-4-to-ASCII conversion of those characters: the
# multipliers 64, 16, 4 and 1 are the place values of a four-digit
# base-4 number, and chr turns the resulting code into a character.
chr (64*$1 + 16*$2 + 4*$3 + $4)
|gex;
##
##
use strict;

# Numeric base of the encoding (one digit per nucleotide letter).
# NOTE(review): its use lies beyond this excerpt -- confirm.
my $BASE = 4;

# Complement of each nucleotide letter: A pairs with T, C pairs with G.
my %NUC_PAIRS = (
    A => 'T',
    C => 'G',
    G => 'C',
    T => 'A',
);

# Digit (array index 0..3) to nucleotide letter.
my @DIGIT_TO_NUC = ('A', 'C', 'G', 'T');
my $FMT_DNA = <