#!/usr/bin/perl
use warnings;
use strict;
use 5.012;
# 902200
# Sample FASTA-like input: a '>seqN' header line followed by one or more
# data lines belonging to that header.
my @lines = qw(
>seq1
ASDFGHASDFGHJ
ERTYUIOOIUYLK
NBGFEWERTY
>seq2
BGTNHYMJUKOPK
MNBFSDFGHJ
>seq3
USE_STRICT&USE_WARNINGS
lastline
);

my $seq;              # key of the record currently being read ("seqN\n")
my %all_hash = ();    # "seqN\n" => concatenated, newline-terminated data lines

for my $line (@lines) {
    chomp $line;      # no-op for the qw() list; needed if lines come from a file

    # Header line: capture the name withOUT the leading '>'.
    # \d+ (not \d) so that e.g. 'seq10' is not truncated to 'seq1'.
    if ( $line =~ /^>(seq\d+)/ ) {
        $seq = "$1\n";
        say "\n\$seq: $seq";    # useful for debug; otherwise, not

        # Each header starts out with an empty entry, so nothing from the
        # previous record can leak into it and a header with no data lines
        # still appears in the hash.
        $all_hash{$seq} //= '';
        next;
    }

    # A data line with no preceding header has no record to belong to.
    if ( !defined $seq ) {
        warn "Skipping data line before first header: $line\n";
        next;
    }

    # Data line: restore the "\n" for readability and append it to the
    # current record only.
    my $DNA = "$line\n";
    say "\$DNA: $DNA";
    $all_hash{$seq} .= $DNA;
}

print "\n =============== \n";
print %all_hash;    # flattens to key, value, key, value...; hash order is not guaranteed
Output (when fixed as per update 2):
$seq: seq1
$DNA: ASDFGHASDFGHJ
$DNA: ERTYUIOOIUYLK
$DNA: NBGFEWERTY
$seq: seq2
$DNA: BGTNHYMJUKOPK
$DNA: MNBFSDFGHJ
$seq: seq3
$DNA: USE_STRICT&USE_WARNINGS
$DNA: lastline
===============
seq1
ASDFGHASDFGHJ
ERTYUIOOIUYLK
NBGFEWERTY
seq3
USE_STRICT&USE_WARNINGS
lastline
seq2
BGTNHYMJUKOPK
MNBFSDFGHJ
Update: Added a comment regarding the `say` statement (line 29 of the original listing).
Update 2: I missed a serious mistake here: the concatenation at line 36 of the original listing (`$all_hash{ $seq } .= $DNA;`) actually appends the last $DNA from the previous $seq to each new $seq. My bad! A fix is to reset $DNA to an empty string by inserting `$DNA = '';` as a new line 26, i.e. as the first statement inside the `for` loop.