#! perl
use strict;
use warnings;
# Demo input: records of the form ">ID cds", newline, "SEQUENCE fun".
my $fasta =
'>gi1 cds
ATG fun
>gi2 cds
ATG fun
>gi3 cds
GGG fun';

# parse_records($text)
# Scan $text for every ">ID cds SEQUENCE fun" record and return a hash
# reference mapping ID => SEQUENCE. If the same ID occurs more than once,
# the last occurrence wins.
sub parse_records {
    my ($text) = @_;
    my %sequence_of;

    # '.' does not match a newline here (no /s), so each capture stays on
    # one line, while \s+ is free to cross the line break between the header
    # line and the sequence line.
    while ( $text =~ / > (.+) \s+ cds \s+ (.*) \s+ fun /gx ) {
        $sequence_of{$1} = $2;
    }
    return \%sequence_of;
}

# group_headers_by_sequence($sequence_of)
# Invert an ID => SEQUENCE hash reference into a SEQUENCE => [IDs] hash
# reference. IDs are visited in sorted order, so every list is sorted and
# its first element is the smallest ID carrying that sequence.
sub group_headers_by_sequence {
    my ($sequence_of) = @_;
    my %headers_for;
    for my $header ( sort keys %{$sequence_of} ) {
        push @{ $headers_for{ $sequence_of->{$header} } }, $header;
    }
    return \%headers_for;
}

my %hdrs = %{ parse_records($fasta) };
print " A. Header & sequences are:\n";
printf ">%s cds\n%s\n", $_, $hdrs{$_} for sort keys %hdrs;

my %seqs = %{ group_headers_by_sequence( \%hdrs ) };
print " B. Only sequences are:\n";

# Plain print: the sequence is data, not a format string, so it must not be
# handed to printf as its first argument.
print "$_\n" for sort keys %seqs;

print " C. Non-redundant sequences are:\n";

# Each header list is already sorted, so element 0 is the representative.
printf ">%s cds\n%s\n", $seqs{$_}[0], $_ for sort keys %seqs;
####
18:47 >perl 1009_SoPW.pl
A. Header & sequences are:
>gi1 cds
ATG
>gi2 cds
ATG
>gi3 cds
GGG
B. Only sequences are:
ATG
GGG
C. Non-redundant sequences are:
>gi1 cds
ATG
>gi3 cds
GGG
18:47 >
####
# Shorter alternative: invert %hdrs (header => sequence) into %seqs
# (sequence => single header), which collapses duplicate sequences.
# A bare `reverse %hdrs` keeps whichever duplicate header the hash happens
# to yield last, and that order is not stable from run to run. Feeding the
# pairs in descending header order makes the smallest header win every time
# (gi1 for ATG, as in the transcript above).
my %seqs = map { ( $hdrs{$_}, $_ ) } reverse sort keys %hdrs;
# Presumably the intermediate sections are elided at this point in the excerpt.
...
print " C. Non-redundant sequences are:\n";
printf ">%s cds\n%s\n", $seqs{$_}, $_ for sort keys %seqs;