#! perl
use strict;
use warnings;

# Demo: pull "cds" records out of a FASTA-like string, then report
#   A. every header together with its sequence,
#   B. the distinct sequences, and
#   C. one representative header per distinct sequence.

# Parse records of the form ">ID cds SEQUENCE fun" out of $text.
#
# The fields may be separated by any whitespace (spaces or newlines).
# Returns a hash reference mapping each ID to its sequence; if an ID
# occurs more than once, the last occurrence wins.
sub parse_records {
    my ($text) = @_;

    my %seq_of;

    # The ID is a run of non-whitespace and the sequence is matched lazily
    # without ever crossing a '>', so one match cannot swallow the records
    # that follow it (a greedy '.+' does exactly that on single-line input).
    while ( $text =~ / > (\S+) \s+ cds \s+ ([^>]*?) \s+ fun /gx ) {
        $seq_of{$1} = $2;
    }

    return \%seq_of;
}

# Invert an ID => sequence hash reference.
#
# Returns a hash reference mapping each distinct sequence to an array
# reference of the IDs that carry it. IDs are visited in sorted order, so
# every list is sorted and its first element is the lexically smallest ID.
sub ids_by_sequence {
    my ($seq_of) = @_;

    my %ids_for;
    for my $id ( sort keys %{$seq_of} ) {
        push @{ $ids_for{ $seq_of->{$id} } }, $id;
    }

    return \%ids_for;
}

my $fasta = '>gi1 cds ATG fun >gi2 cds ATG fun >gi3 cds GGG fun';

my $seq_of  = parse_records($fasta);
my $ids_for = ids_by_sequence($seq_of);

print " A. Header & sequences are:\n";
printf ">%s cds\n%s\n", $_, $seq_of->{$_} for sort keys %{$seq_of};

print " B. Only sequences are:\n";

# Plain print: a sequence is data and must never be used as a printf format.
print "$_\n" for sort keys %{$ids_for};

print " C. Non-redundant sequences are:\n";

# Each ID list is already sorted, so element 0 is the representative header.
printf ">%s cds\n%s\n", $ids_for->{$_}[0], $_ for sort keys %{$ids_for};