use strict;
use warnings;

# Groups '>'-delimited sequence records by their (subs, geneid) pair and
# prints one block per group with the group's sequences sorted.
#
# A record (the text between two '>' characters) is expected to look like:
#   <subs>_<n> geneid<gid> <n> <n> len=<n>
#   <sequence>
#   <sequence> ...
# Output for each group is a ">subs gid=gid" header line followed by the
# group's sequences, one per line, in string-sorted order.

# Parse one record (with the '>' separator already removed).
# Returns ($key, @sequences) where $key is "subs gid", or an empty list
# when the record does not match the expected header layout.
sub _parse_record {
    my ($record) = @_;

    # -- s at end of regex needed so that . matches new lines
    my ($subs, $gid, $sequence_text) = $record
        =~ /^(\d+)_\d+\s+geneid(\d+)\s+\d+\s+\d+\slen=\d+\s+(.*)$/s;
    return unless defined $subs;

    return ("$subs $gid", split(/\s+/, $sequence_text));
}

# Render one group: header line built from the "subs gid" key, then the
# sequences sorted as strings, each terminated by a newline.
sub _format_group {
    my ($key, $sequences) = @_;

    (my $header = $key) =~ s/ / gid=/;
    return ">$header\n" . join("\n", sort @$sequences) . "\n";
}

#load sequences
my %hSequences;
{
    # local inside a bare block, so the record separator is restored as soon
    # as loading finishes and cannot affect other reads in the file.
    local $/ = '>';

    # NOTE(review): the readline expression had lost its filehandle in the
    # source this was recovered from; STDIN is assumed -- confirm.
    RECORD:
    while (my $line = <STDIN>) {
        chomp $line;                      # strips the trailing '>' separator
        next RECORD if $line !~ /\S/;     # nothing before the first '>', or blank

        my ($sKey, @aSequences) = _parse_record($line);
        if (!defined $sKey) {
            # Skip instead of filing the record under an undefined key.
            warn "Skipping malformed record: $line\n";
            next RECORD;
        }

        # Autovivifies the array ref, so a record with no sequences still
        # produces a (header-only) group in the output.
        push @{ $hSequences{$sKey} }, @aSequences;
    }
}

#print results
# Keys are "subs gid"; order numerically by subs, then gid, so the output is
# stable between runs (plain hash iteration order is not).
my @aSortedKeys =
    map  { $_->[0] }
    sort { $a->[1] <=> $b->[1] or $a->[2] <=> $b->[2] or $a->[0] cmp $b->[0] }
    map  { [ $_, split(/ /, $_) ] }
    keys %hSequences;

for my $sKey (@aSortedKeys) {
    print _format_group($sKey, $hSequences{$sKey});
}