# Build a hash that maps each FASTA-style header line to the longest
# sequence seen for that header.
#
# Target structure (illustrative only, not executed):
#
#   %protein = (
#       protein1 => 'ASFGTHTRHTHRHTHTRHTRHTR',
#       protein2 => 'ERYRYTRYHTRHTGEFEWWFEEFFFFREFRGRE',
#       #...
#   );
#
# NOTE: the keys produced below are the header lines exactly as read, so
# they keep the leading '>' (e.g. '>protein1').
####
use strict;
use warnings;
use Data::Dumper;

# Emit hash keys in sorted order so the dump is reproducible between runs.
$Data::Dumper::Sortkeys = 1;

my %protein = ();
my $key     = '';

# Read the embedded records line by line: a '>protein...' header line is
# expected to be followed by its sequence line.
while ( my $line = <DATA> ) {
    chomp $line;

    # Header line: remember it as the pending key and move on.
    if ( $line =~ /^>protein/ ) {
        $key = $line;
        next;
    }

    # Only a non-empty line that follows a header counts as a sequence.
    # Compare against '' explicitly so a literal "0" is not mistaken for
    # an empty line.
    next if $key eq '' or $line eq '';

    # Keep the sequence when the header is new, or when it is strictly
    # longer than the one already stored (ties keep the earlier sequence).
    if ( !exists $protein{$key}
        or length( $protein{$key} ) < length($line) )
    {
        $protein{$key} = $line;
    }

    $key = '';    # Reset: the next sequence needs its own header.
}

print Dumper \%protein;

__DATA__
>protein1
ASFGTHTRHTHRHTHTRHTRHTR
>protein2
ERYRYTRYHTRHTGEFEWWFEEFFFFREFRGRE
>protein3
AWEERERGRGRGREGRGREGRRRRRRRRTTHTHTRHRHTRHTR
>protein2
AASEFEFEFE
>protein4
REYTRHTRGRVEVCREVR