# Collapse a CLUSTAL alignment read from MYFILE into one record per sequence
# and write those records to DATA.  Both handles are opened before this point.
#
# CLUSTAL output is interleaved: each sequence id appears once per block,
# followed by a chunk of its sequence.  Every chunk is appended to the list
# stored under its (truncated) id in %species, and each id is then written
# out as a single line of the form ">id chunk chunk ...".
#
# NOTE(review): DATA is also the name of Perl's built-in __DATA__ handle;
# it is kept here only because the matching open() is outside this section.

my @id_order;    # truncated ids, in the order they first appear in the input

LINE:
while (my $line = <MYFILE>) {
    next LINE if $line =~ /^\s*CLUSTAL/;    # CLUSTAL heading line
    next LINE if $line =~ /:/;              # consensus lines contain : and *
    next LINE if $line =~ /\*/;
    next LINE if $line =~ /^ /;             # lines that start with a space

    # Whitespace split: first field is the id, second is the sequence chunk.
    # Any further fields on the line are ignored.
    my ($id, $seq) = split ' ', $line;

    # Blank separator lines (and lines with no sequence column) carry no
    # data; without this guard they would create an empty-id record.
    next LINE if !defined $seq;

    # Keep at most the first 9 characters of the id.  Ids that share their
    # first 9 characters therefore end up in the same record.
    my $newid = substr $id, 0, 9;

    # Remember first-appearance order so the output does not depend on
    # Perl's arbitrary hash ordering.
    push @id_order, $newid if !exists $species{$newid};
    push @{ $species{$newid} }, $seq;
}

# One line per id: ">", the id, a space, then the chunks joined by single
# spaces, in the order the chunks were read.
for my $key (@id_order) {
    print DATA ">", $key, " ", join(" ", @{ $species{$key} }), "\n";
}

close(MYFILE) or warn "Could not close input handle MYFILE: $!";

# Buffered write errors only surface at close, so a failure here means the
# output may be incomplete.
close(DATA) or die "Could not close output handle DATA: $!";