use strict;
use warnings;

# Print a numbered, de-duplicated, lower-cased list of the names found on the
# DE and GN lines of a flat file whose records are terminated by "//".
# (Presumably a UniProtKB-style flat file; the code itself relies only on the
# DE/GN line prefixes, the "key=value;" field layout and the "//" terminator.)
#
# Input : the files named on the command line, or STDIN when none are given.
# Output: one "<n> <term>" line for every term seen for the first time
#         (n counts from 1 across the whole input), followed by one empty
#         line after every record chunk.

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without consuming input.
main() unless caller;

# Entry point: read the input chunk by chunk and print the terms that have
# not been printed before.
sub main {
    # A chunk ends at the literal string "//".
    # NOTE(review): any "//" ends a chunk, including one inside a line (for
    # example in a URL), which splits that record and adds an extra blank
    # line. Kept as is so the output layout does not change; confirm whether
    # anchoring the terminator to its own line is wanted.
    local $/ = "//";

    my %seen;         # lower-cased term => number of times encountered
    my $count = 0;    # running number of the last printed term

    while (my $record = <>) {
        for my $term (_new_terms($record, \%seen)) {
            $count++;
            print "$count $term\n";
        }

        # Printed for every chunk, including chunks that yielded no terms.
        print "\n";
    }

    return;
}

# Return the lower-cased terms of $record that are not yet in %$seen, in
# order of appearance, and record every term of the chunk in %$seen.
sub _new_terms {
    my ($record, $seen) = @_;

    my @fresh;
    for my $value (_field_values($record)) {
        for my $part (_term_parts($value)) {
            my $key = lc $part;

            # Each part is checked on its own: a primary name that is already
            # known must not hide the alias that follows it in parentheses.
            push @fresh, $key unless $seen->{$key}++;
        }
    }

    return @fresh;
}

# Return every non-blank "value" of the "key=value;" fields found on the
# lines of $record that start with DE or GN.
sub _field_values {
    my ($record) = @_;

    my @wanted_lines = grep { /^(?:DE|GN).+?=.+?;/ } split /\n/, $record;

    # One line may carry several fields; /g in list context returns the
    # captured value of each of them.
    return grep { !/^\s*$/ } map { /.+?=(.+?);/g } @wanted_lines;
}

# Break one field value into the individual terms it names:
#   - values containing "Putative uncharacterised protein" yield nothing;
#   - values containing a comma are split on comma + one whitespace character;
#   - "name (alias)" yields the name (without the blanks before the
#     parenthesis) and the alias;
#   - anything else is a single term.
sub _term_parts {
    my ($value) = @_;

    return () if $value =~ /Putative uncharacterised protein/;

    if ($value =~ /,/) {
        my @pieces = split /,\s/, $value;
        return @pieces;
    }

    # \s* keeps the blank before "(" out of the name, so "Protein A (PA)" and
    # a plain "Protein A" share one key.
    if ($value =~ /(.+?)\s*\((.+?)\)/) {
        return ($1, $2);
    }

    return ($value);
}