use strict;
use warnings;
use Data::Dumper;

# Translate a sequence of residue codes into their names and sum the numeric
# value attached to each code.  The reference table lives in the __DATA__
# section at the end of this file: column 1 is the value, column 2 the code
# (one or two characters), column 3 the name.  Anything after the third
# column (the "Note: ..." text on some rows) is ignored.
#
# The output record separator $\ is deliberately left alone; every newline
# is written explicitly so the output does not depend on global state.

my %reference;

# Fill the hash with the information from the reference table.
while ( my $line = <DATA> ) {

    # Three groups of non-space characters separated by blanks.
    if ( $line =~ /(\S+)\s+(\S+)\s+(\S+)/ ) {
        my ( $value, $code, $name ) = ( $1, $2, $3 );
        $reference{$code} = { Value => $value, Name => $name };
    }
}

# An empty table would produce an empty pattern, which matches the empty
# string at every position of the sequence.
die "No reference entries could be read from the DATA section\n"
    unless %reference;

# Show the content of the hash (keys sorted so the dump is reproducible).
{
    local $Data::Dumper::Sortkeys = 1;
    print Dumper( \%reference ), "\n";
}

# quotemeta adds a \ in front of special chars (here *) so that they lose
# their special meaning in the regex.  The codes are ordered longest first so
# that a two-character code can never be shadowed by a shorter alternative
# that happens to be its prefix.
my $pattern = join '|',
    map  { quotemeta }
    sort { length($b) <=> length($a) or $a cmp $b }
    keys %reference;

my $sequence = 'ABBAPERL**';

# With this method, BB and ** become a single element, not two as they would
# with split //.
my @acids = $sequence =~ /($pattern)/g;

# A global match silently skips characters that match no code; report that
# instead of quietly computing a sum over a truncated sequence.
warn "Sequence '$sequence' contains characters missing from the reference table\n"
    if join( '', @acids ) ne $sequence;

print "Splitted sequence is: @acids\n\n";

my $sum = 0;
for my $acid (@acids) {

    # Translate the code with the reference table and accumulate its value.
    print "Found $reference{$acid}{Name}\n";
    $sum += $reference{$acid}{Value};
}
print "Sum: $sum\n\n";

# Bonus: translate all the codes into their longer names in one go, using map
# to apply the lookup to the whole list.
print join( ' ', map { $reference{$_}{Name} } @acids ), "\n";

__DATA__
1.800 A Ala
-3.500 BB Asx
2.500 C Cys Note: Columns 1-8 must contain 1 numeric value only
-3.500 D Asp
-3.500 E Glu Note: This file is required for amphpathic helic
2.800 F Phe
-0.400 G Gly
-3.200 H His
4.500 I Ile
-3.900 K Lys
3.800 L Leu
1.900 M Met
-3.500 N Asn
-1.600 P Pro
-3.500 Q Gln
-4.500 R Arg
-0.800 S Ser
-0.700 T Thr
4.200 V Val
-0.900 W Trp
-0.490 X- Unk
-1.300 Y Tyr
-3.500 ZZ Glx
-0.490 ** ***