use strict;
use warnings;

# Debug aid only.  Data::Dump is not a core module, so import dd() when the
# module is installed and carry on without it otherwise; dd() is needed only
# by the commented-out FOR DEBUG line below.
BEGIN {
    eval { require Data::Dump; Data::Dump->import('dd'); 1 };
}

# Forward declarations: these let the helpers be called without parentheses
# (list-operator style) anywhere below this point.
sub remove_leading_trailing_whitespace;
sub normalize_embedded_whitespace;
sub normalize_case;

# Phrase translation dictionary: normalized source phrase => translation.
# Keys are normalized on the way in (trimmed, embedded whitespace collapsed,
# lower-cased); translation strings are used exactly as written.
my %dict;
for my $entry (
    [ 'Animal Source Food'   => q{aliment d'origine animale}          ],
    [ ' balanced diet '      => q{regime alimentaire equilibre}       ],
    [ ' food '               => q{aliment}                            ],
    [ 'nutrition'            => q{diet}                               ],
    [ 'nutrition assessment' => q{evaluation de l'etat nutritionnel}  ],
) {
    my ($phrase, $translation) = @{$entry};
    $dict{ normalize_key($phrase) } = $translation;
}
# dd \%dict;  # FOR DEBUG

# One regex matching any dictionary phrase, case-insensitively.
#  * Longest phrases come first so 'nutrition assessment' wins over
#    'nutrition'; equal lengths are ordered alphabetically so the generated
#    pattern is the same on every run.
#  * (?<!\w) / (?!\w) act like \b for phrases that begin and end with a word
#    character, and still work for phrases that begin or end with punctuation.
# Construction assumes the dictionary keys are fully normalized.
my $rx_phrase = do {
    my $alternatives =
        join ' | ',
        map  { match_any_embedded_whitespace($_) }
        sort { length($b) <=> length($a) or $a cmp $b }
        keys %dict;
    qr{ (?<! \w ) (?: $alternatives ) (?! \w ) }xmsi;
};
# print "$rx_phrase \n";  # FOR DEBUG

# Run the demonstration only when executed as a script, so the helper
# subroutines can be loaded (require/do) without producing output.
run_demo() unless caller;

# subroutines ######################################################

# s/// expressions below avoid the /r modifier so the script also runs on
# perl versions older than 5.14.

# Print the sample text before and after phrase translation.
sub run_demo {
    my $text = <<"EOT";
this is a nutrition assessment for animal source
food needed for a Balanced \t\t Diet of FOOD
for proper nutrition.
EOT
    print "before: [[$text]] \n\n";

    $text = translate_phrases($text);
    print "after: [[$text]] \n\n";

    return;
}

# Return a copy of the given text in which every dictionary phrase (matched
# case-insensitively, with any run of whitespace between its words) is
# replaced by <<translation>>.
sub translate_phrases {
    my ($text) = @_;

    $text =~ s{ ($rx_phrase) }
              { sprintf '<<%s>>', $dict{ normalize_key($1) } }xmsge;

    return $text;
}

# Return the dictionary-key form of a phrase: leading/trailing whitespace
# removed, embedded whitespace collapsed to single spaces, lower-cased.
sub normalize_key {
    my ($phrase) = @_;

    return normalize_case(
        normalize_embedded_whitespace(
            remove_leading_trailing_whitespace($phrase)
        )
    );
}

# Return a copy of the string without leading and trailing whitespace.
sub remove_leading_trailing_whitespace {
    (my $trimmed = $_[0]) =~ s{ \A \s+ | \s+ \z }{}xmsg;
    return $trimmed;
}

# Return a copy of the string with every run of whitespace replaced by one
# space.  Uses \s, the same class the generated phrase regex uses between
# words, so whatever the regex can match always normalizes to a real key.
sub normalize_embedded_whitespace {
    (my $collapsed = $_[0]) =~ s{ \s+ }{ }xmsg;
    return $collapsed;
}

# Turn a normalized phrase into a regex fragment (for use under /x) that
# matches the same words separated by any amount of whitespace.  Each word is
# passed through quotemeta so regex metacharacters, and '#' under /x, in a
# dictionary key are matched literally.  Leading/trailing whitespace in the
# argument is ignored; callers are expected to pass normalized keys.
sub match_any_embedded_whitespace {
    my ($phrase) = @_;
    return join ' \s+ ', map { quotemeta($_) } split ' ', $phrase;
}

# Return the lower-cased form of the string.
sub normalize_case {
    return lc $_[0];
}