use strict;
use warnings;
use Const::Fast;
use Data::Dump;    # not called anywhere in this script

# Marker character placed on either side of a term's index while the text is
# being tokenised.  Double-quoted on purpose: "\034" is the single ASCII FS
# control character, whereas '\034' would be the four literal characters
# backslash, 0, 3, 4.
const my $DELIM => "\034";

# Lower-case a phrase and collapse every run of whitespace to a single space,
# so that a term and the text it matched can be compared as hash keys.
sub _normalise_term {
    my ($phrase) = @_;
    return join ' ', split ' ', lc $phrase;
}

# extract_keywords($text, \@terms, \%stops)
#
# Reduces $text to a list of keywords:
#   * the text is lower-cased;
#   * every occurrence of a multi-word term from @terms (matched
#     case-insensitively, as whole words, with any amount of whitespace
#     between its words) becomes one entry of the form "*term*", spelled as
#     it appears in @terms;
#   * every other whitespace-separated word has its punctuation removed and
#     is kept unless it is then empty or is a key of %stops.
#
# Parameters:
#   $text      - the string to analyse (undef is treated as empty)
#   $terms_ref - array ref of phrases to keep together
#   $stops_ref - hash ref whose (lower-case) keys are the words to drop
#
# Returns the keywords as a list, in the order they occur in $text.
sub extract_keywords {
    my ($text, $terms_ref, $stops_ref) = @_;

    return if !defined $text;
    $stops_ref ||= {};

    # Longest first, so that where two terms could match at the same
    # position the longer one wins.
    my @terms = sort { length $b <=> length $a }
                grep { defined $_ && /\S/ } @{ $terms_ref || [] };

    $text = lc $text;

    # A delimiter already present in the input could be mistaken for a marker.
    $text =~ s{\Q$DELIM\E}{}g;

    # Build one alternation covering all terms and remember which index each
    # normalised term belongs to.
    my (%index_of, @patterns);
    for my $idx (0 .. $#terms) {
        my $key = _normalise_term($terms[$idx]);
        $index_of{$key} = $idx unless exists $index_of{$key};
        push @patterns, join '\s+', map { quotemeta } split ' ', $key;
    }

    # Replace the terms with temporary markers in a single pass, so that a
    # later term can never match inside a marker inserted for an earlier one.
    # The lookarounds stop a term matching in the middle of a longer word, and
    # the padding spaces make every marker a token of its own.
    if (@patterns) {
        my $any_term = join '|', @patterns;
        $text =~ s{ (?<!\w) ($any_term) (?!\w) }
                  { ' ' . $DELIM . $index_of{ _normalise_term($1) } . $DELIM . ' ' }gex;
    }

    my @keywords;
    for my $token (split ' ', $text) {    # split ' ' ignores leading whitespace
        if ($token =~ m{ \A \Q$DELIM\E ([0-9]+) \Q$DELIM\E \z }x) {
            push @keywords, '*' . $terms[$1] . '*';
            next;
        }

        # Strip punctuation BEFORE consulting the stop list, so that "you,"
        # and "I." are recognised as stop words.
        (my $word = $token) =~ s{[[:punct:]]}{}g;
        next if $word eq '' or exists $stops_ref->{$word};

        push @keywords, $word;
    }

    return @keywords;
}

my %stops = map { ( lc($_) => 1 ) } qw( I am the of and you are );
my @terms = ('manager of sales', 'chairman of the board');

my $file3 = 'I am the Senior Manager of Sales and of Marketing. ' .
            'You are the Chairman of the Board of Directors.';

my @file3 = extract_keywords($file3, \@terms, \%stops);

print "$_\n" for @file3;

####
# Sample run:
#
#   17:35 >perl 1997_SoPW.pl
#   senior
#   *manager of sales*
#   marketing
#   *chairman of the board*
#   directors
#
#   17:35 >