#!/usr/bin/perl
use strict;
use warnings;

# Replace dictionary terms in a corpus with their numeric ids.
#
# The mapping file ("dictionary") holds one "<id> <term>" pair per line.
# Every whole-term occurrence in the corpus ("corpus.txt") is replaced by
# the id of that term, and the rewritten corpus is printed to STDOUT.
#
# When several terms could match at the same position the longest one
# wins, so "cross-reference" is replaced as a single term instead of as
# "cross" followed by "reference".
#
# Terms are lower-cased when loaded and matching is case-sensitive, so
# only lower-case occurrences in the corpus are replaced.

my $DICTIONARY_FILE = 'dictionary';
my $CORPUS_FILE     = 'corpus.txt';

# Read "<id> <term>" lines from the filehandle $fh and return a hash
# reference mapping the lower-cased term to its id.  Lines that do not
# start with a number followed by a term are skipped.  A term that is
# listed more than once keeps the id from its last line.
sub load_dictionary {
    my ($fh) = @_;

    my %id_of;
    LINE:
    while ( my $line = <$fh> ) {
        # \S+ rather than \w+: a term may contain '-' (e.g.
        # "cross-reference"); \w+ would cut it down to "cross".
        next LINE unless $line =~ /^(\d+)\s+(\S+)/;
        my ( $id, $term ) = ( $1, $2 );
        $id_of{ lc $term } = $id;
    }

    return \%id_of;
}

# Build a compiled regex that captures any key of %$id_of as a whole
# term.  Returns undef (in scalar context) when the dictionary is empty.
sub build_pattern {
    my ($id_of) = @_;

    # Longest terms first: regex alternation takes the first alternative
    # that matches, so this ordering yields the longest match.
    my @terms = sort { length($b) <=> length($a) || $a cmp $b }
                keys %{$id_of};
    return unless @terms;

    # quotemeta keeps characters such as '-' or '.' literal.
    my $alternation = join '|', map { quotemeta } @terms;

    # The look-arounds act like \b but also work for terms whose first or
    # last character is not a word character.
    return qr/(?<!\w)($alternation)(?!\w)/;
}

# Return a copy of $line in which every dictionary term is replaced by
# its id.  With an undefined $pattern (empty dictionary) the line is
# returned unchanged.
sub translate_line {
    my ( $line, $pattern, $id_of ) = @_;

    return $line unless defined $pattern;

    # Every alternative of $pattern is a dictionary key, so the lookup
    # always succeeds.
    ( my $translated = $line ) =~ s/$pattern/$id_of->{$1}/g;
    return $translated;
}

# Script entry point: load the dictionary, then translate the corpus line
# by line to STDOUT.  Dies when either file cannot be opened.
sub main {
    open my $dict_fh, '<', $DICTIONARY_FILE
        or die "Error opening the mapping file '$DICTIONARY_FILE': $!\n";
    print "Reading mapping file\n";
    print "----------------------------\n";

    open my $corpus_fh, '<', $CORPUS_FILE
        or die "Error opening the input file '$CORPUS_FILE': $!\n";
    print "Reading input file\n";
    print "----------------------------\n";

    my $id_of   = load_dictionary($dict_fh);
    my $pattern = build_pattern($id_of);

    while ( my $line = <$corpus_fh> ) {
        print translate_line( $line, $pattern, $id_of );
    }

    close $corpus_fh;
    close $dict_fh;
    return;
}

# Run only when executed as a script, not when loaded with require/do.
main() unless caller;

1;

#### Sample run
#
# dictionary:
#     1 cross
#     2 reference
#     3 cross-reference
#
# corpus.txt:
#     cross-reference
#
# Expected output:
#     3
#
# The earlier version printed "3-2": its \w+ capture read the term on
# dictionary line 3 as "cross", so id 3 overwrote id 1, and "cross" and
# "reference" were then replaced separately.