#!/usr/bin/perl
use strict;
use warnings;

# Annotate text records with vocabulary matches.
#
# Command line: WORDFILE TEXTFILE OUTFILE
#
#   WORDFILE  one entry per line, whitespace separated: a main term followed
#             by zero or more matched terms belonging to it.
#   TEXTFILE  one record per line in the form "id|title|abstract".
#   OUTFILE   one line per record: "id|" followed, for every '.'-separated
#             sentence of the abstract, by "<N>" and the words that matched.
#
# Words from the text are upper-cased before lookup while the vocabulary is
# stored verbatim, so only upper-case vocabulary entries can match.

# Run only when executed as a script, so the subs can be loaded on their own.
main(@ARGV) unless caller;

# Entry point: validates the three file arguments and writes the annotated
# output.  Dies with a message when the argument count is wrong or a file
# cannot be opened or written.
sub main {
    my @args = @_;
    die "Usage: run batch file 'run' not this one\n" unless @args == 3;
    my ($wordfile, $textfile, $outfile) = @args;

    # Read both inputs before opening the output for writing, so a bad input
    # path never truncates an existing output file.
    my %words = ReadDataInHash($wordfile);
    open my $tfh, '<', $textfile or die "Can't open `$textfile': $!\n";
    open my $ofh, '>', $outfile or die "Can't open `$outfile': $!\n";

    # Reverse index built once instead of scanning every main term per word.
    my %main_for = _index_matched_terms(\%words);

    while (my $line = <$tfh>) {
        chomp $line;

        # The title field is not used.
        my ($id, undef, $abs) = split /\|/, $line;

        # Short or blank records still produce an output line, but without
        # "uninitialized value" warnings.
        $id  = '' unless defined $id;
        $abs = '' unless defined $abs;

        my @sentences = split /\./, $abs;
        print {$ofh} "$id|";
        for my $idx (0 .. $#sentences) {
            print {$ofh} '<', $idx + 1, '>';
            for my $word (split ' ', $sentences[$idx]) {
                my $key = uc $word;
                if ($words{$key}) {
                    # The word itself is a main term.
                    print {$ofh} "$word ";
                }
                elsif (exists $main_for{$key}) {
                    print {$ofh} "mainterm:$main_for{$key}:matchedterm:$word ";
                }
            }
        }
        print {$ofh} "\n";
    }

    close $tfh;

    # Buffered write errors only surface when the handle is closed.
    close $ofh or die "Can't close `$outfile': $!\n";
    return;
}

# Build a lookup from matched term to its main term.  When several main terms
# list the same matched term, the main term that sorts first wins.
#   $words_ref  reference to the hash returned by ReadDataInHash
# Returns a flat hash (matched term => main term).
sub _index_matched_terms {
    my ($words_ref) = @_;
    my %main_for;
    for my $mainterm (sort keys %{$words_ref}) {
        for my $matched (keys %{ $words_ref->{$mainterm} }) {
            $main_for{$matched} = $mainterm unless exists $main_for{$matched};
        }
    }
    return %main_for;
}

# Read the vocabulary file into a two-level hash.
#   $file  path of the word file
# Returns a hash (as a list) of main term => { matched term => 1, ... }.
# Blank lines are ignored.  Dies when the file cannot be opened.
sub ReadDataInHash {
    my $file = shift;
    open my $fh, '<', $file or die "Can't open `$file': $!\n";
    my %words;
    while (my $line = <$fh>) {
        # split ' ' discards leading and trailing whitespace, newline included.
        my ($first, @rest) = split ' ', $line;
        next unless defined $first;

        # Register the main term even when no matched terms follow it.
        $words{$first} ||= {};
        $words{$first}{$_} = 1 for @rest;
    }
    close $fh;
    return %words;
}

1;
__END__