#!/usr/bin/perl
use warnings;
use strict;
####
# Validate the command line. Exactly five arguments are required, of which
# only the first, fourth and fifth are used below (word list, text file and
# output file). The usage error is raised with die so that it is written to
# STDERR and the script ends with a failing exit status instead of printing
# to STDOUT and exiting with status 0.
if (@ARGV != 5) {
    die "usage: run batch file 'run' not this one\n";
}
my $wordfile   = $ARGV[0];
my $textfile   = $ARGV[3];
my $OutPutFile = $ARGV[4];
####
# Insist on exactly three arguments, then name them: the word list, the
# text to scan and the file that receives the annotated output.
if (@ARGV != 3) {
    die "Usage: run batch file 'run' not this one\n";
}
my $wordfile = $ARGV[0];
my $textfile = $ARGV[1];
my $outfile  = $ARGV[2];
####
# Open the word list (IF1) and the text file (PF) for reading and the output
# file (OF) for writing. The three-argument form of open is used so that
# characters in a file name can never be interpreted as an open mode or a
# pipe, and every failure reports which file was involved and why ($!).
# The bareword handle names are kept because the rest of the script reads
# from and prints to IF1, PF and OF.
open(IF1, '<', $wordfile)   or die "cannot open the file '$wordfile' for reading: $!";
open(PF,  '<', $textfile)   or die "cannot open the file '$textfile' for reading: $!";
open(OF,  '>', $OutPutFile) or die "cannot open the file '$OutPutFile' for writing: $!";
####
# Lexical handles: word list and text are opened for reading, the output
# file for writing. Any failure aborts with the file name and the OS error.
open(my $wfh, '<', $wordfile) || die "Can't open `$wordfile': $!\n";
open(my $tfh, '<', $textfile) || die "Can't open `$textfile': $!\n";
open(my $ofh, '>', $outfile)  || die "Can't open `$outfile': $!\n";
####
# Build the term table from the IF1 handle; the result is kept in
# $List1Ref and dereferenced as a hash by the matching code.
my $List1Ref = ReadDataInHash(*IF1);
####
# Build the term table straight from the word-list file name; the returned
# list is stored in the %words hash.
my %words = ReadDataInHash( $wordfile );
####
# ReadDataInHash(FILEHANDLE)
#
# Reads a word list from an already-open filehandle. Each non-blank line
# holds a main term followed by zero or more alternate terms, separated by
# whitespace.
#
# Returns a reference to a hash of hashes:
#     { MAINTERM => { ALTERNATE => 1, ... }, ... }
# A main term without alternates maps to an empty hash, so callers can test
# for it with exists().
#
# Changes from the previous version:
#   * no empty "()" prototype - the sub takes one argument;
#   * plain ->{...} style dereferencing instead of ${%{...}}{...}, which uses
#     a hash as a reference and is rejected by current perls;
#   * split ' ' instead of split /\s/, so leading or repeated whitespace no
#     longer produces empty-string terms;
#   * a main term listed on several lines accumulates its alternates instead
#     of having the earlier ones wiped.
sub ReadDataInHash {
    my $x = shift;
    my %list1;
    while (my $line = <$x>) {
        chomp $line;
        my ($main, @alternates) = split ' ', $line;
        next unless defined $main;    # blank or whitespace-only line
        $list1{$main} ||= {};         # register the term even with no alternates
        $list1{$main}{$_} = 1 for @alternates;
    }
    return \%list1;
}
####
# ReadDataInHash($file)
#
# Opens the named word-list file and reads it line by line. Each non-blank
# line holds a main term followed by zero or more alternate terms, separated
# by whitespace.
#
# Returns the table as a flat key/value list meant to be assigned to a hash:
#     ( MAINTERM => { ALTERNATE => 1, ... }, ... )
# Dies with the file name and OS error if the file cannot be opened.
#
# A main term that has no alternates is still registered (with an empty
# hash). Previously such a term was silently dropped, so a text word equal
# to it could never be recognised as a main term.
sub ReadDataInHash {
    my $file = shift;
    open my $fh, '<', $file or die "Can't open `$file': $!\n";
    my %words;
    while (my $line = <$fh>) {
        chomp $line;
        my ($first, @rest) = split ' ', $line;
        next unless defined $first;    # blank or whitespace-only line
        $words{$first} ||= {};         # keep main terms without alternates
        $words{$first}{$_} = 1 for @rest;
    }
    close $fh;
    return %words;
}
####
# Annotate the text file, one record per line, and write the result to OF.
#
# Each input line read from PF has the form  ID|Title|Abstract . The
# abstract is cut into sentences at every '.', and each sentence is written
# as <n> followed by the words that matched the term table:
#   * a word whose upper-cased form is a main term is written as is;
#   * otherwise the first main term (in sorted order) that lists the
#     upper-cased word as an alternate is reported as
#     mainterm:MAIN:matchedterm:word
#   * words that match nothing are omitted.
#
# Fixes: the loop now actually reads from PF (the read expression was
# missing, which was a syntax error); the term table is accessed with
# ordinary ->{...} dereferences instead of ${%{...}}{...}; a record without
# an abstract no longer triggers undefined-value warnings; the unused title
# variable is gone; the sorted key list is computed once, not per word.
my @MainTerms = sort keys %{$List1Ref};
while (my $line = <PF>) {
    chomp $line;
    my ($ID, undef, $Abs) = split /\|/, $line;
    $ID  = '' unless defined $ID;     # blank line
    $Abs = '' unless defined $Abs;    # record with fewer than three fields
    print OF "$ID|";
    my $SentenceNumber = 0;
    foreach my $Sentence (split /\./, $Abs) {
        $SentenceNumber++;
        print OF "<$SentenceNumber>";
        foreach my $word (split ' ', $Sentence) {
            my $key = uc $word;
            # The word itself is a main term.
            if (exists $List1Ref->{$key}) {
                print OF "$word ";
                next;
            }
            # Otherwise look for the first main term listing it as alternate.
            foreach my $p (@MainTerms) {
                if (exists $List1Ref->{$p}{$key}) {
                    print OF "mainterm:$p:matchedterm:$word ";
                    last;
                }
            }
        }
    }
    print OF "\n";
}
####
# Walk the text file record by record (ID|Title|Abstract). For every
# sentence of the abstract write <n>, then each word that is a main term,
# or a mainterm:...:matchedterm:... note for the first main term (sorted
# order) that lists the word as an alternate. Other words are left out.
while (defined(my $record = <$tfh>)) {
    chomp $record;
    my ($id, $title, $abs) = split /\|/, $record;
    my @sentences = split /\./, $abs;
    print $ofh "$id|";
    my $number = 0;
    for my $sentence (@sentences) {
        $number++;
        print $ofh '<', $number, '>';
        WORD:
        for my $word (split ' ', $sentence) {
            my $key = uc $word;
            if ($words{$key}) {
                print $ofh "$word ";
                next WORD;
            }
            for my $main (sort keys %words) {
                next unless $words{$main}{$key};
                print $ofh "mainterm:$main:matchedterm:$word ";
                last;
            }
        }
    }
    print $ofh "\n";
}
####
#!/usr/bin/perl
use strict;
use warnings;
# Main program: annotate a text file against a word list.
#
# Arguments: word-list file, text file, output file.
# Each text line has the form  ID|Title|Abstract . The abstract is cut into
# sentences at every '.', and each sentence is written as <n> followed by
# the words that matched the term table: main terms as is, alternates as
# mainterm:MAIN:matchedterm:word (first main term in sorted order wins).
#
# Improvements over the previous version:
#   * the word list is loaded before the output file is opened, so an
#     unreadable word list no longer leaves a truncated output file behind;
#   * records without an abstract (or blank lines) no longer produce
#     undefined-value warnings;
#   * the sorted list of main terms is built once, not once per word;
#   * the output handle is closed explicitly and the result is checked, so
#     buffered write errors are reported.
die "Usage: run batch file 'run' not this one\n"
    unless @ARGV == 3;
my ($wordfile, $textfile, $outfile) = @ARGV;

my %words      = ReadDataInHash($wordfile);
my @main_terms = sort keys %words;

open my $tfh, '<', $textfile or die "Can't open `$textfile': $!\n";
open my $ofh, '>', $outfile  or die "Can't open `$outfile': $!\n";

while (my $line = <$tfh>) {
    chomp $line;
    my ($id, undef, $abs) = split /\|/, $line;
    $id  = '' unless defined $id;     # blank line
    $abs = '' unless defined $abs;    # record with fewer than three fields
    print $ofh "$id|";
    my $number = 0;
    for my $sentence (split /\./, $abs) {
        $number++;
        print $ofh "<$number>";
        for my $word (split ' ', $sentence) {
            my $key = uc $word;
            if ($words{$key}) {
                print $ofh "$word ";
                next;
            }
            for my $main (@main_terms) {
                if ($words{$main}{$key}) {
                    print $ofh "mainterm:$main:matchedterm:$word ";
                    last;
                }
            }
        }
    }
    print $ofh "\n";
}

close $tfh;
close $ofh or die "Can't close `$outfile': $!\n";
# ReadDataInHash($file)
#
# Loads the word list stored in $file. Every non-blank line consists of a
# main term followed by any number of whitespace-separated alternates.
#
# Returns a flat list suitable for assignment to a hash, mapping each main
# term to a hash of its alternates: ( MAIN => { ALT => 1, ... }, ... ).
# Dies with the file name and OS error when the file cannot be opened.
#
# Main terms that have no alternates are stored with an empty hash; before,
# they were not stored at all and therefore never matched.
sub ReadDataInHash {
    my $file = shift;
    open my $fh, '<', $file or die "Can't open `$file': $!\n";
    my %words;
    while (my $line = <$fh>) {
        chomp $line;
        my ($first, @rest) = split ' ', $line;
        next unless defined $first;    # nothing on this line
        $words{$first} ||= {};         # lone main terms must still be known
        $words{$first}{$_} = 1 for @rest;
    }
    close $fh;
    return %words;
}
__END__
####
#!/usr/bin/perl
use strict;
use warnings;
# At least the word-list file must be given; any further arguments are the
# text files to filter (standard input is read when there are none).
die "Usage: $0 wordfile [textfile ...]\n" if @ARGV < 1;
# The term table is private to this block; only matchword() can see it.
{
    # MAINTERM => { ALTERNATE => 1, ... }
    my %words;

    # Outside of any sub, shift takes the first command-line argument:
    # the word-list file. The remaining arguments stay in @ARGV.
    my $file = shift;
    open my $fh, '<', $file or die "Can't open `$file': $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        my ($first, @rest) = split ' ', $line;
        next unless defined $first;    # blank or whitespace-only line
        # Register the main term even when it has no alternates; it used to
        # be dropped, so such a term was never recognised in the text.
        $words{$first} ||= {};
        $words{$first}{$_} = 1 for @rest;
    }
    close $fh;

    # Sorted once here rather than on every unmatched word.
    my @main_terms = sort keys %words;

    # matchword($word)
    #
    # Returns $word unchanged when its upper-cased form is a main term,
    # "mainterm:MAIN:matchedterm:$word" for the first main term (in sorted
    # order) that lists the upper-cased word as an alternate, and an empty
    # list (undef in scalar context) when nothing matches.
    sub matchword {
        my $word  = shift;
        my $uword = uc $word;
        return $word if $words{$uword};
        for my $main (@main_terms) {
            return "mainterm:$main:matchedterm:$word"
                if $words{$main}{$uword};
        }
        return;    # nothing if nothing is found
    }
}
# Filter every input record (ID|Title|Abstract): print the ID, then for each
# '.'-separated sentence of the abstract its number in angle brackets
# followed by the space-joined results of matchword() for its words.
while (defined(my $record = <>)) {
    chomp $record;
    my ($id, undef, $abs) = split /\|/, $record;
    my @sentences = split /\./, $abs;
    print "$id|";
    my $number = 0;
    for my $sentence (@sentences) {
        $number++;
        my @hits = map { matchword($_) } split ' ', $sentence;
        print '<', $number, '>', join(' ', @hits);
    }
    print "\n";
}
__END__
####
# Same filter written with an explicit sentence loop: print the record ID,
# then each sentence's running number in angle brackets followed by the
# space-joined matchword() results for the words of that sentence.
while (defined(my $record = <>)) {
    chomp $record;
    my ($id, undef, $abs) = split /\|/, $record;
    print "$id|";
    my $count = 0;
    for my $sentence (split /\./, $abs) {
        $count++;
        my @hits = map { matchword($_) } split ' ', $sentence;
        print '<', $count, '>', join(' ', @hits);
    }
    print "\n";
}