#!/usr/bin/perl
# NOTE(review): this file is a sequence of code excerpts separated by `####`
# lines, not a single runnable program.  It contains fragments of one script
# in a first form, each followed by a rewritten form, then two complete
# scripts and an alternative main loop.  Declarations repeat across excerpts,
# and perl ignores everything after the first __END__ line.
use warnings;
use strict;
####
# Excerpt: argument handling, first form.
# Requires exactly five arguments but only reads indices 0, 3 and 4.
if ($#ARGV != 4) {
    print "usage: run batch file 'run' not this one\n";
    exit;
}
my $wordfile = $ARGV[0];
my $textfile=$ARGV[3];
my $OutPutFile=$ARGV[4];
####
# Excerpt: argument handling, rewritten form.
# Requires exactly three arguments: word file, text file, output file.
die "Usage: run batch file 'run' not this one\n" unless @ARGV==3;
my ($wordfile, $textfile, $outfile)=@ARGV;
####
# Excerpt: opening the files, first form (bareword handles, two-argument open).
open (IF1,"$wordfile")|| die "cannot open the file";
open (PF, "$textfile")|| die "cannot open the file";
open (OF,">$OutPutFile")|| die "cannot open the file";
####
# Excerpt: opening the files, rewritten form (lexical handles, three-argument
# open, file name and $! included in the error message).
open my $wfh, '<', $wordfile or die "Can't open `$wordfile': $!\n";
open my $tfh, '<', $textfile or die "Can't open `$textfile': $!\n";
open my $ofh, '>', $outfile or die "Can't open `$outfile': $!\n";
####
# Excerpt: loading the word list, first form.
# Passes the IF1 glob and receives a hash reference.
my $List1Ref=ReadDataInHash (*IF1);
####
# Excerpt: loading the word list, rewritten form.
# Passes the file name and receives the hash as a list.
my %words=ReadDataInHash($wordfile);
####
# Excerpt: ReadDataInHash, first form (header; the body is the next excerpt).
# NOTE(review): the empty prototype `()` declares a sub taking no arguments,
# yet the body shifts one off @_ and the call above passes one.
sub ReadDataInHash()
####
{
    # Reads lines from the handle given as first argument.  The first field
    # of each line becomes a key of %list1; every further field becomes a key
    # (value 1) of the hash stored under it.  Returns a reference to a copy
    # of %list1.
    my $x = shift;
    my %list1=();
    while (my $line =<$x>) {
        chomp $line;
        my @arr=split /\s/,$line;
        for (my $i=0;$i<=$#arr ;$i++) {
            if ($i==0) {
                # The first field is registered even when nothing follows it.
                $list1{$arr[$i]}={};
            }
            else{
                # NOTE(review): `${%{...}}` uses a hash as a reference, which
                # current perls reject; the rewritten form below uses plain
                # nested subscripts instead.
                ${%{$list1{$arr[0]}}}{$arr[$i]} = 1;
            }
        }
    }
    return {%list1};
}
####
# Excerpt: ReadDataInHash, rewritten form.
# Opens the file named by its first argument (dies if that fails) and returns
# the word hash as a list: first field of each line => { other fields => 1 }.
sub ReadDataInHash {
    my $file=shift;
    open my $fh, '<', $file or die "Can't open `$file': $!\n";
    my %words;
    while (<$fh>) {
        chomp;
        my ($first, @rest)=split;
        next unless defined $first;    # blank line: nothing to record
        # Register the first field even when no other fields follow, as the
        # first form does; otherwise such a word is never matched later.
        $words{$first} ||= {};
        $words{$first}{$_}=1 for @rest;
    }
    %words;
}
####
# Excerpt: main loop, first form.
# Each input line is split on '|' into ID, title and abstract; the abstract is
# split on '.' into sentences.  Output per line: "ID|", then for every
# sentence "<n>" followed by the words that are keys of the word list (as is)
# or that appear under a key (as "mainterm:KEY:matchedterm:WORD").
# NOTE(review): the handle was missing from this readline (`my $line=`).  PF,
# opened on $textfile in the excerpt above, is presumably what was intended --
# confirm.
while (my $line=<PF>) {
    chomp($line);
    my @arrAbs=split (/\|/,$line);
    my $ID=$arrAbs[0];
    my $Title=$arrAbs[1];
    my $Abs=$arrAbs[2];
    @arrAbs=split (/\./,$Abs);
    print OF"$ID|";
    for (my $SentenceNumber=0;$SentenceNumber<=$#arrAbs ;$SentenceNumber++) {
        my $i=$SentenceNumber+1;
        print OF "<".$i.">";
        my $Sentence=$arrAbs[$SentenceNumber];
        my @arrAbsSen=split (' ',$Sentence);
        foreach my $word(@arrAbsSen) {
            #to match terms in the list, stored in %{$List1Ref}.
            if (exists(${%{$List1Ref}}{uc($word)})) {
                print OF "$word ";
            }
            else {
                # Keys are tried in sorted order; only the first hit is printed.
                foreach my $p (sort keys (%{$List1Ref})) {
                    if (exists(${%{${%{$List1Ref}}{$p}}}{uc($word)})) {
                        print OF "mainterm:$p:matchedterm:$word ";
                        last;
                    }
                }
            }
        }
        @arrAbsSen=();
    }
    print OF "\n";
    @arrAbs=();
}
####
# Excerpt: main loop, rewritten form (reads $tfh, writes $ofh, uses %words).
while (my $line=<$tfh>) {
    chomp $line;
    my ($id, $title, $abs)=split /\|/, $line;
    my @sentences=split /\./, $abs;
    print $ofh "$id|";
    for my $idx (0..$#sentences) {
        print $ofh '<', $idx+1, '>';
        for my $word (split ' ', $sentences[$idx]) {
            if ($words{uc $word}) {
                print $ofh "$word "
            }
            else {
                # Keys are tried in sorted order; only the first hit is printed.
                for (sort keys %words) {
                    if ($words{$_}{uc $word}) {
                        print $ofh "mainterm:$_:matchedterm:$word ";
                        last;
                    }
                }
            }
        }
    }
    print $ofh "\n";
}
####
#!/usr/bin/perl
# Complete script assembled from the rewritten excerpts above.
# Arguments: word file, text file, output file.
use strict;
use warnings;

die "Usage: run batch file 'run' not this one\n" unless @ARGV==3;

my ($wordfile, $textfile, $outfile)=@ARGV;

open my $tfh, '<', $textfile or die "Can't open `$textfile': $!\n";
open my $ofh, '>', $outfile or die "Can't open `$outfile': $!\n";

my %words=ReadDataInHash($wordfile);

while (my $line=<$tfh>) {
    chomp $line;
    my ($id, $title, $abs)=split /\|/, $line;
    my @sentences=split /\./, $abs;
    print $ofh "$id|";
    for my $idx (0..$#sentences) {
        print $ofh '<', $idx+1, '>';
        for my $word (split ' ', $sentences[$idx]) {
            if ($words{uc $word}) {
                print $ofh "$word "
            }
            else {
                for (sort keys %words) {
                    if ($words{$_}{uc $word}) {
                        print $ofh "mainterm:$_:matchedterm:$word ";
                        last;
                    }
                }
            }
        }
    }
    print $ofh "\n";
}

# Reads the word file named by the first argument and returns the word hash
# as a list: first field of each line => { other fields => 1 }.
sub ReadDataInHash {
    my $file=shift;
    open my $fh, '<', $file or die "Can't open `$file': $!\n";
    my %words;
    while (<$fh>) {
        chomp;
        my ($first, @rest)=split;
        next unless defined $first;    # blank line: nothing to record
        # Register the first field even when no other fields follow, so a
        # main term without related terms is still recognised.
        $words{$first} ||= {};
        $words{$first}{$_}=1 for @rest;
    }
    %words;
}

__END__
####
#!/usr/bin/perl
# Second complete script: the first argument is the word file; the remaining
# arguments (or standard input) are read through <> and the result is printed
# to standard output.
use strict;
use warnings;

# NOTE(review): the argument placeholders appear to be missing from this
# usage text -- confirm the intended wording.
die "Usage: $0 []\n" if @ARGV < 1;

{
    # %words is private to this block; matchword below is the only reader.
    my %words;
    my $file=shift;
    open my $fh, '<', $file or die "Can't open `$file': $!\n";
    while (<$fh>) {
        chomp;
        my ($first, @rest)=split;
        next unless defined $first;    # blank line: nothing to record
        # Register the first field even when no other fields follow, so a
        # main term without related terms is still recognised.
        $words{$first} ||= {};
        $words{$first}{$_}=1 for @rest;
    }

    # Returns the word itself when its upper-cased form is a key of %words,
    # "mainterm:KEY:matchedterm:WORD" for the first key (in sorted order)
    # that lists it, and nothing otherwise.
    sub matchword {
        my $word=shift;
        my $uword=uc $word;
        return $word if $words{$uword};
        $words{$_}{$uword} and return "mainterm:$_:matchedterm:$word"
            for sort keys %words;
        return; # nothing if nothing is found
    }
}

while (<>) {
    chomp;
    my ($id, undef, $abs)=split /\|/;
    my @sentences=split /\./, $abs;
    print "$id|";
    for (0..$#sentences) {
        print '<', $_+1, '>',
            join ' ', map matchword($_), split ' ', $sentences[$_];
    }
    print "\n";
}

__END__
####
# Excerpt: alternative main loop for the second script, numbering the
# sentences with a counter instead of an index.
while (<>) {
    chomp;
    my ($id, undef, $abs)=split /\|/;
    print "$id|";
    my $i;
    print '<', ++$i, '>', join ' ', map matchword($_), split
        for split /\./, $abs;
    print "\n";
}