#!/usr/bin/env perl
# dc - collect new dictionary words from a text file.
#
# Usage: dc inputfile.txt excludefile.txt outputfile.txt
#
# Each whitespace-separated token of the input file is stripped of non-word
# characters (\W).  A token that is still at least four characters long and
# is listed neither in the exclude file nor in the output file is appended
# to the output file, one word per line.
#
# The interpreter line carries no switches: on Linux everything after the
# interpreter path is handed to env as a single argument ("perl -w"), which
# env cannot resolve.  Warnings are enabled with the pragma instead.
# `use strict` is not enabled here because it would also apply to the
# fragments after the "####" separators further down, which use undeclared
# variables; everything in this section is declared with `my` regardless.
use warnings;

if (@ARGV != 3) {
    die "Usage: dc inputfile.txt excludefile.txt outputfile.txt \n";
}
my ($input_file, $exclude_file, $output_file) = @ARGV;

# Words that must never be written to the output.
my $exclude = read_hash($exclude_file);

# The output file doubles as the list of words already collected.  It is
# opened in append mode below, which creates it on demand, so a missing file
# simply means that nothing has been collected yet.
my $dict = -e $output_file ? read_hash($output_file) : {};

# Open the input first so that a bad input path does not leave behind a
# freshly created, empty output file.
open(my $in,  '<',  $input_file)  or die "Error opening input: $!\n";
open(my $out, '>>', $output_file) or die "Error opening output file: $!\n";

while (my $line = <$in>) {
    my @tokens = split ' ', $line;
    for my $word (@tokens) {
        # Strip punctuation first so the length test measures the word that
        # would actually be written.
        $word =~ s/\W+//g;
        next if length($word) < 4;
        next if $exclude->{$word} || $dict->{$word};

        # Remember the word so later repeats in the input are skipped.
        $dict->{$word} = 1;
        print {$out} "$word\n";
    }
}

# Buffered write errors only surface when the handle is closed.
close $out or die $!;
close $in  or die $!;

# Stop explicitly so control never runs on into the top-level code that
# follows the subroutine definition below.
exit 0;
# read_hash($file)
#
# Reads $file line by line and returns a reference to a hash whose keys are
# the lines of the file (trailing newline removed) and whose values are all 1.
# Duplicate lines collapse into a single key.
#
# Dies with a message naming the file if it cannot be opened or closed.
sub read_hash {
    my $file = shift;

    # Three-argument open: the name is taken literally, so leading mode
    # characters ('>', '|', ...) or surrounding whitespace in $file can
    # neither change the open mode nor truncate the file.
    open(my $fh, '<', $file) or die "Error reading $file: $!\n";

    my %seen;
    while (my $line = <$fh>) {
        chomp $line;
        $seen{$line} = 1;
    }

    close $fh or die "Error closing $file: $!\n";
    return \%seen;
}
####
# Excerpt of the word-length filter as originally written.  It is only
# meaningful inside the per-word loop, where `next` skips to the following
# word.
@count = split(//, $word); ### characters are counted BEFORE \W is stripped below, so punctuation inflates the count
$word=~s/\W//g;
next if @count < 4; ### guard clause (keeps nesting down); an array in a
### numeric comparison already yields its element count, so scalar() is not needed
####
# Word-length filter with the steps in the corrected order: strip the
# non-word characters first, then measure what is left.  Like any `next`,
# this only works inside a loop.
$word =~ s/\W+//g;
next if length $word < 4; # skip words shorter than four characters after stripping