#!/usr/bin/perl
use warnings;
use strict;

# Usage: script STOPWORDS_FILE TERMS_FILE TEXT_FILE
#
# Reads a stop-word list (one per line) and a term list (one per line),
# then tokenizes TEXT_FILE and prints, one per line, every lowercase
# alphanumeric word that is not a stop word. Each word is printed only the
# first time it is encountered.

die "Usage: $0 STOPWORDS_FILE TERMS_FILE TEXT_FILE\n" unless @ARGV >= 3;

my ($stops_path, $terms_path, $text_path) = @ARGV;

# Load a file containing one entry per line into a lookup table
# (entry => 1). Dies with the OS error if the file cannot be opened.
sub load_set {
    my ($path) = @_;

    # Three-argument open with a lexical handle: the path is never
    # interpreted as a mode, and the handle is scoped to this sub.
    open my $fh, '<', $path or die "Cannot open '$path': $!\n";

    my %set;
    while (my $line = <$fh>) {
        chomp $line;
        $set{$line} = 1;
    }
    close $fh;

    return %set;
}

my %stops = load_set($stops_path);

# Loaded for the term-matching step described in the TODO below; it is not
# consulted anywhere else yet.
my %terms = load_set($terms_path);

# Words already printed. Kept separate from %stops so that the stop-word
# table is never modified while it is being consulted.
my %seen;

open my $text_fh, '<', $text_path or die "Cannot open '$text_path': $!\n";

while (my $line = <$text_fh>) {
    chomp $line;

    # TODO: Starting with the longest term from TERMS_FILE, then going to the
    # next largest, and so on: if the term also exists in this line, surround
    # it by *, print the term, and remove the term from further processing.
    # After that, remove the stop words from the remainder of the line that
    # did not match a term. Only the stop-word removal is implemented below.

    # ASCII-only lowercasing; stop words are matched against this form, so
    # the stop-word file is expected to hold lowercase entries.
    (my $lowered = $line) =~ tr/A-Z/a-z/;

    # Split on runs of non-alphanumerics so adjacent separators do not
    # produce empty tokens.
    for my $word (split /[^a-z0-9]+/, $lowered) {
        next if $word eq '';       # a leading separator yields one empty field
        next if $stops{$word};     # stop word: drop
        next if $seen{$word}++;    # already printed once
        print "$word\n";
    }
}

close $text_fh;