$ cat union.txt
ABC Union
XYZ Union
####
$ for i in agreements other; do for j in `ls $i`; do echo "*** $i/$j ***"; cat $i/$j; done; done
*** agreements/abc.txt ***
...
ABC Union
...
*** agreements/abc_xyz.pdf ***
...
XYZ Union and ABC Union
...
*** agreements/def.txt ***
...
DEF Union
...
*** agreements/pqrpdf ***
... temp data ...
*** agreements/xyz.pdf ***
.................. XYZ
Union .............
*** other/dummy_empty ***
####
#!/usr/bin/env perl
use strict;
use warnings;
use autodie;
use File::Spec;
use List::Util 'first';
# Entry point: load the union names, echo them, then scan each directory.
# The bare block keeps these lexicals out of the subroutines' scope.
{
    my $names_path  = 'union.txt';
    my @search_dirs = qw{agreements other};

    my $wanted = get_unions($names_path);

    print "Unions to check:\n";
    foreach my $name (@$wanted) {
        print "\t$name\n";
    }

    foreach my $search_dir (@search_dirs) {
        process_files($search_dir, $wanted);
    }
}
# get_unions($union_file)
#
# Reads the list of union names to search for, one name per line.
#
# Parameters:
#   $union_file - path of the text file holding the names.
#
# Returns an array reference of the names, shortest first.  Each name has
# runs of spaces squeezed to a single space and surrounding whitespace
# removed; lines that are empty after that clean-up are dropped.
#
# Dies (via the file-level "use autodie") if the file cannot be opened.
sub get_unions {
    my ($union_file) = @_;

    # The :crlf layer mirrors how process_files() reads the documents, so a
    # list saved with Windows line endings does not leave "\r" on each name.
    open my $fh, '<:crlf', $union_file;

    my @unions;
    while (my $line = <$fh>) {
        chomp $line;
        $line =~ y/ / /s;              # squeeze runs of spaces to one
        $line =~ s/\A\s+|\s+\z//g;     # trim leading/trailing whitespace

        # An empty name is found at offset 0 of every document, which would
        # turn every file into a bogus match.
        next if $line eq '';

        push @unions, $line;
    }
    close $fh;

    return [ sort { length $a <=> length $b } @unions ];
}
# process_files($dir, $unions)
#
# Scans one directory for agreement documents and reports, for each one,
# the first union name it contains.
#
# Parameters:
#   $dir    - directory to scan (not recursive).
#   $unions - array reference of union names, tried in the order given.
#
# Only plain files whose names end in ".txt" or ".pdf" are examined, in
# name order.  Progress and results are printed to the currently selected
# output handle.  Returns nothing.
#
# Dies (via the file-level "use autodie") if the directory or one of the
# files cannot be opened.
sub process_files {
    my ($dir, $unions) = @_;

    print "Prcessing directory: $dir\n";

    opendir(my $dh, $dir);
    # readdir() hands back entries in no particular order; sort them so the
    # report is the same on every filesystem.
    my @names = sort grep { /\.(?:txt|pdf)\z/ } readdir $dh;
    closedir $dh;

    for my $name (@names) {
        my $path = File::Spec->catfile($dir, $name);

        # A directory that merely has a matching name is not a document.
        next unless -f $path;

        print "\tProcessing path: $path\n";

        # Slurp the whole file; :crlf folds Windows line endings into "\n".
        my $text = do {
            open my $fh, '<:crlf', $path;
            local $/;
            <$fh>;
        };

        # Turn newlines into spaces and squeeze runs, so a name that wraps
        # across lines ("XYZ\nUnion") still matches "XYZ Union".
        $text =~ y/ \n/ /s;

        my $found = first { index($text, $_) > -1 } @$unions;
        if (defined $found) {
            print "\t\tMATCH: $found\n";
        }
        else {
            print "\t\tNo matches found.\n";
        }
    }

    return;
}
####
Unions to check:
ABC Union
XYZ Union
Prcessing directory: agreements
Processing path: agreements/abc.txt
MATCH: ABC Union
Processing path: agreements/abc_xyz.pdf
MATCH: ABC Union
Processing path: agreements/def.txt
No matches found.
Processing path: agreements/xyz.pdf
MATCH: XYZ Union
Prcessing directory: other