#!/usr/bin/perl

use strict;
use warnings;
use Data::Dumper;

# Results table keyed by sequence text; each method below fills it in.
my %seqs;

# In real use, the sequences to search for would be slurped from a file into
# one scalar (one sequence per line), along these lines:
#
#   open my $fh, '<', $finds or die "Can't open $finds, Perl says $!\n";
#   my $file = do { local $/; <$fh> };
#   close $fh;
#
# For this demonstration the slurped text is assembled in place instead:
# newline-separated sequences with no trailing newline.
my $file = join "\n", qw(
    AAA
    GGG
    AAAGGG
    TTTATAATA
    AGA
    ATA
    TTT
);

print "METHOD 1\n\n";

# METHOD 1: one compiled regex per sequence, each counted independently.
#
# %seqs is used as a hash of hashes keyed by sequence; each entry holds
#   re    - the sequence compiled once with qr// (\Q...\E escapes any regex
#           metacharacters so the sequence is matched literally)
#   count - running number of matches of that sequence (m//g does not report
#           overlapping matches of the same pattern); it starts at 0 so that
#           a sequence which never occurs is reported as 0 rather than
#           having no 'count' key at all
for my $seq (split "\n", $file) {
    # A blank line would compile to an empty pattern, which matches at every
    # position of every input line and inflates the results, so skip it.
    next unless length $seq;
    $seqs{$seq} = { re => qr/\Q$seq\E/, count => 0 };
}

# process the big file line by line (use DATA filehandle in simulation)
while (my $line = <DATA>) {
    for my $entry (values %seqs) {
        my $seq_re = $entry->{re};

        # Assigning the list of matches to () and reading that assignment in
        # scalar context yields the number of matches on this line.
        my $hits = () = $line =~ /$seq_re/g;
        $entry->{count} += $hits;
    }
}

print Dumper \%seqs;

print "\n\n\nMETHOD 2\n\n";

# METHOD 2: a single alternation regex that finds all sequences in one pass.
#
# The data has to be re-read for the simulation. Seeking DATA to offset 0
# lands at the top of this script rather than at the start of the data, so
# read forward until the __DATA__ marker line has been consumed. Stop with an
# error at end of file instead of looping forever if the marker line is never
# seen, and tolerate trailing whitespace (e.g. a carriage return) after it.
seek(DATA, 0, 0) or die "Can't rewind DATA, Perl says $!\n";
my $found_marker = 0;
while (defined(my $skipped = <DATA>)) {
    next unless $skipped =~ /\A__DATA__\s*\z/;
    $found_marker = 1;
    last;
}
die "Can't find the __DATA__ marker while rewinding DATA\n"
    unless $found_marker;

# also clear the results left behind by the previous method
%seqs = ();

# Generate one regex that searches for all the sequences at once, sorted
# longest first: alternation takes the first alternative that matches at a
# position, so this ordering finds the longest possible match there.
# Each sequence is escaped with quotemeta so it is matched literally, and
# blank lines are dropped because an empty alternative can match anywhere.
# Note this method will miss overlaps (see Data::Dumper output): text that
# has been consumed by one match is not considered again on that line.
my $alternation = join '|',
    map  { quotemeta $_ }
    sort { length $b <=> length $a }
    grep { length $_ }
    split "\n", $file;

# compile the regex only once using qr
my $re = qr/($alternation)/;

# process the big file line by line (use DATA filehandle in simulation)
while (my $line = <DATA>) {
    # tally every match on this line under the sequence that matched
    $seqs{$_}++ for $line =~ /$re/g;
}

print Dumper \%seqs;

__DATA__
AAAGGGAAA
TTTATAATA
GGGTTTATA
CCCTTTCCC
UUUUUUUUU
TTTGGGATA