while ( ($pos = index($sequence, $motif, $pos)) >= 0 ) {
lock $semaphore;
print $fh join("\t", $genome_build, @row, $chr, $pos+1), "\n";
$pos += $len;
}
####
while ( $sequence =~ /$motif/g ) {
lock $semaphore;
print $fh join("\t", $genome_build, @row, $chr, pos($sequence)), "\n";
}
####
Out of memory!
Callback called exit at find_and_load_motifs_test.pl line 150.
Thread 3 terminated abnormally: Callback called exit at find_and_load_motifs_test.pl line 150.
Perl exited with active threads:
9 running and unjoined
1 finished and unjoined
0 running and detached
####
### INCLUDES #########################################################################################
use strict;
use Bio::SeqIO;
use Carp;
use threads;
use threads::shared;
# turn off STDOUT buffering to allow smooth printing from multiple threads
$| = 1;
### PARAMETERS #######################################################################################
my $chr_file = $ARGV[0];
my $motif_file = $ARGV[1];
my $genome_build = $ARGV[2];
my $max_threads = 10;
if ( 3 != scalar(@ARGV) ) {
croak 'Invalid parameter number';
}
elsif ( ! -e $chr_file || ! -T $chr_file ) {
croak 'Missing or invalid chromsome-listing file';
}
elsif ( ! -e $motif_file || ! -T $motif_file ) {
croak 'Missing or invalid motif-list file';
}
my $out_file = 'results.out';
### LOCALS ###########################################################################################
my @chromosomes;
my %threads;
my $semaphore : shared;
### LOAD THE CHROMOSOME LIST #########################################################################
open(my $fh_chr, '<', $chr_file) or croak "Unable to open chromsome list: $chr_file";
while (<$fh_chr>) {
s/^\s+//;
s/\s+$//;
my $row = $_;
next() if (!$row);
push @chromosomes, $row;
}
close($fh_chr);
### FUNCTION FOR THREADING ###########################################################################
sub threading_function($$$) {
my $chromosome = $_[0];
my $motif_file = $_[1];
my $fh = $_[2];
# set up paths
my $chr = 'chr'.$chromosome;
my $directory = $chromosome.'/';
my $fa_file = $chr.'.fa.masked';
my $gz_file = $fa_file.'.gz';
my $fa_path = $directory.$fa_file;
my $gz_path = $directory.$gz_file;
# decompress sequence file
my $tst = `gunzip $gz_path`;
# read sequence for motif-scanning
my $seqio = Bio::SeqIO->new(
-file => "<$fa_path",
-format => 'largefasta'
);
my $seq = $seqio->next_seq();
my $sequence = $seq->seq();
# open the motif file
open(my $fh_motif, '<', $motif_file) or croak "Unable to open motif file: $motif_file";
# process each motif in turn
while (<$fh_motif>) {
s/^\s+//;
s/\s+$//;
my @row = split("\t");
next() if ( 4 != scalar(@row) );
my $motif = pop(@row);
while ( $sequence =~ /$motif/g ) {
lock $semaphore;
print $fh join("\t", $genome_build, @row, $chr, pos($sequence)), "\n";
}
}
# close the motif file
close($fh_motif);
# recompress sequence file
$tst = `gzip -9 $fa_path`;
}
### THREAD MANAGEMENT #################################################################################
# initially all chromosomes are waiting to be run
my @waiting = @chromosomes;
# open up the output file
open(my $fh_out, '>', $out_file) or croak 'Unable to open outfile';
# turn off buffering (make the file-handle "hot") to allow smooth printing from multiple threads
select((select($fh_out), $|=1)[0]);
# keep going while either chromosomes are waiting or threads are executing
while ( scalar(@waiting) || scalar( keys(%threads) ) ) {
# Note: I could have used threads->list( threads::running ) to count the running threads
# first check if old threads are finished
foreach my $chr ( keys(%threads) ) {
# if a thread is finished join it and remove from %threads
if ( $threads{$chr}->is_joinable() ) {
$threads{$chr}->join();
delete $threads{$chr};
}
}
# then check if we can start a new thread
if ( ( scalar( keys(%threads) ) < $max_threads ) && ( scalar(@waiting) ) ) {
my $chr = pop(@waiting);
$threads{$chr} = threads->new(\&threading_function, $chr, $motif_file, $fh_out);
}
# take a break if we are all full-up on threads to not waste CPU
sleep 60 if ( scalar( keys(%threads) ) >= $max_threads );
}