while ( ($pos = index($sequence, $motif, $pos)) >= 0 ) { lock $semaphore; print $fh join("\t", $genome_build, @row, $chr, $pos+1), "\n"; $pos += $len; } #### while ( $sequence =~ /$motif/g ) { lock $semaphore; print $fh join("\t", $genome_build, @row, $chr, pos($sequence)), "\n"; } #### Out of memory! Callback called exit at find_and_load_motifs_test.pl line 150. Thread 3 terminated abnormally: Callback called exit at find_and_load_motifs_test.pl line 150. Perl exited with active threads: 9 running and unjoined 1 finished and unjoined 0 running and detached #### ### INCLUDES ######################################################################################### use strict; use Bio::SeqIO; use Carp; use threads; use threads::shared; # turn off STDOUT buffering to allow smooth printing from multiple threads $| = 1; ### PARAMETERS ####################################################################################### my $chr_file = $ARGV[0]; my $motif_file = $ARGV[1]; my $genome_build = $ARGV[2]; my $max_threads = 10; if ( 3 != scalar(@ARGV) ) { croak 'Invalid parameter number'; } elsif ( ! -e $chr_file || ! -T $chr_file ) { croak 'Missing or invalid chromsome-listing file'; } elsif ( ! -e $motif_file || ! -T $motif_file ) { croak 'Missing or invalid motif-list file'; } my $out_file = 'results.out'; ### LOCALS ########################################################################################### my @chromosomes; my %threads; my $semaphore : shared; ### LOAD THE CHROMOSOME LIST ######################################################################### open(my $fh_chr, '<', $chr_file) or croak "Unable to open chromsome list: $chr_file"; while (<$fh_chr>) { s/^\s+//; s/\s+$//; my $row = $_; next() if (!$row); push @chromosomes, $row; } close($fh_chr); ### FUNCTION FOR THREADING ########################################################################### sub threading_function($$$) { my $chromosome = $_[0]; my $motif_file = $_[1]; my $fh = $_[2]; # set up paths my $chr = 'chr'.$chromosome; my $directory = $chromosome.'/'; my $fa_file = $chr.'.fa.masked'; my $gz_file = $fa_file.'.gz'; my $fa_path = $directory.$fa_file; my $gz_path = $directory.$gz_file; # decompress sequence file my $tst = `gunzip $gz_path`; # read sequence for motif-scanning my $seqio = Bio::SeqIO->new( -file => "<$fa_path", -format => 'largefasta' ); my $seq = $seqio->next_seq(); my $sequence = $seq->seq(); # open the motif file open(my $fh_motif, '<', $motif_file) or croak "Unable to open motif file: $motif_file"; # process each motif in turn while (<$fh_motif>) { s/^\s+//; s/\s+$//; my @row = split("\t"); next() if ( 4 != scalar(@row) ); my $motif = pop(@row); while ( $sequence =~ /$motif/g ) { lock $semaphore; print $fh join("\t", $genome_build, @row, $chr, pos($sequence)), "\n"; } } # close the motif file close($fh_motif); # recompress sequence file $tst = `gzip -9 $fa_path`; } ### THREAD MANAGEMENT ################################################################################# # initially all chromosomes are waiting to be run my @waiting = @chromosomes; # open up the output file open(my $fh_out, '>', $out_file) or croak 'Unable to open outfile'; # turn off buffering (make the file-handle "hot") to allow smooth printing from multiple threads select((select($fh_out), $|=1)[0]); # keep going while either chromosomes are waiting or threads are executing while ( scalar(@waiting) || scalar( keys(%threads) ) ) { # Note: I could have used threads->list( threads::running ) to count the running threads # first check if old threads are finished foreach my $chr ( keys(%threads) ) { # if a thread is finished join it and remove from %threads if ( $threads{$chr}->is_joinable() ) { $threads{$chr}->join(); delete $threads{$chr}; } } # then check if we can start a new thread if ( ( scalar( keys(%threads) ) < $max_threads ) && ( scalar(@waiting) ) ) { my $chr = pop(@waiting); $threads{$chr} = threads->new(\&threading_function, $chr, $motif_file, $fh_out); } # take a break if we are all full-up on threads to not waste CPU sleep 60 if ( scalar( keys(%threads) ) >= $max_threads ); }