#! perl -slw
## NOTE: the switches on the #! line are significant:
##   -w enables warnings, -l makes every print() append a newline.
use strict;
use Carp;
use IO::Handle;
use threads;
use threads::shared;

## Usage: <script> <chromosome-list-file> <motif-list-file>
##
## Searches every chromosome named in the first file for every motif named
## in the second file, one worker thread per chromosome, and prints one
## tab-separated line per hit:  chromosome, 0-based offset, motif name.
##
## chromosome-list-file : one chromosome name per line; the sequence is read
##                        from fasta/chr_<name>.fa
## motif-list-file      : <motif name> TAB <motif string>, one per line;
##                        lines that do not have exactly two fields are ignored

my $chr_file = $ARGV[0];
my $seq_file = $ARGV[1];

if ( 2 != scalar(@ARGV) ) {
    croak 'Invalid parameter number';
}
elsif ( ! -e $chr_file || ! -T $chr_file ) {
    croak 'Missing or invalid chromsome-listing file';
}
elsif ( ! -e $seq_file || ! -T $seq_file ) {
    croak 'Missing or invalid sequence-listing file';
}

## Motif name => motif string.
## Deliberately NOT shared: it is completely populated before the first
## thread is created and only read afterwards, and every new thread receives
## its own private copy of the parent's non-shared data.
my %motifs;

## Held while a worker writes to STDOUT so that lines produced by different
## workers cannot be interleaved mid-line.
my $stdout_lock : shared;

## thread( $chromosome )
## Worker body: loads fasta/chr_<$chromosome>.fa and reports every
## non-overlapping occurrence of each motif in %motifs.
## Croaks (terminating only this worker) if the FASTA file cannot be opened.
sub thread {
    my $chromosome = shift;
    my $directory  = 'fasta/';
    my $file       = 'chr_' . $chromosome . '.fa';
    my $path       = $directory . $file;

    # my $seqio = Bio::SeqIO->new(
    #     -file   => "<$path",
    #     -format => 'largefasta'
    # );
    # my $seq      = $seqio->next_seq();
    # my $sequence = $seq->seq();

    ## Crude fasta load--Expects 1 sequence per file
    open my $fh, '<', $path or croak "$path : $!\n";
    <$fh>;    ## discard header
    my $sequence = do { local $/; <$fh> };
    close $fh;

    ## A file holding only a header line yields no body at all.
    $sequence = '' if !defined $sequence;
    $sequence =~ s[\s+][]g;

    foreach my $motif ( keys %motifs ) {
        my $str = $motifs{$motif};
        my $len = length $str;

        ## An empty pattern matches at every offset and would never advance.
        next if !$len;

        my @hits;
        my $pos = 0;
        while ( ( $pos = index( $sequence, $str, $pos ) ) >= 0 ) {
            push @hits, join "\t", $chromosome, $pos, $motif;
            $pos += $len;    ## resume after the match: hits never overlap
        }
        next if !@hits;

        ## The lock is released at the end of this loop iteration; flushing
        ## before then guarantees the whole batch is written while it is held.
        lock($stdout_lock);
        print for @hits;
        STDOUT->flush();
    }
}

## Load the motifs (must happen before any thread is created).
open my $fh_seq, '<', $seq_file
    or croak "Unable to open motif file: $seq_file: $!";
while ( my $line = <$fh_seq> ) {
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    my @row = split /\t/, $line;
    next if 2 != scalar @row;
    $motifs{ $row[0] } = $row[1];
}
close($fh_seq);

## Load the chromosome names, skipping blank lines.
my @chromosomes;
open my $fh_chr, '<', $chr_file
    or croak "Unable to open chromsome list: $chr_file: $!";
while ( my $row = <$fh_chr> ) {
    $row =~ s/^\s+//;
    $row =~ s/\s+$//;
    next if !length $row;    ## length, not truth: a chromosome may be named "0"
    push @chromosomes, $row;
}
close($fh_chr);

## One worker per chromosome; wait for all of them to finish.
my @threads = map { threads->new( \&thread, $_ ) } @chromosomes;
$_->join for @threads;

__END__
C:\test>568393 fasta\chromosomes.lst fasta\motifs.lst
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6666540 6
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 6540 3
22_14-09-2001 12666540 7
22_14-09-2001 12666540 7
22_14-09-2001 12666540 7
22_14-09-2001 12666540 7
22_14-09-2001 12666540 7
22_14-09-2001 24666540 9
22_14-09-2001 12666540 7
22_14-09-2001 12666540 7
22_14-09-2001 12666540 7
22_14-09-2001 12666540 7
22_14-09-2001 12666540 7
22_14-09-2001 12666540 7
22_14-09-2001 540 2
22_14-09-2001 24666540 9
22_14-09-2001 540 2
22_14-09-2001 24666540 9
22_14-09-2001 24666540 9
22_14-09-2001 24666540 9
22_14-09-2001 540 2
22_14-09-2001 540 2
22_14-09-2001 24666540 9
22_14-09-2001 540 2
22_14-09-2001 24666540 9
22_14-09-2001 24666540 9
22_14-09-2001 540 2
22_14-09-2001 24666540 9
22_14-09-2001 24666540 9
22_14-09-2001 540 2