#! perl -slw use strict; sub readFasta { my $fh = shift; local $/ = '>'; (undef) = scalar <$fh>; ## Discard first delimiter return sub { local $/ = "\n>"; defined( my $record = <$fh> ) or return; my @lines = split "\n", $record; pop @lines if $lines[-1] eq '>'; my $id = shift @lines; return $id, join'', @lines; }; } sub fuzzy { my( $rSeq, $sub, $fuzz ) = @_; my $lSeq = length $$rSeq; my $lSub = length $sub; my $nMatch = $lSub - $fuzz; my $nSubs = int( $lSeq / $lSub ); my $mask = $sub x $nSubs; my $lMask = length $mask; my $nOffsets = $lSeq - $lMask; my @matches; for my $o ( 0 .. $nOffsets ) { my $res = $mask ^ substr $$rSeq, $o, $lMask; for my $sub ( 0 .. $nSubs - 1 ) { my $n = substr( $res, $sub * $lSub, $lSub ) =~ tr[\0][\0]; $n >= $nMatch and push @matches, $sub * $lSub + $o; } } return \@matches; } $|++; my %subs; open IN, '<', 'DNAfuzzy.subs' or die $!; my $nextSub = readFasta( \*IN ); $subs{ $_->[0] } = $_->[ 1 ] while @{ $_ = [ $nextSub->() ] }; close IN; open IN, '<', 'DNAfuzzy.seqs' or die $!; my $nextSeq = readFasta( \*IN ); while( my( $id, $seq ) = $nextSeq->() ) { while( my( $subId, $sub ) = each %subs ) { my $rMatches = fuzzy( \$seq, $sub, 4 ); printf "%s : %s\n", $subId, join '', @{ $rMatches } if @$rMatches; } } close IN; #### #! perl -slw use strict; use threads ( stack_size => 4096 ); use threads::shared; use Thread::Queue; sub readFasta { my $fh = shift; local $/ = '>'; (undef) = scalar <$fh>; ## Discard first delimiter return sub { local $/ = "\n>"; defined( my $record = <$fh> ) or return; my @lines = split "\n", $record; pop @lines if $lines[-1] eq '>'; my $id = shift @lines; return $id, join'', @lines; }; } sub fuzzy { my( $rSeq, $sub, $fuzz ) = @_; my $lSeq = length $$rSeq; my $lSub = length $sub; my $nMatch = $lSub - $fuzz; my $nSubs = int( $lSeq / $lSub ); my $mask = $sub x $nSubs; my $lMask = length $mask; my $nOffsets = $lSeq - $lMask; my @matches; for my $o ( 0 .. $nOffsets ) { my $res = $mask ^ substr $$rSeq, $o, $lMask; for my $sub ( 0 .. $nSubs - 1 ) { my $n = substr( $res, $sub * $lSub, $lSub ) =~ tr[\0][\0]; $n >= $nMatch and push @matches, $sub * $lSub + $o; } } return \@matches; } $|++; our $NTHREADS //= 4; my %subs; open IN, '<', 'DNAfuzzy.subs' or die $!; my $nextSub = readFasta( \*IN ); $subs{ $_->[0] } = $_->[ 1 ] while @{ $_ = [ $nextSub->() ] }; close IN; sub worker { my $Q = shift; while( my( $id, $seq ) = split ':', $Q->dequeue ) { while( my( $subId, $sub ) = each %subs ) { my $rMatches = fuzzy( \$seq, $sub, 4 ); printf "%s : %s\n", $subId, join '', @{ $rMatches } if @$rMatches; } } } my $Q = new Thread::Queue; my @threads = map threads->create( \&worker, $Q ), 1 .. $NTHREADS; open IN, '<', 'DNAfuzzy.seqs' or die $!; my $nextSeq = readFasta( \*IN ); while( my( $id, $seq ) = $nextSeq->() ) { $Q->enqueue( join ':', $id, $seq ); sleep 1 while $Q->pending > 8; } close IN; $Q->enqueue( (undef) x $NTHREADS ); $_->join for @threads;