#! perl -slw
use strict;
use threads;
use threads::shared;
use Thread::Queue;
use Time::HiRes qw[ sleep time ];
use Text::LevenshteinXS qw(distance);

## Tunables. The -s switch on the #! line lets each one be overridden from
## the command line (e.g. -M=8); otherwise the defaults below apply.
our $M //= 4;      ## number of worker threads
our $S //= 25;     ## number of mock sequences to generate
our $L //= 100;    ## length of each mock sequence
our $C //= 50;     ## minimum similarity score (percent) worth reporting

my $minimum_score = $C;
my $max_childs    = $M;

## worker( $queue )
## Thread body. Pulls work items of the form "srcId:srcSeq dstId:dstSeq"
## off the queue until an undef terminator arrives. For each item it
## computes the Levenshtein distance between the two sequences, converts it
## to a percentage similarity relative to the length of the destination
## sequence, and prints a line for every pair scoring at least
## $minimum_score.
sub worker {
    my $Q = shift;

    ## Test definedness (not truth) so only the undef terminator ends the
    ## loop, and use a lexical rather than clobbering the global $_.
    while( defined( my $item = $Q->dequeue ) ) {
        ## A limit of 2 keeps a trailing empty field, so an empty sequence
        ## cannot shift the four values out of alignment.
        my( $srcId, $srcSeq, $dstId, $dstSeq )
            = map split( /:/, $_, 2 ), split( ' ', $item );

        my $dist = distance( $srcSeq, $dstSeq );

        ## The "|| 1" must guard the divisor only. Without the inner
        ## parentheses it binds to the whole quotient, so identical
        ## sequences scored 99 and an empty $dstSeq still divided by zero.
        my $score = 100 - ( $dist * 100 / ( length( $dstSeq ) || 1 ) );

        if( $score >= $minimum_score ) {
            print "$srcId looks like $dstId";
        }
    }
    return;
}

srand 1;    ## fixed seed so the mock data is reproducible from run to run

my $Q = Thread::Queue->new;

## Start the workers before loading the data: a new ithread gets a copy of
## everything that exists at creation time, so spawning first keeps
## %contacts from being duplicated into every worker.
my @workers = map threads->create( \&worker, $Q ), 1 .. $max_childs;

my %contacts;
get_db_data( \%contacts );
my @ids = keys %contacts;

print STDERR "Starting with max: $max_childs concurrent threads";
my $start = time;

## Queue every unordered pair of ids exactly once.
for my $idn1 ( 0 .. $#ids ) {
    my $srcId = $ids[ $idn1 ];

    for my $idn2 ( $idn1 + 1 .. $#ids ) {
        my $dstId = $ids[ $idn2 ];

        ## Throttle the producer so the queue never grows much past ten
        ## pending items.
        sleep 0.1 while $Q->pending > 10;

        $Q->enqueue( "$srcId:$contacts{ $srcId } $dstId:$contacts{ $dstId }" );
    }
}

## One undef per worker tells each of them to leave its dequeue loop.
$Q->enqueue( (undef) x $max_childs );
$_->join for @workers;

printf STDERR "Took %6f seconds\n", time() - $start;
exit;

## get_db_data( \%hash )
## Mock-up of a database fetch. Fills the referenced hash with $S entries
## keyed 1 .. $S, each a random string of $L characters drawn from A, C, G
## and T. Returns nothing.
sub get_db_data { ## mock_up
    my $ref = shift;
    my @alphabet = ( 'A', 'C', 'G', 'T' );

    for my $id ( 1 .. $S ) {
        $ref->{ $id } = join '', map { $alphabet[ rand 4 ] } 1 .. $L;
    }
    return;
}

__END__
c:\test>866682-2
Starting with max: 4 concurrent threads
21 looks like 23
17 looks like 12
2 looks like 12
23 looks like 21
23 looks like 14
23 looks like 4
16 looks like 12
16 looks like 15
6 looks like 12
12 looks like 17
12 looks like 2
12 looks like 16
12 looks like 6
14 looks like 23
15 looks like 16
4 looks like 23
4 looks like 5
5 looks like 4
Took 1.167890 seconds