# to run perl demo.pl haukex perl demo.pl karlary perl demo.pl karlseq #### #!/usr/bin/env perl # Re^2: search for particular elements of hash with multiple values # http://www.perlmonks.org/?node_id=1188488 use strict; use warnings; use feature qw ( say ); use MCE::Loop; use constant { _FOWPRIM => 0, _REVPRIM => 1, _BC_PAIR => 2, _BC_PNUM => 3 }; # Note: The karlhash function requires MCE 1.828 to be released soon. sub usage { warn "usage: $0 ( haukex | karlary | karlhash | karlseq )\n\n"; exit 1; } my $func_name = shift || usage(); usage() unless __PACKAGE__->can($func_name); my $max = 1000000; my $data = [ 'AGCTCGTTGTTCGATCCA', 'GAGAGATAGATGATAGTG', 'TTTT_CCCC', 0 ]; our %barcode_hash = map { $_ => $data } 1 .. $max - 2; $barcode_hash{ ($max - 1) } = [ 'AGCTCGTTGTTCGATCCA', 'GAGAGATAGATGATAGTG', 'TTTT_AAAA', 0 ]; $barcode_hash{ ($max) } = [ 'AGCTCGTTGTTCGATCCA', 'GAGAGATAGATGATAGTG', 'TTTT_AAAA', 0 ]; our $barcode_pair_35 = 'TTTT_AAAA'; $| = 1; { no strict qw ( refs ); my @found = $func_name->(); # increment the bc_pair_num field $barcode_hash{$_}[_BC_PNUM]++ for @found; # display summary and found keys printf "Found %d keys\n", scalar @found; say join ', ', sort { $a <=> $b } @found; } exit(0); sub haukex { # Serial code. my @found; for my $key (1 .. $max) { if ($barcode_hash{$key}[_BC_PAIR] eq $barcode_pair_35) { say "Found at $key"; push @found, $key; } } return @found; } sub karlary { # Workers receive next array chunk. MCE::Loop::init { max_workers => 4 }; my @found = mce_loop { my ($mce, $chunk_ref, $chunk_id) = @_; for my $key ( @$chunk_ref ) { if ($barcode_hash{$key}[_BC_PAIR] eq $barcode_pair_35) { MCE->say("Found at $key, chunk_id: $chunk_id"); MCE->gather($key); } } } 1 .. $max; MCE::Loop::finish(); return @found; } sub karlhash { # Workers receive next array chunk. MCE::Loop::init { max_workers => 4 }; # Must have MCE 1.828 or later to run this demonstration. # See http://www.perlmonks.org/?node_id=1188593 for a faster # solution using hashref as input by spawning workers early. # That will run better on the Windows platform. my @found = mce_loop { my ($mce, $chunk_ref, $chunk_id) = @_; for my $key ( keys %{ $chunk_ref } ) { if ($chunk_ref->{$key}[_BC_PAIR] eq $barcode_pair_35) { MCE->say("Found at $key, chunk_id: $chunk_id"); MCE->gather($key); } } } \%barcode_hash; MCE::Loop::finish(); return @found; } sub karlseq { # Workers receive next sequence 'begin' and 'end' boundaries. MCE::Loop::init { max_workers => 4, bounds_only => 1 }; # The bounds_only option applies to sequence which means for # workers to compute the begin and end boundaries only and # not the items in between. The result is lesser overhead. my @found = mce_loop_s { my ($mce, $chunk_ref, $chunk_id) = @_; for my $key ($chunk_ref->[0] .. $chunk_ref->[1]) { if ($barcode_hash{$key}[_BC_PAIR] eq $barcode_pair_35) { MCE->say("Found at $key, chunk_id: $chunk_id"); MCE->gather($key); } } } 1, $max; MCE::Loop::finish(); return @found; }