in reply to Re^3: Dereferencing Performance - Confused by Results
in thread Dereferencing Performance - Confused by Results

BroswerUK, Sorry to keep this thread going but could you give a little more detail about this?

The code above is part of the subroutine &find_symmetry which the "threads" execute. I feed the threads input data, a string, from a queue and they output a hash ref to another queue. Other than the queues, the threads do not share variables between them. I would like them to be able to use the alias functionality shown above in that portion of code, which is part of &find_symmetry. So to me, I'm not wanting to alias across threads but rather within threads? Is this impossible?

More confusing to me, sometimes they will work fine if I keep retrying to run the script. They either fail very early or it will work fine and the output from the program is correct. I don't understand this, I would understand if it always failed or always worked, but on the same input every time, the sometimes working behavior confuses me. I think this requires sharing the code for the subroutine to give some context, there is more but all of the aliasing would be within here. Basically the input queue has a list of files, each thread opens the file, parses the file and makes the $compound hash ref. This is given to two subs to make Mapping and Adjacency, then to &find_symmetry, then the ref is placed on the output queue.

sub find_symmetry { my $compound = shift @_; $$compound{Symmetries} = []; my $M_matrix = $$compound{Mapping}; my $A_matrix = $$compound{Adjacency}; my @isomorphs; my $row = 0; my $dimension = $#$A_matrix; my @indices = (0..$dimension); my @reverse = reverse @indices; my @used = map {0;} (@indices); my @mapping = map {-1;} (@indices); my (@sm_rows, @same_rows); foreach my $row (@$A_matrix) { push @sm_rows, $row; } foreach my $row (@$M_matrix) { push @same_rows, $row;} my $equate_sub = sub { for (reverse(0..$row)) { my (@smrow, @omrow); alias @smrow = @{$sm_rows[$_]}; alias @omrow = @{$sm_rows[$mapping[$_]]}; for (0..$row) { if ($smrow[$_] != $omrow[$mapping[$_]]) { return 0; } } } return 1; }; LOOP: while(1) { do { $mapping[$row] ++ } while ( $mapping[$row] <= $dimension and +($used[$mapping[$row]] == 1 || $same_rows[$row][$mapping[$row]] == 0 +)); next if ($mapping[$row] > $dimension); if ( $row == $dimension ) { OUTER: { for (@indices) { my (@smrow2, @omrow2); alias @smrow2 = @{$sm_rows[$_]}; alias @omrow2 = @{$sm_rows[$mapping[$_]]}; for (@reverse) { if ( $smrow2[$_] != $omrow2[$mapping[$_]] ) { last OUTER; } } } push @{$$compound{Symmetries}}, [@mapping]; # print join(",",@mapping), "\n"; } } elsif ( $row == 0 or &$equate_sub == 1 ) { $used[$mapping[$row]] = 1; $row++; } } continue { while ( $row >= 0 and $mapping[$row] >= $dimension ) { last LOOP if ($row == 0); $mapping[$row] = -1; $row --; $used[$mapping[$row]] = 0; } } $$compound{Mapping} = undef; $$compound{Adjacency} = undef; # return \@isomorphs; }

Any clarity you can give on this would be great, threads always confuse me with what sometimes works with them and what sometimes won't. They are really useful but sometimes annoying.

Replies are listed 'Best First'.
Re^5: Dereferencing Performance - Confused by Results
by BrowserUk (Patriarch) on Oct 05, 2014 at 15:27 UTC

    Could you post the code where this sub is called?

    Including the declaration & initialisation of the $compound input parameter.


    With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
    Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
    "Science is about questioning the status quo. Questioning authority".
    In the absence of evidence, opinion is indistinguishable from prejudice.

      Here is everything, there is a little that isn't directly related, but cutting it out won't improve readability much and may be important so I left it. $compound is created in &find_symmetry_file which is the last sub in this script but it is used throughout. Thanks.

      #!/usr/bin/perl use Data::Alias; use strict; use warnings; use threads; use Thread::Queue; ### Set parameters for this run, no. of threads, where to print, what +to read my $thread_count = shift @ARGV; my $output_dir = shift @ARGV; our $THREADS = $thread_count; my $file_q = Thread::Queue->new(); my $result_q = Thread::Queue->new(); ###Make threads enqueue work my @threads = map { threads->create( \&thread_wrapper, $file_q, $resul +t_q ); } 1..$THREADS; while ( @ARGV ) { my $file = shift @ARGV; $file_q->enqueue($file); } sleep 1; ###Threads done, join them and wait $file_q->enqueue((undef) x $THREADS); map {$_->join();} @threads; sleep 1; ###Print results use JSON; if (!(-d "$output_dir")) { mkdir $output_dir; } for (1..$THREADS) { while ( my $result = $result_q->dequeue ) { my $json_result = encode_json $result; # print $json_result, "\n"; my $target = $output_dir . $$result{ID} . "_entry". $$result{Entry +} . ".txt.DONE"; print $json_result, "\n"; # open(RESULT, '>', $target); # print RESULT $json_result; # close RESULT; } } ###Subs ###This is what the threads need to do, not very elegant but easy to m +ake from the single thread version sub thread_wrapper { my ( $tid, $Qwork, $Qresults ) = ( threads->tid, $_[0], $_[1] ); while ( my $work = $Qwork->dequeue ) { my $compound = &find_symmetry_file("$work"); $Qresults->enqueue($compound); } $Qresults->enqueue(undef); } ###Find all mapping vectors for the molecule sub find_symmetry { #use Data::Alias; #this gives same errors. my $compound = shift @_; $$compound{Symmetries} = []; my $M_matrix = $$compound{Mapping}; my $A_matrix = $$compound{Adjacency}; my @isomorphs; my $row = 0; my $dimension = $#$A_matrix; my @indices = (0..$dimension); my @reverse = reverse @indices; my @used = map {0;} (@indices); my @mapping = map {-1;} (@indices); my (@sm_rows, @same_rows); foreach my $row (@$A_matrix) { push @sm_rows, $row; } foreach my $row (@$M_matrix) { push @same_rows, $row;} my $equate_sub = sub { for (reverse(0..$row)) { my (@smrow, @omrow); alias @smrow = @$sm_rows[$_]; alias @omrow = @$sm_rows[$mapping[$_]]; for (0..$row) { if ($smrow[$_] != $omrow[$mapping[$_]]) { return 0; } } } return 1; }; LOOP: while(1) { do { $mapping[$row] ++ } while ( $mapping[$row] <= $dimension and +($used[$mapping[$row]] == 1 || $same_rows[$row][$mapping[$row]] == 0 +)); next if ($mapping[$row] > $dimension); if ( $row == $dimension ) { OUTER: { for (@indices) { my (@smrow, @omrow); alias @smrow = @$sm_rows[$_]; alias @omrow = @$sm_rows[$mapping[$_]]; for (@reverse) { if ( $smrow2[$_] != $omrow2[$mapping[$_]] ) { last OUTER; } } } push @{$$compound{Symmetries}}, [@mapping]; } } elsif ( $row == 0 or &$equate_sub == 1 ) { $used[$mapping[$row]] = 1; $row++; } } continue { while ( $row >= 0 and $mapping[$row] >= $dimension ) { last LOOP if ($row == 0); $mapping[$row] = -1; $row --; $used[$mapping[$row]] = 0; } } delete $$compound{Mapping}; #don't need this anymore; delete $$compound{Ajdacency}; #don't need this anymore; # $$compound{Mapping} = undef; #same as above but leaves key so JSON +ain't pretty # $$compound{Adjacency} = undef; # return \@isomorphs; } ###Make M_matrix for molecule sub make_mapping_matrix { my $compound = shift @_; my @types = @{$$compound{KEGG_Types}}; my $mapping = $$compound{Mapping}; for my $i (0..$#types) { for my $j ($i..$#types) { if ($types[$i] eq $types[$j]) { $$mapping[$i][$j] = 1; $$mapping[$j][$i] = 1; } } } delete $$compound{KEGG_Types}; # $$compound{KEGG_Types} = undef; } ###Make A_matrix for molecule sub make_adjacency_matrix { my $compound = shift @_; my @bonds = @{$$compound{Bonds}}; my $adjacency = $$compound{Adjacency}; foreach my $bond (@bonds) { $$adjacency[$$bond[0]-1][$$bond[1]-1] = $$bond[2]; $$adjacency[$$bond[1]-1][$$bond[0]-1] = $$bond[2]; } delete $$compound{Bonds}; # $$compound{Bonds} = undef; } ###parse kcf file, make compound representation, make matrices, find s +ymmetry and return. sub find_symmetry_file { my $file = shift @_; open FILE, $file; my %compound; my $parser_state = 0; OUTER: while ( my $line = <FILE> ) { chomp $line; if ( $parser_state == 1 and $line =~ /COMPOUND/) { $parser_state++; $compound{"ID"} = join("",split(/COMPOUND|\s/,$line)); } elsif ( $parser_state == 0 and $line =~ /^1|^2/ ) { $parser_state++; $compound{"Entry"} = $line; } elsif ( $parser_state == 2 and $line =~ /ATOM/ ) { $parser_state++; # $compound{"Atom_Count"} = join("",split(/ATOM|\s/,$line)); my $atom_count = join("",split(/ATOM|\s/,$line)); $compound{"KEGG_Types"} = []; $compound{"Mapping"} = [ map { [ map { 0; } (0..$atom_count-1) +]; } (0..$atom_count-1) ]; $compound{"Adjacency"} = [ map { [ map { 0; } (0..$atom_count-1) + ]; } (0..$atom_count-1) ]; } elsif ( $parser_state == 3 and $line =~ /BOND/ ) { $parser_state++; # $compound{"Bond_Count"} = join("",split(/BOND|\s/,$line)); $compound{"Bonds"} = []; } elsif ( $parser_state == 4 and $line =~ /BRACKET/ ) { last OUTER; $parser_state++; } elsif ( $parser_state == 3 ) #and $line =~ /\s*\d*[A-Z]\d?[a-z]?\ +s*[A-Z]/ ) { my ($junk, $index, $kegg_type, $element_type, $x, $y ) = split(/ +\s+/,$line); push @{$compound{"KEGG_Types"}}, $kegg_type; } elsif ( $parser_state == 4 ) #and $line =~ /\s+\d+\s+\d+\s+/ ) { my ($bond, $junk1) = split(/#/,$line); my ($junk2, $index, $atom_1, $atom_2, $order) = split(/\s+/,$lin +e); my @array = ($atom_1, $atom_2, $order); push @{$compound{"Bonds"}}, \@array; } } &make_mapping_matrix(\%compound); &make_adjacency_matrix(\%compound); &find_symmetry(\%compound); return \%compound; close FILE; }