Here is everything, there is a little that isn't directly related, but cutting it out won't improve readability much and may be important so I left it. $compound is created in &find_symmetry_file which is the last sub in this script but it is used throughout. Thanks.
#!/usr/bin/perl
use Data::Alias;
use strict;
use warnings;
use threads;
use Thread::Queue;
### Set parameters for this run, no. of threads, where to print, what
+to read
my $thread_count = shift @ARGV;
my $output_dir = shift @ARGV;
our $THREADS = $thread_count;
my $file_q = Thread::Queue->new();
my $result_q = Thread::Queue->new();
###Make threads enqueue work
my @threads = map { threads->create( \&thread_wrapper, $file_q, $resul
+t_q ); } 1..$THREADS;
while ( @ARGV )
{
my $file = shift @ARGV;
$file_q->enqueue($file);
}
sleep 1;
###Threads done, join them and wait
$file_q->enqueue((undef) x $THREADS);
map {$_->join();} @threads;
sleep 1;
###Print results
use JSON;
if (!(-d "$output_dir"))
{
mkdir $output_dir;
}
for (1..$THREADS)
{
while ( my $result = $result_q->dequeue )
{
my $json_result = encode_json $result;
# print $json_result, "\n";
my $target = $output_dir . $$result{ID} . "_entry". $$result{Entry
+} . ".txt.DONE";
print $json_result, "\n";
# open(RESULT, '>', $target);
# print RESULT $json_result;
# close RESULT;
}
}
###Subs
###This is what the threads need to do, not very elegant but easy to m
+ake from the single thread version
sub thread_wrapper
{
my ( $tid, $Qwork, $Qresults ) = ( threads->tid, $_[0], $_[1] );
while ( my $work = $Qwork->dequeue )
{
my $compound = &find_symmetry_file("$work");
$Qresults->enqueue($compound);
}
$Qresults->enqueue(undef);
}
###Find all mapping vectors for the molecule
sub find_symmetry
{
#use Data::Alias; #this gives same errors.
my $compound = shift @_;
$$compound{Symmetries} = [];
my $M_matrix = $$compound{Mapping};
my $A_matrix = $$compound{Adjacency};
my @isomorphs;
my $row = 0;
my $dimension = $#$A_matrix;
my @indices = (0..$dimension);
my @reverse = reverse @indices;
my @used = map {0;} (@indices);
my @mapping = map {-1;} (@indices);
my (@sm_rows, @same_rows);
foreach my $row (@$A_matrix) { push @sm_rows, $row; }
foreach my $row (@$M_matrix) { push @same_rows, $row;}
my $equate_sub = sub
{
for (reverse(0..$row))
{
my (@smrow, @omrow);
alias @smrow = @$sm_rows[$_];
alias @omrow = @$sm_rows[$mapping[$_]];
for (0..$row)
{
if ($smrow[$_] != $omrow[$mapping[$_]])
{
return 0;
}
}
}
return 1;
};
LOOP: while(1)
{
do { $mapping[$row] ++ } while ( $mapping[$row] <= $dimension and
+($used[$mapping[$row]] == 1 || $same_rows[$row][$mapping[$row]] == 0
+));
next if ($mapping[$row] > $dimension);
if ( $row == $dimension )
{
OUTER:
{
for (@indices)
{
my (@smrow, @omrow);
alias @smrow = @$sm_rows[$_];
alias @omrow = @$sm_rows[$mapping[$_]];
for (@reverse)
{
if ( $smrow2[$_] != $omrow2[$mapping[$_]] )
{
last OUTER;
}
}
}
push @{$$compound{Symmetries}}, [@mapping];
}
}
elsif ( $row == 0 or &$equate_sub == 1 )
{
$used[$mapping[$row]] = 1;
$row++;
}
}
continue
{
while ( $row >= 0 and $mapping[$row] >= $dimension )
{
last LOOP if ($row == 0);
$mapping[$row] = -1;
$row --;
$used[$mapping[$row]] = 0;
}
}
delete $$compound{Mapping}; #don't need this anymore;
delete $$compound{Ajdacency}; #don't need this anymore;
# $$compound{Mapping} = undef; #same as above but leaves key so JSON
+ain't pretty
# $$compound{Adjacency} = undef;
# return \@isomorphs;
}
###Make M_matrix for molecule
sub make_mapping_matrix
{
my $compound = shift @_;
my @types = @{$$compound{KEGG_Types}};
my $mapping = $$compound{Mapping};
for my $i (0..$#types)
{
for my $j ($i..$#types)
{
if ($types[$i] eq $types[$j])
{
$$mapping[$i][$j] = 1;
$$mapping[$j][$i] = 1;
}
}
}
delete $$compound{KEGG_Types};
# $$compound{KEGG_Types} = undef;
}
###Make A_matrix for molecule
sub make_adjacency_matrix
{
my $compound = shift @_;
my @bonds = @{$$compound{Bonds}};
my $adjacency = $$compound{Adjacency};
foreach my $bond (@bonds)
{
$$adjacency[$$bond[0]-1][$$bond[1]-1] = $$bond[2];
$$adjacency[$$bond[1]-1][$$bond[0]-1] = $$bond[2];
}
delete $$compound{Bonds};
# $$compound{Bonds} = undef;
}
###parse kcf file, make compound representation, make matrices, find s
+ymmetry and return.
sub find_symmetry_file
{
my $file = shift @_;
open FILE, $file;
my %compound;
my $parser_state = 0;
OUTER: while ( my $line = <FILE> )
{
chomp $line;
if ( $parser_state == 1 and $line =~ /COMPOUND/)
{
$parser_state++;
$compound{"ID"} = join("",split(/COMPOUND|\s/,$line));
}
elsif ( $parser_state == 0 and $line =~ /^1|^2/ )
{
$parser_state++;
$compound{"Entry"} = $line;
}
elsif ( $parser_state == 2 and $line =~ /ATOM/ )
{
$parser_state++;
# $compound{"Atom_Count"} = join("",split(/ATOM|\s/,$line));
my $atom_count = join("",split(/ATOM|\s/,$line));
$compound{"KEGG_Types"} = [];
$compound{"Mapping"} = [ map { [ map { 0; } (0..$atom_count-1)
+]; } (0..$atom_count-1) ];
$compound{"Adjacency"} = [ map { [ map { 0; } (0..$atom_count-1)
+ ]; } (0..$atom_count-1) ];
}
elsif ( $parser_state == 3 and $line =~ /BOND/ )
{
$parser_state++;
# $compound{"Bond_Count"} = join("",split(/BOND|\s/,$line));
$compound{"Bonds"} = [];
}
elsif ( $parser_state == 4 and $line =~ /BRACKET/ )
{
last OUTER;
$parser_state++;
}
elsif ( $parser_state == 3 ) #and $line =~ /\s*\d*[A-Z]\d?[a-z]?\
+s*[A-Z]/ )
{
my ($junk, $index, $kegg_type, $element_type, $x, $y ) = split(/
+\s+/,$line);
push @{$compound{"KEGG_Types"}}, $kegg_type;
}
elsif ( $parser_state == 4 ) #and $line =~ /\s+\d+\s+\d+\s+/ )
{
my ($bond, $junk1) = split(/#/,$line);
my ($junk2, $index, $atom_1, $atom_2, $order) = split(/\s+/,$lin
+e);
my @array = ($atom_1, $atom_2, $order);
push @{$compound{"Bonds"}}, \@array;
}
}
&make_mapping_matrix(\%compound);
&make_adjacency_matrix(\%compound);
&find_symmetry(\%compound);
return \%compound;
close FILE;
}
|