Esteemed jethro, I appreciate your thoughts. I will look at the Coro package. Meanwhile, I have no I/O wait because the .db file is cached in memory from an earlier run. But I took a closer look at what was going on. I ran a test program with no threads and it takes 94 sec. My KDE system monitor is now set up to show CPU usage for each core separately, with usr, sys and wait for each. With no threads only one CPU is used, at 85% usr. With 4 threads the code takes 23 sec and shows 85% usr on each of the 4 CPUs. It's hard to avoid the conclusion that the threads are helping. Note that the CPU usr time is 71 seconds, showing the code is in fact running in parallel. I bet there is a much better way to do this. What I think I will do is just buy a new motherboard and 16 GB of memory, then just multiply a matrix times a vector and be done. But I have learned something, especially with your and esteemed clinton's help. Thanks.
I attach the two test programs with their output, and I hope you will excuse the crude Perl; I am only learning and I want to keep things clear so I don't get confused.
#! /usr/bin/perl -w
# nothread.pl - single-threaded baseline.
#
# Ties the MLDBM database read-only, then calls hashValueDot() once per key,
# dotting each stored hash with itself, and reports the elapsed time on STDERR.
use strict;
use threads;
use MLDBM::Sync;
use MLDBM qw(DB_File Storable);
use Fcntl;
use Benchmark;
# hashValueDot() is not defined in this script; presumably it comes from
# Prep.pl -- confirm.
require '/home/silly/g/prep/Prep.pl';

my $file = "/home/silly/g/data/117373HOHsynTriNormCV.db";
my %syntrihash;
tie %syntrihash, 'MLDBM::Sync', $file, O_RDONLY
    or die "tie failed for db $!\n";

my $NumbOfKeys = keys %syntrihash;
print "Number of Dots to compute: $NumbOfKeys\n";

# Only the dot-product loop (plus the untie) is timed.
my $t0 = Benchmark->new;
foreach my $synset (keys %syntrihash) {
    my $hashref = \%{ $syntrihash{$synset} };

    # for testing i just dot the hash with itself
    # NOTE(review): hashValueDot appears to hand its result back through the
    # third argument -- confirm against Prep.pl.
    my $dot;
    hashValueDot($hashref, $hashref, $dot);
}
untie %syntrihash;

my $t1  = Benchmark->new;
my $td1 = timediff($t1, $t0);
print STDERR "the code took:", timestr($td1, 'all'), " to do dots\n";
silly@bluetit:~/perl/threads$ nothread.pl
Number of Dots to compute: 117369
the code took:94 wallclock secs (80.81 usr 13.03 sys + 0.00 cusr 0.00 csys = 93.84 CPU) to do dots
Now with 4 threads:
silly@bluetit:~/perl/threads$ cat thread.pl
#! /usr/bin/perl -w
# thread.pl - same computation as the single-threaded version, split across
# four threads.
#
# Phase 1 ("prep"): tie the database, split its keys into four consecutive
# slices, untie again. Phase 2: start one thread per slice (subthr1..subthr4,
# defined further down in this file), join them, and report the elapsed time.
use strict;
use threads;
use MLDBM::Sync;
use MLDBM qw(DB_File Storable);
use Fcntl;
use Benchmark;

my $t0 = Benchmark->new;

# hashValueDot() is not defined in this script; presumably it comes from
# Prep.pl -- confirm.
require '/home/silly/g/prep/Prep.pl';

my $file = "/home/silly/g/data/117373HOHsynTriNormCV.db";
my %syntrihash;
tie %syntrihash, 'MLDBM::Sync', $file, O_RDONLY
    or die "tie failed for db $!\n";

# Read the key list from the tied hash once and reuse it for both the count
# and the partitioning below.
my @synsets    = keys %syntrihash;
my $NumbOfKeys = @synsets;
print "Number of Dots to compute: $NumbOfKeys\n";

# Slice boundaries: the first three slices get int(N/4) keys each, the fourth
# slice takes whatever is left (including the remainder of the division).
my $EndSection1 = int($NumbOfKeys / 4);
my $EndSection2 = $EndSection1 + int($NumbOfKeys / 4);
my $EndSection3 = $EndSection2 + int($NumbOfKeys / 4);

# These arrays are read by subthr1..subthr4 respectively.
my @synsetPart1;
my @synsetPart2;
my @synsetPart3;
my @synsetPart4;

my $k = 0;
foreach my $ss (@synsets) {
    if    ($k < $EndSection1) { push @synsetPart1, $ss; }
    elsif ($k < $EndSection2) { push @synsetPart2, $ss; }
    elsif ($k < $EndSection3) { push @synsetPart3, $ss; }
    else                      { push @synsetPart4, $ss; }
    $k += 1;
}

# Release the main thread's tie before starting threads; every worker ties
# the file again for itself.
untie %syntrihash;

my $t1  = Benchmark->new;
my $td1 = timediff($t1, $t0);
print STDERR "the code took:", timestr($td1, 'all'), " to prep\n";

# File-scoped accumulator used by the subthrN subs below: each worker fills
# it and returns its contents. It has to be declared ahead of those sub
# definitions so they compile under strict.
my %synsetDotHash;

$t0 = Benchmark->new;

# The workers return a flattened hash, so they are created in list context.
my $thr1 = threads->create({ 'context' => 'array' }, \&subthr1, "test1");
my $thr2 = threads->create({ 'context' => 'array' }, \&subthr2, "test2");
my $thr3 = threads->create({ 'context' => 'array' }, \&subthr3, "test3");
my $thr4 = threads->create({ 'context' => 'array' }, \&subthr4, "test4");

# join() blocks until the thread finishes and yields its return list.
my %return1 = $thr1->join();
my %return2 = $thr2->join();
my %return3 = $thr3->join();
my %return4 = $thr4->join();

$t1  = Benchmark->new;
$td1 = timediff($t1, $t0);
print STDERR "the code took:", timestr($td1, 'all'), " to do threaded dots\n";
# Thread worker for the first key slice (@synsetPart1).
#
# Ties the database read-only, calls hashValueDot() on every hash in the
# slice (dotting it with itself) and records the result per key in
# %synsetDotHash.
#
# Args:    $message - label echoed on STDOUT when the work is done.
# Returns: the contents of %synsetDotHash as a flat key/value list.
# Dies:    if the database cannot be tied.
sub subthr1 {
    my ($message) = @_;

    tie %syntrihash, 'MLDBM::Sync', $file, O_RDONLY
        or die "tie failed for db $!\n";

    foreach my $synset (@synsetPart1) {
        my $hashref = \%{ $syntrihash{$synset} };

        # NOTE(review): hashValueDot appears to hand its result back through
        # the third argument -- confirm against Prep.pl.
        my $dot;
        hashValueDot($hashref, $hashref, $dot);
        $synsetDotHash{$synset} = $dot;
    }

    # Release the database once the slice is processed, as the main script
    # does after its own pass over the keys.
    untie %syntrihash;

    print "Thread Message is $message\n";
    return (%synsetDotHash);
}

# Thread worker for the second key slice (@synsetPart2).
#
# Ties the database read-only, calls hashValueDot() on every hash in the
# slice (dotting it with itself) and records the result per key in
# %synsetDotHash.
#
# Args:    $message - label echoed on STDOUT when the work is done.
# Returns: the contents of %synsetDotHash as a flat key/value list.
# Dies:    if the database cannot be tied.
sub subthr2 {
    my ($message) = @_;

    tie %syntrihash, 'MLDBM::Sync', $file, O_RDONLY
        or die "tie failed for db $!\n";

    foreach my $synset (@synsetPart2) {
        my $hashref = \%{ $syntrihash{$synset} };

        # NOTE(review): hashValueDot appears to hand its result back through
        # the third argument -- confirm against Prep.pl.
        my $dot;
        hashValueDot($hashref, $hashref, $dot);
        $synsetDotHash{$synset} = $dot;
    }

    # Release the database once the slice is processed, as the main script
    # does after its own pass over the keys.
    untie %syntrihash;

    print "Thread Message is $message\n";
    return (%synsetDotHash);
}
# Thread worker for the third key slice (@synsetPart3).
#
# Ties the database read-only, calls hashValueDot() on every hash in the
# slice (dotting it with itself) and records the result per key in
# %synsetDotHash.
#
# Args:    $message - label echoed on STDOUT when the work is done.
# Returns: the contents of %synsetDotHash as a flat key/value list.
# Dies:    if the database cannot be tied.
sub subthr3 {
    my ($message) = @_;

    tie %syntrihash, 'MLDBM::Sync', $file, O_RDONLY
        or die "tie failed for db $!\n";

    foreach my $synset (@synsetPart3) {
        my $hashref = \%{ $syntrihash{$synset} };

        # NOTE(review): hashValueDot appears to hand its result back through
        # the third argument -- confirm against Prep.pl.
        my $dot;
        hashValueDot($hashref, $hashref, $dot);
        $synsetDotHash{$synset} = $dot;
    }

    # Release the database once the slice is processed, as the main script
    # does after its own pass over the keys.
    untie %syntrihash;

    print "Thread Message is $message\n";
    return (%synsetDotHash);
}
# Thread worker for the fourth key slice (@synsetPart4).
#
# Ties the database read-only, calls hashValueDot() on every hash in the
# slice (dotting it with itself) and records the result per key in
# %synsetDotHash.
#
# Args:    $message - label echoed on STDOUT when the work is done.
# Returns: the contents of %synsetDotHash as a flat key/value list.
# Dies:    if the database cannot be tied.
sub subthr4 {
    my ($message) = @_;

    tie %syntrihash, 'MLDBM::Sync', $file, O_RDONLY
        or die "tie failed for db $!\n";

    foreach my $synset (@synsetPart4) {
        my $hashref = \%{ $syntrihash{$synset} };

        # NOTE(review): hashValueDot appears to hand its result back through
        # the third argument -- confirm against Prep.pl.
        my $dot;
        hashValueDot($hashref, $hashref, $dot);
        $synsetDotHash{$synset} = $dot;
    }

    # Release the database once the slice is processed, as the main script
    # does after its own pass over the keys.
    untie %syntrihash;

    print "Thread Message is $message\n";
    return (%synsetDotHash);
}
silly@bluetit:~/perl/threads$ thread.pl
Number of Dots to compute: 117369
the code took: 2 wallclock secs ( 1.92 usr 0.67 sys + 0.00 cusr 0.00 csys = 2.59 CPU) to prep
Thread Message is test2
Thread Message is test3
Thread Message is test1
Thread Message is test4
the code took:23 wallclock secs (70.72 usr 12.29 sys + 0.00 cusr 0.00 csys = 83.01 CPU) to do threaded dots
|