in reply to dynamic number of threads based on CPU utilization
This compiles clean, but is of necessity untested.
Substitute your procXML() routine and it should come close to doing the same thing(*) as the code you posted, but rather more quickly:
*But verify carefully that I've refactored it correctly!
#!/usr/bin/perl
use strict;
use warnings;
use Carp qw(carp cluck croak confess);
use XML::Hash;
use File::Slurp;
use Date::Parse;
binmode STDOUT, ":utf8";
use threads;
use threads::shared;
use Thread::Queue;
use Sys::CPU;
use Devel::Size qw(size total_size);
use List::MoreUtils qw(uniq);
#use Data::Dumper;

## Overview:
##   1. Collect the *.xml file names found in $dir.
##   2. Feed them through a queue to a fixed pool of worker threads; each
##      worker calls procXml() per file and writes any returned triple to
##      the output file.
##   3. Once all workers have finished, compare every pair of keys of the
##      shared %similar hash by the overlap of their 2-character chunks and
##      emit a "similarName" triple for each ordered pair scoring >= 0.9.

local $| = 1;

print `/bin/date`."\n";

## Two worker threads per CPU reported by Sys::CPU.
our $THREADS = Sys::CPU::cpu_count()*2;

my $dir = '/xmlFeeds';

## Gather the names (not full paths) of the XML files in $dir.
## NOTE(review): procXml() receives the bare file name; presumably it
## prepends the directory itself -- confirm when substituting your routine.
opendir( my $DIR, $dir ) or die "Cannot open directory '$dir': $!";
my @files = grep { m/\.xml\z/ } readdir($DIR);
closedir($DIR);

my $outFile = './out.nt';
my $OUTFILE;
open( $OUTFILE, '>:utf8', $outFile ) or die "Cannot open '$outFile' for writing: $!";

## Shared state. Nothing visible in this script writes to either variable;
## presumably the substituted procXml() populates %similar (key => type)
## and uses $recordCount -- TODO confirm.
my %similar :shared;
my $recordCount :shared;
$recordCount = 1;

my $Qwork = Thread::Queue->new;

## Create the pool of workers
my @pool = map { threads->create( \&worker, $Qwork ) } 1 .. $THREADS;

$Qwork->enqueue(@files);

## Tell the workers there are no more work items
$Qwork->enqueue( (undef) x $THREADS );

## Clean up the threads
$_->join for @pool;

## Copy keys and values into non-shared space for speed
my @doms = keys %similar;
my %typeOf;
@typeOf{ @doms } = @similar{ @doms };

## For each name, build the set of its distinct 2-character chunks
## (as produced by unpack '(A2)*').
my %bigrams;
for my $dom ( @doms ) {
    undef @{ $bigrams{ $dom } }{ uniq( unpack '(A2)*', $dom ) };
}

## Size of each chunk set, computed once rather than once per pair.
my %bigramCount = map { $_ => scalar keys %{ $bigrams{ $_ } } } @doms;

for my $dom1 ( @doms ) {
    my $type  = $typeOf{ $dom1 };
    my $cDom1 = $bigramCount{ $dom1 };

    for my $dom2 ( @doms ) {
        next if $dom1 eq $dom2;

        my $innerType = $typeOf{ $dom2 };
        my $cDom2     = $bigramCount{ $dom2 };

        ## Number of chunks the two names have in common.
        my $counter = grep { exists $bigrams{ $dom1 }{ $_ } } keys %{ $bigrams{ $dom2 } };

        ## Similarity score: 2 * |common| / ( |set1| + |set2| ).
        my $value = ( $counter * 2 ) / ( $cDom1 + $cDom2 );

        if( $value >= 0.9 ) {
            my $triple = qq|<http://cs.org/$type#$dom1> <http://cs.org/p/similarName> <http://cs.org/$innerType#$dom2> .\n|;
            print $triple;
            print $OUTFILE $triple;
        }
    }
}

close($OUTFILE) or die "Cannot close '$outFile': $!";

print `/bin/date`."\n";

## worker( $Qwork )
##   Thread body. Dequeues file names from $Qwork until it receives undef
##   (the end-of-work marker), passes each name to procXml(), and prints
##   any defined result to the output file handle.
## NOTE(review): every worker prints to the same $OUTFILE handle without
## any locking visible here -- confirm that output cannot interleave.
sub worker {
    my( $Qwork ) = @_;

    ## Test definedness, not truth, so only the undef marker ends the loop.
    while( defined( my $file = $Qwork->dequeue ) ) {
        my $triple = procXml($file);
        print $OUTFILE $triple if defined $triple;
    }
}

## Placeholder: substitute your own procXml() routine here.
sub procXml { [code here] }
|
---|
Replies are listed 'Best First'. | |
---|---|
Re^2: dynamic number of threads based on CPU utilization
by mabossert (Scribe) on Sep 26, 2012 at 16:41 UTC | |
Re^2: dynamic number of threads based on CPU utilization
by mabossert (Scribe) on Sep 27, 2012 at 01:03 UTC | |
by BrowserUk (Patriarch) on Sep 27, 2012 at 01:50 UTC | |
by mabossert (Scribe) on Sep 27, 2012 at 14:59 UTC | |
by BrowserUk (Patriarch) on Sep 27, 2012 at 17:27 UTC | |
by BrowserUk (Patriarch) on Sep 27, 2012 at 15:05 UTC |