zentara has asked for the wisdom of the Perl Monks concerning the following question:
But after some experimenting and testing, I found that it did not make a difference.... in fact the speeds for seaching 1 huge file were nearly identical to 4 smaller files. It almost seemed that the whole threaded process group was limited in how many lines per second it could process.
In the sample code below, I make 4 files of 1,000,000 lines each, and I take the very last number of the numbers4 file, as the search-number. Then I concat all the 4 files into 1 big numbers5 file, with the search-number still the last line. Regardless of which way I do it, it takes 15 seconds to find the last line.
Can someone shed some light on why the 4 threads cannot process their smaller files faster than the one big file in a single thread?
Now I thought it might be because of the timer callback, but I cut that to a minimum and it was not a factor. Then I thought it may be a collision between the FH handle names? Or is it just that linux only has 1 filehandle pointer available to anyone process, and it must be shared between all threads?
I did find that I got a triple speed improvement if I did a bactick in the threads, instead of letting perl loop thru the filehandles.
that made both test runs run in one third the time, BUT once again, both times were identical. On my 2ghz linux system, it takes 15 seconds for the Perl filehandle searching, and 5 seconds with the backticked grep, regardless of whether it's 1 big file or 4 smaller ones.my $return = `grep -w $searchnum $file`; if(length $return){ $shash{$dthread}{'luck'} = 1;}
#!/usr/bin/perl use warnings; use strict; use Tk; use Tk::Dialog; use threads; use threads::shared; use Tk::Animation; my $debug = 0; #if you want some helpful timing messages ########## a script to make the data files numbers1..4 ##!/usr/bin/perl #use warnings; #use strict; #my $file = shift or die; #open (FH,">$file") or die; #for(1..1000000){ # my $first = sprintf('%03d', int rand 999); # my $second = sprintf('%03d', int rand 999); # my $third = sprintf('%04d', int rand 9999); # print FH "$first-$second-$third\n"; # #} ########################################################## my @sfiles = qw(numbers1 numbers2 numbers3 numbers4); #my @sfiles = qw(numbers5); # make threads first before any tk code my %shash; my %thash; my $numworkers = scalar @sfiles; my $searchnum; share $searchnum; $searchnum = '626-008-7681'; # last number of numbers4 foreach my $dthread(1..$numworkers){ share ($shash{$dthread}{'go'}); share ($shash{$dthread}{'die'}); share ($shash{$dthread}{'file'}); share ($shash{$dthread}{'return'}); #signal number found share ($shash{$dthread}{'luck'}); #signal no number found $shash{$dthread}{'go'} = 0; $shash{$dthread}{'file'} = shift @sfiles; $shash{$dthread}{'die'} = 0; $shash{$dthread}{'return'} = ''; $shash{$dthread}{'luck'} = 0; $thash{$dthread}{'thread'} = threads->new(\&work,$dthread); } my $searching = 0; #flag to prevent multiple button pushes my $mw = MainWindow->new; #prevents mw from closing $mw->protocol('WM_DELETE_WINDOW' => \&Quit); # topframe my $tframe = $mw->Frame()->pack(-fill =>'x'); my $lab = $tframe->Label ( -text => "Enter Phone Number", -background => "white" )->pack(-side =>'left'); ##############################Label my $Entry = $tframe->Entry( -textvariable => \$searchnum, -background => "white" )->pack(-side =>'left',-padx => 2); #allow an enter key press to start search in entry box $Entry->bind('<Return>', \&Search); my $labWarning = $tframe->Label ( -text => 'Example XXX - XXX - XXXX', -background => "white" )->pack(-side => 'left',-padx => 10); ########################################## my $text = $mw->Scrolled('Text', -height => 15, -width => 55, -wrap => 'char', -bg=> => 'lightyellow', -scrollbars =>'osoe', ); $text->pack(-expand => 1, -fill=>'both'); # bottom frame my $bframe = $mw->Frame(-bg => 'black')->pack(-fill =>'x',-padx =>5); my $quit = $bframe->Button( -text => "Quit", -padx => "8", -command => \&Quit)->pack(-side =>'right'); ########################### my $Find = $bframe->Button( -text => "Search", -command => \&Search)->pack(-side =>'right', -padx =>10); # make active indicator my $spinner = $mw->Animation('-format' => 'gif', -data => get_gif() ) +; $bframe->Label( -bg => 'black', -image => $spinner, )->pack(-side =>'left'); MainLoop; ################################# sub Quit { my $d = $mw->Dialog( -text => "Are you sure you want to quit?", -buttons => ["Yes", "No"]); my $answer = $d->Show(); #cleanup threads if($answer eq "Yes"){ for(1..$numworkers){ $shash{$_}{'die'} = 1; $thash{$_}{'thread'}->join; } exit; } } ############################################################## sub Search { return if $searching; $searching = 1; my $found = 0; for(1..$numworkers){ $shash{$_}{'go'} = 1; } #setup a timer to watch for returns thru shared variables my $timer; # declare outside of block so you can kill it in callback $timer = $mw->repeat(100,sub{ my $luck = 0; my $luckythread; my $message = ''; $spinner->next_image; #loop and check flags for lucky bit for(1..$numworkers){ if( $shash{$_}{'luck'} == 1){ $luck = 1; $luckythread = $_; } } #check for no luck in all threads if( $luck == 0){ my $count = 0; for(1..$numworkers){ if( $shash{$_}{'luck'} == -1 ){ $count += $shash{$_}{'luck'}; } } print "count -> $count\n" if $debug; if( $count == -$numworkers ){ $message = "$searchnum NOT FOUND \n"; $text->insert('end',"$message"); #stop other threads for(1..$numworkers){ $shash{$_}{'go'} = 0; $shash{$_}{'luck'} = 0; $shash{$_}{'return'} = ''; } $spinner->stop_animation; #stop timer callback $searching = 0; $timer->cancel; } } if($luck == 1){ $message = "$searchnum in thread $luckythread ". "line $shash{$luckythread}{'return'}\n"; $text->insert('end',"$message"); #stop other threads for(1..$numworkers){ $shash{$_}{'go'} = 0; $shash{$_}{'luck'} = 0; $shash{$_}{'return'} = ''; } $spinner->stop_animation; #stop timer callback $searching = 0; $timer->cancel; } }); } ###################################################################### +# sub work{ my $dthread = shift; $|++; my $file = $shash{$dthread}{'file'}; open(FH,"< $file") or die "$!\n"; print "thread $dthread created\n" if $debug; while(1){ if($shash{$dthread}{'die'} == 1){ goto END }; if ( $shash{$dthread}{'go'} == 1 ){ seek FH, 0, 0; #reset to top $shash{$dthread}{'return'} = ''; $shash{$dthread}{'luck'} = 0; $. = 0; #reset filehandle line counter print "thread $dthread going\n" if $debug; while(my $line = <FH>){ chomp $line; if($line eq $searchnum){ $shash{$dthread}{'return'} = $.; $shash{$dthread}{'luck'} = 1; print "$dthread found $shash{$dthread}{'return'}\ +n" if $debug; goto RESET; } if($shash{$dthread}{'go'} == 0){goto RESET} if($shash{$dthread}{'die'} == 1){ goto END }; + } RESET: if( $shash{$dthread}{'luck'} == 0){ $shash{$dthread}{'luc +k'} = -1;} #signal no luck $shash{$dthread}{'go'} = 0; #turn off self before returning + print "thread $dthread going back to sleep\n" if $debug; }else { select(undef,undef,undef,.1) } # tenth second sleep, can be + smaller } END: } #################################################################### sub get_gif{ #base64encoded gif89a my $gif = 'R0lGODlhEAAQAPEEAAAAAP8AAP//AP///yH/C05FVFNDQVBFMi4wAwEAAAAh+QQFCgAEA +CwAAAAA EAAQAAADPki63B4wOhWrZFYEfWm2SwCMZDkGiglsajqU2viOablBJkVCnHhSGoFgYBGhgM +Me7ugR KlfM0DPaqKwmWEcCACH5BAUKAAQALAEAAAAPAA8AAAM8SKrR+ysA0CokM1cXwcjUxoCZYF +oNOZ1O BQqTGAiVScebMOwnWdsuj6ZB26gYmxQJmZRkIE5j4EKQJB8JACH5BAUKAAQALAEAAQAOAA +4AAAM3 SBoMzioy4cYLMojgOsOTQHXAFw4baZ7NtYap9prU1ryezZnqR+wcgKXU+O1IRMwi2ItkPE +pCAgAh +QQFCgAEACwBAAEADwAPAAADO0ga3KyQNEEZCHGKYYFfzhZ4wHBJFyOSJOGFAvs6aszSMI +nfnrDL gMpjRDJdhBjUjRaRMSOuWQOaeVATACH5BAUKAAQALAEAAQAOAA4AAAM2SBoB/Coy9wST7Q +XB79Tb 0H2gaFkNQG2TmqqUBc/A4AqzTQMy/e4wEAMFImhOxYUQEiGsNJEEACH5BAUKAAQALAAAAQ +APAA8A AAM8SErRDW2tAB2o8l7Hg9Ja5xDgxgnWNZiB4KIP2ApDDafzjTKpIEcOV8nEyw0hig5o5Z +lwSpLk Exl1RiQJACH5BAUKAAQALAEAAQAOAA4AAAM4SBoBzkFJ5ipgk9qGrx4PB2khBQlNCXmBAK +BjjF5C HY8VM9jxJuywlaUGIwhzxUUK9MJIjCmWJAEAIfkEBQoABAAsAAAAAA8ADwAAAz1IutGxUL +kGaiQz 1A2z3sCDNYLwDeDjlODmCdUXZ1vpOa3ttYBOAQReYGCiqACoGDGjSA19nVCgVBR1bosEAD +s='; return $gif; }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: threads and multiple filehandles
by BrowserUk (Patriarch) on Sep 17, 2006 at 21:52 UTC | |
by chromatic (Archbishop) on Sep 18, 2006 at 06:59 UTC | |
by BrowserUk (Patriarch) on Sep 18, 2006 at 07:52 UTC | |
|
Re: threads and multiple filehandles
by shmem (Chancellor) on Sep 17, 2006 at 21:45 UTC | |
|
Re: threads and multiple filehandles
by aquarium (Curate) on Sep 17, 2006 at 23:42 UTC | |
by zentara (Cardinal) on Sep 21, 2006 at 18:20 UTC | |
|
Re: threads and multiple filehandles
by zentara (Cardinal) on Sep 18, 2006 at 12:00 UTC |