in reply to Re^2: use threads for dir tree walking really hurts
in thread use threads for dir tree walking really hurts
The following is an attempt for having all workers remain busy, "regardless of working in 1 directory with a million files, or a directory tree."
For extra performance, the execution time completes ~ 25% faster via cperl-5.22c.
use strict; use warnings; # usage: ./count_files.pl /var/tmp [ /path/to/dir2 ... ] # usage: perl count_files.pl C:\ use Data::Dumper; use File::Spec::Functions qw( catfile ); use MCE; use MCE::Queue; use MCE::Shared; # Get directories to start with my @start_dirs = @ARGV or die "Please specify one or more starting directories, stopped"; for ( @start_dirs ) { -d or die "No such directory: $_, stopped"; } # Shared queue, counter, and arrays my $shared_work = MCE::Queue->new( fast => 1, queue => \@start_dirs ); my $free_count = MCE::Shared->scalar( 0 ); my $isDir = MCE::Shared->array; my $isFile = MCE::Shared->array; my $denied = MCE::Shared->array; # Walk routine sub traverse { $free_count->decr; my ( $entry, $path ); my @work = ( $_ ); my $count = 0; while ( defined ( $entry = shift @work ) ) { if ( ref $entry ) { $isFile->push( @{ $entry } ); } else { opendir DH, $entry or $denied->push( $entry ), next; $isDir->push( $entry ); my @files; for ( readdir DH ) { next if $_ eq '.' || $_ eq '..'; if ( -d ( $path = catfile( $entry, $_ ) ) ) { ( @work < 5 ) ? push @work, $path : $shared_work->enqueue( $path ); next; } elsif ( -f _ ) { push @files, $path; if ( ++$count == 2000 ) { $shared_work->enqueue( \@files ); $count = 0, @files = (); } } } $isFile->push( @files ) if @files; } } # Done with our work, let everyone know we're free if ( $free_count->incr == MCE->max_workers && !$shared_work->pendin +g ) { $shared_work->enqueue( (undef) x MCE->max_workers ); } } # Instantiate MCE and run. MCE->new( max_workers => 6, user_begin => sub { $free_count->incr; }, user_func => sub { traverse() while ( defined ( $_ = $shared_work->dequeue ) ); }, )->run; # Display counts. printf "count dirs : %d\n", $isDir->len; printf "count files : %d\n", $isFile->len; printf "count denied : %d\n", $denied->len; # print Dumper( $isDir->export ), "\n"; # print Dumper( $isFile->export ), "\n"; # print Dumper( $denied->export ), "\n";
Best regards, Mario.
|
|---|