in reply to scalable duplicate file remover
In total you call stat or lstat ten times on the same file (eleven times if you uncomment the printf statement). You also use $File::Find::name in most places where $_ would have the same effect.
# Callback that decides whether the current entry ($_) should be recorded
# and, if so, passes it to add_to_db(). It reads $File::Find::name and sets
# $File::Find::prune, so it is meant to run as a File::Find "wanted" routine.
#
# Arguments:
#   $_[0] - hash ref of per-directory settings. Keys read here:
#             link, dir, file - truthy to accept that kind of entry
#             regex           - pattern the full path must match
#
# Also reads $config->{minsize} from the enclosing scope as the minimum size.
#
# The file system is queried only twice per entry: one lstat (via -l) and one
# stat; the later -d/-f tests reuse the stat buffer through the "_" handle.
sub process_file {
    my ($dir_configs) = @_;

    # -l does the lstat; skip symlinks unless the configuration allows them.
    return if -l && !$dir_configs->{link};

    # A single stat supplies both the size and the modification time.
    my ( $size, $mtime ) = ( stat $_ )[ 7, 9 ];

    # stat yields an empty list on failure (e.g. a dangling symlink or an
    # entry that vanished mid-walk). Without this guard the size comparison
    # would warn on undef and DateTime->from_epoch would be handed an undef
    # epoch.
    return if !defined $size;

    return if -d _ && !$dir_configs->{dir};
    return if -f _ && !$dir_configs->{file};
    return if $size < $config->{minsize};

    if ( $File::Find::name !~ /$dir_configs->{regex}/ ) {
        # A directory whose path fails the pattern is not descended into.
        $File::Find::prune = 1 if -d _;
        return;
    }

    my $last_modif_time = DateTime->from_epoch( epoch => $mtime );

    # Debugging aid:
    # print "$File::Find::name ", file2sha1( $_ ), " $size $last_modif_time\n";
    add_to_db( file2sha1( $_ ), $last_modif_time, $size, $File::Find::name );
}
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: scalable duplicate file remover
by spx2 (Deacon) on Mar 03, 2008 at 09:26 UTC | |
by jwkrahn (Abbot) on Mar 03, 2008 at 18:41 UTC |