comment on

# du(@paths_and_options) - add up file sizes, loosely modelled on du(1).
#
# Arguments may appear in any order:
#   * a plain scalar is a path to measure;
#   * a hash reference carries options:
#       extra     => true: return one hash ref of counters per result
#                    instead of a bare size
#       total     => true: fold every path into a single result instead
#                    of one result per path
#       recursive => true (the default): descend into directories
#     Unknown option keys are ignored.  The blocksize and follow_symlinks
#     options sketched by the original author are not implemented.
#
# Returns a list.  Without "extra" each element is a size in bytes (stat
# field 7, not disk blocks); with "extra" each element is a hash ref with
# the keys size, files, dirs, unique_files and unique_dirs.
#
# Notes on behaviour:
#   * Each device:inode pair contributes to "size" and the unique_*
#     counters only once per call, so hard links are not double counted.
#   * Sizes come from stat(), which follows symlinks; a symlink to a
#     directory is counted as a file and never descended into.
#   * A path that cannot be stat'ed is skipped silently and does NOT get a
#     result slot, so later results move down by one position.
#   * Traversal uses chdir for speed.  The working directory is restored
#     after every top-level directory; the sub dies if it cannot get back,
#     because carrying on would measure the wrong files.
sub du {
    my @files;
    my $extra     = 0;
    my $total     = 0;
    my $recursive = 1;

    for my $arg (@_) {
        if (ref($arg) eq 'HASH') {
            $extra     = $arg->{extra}     if exists $arg->{extra};
            $total     = $arg->{total}     if exists $arg->{total};
            $recursive = $arg->{recursive} if exists $arg->{recursive};
        }
        else {
            push @files, $arg;
        }
    }

    # A top-level argument may be ".", an absolute path or a path with
    # several components, so "chdir '..'" cannot be relied on to undo the
    # chdir into it.  Remember where we started instead.
    require Cwd;
    my $start_dir = $recursive ? Cwd::getcwd() : undef;

    my @res;       # one [SIZE, FILES, DIRS, UNIQUE_FILES, UNIQUE_DIRS] per slot
    my %inodes;    # "dev:inode" => number of times seen so far

    my $doit;
    $doit = sub {
        my ($names, $nested, $descend, $idx) = @_;
        my $i = 0;    # result slot for per-argument mode; unused when nested
        for my $f (@$names) {
            next if $nested && ($f eq '.' || $f eq '..');
            my @st = stat($f) or next;

            # "-d _" reuses the stat above; the extra lstat behind "-l" is
            # only paid for directories thanks to the short circuit.
            my $is_dir = (-d _) && !(-l $f);
            my $j = $idx == -1 ? $i : $idx;
            my $counted = $inodes{"$st[0]:$st[1]"}++;
            my $r = $res[$j] ||= [ 0, 0, 0, 0, 0 ];

            $r->[0] += $st[7] unless $counted;
            if ($is_dir) {
                $r->[2]++;
                $r->[4]++ unless $counted;
                if ($descend && chdir $f) {
                    my @entries;
                    if (opendir my $dh, '.') {
                        @entries = readdir $dh;

                        # Close before recursing so a deep tree does not
                        # hold one open descriptor per level.
                        closedir $dh;
                    }
                    $doit->(\@entries, 1, 1, $j);

                    # Nested names come from readdir and are not symlinks,
                    # so ".." is their real parent; top-level arguments go
                    # back to the saved starting directory.
                    my $back = $nested ? '..' : $start_dir;
                    if (!(defined $back && chdir $back)) {
                        die "du: cannot return from directory '$f': $!\n";
                    }
                }
            }
            else {
                $r->[1]++;
                $r->[3]++ unless $counted;
            }
            $i++;
        }
    };
    $doit->(\@files, 0, $recursive, ($total ? 0 : -1));

    # The closure refers to itself; break the cycle so it and the data it
    # captured can be freed.
    undef $doit;

    if ($extra) {
        return map {
            +{
                size         => $_->[0],
                files        => $_->[1],
                dirs         => $_->[2],
                unique_files => $_->[3],
                unique_dirs  => $_->[4],
            }
        } @res;
    }
    return map { $_->[0] } @res;
}

use Benchmark qw(:all);
# Time one full traversal of the current directory.  The result is kept in a
# lexical (list context, as before) so the snippet also runs under
# "use strict" instead of writing to an undeclared package variable.
timethis(1, sub { my @sizes = du("."); });
[download]

When pitted against the C-based "du" command on a tree with roughly 150k entries:

Perl: 1.58s user, 0.46s sys
C: 0.10s user, 0.47s sys
[download]

This means the Perl version uses about 15 times more user CPU time than the C version. Any ideas on how to make it more efficient? I'll settle for 2-4x slower, but 15x is rather unsatisfactory for me.

In reply to How to make this perl version of "du" faster? by dgaramond2

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&amp;`
	<		`&lt;`
	>		`&gt;`
	[		`&#91;`
	]		`&#93;`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.