comment on

Hi Hawson,

I have to do the same kind of task routinely, and try to farm as much of it off to the users themselves as possible, and wrote this to help both of us out.

It does close to the same thing as du (though any help on figuring out how du actually comes up with its numbers would be appreciated!), and is pretty handy for not having to dig through directory trees doing du's over and over again in subdirectories. Though it's noticeably slower than du on large directories, I find it's actually faster because I have to do the du only once, even for several levels of nested dirs.

It also displays age, which can be very useful to determine what needs killing, and I find novices have little problem understanding the output. YMMV, but I hope you like it.

I call it "duke" --Gremio

#!/usr/bin/perl -w
use strict;
use Getopt::Std;
use Cwd;

# don't bother displaying info for cumulative sizes smaller than total
# divided by this:
my $detail = 30;

#< options
my %opts;
getopts('bkmghd:', \%opts);
#(b)ytes, (k)ilobytes, (m)egabytes, (g)igabytes, (h)elp
#(d)etail
# -h prints the usage text and exits; human-readable sizes are the default
# output mode and need no switch.
if ($opts{h}) {
    print qq(usage: duke [-bkmg] [-d n] [dir]
             -bkmg   -  show sizes in bytes, kb, Mb, or Gb, respectively.
                        (default is human-readable)
             -d n    -  amount of detail: show entities at most total/n in size
                        (default $detail -- by necessity this is a bit more
                         than the number of items displayed)\n);
    exit 0;
}
# only accept -d values that are whole numbers greater than 1
$detail = $opts{d} if $opts{d} and $opts{d} =~ /^\d+$/ and $opts{d} > 1;

#< size output format
# $divfactor is the number of bytes per displayed unit; it stays 0 when no
# unit switch was given, in which case $opts{human} selects the
# human-readable formatting instead.
my $divfactor=0;
if ($opts{b}) {
    $divfactor=1;
} elsif ($opts{k}) {
    $divfactor=1024;
} elsif ($opts{m}) {
    $divfactor = 1024 * 1024;
} elsif ($opts{g}) {
    $divfactor = 1024 * 1024 * 1024;
} else {
    $opts{human}=1;
}
sub human_readable {
    # Format a byte count for display.
    #
    # When a fixed unit was requested ($opts{human} is not set) the count
    # is divided by $divfactor and printed as a whole number.  Otherwise
    # the value is scaled to k, M or G and printed with a precision that
    # depends on its magnitude; counts below 1000 are printed as plain
    # bytes with a leading space.
    my $bytes = shift;

    return sprintf "%1.0f", ($bytes / $divfactor) unless $opts{human};
    return sprintf " %d", $bytes if $bytes < 1000;

    # Step up one unit per pass.  $previous is the value expressed in the
    # next smaller unit, $scaled the value in the unit being tried.
    my $scaled = $bytes;
    foreach my $unit (qw(k M G)) {
        my $previous = $scaled;
        $scaled = $previous / 1024;

        # 1000..1023 of the smaller unit: show as a fraction of this one
        return sprintf "%0.2f$unit", $scaled if $previous < 1024;
        return sprintf "%1.1f$unit", $scaled if $scaled < 10;
        # G is the largest unit, so it takes everything that is left
        return sprintf "%3.0f$unit", $scaled if $scaled < 1000 or $unit eq 'G';
    }
}
#>
#> 
#< walk(dir)
sub walk {
    # Recursively measure the directory whose path is given.
    #
    # Returns an arrayref of the form
    # [ sum_of_content_sizes, age, name,
    #   [biggest_constituent], [next_biggest_constituent], ...
    #       ... [smallest_constituent]
    # ]
    # where every constituent is an arrayref of the same shape (plain
    # files simply have no constituents of their own).  The age is the
    # -M value in days; for a directory it is the smallest age found
    # among the directory itself and everything beneath it.  A directory
    # that cannot be opened is reported with a warning and returned
    # without constituents.

    my $dirname = shift;
    my $dirsize = -s $dirname;
    my $dirage = -M _;

    opendir my $dh, $dirname or do {
        warn "WARNING: cannot open directory [$dirname]: $!\n";
        warn "         this directory will be skipped: the report will be wrong.\n";
        return [$dirsize, $dirage, $dirname];
    };

    # readdir makes no promise about ordering, so . and .. have to be
    # filtered by name rather than assumed to be the first two entries
    my @files = grep { $_ ne '.' and $_ ne '..' } readdir $dh;
    closedir $dh;

    my @sizes = ();
    foreach my $file (@files) {
        my $path = "$dirname/$file";

        # lstat, not stat: a symlink is measured itself, never followed
        my ($size, $blksize) = (lstat $path)[7, 11];
        unless (defined $size) {
            # the entry vanished or is not accessible
            warn "WARNING: cannot stat [$path]: $!\n";
            next;
        }

        # read both from the lstat buffer now: the recursive call below
        # overwrites it
        my $is_dir = -d _;
        my $age = -M _;

        # The real size is in $size, but what matters for counting disk
        # usage is the space taken in whole blocks: round up to the next
        # multiple of the block size.  Empty files and exact multiples
        # need no extra block, and a zero or missing block size leaves
        # the size as it is.
        if ($blksize) {
            my $partial = $size % $blksize;
            $size += $blksize - $partial if $partial;
        }

        if ($is_dir) {
            my $subdir = walk($path);
            push @sizes, $subdir;
            $dirsize += $subdir->[0];
            $dirage = $subdir->[1] if $subdir->[1] < $dirage;
        } else {
            push @sizes, [ $size, $age, $file ];
            $dirsize += $size;
            $dirage = $age if $age < $dirage;
        }
    }
    return [ $dirsize, $dirage, $dirname,
             sort { $b->[0] <=> $a->[0] } @sizes ];
}
#>
#< display(sizes) 
sub display {
    # Print one entry of the structure built by walk() and then its
    # constituents, indented three spaces per level.
    #
    # Arguments: the entry's arrayref, its nesting depth (defaults to 0)
    # and the size of the whole tree (defaults to this entry's size).
    # Returns 1 after printing, or nothing when the entry is smaller
    # than total/$detail and was therefore skipped.
    my ($sizes, $depth, $total) = @_;
    $depth ||= 0;
    my ($size, $age, $name, @contents) = @$sizes;
    $name =~ s|/$||;
    # Show nested entries by basename.  Testing only for constituents
    # would leave empty or unreadable subdirectories with their full
    # path, unlike their siblings.
    $name =~ s|.*/|| if $depth or scalar @contents;
    $total ||= $size;
    return if ($size * $detail < $total);
    print "   " x $depth;
    printf "%s  %s (%1.1f)\n", human_readable($size), $name, $age;
    foreach my $ref (@contents) {
        # constituents arrive sorted biggest first, so once one is too
        # small to show, the rest are as well
        last unless display($ref, $depth+1, $total);
    }
    return 1;
}
#>

print "#size  name (age in days)\n";
# Report on the directory named on the command line, or on the current
# directory when none is given.  The argument is tested for being a
# non-empty string rather than for truth, so that a directory literally
# named "0" is not mistaken for a missing argument.
my $start_dir = shift @ARGV;
$start_dir = cwd() unless defined $start_dir and length $start_dir;
display(walk($start_dir));
[download]

mail q.gremio..q.@..q.speakeasy..q@.@.q.org.;

In reply to Re: What's eating all your disk space? -- duke! by gremio
in thread What's eating all your disk space? by hawson

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&amp;`
	<		`&lt;`
	>		`&gt;`
	[		`&#91;`
	]		`&#93;`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.