in reply to What's eating all your disk space?

Hi Hawson,

I have to do the same kind of task routinely, and try to farm as much of it off to the users themselves as possible, and wrote this to help both of us out.

It does close to the same thing as du (though any help on figuring out how du actually comes up with its numbers would be appreciated!), and is pretty handy for not having to dig through directory trees doing du's over and over again in subdirectories. Though it's noticeably slower than du on large directories, I find it's actually faster because I have to do the du only once, even for several levels of nested dirs.

It also displays age, which can be very useful to determine what needs killing, and I find novices have little problem understanding the output. YMMV, but I hope you like it.

I call it "duke" --Gremio

#!/usr/bin/perl -w
#
# duke -- a du-like disk usage report.
#
# Walks a directory tree once, then prints every entry whose cumulative
# size is at least total/$detail, biggest first, indented by depth, together
# with the age (in days) of the most recently modified thing inside it.
#
# usage: duke [-bkmg] [-d n] [dir]

use strict;
use warnings;
use Getopt::Std;
use Cwd;

# Don't bother displaying info for cumulative sizes smaller than the total
# divided by this (overridden by -d).
my $detail = 30;

# Divisor applied to byte counts when -b/-k/-m/-g is given.
# 0 selects the default human-readable format.
my $divfactor = 0;

# usage_text()
# Returns the help message shown for -h.
sub usage_text {
    return qq(usage: duke [-bkmg] [-d n] [dir]
  -bkmg  - show sizes in bytes, kb, Mb, or Gb, respectively.
           (default is human-readable)
  -d n   - amount of detail: show entities at most total/n in size
           (default $detail -- by necessity this is a bit more than
           the number of items displayed)
  -h     - show this help\n);
}

# human_readable($bytes)
# Formats a byte count for display.  With a unit option in effect the
# count is divided by $divfactor and rounded to an integer; otherwise it is
# rendered in at most four characters with a k/M/G suffix.
sub human_readable {
    my $bytes = shift;

    return sprintf "%1.0f", $bytes / $divfactor if $divfactor;
    return sprintf " %d", $bytes if $bytes < 1000;

    # $lower is the value expressed in the previous (smaller) unit,
    # $value the same quantity in the current unit.
    my @units = qw(k M G);
    my $lower = $bytes;
    foreach my $i (0 .. $#units) {
        my $unit  = $units[$i];
        my $value = $lower / 1024;

        # 1000..1023 of the smaller unit: show as a fraction of this unit
        return sprintf "%0.2f%s", $value, $unit if $lower < 1024;
        return sprintf "%1.1f%s", $value, $unit if $value < 10;
        # the largest unit absorbs everything that is left
        return sprintf "%3.0f%s", $value, $unit
            if $value < 1000 or $i == $#units;

        $lower = $value;
    }
}

# walk($dirname)
# Recursively measures $dirname and returns an arrayref of the form
#   [ sum_of_content_sizes, age, name,
#     [biggest_constituent], [next_biggest_constituent], ...
#     [smallest_constituent] ]
# where each constituent has the same shape (plain files have no
# constituents).  Sizes are in bytes, rounded up to whole filesystem
# blocks; age is the -M value (days since modification) of the newest
# item found at or below the entry.
# A directory that cannot be opened is reported with a warning and
# returned as a leaf.
sub walk {
    my $dirname = shift;

    my $dirsize = -s $dirname;
    my $dirage  = -M _;
    $dirsize = 0 unless defined $dirsize;
    $dirage  = 0 unless defined $dirage;

    opendir my $dh, $dirname or do {
        warn "WARNING: cannot open directory [$dirname]: $!\n";
        warn "         this directory will be skipped: the report will be wrong.\n";
        return [ $dirsize, $dirage, $dirname ];
    };
    # readdir makes no promise that . and .. come first, so filter them
    # by name instead of dropping the first two entries.
    my @files = grep { $_ ne '.' and $_ ne '..' } readdir $dh;
    closedir $dh;

    my @sizes;
    foreach my $file (@files) {
        my $path = "$dirname/$file";

        # lstat so that symlinks are measured themselves, not their targets
        my ($size, $blksize) = (lstat $path)[7, 11];
        unless (defined $size) {
            # e.g. the entry vanished between readdir and lstat
            warn "WARNING: cannot stat [$path]: $!\n";
            next;
        }

        # Capture everything we need from the lstat buffer now: the
        # recursive call below overwrites "_".
        my $is_dir = (!-l _ && -d _);
        my $age    = -M _;

        # The real size is in $size, but what matters for disk usage is
        # the number of whole blocks occupied: round *up* to a multiple of
        # the block size.  (Sizes that are already an exact multiple, and
        # empty files, must not gain an extra block.)  Some platforms
        # report no block size; use the raw size there.
        if ($blksize) {
            $size = $blksize * int(($size + $blksize - 1) / $blksize);
        }

        if ($is_dir) {
            my $subdir = walk($path);
            push @sizes, $subdir;
            $dirsize += $subdir->[0];
            $dirage = $subdir->[1] if $subdir->[1] < $dirage;
        }
        else {
            push @sizes, [ $size, $age, $file ];
            $dirsize += $size;
            $dirage = $age if $age < $dirage;
        }
    }

    return [ $dirsize, $dirage, $dirname,
             sort { $b->[0] <=> $a->[0] } @sizes ];
}

# display($sizes, $depth, $total)
# Prints the tree produced by walk().  $depth (default 0) is the
# indentation level and $total (default: this entry's size) the size of
# the whole report.  Entries smaller than $total/$detail are not shown.
# Returns 1 if the entry was printed and nothing otherwise; because the
# constituents are sorted biggest first, the caller stops at the first
# one that is too small.
sub display {
    my $sizes = shift;
    my $depth = shift;
    my $total = shift;
    $depth ||= 0;

    my ($size, $age, $name, @contents) = @$sizes;
    $name =~ s|/$||;
    # directories carry their full path; show only the last component
    $name =~ s|.*/|| if scalar @contents;
    $total ||= $size;

    return if ($size * $detail < $total);

    print " " x $depth;
    printf "%s %s (%1.1f)\n", human_readable($size), $name, $age;
    foreach my $ref (@contents) {
        last unless display($ref, $depth + 1, $total);
    }
    return 1;
}

# main()
# Parses the command line and prints the report for the requested
# directory (default: the current working directory).
sub main {
    # (b)ytes, (k)ilobytes, (m)egabytes, (g)igabytes, (h)elp, (d)etail
    my %opts;
    getopts('bkmghd:', \%opts);

    if ($opts{h}) {
        print usage_text();
        exit 0;
    }

    $detail = $opts{d}
        if $opts{d} and $opts{d} =~ /^\d+$/ and $opts{d} > 1;

    # size output format
    if    ($opts{b}) { $divfactor = 1; }
    elsif ($opts{k}) { $divfactor = 1024; }
    elsif ($opts{m}) { $divfactor = 1024 * 1024; }
    elsif ($opts{g}) { $divfactor = 1024 * 1024 * 1024; }

    # an explicit argument wins even if it is a false string such as "0"
    my $root = @ARGV ? shift @ARGV : cwd();

    print "#size name (age in days)\n";
    display(walk($root));
    return;
}

# Run only when executed as a script, so the subs can be loaded and
# tested without scanning the disk.
main() unless caller;
mail q.gremio..q.@..q.speakeasy..q@.@.q.org.;

Replies are listed 'Best First'.
Re: Re: What's eating all your disk space? -- duke!
by bikeNomad (Priest) on Jul 14, 2001 at 23:18 UTC
    If your $size is an integer multiple of your $blksize, you'll overstate $size by $blksize in

    $size = $blksize*(1+(int($size / $blksize)));