#!/usr/bin/perl
use File::Find;
use FileHandle;
use strict;
use warnings;
# Scans the Celerra filesystems listed in @dirs, works out which customer
# and department owns each directory, and writes a usage report.

# 5Mb, in bytes. Directories whose rolled-up size is not above this are
# left out of the directory listing produced by do_output.
my $size_threshold = 5 * 1024 * 1024;

# Ownership map, one "path:customer:department" entry per line, e.g.
#   /fs001:SYS:Any
#   /fs001/Tech/res:Tech:Research
#   ...
my $config_file = 'disk_usage.conf';

# Filesystem mount points to scan.
my @dirs = qw(
    /fs001 /fs002 /fs003 /fs004 /fs005 /fs006
    /fs007 /fs008 /fs009 /fs010 /fs011 /fs012
);

my $report_file = 'disk_usage.html';

# Non-zero turns on progress/diagnostic printing throughout the script.
my $debug = 1;

# Recursion depth handed to dusage for every entry in @dirs.
my $global_recurse = 2;

# Directory entries that must never be descended into.
my @excl_directories = ( '.', '..' );
my %is_excluded = map { $_ => 1 } @excl_directories;

my %sizes;        # directory path => size in bytes, filled in by dusage
my %customers;    # directory path => "customer:department", from $config_file
my %totals;       # customer => department => { du, dirs }, built by do_output
# Format a byte count as a short human-readable string such as "5.00Mb".
#
# Only the first argument is formatted (any others are ignored, as before).
# An undefined first argument is treated as 0. Called with no arguments it
# returns nothing.
#
# The value is divided by 1024 until it drops below 1024 or the largest
# unit (Tb) is reached, so exactly 1024 bytes is "1.00kb" and anything
# beyond the Tb range is still expressed in Tb rather than running off the
# end of the unit table. The byte unit is 'b ' (with a trailing space) so
# that every unit is two characters wide.
sub data
{
    return unless @_;

    my $factor   = 1024;
    my @sequence = ( 'b ', 'kb', 'Mb', 'Gb', 'Tb' );

    my $output_number = defined $_[0] ? $_[0] : 0;
    my $seq_num       = 0;
    while ( $output_number >= $factor && $seq_num < $#sequence )
    {
        $output_number /= $factor;
        $seq_num++;
    }
    return sprintf( '%3.2f%s', $output_number, $sequence[$seq_num] );
}
# Return the total size in bytes of everything found beneath the given
# starting points, as reported by -s for each entry File::Find visits
# (directory entries themselves included).
#
# Watch out for this: it walks the whole tree, so it is a _very_ expensive
# call. Each entry is stat'ed exactly once here; look for further
# optimisations lower down in the chain.
sub getsize
{
    my @args = @_;
    my $sum  = 0;
    if ($debug) { print "Getting size for @args\n"; }
    find(
        sub {
            # -s gives undef (stat failed) or a false value (empty file)
            # when there is nothing to add.
            my $size = -s $_;
            $sum += $size if $size;
        },
        @args
    );
    return $sum;
}
# Return the combined size in bytes of the non-directory entries sitting
# directly inside each of the given directories. Subdirectories are not
# descended into and do not contribute to the sum.
#
# A directory that cannot be opened is reported with a WARNING line and
# skipped; it contributes nothing.
sub get_size_of_files
{
    my @args = @_;
    my $sum  = 0;
    if ($debug) { print "Sizing files in @args\n"; }
    foreach my $thisdir (@args)
    {
        my $dh;
        if ( !opendir( $dh, $thisdir ) )
        {
            print "WARNING: Couldn't open $thisdir\n";
            next;    # nothing to read from a handle that failed to open
        }
        while ( defined( my $fname = readdir($dh) ) )
        {
            next if -d "$thisdir/$fname";

            # "_" reuses the stat performed by -d just above.
            my $size = -s _;
            $sum += $size if $size;
        }
        closedir($dh);
    }
    return $sum;
}
# Record disk usage for a directory and, depth permitting, each of its
# subdirectories, in the file-level %sizes hash.
#
#   $startpoint     directory to examine (defaults to '.')
#   $recurse_depth  how many more levels to descend (defaults to 0)
#
# While depth remains, $sizes{$startpoint} holds only the files directly
# in the directory and every subdirectory is processed with one level less
# depth. Once depth runs out, $sizes{$startpoint} holds the size of the
# whole tree below it as computed by getsize. Anything that is not a
# directory is ignored. Returns nothing.
sub dusage
{
    my $startpoint    = shift(@_) || '.';
    my $recurse_depth = shift(@_) || 0;

    if ($debug) { print "Reading $startpoint\n"; }
    return unless -d $startpoint;

    if ( $recurse_depth-- <= 0 )
    {
        # Only pay for the expensive full-tree walk when we are not going
        # to recurse any deeper.
        $sizes{$startpoint} = getsize($startpoint);
        return;
    }

    $sizes{$startpoint} = get_size_of_files($startpoint);
    if ($debug) { print "adding $sizes{$startpoint} to $startpoint\n"; }

    my $dh;
    if ( !opendir( $dh, $startpoint ) )
    {
        print "WARNING: Couldn't list subdirectories of $startpoint: $!\n";
        return;
    }

    # Gather the subdirectories first and release the handle before
    # recursing, so no directory handle is held open across the recursion.
    my @dusage_list;
    while ( defined( my $filename = readdir($dh) ) )
    {
        next if $is_excluded{$filename};
        my $path = "$startpoint/$filename";
        push @dusage_list, $path if -d $path;
    }
    closedir($dh);

    for my $dir (@dusage_list)
    {
        dusage( $dir, $recurse_depth );
    }
    return;
}
# Look up the "customer:department" owner of a path in %customers, trying
# the full path first and then each parent in turn. Returns "" when no
# entry matches.
sub _find_owner
{
    my ($path) = @_;
    my @dir_list = split( "/", $path );
    while (@dir_list)
    {
        # The 'dir' to look for in the customers hash might or might not
        # have a trailing '/'; strip it before the lookup.
        my $srch_string = join( "/", @dir_list );
        $srch_string =~ s{/\z}{};
        return $customers{$srch_string} if $customers{$srch_string};
        pop(@dir_list);
    }
    return "";
}

# Write the usage report to $report_file and echo it to STDOUT.
#
# The last argument is the base path; the number of '/' characters in it
# sets the indent level that counts as zero in the directory listing.
#
# Steps:
#   1. Roll every entry of %sizes up into all of its parent directories
#      (this consumes %sizes, which is empty afterwards).
#   2. List every directory whose rolled-up size exceeds $size_threshold,
#      indented by depth.
#   3. Attribute each listed directory's own (not rolled-up) size to its
#      owning customer/department in %totals, then print those totals.
#
# If the report file cannot be opened a WARNING is printed and the run
# carries on with STDOUT output only, so the scan results are not lost.
sub do_output
{
    my $base_path   = pop(@_);
    my $base_indent = defined $base_path ? ( $base_path =~ tr{/}{} ) : 0;

    my $report;
    if ( !open( $report, '>', $report_file ) )
    {
        print "WARNING: Couldn't open $report_file for writing: $!\n";
        undef $report;
    }
    my $emit = sub { print {$report} @_ if $report; };

    my %output      = %sizes;
    my %basic_sizes = %sizes;
    foreach my $item ( sort( keys(%sizes) ) )
    {
        print("directory size: $sizes{$item} = $item \n");
    }

    # Each pass moves every pending size one level up the tree; an entry
    # with no parent left (nothing to strip) is simply dropped. Repeat
    # until nothing is pending.
    while ( keys(%sizes) )
    {
        foreach my $value ( sort( keys(%sizes) ) )
        {
            my $upd = $value;
            $upd =~ s{/[^/]+\z}{};    # drop the last path component, whatever characters it contains
            print "upd = $upd value = $value\n";
            if ( $upd ne $value )
            {
                $output{$upd} += $sizes{$value};
                $sizes{$upd}  += $sizes{$value};
                if ($debug) { print "adding $value ( $sizes{$value} ) to $upd\n" }
            }
            delete( $sizes{$value} );
        }
    }

    if ($debug)
    {
        foreach my $item ( sort( keys(%output) ) )
        {
            print("$output{$item} = $item \n");
        }
    }

    $emit->("\n\n");
    foreach my $item ( sort( keys(%output) ) )
    {
        next unless $output{$item} > $size_threshold;

        my $base_object = $item;
        $base_object =~ s{.*/}{/}g;    # keep only the last path component

        my $indent_depth = ( $item =~ tr{/}{} ) - $base_indent;
        my $indent_html = $indent_depth > 0 ? '| ' x $indent_depth : '';
        $indent_depth *= 8;

        # Work out who 'owns' that data by matching the path, then its
        # parents, against the config entries.
        my $owner = _find_owner($item);
        if ($debug) { print "$owner\n"; }

        $emit->( " | | ", $indent_html );
        $emit->( data( $output{$item} ) );
        $emit->( " | ", $base_object, " | \n" );
        printf( "%${indent_depth}s", data( $output{$item} ) );
        print("\t $base_object\n");

        if ( !$owner ) { $owner = "unknown:unknown" }
        my ( $customer, $dept ) = split( ":", $owner );
        $customer = "unknown" unless defined $customer && length $customer;
        $dept     = "unknown" unless defined $dept     && length $dept;

        # The synthetic top-level "" entry only exists in the rolled-up
        # sizes, so it has no size of its own.
        my $own_size = defined $basic_sizes{$item} ? $basic_sizes{$item} : 0;

        $totals{$customer}{'Total'}{'du'} += $own_size;
        push( @{ $totals{$customer}{'Total'}{'dirs'} }, $item );
        if ($debug) { print "$customer $dept = $own_size\n"; }
        $totals{$customer}{$dept}{'du'} += $own_size;
        push( @{ $totals{$customer}{$dept}{'dirs'} }, $item );
    }

    print "Listing by Customer and Department\n";
    $emit->("\n| \n");
    $emit->("Listing by Customer and Department\n");
    $emit->(" |\n");
    foreach my $customer ( sort( keys(%totals) ) )
    {
        foreach my $dept ( sort( keys( %{ $totals{$customer} } ) ) )
        {
            my $usage = data( $totals{$customer}{$dept}{'du'} );
            print "$customer $dept ", $usage, "\n";
            $emit->( "| $customer $dept ", $usage, " |\n\n" );
        }
    }

    if ($report)
    {
        # Buffered write errors only surface when the handle is closed.
        close($report)
            or print "WARNING: Couldn't finish writing $report_file: $!\n";
    }
    return;
}
#MAIN
# Load the ownership map, scan every filesystem in @dirs, then report.

# Each config line is "path:customer:department". Blank lines and lines
# starting with '#' are skipped; a missing customer or department is
# recorded as "unknown".
if ( -f $config_file )
{
    if ( open( my $conf, '<', $config_file ) )
    {
        while ( my $line = <$conf> )
        {
            chomp $line;
            next if $line =~ /^\s*(?:#|$)/;

            my ( $fs, $cust, $dept ) = split( ":", $line );
            next unless defined $fs && length $fs;
            $cust = "unknown" unless defined $cust && length $cust;
            $dept = "unknown" unless defined $dept && length $dept;

            $customers{$fs} = join( ":", $cust, $dept );
            if ($debug) { print "got $fs - $cust - $dept\n"; }
        }
        close($conf);
    }
    else
    {
        print "WARNING: Couldn't open $config_file: $!\n";
    }
}
if ($debug) { print keys(%customers); }

# Unbuffered STDOUT so progress lines appear as the scan runs.
STDOUT->autoflush(1);

foreach my $dir (@dirs)
{
    if ($debug) { print "\nSTARTING $dir\n"; }
    dusage( $dir, $global_recurse );
}

# The argument only supplies the base depth (number of '/') for indenting.
do_output("/fs001");