#!/usr/bin/perl
#
# Program to process filesystems on a Celerra and work out which
# departments own what.
#
# It sizes every directory below a set of start points, rolls those sizes up
# the directory tree, attributes each directory to a customer/department via
# a colon-separated config file (path:customer:department), and writes
#   file_sizes<target>.html  - file-count statistics
#   disk_usage<target>.xml   - per-directory and per-customer listing
#   disk_usage<target>.csv   - per-customer/department totals
# into the current directory.
#
# Usage: script [start_directory]
#   With no argument every entry of @dirs is scanned and the target is "ALL".
#
# Sample paths that preceded the script in the original listing (presumably
# examples of the directory tree being scanned - TODO confirm):
#   /fs001
#   /fs001/ITC
#   /fs001/ITC/LSC/
#   /fs001/ITC/LSC/users
#   /fs001/ITC/LSC/users/my_user
#   /fs001/ITC/LSC/users/some_user
#   /fs001/ITC/LSC/users/other_user
#
# NOTE(review): many of the string literals printed to the report files below
# are empty or contain only newlines.  It looks like the XML/HTML markup that
# belonged in them was stripped from this copy of the script; the literals are
# kept exactly as found and need to be restored from a known-good copy.

use strict;
use warnings;

use File::Find;
use FileHandle;
use POSIX;
use URI::Escape;
use Time::HiRes qw ( usleep );

# 5Mb.  Intended as "if it's smaller then don't bother recursing"; it is not
# currently consulted by any live code.
my $size_threshold = 5242880;

my $config_file = "/usr/local/apache/htdocs/dusage/disk_usage.conf";

my @dirs = qw(
    /fs001 /fs002 /fs003 /fs004 /fs005 /fs006
    /fs007 /fs008 /fs009 /fs010 /fs011 /fs012
);

#my @dirs = ( "/SiteWide/home/erolison" );

my $debug = 0;

# Default recursion depth, but used for some interesting calcs.
my $global_recurse = 5;

# Directory entries that are never descended into (cos they're unusual).
my @excl_directories = ( '.', '..' );
my %is_excluded = map { $_ => 1 } @excl_directories;

my %sizes;        # directory path => bytes found directly for that path
my %customers;    # directory path => "customer:department"
my %totals;       # customer => department => { du => bytes, dirs => [...] }

# File-count statistics gathered by getsize().  Initialised to zero so the
# statistics file never prints an undefined value.
my ( $allfiles, $tenk, $fiftyk, $hundredk, $twohundredk, $fivehundredk,
    $onemeg ) = (0) x 7;

# Characters uri_escape() must escape in report paths (negated class).
my $uri_unsafe = "^A-Za-z0-9\-_.!~+ *'()\/";

# data($bytes)
# Formats a byte count as a human readable string with two decimals and a
# unit suffix ('b ', 'kb', 'Mb', 'Gb', 'Tb').  Only the first argument is
# used.  A false/zero/undefined input yields "0.00b".  Values too large for
# the unit table are expressed in the largest unit rather than indexing past
# the end of the table.
sub data {
    my ($input_number) = @_;

    my $factor   = 1024;
    my @sequence = ( 'b ', 'kb', 'Mb', 'Gb', 'Tb' );

    return sprintf( "%3.2fb", 0 ) if !$input_number;

    my $seq_num       = 0;
    my $output_number = $input_number;
    while ( $seq_num < $#sequence && $output_number / $factor >= 1 ) {
        $seq_num++;
        $output_number /= $factor;
    }

    return sprintf( "%3.2f%s", $output_number, $sequence[$seq_num] );
}

# getsize(@paths)
# Returns the summed size in bytes of everything File::Find visits below
# @paths, and updates the global file-count statistics as it goes.
# Watch out for this.  It's a _very_ expensive call; look out for
# optimisations lower down in the chain.
sub getsize {
    my @args = @_;
    my $sum  = 0;

    print "Getting size for @args\n" if $debug;

    find(
        sub {
            # One stat per entry instead of one per threshold test.
            my $size = -s $_;
            if ($size) {
                $sum += $size;
                $allfiles++;
                $tenk++         if $size > 10240;
                $fiftyk++       if $size > 51200;
                $hundredk++     if $size > 102400;
                $twohundredk++  if $size > 204800;
                $fivehundredk++ if $size > 512000;
                $onemeg++       if $size > 1048576;
            }
            usleep(1000) if $debug;
        },
        @args
    );

    return $sum;
}

# get_size_of_files(@directories)
# Returns the summed size in bytes of the entries found directly inside each
# directory (no recursion).  A directory that cannot be opened produces a
# warning on STDOUT and contributes nothing.
sub get_size_of_files {
    my @args = @_;
    my $sum  = 0;

    print "Sizing files in @args\n" if $debug;

    foreach my $dir (@args) {
        my $dh;
        if ( !opendir( $dh, $dir ) ) {
            print "WARNING: Couldn't open $dir\n";
            next;    # nothing to read from a handle that never opened
        }

        # defined() so an entry literally named "0" does not end the scan.
        while ( defined( my $fname = readdir($dh) ) ) {
            my $size = -s "$dir/$fname";
            $sum += $size if $size;
        }
        closedir($dh);
    }

    return $sum;
}

# dusage($startpoint, $recurse_depth)
# Records disk usage for $startpoint (default '.') in %sizes.  While
# $recurse_depth (default 0) is positive, only the directory's direct entries
# are sized and each subdirectory is visited with the depth reduced by one;
# once the depth is exhausted the whole remaining subtree is sized in one go
# with getsize().  Does nothing when $startpoint is not a directory.
sub dusage {
    my $startpoint    = shift(@_) || '.';
    my $recurse_depth = shift(@_) || 0;

    print "Reading $startpoint\n" if $debug;

    return if !-d $startpoint;

    if ( $recurse_depth-- > 0 ) {
        $sizes{$startpoint} = get_size_of_files($startpoint);
        print "adding $sizes{$startpoint} to $startpoint\n" if $debug;

        my $dh;
        if ( !opendir( $dh, $startpoint ) ) {
            print "WARNING: Couldn't open $startpoint\n";
            return;
        }

        # Collect the subdirectories first and close the handle before
        # recursing, so deep trees do not hold a handle open per level.
        my @dusage_list;
        while ( defined( my $filename = readdir($dh) ) ) {
            next if $is_excluded{$filename};
            push @dusage_list, "$startpoint/$filename"
                if -d "$startpoint/$filename";
        }
        closedir($dh);

        foreach my $dir (@dusage_list) {
            dusage( $dir, $recurse_depth );
        }
    }
    else {
        # Only process the expensive bit if we're not going to recurse
        # 'deeper'.
        $sizes{$startpoint} = getsize($startpoint);
    }

    return;
}

# _write_file_stats($target)
# Writes the file-count statistics gathered by getsize() to
# file_sizes<target>.html.  Dies if the file cannot be written.
sub _write_file_stats {
    my ($target) = @_;

    my $stat_file = "file_sizes${target}.html";
    open( my $filestat, '>', $stat_file )
        or die "Couldn't write $stat_file: $!\n";

    my @rows = (
        [ "Total number of files = ",  $allfiles ],
        [ "files over 10k in size = ",  $tenk ],
        [ "files over 50k in size = ",  $fiftyk ],
        [ "files over 100k in size = ", $hundredk ],
        [ "files over 200k in size = ", $twohundredk ],
        [ "files over 500k in size = ", $fivehundredk ],
        [ "files over 1M in size = ",   $onemeg ],
    );
    foreach my $row (@rows) {
        print $filestat $row->[0], $row->[1], "\n\n";
    }

    close($filestat) or warn "Problem closing $stat_file: $!\n";
    return;
}

# _roll_up_sizes(%direct_sizes)
# Takes path => bytes for individual directories and returns path => bytes
# where every directory also includes the sizes of everything below it.
# Works on its own copy, so the caller's hash is left untouched.
sub _roll_up_sizes {
    my (%pending) = @_;
    my %rolled = %pending;

    # Each pass pushes every pending size one level up; repeat until all
    # sizes have been carried to the top of their tree.
    while ( keys %pending ) {
        foreach my $value ( sort keys %pending ) {
            my $upd = $value;

            # Strip the last path component, whatever characters it holds,
            # so unusually named directories still count towards parents.
            $upd =~ s,/[^/]+$,,;
            print "upd = $upd value = $value\n" if $debug;

            if ( $upd ne $value ) {
                $rolled{$upd}  += $pending{$value};
                $pending{$upd} += $pending{$value};
                print "adding $value ( $pending{$value} ) to $upd\n"
                    if $debug;
            }
            delete $pending{$value};
        }
    }

    return %rolled;
}

# _find_owner($path)
# Returns the "customer:department" string of the closest ancestor of $path
# (or $path itself) listed in %customers, or "" when none is listed.
sub _find_owner {
    my ($item) = @_;

    my @dir_list = split( "/", $item );
    my $owner    = "";

    while ( !$owner && @dir_list ) {

        # The 'dir' to look for in the customers hash; strip any trailing /.
        my $srch_string = join( "/", @dir_list );
        $srch_string =~ s,/$,,;

        if ( $customers{$srch_string} ) {
            $owner = $customers{$srch_string};
        }
        pop(@dir_list);
    }

    return $owner;
}

# do_output($target)
# Writes the three report files for the data collected in %sizes.  Slashes
# are removed from $target before it is used in the file names.  Also fills
# %totals with per-customer and per-department usage.  Dies if a report file
# cannot be opened for writing.
sub do_output {
    my $target = pop(@_);
    $target =~ s,/,,g;
    print "$target\n" if $debug;

    _write_file_stats($target);

    my $report_file = "disk_usage${target}.xml";
    my $csv_file    = "disk_usage${target}.csv";
    open( my $report, '>', $report_file )
        or die "Couldn't write $report_file: $!\n";
    open( my $csv, '>', $csv_file )
        or die "Couldn't write $csv_file: $!\n";

    my %basic_sizes = %sizes;
    if ($debug) {
        foreach my $item ( sort keys %sizes ) {
            print("directory size: $sizes{$item} = $item \n");
        }
    }

    my %output = _roll_up_sizes(%sizes);
    if ($debug) {
        foreach my $item ( sort keys %output ) {
            print("$output{$item} = $item \n");
        }
    }

    print $report "\n";
    print $report "\n";
    print $report "\n";
    print $report "Disk usage report for $target \n";
    print $report "", strftime( "%d/%m/%y", localtime(time) ), "\n";
    print $report "\n";
    print $report "Listing by Directory\n";

    my $current_indent = -1;
    foreach my $item ( sort keys %output ) {
        my $base_object = $item;
        $base_object =~ s,.*/,/,g;
        my $indent_depth = ( $item =~ tr,/,, );
        my $indent_html  = join( "", "" );

        # Work out who 'owns' that data by matching against the config.
        my $owner = _find_owner($item);
        print "$owner\n"                           if $debug;
        print "$indent_depth to $current_indent\n" if $debug;

        # Close every level from the previous entry down to this depth.
        # (The range is empty when this entry is deeper than the last.)
        print $report "\n" for $indent_depth .. $current_indent;

        # Open the levels skipped when jumping more than one level deeper,
        # so the closes above stay balanced.
        print $report "" for ( $current_indent + 1 ) .. ( $indent_depth - 1 );

        $current_indent = $indent_depth;

        print $report "\n", $indent_html, "\n";
        print $report "", data( $output{$item} ), "\n";
        print $report "", uri_escape( $base_object, $uri_unsafe ), "\n";
        print $report "", uri_escape( $item,        $uri_unsafe ), "\n";
        print $report "", $output{$item}, "\n";

        my ( $lcust, $ldept ) = split( ":", $owner );
        $lcust = "Unknown" if not $lcust;
        $ldept = "Unknown" if not $ldept;
        print $report "", $lcust, "\n";
        print $report "", $ldept, "\n";

        if ($debug) {
            printf( "%${indent_depth}s", data( $output{$item} ) );
            print("\t $base_object\n");
        }

        $owner = "unknown:unknown" if !$owner;
        my ( $customer, $dept ) = split( ":", $owner );

        # An owner such as "cust:" leaves a field undefined after split.
        $customer = "unknown" if !defined $customer || !length $customer;
        $dept     = "unknown" if !defined $dept     || !length $dept;

        my $own_size = $basic_sizes{$item} || 0;
        print "$customer $dept = $own_size\n" if $debug;

        # Only the directory's own bytes are added here; the rolled-up
        # figure would count the same data once per ancestor.
        foreach my $bucket ( 'Total', $dept ) {
            $totals{$customer}{$bucket}{'du'} ||= 0;
            $totals{$customer}{$bucket}{'du'} += $own_size;
            push( @{ $totals{$customer}{$bucket}{'dirs'} }, $item );
        }
    }

    # Close whatever levels are still open after the last entry.
    print $report "\n" for 0 .. $current_indent;
    print $report "\n\n\n";

    print "Listing by Customer and Department\n" if $debug;
    print $report "Listing by \nCustomer and Department\n";
    print $csv "Customer, Dept, total usage (bytes),\n";

    foreach my $customer ( sort keys %totals ) {
        foreach my $dept ( sort keys %{ $totals{$customer} } ) {
            my $du = $totals{$customer}{$dept}{'du'};
            print "$customer $dept ", data($du), "\n" if $debug;

            print $csv $customer, ",", $dept, ",", $du, ",\n";
            print $report "$customer";
            print $report "", $dept, "";
            print $report "", data($du);
            print $report "\n";
        }
    }
    print $report "\n";

    close($report) or warn "Problem closing $report_file: $!\n";
    close($csv)    or warn "Problem closing $csv_file: $!\n";
    return;
}

#MAIN

my ($idir) = @ARGV;

# Load the path -> "customer:department" map, one "path:customer:department"
# entry per line.
if ( -f $config_file ) {
    if ( open( my $conf, '<', $config_file ) ) {
        while ( my $line = <$conf> ) {
            chomp $line;
            my ( $fs, $cust, $dept ) = split( ":", $line );
            next if !defined $fs || !length $fs;    # blank or malformed line
            $cust = "" if !defined $cust;
            $dept = "" if !defined $dept;

            # Lookups never carry a trailing '/', so drop it from the key.
            $fs =~ s,/+$,,;

            $customers{$fs} = join( ":", $cust, $dept );
            print "got $fs - $cust - $dept\n" if $debug;
        }
        close($conf);
    }
    else {
        warn "WARNING: Couldn't read $config_file: $!\n";
    }
}
if ($debug) { print keys(%customers); }

STDOUT->autoflush(1);

if ( !$idir ) {
    foreach my $dir (@dirs) {
        print "\nSTARTING $dir\n" if $debug;
        dusage( $dir, $global_recurse );
    }
    do_output("ALL");
}
else {
    dusage( $idir, $global_recurse );
    do_output($idir);
}