#!/usr/bin/perl
use strict;
use warnings;

use File::Find;
use FileHandle;

# Program to process filesystems on a Celerra and work out which
# departments own what.
#
# It walks each directory in @dirs to a limited depth, records how many bytes
# sit in each directory, rolls those figures up into cumulative per-directory
# totals, and writes an HTML report plus a plain-text listing on STDOUT.
# Ownership is looked up in an optional config file.

# 5Mb.  Directories whose cumulative size is not above this are left out of
# the report.  (It was originally meant to prune recursion as well; that
# check is not active.)
my $size_threshold = 5242880;

# Ownership map.  The file looks like:
#   /fs001:SYS:Any
#   /fs001/Tech/res:Tech:Research
#   ...
my $config_file = "disk_usage.conf";

my @dirs = (
    "/fs001", "/fs002", "/fs003", "/fs004", "/fs005", "/fs006",
    "/fs007", "/fs008", "/fs009", "/fs010", "/fs011", "/fs012",
);
my $report_file = "disk_usage.html";
my $debug       = 1;

# Default value, but used for some interesting calcs: how many directory
# levels dusage() descends before sizing a whole subtree in one go.
my $global_recurse = 2;

# Directory entries that must never be descended into.
my @excl_directories = ( '.', '..' );
my %is_excluded = map { $_ => 1 } @excl_directories;

my %sizes;        # directory path => bytes attributed directly to it
my %customers;    # directory path => "customer:department"
my %totals;       # customer => department => { du => bytes, dirs => [...] }

# data($bytes)
# Format a byte count as a human-readable string such as "1.50kb".
# Only the first argument is used; an undefined value is treated as 0.
# The unit is capped at Tb, so larger values are simply shown as many Tb.
sub data {
    my ($input_number) = @_;
    $input_number = 0 unless defined $input_number;

    my $factor = 1024;
    # 'b ' carries a trailing space so every unit is two characters wide.
    my @sequence = ( 'b ', 'kb', 'Mb', 'Gb', 'Tb' );

    my $seq_num       = 0;
    my $output_number = $input_number;
    while ( $output_number >= $factor && $seq_num < $#sequence ) {
        $seq_num++;
        $output_number /= $factor;
    }
    return sprintf( '%3.2f%s', $output_number, $sequence[$seq_num] );
}

# escape_html($text)
# Return $text with the characters that are special in HTML replaced by
# entities, so directory and customer names cannot break the report markup.
sub escape_html {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# getsize(@paths)
# Return the summed size in bytes of everything File::Find visits below
# @paths (entries with a zero or unavailable size add nothing).
# Watch out for this.  It's a _very_ expensive call; look out for
# optimisations lower down in the chain.
sub getsize {
    my @args = @_;
    my $sum  = 0;
    print "Getting size for @args\n" if $debug;
    find(
        sub {
            my $bytes = -s $_;
            $sum += $bytes if $bytes;
        },
        @args
    );
    return $sum;
}

# get_size_of_files(@dirs)
# Return the summed size in bytes of the non-directory entries found
# directly inside each of @dirs (no recursion).  A directory that cannot be
# opened is reported on STDOUT and skipped.
sub get_size_of_files {
    my @args = @_;
    my $sum  = 0;
    print "Sizing files in @args\n" if $debug;
    foreach my $thisdir (@args) {
        my $dh;
        unless ( opendir $dh, $thisdir ) {
            print "WARNING: Couldn't open $thisdir\n";
            next;
        }
        while ( defined( my $fname = readdir $dh ) ) {
            my $path = "$thisdir/$fname";
            next if -d $path;
            my $bytes = -s $path;
            $sum += $bytes if $bytes;
        }
        closedir $dh;
    }
    return $sum;
}

# dusage($startpoint, $recurse_depth)
# Record disk usage for $startpoint (default '.') in %sizes.
# While $recurse_depth (default 0) is above zero, only the files directly in
# the directory are counted and each subdirectory is visited with the depth
# reduced by one.  Once the depth is used up, the whole remaining subtree is
# sized in one go with getsize().  Anything that is not a directory is
# ignored.
sub dusage {
    my $startpoint    = shift(@_) || '.';
    my $recurse_depth = shift(@_) || 0;

    print "Reading $startpoint\n" if $debug;
    return unless -d $startpoint;

    if ( $recurse_depth <= 0 ) {
        # Only process the expensive bit if we're not going to recurse
        # 'deeper'.
        $sizes{$startpoint} = getsize($startpoint);
        return;
    }

    $sizes{$startpoint} = get_size_of_files($startpoint);
    print "adding $sizes{$startpoint} to $startpoint\n" if $debug;

    # get_size_of_files() has already reported an unreadable directory, so
    # just stop here without a second warning.
    opendir my $dh, $startpoint or return;
    my @dusage_list;
    while ( defined( my $filename = readdir $dh ) ) {
        next if $is_excluded{$filename};
        my $path = "$startpoint/$filename";
        push @dusage_list, $path if -d $path;
    }
    closedir $dh;

    foreach my $dir (@dusage_list) {
        dusage( $dir, $recurse_depth - 1 );
    }
    return;
}

# rollup_sizes(\%own_size_of)
# Given a map of directory path => bytes held directly by that directory,
# return a hash of path => cumulative bytes, in which every ancestor path
# (down to the empty string that stands for the root) also includes the
# sizes of all entries beneath it.  The input hash is not modified.
sub rollup_sizes {
    my ($own_size_of) = @_;
    my %cumulative = %{$own_size_of};
    foreach my $dir ( sort keys %{$own_size_of} ) {
        my $ancestor = $dir;
        # Strip one trailing path component per step, whatever characters
        # the component contains.
        while ( $ancestor =~ s{/[^/]+\z}{} ) {
            $cumulative{$ancestor} += $own_size_of->{$dir};
            print "adding $dir ( $own_size_of->{$dir} ) to $ancestor\n"
                if $debug;
        }
    }
    return %cumulative;
}

# find_owner($dir)
# Return the "customer:department" entry from %customers for $dir or, if it
# has none, for its nearest ancestor that does.  Returns '' when nothing
# matches.
sub find_owner {
    my ($dir) = @_;
    my @dir_list = split m{/}, $dir;
    while (@dir_list) {
        # The 'dir' to look for in the customers map.
        my $srch_string = join '/', @dir_list;
        $srch_string =~ s{/\z}{};    # strip trailing /
        return $customers{$srch_string} if $customers{$srch_string};
        pop @dir_list;
    }
    return '';
}

# do_output($base_dir)
# Write the HTML report to $report_file and print a text version on STDOUT.
# The number of '/' characters in $base_dir (the last argument) sets the
# depth that is shown without indentation.  Rebuilds %totals from scratch and
# leaves %sizes untouched.  Dies if the report file cannot be written.
sub do_output {
    my $base_dir = @_ ? $_[-1] : '';
    $base_dir = '' unless defined $base_dir;
    my $base_indent = ( $base_dir =~ tr{/}{} );

    open my $report, '>', $report_file
        or die "Couldn't open $report_file for writing: $!\n";

    foreach my $item ( sort keys %sizes ) {
        print "directory size: $sizes{$item} = $item \n";
    }

    my %basic_sizes = %sizes;
    my %output      = rollup_sizes( \%sizes );
    %totals = ();

    if ($debug) {
        foreach my $item ( sort keys %output ) {
            print "$output{$item} = $item \n";
        }
    }

    print {$report} "<TABLE BORDER=1>\n";
    foreach my $item ( sort keys %output ) {
        next unless $output{$item} > $size_threshold;

        # Last path component, keeping its leading '/'.
        ( my $base_object = $item ) =~ s{.*/}{/};

        # Entries above the base directory give a negative depth; they get
        # no HTML indentation.
        my $indent_depth = ( $item =~ tr{/}{} ) - $base_indent;
        my $indent_html =
            $indent_depth > 0 ? '</TD><TD>' x $indent_depth : '';

        # Now we work out who 'owns' that data by matching the path, then
        # each of its ancestors, against the config map.
        my $owner = find_owner($item);
        print "$owner\n" if $debug;

        my $usage = data( $output{$item} );
        print {$report} '<TR><TD>', $indent_html, $usage, '</TD><TD>',
            escape_html($base_object), "</TD></TR>\n";
        printf '%*s', $indent_depth * 8, $usage;
        print "\t $base_object\n";

        $owner = 'unknown:unknown' unless $owner;
        my ( $customer, $dept ) = split /:/, $owner;
        $customer = 'unknown' unless defined $customer && length $customer;
        $dept     = 'unknown' unless defined $dept     && length $dept;

        # Ancestor entries created by the roll-up hold no data of their own.
        my $own_size =
            defined $basic_sizes{$item} ? $basic_sizes{$item} : 0;
        print "$customer $dept = $own_size\n" if $debug;

        foreach my $bucket ( 'Total', $dept ) {
            $totals{$customer}{$bucket}{'du'} += $own_size;
            push @{ $totals{$customer}{$bucket}{'dirs'} }, $item;
        }
    }

    print "Listing by Customer and Department\n";
    print {$report} "</TABLE><BR><BR><TABLE BORDER=1><TR><TD>\n",
        "Listing by Customer and Department\n", '</TD></TR>';
    foreach my $customer ( sort keys %totals ) {
        foreach my $dept ( sort keys %{ $totals{$customer} } ) {
            my $usage = data( $totals{$customer}{$dept}{'du'} );
            print "$customer $dept ", $usage, "\n";
            print {$report} '<TR><TD>', escape_html("$customer $dept "),
                $usage, "</TD></TR>\n";
        }
    }
    print {$report} "</TABLE>\n";

    close $report or die "Couldn't finish writing $report_file: $!\n";
    return;
}

# load_customers($file)
# Fill %customers from $file, one "path:customer:department" entry per line.
# Blank lines, lines starting with '#', and lines without a customer are
# skipped.  A missing file is not an error; an unreadable one is reported on
# STDERR and otherwise ignored.
sub load_customers {
    my ($file) = @_;
    return unless -f $file;

    my $conf;
    unless ( open $conf, '<', $file ) {
        warn "WARNING: Couldn't open $file: $!\n";
        return;
    }
    while ( my $line = <$conf> ) {
        chomp $line;
        next if $line =~ /^\s*(?:#|$)/;
        my ( $fs, $cust, $dept ) = split /:/, $line;
        next unless defined $fs && defined $cust;
        $dept = 'unknown' unless defined $dept && length $dept;
        # Lookups never carry a trailing '/', so neither should the keys.
        $fs =~ s{/\z}{} if length($fs) > 1;
        $customers{$fs} = join ':', $cust, $dept;
        print "got $fs - $cust - $dept\n" if $debug;
    }
    close $conf;
    return;
}

# MAIN
# Load the ownership map, scan every configured filesystem, write the report.
sub main {
    load_customers($config_file);
    print join( ' ', sort keys %customers ), "\n" if $debug;

    STDOUT->autoflush(1);

    foreach my $dir (@dirs) {
        print "\nSTARTING $dir\n" if $debug;
        dusage( $dir, $global_recurse );
    }
    do_output( $dirs[0] );
    return;
}

# Run only when executed as a script, so the subs can be loaded for testing.
main() unless caller;
In reply to Disk usage by customer by Preceptor
| For: | Use:  |
|------|-------|
| &    | &amp; |
| <    | &lt;  |
| >    | &gt;  |
| [    | &#91; |
| ]    | &#93; |