Category: File Utilities
Author/Contact Info Neil
Description: The program searches recursively through a directory that you specify and creates an html report (at the $repdir variable) of the ten largest files for your internal administration web site.
#!/usr/bin/perl -w

#By Neil Watson on 08/18/01

#Usage filehog <directory>

use strict;
use warnings;
use File::Find;
use CGI qw(:standard);

# Directory that receives the finished report; change this to the
# location served by your administration web site.
my $repdir = "/home/neil";

# Scratch file that wanted() fills, through the global RAW handle, with
# one "size,name,uid" line per entry visited by File::Find.
# NOTE(review): a fixed, predictable name in /tmp can be pre-created or
# symlinked by another local user; consider File::Temp.
my $rawfile = "/tmp/filehog";

# Number of entries listed in the report.
my $top_n = 10;

my $dir = $ARGV[0];
die "Usage: filehog <directory>\n" unless defined $dir && length $dir;

# The command output ends in a newline, which must not leak into the
# report file name or the page title.
my $host = `hostname`;
die "Could not determine the host name\n" unless defined $host;
chomp $host;

# scalar localtime always yields "Wdy Mon DD HH:MM:SS YYYY", so the
# fields do not depend on the locale or on date(1)'s output format.
my ($wday, $month, $mday, undef, $year) = split ' ', scalar localtime;

open(RAW, '>', $rawfile) or die "Could not open $rawfile: $!";

#calls sub wanted
find(\&wanted, $dir);

# Close (and thereby flush) the raw data before reading it back;
# otherwise the last buffered lines could be missing.
close(RAW) or die "Could not close $rawfile: $!";

# Pick the largest entries. Each raw line is "size,name,uid"; the name
# itself may contain commas, so anchor on the first and the last comma.
my $by_size_desc = sub { $b->[0] <=> $a->[0] };
my @hogs;

open(my $raw_in, '<', $rawfile) or die "Could not open $rawfile: $!";
while (my $line = <$raw_in>) {
    chomp $line;
    my ($size, $name, $uid) =
        $line =~ m{\A \s* ([\d.]+) , (.*) , (\d+) \z}xms
        or next;
    push @hogs, [ $size, $name, $uid ];

    # Prune now and then so memory use stays bounded on huge trees.
    if (@hogs >= 100 * $top_n) {
        @hogs = (sort $by_size_desc @hogs)[0 .. $top_n - 1];
    }
}
close($raw_in);

@hogs = sort $by_size_desc @hogs;
splice(@hogs, $top_n) if @hogs > $top_n;

#write final report
my $report = "$repdir/hogs_$host.html";
my $when   = "$wday, $month $mday $year";

open(my $html, '>', $report) or die "Could not open $report: $!";
print {$html} start_html("Space Hogs on server $host for $when");
print {$html} h1("Space Hogs on server <i>$host</i> for $when")."\n\n";
print {$html} h3("These people are shamefully using more than their fair share of disk space.  Hopefully, under this public scrutiny, they will change their greedy ways.");

print {$html} "\n<hr>";
print {$html} "\n<table>\n";
print {$html} "<tr>\n";
print {$html} "\t<th>Size (MB)</th>\n";
print {$html} "\t<th>File Name and Location</th>\n";
print {$html} "\t<th>File Owner</th>\n";
print {$html} "</tr>\n";

for my $hog (@hogs) {
    my ($size, $name, $uid) = @{$hog};

    # Resolve the owner through the system's user database; fall back
    # to the numeric uid when no account exists for it.
    my $owner = getpwuid($uid);
    $owner = $uid unless defined $owner;

    # File names are arbitrary bytes, so escape them for HTML.
    print {$html} "<tr>\n";
    print {$html} "\t<td>$size</td>\n";
    print {$html} "\t<td>" . CGI::escapeHTML($name) . "</td>\n";
    print {$html} "\t<td>" . CGI::escapeHTML($owner) . "</td>\n";
    print {$html} "</tr>\n";
}

print {$html} "</table>";
print {$html} end_html();
close($html) or die "Could not close $report: $!";

unlink $rawfile;

# File::Find callback. For the entry currently held in $_ it appends one
# line of the form "size,name,uid" to the RAW filehandle, where size is
# the entry's size in units of 1,000,000 bytes formatted as "%5.1f",
# name is $File::Find::name and uid is the numeric owner id.
sub wanted {
    # lstat is used, so a symbolic link is measured itself and not
    # followed. A single call supplies both fields (4 = uid, 7 = size).
    my ($uid, $size) = (lstat($_))[4, 7];

    # lstat returns an empty list on failure (for example the entry
    # vanished mid-scan); skip it instead of recording an undefined size.
    return unless defined $size;

    printf RAW "%5.1f,%s,%s\n", $size / 1_000_000, $File::Find::name, $uid;
    return;
}


Edited by Ovid 2001-11-06

Replies are listed 'Best First'.
Re: Spacehog
by mkmcconn (Chaplain) on Nov 07, 2001 at 01:57 UTC

    The basic idea looks useful, neilwatson.

    Do you think you might be interested in something like HTML::Template for separating the collection and reporting of information, from the format it's printed in? It might make your code more manageable and extensible in the long-run.

      Thanks, I'll look into that.

      Neil Watson