#! /usr/bin/perl -w
use strict;
use warnings;

use File::Find;
use Data::Dumper;
use PDF;
use PDF::Parse;

# Maps a conference name to an array ref of the PDF paths found for it.
my %conferences = ();

# Root of the directory tree that is scanned for PDF files.
my $start_dir = "/gruvi/Data/Proceedings";

# File::Find "wanted" callback.
#
# Called once per directory entry with $_ set to the entry's name and
# $File::Find::dir set to the directory that contains it.  Entries whose
# name ends in ".pdf" are recorded in %conferences under the name of the
# first path component below $start_dir.
#
# Returns 1 when the entry was recorded, 0 otherwise (File::Find itself
# ignores the return value).
sub find_pdfs {
    # simple, yet should be effective
    return 0 unless /\.pdf\z/;

    # NOTE: this seems to ignore "top-level" PDFs
    # That's not a bug, it's a feature!
    #
    # The pattern needs a "/" *after* the conference name, so it only
    # matches PDFs that live at least two directories below $start_dir.
    # PDFs directly in $start_dir, directly in a first-level directory
    # (File::Find normally reports the directory without a trailing
    # slash), or below a first-level directory whose name contains
    # non-word characters all fall through to the "Top-level" bucket.
    # NOTE(review): confirm that grouping is still the intended one.
    #
    # \Q...\E keeps any regex metacharacters in $start_dir literal.
    my ($conference) = ($File::Find::dir =~ m{\Q$start_dir\E/(\w+?)/});

    # Test definedness rather than truth so that a conference directory
    # literally named "0" is not mistaken for "no match".
    $conference = "Top-level" unless defined $conference;

    push @{ $conferences{$conference} }, "$File::Find::dir/$_";
    return 1;
}

File::Find::find(\&find_pdfs, $start_dir);

# One page per conference, written to "<conference>.html".
for my $conference (sort keys %conferences) {
    write_conference_page($conference, "$conference.html",
        $conferences{$conference});
}

# Write the page for one conference.
#
# Parameters:
#   $conference - conference name; printed into the page.
#   $outfile    - file name of the page; created (or truncated) relative
#                 to the current working directory.
#   $pdfs       - array ref of PDF paths collected for the conference.
#                 NOTE(review): accepted but not used yet -- the page
#                 does not list the PDFs.
#
# Dies if the output file cannot be opened or closed.  Returns 1.
sub write_conference_page {
    my ($conference, $outfile, $pdfs) = @_;

    # Lexical handle: closed automatically if we die, and cannot clash
    # with a global bareword handle elsewhere.
    open my $page, '>', "./$outfile"
        or die "Cannot open $outfile: $!\n";

    # we're going to callously ignore proper HTML for the present
    # NOTE(review): the text written below consists only of newlines and
    # the conference name; if HTML markup was intended here it is absent
    # from these strings -- confirm the desired output.
    print {$page} "\n\n";

    # we're also going to callously ignore proper Perl HTML-gen style
    # for the moment
    print {$page} "\n\n$conference\n\n\n";
    print {$page} "\n\n\n\n";
    print {$page} "\n\n";

    # Buffered write errors only surface at close, so check it.
    close $page
        or die "Cannot close $outfile: $!\n";

    return 1;
}