#!perl -w
use strict;
use warnings;
use Data::Dumper;

# Parses the text of a permits report (a sample is embedded after __DATA__)
# into one record per permit and dumps every record to STDOUT.
#
# Each permit is read as three blank-line separated paragraphs:
#   the issue date, the address, and the description.
# NOTE(review): that paragraph layout is what the parsing logic expects; the
# embedded sample was laid out accordingly - confirm against a real report.

# Output a row of information
#
# Takes a hash reference. The record is printed with Data::Dumper only if it
# carries a 'permit' key with a true value. Afterwards all per-permit fields
# are removed so that nothing leaks into the next record; report-wide fields
# such as 'report_date' are kept.
sub flush {
    my( $record ) = @_;

    if( $record->{permit} ) {
        print Dumper $record;
    }

    # Clear every per-permit field, not just the date.
    delete @{ $record }{qw( permit address description )};
    return;
}

# Collects one paragraph: $first_line plus all following lines read from $fh
# up to (and consuming) the next blank line or end of input. Trailing
# whitespace is stripped from every line and the lines are joined with a
# single space.
sub _read_paragraph {
    my( $fh, $first_line ) = @_;

    my @parts = ( $first_line );
    while( defined( my $more = <$fh> ) ) {
        $more =~ s!\s+$!!;
        last if $more eq '';
        push @parts, $more;
    }
    return join ' ', @parts;
}

# Reads a report from the filehandle $fh and prints one record per permit
# (via flush). Returns the list ( $last_page, $expected_pages ) taken from
# the "Page N of M" lines; both are undef if no such line was seen.
# Dies on a line that is neither header, date, nor part of a record.
sub parse_report {
    my( $fh ) = @_;

    # This will collect all information for one entry:
    my %info;

    my $last_page;
    my $expected_pages;
    my $record_kind;     # which field the next paragraph belongs to
    my $in_header;       # true between "Page N of M" and the column headers
    my %next_record = (
        address     => 'description',
        description => undef,
    );

    LINE:
    while( defined( my $line = <$fh> ) ) {
        $line =~ s!\s+$!!;
        next LINE if $line eq '';    # blank lines only separate paragraphs

        if( $line =~ m!^Page (\d+) of (\d+)! ) {
            $last_page = $1;
            $expected_pages ||= $2;
            $in_header = 1;
            next LINE;
        }
        if( $line =~ m!^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (19\d\d|20\d\d)! ) {
            $info{ report_date } = "$2-$1";
            next LINE;
        }
        next LINE if $line =~ m!MONTHLY EXTERNAL MODIFICATIONS PERMITS REPORT!;
        if( $line =~ m!^Permit Issued! ) {
            # The column header row is the last line of the page header.
            $in_header = 0;
            next LINE;
        }

        if( $line =~ m{^(\d\d)/(\d\d)/((?:19|20)\d\d)$} ) {
            # A new permit starts: emit the previous one first.
            flush(\%info);
            # The report prints dates as MM/DD/YYYY; store them as YYYY-MM-DD.
            $info{ permit } = "$3-$1-$2";
            $record_kind = 'address';
            $in_header = 0;
            next LINE;
        }

        # Remaining page header lines (street address, print timestamp, ...)
        next LINE if $in_header;

        if( $record_kind ) {
            $info{ $record_kind } = _read_paragraph( $fh, $line );
            $record_kind = $next_record{ $record_kind };
        }
        else {
            die "Unknown line [$line] on line $.";
        }
    }

    # The last permit has no following date line that would trigger a flush.
    flush(\%info);

    return( $last_page, $expected_pages );
}

my( $last_page, $expected_pages ) = parse_report( \*DATA );
warn "Uhoh - expected $expected_pages but only read up to $last_page"
    if( defined $expected_pages and $expected_pages != $last_page );

__DATA__
Page 1 of 3
100 Civic Center Way
Calabasas, California 91302
7/12/2012 9:21:02AM
MONTHLY EXTERNAL MODIFICATIONS PERMITS REPORT
Jun 2012
Permit Issued Address Description
06/01/2012

26166 ROYMOR DR

Upgrade panel from 100 amp to 200 amp

06/04/2012

24956 NORMANS WAY

(6) light fixtures @ patio; (3) branch circuits; (4) electric heaters

06/05/2012

4273 VICASA DR

Construct 339 SF Covered Loggia