use strict;
use HTML::Parser ();
# Create the parser object.  api_version 3 selects the event-handler API;
# a_tag is registered as the start-tag handler and, per the "tag, attr"
# argspec, receives the tag name and a hash reference of its attributes.
my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&a_tag, "tag, attr" ],
);

# parse_file returns undef when the named file cannot be opened and leaves
# the reason in $!; report that instead of silently producing no output.
my $html_file = 'c:/data/perl/script/volvo.html';
$p->parse_file($html_file)
    or die "Cannot parse '$html_file': $!\n";
# a_tag($tag, $attr)
#
# Start-tag handler for HTML::Parser.
#   $tag  - name of the tag that was opened
#   $attr - hash reference of the tag's attributes
#
# For an <a> tag whose href contains both "'CIS Report for MM/DD/YYYY" and
# a "PopupInfo('<path>pdf" call, prints "MM/DD/YYYY: <path>pdf\n" to the
# currently selected output handle.  Every other tag is ignored.
sub a_tag {
    my ($tag, $attr) = @_;
    return unless $tag eq 'a';

    # Anchors without an href (e.g. <a name="...">) can never match.
    my $href = $attr->{'href'};
    return unless defined $href;

    # List-context match assigns the capture directly, so there is no
    # reliance on $1 surviving until a later statement.
    my ($date) = $href =~ m|'CIS Report for (\d{2}/\d{2}/\d{4})|
        or return;

    # The path capture stops at the closing quote of PopupInfo's first
    # argument, so a later "pdf" elsewhere in the href is not swallowed.
    # Anchors with no PDF path are skipped rather than printed with an
    # empty file name.
    my ($file) = $href =~ m|PopupInfo\('([^']*pdf)|
        or return;

    print "$date: $file\n";
}
####
12/26/2007: CE1B85BDDDE8F58A852573BD003B1FBC/$file/vcmorarorp346601226200712262007944204.pdf
12/26/2007: CE1B85BDDDE8F58A852573BD003B1FBC/$file/vcmorarorp346601226200712262007944204.pdf
12/25/2007: C0FBD06860B80B7B852573BC003B3738/$file/vcmorarorp346601225200712252007939900.pdf
12/25/2007: C0FBD06860B80B7B852573BC003B3738/$file/vcmorarorp346601225200712252007939900.pdf
12/24/2007: 31ED64C95AFF1A06852573BB003F55E5/$file/vcmorarorp346601224200712242007930592.pdf
12/24/2007: 31ED64C95AFF1A06852573BB003F55E5/$file/vcmorarorp346601224200712242007930592.pdf