use strict;
use warnings;

use HTML::Parser ();

# Scan a saved HTML page for anchors whose href mentions a "CIS Report"
# and print one "<date>: <pdf path>" line per matching anchor.
#
# Usage: perl script.pl [file.html]
# With no argument, the original hard-coded path is used.
my $html_file = @ARGV ? $ARGV[0] : 'c:/data/perl/script/volvo.html';

# Create parser object. Only start tags are handled; the handler is
# called with the tag name and a hash ref of the tag's attributes.
my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&a_tag, "tag, attr" ],
);

# parse_file() returns undef (with $! set) when the file cannot be
# opened, so check it instead of silently printing nothing.
$p->parse_file($html_file)
    or die "Cannot parse '$html_file': $!\n";

# Start-tag handler.
#   $tag  - name of the tag that was opened
#   $attr - hash ref of the tag's attributes
# Prints "<date>: <pdf path>" for <a> tags whose href contains both a
# "'CIS Report for MM/DD/YYYY" label and a "PopupInfo('...pdf" call.
# All other tags are ignored.
sub a_tag {
    my ($tag, $attr) = @_;

    return unless $tag eq 'a';

    # Anchors without an href (e.g. <a name="...">) have nothing to match.
    my $href = $attr->{'href'};
    return unless defined $href;

    return unless $href =~ m|'CIS Report for (\d{2}/\d{2}/\d{4})|;
    my $date = $1;

    # The greedy .* deliberately runs up to the last "pdf" in the href.
    my ($file) = $href =~ m|PopupInfo\('(.*pdf)|;
    unless (defined $file) {
        warn "No PDF path found in link for report dated $date\n";
        return;
    }

    print "$date: $file\n";
    return;
}

# Sample output:
#
# 12/26/2007: CE1B85BDDDE8F58A852573BD003B1FBC/$file/vcmorarorp346601226200712262007944204.pdf
# 12/26/2007: CE1B85BDDDE8F58A852573BD003B1FBC/$file/vcmorarorp346601226200712262007944204.pdf
# 12/25/2007: C0FBD06860B80B7B852573BC003B3738/$file/vcmorarorp346601225200712252007939900.pdf
# 12/25/2007: C0FBD06860B80B7B852573BC003B3738/$file/vcmorarorp346601225200712252007939900.pdf
# 12/24/2007: 31ED64C95AFF1A06852573BB003F55E5/$file/vcmorarorp346601224200712242007930592.pdf
# 12/24/2007: 31ED64C95AFF1A06852573BB003F55E5/$file/vcmorarorp346601224200712242007930592.pdf