# Print the links found in every HTML file beneath one or more directory
# trees.
#
# Usage: perl <this script> [DIR ...]
# When no directory is given, the tree "c:/perl" is scanned.

# always
use strict;
use warnings;

# load modules
use File::Find;
require HTML::LinkExtor;

# Create a single HTML::LinkExtor instance that is reused for every file.
my $links = HTML::LinkExtor->new(
    # The first argument is a subroutine that is called for every link
    # the parser encounters in the HTML it is given.
    sub {
        # $tag is the element name (e.g. "a" or "img");
        # %attr_of holds that element's link attributes.
        my ($tag, %attr_of) = @_;

        # Only anchors are of interest.
        return unless $tag eq 'a';

        # Defensive: never run the match against an undefined href,
        # which would raise a warning under "use warnings".
        my $href = $attr_of{href};
        return unless defined $href;

        # Skip empty hrefs and page-internal links (those starting with "#").
        print "$href\n" if $href =~ /\A[^#]/;
        return;
    }
);

# Directory trees to scan: command-line arguments, or the default tree.
my @roots = @ARGV ? @ARGV : ('c:/perl');

# Find all HTML files in the tree(s).
find(
    # The first argument is the sub that is called for every file AND
    # directory found. File::Find changes into each directory before
    # calling it, so $_ (the bare file name) is the reliable thing to
    # test and open; $File::Find::name is used for display only.
    sub {
        # Only plain files whose name ends in .htm or .html (any case).
        return unless -f $_ and $_ =~ /\.html?\z/i;

        # Parse the file; the callback above prints each link.
        print "$File::Find::name contains:\n";
        $links->parse_file($_)
            or warn "cannot read $File::Find::name: $!\n";
        return;
    },
    @roots
);