#always
use strict;

#load modules
use File::Find;
require HTML::LinkExtor;

# Create an HTML::LinkExtor instance for later use.  The parser object is
# reused for every file that gets parsed further down.
my $links = HTML::LinkExtor->new
(
	# The first argument is a callback that the parser invokes once for
	# every link-carrying element it encounters in the HTML it parses.
	sub
	{
		# $tag is the element name (e.g. "a" or "img");
		# %attr holds the link attributes of that element (e.g. href, src).
		# (Named %attr rather than %links to avoid confusion with the
		# outer $links parser object.)
		my ($tag, %attr) = @_;

		# Only anchors are of interest.
		return unless $tag eq "a";

		# Guard against an "a" element reported without an href so that
		# undef is never fed to the regex match below.
		my $href = $attr{href};
		return unless defined $href;

		# Print the target unless it is page-internal (starts with "#").
		# The pattern requires at least one character, so empty hrefs
		# are skipped as well.
		print "$href\n" if $href =~ /^[^#]/;

		return;
	}
);

# Find all HTML files in a directory tree and print the links they contain.
find
(
	# The first argument is the callback that File::Find invokes for
	# every file AND directory it visits below the start directory.
	sub
	{
		# By default File::Find chdir()s into each directory it visits:
		# $_ is the bare file name relative to the current directory,
		# while $File::Find::name is the path including the start
		# directory.  File tests and opens therefore use $_ (this also
		# works when the start directory is relative); the full name is
		# used for display only.
		my $file = $_;

		# Only plain files whose name ENDS in .htm or .html qualify.
		# The pattern is anchored so "page.html.bak" is not picked up,
		# and case-insensitive because names such as INDEX.HTM are
		# common on Windows, where this tree lives.
		return unless -f $file && $file =~ /\.html?\z/i;

		# Parse the file; the parser's callback prints every link found.
		print "$File::Find::name contains:\n";

		# parse_file returns undef when the file cannot be opened;
		# report that instead of silently skipping the file.
		defined $links->parse_file($file)
			or warn "Cannot parse $File::Find::name: $!\n";

		return;
	}
	, "c:/perl"
);