use HTML::LinkExtor;

# Shared link extractor, reused for every file handed to process_file().
my $p = HTML::LinkExtor->new();

# process_file()
#
# Extracts and prints the links found in one HTML file.
#
# Takes no arguments: the file name is read from $_ (this looks like a
# File::Find "wanted" callback -- confirm against the caller).
#
# Files whose names do not end in .htm/.html, and names that are not plain
# files, are skipped silently.  A file that cannot be opened produces a
# warning and is skipped.
#
# Output: one line per link on STDOUT, consisting of the tag name followed
# by its link attribute names and values, separated by single spaces.
#
# Returns a true value after printing; returns nothing when the file was
# skipped.
sub process_file {
    # Copy $_ right away so nothing called below can clobber it.
    my $file = $_;

    return unless $file =~ /\.html?$/i;    # skip unless *.htm / *.html file

    # Test the name itself rather than the cached `_` stat buffer: nothing
    # in this sub performs a stat first, so `_` could describe another file.
    return unless -f $file;                # skip unless really a file (not a dir)

    # parse_file() returns undef when the file cannot be opened.
    unless ( defined $p->parse_file($file) ) {
        warn "Cannot parse '$file': $!\n";
        return;
    }

    # links() returns a list of array refs, [ $tag, $attr => $url, ... ];
    # printing the refs directly would only show "ARRAY(0x...)".
    for my $link ( $p->links ) {
        print join( ' ', @{$link} ), "\n";
    }

    return 1;
}