#!/usr/bin/perl
# Adapted, in part, from Recipe 20.7 of The Perl Cookbook.
#
# Link checker: fetch a page, extract every link it contains, and
# report any links that cannot be resolved.

use strict;
use warnings;

use HTML::LinkExtor;
use LWP::Simple;

my @links;      # holds all links found on target page
my %dead;       # set of unresolvable (dead) links
my %checked;    # cache of already-tested URLs so duplicates are fetched once

my $base_url = shift
    or die "Usage: $0 <url>\n\n",
           "(including scheme prefix, e.g. http://)\n";

unless ( url_exists($base_url) ) {
    die "Unable to test links;\n",
        "$base_url could not be reached\n\n";
}

print "Parsing $base_url...\n";
my $parser = HTML::LinkExtor->new( undef, $base_url );
$parser->parse( get($base_url) );
@links = $parser->links;

print "Checking links";
foreach my $linkarray (@links) {
    print '.';    # show user that something is happening.
    my @element  = @$linkarray;
    my $elt_type = shift @element;
    while (@element) {
        my ( $attr_name, $attr_value ) = splice( @element, 0, 2 );

        # Skip mailto: links, which you shouldn't have anyway.
        # Anchored at the start of the URL so pages whose path merely
        # *contains* "mailto" (e.g. /docs/mailto-help.html) still get checked.
        next if $attr_value =~ /\Amailto:/i;

        # Test each distinct URL only once; repeated links would otherwise
        # trigger a fresh HEAD (and possibly GET) request every time.
        next if exists $checked{$attr_value};
        $checked{$attr_value} = url_exists($attr_value);
        $dead{$attr_value}++ unless $checked{$attr_value};
    }
}

print "\nAll links checked.\n\n",
      "$base_url contains ", scalar @links, " link(s)";

unless ( scalar keys %dead ) {
    print "; all live.\n";
}
else {
    print "; sadly, the following links aren't working:\n\n";
    for ( sort keys %dead ) { print $_, "\n" }
    print "\n";
}

# url_exists( $url )
# Returns a true value if $url can be reached, false otherwise.
#
# Two tests are done because some sites do not appear to properly
# support HEAD requests.  Ex: www.borland.com
# As a result, we were getting some false positives; the extra GET
# prevents those.
sub url_exists {
    my $url = shift;
    return head($url) || get($url);
}