#!/usr/bin/perl -w
use strict;

## initialize the objects that we need
use WWW::Mechanize;    ## used to fetch the pages we want

## don't die on HTTP errors -- we check success ourselves below
my $mech = WWW::Mechanize->new( autocheck => 0 );    ## our ::Mechanize object

## initialize an array of "bad" links
## we'll write this to a file when we're done
my @bad_links;

## site root
my $site_root = "http://www.mscd.edu/~women/scholarships/";

## array of URLs to check
## probably wanna stick these in a file in the future
my @urls_to_check = ( 'schola-f.shtml', 'scholg-l.shtml',
                      'scholm-r.shtml', 'schols-z.shtml' );

my $bad_links_file = "badlinks.txt";

## Start!
## loop through the URLs we need to check
for my $page ( @urls_to_check ) {
    print "Getting $site_root$page...\n";
    $mech->get( $site_root . $page );

    if ( $mech->success ) {
        print "Successfully retrieved $site_root$page\n";
    }
    else {
        print "Couldn't retrieve $site_root$page!\n";
        next;    ## no links to check if the page itself didn't load
    }

    ## grab every link on the page before following any of them,
    ## since each get() below replaces the current page
    my @links = $mech->links;

    ## loop through our list of links
    for my $link ( @links ) {
        my $url = $link->url_abs;
        print "Following $url\n";
        $mech->get( $url );

        ## we need to either move on to the next link if this one is
        ## successful or push it into the @bad_links array if it isn't
        if ( $mech->success ) {
            print "Successfully followed $url\n";
        }
        else {
            push @bad_links, $url;
            print "Unsuccessful in retrieving $url, moving on\n";
        }
    }
}

print "Finished checking links. Writing results.\n";

open( BADLINKS, '>>', $bad_links_file )
    or die "Can't open $bad_links_file: $!";
for ( @bad_links ) {
    print BADLINKS $_ . "\n";
}
close( BADLINKS );
## Finished!
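
## ------------------------------------------------------------------
## Sketch (not part of the script above): the comment next to
## @urls_to_check suggests reading the page list from a file instead
## of hard-coding it. Assuming a hypothetical plain-text file named
## urls.txt with one relative URL per line, that could look like:
##
##     my @urls_to_check;
##     open( my $url_fh, '<', 'urls.txt' )
##         or die "Can't open urls.txt: $!";
##     while ( my $line = <$url_fh> ) {
##         chomp $line;
##         next unless length $line;   ## skip blank lines
##         push @urls_to_check, $line;
##     }
##     close $url_fh;
## ------------------------------------------------------------------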