#!/usr/bin/perl -w
use strict;

## initialize the objects that we need
use WWW::Mechanize;    ## used to fetch the pages we want

## don't die on HTTP errors -- we check success ourselves below
my $mech = WWW::Mechanize->new( autocheck => 0 );    ## our ::Mechanize object

## initialize an array of "bad" links
## we'll write this to a file when we're done
my @bad_links;

## site root
my $site_root = "http://www.mscd.edu/~women/scholarships/";

## array of URLs to check
## probably wanna stick these in a file in the future
my @urls_to_check = ( 'schola-f.shtml', 'scholg-l.shtml',
                      'scholm-r.shtml', 'schols-z.shtml' );

my $bad_links_file = "badlinks.txt";

## Start!
## loop through the URLs we need to check
for my $page ( @urls_to_check ) {
    print "Getting $site_root$page...\n";
    $mech->get( $site_root . $page );

    if ( $mech->success ) {
        print "Successfully retrieved $site_root$page\n";
    }
    else {
        print "Couldn't retrieve $site_root$page!\n";
        next;    ## no links to check if the page itself didn't load
    }

    ## grab every link on the page before following any of them,
    ## since each get() below replaces the current page
    my @links = $mech->links;

    ## loop through our list of links
    for my $link ( @links ) {
        my $url = $link->url_abs;
        print "Following $url\n";
        $mech->get( $url );

        ## we need to either move on to the next link if this one is
        ## successful or push it into the @bad_links array if it isn't
        if ( $mech->success ) {
            print "Successfully followed $url\n";
        }
        else {
            push @bad_links, $url;
            print "Unsuccessful in retrieving $url, moving on\n";
        }
    }
}

print "Finished checking links. Writing results.\n";

open( BADLINKS, '>>', $bad_links_file )
    or die "Can't open $bad_links_file: $!";
for ( @bad_links ) {
    print BADLINKS $_ . "\n";
}
close( BADLINKS );
## Finished!
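
## ------------------------------------------------------------------
## Sketch (not part of the script above): the comment next to
## @urls_to_check suggests reading the page list from a file instead
## of hard-coding it. Assuming a hypothetical plain-text file named
## urls.txt with one relative URL per line, that could look like:
##
##     my @urls_to_check;
##     open( my $url_fh, '<', 'urls.txt' )
##         or die "Can't open urls.txt: $!";
##     while ( my $line = <$url_fh> ) {
##         chomp $line;
##         next unless length $line;   ## skip blank lines
##         push @urls_to_check, $line;
##     }
##     close $url_fh;
## ------------------------------------------------------------------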