in reply to Merlyns Web Link checker

Actually, I have used this very code (modified somewhat) recently and found it most useful (thanks, Merlyn).

Your problem comes from the declaration at the beginning. Why did you start the URL with a tilde and then add it again enclosed in angle brackets? Take those out and it should work.

Also, the line "$request = HTTP::Request('GET',$thisurl);" should be "$request = new HTTP::Request('GET',$thisurl);"


I admit it, I am Paco.

Replies are listed 'Best First'.
Re: Re: Merlyns Web Link checker
by Anonymous Monk on Feb 27, 2002 at 17:25 UTC
    I now get the following error after running the script:
    C:\Perl\bin>we3.pl verifying http://www.sun.com Cannot fetch http://www.sun.com (status 501 Protocol scheme '' is not +supported)
    </CODE> Am I suppose to use '/perl/bin' for my path? Please advise what I need to do to make this work. Here is the script:
    #!/usr/bin/perl use lib "/perl/bin"; use LWP::UserAgent; use HTML::Parser; use URI::URL; ## begin configure @CHECK = # list of initial starting points qw(http://osis.nima.mil); sub PARSE { # verify existance, parse for further URLs ## $_[0] is the absolute URL $_[0] =~ m!^<http://www>\.(teleport|stonehenge)\.com/~merlyn! and not $_[0] =~ /refindex/; } sub PING { # verify existence, but don't parse ## $_[0] is the absolute URL $_[0] =~ m!^(http):!; } ## end configure { package ParseLink; @ISA = qw(HTML::Parser); sub start { # called by parse my $this = shift; my ($tag, $attr) = @_; if ($tag eq "a") { $this->{links}{$attr->{href}}++; } elsif ($tag eq "img") { $this->{links}{$attr->{src}}++; } } sub get_links { my $this = shift; sort keys %{$this->{links}}; } } # end of ParseLink $ua = new LWP::UserAgent; $ua->agent("hverify/1.0"); $ua->env_proxy; $| = 1; MAINLOOP: while ($thisurl = shift @CHECK) { $thisurl =~ s/%7e/~/ig; # ugh :-) next if $did{$thisurl}++; if (PARSE $thisurl) { warn "fetching $thisurl\n"; $request = HTTP::Request('GET',$thisurl); $response = $ua->request($request); # fetch! unless ($response->is_success) { warn "Cannot fetch $thisurl (status ", $response->code, " ", $response->message,")\n"; next MAINLOOP; } next MAINLOOP unless $response->content_type =~ /text\/html/i; $base = $response->base; my $p = ParseLink->new; $p->parse($response->content); $p->parse(undef); for $link ($p->get_links) { $abs = url($link, $base)->abs; warn "... $link => $abs\n"; push(@CHECK, $abs); } next MAINLOOP; } if (PING $thisurl) { warn "verifying $thisurl\n"; for $method (qw(HEAD GET)) { $request = new HTTP::Request('GET',$thisurl); $response = $ua->request($request); # fetch! next MAINLOOP if $response->is_success; # ok } warn "Cannot fetch $thisurl (status ", $response->code, " ", $response->message,")\n"; next MAINLOOP; } warn "[skipping $thisurl]\n"; }