#!/usr/bin/perl
use strict;
use warnings;

use LWP::RobotUA;
use HTML::SimpleLinkExtor;

my $http_ua = LWP::RobotUA->new(
    agent => 'theusefulbot',
    from  => 'bot@theusefulnet.com',
);
$http_ua->delay(10/6000);    # delay between requests, in minutes (about 0.1 s)

# Seed the queue from the command line, falling back to a default start page.
my @queue = @ARGV ? @ARGV : ('http://www.wired.com');

crawl();

sub crawl {
    my $page_count = 0;
    my %visited;

    while (my $url = shift @queue) {
        next if $visited{$url};
        $visited{$url} = 1;

        my $response = $http_ua->get($url);
        next unless $response->is_success;
        my $content = $response->content;

        # Save the URL and the raw page body to a numbered file.
        open my $fh, '>', "/var/www/data/$page_count.txt"
            or die "Cannot write /var/www/data/$page_count.txt: $!";
        print {$fh} "$url\n", $content;
        close $fh;

        print qq{Downloaded: "$url"\n};

        # Extract the <a href> targets from this page and enqueue them once.
        my $link_extractor = HTML::SimpleLinkExtor->new;
        $link_extractor->parse($content);
        push @queue, $link_extractor->a;

        $page_count++;
    }
}
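# A minimal example invocation, assuming the script is saved as crawler.pl
# (a hypothetical filename) and that /var/www/data exists and is writable.
# Start URLs may be passed on the command line; without arguments the script
# falls back to http://www.wired.com:
#
#   perl crawler.pl http://www.wired.com http://example.com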