use LWP::Simple;
use File::Compare;
use HTML::TreeBuilder::XPath;
use LWP::UserAgent;
use Win32::Console::ANSI;
use Term::ANSIColor;
# crawl_content()
#
# Reads one URL per line from the project's input list. For every URL it
#   1. derives $domain from the URL,
#   2. reloads mainsub.pl and calls domain_check(),
#   3. fetches the page and, on success, writes the markup selected by
#      $xpath to data_$date/$competitor.html (cp1252 encoded),
#   4. on failure reports the URL on the console and in log.txt.
# After the whole list has been processed, comparefinal.pl is loaded and
# compare_result() is called.
#
# Takes no arguments. Returns whatever compare_result() returns.
# Dies if the input list cannot be opened; every other I/O problem is
# reported with warn() and the crawl continues with the next URL.
sub crawl_content
{
    use strict;
    use warnings;

    # Package globals shared with the helper scripts. $url and $domain are
    # set here; $xpath, $date and $competitor are never assigned in this
    # sub -- presumably domain_check() fills them in (TODO confirm against
    # mainsub.pl).
    our ( $url, $domain, $xpath, $date, $competitor );

    my $project_dir    = "C:/Users/jeyakuma/Desktop/shipping project";
    my $input_file     = "$project_dir/input/input.txt";
    my $log_file       = "C:/Users/jeyakuma/Desktop/log.txt";
    my $lookup_script  = 'C:/Users/jeyakuma/Desktop/perl/mainsub.pl';
    my $compare_script = 'C:/Users/jeyakuma/Desktop/perl/comparefinal.pl';

    # Runs a helper script with do() and reports why it failed, if it did.
    # Only warns: the sub call that follows will die by itself when the
    # script could not define it.
    my $run_script = sub {
        my ($path) = @_;
        my $result = do $path;
        if ($@)
        {
            warn "couldn't run $path: $@";
        }
        elsif ( !defined $result && !-r $path )
        {
            warn "couldn't read $path: $!\n";
        }
        return;
    };

    open( my $in, '<', $input_file )
        or die "cannot open $input_file: $!";

    # One user agent serves every request.
    my $ua = LWP::UserAgent->new( agent => "Mozilla/5.0" );

    # Opened on the first invalid URL and kept open, so that every invalid
    # URL of this run ends up in the log instead of only the last one.
    my $log;

    while ( my $line = <$in> )
    {
        $line =~ s/\s+\z//;    # drops the newline and any stray CR/blanks
        next if $line eq '';
        $url = $line;

        # Pattern kept verbatim: its unescaped dots match any character,
        # so it captures the host label plus the following three
        # characters (e.g. "example.co" for www.example.com).
        # NOTE(review): domain_check() may depend on exactly that value.
        ($domain) = $url =~ m|www.([A-Z a-z 0-9]+.{3}).|x;

        $run_script->($lookup_script);
        domain_check();

        # Evaluated after domain_check() because the messages below have
        # always shown $domain as it is at print time.
        my $label = defined $domain ? $domain : '';

        my $res = $ua->get($url);

        if ( !$res->is_success )
        {
            print color("green"), "$label Invalid url\n", color("reset");
            if ( !$log )
            {
                if ( open( my $fh, '>', $log_file ) )
                {
                    $log = $fh;
                }
                else
                {
                    warn "cannot open $log_file: $!\n";
                }
            }
            print {$log} " $label Invalid URL\n" if $log;
            next;
        }

        print "working on $label\n";

        # Parse the body that was just downloaded rather than fetching the
        # same URL a second time.
        my $html = $res->decoded_content;
        $html = $res->content unless defined $html;
        my $tree = HTML::TreeBuilder::XPath->new_from_content($html);

        # A malformed or missing $xpath makes the XPath engine die; treat
        # that like "nothing found" so the remaining URLs are processed.
        my $markup;
        my $ok = eval { $markup = $tree->findnodes_as_string($xpath); 1 };
        warn "xpath lookup failed for $label: $@" unless $ok;
        $tree->delete;

        if ( !defined $markup || $markup eq '' )
        {
            print "couldn't find the node\n";
            $markup = '';
        }

        # The file is (re)created even when nothing matched, as before.
        my $out_file = "$project_dir/data_$date/$competitor.html";
        if ( open( my $out, '>:encoding(cp1252)', $out_file ) )
        {
            print {$out} $markup;
            close($out) or warn "error closing $out_file: $!\n";
        }
        else
        {
            warn "cannot write $out_file: $!\n";
        }
    }

    close($in);
    if ($log)
    {
        close($log) or warn "error closing $log_file: $!\n";
    }

    $run_script->($compare_script);
    return compare_result();
}