# ---------------------------------------------------------------------------
# GUI setup: a keyword entry, a "Get Breadcrumb !" button and a four-column
# result table. The subs further down read $txt_var_1 and write $array_var
# and $i, so those names are part of this block's interface.
# ---------------------------------------------------------------------------

# Index of the next free data row in the result table (row 0 is the header).
my $i = 1;

# Hash ref bound to the table through -variable; cells are keyed "row,col".
my $array_var = {};

my $window = MainWindow->new;
$window->title("Breadcrumb Buster");
$window->geometry("600x850");
#my $window=$window->Frame()->pack();

my $lbl_var_1 = $window->Label(-text => "Enter the keyword: ")->pack();
my $txt_var_1 = $window->Entry()->pack(-pady => 20);

# The button runs Getbread, which reads the keyword back from $txt_var_1.
my $btn_var_1 = $window->Button(
    -text    => "Get Breadcrumb !",
    -command => \&Getbread,
)->pack();

# Header row of the table.
$array_var->{"0,0"} = "Sno";
$array_var->{"0,1"} = "Manufacturer Name";
$array_var->{"0,2"} = "Manufacturer URL";
$array_var->{"0,3"} = "Breadcrumb";

my $table = $window->Scrolled(
    'TableMatrix',
    -cols           => 4,
    -drawmode       => 'fast',
    -variable       => $array_var,
    -state          => "disabled",
    -flashtime      => 0.25,
    -flashmode      => 1,
    -resizeborders  => 'both',
    -colstretchmode => 'unset',
    -rowstretchmode => 'last',
)->pack(-pady => 20, -fill => 'x');
$table->configure(-height => "200");
#$table->rowHeight(0,1);

# Row 0 is rendered with the raised 'title' style.
$table->tagRow('title', 0);
$table->tagConfigure('title', -bd => 2, -relief => 'raised');
# NOTE(review): 'OddRow' is configured here, but nothing in the visible code
# applies the tag to any row.
$table->tagConfigure('OddRow', -bg => 'white', -fg => 'purple');
#$table->colWidth(0,5,3,6,4,10);
# Button callback: search Google Products for the keyword typed into
# $txt_var_1 and append one table row (serial number, merchant name, merchant
# URL, breadcrumb) for every result whose merchant page yields a breadcrumb.
#
# Takes no arguments and returns nothing useful. Side effects: prints the
# keyword and each accepted merchant name to STDOUT, writes rows into
# $array_var and advances the file-level row counter $i.
#
# At most five rows are added per invocation. The limit is counted per call
# because $i keeps growing across button clicks, so a test against a fixed
# row number would only ever trigger on the first click.
sub Getbread
{
    my $max_rows_per_search = 5;
    my $rows_added          = 0;

    my $input_keyword = $txt_var_1->get();
    print "$input_keyword\n";

    my $query_url = "http://www.google.com/products?as_q=$input_keyword&as_epq=&as_oq=&as_eq=&num=100&scoring=r&as_occt=any&price1=&price2=&show=dd&safe=active";

    # The & call form is kept on purpose: it compiles no matter whether the
    # sibling subs are declared with a prototype.
    my $content = &Geturl($query_url);

    RESULT:
    while ($content =~ m/<li\s*class\=\"result\"[^>]*?>([\w\W]*?)<\/li>/igs)
    {
        my $temp_content = $1;

        # Declared per result so that nothing leaks from a previous result
        # into this one (a stale breadcrumb would otherwise create a bogus row).
        my ($mer_url, $merchant_name, $bread);

        if ($temp_content =~ m/\"result\-seller\">\s*from\s*[\d]+\s*sellers/is)
        {
            # Product offered by several sellers: follow the product link to
            # its seller list and look at every seller listed there.
            next RESULT
                unless $temp_content =~ m/<h3\s*class\=\"result\-title\">\s*<a\s*href\=\"([^>]*?)\"\s*>\s*([\w\W]*?)\s*<\/h3>/is;

            my $cse_url = "http://www.google.com" . $1;
            decode_entities($cse_url);    # void context: decodes in place
            $cse_url =~ s/\#p/\&os\=sellers\#p/igs;

            my $cse_content = &Geturl($cse_url);

            SELLER:
            while ($cse_content =~ m/(<tr[^>]*?id\=\"[^>]*?>[\w\W]*?<\/span>\s*<\/td>\s*<\/tr>)/igs)
            {
                my $temp_cse_content = $1;

                # Rows without a recognisable seller link give us no URL to
                # fetch, so they are skipped.
                next SELLER
                    unless $temp_cse_content =~ m/\"seller\-name\"><a\s*href\=\"[^>]*?\?q\=(http(?:s)?\:\/\/[^>]*?\/[^>]*?)\&fr[^>]*?\"\s*>([\w\W]*?)<\/a>/is;

                # Copy the captures before calling anything else.
                my ($seller_url, $seller_name) = ($1, $2);

                # These assign to the per-result variables (no inner "my"),
                # so the name is still visible when the row is written.
                $merchant_name = decode_entities($seller_name);
                # NOTE(review): doubling apostrophes looks like leftover SQL
                # quoting - confirm it is wanted for on-screen display.
                $merchant_name =~ s/\'/\'\'/igs;
                $mer_url = uri_unescape($seller_url);

                # NOTE(review): every seller page is fetched, but only the
                # last seller's name/URL/breadcrumb ends up in the row.
                $bread = &Get_merchant($mer_url);
            }
        }
        elsif ($temp_content =~ m/<h3\s*class\=\"result\-title\">\s*<a\s*href\=\"([^>]*?)\"\s*>\s*([\w\W]*?)\s*<\/h3>/is)
        {
            # Single-merchant result: the merchant URL is embedded in the
            # redirect link as its q= parameter.
            my ($link, $title) = ($1, $2);
            my $cse_url_temp = uri_unescape($link);

            if ($cse_url_temp =~ m/q\=([^>]*?)\&fr/is)
            {
                $mer_url = uri_unescape($1);
            }

            $merchant_name = $title;
            $merchant_name =~ s/<[^>]*?>/ /igs;

            # Without a merchant URL there is nothing to fetch.
            $bread = &Get_merchant($mer_url) if defined $mer_url;
        }

        # Only results that produced a breadcrumb become table rows.
        next RESULT unless defined $bread && $bread ne "";

        $array_var->{"$i,0"} = $i;
        $array_var->{"$i,1"} = "$merchant_name";
        $array_var->{"$i,2"} = "$mer_url";
        $array_var->{"$i,3"} = "$bread";
        $i++;
        print "\n$merchant_name";

        $rows_added++;
        last RESULT if $rows_added >= $max_rows_per_search;
    }
}
# Fetch a URL with the file-level user agent $ua and return the response body.
#
# Parameter: the URL to fetch. It is URI-unescaped three times before use, so
# multiply-encoded links are accepted.
# Returns:   the response content as returned by the response object; the
#            response status is not checked here. An undefined or empty URL
#            returns '' without issuing a request.
#
# If the body contains Google's Captcha form, the sub prints a notice, sleeps
# for 1200 seconds and tries again, repeating until a non-Captcha body is
# received. NOTE(review): sleep() is a blocking call, and this sub is reached
# from a button callback - confirm that stalling the caller is acceptable.
sub Geturl
{
    my $url = shift;
    return '' unless defined $url && length $url;

    $url = uri_unescape(uri_unescape(uri_unescape($url)));

    while (1)
    {
        my $req = HTTP::Request->new(GET => "$url");
        $req->header("Content-Type" => "application/x-www-form-urlencoded");
        my $res = $ua->request($req);
        my $con = $res->content();

        # Anything other than the Captcha page is handed back to the caller.
        return $con unless $con =~ m/<form\s*action\=\"Captcha\"/is;

        print "\nGoogle Blocked - Now Sleeping...";
        sleep(1200);
    }
}
# Download a merchant page and extract its breadcrumb trail as plain text.
#
# Parameter: the merchant page URL.
# Returns:   the breadcrumb with HTML tags removed, entities decoded and
#            apostrophes doubled, or '' when the page contains none of the
#            recognised breadcrumb containers.
sub Get_merchant
{
    my $merchant_url = shift;

    # The & call form is kept on purpose: it compiles no matter whether
    # Geturl is declared with a prototype.
    my $merchant_content = &Geturl($merchant_url);
    return '' unless defined $merchant_content;

    # Candidate containers, tried in order; the first one that matches wins.
    my @breadcrumb_patterns = (
        qr/<h1\s*class\=\"breadcrumbfull\">\s*([\w\W]*?)\s*<\/h1>/is,
        qr/<div[^>]*?(?:bread|crumb)[^>]*?>\s*([\w\W]*?)\s*<\/div>/is,
        qr/<td[^>]*?NavPath[^>]*?>\s*([\w\W]*?)\s*<\/td>/is,
    );

    my $breadcrumb;
    for my $pattern (@breadcrumb_patterns)
    {
        if ($merchant_content =~ $pattern)
        {
            $breadcrumb = $1;
            last;
        }
    }

    # No container found: report "no breadcrumb" as an empty string instead
    # of running the clean-up below on an undefined value.
    return '' unless defined $breadcrumb;

    $breadcrumb =~ s/<[^>]*?>//igs;     # strip markup inside the container
    decode_entities($breadcrumb);       # void context: decodes in place
    # NOTE(review): doubling apostrophes looks like leftover SQL quoting -
    # confirm it is wanted for on-screen display.
    $breadcrumb =~ s/\'/\'\'/igs;
    chomp($breadcrumb);
    return $breadcrumb;
}
# Hand control to the Tk event loop.
MainLoop;