#!/usr/bin/perl
#
# Checks every (siteid, imageurl) pair stored in big_ass_table with a
# parallel HTTP HEAD request and writes the matching SQL UPDATE statements
# to sql.torun.  An image passes when the HEAD request succeeds, the
# reported size is below 25600 bytes and the content type is image/gif or
# image/jpeg.  Passing rows get imagedown=0, failing rows imagedown+1.
#
# The script only generates SQL; it never runs the updates itself.

use strict;
use warnings;

use DBI;
use IO::Handle;
use LWP::Simple;
use Parallel::ForkManager;

my $MAX_WORKERS     = 30;           # concurrent child processes
my $MAX_IMAGE_BYTES = 25600;        # images must be strictly smaller
my $SQL_FILE        = 'sql.torun';  # generated statements go here

# --- Collect the work list ------------------------------------------------

my $dbh = DBI->connect("dbi:mysql:shsearch", "", "", { RaiseError => 1 });
my $sth = $dbh->prepare(
    "select siteid, imageurl from big_ass_table group by siteid, imageurl"
);
$sth->execute();

my ($siteid, $imageurl);
$sth->bind_columns(\($siteid, $imageurl));

# One job per (siteid, imageurl) row.  A hash keyed on siteid alone would
# silently drop all but one URL of a site that has several.
my @jobs;
while ($sth->fetch()) {
    # A NULL URL cannot be fetched, and "imageurl=..." can never match it.
    next unless defined $imageurl;

    # Quote while the handle is still connected, so the generated SQL stays
    # valid for URLs containing quotes or backslashes.
    push @jobs, [ $siteid, $imageurl, $dbh->quote($imageurl) ];
}

# Disconnect before forking so no child inherits a live database handle.
$dbh->disconnect();

# --- Check the images in parallel -----------------------------------------

my $count = scalar @jobs;
my $pm    = Parallel::ForkManager->new($MAX_WORKERS);

print "Ready to fetch headers ($count to process)...\n";

open(my $sql_fh, '>', $SQL_FILE)
    or die "Cannot open file for writting: $!";

# Both handles are shared with the forked children.  Without autoflush every
# child would inherit the parent's unflushed buffer and write it out again
# on exit, duplicating the reset statement below.
$sql_fh->autoflush(1);
STDOUT->autoflush(1);

# NOTE(review): this resets every counter before the per-row statements
# run, so imagedown never ends up above 1 - confirm that this is intended.
print {$sql_fh} "update big_ass_table set imagedown=0;\n";

my @ordered_jobs
    = sort { $a->[0] <=> $b->[0] or $a->[1] cmp $b->[1] } @jobs;

my $counter = 0;
for my $job (@ordered_jobs) {
    my ($job_siteid, $job_url, $quoted_url) = @{$job};
    $counter++;

    # start() returns the child's pid in the parent and 0 in the child, so
    # the parent moves on to the next job while the child does the check.
    $pm->start and next;

    # LWP::Simple's head() returns an empty list when the request fails.
    my @headers = head($job_url);
    my ($type, $size) = @headers[0, 1];
    my $shown_type = defined $type ? $type : 'unknown';
    my $shown_size = defined $size ? $size : 'unknown';

    my $ok = 0;
    my $verdict;
    if (!@headers) {
        $verdict = "Error\n";
    }
    elsif (defined $size && $size >= $MAX_IMAGE_BYTES) {
        $verdict = "Image size exceeded ($size bytes, should be < $MAX_IMAGE_BYTES)\n";
    }
    elsif (!defined $type || $type !~ m{image/(?:gif|jpeg)}) {
        $verdict = "Wrong file type ($shown_type, must be image/gif or image/jpeg)\n";
    }
    else {
        # A missing Content-Length is accepted, as in the original check.
        $verdict = "OK ($shown_size bytes, $type)\n";
        $ok      = 1;
    }

    # Emit each record with a single print so that lines written by
    # concurrent children are not interleaved mid-line.
    my $new_value = $ok ? '0' : 'imagedown+1';
    print {$sql_fh} "update big_ass_table set imagedown=$new_value"
        . " where siteid=$job_siteid and imageurl=$quoted_url;\n";
    print "($counter of $count) Checking $job_url for site $job_siteid...$verdict";

    $pm->finish();
}

$pm->wait_all_children();

# Write errors can surface as late as close(), so check it.
close($sql_fh) or die "Cannot close $SQL_FILE: $!";
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: URLs' Checking (Search Engines)
by CharlesClarkson (Curate) on Feb 01, 2002 at 13:42 UTC | |
by nikos (Scribe) on Feb 05, 2002 at 00:16 UTC |