Re^2: Swimsuits2004
by demerphq (Chancellor) on Jan 20, 2005 at 19:24 UTC
I modified it to put a sleep(1+rand(5)) as the last statement of each loop. Given that this is downloading something like a thousand images, I reckon maintaining a low profile (i.e. not hammering their servers) is probably a good thing.
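For anyone wanting to bolt that on, here is a minimal self-contained sketch of the idea; the example.com URLs are placeholders standing in for the image links the real script extracts, and in the real script the sleep simply goes at the bottom of the image-fetching loop:
#!/usr/bin/perl
use strict;
use LWP::Simple;

# Placeholder URL list standing in for the links the real script extracts.
my @urls = map { "http://www.example.com/images/photo$_.jpg" } 1 .. 3;

for my $url (@urls) {
    my ($name) = $url =~ m{([^/]+)$};   # filename portion of the URL
    my $file   = "RESULTS/$name";
    print "$url => $file: ", mirror($url, $file), "\n" unless -e $file;
    sleep(1 + rand(5));                 # the added line: wait 1-5 seconds per fetch
}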
Swimsuits2005
by merlyn (Sage) on Feb 15, 2005 at 18:38 UTC
The 2005 results just went online... I'm fetching them with this:
#!/usr/bin/perl
use strict;
$|++;
use LWP::Simple;

-d "RESULTS" or mkdir "RESULTS", 0755 or die "cannot mkdir RESULTS: $!";

my $all_model_index = get "http://sportsillustrated.cnn.com/features/2005_swimsuit/";
while ($all_model_index =~ /(\/features\/2005_swimsuit\/models\/[-\w]+\.html)/g) {
  my $model_index = get "http://sportsillustrated.cnn.com/$1";
  while ($model_index =~ /\"(http:\/\/i\.a\.cnn\.net\/si\/features\/2005_swimsuit\/models\/images\/)([-\w]+)t\.jpg\"/g) {
    my $url = "$1$2.jpg";
    my $file = "RESULTS/$2.jpg";
    print "$url => $file: ";
    if (-e $file) {
      print "skip\n";
    } else {
      print mirror($url, $file), "\n";
    }
  }
}
Mere minutes after the 2006 site went live, I reverse-engineered it for this. This doesn't get the "subscription exclusive" shots as the prior versions did... they finally got smart and put those in a separate index. Nor does it grab the videos and a few of the other odd extras. "Until next time, enjoy!"
#!/usr/bin/perl
use strict;
$|++;
use LWP::Simple;

-d "RESULTS" or mkdir "RESULTS", 0755 or die "cannot mkdir RESULTS: $!";

my $all_model_index = get "http://sportsillustrated.cnn.com/features/2006_swimsuit/";
while ($all_model_index =~ /(\/features\/2006_swimsuit\/(allstar|models)\/[-\w]+\.html)/g) {
  doit("$1");
}
doit("/features/2006_swimsuit/allstar/allstar_reunion.html");
doit("/features/2006_swimsuit/athletes/");
doit("/features/2006_swimsuit/painting/");

sub doit {
  my $model_index = get "http://sportsillustrated.cnn.com/" . shift;
  while ($model_index =~ /\'(http:\/\/i.a.cnn.net\/si\/features\/2006_swimsuit\/images\/gallery\/photos\/)([\w.\-]+)t.jpg\'/g) {
    my $url = "$1$2.jpg";
    my $file = "RESULTS/$2.jpg";
    if (-e $file) {
      print "$url => $file: ";
      print "skip\n";
    } else {
      print "$url => $file: ";
      print mirror($url, $file), "\n";
    }
  }
}
#!/usr/bin/env perl
use strict;
$|++;
use LWP::Simple;

-d "RESULTS" or mkdir "RESULTS", 0755 or die "cannot mkdir RESULTS: $!";

my $all_model_index = get "http://sportsillustrated.cnn.com/features/2007_swimsuit/models/";
while ($all_model_index =~ /(\/features\/2007_swimsuit\/(models|painting|onlocation)\/[-\w]+\/)/g) {
  doit("$1");
}
doit("/features/2007_swimsuit/beyonce/");
doit("/features/2007_swimsuit/3d/");

sub doit {
  my $base = shift;
  print "$base =>\n";
  my $model_index = get "http://sportsillustrated.cnn.com/$base/index2.html";
  unless ($model_index) {
    $model_index = get "http://sportsillustrated.cnn.com/$base/";
  }
  while ($model_index =~ /\"(http:\/\/i.a.cnn.net\/si\/features\/2007_swimsuit\/images\/photos\/)([\w.\-]+)t.jpg\"/g) {
    my $url = "$1$2.jpg";
    my $file = "RESULTS/$2.jpg";
    if (-e $file) {
      print "$url => $file: ";
      print "skip\n";
    } else {
      print "$url => $file: ";
      print mirror($url, $file), "\n";
    }
  }
}
#!perl
use strict;
use warnings;
no warnings 'uninitialized';
use LWP::UserAgent;
use HTTP::Cookies;

if (!@ARGV or $ARGV[0] =~ m{^[-/]h(elp)?$} or $ARGV[0] =~ m{^[-/]\?$}) {
    print <<'*END*';
getimagesx.pl [-r referer] url [prefix [-r referer] url prefix] ...
    downloads all files in the sequence = finds the last number in the URL
    and keeps incrementing the number and downloading until it fails
    ten times in a row. The file will be named according to the part of
    the URL following the last slash.
    If you specify just the prefix then the prefix will be prepended to
    the file names.
    If you specify the -r followed by a URL then that URL will be
    downloaded, cookies remembered and the URL will be sent to
    the server as the HTTP_REFERER with the image requests.
*END*
    exit;
}

my $ua = LWP::UserAgent->new(
    env_proxy  => 1,
    keep_alive => 1,
    timeout    => 60,
    agent      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; (R1 1.3); .NET CLR 1.0.3705)',
    cookie_jar => HTTP::Cookies->new(),
);

our $referer = '';
while ($ARGV[0] =~ m{^[-/]r}) {
    shift(@ARGV);
    $referer = shift(@ARGV);
    if ($referer !~ /##/) {
        our $result = $ua->get( $referer, ':content_file' => 'referer.html');
        if ($result->code() != 200) {
            die "Failed to load the referer $referer\n";
        }
    }
}

while (@ARGV) {
    my $url = shift(@ARGV);
    if ($ARGV[0] =~ m{^[-/]r}) {
        shift(@ARGV);
        $referer = shift(@ARGV);
    }
    my $prefix = shift(@ARGV);
    if ($ARGV[0] =~ m{^[-/]r}) {
        shift(@ARGV);
        $referer = shift(@ARGV);
    }
    if (! defined $prefix and $url =~ m{([^/]+)/\d+\.\w+$}) {
        $prefix = $1;
        print "Automatic prefix '$prefix'\n";
    }
    my $suffix = '';
    if ($prefix =~ m{^(.*)/(.*)$}) {
        ($prefix, $suffix) = ($1, $2);
    }
    if ($url =~ m{^(.*/)(.*?)(\d+)([^0-9]*)$}) {
        my ($url_beg, $file, $num, $file_end) = ($1, $2, $3, $4);
        my ($result, $errors) = (undef, 0);
        while ($errors < 10) {
            local $referer = $referer;
            # a '##' in the referer is replaced by the current sequence number
            if ($referer =~ s/##/$num/g) {
                $result = $ua->get( $referer, ':content_file' => 'referer.html');
                if ($result->code() != 200) {
                    die "Failed to load the referer $referer\n";
                }
            }
            if ($file =~ /[\&\?]/) {
                print $url_beg.$file.$num.$file_end, " => ", $prefix.$num.$file_end.$suffix, "\n";
                $result = getstore($url_beg.$file.$num.$file_end, $prefix.$num.$file_end.$suffix);
            } else {
                print $url_beg.$file.$num.$file_end, " => ", $prefix.$file.$num.$file_end.$suffix, "\n";
                $result = getstore($url_beg.$file.$num.$file_end, $prefix.$file.$num.$file_end.$suffix);
            }
            if ($result == 200) {
                $errors = 0;
            } else {
                $errors++;
                print "\t$result\n";
            }
            $num++;
        }
        print "LAST RESULT: $result\n";
    } else {
        # no number in the URL: just download the single file
        my $file = $url;
        $file =~ s{^.*/}{};
        $file = $prefix . $file;
        if ($file) {
            getstore( $url, $file);
        } else {
            print STDERR "Can't download directories! ($url)\n";
        }
    }
}

sub getstore {
    my ($url, $file) = @_;
    # never overwrite: append ~1, ~2, ... before the extension until the name is free
    $file =~ s{(?:~(\d+))?(\.\w+)$}{'~' . ($1+1) . $2}e while (-e $file);
    my $result = $ua->get($url, ':content_file' => $file, referer => $referer);
    return $result->status_line() if $result->code() != 200;
    if (wantarray()) {
        return (200, $result->content_type);
    } else {
        if ($result->content_type =~ /^text/i) {
            print "Bloody bastards. They do not return proper HTTP status!\n";
            # unlink $file;
            return 404;
        }
        return 200;
    }
}
Just in case you wanted all images in a series ;-)
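For example, a hypothetical run (both URLs and the prefix are made up; the option syntax is the one documented in the built-in help above):
perl getimagesx.pl -r http://www.example.com/gallery.html http://www.example.com/pics/photo01.jpg vacation_
That would send gallery.html as the referer and fetch photo01.jpg, photo02.jpg, ... into vacation_photo01.jpg and so on, stopping after ten consecutive failures.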
Merlyn, I'm curious about something...
When you're whipping up short little scripts like this, do you find yourself using syntax like /a\/b\/c\/d/g instead of m{a/b/c/d}g out of habit from the old days when there wasn't much choice, or is it a conscious choice?
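For reference, the two spellings compile to the same pattern; only the delimiter, and hence the need to backwhack slashes, differs. A quick self-contained illustration:
#!/usr/bin/perl
use strict;

my $path = "features/2005_swimsuit/models/index.html";
print "toothpicks match\n" if $path =~ /2005_swimsuit\/models/;   # slash delimiters, escaped slashes
print "braces match\n"     if $path =~ m{2005_swimsuit/models};   # brace delimiters, no escaping needed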
I'm wrestling with GNU Emacs "cperl-mode", which doesn't seem to appreciate m{} and s{}{}g very well. But it handles the backwhacked version OK.
merlyn,
This certainly isn't as succinct, but I wanted to offer my contribution. It gets 110 of the 118 non-exclusive pics. All 8 of Marisa Miller's are skipped. It was fun - *shrug*.
I didn't bother fixing it to get the exclusive photos, but it shouldn't be too difficult. Incidentally, there is a non-linked photo we both miss: Carolyn Murphy has a hidden #2 pic.
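If you wanted to hunt for such unlinked shots yourself, here is a hedged sketch that uses LWP::Simple's head() to probe guessed sequence numbers; the base URL and filename pattern are purely hypothetical placeholders, not the site's real layout:
#!/usr/bin/perl
use strict;
use LWP::Simple;

# Hypothetical gallery layout; substitute the pattern seen in the linked images.
my $base = "http://www.example.com/si/models/images";
for my $n (1 .. 8) {
    my $url = "$base/carolyn-murphy_$n.jpg";
    print "$url: ", (head($url) ? "found" : "missing"), "\n";   # HEAD request: true if the image exists
}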
Re^2: Swimsuits2004
by zentara (Cardinal) on Jan 20, 2005 at 11:39 UTC
As usual, Merlyn does it best. :-) Maybe you should offer your services to SI, to better protect their "private" content? Then again, maybe not! :-)
I'm not really a human, but I play one on earth.
flash japh
Re^2: Swimsuits2004
by reasonablekeith (Deacon) on Jan 26, 2005 at 23:26 UTC
May I suggest, for those who might not have Linux, the following code, which will generate HTML link pages by genre and model name.
Please feel free to add html/body etc. tags if the non-compliance upsets you. Hmmm, I'm already thinking there should be a use CGI in there somewhere.
Enjoy.
#!/usr/bin/perl
use strict;
$|++;

my %files;
foreach my $file_name (glob "RESULTS/*") {
  my ($top_level, $model_name) = ($file_name =~ m/RESULTS\/(.*?)_(.*?)_.*$/);
  # expand two-digit year prefixes: 9x => 199x, 0x => 200x
  $top_level =~ s/^9/199/ or $top_level =~ s/^0/200/;
  $files{$top_level}{$model_name}{$file_name} = 1;
  $files{$model_name}{'all'}{$file_name} = 1;
}

-d 'HTML' or mkdir 'HTML' or die "mkdir HTML: $!";
open INDEX, ">HTML\\index.html" or die "open index.html: $!";
foreach my $top_level (sort keys %files) {
  print INDEX qq|<a href="./index_$top_level.html">$top_level</a><br/>\n|;
  open TOP_INDEX, ">HTML\\index_$top_level.html" or die "open index_$top_level.html: $!";
  foreach my $model_name (sort keys %{$files{$top_level}}) {
    open MODEL_INDEX, ">HTML\\index_${top_level}_${model_name}.html" or die "open model index: $!";
    my $image_count = 0;
    foreach my $file_name (sort keys %{$files{$top_level}{$model_name}}) {
      $image_count++;
      print MODEL_INDEX qq|<img src="../$file_name"><br/>\n|;
    }
    print TOP_INDEX qq|<a href="./index_${top_level}_${model_name}.html">$model_name ($image_count)</a><br/>\n|;
  }
}
Re^2: Swimsuits2004
by jbrugger (Parson) on Feb 14, 2005 at 07:29 UTC
Hmm...
Just couldn't help myself, and created thumbs and links...
Have fun :-) (P.S. feel free to improve it... that's the real fun, isn't it?)
#!/usr/bin/perl -w
use strict;
use Image::Magick;
use File::Glob qw(:globally :nocase);
use File::Basename;
use CGI;

my $realdir   = '/var/www/html/thumbnailer/images/';  # The dir that holds the original images
my @ext       = qw( jpg png gif );                    # Image extensions to look for
my $savedir   = '/var/www/html/thumbnailer/thumbs/';  # Dir to save thumbnails
my $serverdir = '/thumbnailer/';                      # Relative server-path
my $thumbdir  = 'thumbs/';                            # Holds the thumbnails for the webpage
my $imagedir  = 'images/';                            # Holds the real images for the webpage
my $x_size    = 150;   # Size in pixels.
my $y_size    = 150;   # Size in pixels.
my $resize    = 1;     # Resize before crop.
my $aspect_S  = 0.5;   # Only resize if the aspect ratio is above this value, else the thumbnail becomes too blurred.
my $aspect_H  = 2;     # Only resize if the aspect ratio is below this value, else the thumbnail becomes too blurred.
my $overwrite = 0;     # Always remake (overwrite) the thumbnails.
my $cols      = 5;     # Max horizontal thumbs.
my $rows      = 10;    # Max vertical thumbs.

my $cgi = new CGI;
main();
cleanUp();

sub main {
    my $content = "<tr>";
    my $files = readDir();
    my $thumbs_per_page = $rows * $cols;
    my $total       = scalar(@$files) ? scalar(@$files) : 0;
    my $pages       = $total / $thumbs_per_page;
    my $currentPage = $cgi->param('p') ? $cgi->param('p') : 1;
    my $hasPrevious = $currentPage - 1 ? 1 : 0;
    my $hasNext     = ($currentPage < $pages) ? 1 : 0;
    my $startImage  = ($currentPage - 1) * $thumbs_per_page;
    my $nav = "";
    my $c = 1;
    my $i = 0;
    foreach my $file (@$files) {
        $i++;
        if ($i >= $total) {
            # last file: build the Previous/Next navigation row
            $nav .= "<tr><td align=\"center\" nowrap=\"nowrap\" colspan=\"$cols\">";
            if ($hasPrevious) { $nav .= "<a href=\"?p=" . ($currentPage - 1) . "\">Previous<\/a>\ \ "; }
            if ($hasNext)     { $nav .= "<a href=\"?p=" . ($currentPage + 1) . "\">Next<\/a>"; }
            $nav .= "<\/td><\/tr>";
        }
        next if ($i <= $startImage || $i > ($startImage + $thumbs_per_page));
        if ($c > $cols) {
            $content .= "<\/tr><tr>\n";
            $c = 1;
        }
        # Check if the thumbnail already exists:
        my $filename = "thumb_" . fileparse($file);
        if (!-e $savedir . $filename || $overwrite) {
            # Make a new thumbnail...
            my $image = Image::Magick->new;
            $image->Read($file);
            my ($x, $y) = $image->Get('width', 'height');
            # Enlarge image if thumbnail > original, or resize before crop is enabled...
            if ($x < $x_size || $resize) {
                my $aspectratio = $y / $x;
                # Only resize if the aspect ratio is between the given borders
                if ($aspectratio > $aspect_S && $aspectratio < $aspect_H || $x < $x_size) {
                    $x = $x_size;
                    $y = $x * $aspectratio;
                    $image->Resize(width => $x, height => $y, filter => 'Cubic', blur => 1);
                }
            }
            if ($y < $y_size) {
                my $aspectratio = $x / $y;
                $y = $y_size;
                $x = $y * $aspectratio;
                $image->Resize(width => $x, height => $y, filter => 'Cubic', blur => 1);
            }
            # Get center (offset) of image, and crop to $x_size * $y_size.
            my $ox = ($x - $x_size) / 2;
            my $oy = ($y - $y_size) / 2;
            $image->Crop("${x_size}x${y_size}+$ox+$oy");
            $image->Write($savedir . $filename);
            $content .= " <td> <a href=\"" . $serverdir . $imagedir . fileparse($file) . "\" > <img src=" . $serverdir . $thumbdir . $filename . " alt=\"\" border=\"1\"> <\/a><\/td> ";
        } else {
            # Thumbnail already on disk; skip writing...
            $content .= " <td> <a href=\"" . $serverdir . $imagedir . fileparse($file) . "\" > <img src=" . $serverdir . $thumbdir . $filename . " alt=\"\" border=\"2\"> <\/a><\/td> ";
        }
        $c++;
    }
    $content .= "<\/tr>\n" . $nav;
    printHtml($content);
}

sub printHtml {
    my ($content) = @_;
    print $cgi->header(-type => 'text/html');
    print $cgi->start_html(-title => 'Testpage', -BGCOLOR => '#ffffff');
    print "<table border=\"0\" cellpadding=\"0\" cellspacing=\"3\">\n";
    print $content;
    print "\n<\/table>\n";
    print $cgi->end_html;
}

sub readDir {
    my $files = "*.{" . join(",", @ext) . "}";
    my @files = glob($realdir . $files);
    return \@files;
}

sub cleanUp {
    # undef() takes a single argument, so clear each variable individually
    undef $cgi;     undef $realdir;  undef @ext;      undef $serverdir;
    undef $savedir; undef $x_size;   undef $cols;     undef $y_size;
    undef $resize;  undef $aspect_S; undef $aspect_H; undef $overwrite;
    undef $rows;    undef $thumbdir; undef $imagedir;
}