#!/usr/bin/env perl
# Walk the 2008 swimsuit gallery index and hand every gallery directory
# found there (plus three hard-coded ones) to doit(), which mirrors the
# images into ./RESULTS.
use strict;
use warnings;
use LWP::Simple;

$|++;    # unbuffered STDOUT, so progress lines show up as each step runs

-d "RESULTS" or mkdir "RESULTS", 0755 or die "cannot mkdir RESULTS: $!";

# Gallery directories this script walks; $1 is the site-relative path.
my $gallery_re = qr{(/features/2008_swimsuit/(?:models|athleteswives|painting|cheerleaders|onlocation)/[-\w]+/)};

my $index_url       = "http://sportsillustrated.cnn.com/features/2008_swimsuit/models/";
my $all_model_index = get($index_url);
unless (defined $all_model_index) {
    # get() yields undef on failure.  Keep going: the hard-coded galleries
    # at the bottom do not depend on the index page.
    warn "cannot fetch $index_url\n";
    $all_model_index = "";
}

# Diagnostic: print every quoted index2.html link that the gallery pattern
# does NOT cover, so categories missing from $gallery_re are visible.
for my $link ($all_model_index =~ m{"/features/2008[^"]+/index2\.html"}g) {
    next if $link =~ $gallery_re;
    print "$link\n";
}
# exit 0;

# In list context the /g match returns each $1, i.e. each gallery path.
for my $base ($all_model_index =~ /$gallery_re/g) {
    doit($base);
}

# Galleries processed in addition to whatever the index pattern matched.
for my $base (qw(
    /features/2008_swimsuit/selfportraits/
    /features/2008_swimsuit/heidi-klum/
    /features/2008_swimsuit/danica-patrick/
)) {
    doit($base);
}
{
    # Gallery paths already walked.  The sub below captures this hash, so a
    # path that is passed to doit() more than once is only fetched once.
    my %done;

    # doit($base)
    #
    # Fetch one gallery page and mirror the images it references into RESULTS/.
    #
    #   $base - site-relative gallery directory, e.g.
    #           "/features/2008_swimsuit/heidi-klum/"
    #
    # Prints "$base =>" followed by one line per image: "skip" when the target
    # file already exists, otherwise whatever LWP::Simple::mirror() returns.
    # Returns nothing useful.
    sub doit {
        my $base = shift;
        return if $done{$base}++;
        print "$base =>\n";

        # Trim surrounding slashes so the URLs below carry no "//" in the path.
        (my $path = $base) =~ s{\A/+|/+\z}{}g;
        my $gallery_url = "http://sportsillustrated.cnn.com/$path/";

        # Try index2.html first and fall back to the directory page itself.
        my $model_index = get("${gallery_url}index2.html") || get($gallery_url);
        unless ($model_index) {
            warn "cannot fetch $gallery_url\n";
            return;
        }

        # Each quoted image URL ending in "_t.jpg" (presumably a thumbnail --
        # TODO confirm) names an image; the same URL without "_t" is downloaded.
        while ($model_index =~ m{"(http://i\.a\.cnn\.net/si/pr/subs/swimsuit/images/)([\w.\-]+)_t\.jpg"}g) {
            my ($image_dir, $image_name) = ($1, $2);
            my $url  = "$image_dir$image_name.jpg";
            my $file = "RESULTS/$image_name.jpg";

            print "$url => $file: ";
            if (-e $file) {
                print "skip\n";
            }
            else {
                print mirror($url, $file), "\n";
            }
        }
        return;
    }
}
| [reply] [d/l] |
This script is a bit messy, but it seems to be grabbing all of the core images. No video downloads yet... I'm not that smart.
#!/usr/bin/env perl
# Walk the 2009 swimsuit gallery index and hand every gallery directory
# found there (plus four hard-coded painting galleries) to doit(), which
# mirrors the images into ./RESULTS.
use strict;
use warnings;
use LWP::Simple;

$|++;    # unbuffered STDOUT, so progress lines show up as each step runs

-d "RESULTS" or mkdir "RESULTS", 0755 or die "cannot mkdir RESULTS: $!";

# Gallery directories this script walks; $1 is the site-relative path.
my $gallery_re = qr{(/2009_swimsuit/(?:models|dancers|tennis|onlocation)/[-\w]+/)};

my $index_url       = "http://sportsillustrated.cnn.com/2009_swimsuit/models/";
my $all_model_index = get($index_url);
unless (defined $all_model_index) {
    # get() yields undef on failure.  Keep going: the hard-coded painting
    # galleries at the bottom do not depend on the index page.
    warn "cannot fetch $index_url\n";
    $all_model_index = "";
}

# Diagnostic: print every quoted index2.html link that the gallery pattern
# does NOT cover, so categories missing from $gallery_re are visible.
for my $link ($all_model_index =~ m{"/2009[^"]+/index2\.html"}g) {
    next if $link =~ $gallery_re;
    print "$link\n";
}
# exit 0;

# In list context the /g match returns each $1, i.e. each gallery path.
for my $base ($all_model_index =~ /$gallery_re/g) {
    doit($base);
}

# Painting galleries are not part of $gallery_re, so they are listed by name.
# A named loop variable keeps $_ out of reach of anything doit() calls.
for my $model (qw(
    brooklyn-decker
    julie-henderson
    irina-shayk
    jessica-white
)) {
    doit("/2009_swimsuit/painting/$model/");
}
{
    # Gallery paths already walked.  The sub below captures this hash, so a
    # path that is passed to doit() more than once is only fetched once.
    my %done;

    # doit($base)
    #
    # Fetch one gallery page and mirror the images it references into RESULTS/.
    #
    #   $base - site-relative gallery directory, e.g.
    #           "/2009_swimsuit/painting/irina-shayk/"
    #
    # Prints "$base =>" followed by one line per image: "skip" when the target
    # file already exists, otherwise whatever LWP::Simple::mirror() returns.
    # Returns nothing useful.
    sub doit {
        my $base = shift;
        return if $done{$base}++;
        print "$base =>\n";

        # Trim surrounding slashes so the URLs below carry no "//" in the path.
        (my $path = $base) =~ s{\A/+|/+\z}{}g;
        my $gallery_url = "http://sportsillustrated.cnn.com/$path/";

        # Try index2.html first and fall back to the directory page itself.
        my $model_index = get("${gallery_url}index2.html") || get($gallery_url);
        unless ($model_index) {
            warn "cannot fetch $gallery_url\n";
            return;
        }

        # Each quoted image URL ending in "_t.jpg" (presumably a thumbnail --
        # TODO confirm) names an image; the same URL without "_t" is downloaded.
        while ($model_index =~ m{"(http://i\.cdn\.turner\.com/si/pr/subs/swimsuit/images/)([\w.\-]+)_t\.jpg"}g) {
            my ($image_dir, $image_name) = ($1, $2);
            my $url  = "$image_dir$image_name.jpg";
            my $file = "RESULTS/$image_name.jpg";

            print "$url => $file: ";
            if (-e $file) {
                print "skip\n";
            }
            else {
                print mirror($url, $file), "\n";
            }
        }
        return;
    }
}
| [reply] [d/l] |
#!/usr/bin/perl
# Walk the 2010 swimsuit gallery index and hand the directory of every
# linked index2.html page to doit(), which mirrors the images into ./RESULTS.
use strict;
use warnings;
use LWP::Simple;

$|++;    # unbuffered STDOUT, so progress lines show up as each step runs

-d "RESULTS" or mkdir "RESULTS", 0755 or die "cannot mkdir RESULTS: $!";

my $index_url       = "http://sportsillustrated.cnn.com/2010_swimsuit/models/";
my $all_model_index = get($index_url);

# get() yields undef on failure; every gallery comes from this page, so
# there is nothing left to do without it.
defined $all_model_index or die "cannot fetch $index_url\n";

# The capture is the link with its trailing "index2.html" removed, i.e. the
# gallery directory.  In list context the /g match returns each capture.
for my $base ($all_model_index =~ m{"(/2010[^"]+/)index2\.html"}g) {
    doit($base);
}
exit 0;
{
    # Gallery paths already walked.  The sub below captures this hash, so a
    # path that is passed to doit() more than once is only fetched once.
    # (The hash starts out empty even though the main program exits before
    # execution reaches this block.)
    my %done;

    # doit($base)
    #
    # Fetch one gallery page and mirror the images it references into RESULTS/.
    #
    #   $base - site-relative gallery directory, as captured from the index
    #           page (starts with "/2010" and ends with "/")
    #
    # Prints "$base =>" followed by one line per image: "skip" when the target
    # file already exists, otherwise whatever LWP::Simple::mirror() returns.
    # Returns nothing useful.
    sub doit {
        my $base = shift;
        return if $done{$base}++;
        print "$base =>\n";

        # Trim surrounding slashes so the URLs below carry no "//" in the path.
        (my $path = $base) =~ s{\A/+|/+\z}{}g;
        my $gallery_url = "http://sportsillustrated.cnn.com/$path/";

        # Try index2.html first and fall back to the directory page itself.
        my $model_index = get("${gallery_url}index2.html") || get($gallery_url);
        unless ($model_index) {
            warn "cannot fetch $gallery_url\n";
            return;
        }

        # Each quoted image URL ending in "_t.jpg" (presumably a thumbnail --
        # TODO confirm) names an image; the same URL without "_t" is downloaded.
        while ($model_index =~ m{"(http://i\.cdn\.turner\.com/si/pr/subs/swimsuit/images/)([\w.\-]+)_t\.jpg"}g) {
            my ($image_dir, $image_name) = ($1, $2);
            my $url  = "$image_dir$image_name.jpg";
            my $file = "RESULTS/$image_name.jpg";

            print "$url => $file: ";
            if (-e $file) {
                print "skip\n";
            }
            else {
                print mirror($url, $file), "\n";
            }
        }
        return;
    }
}
-- Randal L. Schwartz, Perl hacker
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in RFC 2119.
| [reply] [d/l] |