in reply to Swimsuits2006
in thread Swimsuits2004

The annual tradition:
#!/usr/bin/env perl
# Mirror the full-size photos of the 2007 Sports Illustrated swimsuit site
# into ./RESULTS.  Every gallery linked from the models index is visited,
# plus two galleries that are listed by hand below.  Files that already
# exist in RESULTS are skipped, so the script can be re-run safely.
use strict;
use warnings;
use LWP::Simple;

$|++;    # unbuffered STDOUT: show progress as each file is handled

my $SITE = "http://sportsillustrated.cnn.com";

# Gallery paths on the models index: /features/2007_swimsuit/<category>/<name>/
my $GALLERY = qr{/features/2007_swimsuit/(?:models|painting|onlocation)/[-\w]+/};

# Thumbnails are "<name>t.jpg"; the full-size image is "<name>.jpg" in the
# same directory.  $1 is the directory URL, $2 the image name.
my $THUMBNAIL = qr{"(http://i\.a\.cnn\.net/si/features/2007_swimsuit/images/photos/)([\w.\-]+)t\.jpg"};

my %done;    # gallery paths already processed by doit()

-d "RESULTS"
    or mkdir "RESULTS", 0755
    or die "cannot mkdir RESULTS: $!";

my $all_model_index = get("$SITE/features/2007_swimsuit/models/");
unless (defined $all_model_index) {
    warn "cannot fetch the models index; trying hand-listed galleries only\n";
    $all_model_index = "";
}

while ($all_model_index =~ m{($GALLERY)}g) {
    doit("$1");    # stringify: $1 is clobbered by the matches inside doit()
}

# Galleries that are not matched by the category pattern above.
doit("/features/2007_swimsuit/beyonce/");
doit("/features/2007_swimsuit/3d/");

# doit($base)
#   $base - site-relative gallery path with leading and trailing slash,
#           e.g. "/features/2007_swimsuit/3d/".
# Fetches the gallery's index2.html (falling back to the directory index),
# and mirrors the full-size version of every thumbnail found there into
# RESULTS/.  Prints one progress line per image: either "skip" or the HTTP
# status code returned by mirror().  Each $base is processed at most once.
sub doit {
    my ($base) = @_;
    return if $done{$base}++;

    print "$base =>\n";

    # $base already carries both slashes, so concatenate directly rather
    # than producing "//" in the request path.
    my $model_index = get("$SITE${base}index2.html");
    $model_index = get("$SITE$base") unless $model_index;
    unless (defined $model_index) {
        warn "cannot fetch gallery index for $base\n";
        return;
    }

    while ($model_index =~ m{$THUMBNAIL}g) {
        my $url  = "$1$2.jpg";
        my $file = "RESULTS/$2.jpg";
        print "$url => $file: ";
        if (-e $file) {
            print "skip\n";
        }
        else {
            print mirror($url, $file), "\n";
        }
    }
    return;
}

Replies are listed 'Best First'.
Swimsuits2008
by merlyn (Sage) on Feb 12, 2008 at 16:02 UTC
    The annual tradition:
    #!/usr/bin/env perl
    # Mirror the full-size photos of the 2008 Sports Illustrated swimsuit
    # site into ./RESULTS.  Every gallery linked from the models index is
    # visited, plus three galleries that are listed by hand below.  Files
    # that already exist in RESULTS are skipped.
    use strict;
    use warnings;
    use LWP::Simple;

    $|++;    # unbuffered STDOUT: show progress as each file is handled

    my $SITE = "http://sportsillustrated.cnn.com";

    # Gallery paths on the models index:
    #   /features/2008_swimsuit/<category>/<name>/
    my $GALLERY = qr{/features/2008_swimsuit/(?:models|athleteswives|painting|cheerleaders|onlocation)/[-\w]+/};

    # Thumbnails are "<name>_t.jpg"; the full-size image is "<name>.jpg" in
    # the same directory.  $1 is the directory URL, $2 the image name.
    my $THUMBNAIL = qr{"(http://i\.a\.cnn\.net/si/pr/subs/swimsuit/images/)([\w.\-]+)_t\.jpg"};

    my %done;    # gallery paths already processed by doit()

    -d "RESULTS"
        or mkdir "RESULTS", 0755
        or die "cannot mkdir RESULTS: $!";

    my $all_model_index = get("$SITE/features/2008_swimsuit/models/");
    unless (defined $all_model_index) {
        warn "cannot fetch the models index; trying hand-listed galleries only\n";
        $all_model_index = "";
    }

    # Report index2.html links that the category pattern does NOT cover, so
    # that they can be added to the hand-listed galleries below.
    for my $link ($all_model_index =~ m{"/features/2008[^"]+/index2\.html"}g) {
        next if $link =~ $GALLERY;
        print "$link\n";
    }
    # Uncomment to stop after the report:
    # exit 0;

    while ($all_model_index =~ m{($GALLERY)}g) {
        doit("$1");    # stringify: $1 is clobbered by the matches inside doit()
    }

    # Galleries that are not matched by the category pattern above.
    doit("/features/2008_swimsuit/selfportraits/");
    doit("/features/2008_swimsuit/heidi-klum/");
    doit("/features/2008_swimsuit/danica-patrick/");

    # doit($base)
    #   $base - site-relative gallery path with leading and trailing slash,
    #           e.g. "/features/2008_swimsuit/heidi-klum/".
    # Fetches the gallery's index2.html (falling back to the directory
    # index), and mirrors the full-size version of every thumbnail found
    # there into RESULTS/.  Prints one progress line per image: either
    # "skip" or the HTTP status code returned by mirror().  Each $base is
    # processed at most once.
    sub doit {
        my ($base) = @_;
        return if $done{$base}++;

        print "$base =>\n";

        # $base already carries both slashes, so concatenate directly
        # rather than producing "//" in the request path.
        my $model_index = get("$SITE${base}index2.html");
        $model_index = get("$SITE$base") unless $model_index;
        unless (defined $model_index) {
            warn "cannot fetch gallery index for $base\n";
            return;
        }

        while ($model_index =~ m{$THUMBNAIL}g) {
            my $url  = "$1$2.jpg";
            my $file = "RESULTS/$2.jpg";
            print "$url => $file: ";
            if (-e $file) {
                print "skip\n";
            }
            else {
                print mirror($url, $file), "\n";
            }
        }
        return;
    }
      This script is a bit messy, but it seems to be grabbing all of the core images. No video downloads yet... I'm not that smart.
      #!/usr/bin/env perl
      # Mirror the full-size photos of the 2009 Sports Illustrated swimsuit
      # site into ./RESULTS.  Every gallery linked from the models index is
      # visited, plus the body-painting galleries listed by hand below.
      # Files that already exist in RESULTS are skipped.
      use strict;
      use warnings;
      use LWP::Simple;

      $|++;    # unbuffered STDOUT: show progress as each file is handled

      my $SITE = "http://sportsillustrated.cnn.com";

      # Gallery paths on the models index: /2009_swimsuit/<category>/<name>/
      my $GALLERY = qr{/2009_swimsuit/(?:models|dancers|tennis|onlocation)/[-\w]+/};

      # Thumbnails are "<name>_t.jpg"; the full-size image is "<name>.jpg"
      # in the same directory.  $1 is the directory URL, $2 the image name.
      my $THUMBNAIL = qr{"(http://i\.cdn\.turner\.com/si/pr/subs/swimsuit/images/)([\w.\-]+)_t\.jpg"};

      # Gallery paths already processed by doit().  Declared before the
      # first call so the sub never runs ahead of the declaration.
      my %done;

      -d "RESULTS"
          or mkdir "RESULTS", 0755
          or die "cannot mkdir RESULTS: $!";

      my $all_model_index = get("$SITE/2009_swimsuit/models/");
      unless (defined $all_model_index) {
          warn "cannot fetch the models index; trying hand-listed galleries only\n";
          $all_model_index = "";
      }

      # Report index2.html links that the category pattern does NOT cover,
      # so that they can be added to the hand-listed galleries below.
      for my $link ($all_model_index =~ m{"/2009[^"]+/index2\.html"}g) {
          next if $link =~ $GALLERY;
          print "$link\n";
      }
      # Uncomment to stop after the report:
      # exit 0;

      while ($all_model_index =~ m{($GALLERY)}g) {
          doit("$1");    # stringify: $1 is clobbered by the matches inside doit()
      }

      # Body-painting galleries are not linked in a form the pattern matches.
      doit("/2009_swimsuit/painting/$_/")
          for qw(brooklyn-decker julie-henderson irina-shayk jessica-white);

      # doit($base)
      #   $base - site-relative gallery path with leading and trailing
      #           slash, e.g. "/2009_swimsuit/painting/irina-shayk/".
      # Fetches the gallery's index2.html (falling back to the directory
      # index), and mirrors the full-size version of every thumbnail found
      # there into RESULTS/.  Prints one progress line per image: either
      # "skip" or the HTTP status code returned by mirror().  Each $base is
      # processed at most once.
      sub doit {
          my ($base) = @_;
          return if $done{$base}++;

          print "$base =>\n";

          # $base already carries both slashes, so concatenate directly
          # rather than producing "//" in the request path.
          my $model_index = get("$SITE${base}index2.html");
          $model_index = get("$SITE$base") unless $model_index;
          unless (defined $model_index) {
              warn "cannot fetch gallery index for $base\n";
              return;
          }

          while ($model_index =~ m{$THUMBNAIL}g) {
              my $url  = "$1$2.jpg";
              my $file = "RESULTS/$2.jpg";
              print "$url => $file: ";
              if (-e $file) {
                  print "skip\n";
              }
              else {
                  print mirror($url, $file), "\n";
              }
          }
          return;
      }
        Updated for 2010:
        #!/usr/bin/perl
        # Mirror the full-size photos of the 2010 Sports Illustrated
        # swimsuit site into ./RESULTS.  Every gallery that the models index
        # links to via an index2.html page is visited.  Files that already
        # exist in RESULTS are skipped.
        use strict;
        use warnings;
        use LWP::Simple;

        $|++;    # unbuffered STDOUT: show progress as each file is handled

        my $SITE = "http://sportsillustrated.cnn.com";

        # Thumbnails are "<name>_t.jpg"; the full-size image is "<name>.jpg"
        # in the same directory.  $1 is the directory URL, $2 the image name.
        my $THUMBNAIL = qr{"(http://i\.cdn\.turner\.com/si/pr/subs/swimsuit/images/)([\w.\-]+)_t\.jpg"};

        # Gallery paths already processed by doit().  Declared before the
        # first call (and before "exit") so the declaration is really run.
        my %done;

        -d "RESULTS"
            or mkdir "RESULTS", 0755
            or die "cannot mkdir RESULTS: $!";

        my $all_model_index = get("$SITE/2010_swimsuit/models/");
        defined $all_model_index
            or die "cannot fetch the models index; nothing to do\n";

        # Any quoted link of the form "/2010.../index2.html" is a gallery;
        # $1 is the gallery directory including its trailing slash.
        while ($all_model_index =~ m{"(/2010[^"]+/)index2\.html"}g) {
            doit("$1");    # stringify: $1 is clobbered by the matches inside doit()
        }

        exit 0;

        # doit($base)
        #   $base - site-relative gallery path with leading and trailing
        #           slash, as captured from the models index.
        # Fetches the gallery's index2.html (falling back to the directory
        # index), and mirrors the full-size version of every thumbnail found
        # there into RESULTS/.  Prints one progress line per image: either
        # "skip" or the HTTP status code returned by mirror().  Each $base
        # is processed at most once.
        sub doit {
            my ($base) = @_;
            return if $done{$base}++;

            print "$base =>\n";

            # $base already carries both slashes, so concatenate directly
            # rather than producing "//" in the request path.
            my $model_index = get("$SITE${base}index2.html");
            $model_index = get("$SITE$base") unless $model_index;
            unless (defined $model_index) {
                warn "cannot fetch gallery index for $base\n";
                return;
            }

            while ($model_index =~ m{$THUMBNAIL}g) {
                my $url  = "$1$2.jpg";
                my $file = "RESULTS/$2.jpg";
                print "$url => $file: ";
                if (-e $file) {
                    print "skip\n";
                }
                else {
                    print mirror($url, $file), "\n";
                }
            }
            return;
        }

        -- Randal L. Schwartz, Perl hacker

        The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in RFC 2119.