Re^2: Swimsuits2004
by demerphq (Chancellor) on Jan 20, 2005 at 19:24 UTC
I modified it to put a sleep(1+rand(5)) as the last statement of each loop. Given that this is downloading something like a thousand images, I reckon maintaining a low profile (i.e. not hammering their servers) is probably a good thing.
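For anyone wanting to bolt that on, here is a minimal self-contained sketch of the idea; the example.com URLs are placeholders standing in for the image links the real script extracts, and in the real script the sleep simply goes at the bottom of the image-fetching loop:
#!/usr/bin/perl
use strict;
use LWP::Simple;

# Placeholder URL list standing in for the links the real script extracts.
my @urls = map { "http://www.example.com/images/photo$_.jpg" } 1 .. 3;

for my $url (@urls) {
    my ($name) = $url =~ m{([^/]+)$};   # filename portion of the URL
    my $file   = "RESULTS/$name";
    print "$url => $file: ", mirror($url, $file), "\n" unless -e $file;
    sleep(1 + rand(5));                 # the added line: wait 1-5 seconds per fetch
}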
Swimsuits2005
by merlyn (Sage) on Feb 15, 2005 at 18:38 UTC
The 2005 results just went online... I'm fetching them with this:
#!/usr/bin/perl
use strict;
$|++;
use LWP::Simple;

-d "RESULTS" or mkdir "RESULTS", 0755 or die "cannot mkdir RESULTS: $!";

my $all_model_index = get "http://sportsillustrated.cnn.com/features/2005_swimsuit/";
while ($all_model_index =~ /(\/features\/2005_swimsuit\/models\/[-\w]+\.html)/g) {
  my $model_index = get "http://sportsillustrated.cnn.com/$1";
  while ($model_index =~ /\"(http:\/\/i\.a\.cnn\.net\/si\/features\/2005_swimsuit\/models\/images\/)([-\w]+)t\.jpg\"/g) {
    my $url = "$1$2.jpg";
    my $file = "RESULTS/$2.jpg";
    print "$url => $file: ";
    if (-e $file) {
      print "skip\n";
    } else {
      print mirror($url, $file), "\n";
    }
  }
}
Mere minutes after the 2006 site went live, I reverse-engineered it for this. This doesn't get the "subscription exclusive" shots as the prior versions did... they finally got smart and put those in a separate index. Nor does it grab the videos and a few of the other odd extras. "Until next time, enjoy!"
#!/usr/bin/perl
use strict;
$|++;
use LWP::Simple;

-d "RESULTS" or mkdir "RESULTS", 0755 or die "cannot mkdir RESULTS: $!";

my $all_model_index = get "http://sportsillustrated.cnn.com/features/2006_swimsuit/";
while ($all_model_index =~ /(\/features\/2006_swimsuit\/(allstar|models)\/[-\w]+\.html)/g) {
  doit("$1");
}
doit("/features/2006_swimsuit/allstar/allstar_reunion.html");
doit("/features/2006_swimsuit/athletes/");
doit("/features/2006_swimsuit/painting/");

sub doit {
  my $model_index = get "http://sportsillustrated.cnn.com/" . shift;
  while ($model_index =~ /\'(http:\/\/i.a.cnn.net\/si\/features\/2006_swimsuit\/images\/gallery\/photos\/)([\w.\-]+)t.jpg\'/g) {
    my $url = "$1$2.jpg";
    my $file = "RESULTS/$2.jpg";
    if (-e $file) {
      print "$url => $file: ";
      print "skip\n";
    } else {
      print "$url => $file: ";
      print mirror($url, $file), "\n";
    }
  }
}
#!/usr/bin/env perl
use strict;
$|++;
use LWP::Simple;

-d "RESULTS" or mkdir "RESULTS", 0755 or die "cannot mkdir RESULTS: $!";

my $all_model_index = get "http://sportsillustrated.cnn.com/features/2007_swimsuit/models/";
while ($all_model_index =~ /(\/features\/2007_swimsuit\/(models|painting|onlocation)\/[-\w]+\/)/g) {
  doit("$1");
}
doit("/features/2007_swimsuit/beyonce/");
doit("/features/2007_swimsuit/3d/");

sub doit {
  my $base = shift;
  print "$base =>\n";
  my $model_index = get "http://sportsillustrated.cnn.com/$base/index2.html";
  unless ($model_index) {
    $model_index = get "http://sportsillustrated.cnn.com/$base/";
  }
  while ($model_index =~ /\"(http:\/\/i.a.cnn.net\/si\/features\/2007_swimsuit\/images\/photos\/)([\w.\-]+)t.jpg\"/g) {
    my $url = "$1$2.jpg";
    my $file = "RESULTS/$2.jpg";
    if (-e $file) {
      print "$url => $file: ";
      print "skip\n";
    } else {
      print "$url => $file: ";
      print mirror($url, $file), "\n";
    }
  }
}
#!perl
use strict;
use warnings;
no warnings 'uninitialized';
use LWP::UserAgent;
use HTTP::Cookies;

if (!@ARGV or $ARGV[0] =~ m{^[-/]h(elp)?$} or $ARGV[0] =~ m{^[-/]\?$}) {
    print <<'*END*';
getimagesx.pl [-r referer] url [prefix [-r referer] url prefix] ...
    downloads all files in the sequence = finds the last number in the URL
    and keeps incrementing the number and downloading until it fails
    ten times in a row. The file will be named according to the part of
    the URL following the last slash.
    If you specify just the prefix then the prefix will be prepended to
    the file names.
    If you specify the -r followed by a URL then that URL will be
    downloaded, cookies remembered and the URL will be sent to
    the server as the HTTP_REFERER with the image requests.
*END*
    exit;
}

my $ua = LWP::UserAgent->new(
    env_proxy  => 1,
    keep_alive => 1,
    timeout    => 60,
    agent      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; (R1 1.3); .NET CLR 1.0.3705)',
    cookie_jar => HTTP::Cookies->new(),
);

our $referer = '';
while ($ARGV[0] =~ m{^[-/]r}) {
    shift(@ARGV);
    $referer = shift(@ARGV);
    if ($referer !~ /##/) {
        our $result = $ua->get( $referer, ':content_file' => 'referer.html');
        if ($result->code() != 200) {
            die "Failed to load the referer $referer\n";
        }
    }
}

while (@ARGV) {
    my $url = shift(@ARGV);
    if ($ARGV[0] =~ m{^[-/]r}) {
        shift(@ARGV);
        $referer = shift(@ARGV);
    }
    my $prefix = shift(@ARGV);
    if ($ARGV[0] =~ m{^[-/]r}) {
        shift(@ARGV);
        $referer = shift(@ARGV);
    }
    if (! defined $prefix and $url =~ m{([^/]+)/\d+\.\w+$}) {
        $prefix = $1;
        print "Automatic prefix '$prefix'\n";
    }
    my $suffix = '';
    if ($prefix =~ m{^(.*)/(.*)$}) {
        ($prefix, $suffix) = ($1, $2);
    }
    if ($url =~ m{^(.*/)(.*?)(\d+)([^0-9]*)$}) {
        my ($url_beg, $file, $num, $file_end) = ($1, $2, $3, $4);
        my ($result, $errors) = (undef, 0);
        while ($errors < 10) {
            local $referer = $referer;
            # a '##' in the referer is replaced by the current sequence number
            if ($referer =~ s/##/$num/g) {
                $result = $ua->get( $referer, ':content_file' => 'referer.html');
                if ($result->code() != 200) {
                    die "Failed to load the referer $referer\n";
                }
            }
            if ($file =~ /[\&\?]/) {
                print $url_beg.$file.$num.$file_end, " => ", $prefix.$num.$file_end.$suffix, "\n";
                $result = getstore($url_beg.$file.$num.$file_end, $prefix.$num.$file_end.$suffix);
            } else {
                print $url_beg.$file.$num.$file_end, " => ", $prefix.$file.$num.$file_end.$suffix, "\n";
                $result = getstore($url_beg.$file.$num.$file_end, $prefix.$file.$num.$file_end.$suffix);
            }
            if ($result == 200) {
                $errors = 0;
            } else {
                $errors++;
                print "\t$result\n";
            }
            $num++;
        }
        print "LAST RESULT: $result\n";
    } else {
        # no number in the URL: just download the single file
        my $file = $url;
        $file =~ s{^.*/}{};
        $file = $prefix . $file;
        if ($file) {
            getstore( $url, $file);
        } else {
            print STDERR "Can't download directories! ($url)\n";
        }
    }
}

sub getstore {
    my ($url, $file) = @_;
    # never overwrite: append ~1, ~2, ... before the extension until the name is free
    $file =~ s{(?:~(\d+))?(\.\w+)$}{'~' . ($1+1) . $2}e while (-e $file);
    my $result = $ua->get($url, ':content_file' => $file, referer => $referer);
    return $result->status_line() if $result->code() != 200;
    if (wantarray()) {
        return (200, $result->content_type);
    } else {
        if ($result->content_type =~ /^text/i) {
            print "Bloody bastards. They do not return proper HTTP status!\n";
            # unlink $file;
            return 404;
        }
        return 200;
    }
}
Just in case you wanted all images in a series ;-)
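For example, a hypothetical run (both URLs and the prefix are made up; the option syntax is the one documented in the built-in help above):
perl getimagesx.pl -r http://www.example.com/gallery.html http://www.example.com/pics/photo01.jpg vacation_
That would send gallery.html as the referer and fetch photo01.jpg, photo02.jpg, ... into vacation_photo01.jpg and so on, stopping after ten consecutive failures.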
Merlyn, I'm curious about something...
When you're whipping up short little scripts like this, do you find yourself using syntax like /a\/b\/c\/d/g instead of m{a/b/c/d}g out of habit from the old days when there wasn't much choice, or is it a conscious choice?
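For reference, the two spellings compile to the same pattern; only the delimiter, and hence the need to backwhack slashes, differs. A quick self-contained illustration:
#!/usr/bin/perl
use strict;

my $path = "features/2005_swimsuit/models/index.html";
print "toothpicks match\n" if $path =~ /2005_swimsuit\/models/;   # slash delimiters, escaped slashes
print "braces match\n"     if $path =~ m{2005_swimsuit/models};   # brace delimiters, no escaping needed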
I'm wrestling with GNU Emacs "cperl-mode", which doesn't seem to appreciate m{} and s{}{}g very well. But it handles the backwhacked version OK.
merlyn,
This certainly isn't as succinct, but I wanted to offer my contribution. It gets 110 of the 118 non-exclusive pics. All 8 of Marisa Miller's are skipped. It was fun - *shrug*.
I didn't bother fixing it to get the exclusive photos, but it shouldn't be too difficult. Incidentally, there is a non-linked photo we both miss: Carolyn Murphy has a hidden #2 pic.
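If you wanted to hunt for such unlinked shots yourself, here is a hedged sketch that uses LWP::Simple's head() to probe guessed sequence numbers; the base URL and filename pattern are purely hypothetical placeholders, not the site's real layout:
#!/usr/bin/perl
use strict;
use LWP::Simple;

# Hypothetical gallery layout; substitute the pattern seen in the linked images.
my $base = "http://www.example.com/si/models/images";
for my $n (1 .. 8) {
    my $url = "$base/carolyn-murphy_$n.jpg";
    print "$url: ", (head($url) ? "found" : "missing"), "\n";   # HEAD request: true if the image exists
}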
Re^2: Swimsuits2004
by zentara (Cardinal) on Jan 20, 2005 at 11:39 UTC
As usual, Merlyn does it best. :-) Maybe you should offer your services to SI, to better protect their "private" content? Then again, maybe not! :-)
I'm not really a human, but I play one on earth.
flash japh
Re^2: Swimsuits2004
by reasonablekeith (Deacon) on Jan 26, 2005 at 23:26 UTC
May I suggest, for those who might not have Linux, the following code, which will generate HTML link pages by genre and model name.
Please feel free to add html/body etc. tags if the non-compliance upsets you. Hmmm, I'm already thinking there should be a use CGI in there somewhere.
Enjoy.
#!/usr/bin/perl
use strict;
$|++;

my %files;
foreach my $file_name (glob "RESULTS/*") {
  my ($top_level, $model_name) = ($file_name =~ m/RESULTS\/(.*?)_(.*?)_.*$/);
  # expand two-digit year prefixes: 9x => 199x, 0x => 200x
  $top_level =~ s/^9/199/ or $top_level =~ s/^0/200/;
  $files{$top_level}{$model_name}{$file_name} = 1;
  $files{$model_name}{'all'}{$file_name} = 1;
}

-d 'HTML' or mkdir 'HTML' or die "mkdir HTML: $!";
open INDEX, ">HTML\\index.html" or die "open index.html: $!";
foreach my $top_level (sort keys %files) {
  print INDEX qq|<a href="./index_$top_level.html">$top_level</a><br/>\n|;
  open TOP_INDEX, ">HTML\\index_$top_level.html" or die "open index_$top_level.html: $!";
  foreach my $model_name (sort keys %{$files{$top_level}}) {
    open MODEL_INDEX, ">HTML\\index_${top_level}_${model_name}.html" or die "open model index: $!";
    my $image_count = 0;
    foreach my $file_name (sort keys %{$files{$top_level}{$model_name}}) {
      $image_count++;
      print MODEL_INDEX qq|<img src="../$file_name"><br/>\n|;
    }
    print TOP_INDEX qq|<a href="./index_${top_level}_${model_name}.html">$model_name ($image_count)</a><br/>\n|;
  }
}
Re^2: Swimsuits2004
by jbrugger (Parson) on Feb 14, 2005 at 07:29 UTC
Hmm...
Just couldn't help myself, and created thumbs and links...
Have fun :-) (P.S. feel free to improve it... that's the real fun, isn't it?)
#!/usr/bin/perl -w
use strict;
use Image::Magick;
use File::Glob qw(:globally :nocase);
use File::Basename;
use CGI;

my $realdir   = '/var/www/html/thumbnailer/images/';  # The dir that holds the original images
my @ext       = qw( jpg png gif );                    # Image extensions to look for
my $savedir   = '/var/www/html/thumbnailer/thumbs/';  # Dir to save thumbnails
my $serverdir = '/thumbnailer/';                      # Relative server-path
my $thumbdir  = 'thumbs/';                            # Holds the thumbnails for the webpage
my $imagedir  = 'images/';                            # Holds the real images for the webpage
my $x_size    = 150;   # Size in pixels.
my $y_size    = 150;   # Size in pixels.
my $resize    = 1;     # Resize before crop.
my $aspect_S  = 0.5;   # Only resize if the aspect ratio is above this value, else the thumbnail becomes too blurred.
my $aspect_H  = 2;     # Only resize if the aspect ratio is below this value, else the thumbnail becomes too blurred.
my $overwrite = 0;     # Always remake (overwrite) the thumbnails.
my $cols      = 5;     # Max horizontal thumbs.
my $rows      = 10;    # Max vertical thumbs.

my $cgi = new CGI;
main();
cleanUp();

sub main {
    my $content = "<tr>";
    my $files = readDir();
    my $thumbs_per_page = $rows * $cols;
    my $total       = scalar(@$files) ? scalar(@$files) : 0;
    my $pages       = $total / $thumbs_per_page;
    my $currentPage = $cgi->param('p') ? $cgi->param('p') : 1;
    my $hasPrevious = $currentPage - 1 ? 1 : 0;
    my $hasNext     = ($currentPage < $pages) ? 1 : 0;
    my $startImage  = ($currentPage - 1) * $thumbs_per_page;
    my $nav = "";
    my $c = 1;
    my $i = 0;
    foreach my $file (@$files) {
        $i++;
        if ($i >= $total) {
            # last file: build the Previous/Next navigation row
            $nav .= "<tr><td align=\"center\" nowrap=\"nowrap\" colspan=\"$cols\">";
            if ($hasPrevious) { $nav .= "<a href=\"?p=" . ($currentPage - 1) . "\">Previous<\/a>\ \ "; }
            if ($hasNext)     { $nav .= "<a href=\"?p=" . ($currentPage + 1) . "\">Next<\/a>"; }
            $nav .= "<\/td><\/tr>";
        }
        next if ($i <= $startImage || $i > ($startImage + $thumbs_per_page));
        if ($c > $cols) {
            $content .= "<\/tr><tr>\n";
            $c = 1;
        }
        # Check if the thumbnail already exists:
        my $filename = "thumb_" . fileparse($file);
        if (!-e $savedir . $filename || $overwrite) {
            # Make a new thumbnail...
            my $image = Image::Magick->new;
            $image->Read($file);
            my ($x, $y) = $image->Get('width', 'height');
            # Enlarge image if thumbnail > original, or resize before crop is enabled...
            if ($x < $x_size || $resize) {
                my $aspectratio = $y / $x;
                # Only resize if the aspect ratio is between the given borders
                if ($aspectratio > $aspect_S && $aspectratio < $aspect_H || $x < $x_size) {
                    $x = $x_size;
                    $y = $x * $aspectratio;
                    $image->Resize(width => $x, height => $y, filter => 'Cubic', blur => 1);
                }
            }
            if ($y < $y_size) {
                my $aspectratio = $x / $y;
                $y = $y_size;
                $x = $y * $aspectratio;
                $image->Resize(width => $x, height => $y, filter => 'Cubic', blur => 1);
            }
            # Get center (offset) of image, and crop to $x_size * $y_size.
            my $ox = ($x - $x_size) / 2;
            my $oy = ($y - $y_size) / 2;
            $image->Crop("${x_size}x${y_size}+$ox+$oy");
            $image->Write($savedir . $filename);
            $content .= " <td> <a href=\"" . $serverdir . $imagedir . fileparse($file) . "\" > <img src=" . $serverdir . $thumbdir . $filename . " alt=\"\" border=\"1\"> <\/a><\/td> ";
        } else {
            # Thumbnail already on disk; skip writing...
            $content .= " <td> <a href=\"" . $serverdir . $imagedir . fileparse($file) . "\" > <img src=" . $serverdir . $thumbdir . $filename . " alt=\"\" border=\"2\"> <\/a><\/td> ";
        }
        $c++;
    }
    $content .= "<\/tr>\n" . $nav;
    printHtml($content);
}

sub printHtml {
    my ($content) = @_;
    print $cgi->header(-type => 'text/html');
    print $cgi->start_html(-title => 'Testpage', -BGCOLOR => '#ffffff');
    print "<table border=\"0\" cellpadding=\"0\" cellspacing=\"3\">\n";
    print $content;
    print "\n<\/table>\n";
    print $cgi->end_html;
}

sub readDir {
    my $files = "*.{" . join(",", @ext) . "}";
    my @files = glob($realdir . $files);
    return \@files;
}

sub cleanUp {
    # undef() takes a single argument, so clear each variable individually
    undef $cgi;     undef $realdir;  undef @ext;      undef $serverdir;
    undef $savedir; undef $x_size;   undef $cols;     undef $y_size;
    undef $resize;  undef $aspect_S; undef $aspect_H; undef $overwrite;
    undef $rows;    undef $thumbdir; undef $imagedir;
}