#!/usr/binenv perl BEGIN{ $DB::signal=0; } use strict; use warnings; use Fcntl; use Digest::SHA1 qw(); use IO::File; use Text::CSV; use File::Spec qw(); use File::Basename qw(); use File::Path qw(); use File::stat; use Memoize; memoize( 'generate_tempname' ); use constant VERBOSE => 1; use feature ':5.10'; use HTTP::Request::Common qw(GET POST HEAD); sub POE::Kernel::ASSERT_DEFAULT () { 1 } use POE qw(Component::Client::HTTP) # Component::Client::Keepalive); #my $pool = POE::Component::Client::Keepalive->new( max_per_host => 4, timeout => 1800, keep_alive => 180 ); POE::Component::Client::HTTP->spawn( Alias => 'dmua' , Streaming => 4096 # , ConnectionManager => $pool , FollowRedirects => 2 , Agent => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.3) Gecko/2008101315 Ubuntu/8.10 (intrepid) Firefox/3.0.3' , From => 'evan@dealermade.com' ); POE::Session->create( inline_states => { _start => \&client_start , _stop => \&client_stop , got_response => \&client_got_response , transfer_complete => \&finalize_transfer } ); $poe_kernel->run(); ### Event handlers begin here. sub client_start { my ($kernel, $heap) = @_[KERNEL, HEAP]; ## $poe_kernel->sig(INT => "_stop"); my $fh = IO::File->new( 'dealermade_pictures.csv', 'r' ); my $header = $fh->getline; my $csv = Text::CSV->new; while ( my $line = $fh->getline ) { $csv->parse( $line ); my ( $picid, $url, $lot, $is_primary ) = $csv->fields; my $temp = generate_tempname( $url ); if ( -e $temp && -f $temp && -s $temp ) { $kernel->post( dmua => request => got_response => HEAD( $url ) ); } else { $kernel->post( dmua => request => got_response => GET( $url, Accept => 'image/*' ) ); } } } sub client_stop { my $heap = $_[HEAP]; } sub client_got_response { my ($heap, $req, $res, $data ) = ( $_[HEAP], $_[ARG0]->[0], @{$_[ARG1]} ); my $uri = $req->uri; my $temp = generate_tempname( $uri ); given ( $req->method ) { when ( 'HEAD' ) { if ( -e $temp && -f $temp ) { my $stat = stat( $temp ); my $badSize = $res->content_length && $stat->size != $res->content_length; my $badDate = $stat->mtime - $res->fresh_until > 0; ## My slow sledge hammer ## use DateTime qw(); ## my $badDate = DateTime->from_epoch( epoch => $stat->mtime ) ## ->subtract_datetime( DateTime->from_epoch( epoch => $res->fresh_until ) ) ## ->is_positive ## ; if ( VERBOSE ) { if ( $badDate || $badSize ) { say "Posting to the kernel a request to redownload $uri"; say "\tBAD SIZE detected, our file is ". $stat->size ." and it should be ". $res->content_length if $badSize ; say "\tBAD DATE detected -- file has since been modified" if $badDate; } else { say "Skipping $uri -- all is current"; } } $poe_kernel->post( dmua => request => got_response => GET( $uri, Accept => 'image/*' ) ) if $badSize || $badDate ; } else { warn "HEAD requested on non-cached file $temp\n"; } } when ( 'GET' ) { my $this = $_[HEAP]->{uri}{$uri}; my $fh = $this->{fh}; if ( !defined($res->code) || $res->code != '200' ) { say $res->code . " was received from request to $uri"; if ( $res->code == 500 ) { use XXX; YYY [ $req, $res, $_[HEAP]->{connection} ]; $DB::signal=1; } return; } ## If we've never encoutered a response from this request. unless ( $fh ) { if ( VERBOSE ) { say "Started download of $uri : " . $res->code; say "\tDestination temp name:\t$temp"; } ## If the file exists simply unlink it and start over. ## This is here so we can refresh the data behind the url if ( -e $temp && -f $temp ) { say "\tUnlinking preexiting uri first" if VERBOSE; unlink ( $temp ); } ## Else we might have to create the path to it. else { my $path = File::Basename::dirname( $temp ); unless ( -d $path and -e $path ) { File::Path::mkpath( $path ); say "\tCreating path:\t$path"; } } sysopen ( $fh , $temp , O_WRONLY|O_CREAT ); binmode($fh); ## win32 not required in linux $this = { fh => $fh, temp => $temp, uri => $uri }; $_[HEAP]->{uri}{$uri} = $this; } ## ## If we have data send it to our file handle ## if ( defined $data ) { print $fh $data; } ## ## If we have no more data hard link to store and remove ## else { close $fh; my $stor = generate_storename( $uri ); say "Linking $temp to $stor" if VERBOSE; my $path = File::Basename::dirname( $stor ); File::Path::mkpath( $path ) unless -e $path && -d $path; CORE::link( $temp, $stor ) unless -e $stor ; delete $heap->{uri}{$this->{uri}}; } } } } sub generate_tempname { my $uri = shift; my $sha1 = Digest::SHA1::sha1_hex( $uri ); my ( $f1, $f2, $file ) = unpack ( 'A2A2A*', $sha1 ); $uri =~ /.*([.].*?)$/; my $ext = $1; File::Spec->catfile( qw/out temp/, $f1, $f2, $file . $ext||'.jpg' ); } sub generate_storename { my $uri = shift; my $tempname = generate_tempname($uri); my $io = IO::File->new( $tempname, 'r' ); my $sha1 = Digest::SHA1->new; $sha1->addfile($io); $io->close; my ( $f1, $f2, $file ) = unpack ( 'A2A2A*', $sha1->hexdigest ); $uri =~ /.*([.].*?)$/; my $ext = $1; #File::Spec->catfile( qw/out store/, $sha1->hexdigest . $ext ); File::Spec->catfile( qw/out store/, $f1, $f2, $file . $ext||'.jpg' ); }