rbhyland has asked for the wisdom of the Perl Monks concerning the following question:
I have been using the following code for years and it recently stopped working. As near as I can tell it's because the site now has an https prefix. Can you show me how to tweak this program to get it working again? Thanks in advance!
#!"C:\xampp\perl\bin\perl.exe" use strict; use LWP::Simple; use CGI qw(:standard :cgi-lib); use CGI::Carp qw(fatalsToBrowser warningsToBrowser); my $current; my $currentUrl; my $title; my $alt; my $cgi = new CGI; print $cgi->header(); print start_html(-title =>'Save XKCD'); # Set Specifics my $sitePrefix = "https://xkcd.com/"; #my $sitePrefix = "http://www.google.com/"; ## Path to main XKCD directory ## my $path = "c:/Comics"; mkdir "$path/xkcd", 0755 or print "$path/xkcd Directory Exists\n",br; chomp($path = "$path/xkcd"); my $d = get("$sitePrefix"); if (!is_success($d)) { print "$d is not defined",br; } else { print "[ $d ]",br; } my $status; my $content; print "status = $status",br,"Content = $content",br; if ($d =~ /https:\/\/xkcd.com\/(\d+)\//) { $current = $1; print "Current = $current",br,"SitePrefix = $sitePrefix",br; } else { print "Permanent link not found",br; print "sitePrefix - ",$sitePrefix,br; print "\$d - [",$d,"]",br; } # Obtains all individual comic data sub getComicData { my $siteData = get("$sitePrefix$current/"); my @data = split /\n/, $siteData; foreach (@data) { if (/http:\/\/xkcd.com\/(\d+)\//) { $current = $1; } if ((/src="(http:\/\/imgs.xkcd.com\/comics\/.+\.\w{3})"/) || (/src="(\/\/imgs.xkcd.com\/comics\/.+\.\w{3})"/) ) { $currentUrl = $1; print "CurrentUrl = $currentUrl",br; if (/alt="(.+?)"/) { $title = $1; $title = "House of Pancakes" if $current == 472; # Co +lor title on comic 472 with weird syntax print "Title = $title",br; } if (/title="(.+?)"/) { #title commonly know as 'alt' te +xt $alt = $1; print "Alt = $alt",br; } } } } chdir "$path" or die "Cannot change directory: $!"; &getComicData(); while ( get("$sitePrefix$current/")){ print "Writing Files $current: $title\n",br,"CurrentUrl = $current +Url",br,br; # Create directories for individual comics mkdir "$current $title", 0755 or die "Previously Downloaded"; chdir "$path/$current $title" or die "Cannot change directory: $!" +; # Save image file if (index($currentUrl,"http") != 0) { $currentUrl = "http:".$currentUrl; } my $image = get($currentUrl); open my $IMAGE, '>>', "$title.png" or die "Cannot create image file!"; binmode($IMAGE); print $IMAGE $image; close $IMAGE; # Save alt text open my $TXT, '>>', "$title ALT.txt" or die "Cannot create text file!"; print $TXT $alt; close $TXT; chdir "$path" or die "Cannot change directory: $!"; $current--; # Check for non existent 404 comic $current-- if $current == 404; &getComicData(); } # End Gracefully print "Download Complete\n"; print end_html;
I have tried switching the LWP::UserAgent, but I still get the error.
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: LWP::Simple on HTTPS sites ( WWW::Mechanize )
by Anonymous Monk on Feb 10, 2017 at 03:28 UTC | |
|
Re: LWP::Simple on HTTPS sites
by poj (Abbot) on Feb 09, 2017 at 20:06 UTC | |
by rbhyland (Acolyte) on Feb 09, 2017 at 22:06 UTC | |
by rbhyland (Acolyte) on Feb 09, 2017 at 22:12 UTC | |
by poj (Abbot) on Feb 10, 2017 at 11:21 UTC | |
by rbhyland (Acolyte) on Feb 13, 2017 at 12:09 UTC | |
| |
|
Re: LWP::Simple on HTTPS sites
by nysus (Parson) on Feb 10, 2017 at 05:33 UTC | |
by rbhyland (Acolyte) on Feb 25, 2017 at 09:32 UTC | |
by noxxi (Pilgrim) on Feb 25, 2017 at 18:26 UTC |