#!/usr/bin/perl
#########################################################
#### CONFIGURATION PARAMETERS ####
#########################################################
# Remote page the script fetches. The query string of the incoming CGI
# request is appended to this URL before the page is requested.
my $return_URL = "http://203.59.39.226/crabbait/index.html";
# Alternative target, kept for testing:
#my $return_URL = "http://203.59.39.226/cgi-bin/test.pl";
# Local filesystem path the fetched HTML is saved to. The file is
# overwritten on every run.
my $file = 'D:/Inetpub/Scripts/fetchtest/images/url.html';
# URL under which the local image directory is served.
# (Not referenced in the portion of the script shown here.)
my $images_url = 'http://192.168.0.4/cgi-bin/fetchtest/images/';
# Local filesystem path of the directory downloaded images are written to.
# No trailing slash: the script appends "/<file name>" itself.
my $images_path = 'D:/Inetpub/Scripts/fetchtest/images';
# Local filesystem path of the directory this script lives in.
# (Not referenced in the portion of the script shown here.)
my $cgi_path = 'D:/Inetpub/Scripts/fetchtest';
# URL under which the saved .html file ($file) is served.
# (Not referenced in the portion of the script shown here.)
my $html_file = 'http://192.168.0.4/cgi-bin/fetchtest/images/url.html';
#########################################################
use strict;
use warnings;
use diagnostics;
use LWP::Simple;
use HTML::TokeParser;
use URI;
# Build the URL to fetch: the configured page plus the query string this
# CGI request was invoked with.
# QUERY_STRING is not set when the script runs outside a CGI environment;
# fall back to '' so the interpolation below does not warn.
my $query_string = defined $ENV{"QUERY_STRING"} ? $ENV{"QUERY_STRING"} : '';
my $return_query = "$return_URL?$query_string";
# Add a scheme only when none is present; an https:// URL must be left alone.
$return_query = "http://$return_query" unless $return_query =~ m{^https?://}i;
# Base URL, used further down to resolve relative image links.
my $url = URI->new($return_query);

# Get requested page and save it locally.
# LWP::Simple::get returns undef on failure; an empty page is saved then.
my $html = get($return_query);
$html = '' unless defined $html;

# Open in append mode so the previous copy is NOT wiped before the lock is
# held (a plain '>' open truncates first, letting a concurrent reader see an
# empty file). Once the exclusive lock is ours, truncate and rewrite.
open(my $page_fh, '>>', $file) or die "Unable to open $file: $!";
flock($page_fh, 2) or die "cannot lock file: $!";    # 2 == LOCK_EX
truncate($page_fh, 0) or die "Unable to truncate $file: $!";
seek($page_fh, 0, 0) or die "Unable to rewind $file: $!";
print {$page_fh} $html or die "Unable to write $file: $!";
# Buffered write errors only surface at close, so check it. Closing also
# releases the lock.
close($page_fh) or die "Unable to close $file: $!";
# Parse the Web page to identify images.
# %imagefiles is used as a set: its keys are the absolute image URLs, so an
# image referenced several times on the page is only listed once.
my %imagefiles;
my $parser = HTML::TokeParser->new(\$html);
while (my $img_info = $parser->get_tag('img')) {
    # Element 1 of the token returned by get_tag is the attribute hash.
    my $image_name = $img_info->[1]->{'src'};

    # An <img> without a src (or with an empty one) would resolve to the
    # page URL itself and be queued as an "image"; skip it instead.
    next unless defined $image_name && length $image_name;

    # Resolve relative links against the URL of the fetched page.
    my $image_file = URI->new($image_name)->abs($url);
    $imagefiles{$image_file} = 1;
}
# Retrieve all the images and save them locally.
# Images already present in $images_path are left untouched and are not
# downloaded again.
foreach my $this_image (keys %imagefiles) {
    # The local file name is whatever follows the last '/' of the URL.
    my ($local_image_name) = $this_image =~ m{.*/(.*)$};

    # No usable file name (no '/' at all, or the URL ends in '/'): nothing
    # sensible to save under, so skip this entry.
    next unless defined $local_image_name && length $local_image_name;

    # Replace every character outside A-Z a-z 0-9 . with '_' so the name
    # is safe to use as a file name (this also flattens any query string).
    $local_image_name =~ tr/A-Za-z0-9./_/c;
    my $local_image_path_name = "$images_path/$local_image_name";

    # Get the image only if a local copy does not already exist.
    next if -e $local_image_path_name;

    # Download once, and only now that we know the file is needed.
    my $image_data = get($this_image);
    unless (defined $image_data) {
        # Do not write an empty placeholder: it would make the -e check
        # above skip this image on every later run.
        warn "Unable to fetch $this_image\n";
        next;
    }

    # Save copy of image locally. binmode keeps the bytes intact on
    # platforms that translate line endings.
    open(my $image_fh, '>', $local_image_path_name)
        or die "Unable to open $local_image_name: $!";
    binmode($image_fh);
    print {$image_fh} $image_data
        or die "Unable to write $local_image_name: $!";
    close($image_fh) or die "Unable to close $local_image_name: $!";
}
# Re-open the saved page for reading so its contents can be post-processed.
# The bareword handle FILE is kept on purpose: code further down the script
# may still refer to it by that name.
open(FILE, '<', $file)
    or die "Can't open receipt page html file for display $!\n";
flock(FILE,2) or die "cannot lock file: $!";    # 2 == LOCK_EX
# now make all image paths local and absolute.
# NOTE(review): $/ is cleared globally and never restored, so every later
# readline in this script also slurps - confirm nothing below expects
# line-by-line reads before changing this to a scoped `local $/`.
undef $/; # enable "slurp" mode
my $line = <FILE>; # whole file now here
# Flatten the document onto a single line: each newline becomes a space.
$line =~ s/\n/ /g;
#Strip out potentially nasty stuff.
$line =~ s/