######### Listing 1: fetch a remote page and cache its images locally. ######
use strict;
use warnings;

use Fcntl qw(:flock);
use LWP::Simple;
use HTML::TokeParser;
use URI;

######### The Page you want the script to fetch. ######
my $return_URL = "http://123.123.123.123/cgi-bin/index.pl";

# The location of the file the HTML is saved as.
my $file = 'D:/Inetpub/Scripts/fetchtest/images/url.html';

# The URL to the local directory the images are saved in.
my $images_url = 'https://my-secure-domain/cgi-bin/fetchtest/images/';

# The local server path to the directory the images are saved in.
my $images_path = 'D:/Inetpub/Scripts/fetchtest/images';

# The local server path to the directory this script is in.
my $cgi_path = 'D:/Inetpub/Scripts/fetchtest';

# The Local URL of the .HTML file.
my $html_file = 'http://192.168.0.4/cgi-bin/fetchtest/images/url.html';

# Build the URL to fetch: the configured page plus whatever query string this
# CGI was called with.  QUERY_STRING may be absent (e.g. command-line runs),
# so default it instead of interpolating undef.
my $query_string = defined $ENV{"QUERY_STRING"} ? $ENV{"QUERY_STRING"} : '';
my $return_query = length $query_string
    ? "$return_URL?$query_string"
    : $return_URL;

# Only prepend a scheme when there is none; an https:// URL must not be
# turned into "http://https://...".
$return_query = "http://$return_query"
    unless $return_query =~ m{^https?://}i;
my $url = URI->new($return_query);

# Get requested page and save it locally.  A failed fetch is saved as an
# empty page, as before.
#print "Retrieving $return_query...\n";
my $html = get($return_query);
$html = '' unless defined $html;

# NOTE(review): '>' truncates the file before the lock is granted, so the lock
# only serialises the write itself.
open(my $page_fh, '>', $file) or die "Unable to open $file: $!";
flock($page_fh, LOCK_EX)      or die "cannot lock file: $!";
print {$page_fh} $html        or die "Unable to write $file: $!";
close($page_fh)               or die "Unable to close $file: $!";

####
# Parse the Web page to identify images.  The hash de-duplicates images that
# are referenced more than once; keys are absolute image URLs.
my %imagefiles;
my $parser = HTML::TokeParser->new(\$html);
while (my $img_info = $parser->get_tag('img')) {
    my $image_name = $img_info->[1]->{'src'};

    # <img> tags without a usable src attribute have nothing to download.
    next unless defined $image_name && length $image_name;

    my $image_file = URI->new($image_name)->abs($url);
    $imagefiles{ $image_file->as_string } = 1;
}

# Retrieve all the images and save them locally.
foreach my $this_image (keys %imagefiles) {

    # The local file name is the last path segment of the image URL, with
    # every character outside A-Za-z0-9. replaced by an underscore.
    my ($local_image_name) = $this_image =~ m{.*/(.*)$};
    next unless defined $local_image_name && length $local_image_name;
    $local_image_name =~ tr/A-Za-z0-9./_/c;

    my $local_image_path_name = "$images_path/$local_image_name";

    # Only download images that are not cached yet; checking first avoids a
    # network round trip for every image already on disk.
    next if -e $local_image_path_name;

    # Do not save a failed download: an empty file would pass the -e check
    # above on every later run and never be retried.
    my $image_data = get($this_image);
    unless (defined $image_data) {
        warn "Unable to retrieve $this_image\n";
        next;
    }

    # Save copy of image locally (binary-safe).
    open(my $image_fh, '>', $local_image_path_name)
        or die "Unable to open $local_image_name: $!";
    binmode($image_fh);
    print {$image_fh} $image_data
        or die "Unable to write $local_image_name: $!";
    close($image_fh)
        or die "Unable to close $local_image_name: $!";

    #print "saved: $local_image_path_name\n";
}
##
##
## Display stage of listing 1: read the saved page back, clean it up, point
## its links at $images_url and send it to the browser.
use Fcntl qw(:flock);

open(my $saved_page_fh, '<', $file)
    or die "Can't open receipt page html file for display $!\n";

# A shared lock is enough for reading; an exclusive lock on a handle opened
# read-only fails where flock is emulated with fcntl.
flock($saved_page_fh, LOCK_SH) or die "cannot lock file: $!";

# Slurp the whole file.  $/ is localised so the rest of the program keeps
# the normal line-by-line behaviour.
my $line = do { local $/; <$saved_page_fh> };
$line = '' unless defined $line;
close($saved_page_fh);

$line =~ s/\n/ /g;

# Strip out potentially nasty stuff.
# NOTE(review): the three original patterns were lost (they arrived as empty
# `s///sig`, which in Perl re-applies the last successful pattern instead of
# stripping anything).  The element list below is a reconstruction - confirm
# it against the intended rules.
for my $element (qw(script object applet)) {
    $line =~ s{<\s*$element\b.*?<\s*/\s*$element\s*>}{}sig;
}

# Now make all link/image paths absolute, resolved against $images_url.
# The captures are copied before URI is called so nothing inside it can
# disturb them.
$line =~ s{
    ( <\s*(?:a|img|area)\b[^>]*(?:href|src)\s*=\s*['"]? )
    ( [^'"> ]+ )
    ( ['"]? [^>]*> )
}{
    my ($tag_head, $link, $tag_tail) = ($1, $2, $3);
    $tag_head . URI->new($link)->abs($images_url)->as_string . $tag_tail;
}segix;

# Now print the lot to the browser.
print "Content-type: text/html\n\n";
print $line;

####
# Unfinished sketch, kept as a comment because it is not valid Perl as posted
# (`split.......` is a placeholder and @images is never declared).
# NOTE(review): if it is completed, the substitution presumably has to run on
# the page text rather than on $file (which holds a path), and $image_url
# should be quoted with \Q...\E before being used as a pattern.
#
#   my $https_img_path = 'https://123.123.123.255/images';
#   foreach my $image_url (@images){
#       # Split image from original path (relative or absolute)
#       my ($path, $image) = split.......
#       # Search the html ($file) for the images and replace them with my new paths.
#       $file =~ s/$image_url/$https_img_path\/$image/ig;
#   } # end of foreach.

####
# Listing 2 starts here.  It carries its own shebang and re-declares the
# configuration, so it appears to be a separate revision of the script rather
# than a continuation of the code above.
#!/usr/bin/perl
#########################################################
####           CONFIGURATION PARAMETERS              ####
#########################################################

######### The Page you want the script to fetch.
my $return_URL = "http://203.59.39.226/crabbait/index.html";
#my $return_URL = "http://203.59.39.226/cgi-bin/test.pl";

# The location of the file the html is saved as.
my $file = 'D:/Inetpub/Scripts/fetchtest/images/url.html';

# The url to the local directory the images are saved in.
my $images_url = 'http://192.168.0.4/cgi-bin/fetchtest/images/';

# The local server path to the directory the images are saved in.
my $images_path = 'D:/Inetpub/Scripts/fetchtest/images';

# The local server path to the directory this script is in.
my $cgi_path = 'D:/Inetpub/Scripts/fetchtest';

# The Local URL of the .html file.
my $html_file = 'http://192.168.0.4/cgi-bin/fetchtest/images/url.html';
#########################################################
use strict;
use warnings;
use diagnostics;

use Fcntl qw(:flock);
use LWP::Simple;
use HTML::TokeParser;
use URI;

# Build the URL to fetch: the configured page plus whatever query string this
# CGI was called with.  QUERY_STRING may be absent, so default it instead of
# interpolating undef (which warns under `use warnings`).
my $query_string = defined $ENV{"QUERY_STRING"} ? $ENV{"QUERY_STRING"} : '';
my $return_query = length $query_string
    ? "$return_URL?$query_string"
    : $return_URL;

# Only prepend a scheme when there is none; an https:// URL must not be
# turned into "http://https://...".
$return_query = "http://$return_query"
    unless $return_query =~ m{^https?://}i;
my $url = URI->new($return_query);

# Get requested page and save it locally.  A failed fetch is saved as an
# empty page, as before.
my $html = get($return_query);
$html = '' unless defined $html;

# NOTE(review): '>' truncates the file before the lock is granted, so the lock
# only serialises the write itself.
open(my $page_fh, '>', $file) or die "Unable to open $file: $!";
flock($page_fh, LOCK_EX)      or die "cannot lock file: $!";
print {$page_fh} $html        or die "Unable to write $file: $!";
close($page_fh)               or die "Unable to close $file: $!";

# Parse the Web page to identify images.  The hash de-duplicates images that
# are referenced more than once; keys are absolute image URLs.
my %imagefiles;
my $parser = HTML::TokeParser->new(\$html);
while (my $img_info = $parser->get_tag('img')) {
    my $image_name = $img_info->[1]->{'src'};

    # <img> tags without a usable src attribute have nothing to download.
    next unless defined $image_name && length $image_name;

    my $image_file = URI->new($image_name)->abs($url);
    $imagefiles{ $image_file->as_string } = 1;
}

# Retrieve all the images and save them locally.
foreach my $this_image (keys %imagefiles) {

    # The local file name is the last path segment of the image URL, with
    # every character outside A-Za-z0-9. replaced by an underscore.
    my ($local_image_name) = $this_image =~ m{.*/(.*)$};
    next unless defined $local_image_name && length $local_image_name;
    $local_image_name =~ tr/A-Za-z0-9./_/c;

    my $local_image_path_name = "$images_path/$local_image_name";

    # Only download images that are not cached yet; checking first avoids a
    # network round trip for every image already on disk.
    next if -e $local_image_path_name;

    # Do not save a failed download: an empty file would pass the -e check
    # above on every later run and never be retried.
    my $image_data = get($this_image);
    unless (defined $image_data) {
        warn "Unable to retrieve $this_image\n";
        next;
    }

    # Save copy of image locally (binary-safe).
    open(my $image_fh, '>', $local_image_path_name)
        or die "Unable to open $local_image_name: $!";
    binmode($image_fh);
    print {$image_fh} $image_data
        or die "Unable to write $local_image_name: $!";
    close($image_fh)
        or die "Unable to close $local_image_name: $!";

    #print "saved: $local_image_path_name\n";
}

# Read the saved page back for display.
open(my $saved_page_fh, '<', $file)
    or die "Can't open receipt page html file for display $!\n";

# A shared lock is enough for reading; an exclusive lock on a handle opened
# read-only fails where flock is emulated with fcntl.
flock($saved_page_fh, LOCK_SH) or die "cannot lock file: $!";

# Slurp the whole file.  $/ is localised so the rest of the program keeps
# the normal line-by-line behaviour.
my $line = do { local $/; <$saved_page_fh> };
$line = '' unless defined $line;
close($saved_page_fh);

$line =~ s/\n/ /g;

# Strip out potentially nasty stuff.
# NOTE(review): the three original patterns were lost (they arrived as empty
# `s///sig`, which in Perl re-applies the last successful pattern instead of
# stripping anything).  The element list below is a reconstruction - confirm
# it against the intended rules.
for my $element (qw(script object applet)) {
    $line =~ s{<\s*$element\b.*?<\s*/\s*$element\s*>}{}sig;
}

# Now make all link/image paths absolute, resolved against $images_url.
# The captures are copied before URI is called so nothing inside it can
# disturb them.
$line =~ s{
    ( <\s*(?:a|img|area)\b[^>]*(?:href|src)\s*=\s*['"]? )
    ( [^'"> ]+ )
    ( ['"]? [^>]*> )
}{
    my ($tag_head, $link, $tag_tail) = ($1, $2, $3);
    $tag_head . URI->new($link)->abs($images_url)->as_string . $tag_tail;
}segix;

print "Content-type: text/html\n\n";
print $line;

# This empties the url file after the display is successful.
# Since the final read doesn't alter the actual file, it's
# important to clear out the file after use so it can't
# be accessed directly, instead of only via the script.
# Opening with '>' is what truncates it; nothing is written.
open(my $clear_fh, '>', $file) or die "Unable to clear $file: $!";
flock($clear_fh, LOCK_EX)      or die "cannot lock file: $!";
close($clear_fh)               or die "Unable to close $file: $!";

exit(0);