Hey guys, thanks for replying and giving me information. Please look at the code, the output says 400 url is missing. The complete code.
#!/usr/local/bin/perl
use LWP::Simple;
use HTML::Parse;
use HTML::Element;
use URI::URL;
use HTTP::Request;
use HTTP::Tiny;
use vars qw($opt_h $opt_i $opt_a);
use Getopt::Std;
#system "cls";
#print "\n\n";
getopts('hia:');
my $all = !($opt_i || $opt_a); # all=1 when no option is set
if ($opt_h || $#ARGV==-1) { print_help( ); # print help text when -h
+or no args
exit(0);
}
sub print_help {
print <<"HELP";
usage: $0 [-hia] [URL]
-h help
-i find img references only
-a find hyper link references only
Example: $0 -a https://www.url_here.com
HELP
}
$html = get $ARGV[0]; # always use [0]
$parsed_html = HTML::Parse::parse_html($html);
while ($html = shift @ARGV) {
my ($code, $type, $data) = get_html($html, $opt_i, $opt_a);
if (not_good($code, $type)) { next; }
if ($opt_i || $all) { print_images($data, $html);
exit(0);
}
if ($opt_a || $all) { print_hyperlinks($data, $html);
exit(0);
}
}
sub get_html( ) {
my($html, $want_image, $want_link) = @_;
# Create a User Agent object
my $ua = new LWP::UserAgent;
$ua->agent("Mozilla/61.0.0 (Windows; U; Windows 8.1; de; rv:1.9.2.3)
+ Gecko/20100401 Firefox/61.0.0 ");
# Ask the User Agent object to request a URL.
# Results go into the response object (HTTP::Reponse).
# use HTTP::Request::Common;
# my $request = GET $html;
# my $request = GET $ARGV[0];
my $url = $html;
print "$response->{status} $response->{reason}\n";
print $response->{content} if length $response->{content};
my $request = new HTTP::Request('GET', $ARGV[0]);
my $response = HTTP::Tiny->new->get($url);
my $response = $ua->request($request);
if ($response->is_success) {
print $response->content;
} else {
print $response->error_as_HTML;
}
print "$response->{status} $response->{reason}\n";
print $response->{content} if length $response->{content};
}
sub not_good {
my ($code, $type) = @_;
if ($code != RC_OK) {
warn("$html had response code of $code");
return 1;
}
if ($type !~ m@text/html@) {
warn("$html is not HTML.");
return 1;
}
return 0;
}
sub print_images {
my ($data, $model) = @_;
my $parsed_html=HTML::Parse::parse_html($data);
for (@{ $parsed_html->extract_links(qw (body img)) }) {
my ($link) = @$_;
my ($absolute_link) = globalize_url($link, $model);
print "$absolute_link\n";
}
$parsed_html->delete( ); # manually do garbage collection
}
sub print_hyperlinks {
my ($data, $model) = @_;
my $parsed_html=HTML::Parse::parse_html($data);
for (@{ $parsed_html->extract_links(qw (a)) }) {
my ($link) = @$_;
my ($absolute_link) = globalize_url($link, $model);
print "$absolute_link\n";
}
$parsed_html->delete( ); # manually do garbage collection
}
sub globalize_url( ) {
my ($partial, $model) = @_;
my $url = new URI::URL($partial, $model);
my $globalized = $url->as_string;
return $globalized;
}
|