package HTTP::Request;

use IO::Socket::INET;
use warnings;
use strict;
use Carp;

our $VERSION = '1.000';

# Demonstration request. This is top-level code, so it runs whenever the
# file is executed or loaded.
my ($content, $tags) = get_url(
    {
        webserver => 'www.example.com',
        url       => '/',
        port      => 80,
        verbose   => 0,    # Print the HTTP message sequence
        download  => 1,    # Mostly for crawler applications. Content types
                           # without links are not retrieved if set to 0.
    }
);

if ($content) {
    print "Elapsed time: $tags->{elapsed}\n";
    print "Downloaded page:\n$content\n";
}
else {
    print "Content is not available\n";
}

# get_url(\%parameter)
#
# Fetches one resource with a plain HTTP/1.0 GET over a TCP socket.
#
# Parameters (keys of the hash reference):
#   webserver - host to connect to; also sent as the Host header (required)
#   url       - request target placed on the GET line (required)
#   port      - TCP port; 80 is used when the key is missing or undefined
#   verbose   - when true, prints the request, the response headers and the
#               body that was read to STDOUT
#   download  - when true, the body is read regardless of Content-Type;
#               otherwise only when Content-Type matches /text|css|html/
#
# Returns:
#   On a 200 response: ($content, \%tag). %tag holds every response header
#   under its lower-cased name (repeated headers joined with ', ') plus
#   'proto' (the status line), 'elapsed' (whole seconds from the start of
#   the call until the headers were read) and 'url'. $content is '' when the
#   body was skipped because of the Content-Type/download rule.
#   On connection failure, a 301, or any other non-200 status: a message is
#   printed to STDOUT and an empty list is returned.
#
# Croaks when 'webserver' or 'url' is missing from the parameter hash.
sub get_url {
    my ($parameter) = @_;

    if (!exists($parameter->{webserver}) or !exists($parameter->{url})) {
        croak "Missing webserver or URL information";
    }

    my $webserver = $parameter->{webserver};
    my $url       = $parameter->{url};
    my $port      = defined($parameter->{port}) ? $parameter->{port} : 80;

    my %tag;
    my $t1 = time;

    my $sock = IO::Socket::INET->new(
        PeerAddr => $webserver,
        PeerPort => $port,
        Proto    => 'tcp',
        Timeout  => 10,
    );
    if (!$sock) {
        # Report the port that was actually tried, not a fixed 80.
        print "Could not connect to $webserver port $port.\n";
        return;
    }

    $sock->autoflush();
    binmode $sock;    # no newline translation on the wire

    # HTTP lines end in CRLF and the header section ends with an empty line.
    my $submit = join "\r\n",
        "GET $url HTTP/1.0",
        "Host: $webserver",
        "User-Agent: HTTPR/1.1",
        '', '';
    print {$sock} $submit;

    # Headers are read in their own loop so the body can be handled (or
    # skipped) separately afterwards.
    my @headers;
    my $new_location;
    while (my $line = <$sock>) {
        $line =~ s/\s+$//;
        last if $line eq '';    # empty line: end of the header section
        push @headers, $line;
        if ($line =~ /^Location:\s*(.+)/i) {
            $new_location = $1;
        }
    }

    $tag{proto}   = $headers[0];
    $tag{elapsed} = time - $t1;
    $tag{url}     = $url;

    for my $stat (@headers[1 .. $#headers]) {
        # Split on the FIRST colon only, and skip lines that are not
        # "name: value" instead of reusing captures from an earlier match.
        next unless $stat =~ /^([^:\s]+):\s*(.*)$/;
        my ($name, $value) = (lc $1, $2);
        $tag{$name} = exists $tag{$name} ? "$tag{$name}, $value" : $value;
    }

    # An unparsable or missing status line counts as status 0, which falls
    # through to the generic error branch below.
    my $http_status =
        (defined $headers[0] && $headers[0] =~ m{HTTP/\d+\.\d+ (\d+)})
        ? $1
        : 0;

    my $content_type =
        defined $tag{'content-type'} ? $tag{'content-type'} : '';

    my @output;
    if ($http_status == 200
        and ($content_type =~ /text|css|html/ or $parameter->{download}))
    {
        my $length = $tag{'content-length'};
        if (defined $length and $length =~ /^\d+$/) {
            # Known length: a single buffered read of exactly that size.
            my $body = '';
            $sock->read($body, $length);
            push @output, defined $body ? $body : '';
        }
        else {
            # No usable length: slurp until the server closes the socket.
            # Trailing whitespace is stripped on this path only.
            local $/;
            my $rest = <$sock>;
            if (defined $rest) {
                $rest =~ s/\s+$//;
                push @output, $rest;
            }
        }
    }
    close $sock;

    if ($parameter->{verbose}) {
        (my $shown = $submit) =~ s/\r//g;    # show the request without CRs
        print $shown . "\n";
        print join("\n", @headers) . "\n";
        print "\n\n" . join("\n ", @output);
        print "\n==================================================\n";
    }

    if ($http_status == 301) {
        my $where = defined $new_location ? $new_location : '';
        print "Page is relocated to $where\n";
        return;
    }
    if ($http_status != 200) {
        print "ERROR webserver could not process request!\n"
            . join("\n ", @headers) . "\n";
        return;
    }

    return (join('', @output), \%tag);
}