#!/usr/bin/perl

# Fetch all manner of URLs from STDIN, dumping the text of their
# responses on STDOUT.  URLs are read one per line.

use warnings;
use strict;

sub MAX_PARALLEL () { 8 }    # Number of requests to run at once.

use POE;                             # Cooperative multitasking framework.
use POE::Component::Client::HTTP;    # Non-blocking HTTP requests module.
use HTTP::Request::Common qw(GET);

### Spawn the HTTP client component.  It will be named "ua", which is
### short for "useragent".

POE::Component::Client::HTTP->spawn(Alias => 'ua');

### Start the session that will use the HTTP client.  The _start event
### is fired by POE to kick-start a session.

POE::Session->create(
    inline_states => {
        _start       => \&initialize_session,
        got_response => \&handle_response,
    }
);

### Run the session that will visit pages.  The run() function will
### not return until the session is through processing its last URL.

$poe_kernel->run();
exit 0;

### Handle the _start event by setting up the session and starting an
### initial number of requests.  As each request finishes, another
### will be started in its place (see the got_response handler).
###
### At most MAX_PARALLEL URLs are read from STDIN here; fewer are
### started if STDIN runs out first.
###
### The $_[KERNEL] parameter convention is strange but useful.  See:
### http://poe.perl.org/?POE_FAQ/Why_does_POE_pass_parameters_as_array_slices

sub initialize_session {
    my $kernel = $_[KERNEL];

    for (1 .. MAX_PARALLEL) {
        # Read the next URL; stop priming once STDIN is exhausted.
        my $next_url = <STDIN>;
        last unless defined $next_url;
        chomp $next_url;

        $kernel->post(
            "ua",              # Post the request to the user agent.
            "request",         # It is a request we're posting.
            "got_response",    # The ua response should be "got_response".
            GET($next_url)     # The HTTP::Request to process.
        );
    }

    return;
}

### Receive a response and just dump it as_string() for demonstration
### purposes.  Once dumped, it attempts to read and request yet
### another URL.  The parameter convention is strange but useful
### again; this time pulling off only the values we need using a slice
### of @_.

### Event handler for "got_response".
###
### Parameters (taken from POE's @_ slice):
###   KERNEL - the POE kernel, used to post the follow-up request.
###   ARG0   - request packet; element 0 is the original HTTP::Request.
###   ARG1   - response packet; element 0 is the resulting HTTP::Response.
###
### Prints the response, framed and with every line prefixed by "| ",
### then reads one more URL from STDIN and requests it, if there is one.

sub handle_response {
    my ($kernel, $req_packet, $resp_packet) = @_[KERNEL, ARG0, ARG1];

    my $http_request  = $req_packet->[0];     # Original HTTP::Request
    my $http_response = $resp_packet->[0];    # Resulting HTTP::Response

    # Prefix every line of the dumped response so it reads as a box.
    my $response_string = $http_response->as_string();
    $response_string =~ s/^/| /mg;

    print ",---------- ", $http_request->uri, " ----------\n";
    print $response_string;
    print "`", '-' x 78, "\n";

    # Start another request if it's available, or let the list of
    # pending URLs run out.  The session will stop when it does run out.
    my $next_url = <STDIN>;
    if (defined $next_url) {
        chomp $next_url;
        $kernel->post(ua => request => got_response => GET($next_url));
    }

    return;
}