#!/usr/bin/perl
#
# Minimal HTTP/1.0 client built directly on a TCP socket.
#
# Fetches http://www.yahoo.com, following redirects and replaying any cookies
# the servers set, and writes the final response body to ./save.html.
#
# Limitations (by design, to stay dependency-free):
#   * only plain "http://" URLs are understood; an "https://" redirect target
#     makes geturl() return nothing;
#   * the cookie jar is a single flat name => value map that is sent to every
#     host (no domain/path/expiry scoping);
#   * relative redirects are resolved only when they start with "/" or "//".
#
# NOTE(review): `use open ':utf8'` sets the default I/O layers for this file.
# Whether the socket handle inherits that default should be confirmed; if it
# does not, the body is raw bytes and the :utf8 layer on the output file will
# re-encode every byte above 0x7F.

use strict;
use warnings;
use utf8;
use open ':utf8';
use IO::Handle;
use Socket;

use constant {
    HTTP_PORT      => 80,       # default port when the URL names none
    IO_TIMEOUT     => 30,       # seconds to wait for the first byte / whole body
    READ_CHUNK     => 10240,    # bytes requested per read() call
    REDIRECT_LIMIT => 10,       # a fetch gives up before its 10th redirect hop
};

$/ = "\012";

my $CRLF   = "\015\012";
my %cookie = ();                # cookie jar shared by every request

# Build the complete request text (request line, headers, terminating blank
# line) for a GET of "/$path" on the server named by $host_header.
sub _build_request {
    my ($host_header, $path) = @_;

    my @lines = (
        "GET /$path HTTP/1.0",
        "Host: $host_header",
        'User-Agent: Opera/9.23 (X11; Linux i686; U; en)',
        'Accept-Language: en-US',
        'Accept-Charset: utf-8',
        'Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
    );

    # Names and values are stored exactly as the server sent them, so they
    # are already encoded and can be replayed verbatim.
    if (%cookie) {
        push @lines,
            'Cookie: ' . join('; ', map { $_ . '=' . $cookie{$_} } sort keys %cookie);
    }

    # Every header line ends with CRLF and an empty line closes the header
    # section -- also when a Cookie header is present.
    return join($CRLF, @lines) . $CRLF . $CRLF;
}

# Read the whole response from $sock and return it as one string.
# Dies with a message ending in "timeout\n"-style text when the server stays
# silent for IO_TIMEOUT seconds or the transfer takes longer than that.
sub _read_response {
    my ($sock, $host) = @_;

    # Wait until the server starts answering.
    my $rin = '';
    vec($rin, fileno($sock), 1) = 1;
    my $ready = select(my $rout = $rin, undef, undef, IO_TIMEOUT);
    die "select: $!\n"                              if $ready < 0;
    die "timeout waiting for response from $host\n" if $ready == 0;

    # Slurp until EOF; SIGALRM bounds the total transfer time.
    my $response = '';
    my $finished = eval {
        local $SIG{ALRM} = sub { die "timeout\n" };
        alarm(IO_TIMEOUT);
        my $chunk;
        while (read($sock, $chunk, READ_CHUNK)) {
            $response .= $chunk;
        }
        alarm(0);
        1;
    };
    my $error = $@;
    alarm(0);    # never leave an alarm armed if the loop died early

    if (!$finished) {
        die $error if $error eq "timeout\n";
        warn $error;    # anything else is unexpected; report it, keep the data
    }
    return $response;
}

# Store every cookie found in the given header lines in the shared jar.
# Only the leading name=value pair is kept; attributes are ignored.
sub _remember_cookies {
    my @headers = @_;

    for my $line (@headers) {
        next unless $line =~ m{\ASet-Cookie[0-9]*:\s*([^=;\s][^=;]*)=([^;]*)}i;
        my ($name, $value) = ($1, $2);
        $name  =~ s/\s+\z//;
        $value =~ s/\s+\z//;
        $cookie{$name} = $value;
    }
    return;
}

# Return the URL named by the first Location header, or nothing when there is
# none. $origin ("http://host[:port]") is used to complete root-relative and
# protocol-relative targets, which the RFC forbids but servers send anyway.
sub _redirect_target {
    my ($origin, @headers) = @_;

    for my $line (@headers) {
        # Headers are matched one line at a time so the capture can never
        # run on into the following header lines.
        next unless $line =~ m{\ALocation:\s*(\S.*?)\s*\z}i;
        my $location = $1;
        return "http:$location"   if $location =~ m{\A//};
        return "$origin$location" if $location =~ m{\A/};
        return $location;
    }
    return;
}

# Fetch $url, following redirects; $hops counts the redirects already taken
# during this fetch. Returns the body of the final response, or nothing when
# the URL is unsupported or the server cannot be reached.
sub _fetch {
    my ($url, $hops) = @_;
    return unless defined $url;

    my ($host, $port, $path) =
        $url =~ m{\Ahttp://([^/:?\#\s]+)(?::([0-9]+))?(?:/([^\015\012]*))?\z}i;
    return unless $host;
    $port = HTTP_PORT unless defined $port;
    $path = ''        unless defined $path;
    my $host_header = $port == HTTP_PORT ? $host : "$host:$port";

    my $address = inet_aton($host);
    return unless defined $address;    # name did not resolve

    socket(my $sock, AF_INET, SOCK_STREAM, scalar getprotobyname('tcp'))
        or die "socket: $!";
    if (!connect($sock, sockaddr_in($port, $address))) {
        close $sock;
        return;
    }
    $sock->autoflush(1);

    # Send the request, then collect the answer. The socket is torn down
    # (shutdown first, then close) before any error is propagated.
    print {$sock} _build_request($host_header, $path);
    my $response = eval { _read_response($sock, $host_header) };
    my $error    = $@;
    shutdown $sock, 2;
    close $sock;
    die $error if $error ne '';

    # Headers and body are separated by the first empty line. The body may
    # legitimately be empty (typical for redirects).
    my ($head, $body) = split /\015\012\015\012/, $response, 2;
    return unless defined $head;
    $body = '' unless defined $body;

    my ($status_line, @headers) = split /\015\012/, $head;
    $status_line = '' unless defined $status_line;

    _remember_cookies(@headers);

    if ($status_line =~ m{\AHTTP/[0-9.]+\s+30[12378](?:\s|\z)}i) {
        my $target = _redirect_target("http://$host_header", @headers);
        return _fetch($target, $hops + 1)
            if defined $target && $hops + 1 < REDIRECT_LIMIT;
    }
    return $body;
}

# geturl($url)
#
# Download $url (plain http:// only) and return the response body.
# Redirects (301, 302, 303, 307, 308) are followed, up to REDIRECT_LIMIT - 1
# hops per call, and cookies set along the way are remembered and sent with
# later requests.
#
# Returns nothing (undef / empty list) when the URL is not a supported http
# URL, the host cannot be resolved, or the connection is refused.
# Dies when the socket cannot be created or the server times out.
sub geturl {
    my ($url) = @_;
    return _fetch($url, 0);
}

open my $out, '>:utf8', './save.html' or die "open ./save.html: $!";
my $page = geturl('http://www.yahoo.com');
print {$out} $page if defined $page;
close $out or die "close ./save.html: $!";