Hi, thanks again. I appreciate your concern, but the page is correctly encoded and I need to solve this problem really fast. The problem is somewhere inside LWP (where exactly, I don't know), but I think reinventing the wheel is faster in this case. The solution was to write my own connection handling; this is the code:
#!/usr/bin/perl
use open ':utf8';
use Socket;
use utf8;
# Number of redirects geturl() has followed so far; it stops following once
# this reaches 10.
my $count = 0;
# Cookie jar (name => value). geturl() fills it from Set-Cookie response
# headers and sends its contents back in the Cookie request header.
my %cookie;
# Pin the input record separator to a bare LF (octal 012).
# NOTE(review): nothing in the visible code reads line-by-line -- confirm
# whether this assignment is still needed.
$/ = "\012";
# geturl($url) -- fetch an http:// URL over a raw TCP socket using HTTP/1.0.
#
# Parameters:
#   $url - absolute URL of the form http://host/path (port 80 is always used).
#
# Returns:
#   The response body as a string. Returns nothing (empty list / undef) when
#   the URL has no host part, the host cannot be resolved, the connection
#   fails, or the response has no header/body separator.
#
# Dies:
#   "timeout\n" when no data arrives within 30 seconds or reading the response
#   takes longer than 30 seconds; a "socket: ..." / "select: ..." message when
#   the corresponding system call fails.
#
# Side effects:
#   Stores every cookie from Set-Cookie headers in the file-level %cookie and
#   sends all stored cookies with each request. Follows 301/302/303/307/308
#   redirects while the file-level $count stays below 10.
#
# The ($) prototype is kept so existing call sites parse exactly as before.
sub geturl($) {
    my $url = shift;
    my ($host, $path) = ($url =~ m{http://([^/]+)/?(.*)}i);
    return unless $host;

    # inet_aton() returns undef for an unresolvable name; bail out the same
    # way a failed connect does instead of handing undef to sockaddr_in().
    my $addr = inet_aton($host);
    return unless defined $addr;

    socket(my $sock, AF_INET, SOCK_STREAM, (getprotobyname('tcp'))[2])
        or die "socket: $!";
    unless (connect($sock, sockaddr_in(80, $addr))) {
        close $sock;
        return;
    }

    # Turn on autoflush for the socket without changing the selected handle.
    select((select($sock), $| = 1)[0]);

    # Build the request. Every header line ends in CRLF and the header block
    # is terminated by one empty line, with or without a Cookie header.
    my @headers = (
        "GET /${path} HTTP/1.0",
        "User-Agent: Opera/9.23 (X11; Linux i686; U; en)",
        "Accept-Language: en-US",
        "Accept-Charset: utf-8",
        "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*",
    );
    if (%cookie) {
        push @headers,
            "Cookie: " . join("; ", map {"$_=$cookie{$_}"} keys %cookie);
    }
    print {$sock} join("\015\012", @headers) . "\015\012\015\012";

    # Wait up to 30 seconds for the first bytes of the response.
    # select() returns 0 on timeout and -1 on error.
    my $rin = "";
    vec($rin, fileno($sock), 1) = 1;
    my $ready = select($rin, undef, undef, 30);
    die "select: $!" if $ready < 0;
    die "timeout\n" if $ready == 0;

    # Read the whole response, guarded by a 30 second alarm.
    my $file = "";
    eval {
        local $SIG{ALRM} = sub { die "timeout\n"; };
        alarm 30;
        my $chunk;
        $file .= $chunk while read($sock, $chunk, 10240);
        alarm 0;
    };
    my $err = $@;
    alarm 0;    # make sure no alarm stays pending if the eval exited early

    # Shut the connection down while the handle is still open, then close it.
    shutdown $sock, 2;
    close $sock;
    die($err) if $err eq "timeout\n";

    # Split headers from body at the first empty line. $head keeps the CRLF
    # of its last header line; the body may be empty (usual for redirects).
    my ($head, $body) = ($file =~ m{\A(.+?\015\012)\015\012(.*)\z}s);
    return unless defined $head;

    # Remember cookies; name and value are stored exactly as received.
    # Attributes after the first ';' are ignored and are optional.
    foreach my $hl (split(/\015\012/, $head)) {
        if ($hl =~ m{\ASet-Cookie[0-9]*:\s*([^=]+)=([^;]*)}i) {
            $cookie{$1} = $2;
        }
    }

    # Follow redirects. The Location value stops at the end of its own line.
    if (   $head =~ m{\AHTTP/[0-9.]{3}\s+30[12378]\s}i
        && $head =~ m{^Location:\s*([^\015\012]+)}mi)
    {
        my $target = $1;
        # The RFC asks for an absolute URL, but some servers send an absolute
        # path; resolve that against the current host. Other relative forms
        # are not resolved and make the recursive call return nothing.
        $target = "http://$host$target" if $target =~ m{\A/(?!/)};
        return geturl($target) if ++$count < 10;
    }
    return $body;
}
# Fetch the start page and save it to ./save.html.
# The output file is created (or truncated) before the fetch, so a failed
# fetch leaves an empty file behind; failures to open, write or close the
# file are reported instead of being silently ignored.
open my $out, '>:utf8', './save.html'
    or die "cannot open ./save.html for writing: $!";
my $page = geturl('http://www.yahoo.com');
if (defined $page) {
    print {$out} $page or die "cannot write ./save.html: $!";
}
else {
    warn "geturl returned no content; ./save.html left empty\n";
}
# Buffered write errors only surface at close time, so check it as well.
close $out or die "cannot close ./save.html: $!";
The cookies must persist for the whole run of the script, because I need it to work just like this. Thanks anyway.