#!/usr/bin/perl
# This software is released free under the terms of the GPL
#
# This program downloads all messages (read and unread) from
# a folder in your Yahoo mail account.
# It probably doesn't work Internationally, but I'm sure
# you could fix it without much fuss.
#
# This program stores message ids in the message file to
# avoid downloading the same message twice.
#
# This works as of 1/25/2005. Yahoo can change it any time they want.
# It'll be busted then.
#
# Yahoo lets you get about 200 hits in quick succession then locks your
# account for about 15 minutes. Sometimes SSL works when regular HTTP
# doesn't, so I try both.
# I read somewhere that logging into other countries might work too, but
# I didn't try that.
# Anyway, you can use the delay option if you want it to download all
# night, but it takes an average of 11 secs per message. That's worked
# fine for me when I'm patient.

# Need MD5 for non-ssl logins. If you have ssleay and don't have md5 then skip it.
# (If you have ssleay and don't have md5 then you're weird.)
use Digest::MD5 qw(md5_hex);

# Set up lwp
use LWP;
my $browser = LWP::UserAgent->new;

# Pretend to be IE -- eeeeww
# (kept as a package global, not a lexical, so code elsewhere in the file
# that refers to $whitelie keeps working)
$whitelie='Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322)';

# we need to handle redirection manually -- lwp isn't entirely compatible
# An empty list means LWP follows no redirects on its own; er() does it.
$browser->requests_redirectable([]);
$browser->cookie_jar({});

# generalized http get and post functions

# get(URL, [HEADER => VALUE, ...])
#   Issues a GET through the shared $browser with the fake IE User-Agent
#   appended to whatever arguments the caller passed, then hands the
#   response to er() and returns whatever er() returns.
sub get {
    my $r = $browser->get(@_, 'User-Agent' => $whitelie);
    return er($r);
}

# post requires a %f global. Tacky. Sorry. Fix it if you care. I don't. I resent it that you do.
# post(URL)
#   POSTs the contents of the package-global hash %f as form fields to URL
#   through the shared $browser, sending the same fake User-Agent header
#   that get() sends.  Returns whatever er() returns for the response.
sub post {
    my $r = $browser->post(shift, [%f], 'User-Agent' => $whitelie);
    return er($r);
}

# if get or post go wrong use this -- handles redirects
#
# er(RESPONSE)
#   RESPONSE is the object returned by $browser->get / $browser->post.
#   - On success, returns the response content.
#   - Otherwise, if the dumped response has a "Location: http..." line,
#     follows it with get() (always a GET, even after a POST) and returns
#     that result.
#   - Otherwise prints the whole response to STDERR and returns undef.
sub er {
    my $r = shift;
    return $r->content if $r->is_success;

    my $h = $r->as_string;
    if ($h =~ /\nLocation: (http.*)/i) {
        my $d = $1;
        # Drop anything after ".com,+" in the redirect target.
        # NOTE(review): presumably works around a malformed Location
        # value -- confirm.
        $d =~ s/\.com,\+.*/.com/;
        #print STDERR "redirect: $d\n";
        return get($d);
    }

    print STDERR "get or post HTTP error: ".$r->as_string."\n\n";
    # Explicit undef (not a bare return) so the value is the same in
    # list context as it always was.
    return undef;
}

# ---- main script ----------------------------------------------------------

# At least username and password are required.
die "usage: $0 username password {folder(s)} {usedelay}
-- folders can be separated by commas
-- Defaults to Inbox if not specified
-- enter yes or 1 after the folder to wait between messages to avoid yahoo blocking you.
" unless $#ARGV > 0;

($uname,$passwd,$folders,$delay)=@ARGV;
# The default must go into $folders -- that is the variable split below.
$folders='Inbox' unless defined($folders);

print STDERR "Logging in...";
# NOTE(review): the plain-HTTP login appears to have been commented out,
# leaving only the SSL login live -- confirm against the original layout.
#login();
loginssl() unless $url;
$p=get($url) if $url;
print STDERR "\n";

# Pull the mail host and the "yy" token out of the post-login page.
($host,$yy)=$p=~/\/\/(.*?)\/ym.*[\?\&]yy=([0-9A-Fa-f]*)/i;
if(! $yy) {
    # Keep the page we actually got so the failure can be inspected.
    open(E,">er.txt"); print E $p; close E;
    die "Couldn't log in. Either something's wrong or you have to wait 15 minutes to try again.\n";
}

for $folder (split(',',$folders)) {
    $folder=~s/^ *(.*?) *$/$1/;          # trim surrounding spaces
    $out="$uname-$folder.txt";
    print STDERR "Freshening $out\n";
    $folder=~y/ /+/;   # urlencode -- should do more, but I'm lazy.

    # scan the file for already downloaded messages.
    # The file legitimately does not exist on the first run, so a failed
    # open is not an error here.
    if (open(my $seen, '<', $out)) {
        while (my $line = <$seen>) {
            $have{$1}=1 if $line =~ /^mid=(.*)/;
        }
        close $seen;
    }

    $url="http://$host/ym/ShowFolder?rb=$folder&reset=1&YY=$yy";
    # F stays a bareword handle: it is still open when this section ends,
    # so later code can keep using it.
    open(F, '>>', $out) or die "Can't open $out for appending: $!\n";
    while($url) {
        $p=get($url);
        ($totmes)=$p=~/Messages \S* of (\d+)/i;
        ($page)=$url=~/&Npos=(\d+)/i;
        # Folder name goes in as an argument, not into the format string,
        # so a '%' in it cannot be taken for a conversion.
        printf STDERR "%s page %d of %d \r",$folder,$page+1,int($totmes/25)+1;
        while($p=~/href="(\/ym\/ShowLetter\?MsgId=[^"]*)"/gi) {
            $m=$1;
            ($mid)=$m=~/MsgID=(.*?)&/i;
            next if $have{$mid};
            $have{$mid}=1;
            ($idx)=$m=~/Idx=(\d+)/i;
            print STDERR "$folder message $idx of $totmes \r";
            $b=get("http://$host$m&Nhead=f");
            # "!~" rather than "! $b =~": unary ! binds tighter than =~,
            # so the old test never fired.
            if($b !~ /START TOC/) {
                open(E,">er.txt"); print E $b; close E;
                die "It stopped working -- check er.txt";
            }
            # The statement below is continued by the text that follows
            # this section of the file; it is left exactly as found.
            $b=~s/^.*START TOC.*?\n(.*)