#Usage: archive.pl USERNAME
#
#Description: Saves all entries of USERNAME's xanga to "archive.html" in the
#             working directory. Each page is downloaded into "tmp.html",
#             its entries are appended to the archive, and the "Next 5" link
#             on the page is followed until a page without one is reached.

use strict;
use warnings;

use LWP::UserAgent;

# Site root. save() returns exactly this URL when the page it processed has no
# usable "Next 5" link, which is how the main loop detects the last page.
my $BASE_URL = 'http://www.xanga.com/';

# Scratch file holding the page currently being processed, and the archive
# that entries are appended to.
my $TMP_FILE     = 'tmp.html';
my $ARCHIVE_FILE = 'archive.html';

# Copying into the archive starts at the first line matching this pattern.
# NOTE(review): in the copy of this script under review the pattern contained
# nothing but a line break, so it matches the first line that ends in a
# newline. The real start-of-entries marker (probably an HTML tag) appears to
# have been lost -- confirm and restore it here.
my $ENTRIES_START_RE = qr/\n/;

# The line carrying the link to the next page; copying stops right after it.
my $NEXT_LINK_RE = qr/Next 5 >>/;

# Take the username from the command line, or ask for it interactively.
my $uid = shift @ARGV;
if (!defined $uid || $uid eq '') {
    print "What is your username? ";
    $uid = <STDIN>;
    die "No username given\n" unless defined $uid;
    chomp $uid;    # chomp, not chop: only strip a trailing newline
    die "No username given\n" if $uid eq '';
}

my $next_page = $BASE_URL . 'home.aspx?user=' . $uid;

# Braces are required: "$uid's" would be parsed as the package variable $uid::s.
print "Connecting to ${uid}'s Xanga...\n";

# Fetch and archive one page at a time. The end condition is tested after
# every save(), so the site root is never fetched when the journal runs out.
while (1) {
    grab($next_page)
        or die "Giving up: could not fetch $next_page\n";
    $next_page = save();    # save() returns the url to Next 5
    print "\$next_page is $next_page\n";
    last if $next_page =~ /\Q$BASE_URL\E\z/;
}

print "\n\n\nCompleted Archiving\n\n\n";

#Usage: grab(url)
#
#Description: Downloads url with a plain GET request and stores the response
#             body in "tmp.html". Waits five seconds before every request.
#             Returns 1 on success. On an unsuccessful response it prints the
#             HTTP status line, leaves "tmp.html" untouched and returns 0.
#             Dies if "tmp.html" cannot be written.
sub grab {
    my ($url) = @_;
    print "grabbing $url\n";

    my $ua = LWP::UserAgent->new;
    $ua->agent("MyApp/0.1 ");

    # Be nice to Xanga servers ;-)
    sleep 5;

    my $res = $ua->get($url);
    unless ($res->is_success) {
        print $res->status_line, "\n";
        return 0;
    }

    # Open the scratch file only once there is something to write, so a
    # failed request never leaves an empty page behind for save().
    open my $tmp, '>', $TMP_FILE
        or die "Cannot write $TMP_FILE: $!\n";
    print {$tmp} $res->content;
    close $tmp
        or die "Cannot finish writing $TMP_FILE: $!\n";

    print "Successfully grabbed html...\n";
    return 1;
}

#Usage: save();
#
#Description: Reads the page stored in "tmp.html" and appends to
#             "archive.html" every line from the first one matching the
#             start marker up to and including the line containing
#             "Next 5 >>" (or up to end of file when there is none).
#             Returns the url of the next page to grab: the site root
#             followed by the last double-quoted string on the "Next 5 >>"
#             line. When that line or a quoted string is missing, the bare
#             site root is returned. Dies if either file cannot be opened.
sub save {
    open my $in, '<', $TMP_FILE
        or die "Cannot read $TMP_FILE: $!\n";
    open my $out, '>>', $ARCHIVE_FILE
        or die "Cannot append to $ARCHIVE_FILE: $!\n";
    print "Saving...\n";

    # Skip ahead to the line where the entries start.
    my $line;
    while (defined($line = <$in>)) {
        last if $line =~ $ENTRIES_START_RE;
    }
    print {$out} $line if defined $line;
    print "Wrote out \$line\n";

    # Copy everything up to and including the line with the next-page link.
    my $next_link_line;
    while (defined($line = <$in>)) {
        print {$out} $line;
        if ($line =~ $NEXT_LINK_RE) {
            $next_link_line = $line;
            last;
        }
    }
    print "Saved\n";

    close $in;
    close $out
        or die "Cannot finish writing $ARCHIVE_FILE: $!\n";

    # The greedy .* pushes the match to the last "..." pair on the line.
    my $next_path = '';
    if (defined $next_link_line && $next_link_line =~ /.*"([^"]*)"/s) {
        $next_path = $1;    #home.aspx?user=....
    }

    return $BASE_URL . $next_path;
}