koti688 has asked for the wisdom of the Perl Monks concerning the following question:
package Webrobot;

# Webrobot - runs an XML-described browsing script through LWP::UserAgent.
#
# An instance is a blessed hash built from the key/value pairs given to
# new(). The keys read by this file are: script, keep_alive, debug and
# persistent. Cookies and a "stash" hash are kept on disk under
# var/gluepay/ (paths are relative to the current working directory).
#
# _process(), called from run(), is not defined in this file; it is
# expected to come from one of the base classes listed below.

use strict;
use warnings;
use English qw(-no_match_vars);
use XML::Parser;
use XML::SimpleObject;
use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Request::Common;
use Storable;
use DateTime;
use File::Path ();

use base qw(
    Webrobot::Base
    Webrobot::Command::Auto
    Webrobot::Command::Exec
    Webrobot::Command::Match
    Webrobot::Command::Set
    Webrobot::Command::Form
    Webrobot::Command::Input
    Webrobot::Command::Filter
    Webrobot::Command::Method
    Webrobot::Command::Default
    Webrobot::Command::HTML
    Webrobot::Command::Push
    Webrobot::Command::Foreach
);

use constant DEBUG => 1;

# File-scoped lexicals rather than constants so that nothing new becomes
# callable as a method on the object.
my $USER_AGENT_STRING =
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; sv-SE; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9';
my $JAR_FILE   = 'var/gluepay/webrobot.jar';
my $STASH_FILE = 'var/gluepay/webrobot.stash';

# Private helper (plain function, not a method): build a user agent.
# When $debug is true, every request and response is dumped via ->dump.
sub _build_user_agent {
    my ($keep_alive, $debug) = @_;

    my $ua = LWP::UserAgent->new(
        agent      => $USER_AGENT_STRING,
        keep_alive => $keep_alive,
    );

    if ($debug) {
        $ua->default_header(
            'Accept-Encoding' => scalar HTTP::Message::decodable() );
        $ua->add_handler( 'request_send',  sub { shift->dump; return } );
        $ua->add_handler( 'response_done', sub { shift->dump; return } );
    }

    return $ua;
}

# Private helper (plain function, not a method): build a cookie jar that
# is backed by $jarfile, keeps discardable cookies and saves itself.
sub _build_cookie_jar {
    my ($jarfile) = @_;

    return HTTP::Cookies->new(
        file           => $jarfile,
        ignore_discard => 1,
        autosave       => 1,
    );
}

# Constructor.
#
# Arguments: a flat key/value list stored verbatim in the object.
#   script     - required; "<script>.xml" must be an existing plain file
#   keep_alive - passed to LWP::UserAgent; defaults to 1 when undefined
#   debug      - true enables request/response dumping
#   persistent - when exactly 1 the cookie jar file is kept; when true
#                an existing stash file is loaded
# Returns the new object. Dies when no script is given, when the XML
# file is not a plain file, or when an existing stash cannot be opened.
sub new {
    my ($class, @args) = @_;
    my $self = {@args};

    my $script = $self->{script};
    $self->{keep_alive} = 1 unless defined $self->{keep_alive};
    die "No script specified" unless defined $script;

    $self->{xmlfile} = $script . '.xml';
    die "$self->{xmlfile} is not a plain file" unless -f $self->{xmlfile};

    DEBUG && print "Webrobot keep_alive : $self->{keep_alive}\n";

    $self->{ua} = _build_user_agent( $self->{keep_alive}, $self->{debug} );

    $self->{jarfile} = $JAR_FILE;

    # NOTE(review): the jar is only kept when persistent == 1, while the
    # stash below is loaded for any true value. Left as found.
    unlink($JAR_FILE)
        unless defined( $self->{persistent} ) && $self->{persistent} == 1;

    $self->{jar} = _build_cookie_jar($JAR_FILE);
    $self->{ua}->cookie_jar( $self->{jar} );

    if ( -f $STASH_FILE && $self->{persistent} ) {
        # Storable images are binary, hence the :raw layer.
        open my $fh, '<:raw', $STASH_FILE
            or die "Cannot read $STASH_FILE: $OS_ERROR";
        my $frozen = do { local $INPUT_RECORD_SEPARATOR; <$fh> };
        close $fh;
        ( $self->{stash} ) = Storable::thaw($frozen);
    }

    bless( $self, $class );
    return $self;
}

# Write $self->{stash} to the stash file as a Storable::freeze image.
# Returns 1 on success; dies when the file cannot be written.
sub store_stash {
    my $self = shift;

    # Serialise first so a failing freeze() cannot leave a truncated file.
    my $frozen = Storable::freeze( $self->{stash} );

    open my $fh, '>:raw', $STASH_FILE
        or die "Cannot write $STASH_FILE: $OS_ERROR";
    print {$fh} $frozen
        or die "Cannot write $STASH_FILE: $OS_ERROR";
    close $fh
        or die "Cannot close $STASH_FILE: $OS_ERROR";

    return 1;
}

# Reset the browsing state: empty the stash, forget the referer, replace
# the user agent and start a fresh cookie jar after deleting the jar file.
# Returns 1.
sub flushcookies {
    my $self = shift;

    $self->{stash}   = {};
    $self->{referer} = undef;

    $self->{ua} = _build_user_agent( $self->{keep_alive}, $self->{debug} );

    my $jarfile = $self->{jarfile};
    unlink($jarfile);

    $self->{jar} = _build_cookie_jar($jarfile);
    $self->{ua}->cookie_jar( $self->{jar} );

    return 1;
}

# Parse the XML script and hand its <webrobot> element to _process().
#
# Returns $self on success and undef when _process() throws (the error is
# printed when DEBUG is on). Dies with "Invalid XML file (...)" when the
# script cannot be parsed.
sub run {
    my $self = shift;

    my $xml    = $self->_readfile( $self->{xmlfile} );
    my $parser = XML::Parser->new( ErrorContext => 2, Style => 'Tree' );

    # Test the eval's own result rather than $EVAL_ERROR alone, so a
    # failure is noticed even when the error variable ends up false.
    my $script;
    my $parsed_ok = eval {
        $script = XML::SimpleObject->new( $parser->parse($xml) )
            ->child('webrobot');
        1;
    };
    die "Invalid XML file ($EVAL_ERROR)" unless $parsed_ok;

    my $processed_ok = eval { $self->_process($script); 1 };
    if ( !$processed_ok ) {
        DEBUG && print "FAILED : $EVAL_ERROR\n";

        # Explicit undef kept on purpose: callers in list context still
        # receive exactly one element, as before.
        return undef;
    }

    return $self;
}

# Start a session: reset the file counter, remember $id and create the
# directory var/gluepay/webrobot/<yyyy-mm-dd>/<id>/ (date from
# DateTime->now). Returns the directory path, which is also stored in
# $self->{session_dir}. Directory creation problems are reported with
# warn() and do not abort the call.
sub new_session {
    my ( $self, $id ) = @_;

    $self->{filecounter} = 0;
    $self->{session_id}  = $id;

    my $date = DateTime->now()->ymd;
    my $dir  = "var/gluepay/webrobot/$date/$id/";

    # make_path instead of shelling out to "mkdir -p": $id is never
    # interpreted by a shell.
    File::Path::make_path( $dir, { error => \my $failures } );
    for my $failure ( @{ $failures || [] } ) {
        my ( $path, $message ) = %{$failure};
        warn "Cannot create session directory '$dir'"
            . ( length $path ? " ($path)" : q{} )
            . ": $message\n";
    }

    $self->{session_dir} = $dir;
    return $dir;
}

# End the current session: reset the file counter and set session_id and
# session_dir to undef (the keys stay in the hash).
sub end_session {
    my $self = shift;

    $self->{filecounter} = 0;
    $self->{session_id}  = undef;
    $self->{session_dir} = undef;

    return;
}

# Read a whole file and return its contents as one string.
# Returns undef when $filename is undefined or the file cannot be opened
# (a warning is emitted in the latter case).
sub _readfile {
    my ( $self, $filename ) = @_;

    # Explicit undef kept so list-context callers get one element.
    return undef unless defined $filename;

    open my $fh, '<', $filename or do {
        warn "Cannot read $filename: $OS_ERROR\n";
        return undef;
    };
    my $data = do { local $INPUT_RECORD_SEPARATOR; <$fh> };
    close $fh;

    return $data;
}

1;
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Need help with Facebook webpage robot
by Corion (Patriarch) on Nov 27, 2009 at 09:38 UTC | |
|
Re: Need help with Facebook webpage robot
by moritz (Cardinal) on Nov 27, 2009 at 09:34 UTC |