#!/usr/bin/perl use warnings; use strict; use WWW::Mechanize::PhantomJS; use IPC::Open2; use Text::Table; use IO::Prompter; use Encode; use Encode::Locale; binmode STDOUT, ":encoding(console_out)"; # login my $login = "login"; my $password = "password"; # backend my $phantomjs = "/path/to/phantomjs"; # url my $index = "http://example.org/"; my $newmsg = "http://example.org/forum/0-0-1-34"; # bogofilter my $bogofilter = "/usr/bin/bogofilter"; # display my $text_trim_length = 35; my $www = WWW::Mechanize::PhantomJS::->new( autodie => 1, launch_exe => $phantomjs, report_js_errors => 1, ); my @threads; auth(); load_messages(); my %actions = ( display => \&print_table, quit => sub { exit }, learn => \&learn_message, rampage => \&remove_spam, reload => \&load_messages, ); print_table(); while (my $mode = prompt 'What to do now?', -menu => [ keys %actions ]) { $actions{$mode}->(); } sub screenshot { my ($name) = $_[0] || "test.png"; open my $file, ">:raw", $name; print $file $www->content_as_png(); close $file; } sub feed_bogofilter { my ($text, @params) = @_; my $pid = open2(my $read, my $write, $bogofilter, @params) or die "$bogofilter @params: $!\n"; binmode $write, ":encoding(locale)"; # XXX teach it manually with the same encoding print $write $text; close $write; defined (my $reply = <$read>) or return; # this happens when we learn spam chomp $reply; waitpid $pid, 0; $reply =~ /^([SHU])\s([\d.+-e]+)$/ or return; my ($status, $score) = ($1, $2); return ($status, $score); } sub compute_scores { print "Computing spam scores\n"; @{$_}{"status","score"} = feed_bogofilter("$_->{title}\n$_->{text}", "-T") for @threads; @threads = sort { $b->{score} <=> $a->{score} } @threads; } sub print_table { compute_scores(); my $table = Text::Table::->new("#", "Status", "Score", "Title", "Trimmed text"); my $i = 0; $table->load( map { my $text = $_->{text}; $text =~ tr/\n/ /; $text = substr($text, 0, $text_trim_length); [ $i++, @{$_}{"status", "score", "title"}, $text ]; } @threads); print $table; } sub learn_message { my $num = prompt "Message number?", -integer => [0..$#threads]; print '-'x20, "\n", $threads[$num]->{title}, "\n", $threads[$num]->{text}, "\n", '-'x20, "\n", ; my $param = prompt "Spam or ham?", -menu => { "Spam" => "s", "Ham" => "n" }; feed_bogofilter($threads[$num]->{title}."\n".$threads[$num]->{text}, "-$param"); } sub remove_spam { # WARNING # This subroutine is full of kludges for (grep { $_->{status} eq "S" } @threads) { print "Loading page $_->{url}\n"; $www->get($_->{url}); # at this point I'm somehow logged out auth(); # another piece of duct tape $www->get($_->{url}); ($www->xpath('//*[@class="banDo"]', one => 1))[-1]->click; # we always work with the last message print "Loading ban window\n"; timeout(sub {$www->xpath('//input[@id="a2"]', single => 1)->click}); # [+] raise ban level $www->xpath('//textarea[@name="reason"]', single => 1)->send_keys("spambot"); # ban reason $www->xpath('//input[@id="ever"]', single => 1)->click; # ban for ever #$www->click({ xpath => q{//*[contains(concat(' ', normalize-space(@class), ' '), ' myBtnCont ')]} }); # does not click print "Sending ban request\n"; $www->eval_in_page(q|_uPostForm('frm982',{type:'POST',url:'http://example.org/index/'});|); print "Loading remove page\n"; my $last_post_bottom = ($www->xpath('//*[@class="postBottom"]'))[-1]; ($www->xpath('.//a', node => $last_post_bottom))[-2]->click; # second-from-last link in the postBottom block is "delete thread" print "Sending remove request\n"; $www->xpath('//input[@name="sbm"]', single => 1)->submit; } } sub timeout { my ($sub, $tries) = @_; $tries ||= 5; my $try = 0; until( eval { $sub->() }) { screenshot('fail.png'); no warnings 'once'; $DB::single++; die join ":", (caller(1))[1..3]. " failed to execute after $try tries" if $try++ > $tries; sleep 1; } } sub auth { print "Logging in\n"; $www->get($index)->is_success or die "Failed to get $index"; # autodie does not work $www->submit_form(with_fields => {user => $login, password => $password}); } sub load_messages { print "Listing new messages\n"; $www->get($newmsg); @threads = map { { # they are mostly new threads with one message and no replies # so title might be relevant title => ( eval { $www->xpath( './/a[@class="threadLink"]', node => $_, single => 1, ) } || $www->xpath( './/a[@class="threadPinnedLink"]', node => $_, single => 1, ) )->get_text, url => $www->xpath( './/a[@class="forumLastPostLink"]', node => $_, single => 1, )->get_attribute("href"), } } $www->xpath('//td[@class="threadIcoTd"]/..'); $|++; print "Loading messages"; for (@threads) { $www->get($_->{url}); # it is also possible that it's an appended spam message in another thread my $last_message = ($www->xpath('//span[@class="ucoz-forum-post"]', one => 1))[-1]; $_->{text} = $last_message->get_text; print "."; } print "\n"; }