PockMonk has asked for the wisdom of the Perl Monks concerning the following question:
I realise that I am throwing myself to the lions here, but so be it. I don't have a specific problem with my code, I just want some feedback on it. I've got to the point that whatever I want to do in Perl I can get it to work in the meaning of "it seems to work to me", but I am aware that this doesn't necessarily mean "It is robust and will always work" and almost certainly doesn't mean "It works in the most efficient manner".
I'm a Perl hobbyist, in that this is not what I do for a job and I code in Perl purely for fun. I lack any official training in Perl and my only sources of knowledge is the O'Reilly "Programming Perl" book and any info I find online. I hope this explains if not excuses some of the coding obscenities that will doubtless be pointed out. Being self-taught and not in a coding environment I have no method of judging myself or finding directions for improvement. With those caveats out of the way... please point out any problems with or ways to improve my code (example below). Any and al comments welcome but if you point out something wrong could you please explain why it is wrong.
EDIT: thank you, some great suggestions, code updated
#!/usr/bin/perl -T #_____________________________________________________________________ +_________# #DeleteAsstBot.pl PRE-RELEASE VERSION + # # Author Dan Adams , (User:PocklingtonDan) + # #_____________________________________________________________________ +_________# #_____________________________________________________________________ +_________# # CHANGES STILL TO MAKE + # # + # # - Don't allow use by blocked IPs? + # # http://en.wikipedia.org/w/index.php?title=Special%3AIpblocklist&ac +tion=search&ip=PLACE_IP_HERE # - Change "ARTICLENAME (nomination N)" to "Articlename (nth nominatio +n)". # # - add question mark/help link next to each input field + # # + # # RECENT CHANGES + # # + # # 09.01.06 - Version 0.01 - Initial code written + # # 10.01.06 - Version 0.02 - Fixed bug where it wrote bot sig twice. + # # 11.01.06 - Version 0.03 - Added edit token system + # # + # #_____________________________________________________________________ +_________# #_____________________________________________________________________ +_________# # WHAT THE SCRIPT DOES + # # + # # This script is a wikipedia bot. It acts as a wizard interface for th +e process# # of nominating an article for deletion, a currently manual task. It t +akes # # user input in a wizard interface and makes the necessary wikipedia e +dits in # # the background. + # # + # # INTENDED USE + # # + # # It is intended that ultimately this script is placed on the toolserv +er and # # becomes the default method of nominating articles for deletion from +wikipedia# # + # #_____________________________________________________________________ +_________# #_____________________________________________________________________ +_________# # PACKAGES TO IMPORT (must be installed on server) + # #_____________________________________________________________________ +_________# # "Use Always" Packages use strict; use warnings; use CGI::Carp "fatalsToBrowser"; # during testing and debugging only # Required Packages for features. use CGI qw(:standard Vars); use URI::Escape; use LWP::Simple; use LWP::UserAgent; use HTML::Template; #_____________________________________________________________________ +_________# # SETTINGS + # #_____________________________________________________________________ +_________# #Variables my $editorName = "**********"; my $editorPassword = "**************"; my $language = "en"; # language code for wikipedia namespace my $enabled = "true"; # set to false to take offline/disable #Local paths my $editTokenFile = "/*********/editToken.txt"; my $inputFormFile = "/**********/inputForm.tmpl"; my $errorFile = "/*********/error.tmpl"; my $respondToUserFile = "/*************/respondToUser.tmpl"; # Web paths my $pathToScript = "http://*********.com/cgi-bin"; my $pathToImages = "http://www.**********.com"; #_____________________________________________________________________ +_________# # MAIN ROUTINE + # #_____________________________________________________________________ +_________# my $action = param('action') or 'getInput'; if ($enabled eq "true") { if ($action eq 'getInput') {getInput;} elsif ($action eq 'processInput') {processInput;} else {error("Unrecognised action request");} } else {error("Bot is currently offline for maintenance.");} exit; #_____________________________________________________________________ +_________# # SUBROUTINES + # #_____________________________________________________________________ +_________# sub getInput { my $remoteHost = $ENV{'REMOTE_ADDR'}; my $editToken = generateEditToken($remoteHost); printInputForm($editToken); return(); } #_____________________________________________________________________ +_________# sub processInput { my $editToken = param('editToken'); my $articleForDeletion = param('articleForDeletion'); my $categoryForDeletion = param('categoryForDeletion'); my $reasonForDeletion = param('reasonForDeletion'); validateFilePaths($editTokenFile); validateBasicFormats($editToken, $articleForDeletion, $categoryFor +Deletion, $reasonForDeletion); validatePageExists($articleForDeletion); validateCategoryCode($categoryForDeletion); my $remoteHost = validateEditToken($editToken); notifyArticleCreator($articleForDeletion, $remoteHost); my $afdUrl = setupDeletionDebatePage($articleForDeletion, $categor +yForDeletion, $reasonForDeletion, $remoteHost); tagArticleWithDeletionNotice($articleForDeletion, $remoteHost, $af +dUrl); logDeletionNomination($articleForDeletion, $remoteHost, $afdUrl); fileMaintenance($editToken); respondToUser($articleForDeletion, $afdUrl); return(); } #_____________________________________________________________________ +_________# sub checkForUnexpectedArgs { my @unexpectedArgs = @_; if (scalar(@unexpectedArgs) > 0) { error("Mismatch between expected and received number of argume +nts."); } return(); } #_____________________________________________________________________ +_________# sub checkFileCanBeAccessed { my($filePath,$requestType, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); if ($requestType eq "READ") { unless (-r $filePath) { error("The file $filePath cannot be read as requested."); } } elsif ($requestType eq "WRITE") { unless (-w $filePath) { error("The file $filePath cannot be written to as requeste +d."); } } elsif ($requestType eq "EXISTS") { unless (-e $filePath) { error("The file $filePath does not exist."); } } else{ error("Invalid file access method was chosen."); } return(); } #_____________________________________________________________________ +_________# sub fileMaintenance { my($editToken, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); open(TOKENFILE, '+>', "$editTokenFile") or error("Cannot open edit + token file."); # open for both read and write flock(TOKENFILE, 2) or error("Cannot lock edit token file."); + my $fileContents = <TOKENFILE>; chomp($fileContents); my @allLines = split(/\n/, $fileContents); my $currentLineNumber = 0; my $matchLineNumber = 0; foreach my $line (@allLines) { my ($possibleMatchToken, $possibleMatchRemoteHost) = split +(/\|/, $line); if ($possibleMatchToken eq $editToken) { $matchLineNumber = $currentLineNumber; } $currentLineNumber++; } splice(@allLines, $matchLineNumber, 1); foreach my $newLine (@allLines) { print TOKENFILE "$newLine\n"; } flock(TOKENFILE, 8); close (TOKENFILE) or error("Cannot close edit token file."); return (); } #_____________________________________________________________________ +_________# sub respondToUser { my($articleName, $afdUrl, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $articleURL = uri_escape($articleName, "\0-\377"); my $template = HTML::Template->new(filename => 'respondToUser.tmpl +'); $template->param(LANGUAGE => $language); $template->param(ARTICLEURL => $articleURL); $template->param(PATHTOIMAGES => $pathToImages); $template->param(ARTICLENAME => $articleName); $template->param(AFDURL => $afdUrl); print "Content-Type: text/html\n\n", $template->output; return(); } #_____________________________________________________________________ +_________# sub writeResultsToPage { my($replacement_page, $replacement_text, $replacement_summary, $ad +ditionalTextStyle, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); use LWP::UserAgent; my $agent=LWP::UserAgent->new; $agent->agent('Perlwikipedia/0.90'); $agent->cookie_jar({file=> '.perlwikipedia-cookies'}); my $login = HTTP::Request->new(POST => "http://$language.wikipedia +.org/w/index.php?title=Special:Userlogin&action=submitlogin&type=logi +n"); $login->content_type('application/x-www-form-urlencoded'); $login->content("wpName=$editorName&wpPassword=$editorPassword&wpR +emember=1&wpLoginattempt=Log+in"); my $logger_inner = $agent->request($login); my $do_redirect=HTTP::Request->new(GET =>'http://$language.wikiped +ia.org/w/index.php?title=Special:Userlogin&wpCookieCheck=login'); my $redirecter= $agent->request($do_redirect); my $is_success=$redirecter->content; use HTML::Form; my $ua = LWP::UserAgent->new; $ua->agent("Perlwikipedia/0.90"); $ua->cookie_jar($agent->cookie_jar()); my $response = $ua->get("$replacement_page"); my $form = HTML::Form->parse($response); my $existingText = $form->find_input('wpTextbox1')->value; my $summary = $form->find_input('wpSummary')->value; my $save = $form->find_input('wpSave')->value; my $edittoken = $form->find_input('wpEditToken')->value; my $starttime = $form->find_input('wpStarttime')->value; my $edittime = $form->find_input('wpEdittime')->value; my $textToWrite = ""; if ($additionalTextStyle eq "APPENDFIRST") {$textToWrite = $replacement_text . $existingText;} elsif($additionalTextStyle eq "APPENDLAST") {$textToWrite = $existingText . $replacement_text;} elsif ($additionalTextStyle eq "REPLACE") {$textToWrite = $replacement_text} else {error("Unrecognised wikipedia write request.");} $form->value('wpTextbox1', $textToWrite); $form->value('wpSummary', $replacement_summary ); $response = $ua->request($form->click); return(); } #_____________________________________________________________________ +_________# sub logDeletionNomination { my($articleForDeletion, $remoteHost, $afdUrl, @unexpectedArgs) = @ +_; checkForUnexpectedArgs(@unexpectedArgs); my $articleURL = uri_escape($articleForDeletion, "\0-\377"); my ($day, $month, $year) = getCurrentDate(); my $pageURL = "http://$language.wikipedia.org/w/index.php?title=Wi +kipedia:Articles_for_deletion/Log/" . $year . "_" . $month . "_" . $d +ay . "&action=edit"; my $contentToAdd = "{{subst:afd3 | pg=$articleForDeletion}}"; my $editSummary = "Adding [[Wikipedia:$afdUrl]]"; writeResultsToPage($pageURL, $contentToAdd, $editSummary, "APPENDL +AST"); return(); } #_____________________________________________________________________ +_________# sub getCurrentDate { my @months = qw(January February March April May June July August +September October November December); my @weekDays = qw(Sun Mon Tue Wed Thu Fri Sat Sun); my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset, $da +yOfWeek, $dayOfYear, $daylightSavings) = localtime(); my $currentYear = 1900 + $yearOffset; my $currentMonth = $months[$month]; return ($dayOfMonth, $currentMonth, $currentYear); } #_____________________________________________________________________ +_________# sub setupDeletionDebatePage { my($articleForDeletion, $categoryForDeletion, $reasonForDeletion, +$remoteHost, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $afdPage = "Articles_for_deletion/$articleForDeletion"; my $numberOfPreviousNoms = getNoOfPreviousNoms($afdPage); my $currentNominationNumber = $numberOfPreviousNoms +1; my $afdPageToCreate = $afdPage . " (nomination $currentNominationN +umber)"; my $afdUrlToCreate = uri_escape($afdPageToCreate, "\0-\377"); my $pageURL = "http://$language.wikipedia.org/w/index.php?title=Wi +kipedia:$afdUrlToCreate&action=edit"; my $contentToAdd = "{{subst:afd2 | pg=$articleForDeletion | cat=$c +ategoryForDeletion | text=$reasonForDeletion}} - [[User:$editorName|$ +editorName]] (on behalf of IP: $remoteHost) ~~~~~"; my $editSummary = "Creating deletion discussion page for [[$articl +eForDeletion]] because $reasonForDeletion"; writeResultsToPage($pageURL, $contentToAdd, $editSummary, "REPLACE +"); return($afdUrlToCreate); } #_____________________________________________________________________ +_________# sub tagArticleWithDeletionNotice { my($articleForDeletion, $remoteHost, $afdUrl, @unexpectedArgs) = @ +_; checkForUnexpectedArgs(@unexpectedArgs); my $articleURL = uri_escape($articleForDeletion, "\0-\377"); my $pageURL = "http://$language.wikipedia.org/w/index.php?title=$a +rticleURL&action=edit"; my $contentToAdd = "{{subst:afd}}"; my $editSummary = "nominated for deletion: see [[Wikipedia:$afdUrl +]]"; writeResultsToPage($pageURL, $contentToAdd, $editSummary, "APPENDF +IRST"); return(); } #_____________________________________________________________________ +_________# sub notifyArticleCreator { my($articleForDeletion, $remoteHost, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $articleCreator = fetchArticleCreator($articleForDeletion); my $pageURL = "http://$language.wikipedia.org/w/index.php?title=Us +er_talk:$articleCreator&action=edit§ion=new"; my $contentToAdd = "{{subst:AFDWarning|$articleForDeletion}} - [[ +User:$editorName|$editorName]] (on behalf of IP: $remoteHost) ~~~~~"; my $editSummary = "Warning notification - $articleForDeletion prop +osed for deletion"; writeResultsToPage($pageURL, $contentToAdd, $editSummary, "REPLACE +"); return(); } #_____________________________________________________________________ +_________# sub fetchArticleCreator { my($articleForDeletion, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $articleURL = uri_escape($articleForDeletion, "\0-\377"); my $articleHistoryUrl = "http://en.wikipedia.org/w/index.php?title +=$articleURL&dir=prev&action=history"; my $browser = LWP::UserAgent->new(); $browser->timeout(60); my $request = HTTP::Request->new(GET => $articleHistoryUrl); my $response = $browser->request($request); my $articleHistoryContents = $response->content(); for ($articleHistoryContents) { s/[\s\S]*?<ul id="pagehistory">//; s/<\/ul>[\s\S]*/<br>/; s/<\/li>[\s\S]*?<li>/<br>/g; s/<li>//g; s/<\/li>//g; s/<input[\s\S]*?>//g; s/<a[\s\S]*?>//g; s/<\/a>//g; s/\(cur\)//g; s/\(last\)//g; s/\(Talk \| contribs\)//g; s/\(Talk\)//g; s/[\s\S]*?<span/<span/; s/<span class="minor">m<\/span>//g; s/<br>[\s\S]*?<span/<br><span/g; s/[\s]*?<\/span>/<\/span>/g; s/<\/span>[\s\S]*?<br>/<\/span><br>/g; s/<br>/\|/g; s/\|$//; s/^\|//; s/<span[\s\S]*?>//g; s/<\/span>//g; s/\n//g; } my @contributors = split(/\|/,$articleHistoryContents); reverse(@contributors); my $articleCreator = $contributors[0]; return ($articleCreator); } #_____________________________________________________________________ +_________# sub validateFilePaths { my($filePath, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); checkFileCanBeAccessed($editTokenFile, "EXISTS"); checkFileCanBeAccessed($inputFormFile, "EXISTS"); checkFileCanBeAccessed($errorFile, "EXISTS"); checkFileCanBeAccessed($respondToUserFile, "EXISTS"); return(); } #_____________________________________________________________________ +_________# sub validateCategoryCode { my($categoryForDeletion, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); unless (($categoryForDeletion eq "M") || ($categoryForDeletion eq +"O") || ($categoryForDeletion eq "B") || ($categoryForDeletion eq "S" +) || ($categoryForDeletion eq "W") || ($categoryForDeletion eq "G") | +| ($categoryForDeletion eq "T") || ($categoryForDeletion eq "F") || ( +$categoryForDeletion eq "P") || ($categoryForDeletion eq "I") || ($ca +tegoryForDeletion eq "?")) { error("The category '$categoryForDeletion' that you entered is + an invalid category code for the deletion debate."); } return() } #_____________________________________________________________________ +_________# sub validatePageExists { my($articleForDeletion, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $articleContents = fetchPageContents($articleForDeletion); if ($articleContents =~ m/Wikipedia does not have an article with +this exact name/) { error("The article '$articleForDeletion' that you nominated fo +r deletion does not exist."); } return(); } #_____________________________________________________________________ +_________# sub getNoOfPreviousNoms { my($afdPage, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $noOfNominations = -1; my $afdPageExists = "true"; until($afdPageExists eq "false") { $afdPageExists = "false"; my $afdPagePossibility = $afdPage . " (nomination $noOfNominat +ions)"; $afdPageExists = validateAfdPageExists($afdPagePossibility); $noOfNominations++; } return ($noOfNominations); } #_____________________________________________________________________ +_________# sub validateAfdPageExists { my($afdPage, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $articleContents = fetchPageContents($afdPage); my $afdPageExists = "false"; if ($articleContents =~ m/Category:AfD debates/) { $afdPageExists = "true"; } return($afdPageExists); } #_____________________________________________________________________ +_________# sub fetchPageContents { my($article, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); $article = uri_escape($article, "\0-\377"); my $article_url = "http://$language.wikipedia.org/wiki/$article"; my $browser = LWP::UserAgent->new(); $browser->timeout(60); my $request = HTTP::Request->new(GET => $article_url); my $response = $browser->request($request); if ($response->is_error()) {printf "%s\n", $response->status_line; +} my $contents = $response->content(); sleep(1); # don't hammer the server! One read request every 1 seco +nd. $article = uri_unescape $article; return($contents); } #_____________________________________________________________________ +_________# sub validateBasicFormats { my($editToken, $articleForDeletion, $categoryForDeletion, $reasonF +orDeletion, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); unless (($editToken =~ m/\S+/) && ($articleForDeletion =~ m/\S+/) + && ($categoryForDeletion =~ m/^\S{1}$/) && ($reasonForDeletion =~ m/ +\S+/)) { error("You returned one or more empty or invalid fields, this +is not permitted, all fields are mandatory."); } return(); } #_____________________________________________________________________ +_________# sub validateEditToken { my($offeredToken, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $fileContents = readEditTokens(); chomp($fileContents); my @allLines = split(/\n/, $fileContents); my $tokenFound = 0; my $actualHost = ""; foreach my $line (@allLines) { my ($editToken, $remoteHost) = split(/\|/, $line); if ($offeredToken eq $editToken) { $tokenFound++; $actualHost = $remoteHost; } } unless ($tokenFound > 0) { error("The edit token '$offeredToken' that you provded is an i +nvalid edit token."); } return($actualHost); } #_____________________________________________________________________ +_________# sub readEditTokens { open(TOKENFILE, '<', "$editTokenFile") or error("Cannot open edit +token file."); flock(TOKENFILE, 2) or error("Cannot lock edit token file."); + my $editTokens = <TOKENFILE>; flock(TOKENFILE, 8); close (TOKENFILE) or error("Cannot close edit token file."); return ($editTokens); } #_____________________________________________________________________ +_________# sub generateEditToken { my($remoteHost, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $timeNow = time(); my $stringForEncryption = $remoteHost . $timeNow; my $editToken = crypt($stringForEncryption,"a3"); writeTokenToFile($editToken, $remoteHost); return ($editToken); } #_____________________________________________________________________ +_________# sub writeTokenToFile { my($editToken, $remoteHost, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $stringToWrite = $editToken . "|" . $remoteHost; open(TOKENFILE, '>>', "$editTokenFile") or error("Cannot open edit + token file."); flock(TOKENFILE, 2) or error("Cannot lock edit token file."); + print TOKENFILE "$stringToWrite\n"; flock(TOKENFILE, 8); close (TOKENFILE) or error("Cannot close edit token file."); return(); } #_____________________________________________________________________ +_________# sub printInputForm { my($editToken, @unexpectedArgs) = @_; checkForUnexpectedArgs(@unexpectedArgs); my $template = HTML::Template->new(filename => 'inputForm.tmpl'); $template->param(PATHTOIMAGES => $pathToImages); $template->param(PATHTOSCRIPT => $pathToScript); $template->param(EDITTOKEN => $editToken); print "Content-Type: text/html\n\n", $template->output; return(); } #_____________________________________________________________________ +_________# sub error { my $template = HTML::Template->new(filename => 'error.tmpl'); $template->param(ERROR => $_[0]); print "Content-Type: text/html\n\n", $template->output; exit; }
|
|---|