#!/usr/bin/perl
#
# checkExpired.pl
#
# A perl script to check a MediaWiki database for pages
# that are getting old and that will need re-examining.
#
# by starX
###########################################################

=pod

=head1 Check Expired

A perl script to monitor your MediaWiki database for pages that might be
getting out of date and need revising.

=head2 INSTRUCTIONS

The script can be run standalone from the command line, but is most useful
when scheduled to run on a regular basis via cron.

Before the first run, edit the CONFIGURATION section near the top of the
code: set $USERNAME, $PASSWORD, $DATABASE and $EMAIL, and adjust
$PAGE_TABLE, $MAX_AGE_DAYS, $SENDMAIL and $fromEmail if the defaults do not
suit your installation.

=head2 REQUIREMENTS

checkExpired.pl presumes a standard perl installation on a Unix/Linux
platform with access to the DBI module, and sendmail. If you lack the
system mail utility, you could fairly easily re-write those portions to
take advantage of the Net::SMTP package on CPAN.

=head2 FUNCTIONALITY

The basic functionality of the script is as follows:

=over 4

=item 1

Connect to the database.

=item 2

Select all titles and last touched records from relevant pages.

=item 3

Compare the last touched date with parameters we establish.

=item 4

Email all the pages that are looking out of date to a designated editor.

=back

=head2 ERRORS

The $USERNAME, $PASSWORD, $DATABASE, and $EMAIL variables all need to be
defined, and checkExpired will die and report an error if they're not.
If $fromEmail is left undefined, the report is sent from $EMAIL.

If checkExpired has any problem connecting to the database, or preparing
or executing the query, it will die and print the error from the DBI.

When opening or closing the file handle for sendmail, there is the
possibility that sendmail will report an error. On an error at open,
checkExpired prints the error and dies. On an error at close, it only
warns (since there's nothing else to do anyway) that something has gone
wrong with writing to sendmail, and prints the sendmail exit status.

=head2 CHANGELOG

1/19/2007

=over 4

=item Corrected error that was preventing proper time stamp formatting for single-digit months.

=item Created separate variable to store the email address the report comes from for cases where the person checking the wiki is different from the developer.

=back

=cut

# INITIALIZE PACKAGES
use strict;
use warnings;
use DBI;                   # For database access.
use POSIX qw(strftime);    # For building YYYYMMDDhhmmss time stamps.

# CONFIGURATION
my $USERNAME;                        # Username for database
my $PASSWORD;                        # Password for database
my $DATABASE     = 'wikidb';         # The name of the database that we're going to connect to.
my $PAGE_TABLE   = 'proadvpage';     # The wiki's page table, including any table prefix.
my $EMAIL;                           # The email address to send to.
my $fromEmail;                       # Email address this comes from (defaults to $EMAIL).
my $MAX_AGE_DAYS = 30;               # Pages untouched for longer than this are reported.
my $SENDMAIL     = '/usr/lib/sendmail';    # Path to the sendmail binary.

# AND AWAY WE GO....

# First check to make sure our login variables have been defined.
# Test for undefined/empty rather than plain falseness so that a value
# such as "0" is not rejected by accident.
die "Error: No username specified.\n"
    if !defined $USERNAME || $USERNAME eq '';
die "Error: No password specified.\n"
    if !defined $PASSWORD || $PASSWORD eq '';
die "Error: No database specified.\n"
    if !defined $DATABASE || $DATABASE eq '';
die "Error: No email address to send to.\n"
    if !defined $EMAIL || $EMAIL eq '';

# The sender is optional; without one the report comes from the recipient,
# which avoids emitting an empty "From:" header.
$fromEmail = $EMAIL if !defined $fromEmail || $fromEmail eq '';

# Make a connection to the database. If there are any problems, quit the
# program and write a simple error. PrintError is switched off because
# every DBI call below reports its own failure.
my $dbh = DBI->connect(
    "dbi:mysql:$DATABASE",
    $USERNAME,
    $PASSWORD,
    { PrintError => 0 },
) or die "Couldn't connect to database: $DBI::errstr\n";

# Now that we've connected to the database, prepare and execute a statement
# to query the database with. In this case, we want to know if a page is
# getting out of date, so we need the name of the page (page_title) and the
# last time that page was modified (page_touched). We only want to examine
# pages in the main namespace (page_namespace=0) that are not just
# redirection pages (page_is_redirect=0).
my $select = $dbh->prepare(
        'SELECT page_title,page_touched FROM '
      . $dbh->quote_identifier($PAGE_TABLE)
      . ' WHERE page_namespace=0 AND page_is_redirect=0'
) or die "Couldn't prepare query: " . $dbh->errstr . "\n";
$select->execute()
    or die "Couldn't execute query: " . $select->errstr . "\n";

# Before we can check which pages might be out of date, we need the current
# time and the cutoff time in the same format as the wiki time stamp, which
# comes in the form of YYYYMMDDhhmmss. MediaWiki stores its time stamps in
# UTC, so both values are built with gmtime rather than localtime.
#
# Subtracting two such stamps as plain numbers does not give an elapsed
# time (the gap across a month or year boundary is huge even for pages
# touched a day apart). Instead the cutoff is computed in epoch seconds and
# formatted; because the stamps are fixed-width digit strings, a plain
# string comparison then orders them chronologically.
my $now    = time;
my $time   = strftime( '%Y%m%d%H%M%S', gmtime $now );
my $cutoff = strftime( '%Y%m%d%H%M%S',
    gmtime( $now - $MAX_AGE_DAYS * 24 * 60 * 60 ) );

# Next let's fork sendmail. The list form of open runs sendmail directly,
# without passing the command through a shell.
open( my $mail, '|-', $SENDMAIL, '-oi', '-t' )
    or die "Couldn't fork sendmail $!\n";

# Print the mail headers, followed by the empty line that separates the
# headers from the message body.
print {$mail} "To: $EMAIL\n";
print {$mail} "From: $fromEmail\n";
print {$mail} "Subject: Report on Expired Pages\n";
print {$mail} "\n";

# Now compare every page's time stamp with the cutoff and report the pages
# that were last touched before it.
while ( my $row = $select->fetchrow_hashref ) {
    next if !defined $row->{page_touched};
    next if $row->{page_touched} ge $cutoff;

    print {$mail}
        "Danger, Will Robinson! \"$row->{page_title}\" is getting out of date! "
      . "It's time stamp is: $row->{page_touched}\n\n";
}

# fetchrow_hashref also returns undef when fetching fails, so make sure the
# loop ended because the rows ran out and not because of an error.
warn "Error while reading pages: " . $select->errstr . "\n"
    if $select->err;

# And close the database connection, since we're done using it.
$dbh->disconnect();

# Now give a courtesy notice as to when the email was generated, and then
# close the file, because we're done with it. Give both an easily human
# readable (local time) stamp, and the values that the program uses to
# determine if a page is out of date for easy debugging.
my @localtime = localtime $now;
printf {$mail} "This email was generated on %d-%d-%d at %02d:%02d\n",
    $localtime[4] + 1,       # localtime counts months from 0
    $localtime[3],
    $localtime[5] + 1900,    # localtime counts years from 1900
    $localtime[2],
    $localtime[1];
print {$mail}
    "Using a current time stamp of $time (UTC) and a cutoff of $cutoff\n\n";
print {$mail}
    "If something went wrong with the output, please contact $fromEmail\n\n";

# Closing a pipe waits for sendmail to finish; a failure here means that
# sendmail exited with a non-zero status, which is left in $?.
close $mail
    or warn "sendmail didn't like it... sendmail error: $?\n";