#!/usr/bin/perl -w
use strict;
use warnings;

#
# OHlottery.pl  By Scott Cain
#
# Uses CPAN modules to contact the Ohio Lottery web site search page
# and extracts the winning numbers for all super lotto drawings for which
# there is data, which starts at the beginning of 1998.  The frequency of
# each number's occurrence is calculated and compared to the expected
# value, i.e., 1/47 or 1/49 depending on when the drawing was held.
#
# Things that could be done to make this better, but that I probably
# won't do:
#   - have the results saved to a flat file so the script doesn't have
#     to hit the web server for every past result.
#   - make it more general to allow analysis of other games.
#
use LWP::Simple qw(get);
use HTML::TableExtract;
use Date::Calc qw(Day_of_Week Add_Delta_Days Today Date_to_Days
    Decode_Day_of_Week Date_to_Text);

# Search page queried once per drawing date.
my $SEARCH_URL =
    'http://www.ohiolottery.com/numbers/searchresults_bydate.asp';

# Six dash-separated numbers, e.g. "3-12-19-27-33-41".
my $DRAW_RE = qr/(\d+)-(\d+)-(\d+)-(\d+)-(\d+)-(\d+)/;

my $BALLS_PER_DRAW = 6;    # numbers counted per drawing
my $LIST_SIZE      = 8;    # how many balls to show in each top/bottom list

# One tally per game: the new "plus" game is measured against 1/49,
# the old game against 1/47.
my $plus_game = new_game(49);
my $old_game  = new_game(47);

# A Saturday, the first date for which data is available.
my @current_date = ( 1998, 1, 3 );

# Both values are constant for the whole run, so compute them once.
my $today_in_days = Date_to_Days( Today() );
my $saturday      = Decode_Day_of_Week("Saturday");

while ( Date_to_Days(@current_date) < $today_in_days ) {
    my ( $year, $month, $day ) = @current_date;
    my $url = $SEARCH_URL
        . "?FromMonth=" . $month
        . "&FromDay=" . $day
        . "&FromYear=" . $year;

    my $page = get($url);
    if ($page) {
        tally_page( $page, $plus_game, $old_game );
    }
    else {
        # The LWP get didn't work.  Function calls are not interpolated
        # inside double-quoted strings, so format the date explicitly.
        printf( "%s failed\n", Date_to_Text(@current_date) );
    }

    #
    # figure out the next day to use.
    # (drawings are only held on Saturdays and Wednesdays)
    #
    my $delta_days = Day_of_Week(@current_date) == $saturday ? 4 : 3;
    @current_date = Add_Delta_Days( @current_date, $delta_days );
}

#
# now do some simple statistics and print fairly pretty results.
#
print 'Percentages are % deviation from expected value';
print_report( 'superlotto plus', $plus_game );

print "\n\nOld Super Lotto results included for historical comparison.\n";
print_report( 'superlotto', $old_game );

# Build an empty tally for a game played with balls 1 .. $ball_count.
# Returns a hash ref with keys:
#   balls - number of distinct balls in the game
#   draws - number of drawings counted so far
#   hits  - hash ref: ball number => times drawn (all start at 0)
sub new_game {
    my ($ball_count) = @_;
    return {
        balls => $ball_count,
        draws => 0,
        hits  => { map { $_ => 0 } 1 .. $ball_count },
    };
}

# Parse one result page and add every lotto drawing found in its
# depth-1 tables to the matching game tally.
sub tally_page {
    my ( $page, $plus, $old ) = @_;

    my $te = HTML::TableExtract->new( depth => 1 );
    $te->parse($page);

    for my $ts ( $te->table_states ) {
        for my $row ( $ts->rows ) {
            tally_row( $row, $plus, $old );
        }
    }
    return;
}

# Count a single table row.  A row is a drawing when some cell mentions
# "lotto" and the fourth cell holds six dash-separated numbers; a game
# name containing "plus" differentiates the new lotto from the old one.
sub tally_row {
    my ( $row, $plus, $old ) = @_;

    # Cells can be undef, so test definedness before matching.
    my ($game_name) = grep { defined && /lotto/i } @$row;
    return unless $game_name;

    my $numbers = $row->[3];
    return unless defined $numbers;

    my @balls = $numbers =~ $DRAW_RE;
    return unless @balls;

    my $game = $game_name =~ /plus/i ? $plus : $old;

    # "+ 0" normalises the key so that "07" and "7" are the same ball.
    $game->{hits}{ $_ + 0 }++ for @balls;
    $game->{draws}++;
    return;
}

# Print the most and least frequently drawn balls of one game, each as
# a percentage deviation from the expected frequency 1/balls.
#   $label - game name used in the headings
#   $game  - tally hash ref as built by new_game()
sub print_report {
    my ( $label, $game ) = @_;

    my $draws = $game->{draws};
    print "\nTop $LIST_SIZE $label balls\n";
    print "for $draws drawings\n";

    # Without any drawings the frequencies are undefined (0/0).
    if ( !$draws ) {
        print "No drawings found; no statistics to report.\n";
        return;
    }

    my $expect      = 1.0 / $game->{balls};
    my $total_balls = $BALLS_PER_DRAW * $draws;

    # Only balls that exist in this game take part in the ranking.
    my %freq = map { $_ => $game->{hits}{$_} / $total_balls }
        1 .. $game->{balls};

    # Most frequent first; ties broken by ball number so that the
    # output is the same on every run.
    my @ranked = sort { $freq{$b} <=> $freq{$a} or $a <=> $b } keys %freq;

    print_deviations( \%freq, $expect, @ranked[ 0 .. $LIST_SIZE - 1 ] );

    print "\nBottom $LIST_SIZE $label balls\n";
    print_deviations( \%freq, $expect, @ranked[ -$LIST_SIZE .. -1 ] );
    return;
}

# Print one "NN -> +x.x%" line per ball, in the order given.
sub print_deviations {
    my ( $freq, $expect, @balls ) = @_;

    for my $ball (@balls) {
        my $deviation = 100 * ( $freq->{$ball} - $expect ) / $expect;
        printf( "%.2d -> %+.1f%%\n", $ball, $deviation );
    }
    return;
}