Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl: the Markov chain saw
 
PerlMonks  

Re: Re: More PM Stats

by Limbic~Region (Chancellor)
on Feb 15, 2004 at 17:58 UTC ( [id://329153]=note: print w/replies, xml ) Need Help??


in reply to Re: More PM Stats
in thread More PM Stats

diotalevi,
I didn't handle timezones. I used the information in the XML in tact. My rationale was that even if the hours were "off", they would be universally off. If you feel that adjusting to GMT will be worth it, I can certainly do that. As far as the code and data - see below, but don't laugh:
Code to build database
#!/usr/bin/perl use strict; use warnings; use Date::Calc 'Day_of_Week'; use DBI; my %holiday = ( '19990101' => 'NEW YEARS', '20000101' => 'NEW YEARS', '20010101' => 'NEW YEARS', '20020101' => 'NEW YEARS', '20030101' => 'NEW YEARS', '20040101' => 'NEW YEARS', '19990118' => 'MLK BDAY', '20000117' => 'MLK BDAY', '20010115' => 'MLK BDAY', '20020121' => 'MLK BDAY', '20030120' => 'MLK BDAY', '20040119' => 'MLK BDAY', '19990214' => 'VALENTINES DAY', '20000214' => 'VALENTINES DAY', '20010214' => 'VALENTINES DAY', '20020214' => 'VALENTINES DAY', '20030214' => 'VALENTINES DAY', '20040214' => 'VALENTINES DAY', '19990215' => 'WASHINGTONS BDAY', '20000221' => 'WASHINGTONS BDAY', '20010219' => 'WASHINGTONS BDAY', '20020218' => 'WASHINGTONS BDAY', '20030217' => 'WASHINGTONS BDAY', '20040216' => 'WASHINGTONS BDAY', '19990531' => 'MEMORIAL DAY', '20000529' => 'MEMORIAL DAY', '20010528' => 'MEMORIAL DAY', '20020527' => 'MEMORIAL DAY', '20030526' => 'MEMORIAL DAY', '20040531' => 'MEMORIAL DAY', '19990704' => 'INDEPENDENCE DAY', '19990705' => 'INDEPENDENCE DAY (OBSERVED)', '20000704' => 'INDEPENDENCE DAY', '20010704' => 'INDEPENDENCE DAY', '20020704' => 'INDEPENDENCE DAY', '20030704' => 'INDEPENDENCE DAY', '20040704' => 'INDEPENDENCE DAY', '20040705' => 'INDEPENDENCE DAY (OBSERVED)', '19990906' => 'LABOR DAY', '20000904' => 'LABOR DAY', '20010903' => 'LABOR DAY', '20020902' => 'LABOR DAY', '20030901' => 'LABOR DAY', '20040906' => 'LABOR DAY', '19991011' => 'COLUMBUS DAY', '20001009' => 'COLUMBUS DAY', '20011008' => 'COLUMBUS DAY', '20021014' => 'COLUMBUS DAY', '20031013' => 'COLUMBUS DAY', '20041011' => 'COLUMBUS DAY', '19991111' => 'VETERANS DAY', '20001110' => 'VETERANS DAY (OBSERVED)', '20001111' => 'VETERANS DAY', '20011111' => 'VETERANS DAY', '20011112' => 'VETERANS DAY (OBSERVED)', '20021111' => 'VETERANS DAY', '20031111' => 'VETERANS DAY', '20041111' => 'VETERANS DAY', '19991125' => 'THANKSGIVING DAY', '20001123' => 'THANKSGIVING DAY', '20011122' => 'THANKSGIVING DAY', '20021128' => 'THANKSGIVING DAY', '20031127' => 'THANKSGIVING DAY', '20041125' => 'THANKSGIVING DAY', '19991224' => 'CHRISTMAS EVE', '20001224' => 'CHRISTMAS EVE', '20011224' => 'CHRISTMAS EVE', '20021224' => 'CHRISTMAS EVE', '20031224' => 'CHRISTMAS EVE', '20041224' => 'CHRISTMAS EVE', '19991225' => 'CHRISTMAS DAY', '20001225' => 'CHRISTMAS DAY', '20011225' => 'CHRISTMAS DAY', '20021225' => 'CHRISTMAS DAY', '20031225' => 'CHRISTMAS DAY', '20041225' => 'CHRISTMAS DAY', '19991231' => 'NEW YEARS EVE', '20001231' => 'NEW YEARS EVE', '20011231' => 'NEW YEARS EVE', '20021231' => 'NEW YEARS EVE', '20031231' => 'NEW YEARS EVE', '20041231' => 'NEW YEARS EVE', ); my $xml = $ARGV[0] || 'pm.xml'; open ( INPUT , '<' , $xml ) or die "Unable to open $xml for reading : +$!"; my $tab = $ARGV[1] || 'pm.tab'; open ( OUTPUT , '>' , $tab ) or die "Unable to open $tab for writing : + $!"; select OUTPUT; while ( <INPUT> ) { last if /^<AUTHOR/; if ( /^<NODE createtime="(\d+)" node_id="(\d+)" nodetype="([^"]+)" + (root_node="(\d+)")?/ ) { my ($year, $month, $day, $hour) = unpack "a4a2a2a2" , $1; my $id = $2; my $type = $3; next if $type eq 'user' || $type eq 'system'; my $root = $5 || ''; my $dow = Day_of_Week($year, $month, $day); my $hol = $holiday{"$year$month$day"} || ''; print join "\t" , $id, $year, $month, $day, $hour, $type, $roo +t, $dow, $hol; print "\n"; } } close INPUT; close OUTPUT; my $srt = $ARGV[2] || 'pm.srt'; system("sort -n $tab > $srt"); my $sql = $ARGV[3] || 'COPYSQL'; open ( INPUT , '<' , $srt ) or die "Unable to open $srt for reading : +$!"; open ( OUTPUT , '>' , $sql ) or die "Unable to open $sql for writing : + $!"; select OUTPUT; my %node; while ( <INPUT> ) { my @field = split /\t/; my $id = shift @field; die "NOT 8 FIELDS\n" if @field != 8; if ( $field[4] eq 'note' ) { if ( $node{ $field[5] } ) { $field[4] = $node{ $field[5] }; } else { $field[4] = 'Poll'; $field[5] ||= '1'; } } else { $node{ $id } = $field[4]; } $_ ||= '' for @field; print join "\t" , @field; } close INPUT; close OUTPUT; my $db = 'pmstats.db'; my $dbh = DBI->connect("dbi:SQLite:dbname=$db", { AutoCommit => 0 }) o +r die $DBI::errstr; $dbh->do( "CREATE TABLE nodes (year, month, day, hour, type, root, dow, holi +day)" ) or die $dbh->errstr; $dbh->do("COPY nodes FROM $sql") or die $dbh->errstr; $dbh->disconnect;
Code to generate HTML (SQL)
#!/usr/bin/perl use strict; use warnings; use CGI ':all'; use DBI; my $db = 'pmstats.db'; my $dbh = DBI->connect("dbi:SQLite:dbname=$db") or die $DBI::errstr; All_Nodes_by_DOW( $dbh ); All_Root_Nodes_by_DOW( $dbh ); All_Sub_Nodes_by_DOW( $dbh ); All_Nodes_by_Hour( $dbh ); Root_Nodes_by_Hour( $dbh ); Sub_Nodes_by_Hour( $dbh ); Holiday( $dbh, 'christmas.html', 'CHRISTMAS DAY' ); Main(); sub All_Nodes_by_DOW { my ($dbh, $file) = @_; $file ||= 'all_nodes_by_dow.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE dow = ?" +); my @total; for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where dow = ? group by year,month, +day" ); for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $dow - 1 ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'All Nodes by Day of Week', -bgcolor => +"#ffffcc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'All Nodes by Day of Week' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(DAY MON TUE WED THU FRI SAT SUN) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, td( 'TOTAL' ), map { td( $_ ) } @total ), Tr( { -style => "background-color:#CCCCCC" }, td( 'AVERAGE' ), map { td( $_ ) } @average ), Tr( { -style => "background-color:#CCCCCC" }, td( 'PERCENT' ), map { td( $_ ) } @percentage ), ), ), end_html; } sub All_Root_Nodes_by_DOW { my ($dbh, $file) = @_; $file ||= 'all_root_nodes_by_dow.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE dow = ? +AND root = ''"); my @total; for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where dow = ? AND root = '' group +by year,month,day" ); for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $dow - 1 ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'Root Nodes by Day of Week', -bgcolor => + "#ffffcc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'Root Nodes by Day of Week' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(DAY MON TUE WED THU FRI SAT SUN) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, td( 'TOTAL' ), map { td( $_ ) } @total ), Tr( { -style => "background-color:#CCCCCC" }, td( 'AVERAGE' ), map { td( $_ ) } @average ), Tr( { -style => "background-color:#CCCCCC" }, td( 'PERCENT' ), map { td( $_ ) } @percentage ), ), ), end_html; } sub All_Sub_Nodes_by_DOW { my ($dbh, $file) = @_; $file ||= 'all_sub_nodes_by_dow.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE dow = ? +AND root != ''"); my @total; for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where dow = ? AND root != '' group + by year,month,day" ); for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $dow - 1 ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'Sub Nodes by Day of Week', -bgcolor => +"#ffffcc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'Sub Nodes by Day of Week' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(DAY MON TUE WED THU FRI SAT SUN) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, td( 'TOTAL' ), map { td( $_ ) } @total ), Tr( { -style => "background-color:#CCCCCC" }, td( 'AVERAGE' ), map { td( $_ ) } @average ), Tr( { -style => "background-color:#CCCCCC" }, td( 'PERCENT' ), map { td( $_ ) } @percentage ), ), ), end_html; } sub All_Nodes_by_Hour { my ($dbh, $file) = @_; $file ||= 'all_nodes_by_hour.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE hour = ? +"); my @total; for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where hour = ? group by year,month +,day,hour" ); for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $hour ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'All Nodes by Hour', -bgcolor => "#ffffc +c" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'All Nodes by Hour' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(HOUR TOTAL AVERAGE PERCENT) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, [ map {td([ sprintf( "%.2d", $_ ), $total[$_], $average[$_], $percentage[$_], ]), } 0 .. 23 ] ), ), ), end_html; } sub Root_Nodes_by_Hour { my ($dbh, $file) = @_; $file ||= 'root_nodes_by_hour.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE hour = ? + AND root = ''"); my @total; for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where hour = ? AND root = '' group + by year,month,day,hour" ); for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $hour ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'Root Nodes by Hour', -bgcolor => "#ffff +cc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'Root Nodes by Hour' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(HOUR TOTAL AVERAGE PERCENT) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, [ map {td([ sprintf( "%.2d", $_ ), $total[$_], $average[$_], $percentage[$_], ]), } 0 .. 23 ] ), ), ), end_html; } sub Sub_Nodes_by_Hour { my ($dbh, $file) = @_; $file ||= 'sub_nodes_by_hour.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE hour = ? + AND root != ''"); my @total; for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where hour = ? AND root != '' grou +p by year,month,day,hour" ); for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $hour ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'Sub Nodes by Hour', -bgcolor => "#ffffc +c" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'Sub Nodes by Hour' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(HOUR TOTAL AVERAGE PERCENT) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, [ map {td([ sprintf( "%.2d", $_ ), $total[$_], $average[$_], $percentage[$_], ]), } 0 .. 23 ] ), ), ), end_html; } sub Holiday { my ($dbh, $file, $holiday) = @_; $file ||= 'holiday.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE holiday += ?"); $sth->execute($holiday) or die $dbh->errstr; my ($total) = $sth->fetchrow_array; $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE holiday = ? + AND root = ''"); $sth->execute($holiday) or die $dbh->errstr; my ($total_root) = $sth->fetchrow_array; $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE holiday = ? + AND root != ''"); $sth->execute($holiday) or die $dbh->errstr; my ($total_sub) = $sth->fetchrow_array; my @row; push @row , [ 'Overall', 'N/A' , $total , $total_root , $total_sub + ]; my $year_total_sth = $dbh->prepare( "select count(*) from nodes where year = ? AND holiday = ?" ); my $year_root_sth = $dbh->prepare( "select count(*) from nodes where year = ? AND holiday = ? AND + root = ''" ); my $year_sub_sth = $dbh->prepare( "select count(*) from nodes where year = ? AND holiday = ? AND + root != ''" ); my $dow_sth = $dbh->prepare( "select dow from nodes where year = ? AND holiday = ?" ); my @num2day = ( qw(N/A MON TUE WED THU FRI SAT SUN) ); for my $year ( 1999 .. 2004 ) { $dow_sth->execute($year, $holiday) or die $dbh->errstr; my $dow_ref = $dow_sth->fetchall_arrayref; my $dow = $dow_ref->[0][0] ? $num2day[ $dow_ref->[0][0] ] : $num2d +ay[0]; $year_total_sth->execute($year, $holiday) or die $dbh->errstr; my @year_total = $year_total_sth->fetchrow_array; $year_root_sth->execute($year, $holiday) or die $dbh->errstr; my @year_root = $year_root_sth->fetchrow_array; $year_sub_sth->execute($year, $holiday) or die $dbh->errstr; my @year_sub = $year_sub_sth->fetchrow_array; push @row , [$year, $dow, $year_total[0], $year_root[0], $year +_sub[0]]; } print start_html( -title => "Report for $holiday", -bgcolor => "#fff +fcc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( "Report for $holiday" ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(YEAR DOW TOTAL ROOT SUB) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, [ map {td([ $_->[0], $_->[1], $_->[2], $_->[3], $_->[4], ]), } @row ] ), ), ), end_html; } sub Main { my $file = 'Main.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; print start_html( -title => 'MAIN MENU', -bgcolor => "#ffffcc" ), div( { -align => "center" }, p(h1( 'MAIN MENU' ) ), p( a({ href=>"all_nodes_by_dow.html"}, "All Nodes by Day o +f Week" ) ), p( a({ href=>"all_root_nodes_by_dow.html"}, "All Root Node +s by Day of Week" ) ), p( a({ href=>"all_sub_nodes_by_dow.html"}, "All Sub Nodes +by Day of Week" ) ), p( a({ href=>"all_nodes_by_hour.html"}, "All Nodes by Hour +" ) ), p( a({ href=>"root_nodes_by_hour.html"}, "All Root Nodes b +y Hour" ) ), p( a({ href=>"sub_nodes_by_hour.html"}, "All Sub Nodes by +Hour" ) ), p( a({ href=>"christmas.html"}, "Christmas Report" ) ), ), end_html; }
Sample from the 51MB XML provided by James
<NODE createtime="20000127201433" node_id="2515" nodetype="sourcecode" + author_user="979">Form Mailer</NODE> <NODE createtime="20000105204851" node_id="1766" nodetype="sourcecode" + author_user="979">MP3 to CD Audio</NODE> <NODE createtime="20020421045440" node_id="160856" nodetype="note" roo +t_node="160855" parent_node="160855" author_user="2680">Re: How do I +count characters</NODE> <NODE createtime="20020421034818" node_id="160854" nodetype="note" roo +t_node="160853" parent_node="160853" author_user="43894">Re: Lining u +p check boxes in Tk</NODE> <NODE createtime="20020421030142" node_id="160852" nodetype="note" roo +t_node="160838" parent_node="160838" author_user="133383">Re: ChAnGiN +g CaSe of forms before they are submitted?</NODE>

Cheers - L~R

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://329153]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others having a coffee break in the Monastery: (7)
As of 2024-03-28 10:30 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found