diotalevi,
I didn't handle timezones. I used the information in the XML in tact. My rationale was that even if the hours were "off", they would be universally off. If you feel that adjusting to GMT will be worth it, I can certainly do that. As far as the code and data - see below, but don't laugh:
Code to build database
#!/usr/bin/perl use strict; use warnings; use Date::Calc 'Day_of_Week'; use DBI; my %holiday = ( '19990101' => 'NEW YEARS', '20000101' => 'NEW YEARS', '20010101' => 'NEW YEARS', '20020101' => 'NEW YEARS', '20030101' => 'NEW YEARS', '20040101' => 'NEW YEARS', '19990118' => 'MLK BDAY', '20000117' => 'MLK BDAY', '20010115' => 'MLK BDAY', '20020121' => 'MLK BDAY', '20030120' => 'MLK BDAY', '20040119' => 'MLK BDAY', '19990214' => 'VALENTINES DAY', '20000214' => 'VALENTINES DAY', '20010214' => 'VALENTINES DAY', '20020214' => 'VALENTINES DAY', '20030214' => 'VALENTINES DAY', '20040214' => 'VALENTINES DAY', '19990215' => 'WASHINGTONS BDAY', '20000221' => 'WASHINGTONS BDAY', '20010219' => 'WASHINGTONS BDAY', '20020218' => 'WASHINGTONS BDAY', '20030217' => 'WASHINGTONS BDAY', '20040216' => 'WASHINGTONS BDAY', '19990531' => 'MEMORIAL DAY', '20000529' => 'MEMORIAL DAY', '20010528' => 'MEMORIAL DAY', '20020527' => 'MEMORIAL DAY', '20030526' => 'MEMORIAL DAY', '20040531' => 'MEMORIAL DAY', '19990704' => 'INDEPENDENCE DAY', '19990705' => 'INDEPENDENCE DAY (OBSERVED)', '20000704' => 'INDEPENDENCE DAY', '20010704' => 'INDEPENDENCE DAY', '20020704' => 'INDEPENDENCE DAY', '20030704' => 'INDEPENDENCE DAY', '20040704' => 'INDEPENDENCE DAY', '20040705' => 'INDEPENDENCE DAY (OBSERVED)', '19990906' => 'LABOR DAY', '20000904' => 'LABOR DAY', '20010903' => 'LABOR DAY', '20020902' => 'LABOR DAY', '20030901' => 'LABOR DAY', '20040906' => 'LABOR DAY', '19991011' => 'COLUMBUS DAY', '20001009' => 'COLUMBUS DAY', '20011008' => 'COLUMBUS DAY', '20021014' => 'COLUMBUS DAY', '20031013' => 'COLUMBUS DAY', '20041011' => 'COLUMBUS DAY', '19991111' => 'VETERANS DAY', '20001110' => 'VETERANS DAY (OBSERVED)', '20001111' => 'VETERANS DAY', '20011111' => 'VETERANS DAY', '20011112' => 'VETERANS DAY (OBSERVED)', '20021111' => 'VETERANS DAY', '20031111' => 'VETERANS DAY', '20041111' => 'VETERANS DAY', '19991125' => 'THANKSGIVING DAY', '20001123' => 'THANKSGIVING DAY', '20011122' => 'THANKSGIVING DAY', '20021128' => 'THANKSGIVING DAY', '20031127' => 'THANKSGIVING DAY', '20041125' => 'THANKSGIVING DAY', '19991224' => 'CHRISTMAS EVE', '20001224' => 'CHRISTMAS EVE', '20011224' => 'CHRISTMAS EVE', '20021224' => 'CHRISTMAS EVE', '20031224' => 'CHRISTMAS EVE', '20041224' => 'CHRISTMAS EVE', '19991225' => 'CHRISTMAS DAY', '20001225' => 'CHRISTMAS DAY', '20011225' => 'CHRISTMAS DAY', '20021225' => 'CHRISTMAS DAY', '20031225' => 'CHRISTMAS DAY', '20041225' => 'CHRISTMAS DAY', '19991231' => 'NEW YEARS EVE', '20001231' => 'NEW YEARS EVE', '20011231' => 'NEW YEARS EVE', '20021231' => 'NEW YEARS EVE', '20031231' => 'NEW YEARS EVE', '20041231' => 'NEW YEARS EVE', ); my $xml = $ARGV[0] || 'pm.xml'; open ( INPUT , '<' , $xml ) or die "Unable to open $xml for reading : +$!"; my $tab = $ARGV[1] || 'pm.tab'; open ( OUTPUT , '>' , $tab ) or die "Unable to open $tab for writing : + $!"; select OUTPUT; while ( <INPUT> ) { last if /^<AUTHOR/; if ( /^<NODE createtime="(\d+)" node_id="(\d+)" nodetype="([^"]+)" + (root_node="(\d+)")?/ ) { my ($year, $month, $day, $hour) = unpack "a4a2a2a2" , $1; my $id = $2; my $type = $3; next if $type eq 'user' || $type eq 'system'; my $root = $5 || ''; my $dow = Day_of_Week($year, $month, $day); my $hol = $holiday{"$year$month$day"} || ''; print join "\t" , $id, $year, $month, $day, $hour, $type, $roo +t, $dow, $hol; print "\n"; } } close INPUT; close OUTPUT; my $srt = $ARGV[2] || 'pm.srt'; system("sort -n $tab > $srt"); my $sql = $ARGV[3] || 'COPYSQL'; open ( INPUT , '<' , $srt ) or die "Unable to open $srt for reading : +$!"; open ( OUTPUT , '>' , $sql ) or die "Unable to open $sql for writing : + $!"; select OUTPUT; my %node; while ( <INPUT> ) { my @field = split /\t/; my $id = shift @field; die "NOT 8 FIELDS\n" if @field != 8; if ( $field[4] eq 'note' ) { if ( $node{ $field[5] } ) { $field[4] = $node{ $field[5] }; } else { $field[4] = 'Poll'; $field[5] ||= '1'; } } else { $node{ $id } = $field[4]; } $_ ||= '' for @field; print join "\t" , @field; } close INPUT; close OUTPUT; my $db = 'pmstats.db'; my $dbh = DBI->connect("dbi:SQLite:dbname=$db", { AutoCommit => 0 }) o +r die $DBI::errstr; $dbh->do( "CREATE TABLE nodes (year, month, day, hour, type, root, dow, holi +day)" ) or die $dbh->errstr; $dbh->do("COPY nodes FROM $sql") or die $dbh->errstr; $dbh->disconnect;
Code to generate HTML (SQL)
#!/usr/bin/perl use strict; use warnings; use CGI ':all'; use DBI; my $db = 'pmstats.db'; my $dbh = DBI->connect("dbi:SQLite:dbname=$db") or die $DBI::errstr; All_Nodes_by_DOW( $dbh ); All_Root_Nodes_by_DOW( $dbh ); All_Sub_Nodes_by_DOW( $dbh ); All_Nodes_by_Hour( $dbh ); Root_Nodes_by_Hour( $dbh ); Sub_Nodes_by_Hour( $dbh ); Holiday( $dbh, 'christmas.html', 'CHRISTMAS DAY' ); Main(); sub All_Nodes_by_DOW { my ($dbh, $file) = @_; $file ||= 'all_nodes_by_dow.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE dow = ?" +); my @total; for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where dow = ? group by year,month, +day" ); for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $dow - 1 ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'All Nodes by Day of Week', -bgcolor => +"#ffffcc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'All Nodes by Day of Week' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(DAY MON TUE WED THU FRI SAT SUN) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, td( 'TOTAL' ), map { td( $_ ) } @total ), Tr( { -style => "background-color:#CCCCCC" }, td( 'AVERAGE' ), map { td( $_ ) } @average ), Tr( { -style => "background-color:#CCCCCC" }, td( 'PERCENT' ), map { td( $_ ) } @percentage ), ), ), end_html; } sub All_Root_Nodes_by_DOW { my ($dbh, $file) = @_; $file ||= 'all_root_nodes_by_dow.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE dow = ? +AND root = ''"); my @total; for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where dow = ? AND root = '' group +by year,month,day" ); for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $dow - 1 ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'Root Nodes by Day of Week', -bgcolor => + "#ffffcc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'Root Nodes by Day of Week' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(DAY MON TUE WED THU FRI SAT SUN) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, td( 'TOTAL' ), map { td( $_ ) } @total ), Tr( { -style => "background-color:#CCCCCC" }, td( 'AVERAGE' ), map { td( $_ ) } @average ), Tr( { -style => "background-color:#CCCCCC" }, td( 'PERCENT' ), map { td( $_ ) } @percentage ), ), ), end_html; } sub All_Sub_Nodes_by_DOW { my ($dbh, $file) = @_; $file ||= 'all_sub_nodes_by_dow.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE dow = ? +AND root != ''"); my @total; for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where dow = ? AND root != '' group + by year,month,day" ); for my $dow ( 1..7 ) { $sth->execute($dow) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $dow - 1 ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'Sub Nodes by Day of Week', -bgcolor => +"#ffffcc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'Sub Nodes by Day of Week' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(DAY MON TUE WED THU FRI SAT SUN) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, td( 'TOTAL' ), map { td( $_ ) } @total ), Tr( { -style => "background-color:#CCCCCC" }, td( 'AVERAGE' ), map { td( $_ ) } @average ), Tr( { -style => "background-color:#CCCCCC" }, td( 'PERCENT' ), map { td( $_ ) } @percentage ), ), ), end_html; } sub All_Nodes_by_Hour { my ($dbh, $file) = @_; $file ||= 'all_nodes_by_hour.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE hour = ? +"); my @total; for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where hour = ? group by year,month +,day,hour" ); for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $hour ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'All Nodes by Hour', -bgcolor => "#ffffc +c" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'All Nodes by Hour' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(HOUR TOTAL AVERAGE PERCENT) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, [ map {td([ sprintf( "%.2d", $_ ), $total[$_], $average[$_], $percentage[$_], ]), } 0 .. 23 ] ), ), ), end_html; } sub Root_Nodes_by_Hour { my ($dbh, $file) = @_; $file ||= 'root_nodes_by_hour.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE hour = ? + AND root = ''"); my @total; for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where hour = ? AND root = '' group + by year,month,day,hour" ); for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $hour ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'Root Nodes by Hour', -bgcolor => "#ffff +cc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'Root Nodes by Hour' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(HOUR TOTAL AVERAGE PERCENT) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, [ map {td([ sprintf( "%.2d", $_ ), $total[$_], $average[$_], $percentage[$_], ]), } 0 .. 23 ] ), ), ), end_html; } sub Sub_Nodes_by_Hour { my ($dbh, $file) = @_; $file ||= 'sub_nodes_by_hour.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE hour = ? + AND root != ''"); my @total; for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my @day_total = $sth->fetchrow_array; push @total , $day_total[0]; } my @average; $sth = $dbh->prepare( "select count(*) from nodes where hour = ? AND root != '' grou +p by year,month,day,hour" ); for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) { $sth->execute($hour) or die $dbh->errstr; my $num_days = $sth->fetchall_arrayref; push @average , int ( $total[ $hour ] / @$num_days ); } my $sum_total; $sum_total += $_ for @total; my @percentage; for ( @total ) { push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total); } print start_html( -title => 'Sub Nodes by Hour', -bgcolor => "#ffffc +c" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( 'Sub Nodes by Hour' ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(HOUR TOTAL AVERAGE PERCENT) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, [ map {td([ sprintf( "%.2d", $_ ), $total[$_], $average[$_], $percentage[$_], ]), } 0 .. 23 ] ), ), ), end_html; } sub Holiday { my ($dbh, $file, $holiday) = @_; $file ||= 'holiday.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE holiday += ?"); $sth->execute($holiday) or die $dbh->errstr; my ($total) = $sth->fetchrow_array; $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE holiday = ? + AND root = ''"); $sth->execute($holiday) or die $dbh->errstr; my ($total_root) = $sth->fetchrow_array; $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE holiday = ? + AND root != ''"); $sth->execute($holiday) or die $dbh->errstr; my ($total_sub) = $sth->fetchrow_array; my @row; push @row , [ 'Overall', 'N/A' , $total , $total_root , $total_sub + ]; my $year_total_sth = $dbh->prepare( "select count(*) from nodes where year = ? AND holiday = ?" ); my $year_root_sth = $dbh->prepare( "select count(*) from nodes where year = ? AND holiday = ? AND + root = ''" ); my $year_sub_sth = $dbh->prepare( "select count(*) from nodes where year = ? AND holiday = ? AND + root != ''" ); my $dow_sth = $dbh->prepare( "select dow from nodes where year = ? AND holiday = ?" ); my @num2day = ( qw(N/A MON TUE WED THU FRI SAT SUN) ); for my $year ( 1999 .. 2004 ) { $dow_sth->execute($year, $holiday) or die $dbh->errstr; my $dow_ref = $dow_sth->fetchall_arrayref; my $dow = $dow_ref->[0][0] ? $num2day[ $dow_ref->[0][0] ] : $num2d +ay[0]; $year_total_sth->execute($year, $holiday) or die $dbh->errstr; my @year_total = $year_total_sth->fetchrow_array; $year_root_sth->execute($year, $holiday) or die $dbh->errstr; my @year_root = $year_root_sth->fetchrow_array; $year_sub_sth->execute($year, $holiday) or die $dbh->errstr; my @year_sub = $year_sub_sth->fetchrow_array; push @row , [$year, $dow, $year_total[0], $year_root[0], $year +_sub[0]]; } print start_html( -title => "Report for $holiday", -bgcolor => "#fff +fcc" ), div( { -align => "center" }, a({ href=>"Main.html"}, "Main Menu" ), p(h1( "Report for $holiday" ) ), table( { -bgcolor => "#000000", -border => "0", -cellpadding => "2", -cellspacing => "1", }, Tr( { -style => "background-color:#CCCCCC" }, th( [ qw(YEAR DOW TOTAL ROOT SUB) ] ), ), Tr( { -style => "background-color:#CCCCCC" }, [ map {td([ $_->[0], $_->[1], $_->[2], $_->[3], $_->[4], ]), } @row ] ), ), ), end_html; } sub Main { my $file = 'Main.html'; open ( HTML , '>' , $file ) or die "Unable to open $file for writi +ng : $!"; select HTML; $| = 1; print start_html( -title => 'MAIN MENU', -bgcolor => "#ffffcc" ), div( { -align => "center" }, p(h1( 'MAIN MENU' ) ), p( a({ href=>"all_nodes_by_dow.html"}, "All Nodes by Day o +f Week" ) ), p( a({ href=>"all_root_nodes_by_dow.html"}, "All Root Node +s by Day of Week" ) ), p( a({ href=>"all_sub_nodes_by_dow.html"}, "All Sub Nodes +by Day of Week" ) ), p( a({ href=>"all_nodes_by_hour.html"}, "All Nodes by Hour +" ) ), p( a({ href=>"root_nodes_by_hour.html"}, "All Root Nodes b +y Hour" ) ), p( a({ href=>"sub_nodes_by_hour.html"}, "All Sub Nodes by +Hour" ) ), p( a({ href=>"christmas.html"}, "Christmas Report" ) ), ), end_html; }
Sample from the 51MB XML provided by James
<NODE createtime="20000127201433" node_id="2515" nodetype="sourcecode" + author_user="979">Form Mailer</NODE> <NODE createtime="20000105204851" node_id="1766" nodetype="sourcecode" + author_user="979">MP3 to CD Audio</NODE> <NODE createtime="20020421045440" node_id="160856" nodetype="note" roo +t_node="160855" parent_node="160855" author_user="2680">Re: How do I +count characters</NODE> <NODE createtime="20020421034818" node_id="160854" nodetype="note" roo +t_node="160853" parent_node="160853" author_user="43894">Re: Lining u +p check boxes in Tk</NODE> <NODE createtime="20020421030142" node_id="160852" nodetype="note" roo +t_node="160838" parent_node="160838" author_user="133383">Re: ChAnGiN +g CaSe of forms before they are submitted?</NODE>

Cheers - L~R


In reply to Re: Re: More PM Stats by Limbic~Region
in thread More PM Stats by Limbic~Region

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.