I didn't handle timezones. I used the information in the XML in tact. My rationale was that even if the hours were "off", they would be universally off. If you feel that adjusting to GMT will be worth it, I can certainly do that. As far as the code and data - see below, but don't laugh:
#!/usr/bin/perl
use strict;
use warnings;
use Date::Calc 'Day_of_Week';
use DBI;
my %holiday = (
'19990101' => 'NEW YEARS',
'20000101' => 'NEW YEARS',
'20010101' => 'NEW YEARS',
'20020101' => 'NEW YEARS',
'20030101' => 'NEW YEARS',
'20040101' => 'NEW YEARS',
'19990118' => 'MLK BDAY',
'20000117' => 'MLK BDAY',
'20010115' => 'MLK BDAY',
'20020121' => 'MLK BDAY',
'20030120' => 'MLK BDAY',
'20040119' => 'MLK BDAY',
'19990214' => 'VALENTINES DAY',
'20000214' => 'VALENTINES DAY',
'20010214' => 'VALENTINES DAY',
'20020214' => 'VALENTINES DAY',
'20030214' => 'VALENTINES DAY',
'20040214' => 'VALENTINES DAY',
'19990215' => 'WASHINGTONS BDAY',
'20000221' => 'WASHINGTONS BDAY',
'20010219' => 'WASHINGTONS BDAY',
'20020218' => 'WASHINGTONS BDAY',
'20030217' => 'WASHINGTONS BDAY',
'20040216' => 'WASHINGTONS BDAY',
'19990531' => 'MEMORIAL DAY',
'20000529' => 'MEMORIAL DAY',
'20010528' => 'MEMORIAL DAY',
'20020527' => 'MEMORIAL DAY',
'20030526' => 'MEMORIAL DAY',
'20040531' => 'MEMORIAL DAY',
'19990704' => 'INDEPENDENCE DAY',
'19990705' => 'INDEPENDENCE DAY (OBSERVED)',
'20000704' => 'INDEPENDENCE DAY',
'20010704' => 'INDEPENDENCE DAY',
'20020704' => 'INDEPENDENCE DAY',
'20030704' => 'INDEPENDENCE DAY',
'20040704' => 'INDEPENDENCE DAY',
'20040705' => 'INDEPENDENCE DAY (OBSERVED)',
'19990906' => 'LABOR DAY',
'20000904' => 'LABOR DAY',
'20010903' => 'LABOR DAY',
'20020902' => 'LABOR DAY',
'20030901' => 'LABOR DAY',
'20040906' => 'LABOR DAY',
'19991011' => 'COLUMBUS DAY',
'20001009' => 'COLUMBUS DAY',
'20011008' => 'COLUMBUS DAY',
'20021014' => 'COLUMBUS DAY',
'20031013' => 'COLUMBUS DAY',
'20041011' => 'COLUMBUS DAY',
'19991111' => 'VETERANS DAY',
'20001110' => 'VETERANS DAY (OBSERVED)',
'20001111' => 'VETERANS DAY',
'20011111' => 'VETERANS DAY',
'20011112' => 'VETERANS DAY (OBSERVED)',
'20021111' => 'VETERANS DAY',
'20031111' => 'VETERANS DAY',
'20041111' => 'VETERANS DAY',
'19991125' => 'THANKSGIVING DAY',
'20001123' => 'THANKSGIVING DAY',
'20011122' => 'THANKSGIVING DAY',
'20021128' => 'THANKSGIVING DAY',
'20031127' => 'THANKSGIVING DAY',
'20041125' => 'THANKSGIVING DAY',
'19991224' => 'CHRISTMAS EVE',
'20001224' => 'CHRISTMAS EVE',
'20011224' => 'CHRISTMAS EVE',
'20021224' => 'CHRISTMAS EVE',
'20031224' => 'CHRISTMAS EVE',
'20041224' => 'CHRISTMAS EVE',
'19991225' => 'CHRISTMAS DAY',
'20001225' => 'CHRISTMAS DAY',
'20011225' => 'CHRISTMAS DAY',
'20021225' => 'CHRISTMAS DAY',
'20031225' => 'CHRISTMAS DAY',
'20041225' => 'CHRISTMAS DAY',
'19991231' => 'NEW YEARS EVE',
'20001231' => 'NEW YEARS EVE',
'20011231' => 'NEW YEARS EVE',
'20021231' => 'NEW YEARS EVE',
'20031231' => 'NEW YEARS EVE',
'20041231' => 'NEW YEARS EVE',
);
my $xml = $ARGV[0] || 'pm.xml';
open ( INPUT , '<' , $xml ) or die "Unable to open $xml for reading :
+$!";
my $tab = $ARGV[1] || 'pm.tab';
open ( OUTPUT , '>' , $tab ) or die "Unable to open $tab for writing :
+ $!";
select OUTPUT;
while ( <INPUT> ) {
last if /^<AUTHOR/;
if ( /^<NODE createtime="(\d+)" node_id="(\d+)" nodetype="([^"]+)"
+ (root_node="(\d+)")?/ ) {
my ($year, $month, $day, $hour) = unpack "a4a2a2a2" , $1;
my $id = $2;
my $type = $3;
next if $type eq 'user' || $type eq 'system';
my $root = $5 || '';
my $dow = Day_of_Week($year, $month, $day);
my $hol = $holiday{"$year$month$day"} || '';
print join "\t" , $id, $year, $month, $day, $hour, $type, $roo
+t, $dow, $hol;
print "\n";
}
}
close INPUT;
close OUTPUT;
my $srt = $ARGV[2] || 'pm.srt';
system("sort -n $tab > $srt");
my $sql = $ARGV[3] || 'COPYSQL';
open ( INPUT , '<' , $srt ) or die "Unable to open $srt for reading :
+$!";
open ( OUTPUT , '>' , $sql ) or die "Unable to open $sql for writing :
+ $!";
select OUTPUT;
my %node;
while ( <INPUT> ) {
my @field = split /\t/;
my $id = shift @field;
die "NOT 8 FIELDS\n" if @field != 8;
if ( $field[4] eq 'note' ) {
if ( $node{ $field[5] } ) {
$field[4] = $node{ $field[5] };
}
else {
$field[4] = 'Poll';
$field[5] ||= '1';
}
}
else {
$node{ $id } = $field[4];
}
$_ ||= '' for @field;
print join "\t" , @field;
}
close INPUT;
close OUTPUT;
my $db = 'pmstats.db';
my $dbh = DBI->connect("dbi:SQLite:dbname=$db", { AutoCommit => 0 }) o
+r die $DBI::errstr;
$dbh->do(
"CREATE TABLE nodes (year, month, day, hour, type, root, dow, holi
+day)"
) or die $dbh->errstr;
$dbh->do("COPY nodes FROM $sql") or die $dbh->errstr;
$dbh->disconnect;
#!/usr/bin/perl
use strict;
use warnings;
use CGI ':all';
use DBI;
my $db = 'pmstats.db';
my $dbh = DBI->connect("dbi:SQLite:dbname=$db") or die $DBI::errstr;
All_Nodes_by_DOW( $dbh );
All_Root_Nodes_by_DOW( $dbh );
All_Sub_Nodes_by_DOW( $dbh );
All_Nodes_by_Hour( $dbh );
Root_Nodes_by_Hour( $dbh );
Sub_Nodes_by_Hour( $dbh );
Holiday( $dbh, 'christmas.html', 'CHRISTMAS DAY' );
Main();
sub All_Nodes_by_DOW {
my ($dbh, $file) = @_;
$file ||= 'all_nodes_by_dow.html';
open ( HTML , '>' , $file ) or die "Unable to open $file for writi
+ng : $!";
select HTML;
$| = 1;
my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE dow = ?"
+);
my @total;
for my $dow ( 1..7 ) {
$sth->execute($dow) or die $dbh->errstr;
my @day_total = $sth->fetchrow_array;
push @total , $day_total[0];
}
my @average;
$sth = $dbh->prepare(
"select count(*) from nodes where dow = ? group by year,month,
+day"
);
for my $dow ( 1..7 ) {
$sth->execute($dow) or die $dbh->errstr;
my $num_days = $sth->fetchall_arrayref;
push @average , int ( $total[ $dow - 1 ] / @$num_days );
}
my $sum_total;
$sum_total += $_ for @total;
my @percentage;
for ( @total ) {
push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total);
}
print
start_html( -title => 'All Nodes by Day of Week', -bgcolor =>
+"#ffffcc" ),
div( { -align => "center" },
a({ href=>"Main.html"}, "Main Menu" ),
p(h1( 'All Nodes by Day of Week' ) ),
table(
{
-bgcolor => "#000000",
-border => "0",
-cellpadding => "2",
-cellspacing => "1",
},
Tr( { -style => "background-color:#CCCCCC" },
th( [ qw(DAY MON TUE WED THU FRI SAT SUN) ] ),
),
Tr( { -style => "background-color:#CCCCCC" },
td( 'TOTAL' ),
map { td( $_ ) } @total
),
Tr( { -style => "background-color:#CCCCCC" },
td( 'AVERAGE' ),
map { td( $_ ) } @average
),
Tr( { -style => "background-color:#CCCCCC" },
td( 'PERCENT' ),
map { td( $_ ) } @percentage
),
),
),
end_html;
}
sub All_Root_Nodes_by_DOW {
my ($dbh, $file) = @_;
$file ||= 'all_root_nodes_by_dow.html';
open ( HTML , '>' , $file ) or die "Unable to open $file for writi
+ng : $!";
select HTML;
$| = 1;
my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE dow = ?
+AND root = ''");
my @total;
for my $dow ( 1..7 ) {
$sth->execute($dow) or die $dbh->errstr;
my @day_total = $sth->fetchrow_array;
push @total , $day_total[0];
}
my @average;
$sth = $dbh->prepare(
"select count(*) from nodes where dow = ? AND root = '' group
+by year,month,day"
);
for my $dow ( 1..7 ) {
$sth->execute($dow) or die $dbh->errstr;
my $num_days = $sth->fetchall_arrayref;
push @average , int ( $total[ $dow - 1 ] / @$num_days );
}
my $sum_total;
$sum_total += $_ for @total;
my @percentage;
for ( @total ) {
push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total);
}
print
start_html( -title => 'Root Nodes by Day of Week', -bgcolor =>
+ "#ffffcc" ),
div( { -align => "center" },
a({ href=>"Main.html"}, "Main Menu" ),
p(h1( 'Root Nodes by Day of Week' ) ),
table(
{
-bgcolor => "#000000",
-border => "0",
-cellpadding => "2",
-cellspacing => "1",
},
Tr( { -style => "background-color:#CCCCCC" },
th( [ qw(DAY MON TUE WED THU FRI SAT SUN) ] ),
),
Tr( { -style => "background-color:#CCCCCC" },
td( 'TOTAL' ),
map { td( $_ ) } @total
),
Tr( { -style => "background-color:#CCCCCC" },
td( 'AVERAGE' ),
map { td( $_ ) } @average
),
Tr( { -style => "background-color:#CCCCCC" },
td( 'PERCENT' ),
map { td( $_ ) } @percentage
),
),
),
end_html;
}
sub All_Sub_Nodes_by_DOW {
my ($dbh, $file) = @_;
$file ||= 'all_sub_nodes_by_dow.html';
open ( HTML , '>' , $file ) or die "Unable to open $file for writi
+ng : $!";
select HTML;
$| = 1;
my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE dow = ?
+AND root != ''");
my @total;
for my $dow ( 1..7 ) {
$sth->execute($dow) or die $dbh->errstr;
my @day_total = $sth->fetchrow_array;
push @total , $day_total[0];
}
my @average;
$sth = $dbh->prepare(
"select count(*) from nodes where dow = ? AND root != '' group
+ by year,month,day"
);
for my $dow ( 1..7 ) {
$sth->execute($dow) or die $dbh->errstr;
my $num_days = $sth->fetchall_arrayref;
push @average , int ( $total[ $dow - 1 ] / @$num_days );
}
my $sum_total;
$sum_total += $_ for @total;
my @percentage;
for ( @total ) {
push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total);
}
print
start_html( -title => 'Sub Nodes by Day of Week', -bgcolor =>
+"#ffffcc" ),
div( { -align => "center" },
a({ href=>"Main.html"}, "Main Menu" ),
p(h1( 'Sub Nodes by Day of Week' ) ),
table(
{
-bgcolor => "#000000",
-border => "0",
-cellpadding => "2",
-cellspacing => "1",
},
Tr( { -style => "background-color:#CCCCCC" },
th( [ qw(DAY MON TUE WED THU FRI SAT SUN) ] ),
),
Tr( { -style => "background-color:#CCCCCC" },
td( 'TOTAL' ),
map { td( $_ ) } @total
),
Tr( { -style => "background-color:#CCCCCC" },
td( 'AVERAGE' ),
map { td( $_ ) } @average
),
Tr( { -style => "background-color:#CCCCCC" },
td( 'PERCENT' ),
map { td( $_ ) } @percentage
),
),
),
end_html;
}
sub All_Nodes_by_Hour {
my ($dbh, $file) = @_;
$file ||= 'all_nodes_by_hour.html';
open ( HTML , '>' , $file ) or die "Unable to open $file for writi
+ng : $!";
select HTML;
$| = 1;
my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE hour = ?
+");
my @total;
for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) {
$sth->execute($hour) or die $dbh->errstr;
my @day_total = $sth->fetchrow_array;
push @total , $day_total[0];
}
my @average;
$sth = $dbh->prepare(
"select count(*) from nodes where hour = ? group by year,month
+,day,hour"
);
for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) {
$sth->execute($hour) or die $dbh->errstr;
my $num_days = $sth->fetchall_arrayref;
push @average , int ( $total[ $hour ] / @$num_days );
}
my $sum_total;
$sum_total += $_ for @total;
my @percentage;
for ( @total ) {
push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total);
}
print
start_html( -title => 'All Nodes by Hour', -bgcolor => "#ffffc
+c" ),
div( { -align => "center" },
a({ href=>"Main.html"}, "Main Menu" ),
p(h1( 'All Nodes by Hour' ) ),
table(
{
-bgcolor => "#000000",
-border => "0",
-cellpadding => "2",
-cellspacing => "1",
},
Tr( { -style => "background-color:#CCCCCC" },
th( [ qw(HOUR TOTAL AVERAGE PERCENT) ] ),
),
Tr( { -style => "background-color:#CCCCCC" },
[
map {td([
sprintf( "%.2d", $_ ),
$total[$_],
$average[$_],
$percentage[$_],
]),
} 0 .. 23
]
),
),
),
end_html;
}
sub Root_Nodes_by_Hour {
my ($dbh, $file) = @_;
$file ||= 'root_nodes_by_hour.html';
open ( HTML , '>' , $file ) or die "Unable to open $file for writi
+ng : $!";
select HTML;
$| = 1;
my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE hour = ?
+ AND root = ''");
my @total;
for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) {
$sth->execute($hour) or die $dbh->errstr;
my @day_total = $sth->fetchrow_array;
push @total , $day_total[0];
}
my @average;
$sth = $dbh->prepare(
"select count(*) from nodes where hour = ? AND root = '' group
+ by year,month,day,hour"
);
for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) {
$sth->execute($hour) or die $dbh->errstr;
my $num_days = $sth->fetchall_arrayref;
push @average , int ( $total[ $hour ] / @$num_days );
}
my $sum_total;
$sum_total += $_ for @total;
my @percentage;
for ( @total ) {
push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total);
}
print
start_html( -title => 'Root Nodes by Hour', -bgcolor => "#ffff
+cc" ),
div( { -align => "center" },
a({ href=>"Main.html"}, "Main Menu" ),
p(h1( 'Root Nodes by Hour' ) ),
table(
{
-bgcolor => "#000000",
-border => "0",
-cellpadding => "2",
-cellspacing => "1",
},
Tr( { -style => "background-color:#CCCCCC" },
th( [ qw(HOUR TOTAL AVERAGE PERCENT) ] ),
),
Tr( { -style => "background-color:#CCCCCC" },
[
map {td([
sprintf( "%.2d", $_ ),
$total[$_],
$average[$_],
$percentage[$_],
]),
} 0 .. 23
]
),
),
),
end_html;
}
sub Sub_Nodes_by_Hour {
my ($dbh, $file) = @_;
$file ||= 'sub_nodes_by_hour.html';
open ( HTML , '>' , $file ) or die "Unable to open $file for writi
+ng : $!";
select HTML;
$| = 1;
my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE hour = ?
+ AND root != ''");
my @total;
for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) {
$sth->execute($hour) or die $dbh->errstr;
my @day_total = $sth->fetchrow_array;
push @total , $day_total[0];
}
my @average;
$sth = $dbh->prepare(
"select count(*) from nodes where hour = ? AND root != '' grou
+p by year,month,day,hour"
);
for my $hour ( map { sprintf "%.2d" , $_ } 0..23 ) {
$sth->execute($hour) or die $dbh->errstr;
my $num_days = $sth->fetchall_arrayref;
push @average , int ( $total[ $hour ] / @$num_days );
}
my $sum_total;
$sum_total += $_ for @total;
my @percentage;
for ( @total ) {
push @percentage , sprintf("%.2f" , $_ * 100 / $sum_total);
}
print
start_html( -title => 'Sub Nodes by Hour', -bgcolor => "#ffffc
+c" ),
div( { -align => "center" },
a({ href=>"Main.html"}, "Main Menu" ),
p(h1( 'Sub Nodes by Hour' ) ),
table(
{
-bgcolor => "#000000",
-border => "0",
-cellpadding => "2",
-cellspacing => "1",
},
Tr( { -style => "background-color:#CCCCCC" },
th( [ qw(HOUR TOTAL AVERAGE PERCENT) ] ),
),
Tr( { -style => "background-color:#CCCCCC" },
[
map {td([
sprintf( "%.2d", $_ ),
$total[$_],
$average[$_],
$percentage[$_],
]),
} 0 .. 23
]
),
),
),
end_html;
}
sub Holiday {
my ($dbh, $file, $holiday) = @_;
$file ||= 'holiday.html';
open ( HTML , '>' , $file ) or die "Unable to open $file for writi
+ng : $!";
select HTML;
$| = 1;
my $sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE holiday
+= ?");
$sth->execute($holiday) or die $dbh->errstr;
my ($total) = $sth->fetchrow_array;
$sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE holiday = ?
+ AND root = ''");
$sth->execute($holiday) or die $dbh->errstr;
my ($total_root) = $sth->fetchrow_array;
$sth = $dbh->prepare("SELECT COUNT(*) FROM nodes WHERE holiday = ?
+ AND root != ''");
$sth->execute($holiday) or die $dbh->errstr;
my ($total_sub) = $sth->fetchrow_array;
my @row;
push @row , [ 'Overall', 'N/A' , $total , $total_root , $total_sub
+ ];
my $year_total_sth = $dbh->prepare(
"select count(*) from nodes where year = ? AND holiday = ?"
);
my $year_root_sth = $dbh->prepare(
"select count(*) from nodes where year = ? AND holiday = ? AND
+ root = ''"
);
my $year_sub_sth = $dbh->prepare(
"select count(*) from nodes where year = ? AND holiday = ? AND
+ root != ''"
);
my $dow_sth = $dbh->prepare(
"select dow from nodes where year = ? AND holiday = ?"
);
my @num2day = ( qw(N/A MON TUE WED THU FRI SAT SUN) );
for my $year ( 1999 .. 2004 ) {
$dow_sth->execute($year, $holiday) or die $dbh->errstr;
my $dow_ref = $dow_sth->fetchall_arrayref;
my $dow = $dow_ref->[0][0] ? $num2day[ $dow_ref->[0][0] ] : $num2d
+ay[0];
$year_total_sth->execute($year, $holiday) or die $dbh->errstr;
my @year_total = $year_total_sth->fetchrow_array;
$year_root_sth->execute($year, $holiday) or die $dbh->errstr;
my @year_root = $year_root_sth->fetchrow_array;
$year_sub_sth->execute($year, $holiday) or die $dbh->errstr;
my @year_sub = $year_sub_sth->fetchrow_array;
push @row , [$year, $dow, $year_total[0], $year_root[0], $year
+_sub[0]];
}
print
start_html( -title => "Report for $holiday", -bgcolor => "#fff
+fcc" ),
div( { -align => "center" },
a({ href=>"Main.html"}, "Main Menu" ),
p(h1( "Report for $holiday" ) ),
table(
{
-bgcolor => "#000000",
-border => "0",
-cellpadding => "2",
-cellspacing => "1",
},
Tr( { -style => "background-color:#CCCCCC" },
th( [ qw(YEAR DOW TOTAL ROOT SUB) ] ),
),
Tr( { -style => "background-color:#CCCCCC" },
[
map {td([
$_->[0],
$_->[1],
$_->[2],
$_->[3],
$_->[4],
]),
} @row
]
),
),
),
end_html;
}
sub Main {
my $file = 'Main.html';
open ( HTML , '>' , $file ) or die "Unable to open $file for writi
+ng : $!";
select HTML;
$| = 1;
print
start_html( -title => 'MAIN MENU', -bgcolor => "#ffffcc" ),
div( { -align => "center" },
p(h1( 'MAIN MENU' ) ),
p( a({ href=>"all_nodes_by_dow.html"}, "All Nodes by Day o
+f Week" ) ),
p( a({ href=>"all_root_nodes_by_dow.html"}, "All Root Node
+s by Day of Week" ) ),
p( a({ href=>"all_sub_nodes_by_dow.html"}, "All Sub Nodes
+by Day of Week" ) ),
p( a({ href=>"all_nodes_by_hour.html"}, "All Nodes by Hour
+" ) ),
p( a({ href=>"root_nodes_by_hour.html"}, "All Root Nodes b
+y Hour" ) ),
p( a({ href=>"sub_nodes_by_hour.html"}, "All Sub Nodes by
+Hour" ) ),
p( a({ href=>"christmas.html"}, "Christmas Report" ) ),
),
end_html;
}