To get a better idea of how each strategy performs you need to observe the results over many invocations. Here is a benchmark of a few strategies:
use Benchmark qw(cmpthese);
use strict;

# Slurp the sample records that follow __DATA__. Every element of @data
# already ends in a newline, so joining on "\n" leaves an empty line after
# each record; all four strategies are fed this same text.
my @data = <DATA>;
my $data = join "\n",@data;

# Output sink shared by every strategy sub through the global OUT handle.
# Swap in the commented-out line to inspect the output on STDOUT instead.
#open OUT, '>&STDOUT';
open OUT, '>', '/dev/null' or die "Cannot open /dev/null for writing: $!";

# Run each strategy 1000 times and print a comparison table. Every run reads
# from a fresh in-memory filehandle over $data so it starts at the first line.
# The opens are checked so a failure aborts instead of silently timing reads
# on an unopened handle.
cmpthese(1000, {
    table_col => sub {
        open my $fh, '<', \$data or die "Cannot open in-memory handle: $!";
        table_col($fh);
    },
    col_table => sub {
        open my $fh, '<', \$data or die "Cannot open in-memory handle: $!";
        col_table($fh);
    },
    multi => sub {
        open my $fh, '<', \$data or die "Cannot open in-memory handle: $!";
        multi($fh);
    },
    regex => sub {
        open my $fh, '<', \$data or die "Cannot open in-memory handle: $!";
        regex($fh);
    },
});
# multi($fh): statement-style strategy built from ordinary conditionals.
#
# Reads $fh one line at a time (through $_). A "TABLE;<name>" line records
# "<name>." as the current prefix; a "COLUMN;<name>;..." line prints the
# prefix, the column name and a newline to the global OUT handle. Lines
# matching neither pattern are skipped.
sub multi {
    my ($fh) = @_;
    my $prefix;    # "<table>." taken from the most recent TABLE line

    LINE:
    while (<$fh>) {
        # COLUMN lines are tested first; emit and move on.
        if (m/^COLUMN;(.+?);/) {
            print OUT $prefix, $1, "\n";
            next LINE;
        }

        # Anything that is not a TABLE line is ignored.
        next LINE unless m/^TABLE;(.+)$/;
        $prefix = "$1.";
    }
}
# col_table($fh): short-circuit strategy that tests for COLUMN lines first.
#
# Reads $fh one line at a time (through $_). A "TABLE;<name>" line records
# "<name>." as the current prefix; a "COLUMN;<name>;..." line prints the
# prefix, the column name and a newline to the global OUT handle. Lines
# matching neither pattern are skipped.
sub col_table {
    my $fh = shift;
    my $table;    # "<table>." taken from the most recent TABLE line
    while (<$fh>) {
        # print returns true on success, so the TABLE test after "or" is
        # only evaluated for lines that were not printed as COLUMN lines.
        /^COLUMN;(.+?);/ && print OUT ($table, $1, "\n")
            or /^TABLE;(.+)$/ && ($table = $1 . '.');
    }
}
# table_col($fh): short-circuit strategy that tests for TABLE lines first.
#
# Reads $fh one line at a time (through $_). A "TABLE;<name>" line records
# "<name>." as the current prefix; a "COLUMN;<name>;..." line prints the
# prefix, the column name and a newline to the global OUT handle. Lines
# matching neither pattern are skipped.
sub table_col {
    my $fh = shift;
    my $table;    # "<table>." taken from the most recent TABLE line
    while (<$fh>) {
        # The assignment always yields a true value (it ends in '.'), so the
        # COLUMN test after "or" is only evaluated for non-TABLE lines.
        /^TABLE;(.+)$/ && ($table = $1 . '.')
            or /^COLUMN;(.+?);/ && print OUT ($table, $1, "\n");
    }
}
# regex($fh): whole-buffer strategy using a single global substitution.
#
# Slurps everything from $fh, then rewrites the text in place: each
# "TABLE;<name>" line is removed (its name, minus trailing whitespace, is
# remembered) and each "COLUMN;<name>;..." line is replaced by
# "<table>.<name>\n". The rewritten text is printed to the global OUT handle.
# Text matching neither alternative is passed through unchanged.
sub regex {
    my $fh = shift;
    local $/;                 # undef record separator: read the whole handle
    my $text = <$fh>;
    my $table;                # name captured from the most recent TABLE line
    $text =~ s{
        ^TABLE;(.+?)\s*\n     # $1: table name; \s* also swallows blank lines
        |
        ^COLUMN;(.+?);.*\n+   # $2: column name; \n+ also swallows blank lines
    }{defined($1) ? (($table = $1), "") : "$table.$2\n"}mexg;
    # defined($1), not truthiness, picks the branch so a table named "0" is
    # still treated as a TABLE line. The explicit "\n" in the COLUMN
    # replacement keeps one output line per column whether or not the input
    # has blank lines between records.
    print OUT $text;
}
__DATA__
TABLE;nokia_sgsn_tot_int_util_month
COLUMN;nc_id;integer
COLUMN;sgsn_id;varchar(50) not null
COLUMN;month_of;integer not null
COLUMN;nokia_sgsn_interface_utilisation_busy_hour;utime
COLUMN;data_coverage_pc;float
COLUMN;tot_measurement_seconds;integer
COLUMN;avg_measurement_seconds;integer
COLUMN;tot_ifinbroadcastpkts;int8
COLUMN;avg_ifinbroadcastpkts;int8
COLUMN;min_ifinbroadcastpkts;int8
COLUMN;max_ifinbroadcastpkts;int8
COLUMN;nsiubh_ifinbroadcastpkts;int8
COLUMN;tot_ifindiscards;int8
COLUMN;avg_ifindiscards;int8
COLUMN;min_ifindiscards;int8
COLUMN;max_ifindiscards;int8
COLUMN;nsiubh_ifindiscards;int8
COLUMN;tot_ifinerrors;int8
COLUMN;avg_ifinerrors;int8
COLUMN;min_ifinerrors;int8
COLUMN;max_ifinerrors;int8
COLUMN;nsiubh_ifinerrors;int8
COLUMN;tot_ifinmulticastpkts;int8
COLUMN;avg_ifinmulticastpkts;int8
COLUMN;min_ifinmulticastpkts;int8
COLUMN;max_ifinmulticastpkts;int8
COLUMN;nsiubh_ifinmulticastpkts;int8
COLUMN;tot_ifinnucastpkts;int8
COLUMN;avg_ifinnucastpkts;int8
Results:
Rate col_table multi table_col regex
col_table 565/s -- -10% -14% -36%
multi 625/s 11% -- -5% -29%
table_col 658/s 16% 5% -- -26%
regex 885/s 57% 42% 35% --
With a different set of test data you will of course get different results.
I was expecting the col_table and multi strategies to do better than table_col, but they consistently perform worse for me.