in reply to Re^2: Merging partially duplicate lines
in thread Merging partially duplicate lines

Here's a database solution. Adding a weighted average to the report shows how flexible this approach is:

#!perl
use strict;
use warnings;
use DBI;

# Driver: build a fresh SQLite database, load the whitespace-separated
# input files into table "test", then print one aggregated line per
# distinct (A,B,C,D) key.

# create table
my $dbh = create_db('database.sqlite');

# load data
my @files = qw(fileA.txt fileB.txt);
for my $file (@files) {
    load_db($dbh, $file);
}

# report
# The last selected column, SUM(E*F)/SUM(F), is the average of E weighted
# by F; the other aggregates are the plain average, sum, min, max and
# row count for each group.
my $query = 'SELECT A,B,C,D,AVG(E),SUM(F), MIN(E),MAX(E),COUNT(*),SUM(E*F)/SUM(F)'
          . ' FROM test GROUP BY A,B,C,D ORDER BY A,B,C,D';
report($dbh, $query);

# Release the handle explicitly instead of relying on global destruction.
$dbh->disconnect;

# report($dbh, $sql)
#
# Runs $sql via $dbh->selectall_arrayref and prints the result to the
# currently selected output handle as tab-separated text: a fixed header
# line followed by one line per result row.
#
#   $dbh - database handle (anything providing selectall_arrayref)
#   $sql - SELECT statement; the header assumes it yields the ten columns
#          A B C D Avg Sum Min Max Count WeightedAvg, in that order
#
# Returns nothing useful.
sub report {
    my ($dbh, $sql) = @_;

    my $rows = $dbh->selectall_arrayref($sql);

    # The newline is printed separately from the joined list so that no
    # stray tab is emitted at the end of each line.
    print join("\t", qw(A B C D Avg Sum Min Max Count WeightedAvg)), "\n";

    for my $row (@$rows) {
        # NULL results (e.g. a division by a zero SUM) arrive as undef;
        # print them as empty fields without "uninitialized" warnings.
        print join("\t", map { defined $_ ? $_ : '' } @$row), "\n";
    }

    return;
}

# load_db($dbh, $filename)
#
# Reads $filename line by line, skips lines containing only whitespace,
# splits every other line on whitespace and inserts the fields as one row
# into table "test". Prints the number of rows processed.
#
#   $dbh      - database handle
#   $filename - path of the input file; each data line is expected to
#               carry six fields, matching the six placeholders below
#
# Dies if the file cannot be opened. When the handle is in AutoCommit
# mode the inserts are wrapped in a single transaction, which is rolled
# back before the error is rethrown if any insert fails.
# Returns the result of closing the input file (true on success).
sub load_db {
    my ($dbh, $filename) = @_;

    open my $in, '<', $filename
        or die "Cannot open '$filename': $!";

    my $sth = $dbh->prepare('INSERT INTO test VALUES (?,?,?,?,?,?)');

    # Only manage the transaction ourselves when the caller has not
    # already started one (begin_work fails if AutoCommit is off).
    my $own_txn = $dbh->{AutoCommit};
    $dbh->begin_work if $own_txn;

    my $n  = 0;
    my $ok = eval {
        while (my $line = <$in>) {
            next unless $line =~ /\S/;    # skip blank lines
            $sth->execute(split ' ', $line);
            ++$n;
        }
        $dbh->commit if $own_txn;
        1;
    };
    if (!$ok) {
        my $err = $@ || "unknown error while loading '$filename'";
        # Leave the handle usable for callers that trap the error.
        eval { $dbh->rollback } if $own_txn;
        die $err;
    }

    print "$n records insert from $filename\n";
    return close $in;
}

# create_db($dbfile)
#
# Creates a fresh SQLite database in $dbfile, removing any existing file
# of that name first, and creates the empty table "test" with columns
# A, B (INTEGER), C, D, E (REAL) and F (REAL).
#
#   $dbfile - path of the SQLite database file
#
# Dies if an existing file cannot be removed or if connecting or creating
# the table fails. Returns the connected handle, which has RaiseError set.
sub create_db {
    my $dbfile = shift;

    if (-e $dbfile) {
        # A stale file that survives here would make CREATE TABLE fail
        # later with a misleading "table already exists" error.
        unlink $dbfile
            or die "Cannot remove '$dbfile': $!";
    }

    my $dbh = DBI->connect(
        "dbi:SQLite:dbname=$dbfile", "", "",
        { RaiseError => 1 },
    ) or die $DBI::errstr;

    # RaiseError makes do() die on failure, so no explicit check is needed.
    $dbh->do("CREATE TABLE test ( A,B INTEGER,C,D,E REAL,F REAL)");

    return $dbh;
}
poj