# Strict
use strict;
use warnings;

# Libraries
use Data::Dumper;

# User-defined
# Define tables, each one a separate value in the hash "$ptables".
# Each table is a list of pipe-separated rows: the first row holds the
# column headings, and the first field of every row is the ID.
my $ptables = {
    'tab1' => [
        "ID column | column 1",
        "gene 1 | value 1.1",
        "gene 2 | value 2.1",
        "gene 4 | value 4.1",
        "gene 8 | value 8.1",
    ],
    'tab2' => [
        "ID column | column 1 | column2",
        "gene 1 | value 1.1 | value 1.2",
        "gene 3 | value 3.1 | value 3.2",
        "gene 4 | value 4.1 | value 4.2",
    ]
};

# Main program
# Merge all tables by ID, then print one combined row per ID.
my %output = %{ merge_tables($ptables) };
my @ids    = (sort keys %output);

# Verify results
print "=== Verify results ===\n";
foreach my $id (@ids) {
    my $pvalues = $output{$id};
    printf "%12.12s | %s\n", $id, join(" | ", @$pvalues);
}

# merge_tables($ptabs)
#
# Combine several pipe-separated tables into one row of values per ID.
#
# Parameters:
#   $ptabs - hash reference: table name => array reference of row strings.
#            The first row of each table is the heading row; its first
#            field (the ID heading) is discarded when counting columns.
#
# Returns:
#   Hash reference: ID => array reference holding the values of that ID
#   from every table, tables taken in sorted name order.  A table that
#   has no row for an ID contributes "n.a." once per column of that table.
#
# Side effects:
#   Prints progress messages for both passes to the selected output handle.
#   The tables passed in are not modified.
sub merge_tables {
    my ($ptabs) = @_;

    my $missing = "n.a.";               # Placeholder for an absent value
    my @tables  = (sort keys %$ptabs);  # Get all table names
    my %merged;                         # ID => list of merged values
    my %ncolumns;                       # Table => number of value columns
    my %values;                         # Table => ID => list of values

    # First pass -- parse each table to fetch all the IDs
    print "=== Pass 1 ===\n";
    foreach my $table (@tables) {
        # Copy the rows so the caller's table keeps its heading row
        my ($header, @lines) = @{ $ptabs->{$table} || [ ] };

        # An empty table has no heading row and therefore no columns
        my @heads = defined($header) ? split(/\s*\|\s*/, $header) : ();
        shift @heads;                   # Discard "ID column"
        my $ncols = @heads;             # Find number of columns
        $ncolumns{$table} = $ncols;     # Save # of columns
        print "Reading $table; $ncols col(s)...\n";

        foreach my $line (@lines) {
            my ($id, @vals) = split(/\s*\|\s*/, $line);
            next unless defined($id);   # Blank row: nothing to record

            # split() drops trailing empty fields, so a row whose last
            # cells are empty comes back short; pad it so the columns of
            # the following tables stay aligned.
            if (@vals < $ncols) {
                push @vals, ($missing) x ($ncols - @vals);
            }

            $merged{$id} ||= [ ];               # Placeholder for ID
            $values{$table}{$id} = [ @vals ];   # Save values for table/ID
        }
    }

    # Second pass -- process each ID, adding values from each table
    print "=== Pass 2 ===\n";
    foreach my $id (sort keys %merged) {
        print "Processing ID $id\n";
        my $pout = $merged{$id};        # Get current ID list
        foreach my $table (@tables) {
            my $pvalues = $values{$table}{$id};
            if (defined($pvalues)) {
                push @$pout, @$pvalues;                     # Save values
            }
            else {
                push @$pout, ($missing) x $ncolumns{$table}; # Missing = N/A
            }
        }
    }

    return \%merged;
}