Finally, here is a
working version of the script:
Input:
Tab1.txt
ID column | column 1
gene 1 | value 1.1
gene 2 | value 2.1
gene 4 | value 4.1
gene 8 | value 8.1
Tab2.txt
ID column | column 1 | column2
gene 1 | value 1.1 | n.a.
gene 3 | value 3.1 | value 3.2
gene 4 | value 4.1 | value 4.2
To run the script, pass the file names to it as arguments. The files should be in the same folder as the script.
The table merging script is below:
# Strict
use strict;
use warnings;
# Libraries
use Data::Dumper;
# Variable definitions
my @filenames;    # table files named on the command line, in the order given

# Read the user-defined table file names.
# The help switch is checked before the argument count so that a lone help
# switch prints the usage text instead of hitting the "too few files" error.
foreach my $element (@ARGV) {
    # Any argument containing "-help=" still triggers help, as before;
    # plain -h / -help / --help are accepted as well.
    if ($element =~ /-help=/i or $element =~ /\A--?h(?:elp)?\z/i) {
        print STDERR "Please provide with the at least 2 filenames\n";
        exit;
    }
    push @filenames, $element;
}

# Count the names instead of testing $ARGV[1] for truth: a file literally
# named "0" is a valid name but a false value.
if (@filenames < 2) {
    die "Please provide with the at least 2 file names. Good luck!!!";
}
# Define master hash "$ptables": table name => array ref of that table's lines
my $ptables;

# Read files and add data to the HoA:
foreach my $file (@filenames) {
    my @string_array;    # fresh for every file, so no explicit reset is needed

    # Read the input file and collect its lines.
    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle is scoped to this iteration.
    open(my $fh, '<', $file) or die "Cannot open '$file': $!";
    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;        # strip an LF or CRLF line ending
        next unless length $line;    # empty lines carry no table row
        push @string_array, $line;
    }
    close($fh);

    # Generate the hash key: the file name minus a trailing ".txt" only
    (my $hash_key = $file) =~ s/\.txt\z//;
    $ptables->{$hash_key} = [@string_array];    # save all strings to the hash
    print Dumper(\$ptables);    # debugging dump of everything read so far
}
# Globals
my %output;      # ID => array ref that receives the merged row
my %ncolumns;    # table name => number of value columns in that table
my %values;      # table name => { ID => array ref of that row's values }
my @tables = (sort keys %$ptables);    # Get all table names

# Main program
# First pass -- parse each table to fetch all the IDs
print "=== Pass 1 ===\n";
foreach my $table (@tables) {
    my $ptab = $ptables->{$table};    # Lines of this table

    # The first line holds the column headings. An empty file has none, so
    # fall back to an empty heading instead of splitting undef.
    my $heading = shift @$ptab;
    $heading = '' unless defined $heading;
    my @headings = split(/\s*\|\s*/, $heading);    # Get column headings
    shift @headings;                               # Discard "ID column"
    my $ncols = @headings;                         # Find number of columns
    $ncolumns{$table} = $ncols;                    # Save # of columns
    print "Reading $table; $ncols col(s)...\n";    # Announce table name

    foreach my $line (@$ptab) {
        # Trim a copy of the line so " gene 1" and "gene 1" are the same ID
        # and the last value carries no trailing whitespace.
        (my $row = $line) =~ s/\A\s+|\s+\z//g;
        my ($id, @vals) = split(/\s*\|\s*/, $row);    # Get ID and values
        next unless defined $id and length $id;       # no ID, nothing to merge

        # split drops trailing empty fields; pad short rows so the columns
        # of the following tables stay aligned in the merged output.
        push @vals, ("n.a.") x ($ncols - @vals) if @vals < $ncols;

        $output{$id} ||= [];                 # Placeholder for ID
        $values{$table}{$id} = [@vals];      # Save values for table/ID
    }
}
# Second pass -- process each ID, adding values from each table.
# Tables are visited in sorted name order for every ID, so each merged row
# lists its values table by table in that same order.
my @ids = (sort keys %output);
print "=== Pass 2 ===\n";
foreach my $id (@ids) {
    print "Processing ID $id\n";
    my $pout = $output{$id};    # Get current ID list
    foreach my $table (@tables) {
        my $ncols   = $ncolumns{$table};       # Get number of columns
        my $pvalues = $values{$table}{$id};    # Get values for table/ID
        if (defined($pvalues)) {
            push @$pout, @$pvalues;            # Save values
        }
        else {
            # ID absent from this table: one placeholder per column
            push @$pout, ("n.a.") x $ncols;    # Missing value = N/A
        }
    }
}
# Verify results: print one line per ID, in the sorted order held in @ids.
# The "%12.12s" conversion right-justifies the ID in a 12-character field
# and truncates anything longer; the merged values follow, joined by " | ".
print "=== Verify results ===\n";
for my $row_id (@ids) {
    my $merged_row = join(" | ", @{ $output{$row_id} });
    printf "%12.12s | %s\n", $row_id, $merged_row;
}
Thanks everybody for your help!
Regards,
Evgeniy
|