ID column | column 1
gene 1 | value 1.1
gene 2 | value 2.1
gene 4 | value 4.1
gene 8 | value 8.1
ID column | column 1 | column2
gene 1 | value 1.1 | n.a.
gene 3 | value 3.1 | value 3.2
gene 4 | value 4.1 | value 4.2
# Merge several pipe-delimited tables into one table keyed by the ID column.
#
# Usage: <script> TABLE1 TABLE2 [TABLE3 ...]
#
# Each input file holds one table. Its first non-blank line is a header of
# the form "ID column | column 1 | ..."; every following line is
# "ID | value | ...". Fields are separated by "|" with optional surrounding
# whitespace. A table is named after its file name with a trailing ".txt"
# removed. For every ID seen in any table the script prints one row that
# concatenates the values of all tables (in sorted table-name order); a
# table that lacks the ID contributes "n.a." for each of its columns.

# Strict
use strict;
use warnings;

# Libraries
use Data::Dumper;
use List::Util qw(max);

# Split one table line into its fields.
# Leading/trailing whitespace of the whole line is removed first, because
# the field separator pattern only eats whitespace *around* each "|".
# Note that split() drops trailing empty fields.
sub split_row {
    my ($line) = @_;
    $line =~ s/\A\s+//;
    $line =~ s/\s+\z//;
    return split /\s*\|\s*/, $line;
}

# Variables definition
my @filenames;

# Read user-defined table file names.
# The help request is honoured before the argument count is checked, so
# asking for help on its own prints the hint instead of dying.
foreach my $element (@ARGV) {
    if ($element =~ /-help=/i or $element =~ /\A--?help\z/i) {
        print STDERR "Please provide with the at least 2 filenames\n";
        exit;
    }
    push @filenames, $element;
}

# Count the arguments rather than testing the truth of $ARGV[1]: a file
# literally named "0" is a legitimate second argument.
if (@filenames < 2) {
    die "Please provide with the at least 2 file names. Good luck!!!";
}

# Define master hash "$ptables": table name => reference to array of lines
my $ptables;

# Read files and add data to the hash of arrays
foreach my $file (@filenames) {
    my @string_array;

    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle is scoped to this iteration.
    open(my $fh, '<', $file) or die "Cannot open '$file': $!";
    while (my $line = <$fh>) {
        $line =~ s/[\r\n]+\z//;        # strip LF as well as CRLF endings
        next unless $line =~ /\S/;     # ignore blank / whitespace-only lines
        push @string_array, $line;
    }
    close($fh);

    # Generate hash key: only a *trailing* ".txt" is an extension
    (my $hash_key = $file) =~ s/\.txt\z//;
    $ptables->{$hash_key} = [@string_array];    # save all lines to the hash

    # Debug output: dump everything loaded so far
    print Dumper(\$ptables);
}

# Globals
my %output;      # ID => reference to the merged list of values
my %ncolumns;    # table name => number of value columns
my %values;      # table name => ID => reference to that table's values
my @tables = sort keys %$ptables;    # Get all table names

# Main program

# First pass -- parse each table to fetch all the IDs
print "=== Pass 1 ===\n";
foreach my $table (@tables) {
    my $ptab = $ptables->{$table};

    # The first stored line is the header; an empty file has none and is
    # treated as a table with zero columns.
    my $header = shift @$ptab;
    my @rows   = defined $header ? split_row($header) : ();
    shift @rows;                                   # Discard "ID column"
    my $ncols = @rows;                             # Find number of columns
    $ncolumns{$table} = $ncols;                    # Save # of columns
    print "Reading $table; $ncols col(s)...\n";    # Announce table name

    foreach my $line (@$ptab) {
        my ($id, @vals) = split_row($line);        # Get ID and values

        # A line such as "|" yields no ID at all; it cannot be merged.
        if (!defined $id or $id eq '') {
            warn "Skipping line without ID in table '$table': $line\n";
            next;
        }

        # Pad short rows so that the values of the tables that follow stay
        # in their own columns of the merged row.
        if (@vals < $ncols) {
            push @vals, ('n.a.') x ($ncols - @vals);
        }

        $output{$id} ||= [];                       # Placeholder for ID
        $values{$table}{$id} = [@vals];            # Save values for table/ID
    }
}

# Second pass -- process each ID, adding values from each table
my @ids = sort keys %output;
print "=== Pass 2 ===\n";
foreach my $id (@ids) {
    print "Processing ID $id\n";
    my $pout = $output{$id};                       # Get current ID list
    foreach my $table (@tables) {
        my $ncols   = $ncolumns{$table};           # Get number of columns
        my $pvalues = $values{$table}{$id};        # Get values for table/ID
        if (defined $pvalues) {
            push @$pout, @$pvalues;                # Save values
        }
        else {
            push @$pout, ('n.a.') x $ncols;        # Missing value = N/A
        }
    }
}

# Verify results
print "=== Verify results ===\n";

# Right-align IDs in a column at least 12 characters wide, widened to the
# longest ID so that long IDs are never truncated.
my $id_width = max(12, map { length } @ids);
foreach my $id (@ids) {
    my $pvalues = $output{$id};
    printf "%*s | %s\n", $id_width, $id, join(' | ', @$pvalues);
}
In reply to Re^5: Merging/Rearranging Tables
by homeveg
in thread Merging/Rearranging Tables
by homeveg
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |