merge.pl AA 1,2,5,6 BB 3,4,1,5 #### ## merge.pl use strict; my $AFile = shift; my @Acols = split(/,/ => shift); my $BFile = shift; my @Bcols = split(/,/ => shift); ## Read B into memory first open(B, "$BFile") or die "Could not open $BFile: $!\n"; ## Grab the header labels from the first line and store them for later: my @HeaderB = split(/\t/, ); chomp @HeaderB; ## Now go through and save each line into a hash, where they key ## is the field to be matched, and the value is a reference to ## an array that holds all the fields my %B; while() { my @bar = split(/\s+/ => $_); ## Change to tab if needed $bar[1] or next; ## Skip blank lines: add other validation if needed $B{$bar[$Bcols[0]-1]}=\@bar; } close(B); shift @Bcols; ## Remove B's first header: we will use A's open(A, "$AFile") or die "Could not open $AFile: $!\n"; ## Print all the headers now: my @HeaderA = split(/\t/, ); chomp @HeaderA; for (@Acols) { print "$HeaderA[$_-1]\t"; } for (@Bcols) { print "$HeaderB[$_-1]\t"; } ## Remember that shift? print "\n"; ## Save the offset of the "matching field" into a variable ## Mainly makes things easier to read below my $A=$Acols[0]-1; while() { my @bar = split(/\s+/ => $_); ## Change to tab if needed $bar[1] or next; if ($B{$bar[$A]}) { ## We have a match from %B! ## Print all the A fields we want: for (@Acols) { print "$bar[$_-1]\t"; } ## Print all the B fields we want: for (@Bcols) { print "$B{$bar[$A]}[$_]\t"; } print "\n"; } } close(A);