File 1
CHROM POS REF ALT VARIANT_LIST 209T-D 459T-D 644T-D 94T-D 99T1-D 99T2-D 99T3-D 99T4-D 99T5-D
['MT', '1010', 'G', 'A', 'A', 'REF', 'A', 'A', 'REF', 'A', 'A', 'A', 'A', 'A']
['MT', '2962', 'C', 'T', 'T', 'REF', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T']
...
####
File 2
CHROM POS REF ALT VARIANT_LIST 209H-D 459H-D 644H-D 94H-D 99H-D
['MT', '1010', 'G', 'A', 'A', 'REF', 'REF', 'REF', 'REF', 'REF']
['MT', '2962', 'C', 'T', 'T', 'REF', 'REF', 'T', 'REF', 'T']
....
####
CHROM POS REF ALT VARIANT_LIST 209T-D 459T-D 644T-D 94T-D 99T1-D 99T2-D 99T3-D 99T4-D 99T5-D
['MT', '1010', 'G', 'A', 'A', 'REF', 'A', 'A', 'REF', 'A', 'A', 'A', 'A', 'A']
['MT', '2962', 'C', 'T', 'T', 'REF', 'T', 'REF', 'T', 'REF', 'REF', 'REF', 'REF', 'REF']
####
#!/usr/local/bin/perl
use strict;
use warnings;
# Compare two variant tables row by row and print the merged table.
#
# Usage: <script> FILE1 FILE2
#
# Both files start with a header line followed by one data row per line; row N
# of FILE1 is compared with row N of FILE2.  A header that starts with digits
# (e.g. "209T-D") names a sample column and the digits are the sample ID.  Each
# sample column of FILE1 is paired with the first column of FILE2 whose header
# carries the same ID, so several FILE1 columns (99T1-D .. 99T5-D) may share a
# single FILE2 column (99H-D).
#
# Output (STDOUT): FILE1's header unchanged, then every FILE1 row re-quoted as
# ['a', 'b', ...].  A sample value that is not 'REF' is replaced by the paired
# FILE2 value when that value is not 'REF' either; everything else is copied
# from FILE1 as is.
#
# Dies if a file cannot be opened or closed, or if FILE2 has fewer data rows
# than FILE1.
my $file1 = shift;
my $file2 = shift;
defined $file1 && defined $file2
    or die "Usage: $0 FILE1 FILE2\n";

open(my $in1, '<', $file1)
    or die "Cannot open file '$file1' for reading: $!";
open(my $in2, '<', $file2)
    or die "Cannot open file '$file2' for reading: $!";

# An empty file has no header; treat that as an empty header line instead of
# letting undef trigger "uninitialized value" warnings further down.
my $header1 = <$in1>;
my $header2 = <$in2>;
$header1 = '' unless defined $header1;
$header2 = '' unless defined $header2;

# Columns before this index (CHROM POS REF ALT VARIANT_LIST) are never touched.
my $first_sample_col = 5;

# Split a header line into column names.  Tab-separated headers are split on
# tabs only; a header without any tab falls back to whitespace separation.
my $split_header = sub {
    my ($header) = @_;
    $header =~ s/[\r\n]+\z//;
    return $header =~ /\t/ ? split(/\t/, $header) : split(' ', $header);
};
my @heads1 = $split_header->($header1);
my @heads2 = $split_header->($header2);

# Sample ID -> column index in FILE2 (the first column wins on duplicates).
my %col2_for;
for my $col ($first_sample_col .. $#heads2)
{
    my ($id) = $heads2[$col] =~ m/^(\d+)/;
    next unless defined $id;
    $col2_for{$id} = $col unless exists $col2_for{$id};
}

# FILE1 column index -> paired FILE2 column index.  The pairing is looked up
# by sample ID rather than computed as "ID + 4", which only works when the
# samples happen to be numbered 1, 2, 3, ... in column order.  FILE1 columns
# without a counterpart in FILE2 get no entry and are passed through.
my %index_map;
for my $col ($first_sample_col .. $#heads1)
{
    my ($id) = $heads1[$col] =~ m/^(\d+)/;
    next unless defined $id && exists $col2_for{$id};
    $index_map{$col} = $col2_for{$id};
}

print $header1;

while (my $line1 = <$in1>)
{
    my @fields1 = get_fields($line1);
    defined(my $line2 = <$in2>)
        or die "Data missing in file '$file2': $!";
    my @fields2 = get_fields($line2);
    my @out     = @fields1;

    for my $i ($first_sample_col .. $#fields1)
    {
        my $j = $index_map{$i};

        # Skip columns with no partner, or whose partner is missing from this
        # row; indexing with an undefined $j would silently read column 0.
        next unless defined $j && $j <= $#fields2;
        next if $fields1[$i] eq 'REF';

        # NOTE(review): this copies FILE2's call over FILE1's.  If the intent
        # is to blank variants that both samples share, assign 'REF' here
        # instead -- confirm against the expected output.
        $out[$i] = $fields2[$j] if $fields2[$j] ne 'REF';
    }

    print '[', join(', ', map { "'$_'" } @out), "]\n";
}

close $in2
    or die "Cannot close file '$file2': $!";
close $in1
    or die "Cannot close file '$file1': $!";
# get_fields($line)
#
# Split one tab-separated data row into its bare field values.
#
#   $line   - a row as read from the file, with or without its line terminator
#   returns - the list of fields, each with one leading "'" (optionally
#             preceded by "[") and one trailing "'" (optionally followed by
#             "]") removed
#
# NOTE(review): only tabs separate fields; a row written as
# "['MT', '1010', ...]" with comma separators comes back as a single field.
# Confirm the real input format.
sub get_fields
{
    my ($line) = @_;

    # Remove the line terminator.  chomp alone leaves the "\r" of a CRLF file
    # behind, which keeps the closing "']" of the last field from matching.
    $line =~ s/[\r\n]+\z//;

    my @fields = split /\t/, $line;
    for my $field (@fields)
    {
        $field =~ s{ ^ \[? ' }{}x;
        $field =~ s{ ' \]? $ }{}x;
    }
    return @fields;
}
####
CHROM POS REF ALT VARIANT_LIST 209T-D 459T-D 644T-D 94T-D 99T1-D 99T2-D 99T3-D 99T4-D 99T5-D
['MT', '1010', 'G', 'A', 'A', 'REF', 'A', 'A', 'REF', 'A', 'A', 'A', 'A', 'A']
['MT', '2962', 'C', 'T', 'T', 'REF', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T']
...