# Put the file name into the third column, then build the tab-separated
# locus string from the first six columns.
$fields[2] = $file;
my $locus = join("\t", @fields[0 .. 5]);
####
# Header row: six fixed column names followed by one column per sample.
# NOTE(review): the fixed names are separated by single spaces while the
# sample names are tab-joined -- confirm whether tabs were intended here.
my $samples_list = join "\t", @samples;
# The string literal was left unterminated; close it and end the row.
print OUT "CHROM POS ID REF ALT QUAL $samples_list\n";
####
#!perl
use strict;
use warnings;
use Data::Dump 'pp';
# get .var files in current directory
my $path = '.';
opendir(my $dh, $path) or die "Could not open directory $path: $!";
# keep plain files only, so a directory that happens to end in .var is not
# handed to open() later on
my @files = sort grep { /\.var$/ && -f "$path/$_" } readdir($dh);
closedir($dh);
# remove _suffix and .var extension
my @samples = map {
    my ($name) = m/^([^_]+).*\.var/;
    # A failed match (e.g. a name starting with '_') must not hand back a
    # stale $1 from an earlier file; fall back to the name minus .var.
    unless (defined $name) {
        ($name = $_) =~ s/\.var$//;
    }
    $name;
} @files;
print "Samples are @samples\n";
# scan files and build hash
#   %zygos : record id => array ref; slot $i holds HET/HOM as seen in $files[$i]
#   %info  : record id => tab-joined fields 10.. of the last record read for it
#   @header: fields of the most recent line starting with '#'
my %zygos = ();
my %info = ();
my @header = ();
foreach my $i (0..$#files) {
    my $file = $files[$i];
    my $count = 0;
    print "Reading $file .. ";
    open my $in, '<', $file or die "Could not open $file :$!";
    # Read line by line; the loop previously had an empty condition, which
    # Perl treats as "forever" and which never pulled anything from the file.
    while (my $line = <$in>) {
        chomp $line;
        my @fields = split /\t/, $line;
        # extract header line
        if ($line =~ /^#/) {
            @header = @fields;
            next;
        }
        # blank lines carry no record
        next unless @fields;
        # a record without field 9 has no genotype to classify
        unless (defined $fields[9]) {
            warn "Skipping short line $. in $file\n";
            next;
        }
        # pull out genotype (text before the first ':')
        my ($g) = split /:/, $fields[9];
        my $zyg = (defined $g && $g eq '0/1') ? "HET" : "HOM";
        # create hash key from first 6 fields
        my $id = join "\t", @fields[0..5];
        # build hashes
        $info{$id} = join "\t", @fields[10..$#fields];
        $zygos{$id}[$i] = $zyg;
        ++$count;
    }
    # close each file inside the loop, right after it has been read
    close $in;
    print "$count lines\n";
}
# Debug aid: announce, then pretty-print the whole %zygos table.
print STDOUT "Dump of \%zygos hash\n";
pp(\%zygos);
# open output file; an optional first command-line argument is prepended
# to the fixed name 'merged.xls'
my $outfile = ($ARGV[0] || '').'merged.xls';
open my $out, '>', $outfile or die "Could not open $outfile: $!";
# print header: first six header fields, one column per sample, then the
# header fields from index 10 onwards (missing cells become '')
my $col_header = join "\t",
    map { defined $_ ? $_ : '' }
    @header[0..5], @samples, @header[10..$#header];
print {$out} $col_header."\n";
# print records
foreach my $id (sort keys %info) {
    # replace undef with '' for samples that did not contain this record
    my @zygos = map { defined $_ ? $_ : '' } @{$zygos{$id}}[0..$#samples];
    print {$out} join("\t", $id, @zygos, $info{$id}), "\n";
}
# buffered write errors only surface at close, so check it
close $out or die "Could not close $outfile: $!";