#!/usr/bin/perl; use strict; use v5.8.8; use Data::Dumper; ##List all .var files in directory my $path = '.'; #Reads same directory as running .pl file opendir(DIR, $path); my @files = grep { /\.var$/ } readdir(DIR); #Reads names of .var files into @files @files = sort @files; my @samples; foreach (@files) { if ($_ =~ /\_/) { $_ =~ /(^.+?)\_/; push @samples, $1; } else { $_ =~ /(^.+)\.var/; push @samples, $1; } } closedir(DIR); @files = sort @files; my $file_count = @files; print "\nRunning on $file_count files:\n"; print "@files "; #Make unique list of variants observed in all opened .var files my @variants; ##Create concatenated list of all variant locations and alleles my @zygosity; my $zyg; my %zygos; foreach my $file (@files) { open VAR, $file; chomp (my @var = ); ##sub in $file for ID (3rd column) while ($var[0] =~ s/ID/$file/) { shift @var; } foreach (@var) { my @fields = split /\t/, $_; $fields[2] = $file; my $locus = join "\t", @fields[0,1,2,3,4,5]; push @variants, $locus; #pull out genotype $fields[9] =~ /(^\d\/\d\:)/; if ($1 =~ /0\/1\:/) { $zyg = "HET"; } else { $zyg = "HOM"; } $fields[9] = $zyg; # push @zygosity, $zyg; push @{$zygos{$locus}}, $zyg; } } close VAR; my %variants_hash = map { $_, 1 } @variants; ##Generates the unique list my @variants_list = keys %variants_hash; %variants_hash = undef; @variants_list = sort @variants_list; #List of just loci my %variants_geno; #List of loci to which genotypes will be appended my %variants_info; # List of variant information to append after genotypes for printing. my $sum_variants = @variants_list; print "\nRunning on $sum_variants non-redundant variants.\n"; # Pull out genotypes for all of the non-redundant variants for each individual my @variant_geno; #Pointless array for anonymous assignation of arrays in hash foreach my $file (@files) { open VAR, $file; my @var = ; foreach my $variant (@variants_list) { my $found = 0; my $genotype = 'REF?'; foreach my $line (@var) { my @fields2 = split /\t/, $line; $fields2[2] = $file; $fields2[9] = $zygos{$variant}; my $newline = join "\t", @fields2[0..123]; $line = $newline; if ($line =~ /$variant/) { if ($found == 0) { $variants_info{$variant} = (join "\t", @fields2[10..123]); $found ++; } if ($fields2[9] eq 'HET') { $genotype = 'HET'; } if ($fields2[9] eq 'HOM') { $genotype = 'HOM'; } last; }} if ($variants_geno{$variant} =~ /./) { push @{$variants_geno{$variant}}, $genotype; } else { @{$variants_geno{$variant}} = [@variant_geno]; push @{$variants_geno{$variant}}, $genotype; }} close VAR; } open OUT, ">$ARGV[0]" . 'merged.vars'; my $samples_list = join "\t", @samples; print OUT "CHROM POS ID REF ALT QUAL $samples_list Chr Start End Ref Alt Func.refGene Gene.refGene GeneDetail.refGene ExonicFunc.refGene AAChange.refGene Func.knownGene Gene.knownGene GeneDetail.knownGene ExonicFunc.knownGene AAChange.knownGene avsnp144 1000g2015aug_all 1000g2015aug_afr 1000g2015aug_amr 1000g2015aug_sas 1000g2015aug_eur 1000g2015aug_eas esp6500siv2_all esp6500siv2_ea esp6500siv2_aa ExAC_ALL ExAC_AFR ExAC_AMR ExAC_EAS ExAC_FIN ExAC_NFE ExAC_OTH ExAC_SAS cosmic70 SIFT_score SIFT_pred Polyphen2_HDIV_score Polyphen2_HDIV_pred Polyphen2_HVAR_score Polyphen2_HVAR_pred LRT_score LRT_pred MutationTaster_score MutationTaster_pred MutationAssessor_score MutationAssessor_pred FATHMM_score FATHMM_pred PROVEAN_score PROVEAN_pred VEST3_score CADD_raw CADD_phred DANN_score fathmm-MKL_coding_score fathmm-MKL_coding_pred MetaSVM_score MetaSVM_pred MetaLR_score MetaLR_pred integrated_fitCons_score integrated_confidence_value GERP++_RS phyloP7way_vertebrate phyloP20way_mammalian phastCons7way_vertebrate phastCons20way_mammalian SiPhy_29way_logOdds Interpro_domain dbscSNV_ADA_SCORE dbscSNV_RF_SCORE CLINSIG CLNDBN CLNACC CLNDSDB CLNDSDBID HRC_AF HRC_AC HRC_AN HRC_non1000G_AF HRC_non1000G_AC HRC_non1000G_AN Kaviar_AF Kaviar_AC Kaviar_AN nci60 KEY3 INFO_AC INFO_AF INFO_BaseQRankSum INFO_ClippingRankSum INFO_DP INFO_DS INFO_END INFO_ExcessHet INFO_FS INFO_HaplotypeScore INFO_InbreedingCoeff INFO_MLEAC INFO_MLEAF INFO_MQ INFO_MQRankSum INFO_QD INFO_RAW_MQ INFO_ReadPosRankSum INFO_SOR seq seq_flag AAchange Grantham Mutability FuentesFalsePositive ACMG"; foreach my $variant_ident (@variants_list) { shift @{$variants_geno{$variant_ident}}; my $genotypes = join "\t", @{$variants_geno{$variant_ident}}; my $joined = join "\t", $variant_ident, $genotypes, $variants_info{$variant_ident}; $joined =~ s/^\s+//; chop $joined; print OUT "$joined"; } close OUT;