11 608502
11 1016988
11 1017495
11 1018088
11 3661585
####
#!/usr/bin/perl
use warnings;
use strict;
use v5.10;

# Read one or more VCF files (named on the command line) and write
# 'master-site-list': a two-column, tab-separated file holding one line per
# distinct (chromosome, coordinate) site seen in at least one input file.
#
# Output order: numeric chromosome names first (ascending), then any other
# names (e.g. X, Y) alphabetically; coordinates ascending within each
# chromosome.
die "Give the names of each VCF" if @ARGV < 1;

# $seen_sites{$chr}{$coord} is set for every site encountered. Using the
# coordinate as a hash key makes duplicate detection exact, no matter how
# many files list the same site or in what order they appear.
my %seen_sites;

for my $vcf_name (@ARGV) {
    open my $infile, '<', $vcf_name
        or die "Cannot open VCF file '$vcf_name': $!";

    while ( my $line = <$infile> ) {
        chomp $line;
        next if $line =~ /^#/;    # skip meta-information and header lines

        # split ' ' ignores leading whitespace, unlike split /\s+/, which
        # would return an empty first field for an indented line.
        my ( $chr, $coord ) = split ' ', $line;
        next unless defined $chr;    # blank line

        # The coordinate is sorted numerically below, so reject anything
        # that is not a plain non-negative integer instead of carrying it.
        unless ( defined $coord && $coord =~ /^\d+$/ ) {
            warn "Skipping malformed line $. of '$vcf_name'\n";
            next;
        }

        $seen_sites{$chr}{$coord} = 1;
    }

    close $infile;
}

# Numeric chromosome names sort numerically and ahead of non-numeric ones.
my @chroms = sort {
    my $a_is_num = $a =~ /^\d+$/;
    my $b_is_num = $b =~ /^\d+$/;
      $a_is_num && $b_is_num ? $a <=> $b
    : $a_is_num              ? -1
    : $b_is_num              ? 1
    :                          $a cmp $b;
} keys %seen_sites;

# Open the output only after every input was read successfully, so a bad
# input file name does not leave a truncated site list behind.
open my $outfile, '>', "master-site-list"
    or die "Cannot open output file: $!";

for my $chr (@chroms) {
    for my $coord ( sort { $a <=> $b } keys %{ $seen_sites{$chr} } ) {
        say $outfile "$chr\t$coord";
    }
}

# Buffered write errors only surface at close, so check it.
close $outfile or die "Cannot close output file: $!";
##
##
##fileformat=VCFv4.1
##FORMAT=
##FORMAT=
##FORMAT=
##FORMAT=
##FORMAT=
##FORMAT=
##INFO=
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19238
1 874816 . C CT 10 FINAL_AMBG SF=bcm_illum,bi_illum;validation_ratio=52/1019;sets=bcm_illum,bi_illum;samples=NA19238 GT 1/0
1 1647893 . C CTTTCTT 30 FINAL_NOT_CONFIRMED SF=bcm_illum,bcm_solid;validation_ratio=0/3380;sets=bcm_illum,bcm_solid;samples=NA19238 GT 0/0
1 7889972 rs57875989 GAGAATCCATCCCATCCTACTGCCAGCGCTCTGTCCACAGGATCGCCTCCCATGA G 33190 DESIGN_FAIL SF=bi_illum GT ./.
1 14106394 . A ACTC 100 FINAL_CONFIRMED SF=bcm_illum,bcm_solid,bc_illum;validation_ratio=864/1754;sets=bcm_illum,bcm_solid,bc_illum;samples=NA19238 GT 1/0