#!/usr/bin/perl
use warnings;
use strict;
use v5.14;

# Reads the comma-separated genotype matrix chr<chr>_exome_snps_processed_<cont>
# and groups every individual's genotypes into fixed-width coordinate bins.
#
# Invocation: two arguments, the chromosome number and the "cont" label, which
# are interpolated into the input and output file names below.
#
# Input layout (see the sample at the bottom of this file):
#   - a header row starting with "SAMPLE", followed by one coordinate per column
#   - one row per individual: an ID followed by one genotype per column
#
# Result: $data{$id}[$bin] is an array ref holding, in column order, the
# genotypes of individual $id whose coordinate falls into bin $bin, where
# $bin = int( coordinate / 100_000 ).
# NOTE(review): many coordinates share a bin (all seven sample coordinates map
# to bin 162), so genotypes are collected per bin instead of overwriting each
# other -- confirm this is the shape the statistics step expects.

die "need two arguments (i.e. chr cont) at invocation" unless @ARGV == 2;

chomp( my $chr_num = shift );
chomp( my $cont    = shift );

# Opened up front, as before; nothing in this part of the script writes to it.
open my $out_file, ">", "chr${chr_num}_exome_snps_processed_${cont}_STATS"
    or die "Can't open output file: $!\n";

# Get a list of individuals (will be hash keys later):
open my $in_file, "<", "chr${chr_num}_exome_snps_processed_$cont"
    or die "Can't open input file: $!\n";

my $bin_width = 100_000;

my @individuals;
my %data;

# Bin number of every data column. Declared outside the loop so the values
# computed from the header row are still available for the rows that follow.
my @snp_bins;

LINE:
while ( my $line = <$in_file> ) {
    chomp $line;
    $line =~ s/\r\z//;                # tolerate CRLF line endings
    next LINE unless length $line;    # ignore blank lines

    # A negative limit keeps trailing empty fields, so the column count of a
    # row can be compared reliably with the header.
    my ( $label, @fields ) = split /,/, $line, -1;

    if ( $line =~ /^SAMPLE/ ) {
        @snp_bins = map { int( $_ / $bin_width ) } @fields;
        next LINE;
    }

    die "Data row at line $. appears before the SAMPLE header\n"
        unless @snp_bins;
    die "Line $. has " . scalar(@fields) . " genotype columns, header has "
        . scalar(@snp_bins) . "\n"
        unless @fields == @snp_bins;

    push @individuals, $label;

    # Walk the columns by index: @snp_bins is shared by all rows and must not
    # be consumed, and each genotype belongs only to the individual on this row.
    for my $column ( 0 .. $#fields ) {
        push @{ $data{$label}[ $snp_bins[$column] ] }, $fields[$column];
    }
}

close $in_file;

####
## Sample of data file. Each file has hundreds of thousands of columns and hundreds of rows
# SAMPLE,16287215,16287226,16287365,16287649,16287784,16287851,16287912
# HG00553,0 0,0 0,0 0,0 0,0 0,0 0,0 0
# HG00554,0 0,0 0,0 0,0 0,0 0,0 0,0 0
# HG00637,0 0,0 0,0 0,0 0,0 0,0 0,0 0
# HG00638,0 0,0 0,0 0,0 0,0 0,1 1,0 0
# HG00640,0 0,0 0,0 0,0 0,0 0,1 1,0 0