0201 3201 1.00 0201 2608 1.00 0201 2402 0.94 0201 0302 1.00 2402 2402 0.99 0101 0201 0.99 0201 1101 1.00 0301 2601 1.00 0301 1101 0.98 2601 0301 1.00 0301 2601 1.00 0301 2601 1.00 0301 2601 1.00 #### 0201 3201 2.00 0201 2608 2.00 0201 2402 1.94 0201 0302 2.00 2402 2402 1.99 0101 0201 1.99 0201 1101 2.00 0301 2601 2.00 0301 1101 1.98 2601 0301 2.00 0301 2601 2.00 0301 2601 2.00 0301 2601 2.00 #### #!/usr/bin/perl use strict; use warnings; my $infile = $ARGV[0]; my $infile2 = $ARGV[1]; unless (open(INFILE, $infile)){ die "Couldn't open infile: $!\n"; } my @AtypeData = qw(A0101 A0102 A0201 A0202 A0205 A0301 A0302 A1101 A2301 A2402 A2403 A2601 A2608 A2902 A3001 A3002 A3004 A3101 A3201 A3601 A6801 A6802); my %diplotypes; my %diplotypes2; initHash(\%diplotypes, \@AtypeData); initHash(\%diplotypes2, \@AtypeData); ##read in the data while (){ chomp; my @line = split ('\t', $_); my $key1 = 'A' . $line[0] . '.' . 'A' . $line[1]; ##first key my $key2 = 'A' . $line[1] . '.' . 'A' . $line[0]; ##key the other way ##check to see if the key exists in the hash ##if it doesn't there is data in your infile, not in you names array if (exists $diplotypes{$key1} && $line[0] <= $line[1]) { $diplotypes{$key1} += $line[2]; } elsif (exists $diplotypes{$key2} && $line[0] >= $line[1]) { $diplotypes{$key2} += $line[2]; } else{##world is out to get you print STDERR "No key for $key1 or $key2\n"; next; } } close INFILE; unless (open(INFILE2, $infile2)){ die "Couldn't open infile: $!\n"; } while (){ chomp; my @line = split ('\t', $_); my $key1 = 'A' . $line[0] . '.' . 'A' . $line[1]; ##first key my $key2 = 'A' . $line[1] . '.' . 'A' . $line[0]; ##key the other way ##check to see if the key exists in the hash ##if it doesn't there is data in your infile, not in you names array if (exists $diplotypes2{$key1} && $line[0] <= $line[1]) { $diplotypes2{$key1} += $line[2]; } elsif (exists $diplotypes2{$key2} && $line[0] >= $line[1]) { $diplotypes2{$key2} += $line[2]; } else{##world is out to get you print STDERR "No key for $key1 or $key2\n"; next; } } foreach my $key1(keys %diplotypes){ if (exists $diplotypes2{$key1}){ $diplotypes{$key1} /= $diplotypes2{$key1} +0.01; } } close INFILE2; printData(\%diplotypes, \@AtypeData); sub initHash { #init the all to all hash ##first argument is the hash of data, and the second is a reference to all the columns my ($refHash, $refArr) = @_; foreach my $ele1(@$refArr){ foreach my $ele2(@$refArr){ my $key = $ele1 . "." . $ele2; if (exists $$refHash{$key}){ print STDERR "This key existed in your array of names, skipping\n"; next; } else{ $$refHash{$key} = 0; } } } } sub printData { my ($refHash, $refArr) = @_; #print header line; print "MATRIX\t"; foreach my $ele(@$refArr){ print "$ele", "\t"; } print "\n"; #print out the actual data foreach my $ele1(@$refArr){ print "$ele1" , "\t";##print out the first value on the row, which is the name foreach my $ele2(@$refArr){ my $key = $ele1 . "." . $ele2; if (exists $$refHash{$key}){ printf "%.2f \t", $$refHash{$key}; } else{ print STDERR "Something is wrong\n"; } } print "\n"; } }