#!/usr/bin/perl
use strict;
use warnings;
use autodie;

# parse-counts.pl - per-name, per-group mutation row counts.
#
# Input is a tab-separated table. The first header cell labels the row-name
# column; every other header cell is a group label, and a label may repeat
# to put several columns in the same group. Each data row starts with a row
# name followed by one cell per group column.
#
# For every row name and every distinct group label, the script counts the
# rows in which at least one column of that group holds the mutation marker.
# The report is printed to STDOUT, space separated, with groups in order of
# first appearance and row names sorted.

# Cell value that marks a mutation.
use constant MUTATION_MARKER => 'MUTS';

# Run as a program only when executed directly, so the subs below can also
# be loaded and called from other code.
exit main(@ARGV) unless caller;

# main(@args) - command-line entry point.
#   @args : exactly one element, the path of the table to read.
# Returns the process exit status: 1 after printing the usage line when the
# argument count is wrong, 0 after printing the report.
# Dies if the file cannot be opened (via autodie) or has no usable header.
sub main {
    my @args = @_;

    if (@args != 1) {
        print "USAGE: ./parse-counts.pl file\n";
        return 1;
    }
    my ($mutfile) = @args;

    # Three-argument open with a lexical handle; autodie raises an error
    # naming the file and the OS error if this fails.
    open my $in, '<', $mutfile;

    my $header_line = <$in>;
    die "No header line found in '$mutfile'.\n" unless defined $header_line;
    my ($headname, $labels, $columns_for) = _parse_header($header_line);

    my %counts;
    while (my $line = <$in>) {
        $line =~ s/\r?\n\z//;          # drop LF or CRLF line ending
        next if $line !~ /\S/;         # ignore blank lines

        my ($name, @cells) = split /\t/, $line;
        my $hits = _group_hits(\@cells, $labels, $columns_for);
        $counts{$name}{$_} += $hits->{$_} for @{$labels};
    }
    close $in;

    print "$headname @{$labels}\n";
    for my $name (sort keys %counts) {
        print "$name @{ $counts{$name} }{ @{$labels} }\n";
    }

    return 0;
}

# _parse_header($header_line) - split the header into its parts.
# Returns a three-element list:
#   - the label of the row-name column (first header cell),
#   - an array ref of distinct group labels in order of first appearance,
#   - a hash ref mapping each group label to an array ref of the zero-based
#     cell positions (counted after the row-name column) that belong to it.
# Dies if the header line contains no cells at all.
sub _parse_header {
    my ($header_line) = @_;

    # Strip the line ending so the last label does not carry a newline.
    $header_line =~ s/\r?\n\z//;

    my ($headname, @group_of_column) = split /\t/, $header_line;
    die "Header line is empty.\n" unless defined $headname;

    my (@labels, %columns_for);
    for my $position (0 .. $#group_of_column) {
        my $label = $group_of_column[$position];
        push @labels, $label unless exists $columns_for{$label};
        push @{ $columns_for{$label} }, $position;
    }

    return ($headname, \@labels, \%columns_for);
}

# _group_hits(\@cells, \@labels, \%columns_for) - evaluate one data row.
# Returns a hash ref mapping each label to 1 when any of that group's cells
# equals MUTATION_MARKER, else 0. Cells missing from a short row count as
# "no mutation".
#
# Cells are compared column by column rather than by OR-ing the whole row
# against a mask string: string bitwise OR works on character positions, so
# it only lines up with columns when every cell has the same width.
sub _group_hits {
    my ($cells, $labels, $columns_for) = @_;

    my %hit;
    for my $label (@{$labels}) {
        my $found = grep {
            defined $cells->[$_] && $cells->[$_] eq MUTATION_MARKER
        } @{ $columns_for->{$label} };
        $hit{$label} = $found ? 1 : 0;
    }

    return \%hit;
}

# True value so the file can also be loaded with require.
1;

__END__

The output produced is:

Gname G1 G2 G3
A 3 2 0
B 2 1 1
C 2 0 0

(That output was reported for the string-mask version of this script. With
the column-by-column comparison above, the same data gives A 1 0 1,
B 1 2 2 and C 2 1 0.)

The modified data file is:

Gname G1 G1 G1 G1 G2 G2 G3
A W W MUTS W W W MUTS
A W W W W W W W
A W W W W W W W
B W W W W W MUTS MUTS
B MUTS W W W W MUTS MUTS
C MUTS MUTS MUTS W W W W
C MUTS W W MUTS MUTS W W