# Aligned sites, one per species, and the expected result:
#
#   CGGCGGAAAACTGTCCTCCGTGC mouse
#   CGACGGAACATTCTCCTCCGCGC rat
#   CGACGGAATATTCCCCTCCGTGC human
#   CGACGGAAGACTCTCCTCCGTGC chimp
#
#   00100000302011000000100 -> number of subst per site (max parsimony)
####
use strict;
use warnings;

use Data::Dumper;
use List::Util qw(max uniq);

# The related phylogenetic tree in Newick format is:
my $tree = '(mouse,rat,(human,chimp))';

# Sequences are listed in the same order as the leaves appear in $tree.
my $sites = [
    'CGGCGGAAAACTGTCCTCCGTGC',    # mouse
    'CGACGGAACATTCTCCTCCGCGC',    # rat
    'CGACGGAATATTCCCCTCCGTGC',    # human
    'CGACGGAAGACTCTCCTCCGTGC',    # chimp
];

my @val = my_parsimony( $sites, $tree );
print Dumper \@val;

# my_parsimony($tfbs [, $newick])
#
# Returns a list with one entry per alignment column: the minimum number of
# substitutions needed to explain that column.
#
#   $tfbs   - array ref of sequences; the column count is taken from the
#             first sequence (sequences are assumed to be aligned and of
#             equal length).
#   $newick - optional Newick tree. Its leaves are matched to the sequences
#             by order of appearance, so the tree must have exactly as many
#             leaves as there are sequences; otherwise the sub dies.
#
# With a tree, every column is scored on that topology, which yields the
# true maximum-parsimony count. Without a tree the sub falls back to
# "distinct bases - 1". That value ignores the topology and is only a lower
# bound: for the column C,T,T,C on (mouse,rat,(human,chimp)) it gives 1
# although 2 substitutions are required.
sub my_parsimony {
    my ( $tfbs, $newick ) = @_;

    return if !$tfbs || !@{$tfbs};

    my $topology;
    if ( defined $newick ) {
        ( $topology, my $leaf_count ) = _parse_newick($newick);
        my $seq_count = scalar @{$tfbs};
        die "Tree has $leaf_count leaves but $seq_count sequences were given\n"
            if $leaf_count != $seq_count;
    }

    my $column_count = length $tfbs->[0];
    my @min_changes;
    for my $pos ( 0 .. $column_count - 1 ) {
        my @column = map { substr $_, $pos, 1 } @{$tfbs};

        if ( defined $topology ) {
            my ( undef, $cost ) = _fitch_hartigan( $topology, \@column );
            push @min_changes, $cost;
        }
        else {
            # Topology-free lower bound (the original behaviour).
            push @min_changes, scalar( uniq(@column) ) - 1;
        }
    }

    return @min_changes;
}

# Parses a Newick string into nested array refs. Internal nodes are array
# refs of their children; leaves are integers numbering the leaves in order
# of appearance (0, 1, 2, ...). Returns ($root, $leaf_count).
#
# Leaves must be named. Branch lengths glued to a leaf name are tolerated,
# and labels or branch lengths following a ')' are ignored. Quoted labels
# are not supported.
sub _parse_newick {
    my ($newick) = @_;

    ( my $text = $newick ) =~ s/\s+//g;
    $text =~ s/;\z//;

    my @stack      = ( [] );    # bottom element collects the root node
    my $leaf_count = 0;
    my $previous   = '';

    for my $token ( $text =~ m{ [(),] | [^(),]+ }gx ) {
        if ( $token eq '(' ) {
            push @stack, [];
        }
        elsif ( $token eq ')' ) {
            die "Unbalanced ')' in Newick tree '$newick'\n" if @stack < 2;
            my $node = pop @stack;
            die "Empty group in Newick tree '$newick'\n" if !@{$node};
            push @{ $stack[-1] }, $node;
        }
        elsif ( $token ne ',' && $previous ne ')' ) {
            # A label that does not directly follow ')' names a leaf.
            push @{ $stack[-1] }, $leaf_count++;
        }
        $previous = $token;
    }

    die "Unbalanced '(' in Newick tree '$newick'\n" if @stack != 1;
    die "Newick tree '$newick' must have exactly one root\n"
        if @{ $stack[0] } != 1;

    return ( $stack[0][0], $leaf_count );
}

# Bottom-up parsimony pass for a single column. $node is a subtree as
# produced by _parse_newick and $column an array ref of bases indexed by
# leaf number. Returns ($states, $cost): a hash ref whose keys are the
# optimal states for the subtree root, and the minimum number of changes
# inside the subtree.
sub _fitch_hartigan {
    my ( $node, $column ) = @_;

    # A leaf admits only its observed base and needs no change.
    return ( +{ $column->[$node] => 1 }, 0 ) if !ref $node;

    my %votes;
    my $cost = 0;
    for my $child ( @{$node} ) {
        my ( $child_states, $child_cost ) = _fitch_hartigan( $child, $column );
        $cost += $child_cost;
        $votes{$_}++ for keys %{$child_states};
    }

    # Giving this node a state backed by $best children costs one change
    # on every remaining child edge. This also covers nodes with more than
    # two children, such as the trifurcating root.
    my $best = max values %votes;
    $cost += scalar( @{$node} ) - $best;

    my %states = map { ( $_ => 1 ) } grep { $votes{$_} == $best } keys %votes;
    return ( \%states, $cost );
}