#!/usr/bin/perl
use warnings;
use strict;

# Classify every node of a parent/child tree by the sequence of edge
# likelihoods found on its shortest path up to the root node 'Q'.
#
# Input  : tab-separated rows (child, parent, likelihood) read from the
#          __DATA__ section at the end of this file; the first row is a header.
# Output : one line per node on STDOUT:
#              <node>: <node><-<parent><-...<-Q, <likelihood path>[, <group>]

# Hash table/dictionary for all the groups.  The key is the likelihood path,
# read from the node itself upwards towards the root and joined with '.'.
my %group = (
    'P'        => 'I_1',
    'Pl'       => 'I_2',
    'P.P'      => 'I_3',
    'P.Pl'     => 'I_4',
    'Pl.P'     => 'I_5',
    'Pl.Pl'    => 'I_6',
    'P.P.P'    => 'I_7',
    'P.P.Pl'   => 'I_8',
    'P.Pl.P'   => 'I_9',
    'P.Pl.Pl'  => 'I_10',
    'Pl.P.P'   => 'I_11',
    'Pl.P.Pl'  => 'I_12',
    'Pl.Pl.P'  => 'I_13',
    'Pl.Pl.Pl' => 'I_14',
    'E'        => 'II_15',
    'P.E'      => 'II_16',
    'Pl.E'     => 'II_17',
    'P.P.E'    => 'II_18',
    'P.Pl.E'   => 'II_19',
    'Pl.P.E'   => 'II_20',
    'Pl.Pl.E'  => 'II_21',
    'E.P'      => 'III_22',
    'E.Pl'     => 'III_23',
    'P.E.P'    => 'III_24',
    'P.E.Pl'   => 'III_25',
    'Pl.E.P'   => 'III_26',
    'Pl.E.Pl'  => 'III_27',
    'E.P.P'    => 'III_28',
    'E.P.Pl'   => 'III_29',
    'E.Pl.P'   => 'III_30',
    'E.Pl.Pl'  => 'III_31',
    'E.E'      => 'IV_32',
    'P.E.E'    => 'IV_33',
    'Pl.E.E'   => 'IV_34',
    'E.P.E'    => 'IV_35',
    'E.Pl.E'   => 'IV_36',
    'E.E.P'    => 'IV_37',
    'E.E.Pl'   => 'IV_38',
    'E.E.E'    => 'IV_39',
);

<DATA>;    # Skip the headers (first row).

# %tree maps a node name to a list of options; each option is a hash with
#   parent => name of the parent node ('' for the root row),
#   prob   => likelihood label of the edge to that parent,
#   dist   => number of edges between the node and the root 'Q'.
my %tree;

# Parse through the input data and fill in our tree data structure.
LINE:
while (my $line = <DATA>) {
    chomp $line;

    # A limit of -1 keeps trailing empty fields, so a row whose last column
    # is empty still yields three fields.
    my ($child, $parent, $prob) = split /\t/, $line, -1;

    # Skip blank lines and filler rows that are not tab-separated records.
    next LINE unless defined $parent;
    $prob = '' unless defined $prob;

    if ($child eq 'Q') {
        push @{ $tree{$child} }, { parent => '', prob => $prob, dist => 0 };
        next LINE;
    }

    if ($parent eq 'Q') {
        push @{ $tree{$child} }, { parent => $parent, prob => $prob, dist => 1 };
        next LINE;
    }

    # Checking first avoids autovivifying an empty entry for an unknown
    # parent, which would later show up as a bogus node in the report.
    if (!exists $tree{$parent}) {
        warn "Skipping '$child': parent '$parent' has not been defined yet\n";
        next LINE;
    }

    # One option per way of reaching the parent, each one edge longer.
    for my $opt (@{ $tree{$parent} }) {
        push @{ $tree{$child} },
            { parent => $parent, prob => $prob, dist => $opt->{dist} + 1 };
    }
}

# Report the nodes, shortest names first, then alphabetically.
for my $child (sort { length $a <=> length $b or $a cmp $b } keys %tree) {
    my @bestPath = findBestPath($child, \%tree);

    # Join the likelihood path.  Empty or missing labels (e.g. a root row
    # without a likelihood) are dropped so they cannot leave a dangling '.'.
    my $probs = join '.',
        grep { defined($_) && length($_) } map { $_->{prob} } @bestPath;

    printf "%-5s ", "$child:";
    print join '<-', $child, grep { $_ } map { $_->{parent} } @bestPath;
    print ", $probs";

    # If a group is found for this likelihood path in the group hash table
    # then print it, else leave the group column out.
    print ", $group{$probs}" if exists $group{$probs};
    print "\n";
}

# findBestPath($child, $tree [, $seen])
#
# Returns the list of option hashes on the shortest path from $child up to
# the root 'Q', starting with the option chosen for $child itself.  The
# root's own entry is appended only when the tree contains a row for 'Q'.
# An unknown node yields an empty list, and a parent cycle ends the path
# instead of recursing forever.  $tree is never modified.
#
#   $child - name of the node to start from
#   $tree  - reference to the node => [options] hash
#   $seen  - optional hash reference of nodes already visited (used by the
#            recursion as a cycle guard; callers normally omit it)
sub findBestPath {
    my ($child, $tree, $seen) = @_;
    $seen ||= {};

    return unless defined $child;

    if ($child eq 'Q') {
        my $root = $tree->{Q};
        return $root && @$root ? $root->[0] : ();
    }

    return if $seen->{$child}++;

    my $options = $tree->{$child};
    return unless $options && @$options;

    # The option with the fewest edges to the root wins.
    my ($best) = sort { $a->{dist} <=> $b->{dist} } @$options;
    return $best, findBestPath($best->{parent}, $tree, $seen);
}

__DATA__
child, Parent, likelihood
M7	Q	P
M54	M7	Pl
M213	M54	E
M206	M54	E
M194	M54	E
...