1;2;8;root;xyzzy;cat 1;2;5;root;xyzzy;dog 1;9;root;bird #### 1 / \ / \ 2 \ --^- \ / \ \ 8 5 9 | | | cat dog bird \ / / xyzzy / \ / root #### #!/usr/bin/env perl use strict; use warnings; use Data::Dump qw(pp); # the file containing the sample data you provided open my $FH, '<', '1118135.dat2' or die $!; # Index (multilevel numeric index) my $tree_nums = { }; # Hierarchy tree my $tree_names = { }; # randomly-selected test data my @tests; ### # Read file and build index hash as well as taxonomy hash ### while (my $t = <$FH>) { my @flds = split /;\s*/,$t; # split numbers into keys and hierarchy into flds my @keys; push @keys, shift @flds while $flds[0] =~ /^\d+$/; # select some of the keys for testing if (.1 > rand) { push @tests, [ @keys ]; } # build taxonomy tree my $leaf = build_name_tree(@flds); # Build index tree my $p = $tree_nums; while (@keys) { my $k = shift @keys; $p->{$k} = {} if ! exists $p->{$k}; $p = $p->{$k} } # Tie index tree to taxonomy tree $p->{ch} = $leaf; } # Debug junk #dump_tree($tree_names), "\n"; #dump_tree($tree_nums), "\n"; # Run some test lookups for my $t (@tests) { my $p = $tree_nums; # Given a list of keys, get the reference to the desired # leaf of the taxonomy tree my @keys = @$t; for my $k (@keys) { die "Eh? " if ! exists $p->{$k}; $p = $p->{$k}; } # Now traceback function gives you the path from root to leaf print join(":",@keys), " => ", join(';', traceback($p->{ch})), "\n"; } sub dump_tree { my ($t,$lev) = @_; $lev = $lev//0; if ('HASH' eq ref $t) { for my $k (sort keys %$t) { if ($k eq 'par') { print " " x $lev, "Parent is <$t->{$k}[0]>\n"; } else { print " " x $lev, $k, "\n"; dump_tree($t->{$k}, $lev+1); } } } elsif ('ARRAY' eq ref $t) { die "T is an array of " . scalar(@$t) . " elements"; } else { #print pp($t), "\n"; die "T is " . ref($t); } } sub build_name_tree { my @path = @_; my $p = $tree_names; while (@path>1) { my $parent = shift @path; my $child = $path[0]; if (! exists $p->{$child}) { $p->{$child} = { par=>[$parent, $p] }; } else { if ($p->{$child}{par}[0] ne $parent) { die "Unexpected $child:$parent ??? $p->{$child}{par}[0]" unless $p->{$child}{par}[0] eq $parent; } } $p = $p->{$child}; } return $p; } sub traceback { my ($node) = @_; my @lineage; while (defined $node and exists $node->{par}) { push @lineage, $node->{par}[0]; $node = $node->{par}[1]; } return reverse @lineage; }