1;2;8;root;xyzzy;cat
1;2;5;root;xyzzy;dog
1;9;root;bird
####
1
/ \
/ \
2 \
--^- \
/ \ \
8 5 9
| | |
cat dog bird
\ / /
xyzzy /
\ /
root
####
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dump qw(pp);
# the file containing the sample data you provided
open my $FH, '<', '1118135.dat2' or die $!;
# Index (multilevel numeric index)
my $tree_nums = { };
# Hierarchy tree
my $tree_names = { };
# randomly-selected test data
my @tests;
###
# Read file and build index hash as well as taxonomy hash
###
while (my $t = <$FH>) {
my @flds = split /;\s*/,$t;
# split numbers into keys and hierarchy into flds
my @keys;
push @keys, shift @flds while $flds[0] =~ /^\d+$/;
# select some of the keys for testing
if (.1 > rand) {
push @tests, [ @keys ];
}
# build taxonomy tree
my $leaf = build_name_tree(@flds);
# Build index tree
my $p = $tree_nums;
while (@keys) {
my $k = shift @keys;
$p->{$k} = {} if ! exists $p->{$k};
$p = $p->{$k}
}
# Tie index tree to taxonomy tree
$p->{ch} = $leaf;
}
# Debug junk
#dump_tree($tree_names), "\n";
#dump_tree($tree_nums), "\n";
# Run some test lookups
for my $t (@tests) {
my $p = $tree_nums;
# Given a list of keys, get the reference to the desired
# leaf of the taxonomy tree
my @keys = @$t;
for my $k (@keys) {
die "Eh? " if ! exists $p->{$k};
$p = $p->{$k};
}
# Now traceback function gives you the path from root to leaf
print join(":",@keys), " => ", join(';', traceback($p->{ch})), "\n";
}
sub dump_tree {
my ($t,$lev) = @_;
$lev = $lev//0;
if ('HASH' eq ref $t) {
for my $k (sort keys %$t) {
if ($k eq 'par') {
print " " x $lev, "Parent is <$t->{$k}[0]>\n";
}
else {
print " " x $lev, $k, "\n";
dump_tree($t->{$k}, $lev+1);
}
}
}
elsif ('ARRAY' eq ref $t) {
die "T is an array of " . scalar(@$t) . " elements";
}
else {
#print pp($t), "\n";
die "T is " . ref($t);
}
}
sub build_name_tree {
my @path = @_;
my $p = $tree_names;
while (@path>1) {
my $parent = shift @path;
my $child = $path[0];
if (! exists $p->{$child}) {
$p->{$child} = { par=>[$parent, $p] };
}
else {
if ($p->{$child}{par}[0] ne $parent) {
die "Unexpected $child:$parent ??? $p->{$child}{par}[0]" unless $p->{$child}{par}[0] eq $parent;
}
}
$p = $p->{$child};
}
return $p;
}
sub traceback {
my ($node) = @_;
my @lineage;
while (defined $node and exists $node->{par}) {
push @lineage, $node->{par}[0];
$node = $node->{par}[1];
}
return reverse @lineage;
}