#!/usr/bin/perl use warnings; use strict; use lib '.'; use Bio::Tree::Node; my $str = " \n (\n(A:0.333,B,(C,D,(E, \nF),G),H):0.456,(I,J,K):0.123) \n "; my $current = Bio::Tree::Node->new; my $last; our $re = qr{ \( (?{ $current = $current->add_new_child }) (?: (?: ((?> [^()]+ )) # Non-parens without backtracking (?{ process_children($current, $^N) }) )+ | (??{ $re }) # Group with matching parens )* \)(?{ $current = $current->parent }) }x; $str =~ $re; sub process_children { my ($node_obj, $str) = @_; my @nodes = split '\s*,\s*', $str; for my $node (@nodes) { my ($tag, $contents) = split ':', $node; if ($tag) { $node_obj->add_new_child($tag, $contents) } else { $node_obj->contents($contents); } } }