in reply to Parsing command string into a hash

The following is a solution. The output format than requested since it wasn't sufficient as per my earlier comment. It may be possible to simplify the output format if certain circumstances will never occur, but you said you needed a more general solution than one that could parse the presented statement.
#!/usr/bin/perl # make_parser.pl use strict; use warnings; use Parse::RecDescent qw( ); my $grammar = <<'__END_OF_GRAMMAR__'; { use strict; use warnings; sub optimise_option { my ($option) = @_; for (;;) { return $option if @{ $option->{children} } != 1; my ($child) = @{ $option->{children} }; return $option if $child->{type} ne 'option'; $option = $child; } } sub optimise_alternation { my ($alternation) = @_; my $choices = $alternation->{choices}; my $optional = 0; for my $choice (@$choices) { next if grep { $_->{type} ne 'option' } @$choice; $optional = 1; last; } return $alternation if !$optional; for my $choice (@$choices) { next if @$choice != 1; my ($child) = @$choice; next if $child->{type} ne 'option'; @$choice = @{ $child->{children} }; } return { type => 'option', children => [ $alternation ], }; } } parse : command param(s?) /\Z/ { [ $item[1], @{$item[2]} ] } command : IDENT param : option | switch | variable | alternation | literal option : '[' param(s?) ']' { optimise_option({ type => $item[0], children => $item[2], }) } switch : DASHED variable(?) { +{ type => $item[0], name => $item[1], value => @{$item[2]} ? $item[2][0]{name} : undef +, } } variable : '<' <skip:''> IDENT '>' { +{ type => $item[0], name => $item[3], } } alternation : '{' altern_body '}' { optimise_alternation({ type => $item[0], choices => $item[2], }) } altern_body : <leftop: param(s) '|' param(s) > literal : IDENT { +{ type => $item[0], value => $item[1], } } IDENT : /[a-zA-Z][a-zA-Z0-9-]*/ DASHED : /-[a-zA-Z][a-zA-Z0-9]*/ __END_OF_GRAMMAR__ Parse::RecDescent->Precompile($grammar, 'Grammar') or die("Bad grammar\n");

#!/usr/bin/perl # test.pl use strict; use warnings; use Data::Dumper qw( ); use Grammar qw( ); { my %keys_lkup = ( option => [qw( children )], switch => [qw( name value )], variable => [qw( name )], alternation => [qw( choices )], literal => [qw( value )], ); sub keys_by_type { my ($h) = @_; if (exists($h->{type})) { return [ qw( type ), @{ $keys_lkup{ $h->{type} } } ]; } else { return [ sort keys %$h ]; } } } { my $parser = Grammar->new(); while (<DATA>) { chomp; my $params = $parser->parse($_) or do { warn("Bad data at line $.\n"); next; }; print(">> $_\n"); print Data::Dumper ->new([ $params ], [qw( $params )]) ->Indent(1) ->Sortkeys(\&keys_by_type) ->Dump(); print("\n"); } } __DATA__ dfpm dataset add [-D] [-N <node-name>] <data-set-name-or-id> { [<volum +e-name-or-id>] | [<qtree-name-or-id>] | [<ossv-dir-name-or-id>] | [<s +torage-system-name-or-id>] | [<host-name-or-id>] } nested-square [ [ param ] ] multi-child { foo <foo> | bar <bar> } partially-optimisable { <foo> | [<bar>] | [<cat>] [<dog>] }

$ perl make_parser.pl && perl test.pl >> dfpm dataset add [-D] [-N <node-name>] <data-set-name-or-id> { [<vo +lume-name-or-id>] | [<qtree-name-or-id>] | [<ossv-dir-name-or-id>] | +[<storage-system-name-or-id>] | [<host-name-or-id>] } $params = [ 'dfpm', { 'type' => 'literal', 'value' => 'dataset' }, { 'type' => 'literal', 'value' => 'add' }, { 'type' => 'option', 'children' => [ { 'type' => 'switch', 'name' => '-D', 'value' => undef } ] }, { 'type' => 'option', 'children' => [ { 'type' => 'switch', 'name' => '-N', 'value' => 'node-name' } ] }, { 'type' => 'variable', 'name' => 'data-set-name-or-id' }, { 'type' => 'option', 'children' => [ { 'type' => 'alternation', 'choices' => [ [ { 'type' => 'variable', 'name' => 'volume-name-or-id' } ], [ { 'type' => 'variable', 'name' => 'qtree-name-or-id' } ], [ { 'type' => 'variable', 'name' => 'ossv-dir-name-or-id' } ], [ { 'type' => 'variable', 'name' => 'storage-system-name-or-id' } ], [ { 'type' => 'variable', 'name' => 'host-name-or-id' } ] ] } ] } ]; >> nested-square [ [ param ] ] $params = [ 'nested-square', { 'type' => 'option', 'children' => [ { 'type' => 'literal', 'value' => 'param' } ] } ]; >> multi-child { foo <foo> | bar <bar> } $params = [ 'multi-child', { 'type' => 'alternation', 'choices' => [ [ { 'type' => 'literal', 'value' => 'foo' }, { 'type' => 'variable', 'name' => 'foo' } ], [ { 'type' => 'literal', 'value' => 'bar' }, { 'type' => 'variable', 'name' => 'bar' } ] ] } ]; >> partially-optimisable { <foo> | [<bar>] | [<cat>] [<dog>] } $params = [ 'partially-optimisable', { 'type' => 'option', 'children' => [ { 'type' => 'alternation', 'choices' => [ [ { 'type' => 'variable', 'name' => 'foo' } ], [ { 'type' => 'variable', 'name' => 'bar' } ], [ { 'type' => 'option', 'children' => [ { 'type' => 'variable', 'name' => 'cat' } ] }, { 'type' => 'option', 'children' => [ { 'type' => 'variable', 'name' => 'dog' } ] } ] ] } ] } ];