Use Marpa::R2 (or a similar module) to write a parser for the format.
#!/usr/bin/perl
# Parse a brace-delimited configuration snippet with a Marpa::R2 SLIF grammar
# and dump the resulting nested Perl data structure.
use warnings;
use strict;
use feature qw{ say };

use Marpa::R2;
use Data::Dumper;

# Sample input. All whitespace between tokens is discarded by the grammar,
# except inside the literal '{ }' (empty value), which must be written with
# exactly one space.
my $input = q(sys ecm cloud-provider /Common/aws-ec2 {
    description "The aws-ec2 parameters"
    property-template {
        account { }
        availability-zone {
            valid-values { a b c d }
        }
        instance-type {
            valid-values { t2.micro t2.small t2.medium }
        }
        region {
            valid-values { us-east-1 us-west-1 }
        }
    }
});

# SLIF grammar. Parenthesised symbols are matched but hidden from the
# semantics, so the action subs only ever see the meaningful children.
my $dsl = <<'__DSL__';
lexeme default = latm => 1
:default ::= action => first

Top      ::= atom Attrs Struct      action => top
Attrs    ::= atom Attrs             action => merge
           | atom                   action => newlist
Struct   ::= ('{') Elements ('}')
Elements ::= Element Elements       action => merges
           | Element
Element  ::= atom Value             action => struct
           | atom Struct            action => struct
Value    ::= Struct
           | ('"') string ('"')
           | ('{ }')                action => empty
          || ('{') List ('}')
List     ::= atom List              action => merge
           | atom                   action => newlist

:discard ~ [\s]
string   ~ [^"]+
atom     ~ [^\s{}]+
__DSL__

# --- Semantic actions -------------------------------------------------------
# Each action receives the rule's visible children starting at $_[1]; the
# first argument supplied by Marpa is not used by any of them.

# Top: { name => { attrs => [...], contents => {...} } }
sub top {
    my (undef, $name, $attrs, $contents) = @_;
    return { $name => { attrs => $attrs, contents => $contents } };
}

# Default action: pass the first visible child through unchanged.
sub first {
    my (undef, $child) = @_;
    return $child;
}

# '{ }' yields an empty list.
sub empty { return [] }

# Last item of a right-recursive list: start a new array ref.
sub newlist {
    my (undef, $item) = @_;
    return [$item];
}

# Prepend the head item to the already-built tail, preserving input order.
sub merge {
    my (undef, $head, $tail) = @_;
    return [ $head, @{$tail} ];
}

# A single "key value" pair as a one-element hash ref.
sub struct {
    my (undef, $key, $value) = @_;
    return { $key => $value };
}

# Combine two hash refs; on duplicate keys the right-hand one wins.
sub merges {
    my (undef, $left, $right) = @_;
    return { %{$left}, %{$right} };
}

# --- Parse ------------------------------------------------------------------
my $grammar = Marpa::R2::Scanless::G->new({ source => \$dsl });
my $recce   = Marpa::R2::Scanless::R->new({
    grammar           => $grammar,
    semantics_package => 'main',
});

$recce->read(\$input);

# value() returns a reference to the parse result, or undef if the input
# could not be parsed.
my $value_ref = $recce->value;
defined $value_ref
    or die "No parse found for input\n";

# Fixed-width indentation keeps the dump compact.
local $Data::Dumper::Indent = 1;
print Dumper($value_ref);
Output:
$VAR1 = \{ 'sys' => { 'contents' => { 'property-template' => { 'account' => [], 'region' => { 'valid-values' => [ 'us-east-1', 'us-west-1' ] }, 'availability-zone' => { 'valid-values' => [ 'a', 'b', 'c', 'd' ] }, 'instance-type' => { 'valid-values' => [ 't2.micro', 't2.small', 't2.medium' ] } }, 'description' => 'The aws-ec2 parameters' }, 'attrs' => [ 'ecm', 'cloud-provider', '/Common/aws-ec2' ] } };

Update: Fixed the missing + in the top rule, compacted the output, and reversed the order of the merge rule.

Update2: Added the default action.

map{substr$_->[0],$_->[1]||0,1}[\*||{},3],[[]],[ref qr-1,-,-1],[{}],[sub{}^*ARGV,3]

In reply to Re: Parsing bracket formatted file (updated) by choroba
in thread Parsing bracket formatted file by Stilgar

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; this is "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.