http://qs1969.pair.com?node_id=1175487


in reply to Regex Question

You just write a recursive-descent parser. Doing that is pretty easy. Here's most of one:

#!/usr/bin/perl -w use strict; use Data::Dumper 'Dumper'; my %Data; my $Comment = '/[*]([^*]+|[*]+[^*/])*[*]/'; my $Name = '([a-zA-Z_][a-zA-Z0-9_]*)'; my $Quoted = '"([^"]*)"'; local $/; my $code = <DATA>; parse( \$code ); print Dumper( \%Data ); exit; sub parse { my( $svCode ) = @_; skip( $svCode ); while( $$svCode !~ /\G\z/gc ) { if( $$svCode =~ /\Goptions(?!\w)/gc ) { parseOptions( $svCode ); } elsif( $$svCode =~ /\Gobject(?!\w)/gc ) { parseObject( $svCode ); } else { fail( $svCode, "Expected 'options' or 'object'" ); } skip( $svCode ); } } sub skip { my( $svCode ) = @_; 0 while $$svCode =~ /\G\s+/gc || $$svCode =~ /\G$Comment/gc; } sub expect { my( $svCode, $re, $desc ) = @_; skip( $svCode ); fail( $svCode, "Expected ", $desc ) if $$svCode !~ /\G$re/gc; my $return = $1; skip( $svCode ); return $return; } sub fail { my( $svCode, @error ) = @_; my $pos = pos $$svCode; my $before = substr( $$svCode, 0, $pos ); my $line = 1 + ( $before =~ tr/\n/\n/ ); my $col = 1 + length( $before =~ /([^\n]*)\z/ ? $1 : '' ); my $next = $$svCode =~ /\G([^\n]{1,8})/gc ? $1 : undef; die @error, " at line $line, col $col, before '$next'.\n" if defined $next; my $after = $before =~ /([^\n]{1,8})\z/ ? $1 : undef; die @error, " at line $line, col $col, after '$after'.\n" if defined $after; die @error, " at line $line, col $col.\n"; } sub parseOptions { my( $svCode ) = @_; expect( $svCode, '[{]', "'{' after 'options'" ); while( $$svCode !~ /\G[}]/gc ) { my $name = expect( $svCode, $Name, 'option name' ); if( $$svCode =~ /\G$Name/gc ) { $Data{''}{$name} = $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes $Data{''}{$name} = $1; } elsif( $$svCode =~ /\G[{]/gc ) { parseListOption( $svCode, $name ); } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of options" ); } else { fail( $svCode, "Unsupported value for option '$name'" +); } expect( $svCode, ';', "';' after option '$name'" ); } expect( $svCode, ';', "';' after options" ); } sub parseListOption { my( $svCode, $name ) = @_; skip( $svCode ); my @values; while( $$svCode !~ /\G[}]/gc ) { skip( $svCode ); if( $$svCode =~ /\G$Name/gc ) { push @values, $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes push @values, $1; } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of list of option ' +$name'" ); } else { fail( $svCode, "Unsupported value in list of option '$ +name'" ); } expect( $svCode, ';', "';' after value in list of option ' +$name'" ); } $Data{''}{$name} = \@values; } sub parseListObject { my( $svCode, $obj, $name ) = @_; skip( $svCode ); my @values; while( $$svCode !~ /\G[}]/gc ) { skip( $svCode ); if( $$svCode =~ /\G$Name/gc ) { push @values, $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes push @values, $1; } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of list '$obj'.'$na +me'" ); } else { fail( $svCode, "Unsupported value in list of '$obj'.'$ +name'" ); } expect( $svCode, ';', "';' after value in list of '$obj'.' +$name'" ); } $Data{$obj}{$name} = \@values; } sub parseObject { my( $svCode ) = @_; my $obj = expect( $svCode, $Quoted, 'object name' ); # TODO: Unescape things that can be escaped inside object name +s expect( $svCode, 'in', "'in' after object '$obj'" ); expect( $svCode, '[{]', "'{' for object '$obj'" ); while( $$svCode !~ /\G[}]/gc ) { my $name = expect( $svCode, $Name, "option name for object + '$obj'" ); if( $$svCode =~ /\G$Name/gc ) { $Data{$obj}{$name} = $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes $Data{$obj}{$name} = $1; } elsif( $$svCode =~ /\G[{]/gc ) { parseListObject( $svCode, $obj, $name ); } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of object '$obj'" ) +; } else { fail( $svCode, "Unsupported value for '$obj'.'$name'" +); } expect( $svCode, ';', "';' after '$obj'.'$name'" ); } expect( $svCode, ';', "';' after object '$obj'" ); } __END__ /* SOME COMMENT HERE */ /* MORE COMMENT */ /* Description - information */ options { option1 value; option2 value; option3 "value"; option4 { value1; value2; }; }; /* identifier1 ID 123456 */ object "identifier1" in { option1 value; option2 "value"; option3 { value1; value2; }; option4 { value; }; }; /* identifier2 ID 234561 */ object "identifier2" in { option1 value; option2 "value"; option3 { value1; value2; }; option4 { value; }; }; /* identifier3 ID 345612 */ object "identifier3" in { option1 value; option2 "value"; option3 { value1; value2; }; option4 { value; }; }; /* EOF */

And it even runs:

$VAR1 = { '' => { 'option1' => 'value', 'option2' => 'value', 'option3' => 'value', 'option4' => [ 'value1', 'value2' ], }, 'identifier1' => { 'option1' => 'value', 'option2' => 'value', 'option3' => [ 'value1', 'value2' ], 'option4' => [ 'value' ], }, 'identifier2' => { 'option1' => 'value', 'option2' => 'value', 'option3' => [ 'value1', 'value2' ], 'option4' => [ 'value' ], }, 'identifier3' => { 'option1' => 'value', 'option2' => 'value', 'option3' => [ 'value1', 'value2' ], 'option4' => [ 'value' ], }, };

- tye