You just write a recursive-descent parser. Doing that is pretty easy. Here's most of one:
#!/usr/bin/perl -w
use strict;
use Data::Dumper 'Dumper';
my %Data;
my $Comment = '/[*]([^*]+|[*]+[^*/])*[*]/';
my $Name = '([a-zA-Z_][a-zA-Z0-9_]*)';
my $Quoted = '"([^"]*)"';
local $/;
my $code = <DATA>;
parse( \$code );
print Dumper( \%Data );
exit;
sub parse {
my( $svCode ) = @_;
skip( $svCode );
while( $$svCode !~ /\G\z/gc ) {
if( $$svCode =~ /\Goptions(?!\w)/gc ) {
parseOptions( $svCode );
} elsif( $$svCode =~ /\Gobject(?!\w)/gc ) {
parseObject( $svCode );
} else {
fail( $svCode, "Expected 'options' or 'object'" );
}
skip( $svCode );
}
}
sub skip {
my( $svCode ) = @_;
0
while $$svCode =~ /\G\s+/gc
|| $$svCode =~ /\G$Comment/gc;
}
sub expect {
my( $svCode, $re, $desc ) = @_;
skip( $svCode );
fail( $svCode, "Expected ", $desc )
if $$svCode !~ /\G$re/gc;
my $return = $1;
skip( $svCode );
return $return;
}
sub fail {
my( $svCode, @error ) = @_;
my $pos = pos $$svCode;
my $before = substr( $$svCode, 0, $pos );
my $line = 1 + ( $before =~ tr/\n/\n/ );
my $col = 1 + length( $before =~ /([^\n]*)\z/ ? $1 : '' );
my $next = $$svCode =~ /\G([^\n]{1,8})/gc ? $1 : undef;
die @error, " at line $line, col $col, before '$next'.\n"
if defined $next;
my $after = $before =~ /([^\n]{1,8})\z/ ? $1 : undef;
die @error, " at line $line, col $col, after '$after'.\n"
if defined $after;
die @error, " at line $line, col $col.\n";
}
sub parseOptions {
my( $svCode ) = @_;
expect( $svCode, '[{]', "'{' after 'options'" );
while( $$svCode !~ /\G[}]/gc ) {
my $name = expect( $svCode, $Name, 'option name' );
if( $$svCode =~ /\G$Name/gc ) {
$Data{''}{$name} = $1;
} elsif( $$svCode =~ /\G$Quoted/gc ) {
# TODO: Unescape things that can be escaped inside quo
+tes
$Data{''}{$name} = $1;
} elsif( $$svCode =~ /\G[{]/gc ) {
parseListOption( $svCode, $name );
} elsif( $$svCode =~ /\G\z/gc ) {
fail( $svCode, "Missing '}' at end of options" );
} else {
fail( $svCode, "Unsupported value for option '$name'"
+);
}
expect( $svCode, ';', "';' after option '$name'" );
}
expect( $svCode, ';', "';' after options" );
}
sub parseListOption {
my( $svCode, $name ) = @_;
skip( $svCode );
my @values;
while( $$svCode !~ /\G[}]/gc ) {
skip( $svCode );
if( $$svCode =~ /\G$Name/gc ) {
push @values, $1;
} elsif( $$svCode =~ /\G$Quoted/gc ) {
# TODO: Unescape things that can be escaped inside quo
+tes
push @values, $1;
} elsif( $$svCode =~ /\G\z/gc ) {
fail( $svCode, "Missing '}' at end of list of option '
+$name'" );
} else {
fail( $svCode, "Unsupported value in list of option '$
+name'" );
}
expect( $svCode, ';', "';' after value in list of option '
+$name'" );
}
$Data{''}{$name} = \@values;
}
sub parseListObject {
my( $svCode, $obj, $name ) = @_;
skip( $svCode );
my @values;
while( $$svCode !~ /\G[}]/gc ) {
skip( $svCode );
if( $$svCode =~ /\G$Name/gc ) {
push @values, $1;
} elsif( $$svCode =~ /\G$Quoted/gc ) {
# TODO: Unescape things that can be escaped inside quo
+tes
push @values, $1;
} elsif( $$svCode =~ /\G\z/gc ) {
fail( $svCode, "Missing '}' at end of list '$obj'.'$na
+me'" );
} else {
fail( $svCode, "Unsupported value in list of '$obj'.'$
+name'" );
}
expect( $svCode, ';', "';' after value in list of '$obj'.'
+$name'" );
}
$Data{$obj}{$name} = \@values;
}
sub parseObject {
my( $svCode ) = @_;
my $obj = expect( $svCode, $Quoted, 'object name' );
# TODO: Unescape things that can be escaped inside object name
+s
expect( $svCode, 'in', "'in' after object '$obj'" );
expect( $svCode, '[{]', "'{' for object '$obj'" );
while( $$svCode !~ /\G[}]/gc ) {
my $name = expect( $svCode, $Name, "option name for object
+ '$obj'" );
if( $$svCode =~ /\G$Name/gc ) {
$Data{$obj}{$name} = $1;
} elsif( $$svCode =~ /\G$Quoted/gc ) {
# TODO: Unescape things that can be escaped inside quo
+tes
$Data{$obj}{$name} = $1;
} elsif( $$svCode =~ /\G[{]/gc ) {
parseListObject( $svCode, $obj, $name );
} elsif( $$svCode =~ /\G\z/gc ) {
fail( $svCode, "Missing '}' at end of object '$obj'" )
+;
} else {
fail( $svCode, "Unsupported value for '$obj'.'$name'"
+);
}
expect( $svCode, ';', "';' after '$obj'.'$name'" );
}
expect( $svCode, ';', "';' after object '$obj'" );
}
__END__
/* SOME COMMENT HERE */
/* MORE COMMENT */
/* Description - information */
options {
option1 value;
option2 value;
option3 "value";
option4 { value1; value2; };
};
/*
identifier1
ID 123456
*/
object "identifier1" in {
option1 value;
option2 "value";
option3 { value1; value2; };
option4 { value; };
};
/*
identifier2
ID 234561
*/
object "identifier2" in {
option1 value;
option2 "value";
option3 { value1; value2; };
option4 { value; };
};
/*
identifier3
ID 345612
*/
object "identifier3" in {
option1 value;
option2 "value";
option3 { value1; value2; };
option4 { value; };
};
/* EOF */
And it even runs:
$VAR1 = {
'' => {
'option1' => 'value',
'option2' => 'value',
'option3' => 'value',
'option4' => [ 'value1', 'value2' ],
},
'identifier1' => {
'option1' => 'value',
'option2' => 'value',
'option3' => [ 'value1', 'value2' ],
'option4' => [ 'value' ],
},
'identifier2' => {
'option1' => 'value',
'option2' => 'value',
'option3' => [ 'value1', 'value2' ],
'option4' => [ 'value' ],
},
'identifier3' => {
'option1' => 'value',
'option2' => 'value',
'option3' => [ 'value1', 'value2' ],
'option4' => [ 'value' ],
},
};
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.