Beefy Boxes and Bandwidth Generously Provided by pair Networks
Welcome to the Monastery
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??

You just write a recursive-descent parser. Doing that is pretty easy. Here's most of one:

#!/usr/bin/perl -w use strict; use Data::Dumper 'Dumper'; my %Data; my $Comment = '/[*]([^*]+|[*]+[^*/])*[*]/'; my $Name = '([a-zA-Z_][a-zA-Z0-9_]*)'; my $Quoted = '"([^"]*)"'; local $/; my $code = <DATA>; parse( \$code ); print Dumper( \%Data ); exit; sub parse { my( $svCode ) = @_; skip( $svCode ); while( $$svCode !~ /\G\z/gc ) { if( $$svCode =~ /\Goptions(?!\w)/gc ) { parseOptions( $svCode ); } elsif( $$svCode =~ /\Gobject(?!\w)/gc ) { parseObject( $svCode ); } else { fail( $svCode, "Expected 'options' or 'object'" ); } skip( $svCode ); } } sub skip { my( $svCode ) = @_; 0 while $$svCode =~ /\G\s+/gc || $$svCode =~ /\G$Comment/gc; } sub expect { my( $svCode, $re, $desc ) = @_; skip( $svCode ); fail( $svCode, "Expected ", $desc ) if $$svCode !~ /\G$re/gc; my $return = $1; skip( $svCode ); return $return; } sub fail { my( $svCode, @error ) = @_; my $pos = pos $$svCode; my $before = substr( $$svCode, 0, $pos ); my $line = 1 + ( $before =~ tr/\n/\n/ ); my $col = 1 + length( $before =~ /([^\n]*)\z/ ? $1 : '' ); my $next = $$svCode =~ /\G([^\n]{1,8})/gc ? $1 : undef; die @error, " at line $line, col $col, before '$next'.\n" if defined $next; my $after = $before =~ /([^\n]{1,8})\z/ ? $1 : undef; die @error, " at line $line, col $col, after '$after'.\n" if defined $after; die @error, " at line $line, col $col.\n"; } sub parseOptions { my( $svCode ) = @_; expect( $svCode, '[{]', "'{' after 'options'" ); while( $$svCode !~ /\G[}]/gc ) { my $name = expect( $svCode, $Name, 'option name' ); if( $$svCode =~ /\G$Name/gc ) { $Data{''}{$name} = $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes $Data{''}{$name} = $1; } elsif( $$svCode =~ /\G[{]/gc ) { parseListOption( $svCode, $name ); } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of options" ); } else { fail( $svCode, "Unsupported value for option '$name'" +); } expect( $svCode, ';', "';' after option '$name'" ); } expect( $svCode, ';', "';' after options" ); } sub parseListOption { my( $svCode, $name ) = @_; skip( $svCode ); my @values; while( $$svCode !~ /\G[}]/gc ) { skip( $svCode ); if( $$svCode =~ /\G$Name/gc ) { push @values, $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes push @values, $1; } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of list of option ' +$name'" ); } else { fail( $svCode, "Unsupported value in list of option '$ +name'" ); } expect( $svCode, ';', "';' after value in list of option ' +$name'" ); } $Data{''}{$name} = \@values; } sub parseListObject { my( $svCode, $obj, $name ) = @_; skip( $svCode ); my @values; while( $$svCode !~ /\G[}]/gc ) { skip( $svCode ); if( $$svCode =~ /\G$Name/gc ) { push @values, $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes push @values, $1; } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of list '$obj'.'$na +me'" ); } else { fail( $svCode, "Unsupported value in list of '$obj'.'$ +name'" ); } expect( $svCode, ';', "';' after value in list of '$obj'.' +$name'" ); } $Data{$obj}{$name} = \@values; } sub parseObject { my( $svCode ) = @_; my $obj = expect( $svCode, $Quoted, 'object name' ); # TODO: Unescape things that can be escaped inside object name +s expect( $svCode, 'in', "'in' after object '$obj'" ); expect( $svCode, '[{]', "'{' for object '$obj'" ); while( $$svCode !~ /\G[}]/gc ) { my $name = expect( $svCode, $Name, "option name for object + '$obj'" ); if( $$svCode =~ /\G$Name/gc ) { $Data{$obj}{$name} = $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes $Data{$obj}{$name} = $1; } elsif( $$svCode =~ /\G[{]/gc ) { parseListObject( $svCode, $obj, $name ); } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of object '$obj'" ) +; } else { fail( $svCode, "Unsupported value for '$obj'.'$name'" +); } expect( $svCode, ';', "';' after '$obj'.'$name'" ); } expect( $svCode, ';', "';' after object '$obj'" ); } __END__ /* SOME COMMENT HERE */ /* MORE COMMENT */ /* Description - information */ options { option1 value; option2 value; option3 "value"; option4 { value1; value2; }; }; /* identifier1 ID 123456 */ object "identifier1" in { option1 value; option2 "value"; option3 { value1; value2; }; option4 { value; }; }; /* identifier2 ID 234561 */ object "identifier2" in { option1 value; option2 "value"; option3 { value1; value2; }; option4 { value; }; }; /* identifier3 ID 345612 */ object "identifier3" in { option1 value; option2 "value"; option3 { value1; value2; }; option4 { value; }; }; /* EOF */

And it even runs:

$VAR1 = { '' => { 'option1' => 'value', 'option2' => 'value', 'option3' => 'value', 'option4' => [ 'value1', 'value2' ], }, 'identifier1' => { 'option1' => 'value', 'option2' => 'value', 'option3' => [ 'value1', 'value2' ], 'option4' => [ 'value' ], }, 'identifier2' => { 'option1' => 'value', 'option2' => 'value', 'option3' => [ 'value1', 'value2' ], 'option4' => [ 'value' ], }, 'identifier3' => { 'option1' => 'value', 'option2' => 'value', 'option3' => [ 'value1', 'value2' ], 'option4' => [ 'value' ], }, };

- tye        


In reply to Re: Regex Question (rec desc) by tye
in thread Regex Question by jedikaiti

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others browsing the Monastery: (8)
As of 2024-04-16 07:29 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found