in reply to Parsing Text into Arrays..

I promised a Parse::RecDescent-free parser, and here it is. It does simple items, arrays, mappings, nested structures, even mappings with arrays as keys. (People curious about the syntax should Google for "LPC".) I haven't tested it thoroughly, but it seems to work pretty well. String-postprocessing is non-existent, so you get the original source back, even including the quotes.

I had a version using s/^PATTERN// as well, but for some reason, I thought that using /\GPATTERN/gc looked cuter.

BTW, this is a recursive descent parser.


use strict;
use warnings;
use Data::Dumper;

# Demo: parse a nested LPC literal and dump the resulting Perl structure.
print Dumper(parse_lpc_expr(<<'END_LPC'));
({ 1, 2, "three", 0, ({ "internal", "array", 0, }),
   ([ ({"key1", "key2"}): "value", "key": "anothervalue" ]),
   ({ "arrays", ({ "even", ({ "nest"})})}), "end", })
END_LPC

# parse_lpc_expr($text)
#
# Wrapper / entry point of the recursive descent parser.  Parses the first
# LPC item found in $text and returns it:
#   * a number or string literal -> plain scalar holding the source text
#     (strings keep their surrounding quotes; no unescaping is done),
#   * ({ ... })                  -> array reference,
#   * ([ key: value, ... ])      -> hash reference.
# Returns nothing (empty list) when $text holds only whitespace.  Dies with
# a message pointing at the offending position on a syntax error.  Text
# following the first complete item is not examined.
#
# The text is held in a localized $_ and all parse_lpc_* helpers advance
# pos($_) with /\G.../gc matches, so they must be called with $_ set up
# this way.
sub parse_lpc_expr {
    local $_ = shift;

    # Normalize CRLF, lone CR and lone LF to "\n".
    s/\015\012?|\012/\n/g;

    return parse_lpc_item();
}

# Actual parser starts here:

# parse_lpc_item()
#
# Parses one item at pos($_): array, mapping, number or string.  Returns the
# parsed value, or nothing (empty list) when only whitespace is left.
# Callers test the *number* of returned values rather than their truth, so
# that a literal 0 still counts as an item.
sub parse_lpc_item {
    my $result;

    /\G\s*/gc;                      # skip leading whitespace (always matches)
    pos() < length or return;       # end of input: no item

    if (/\G\(\{/gc) {
        $result = parse_lpc_array();
        /\G\s*\}\)/gc or error("})");
    }
    elsif (/\G\(\[/gc) {
        $result = parse_lpc_mapping();
        /\G\s*\]\)/gc or error("])");
    }
    elsif (/\G(-?(?:\d+\.?\d*|\.\d+))/gc) {
        $result = $1;
    }
    elsif (/\G("(?:[^"\n\\]|\\.|"\s*\")*")/sgc) {
        # A string literal; adjacent literals ("a" "b") are swallowed as one.
        $result = $1;               # lazy: raw source text, quotes included
    }
    else {
        error();
    }

    return $result;
}

# parse_lpc_array()
#
# Parses the comma separated items of an array, starting just after "({".
# Stops in front of the closing "})" (the caller consumes it) and returns an
# array reference.  A trailing comma is accepted.
sub parse_lpc_array {
    my @result;

    while (length) {
        # Closing bracket ahead?  Whitespace is skipped first so that an
        # empty array written as "({ })" is accepted too.
        last if /\G\s*(?=\}\))/gc;

        my @item = parse_lpc_item();
        last unless @item;          # ran out of input
        push @result, $item[0];

        /\G\s*,\s*/gc or last;
    }

    return \@result;
}

# parse_lpc_mapping()
#
# Parses the "key: value" pairs of a mapping, starting just after "([".
# Stops in front of the closing "])" (the caller consumes it) and returns a
# hash reference.  A key may be an array of simple keys, in which case every
# element of that array is mapped to the same value.  Dies when a key is a
# mapping or an array containing non-simple elements.
sub parse_lpc_mapping {
    my %result;

    while (length) {
        # Closing bracket ahead?  Whitespace is skipped first so that an
        # empty mapping written as "([ ])" is accepted too.
        last if /\G\s*(?=\]\))/gc;

        my @key = parse_lpc_item();
        last unless @key;           # ran out of input
        my $key = $key[0];

        /\G\s*:\s*/gc or error(":");

        my @value = parse_lpc_item();
        error() unless @value;
        my $value = $value[0];

        if (ref $key eq 'ARRAY') {
            foreach my $subkey (@$key) {
                die "A mapping should only have simple keys, or arrays of simple keys\n"
                    if ref $subkey;
                $result{$subkey} = $value;
            }
        }
        elsif (ref $key) {
            die "A mapping can't have a mapping as a key\n";
        }
        else {
            $result{$key} = $value;
        }

        /\G\s*,\s*/gc or last;
    }

    return \%result;
}

# error($expect)
#
# Dies with a syntax error message.  A "^" marker is inserted into the
# (localized) source text at the current parse position, and up to five
# non-blank characters found there are quoted in the message.  $expect is
# optional; when given, it names the token the parser was looking for.
sub error {
    my ($expect) = @_;

    s/\G\s*(\S{0,5})/^$1/;          # \G anchors at pos(); always matches

    if (defined $expect) {
        die qq{Expected "$expect", found "$1" at "$_"\n};
    }
    else {
        die qq{Can't parse "$1" at "$_"\n};
    }
}

Result:
$VAR1 = [ '1', '2', '"three"', '0', [ '"internal"', '"array"', '0' ], { '"key1"' => '"value"', '"key"' => '"anothervalue"', '"key2"' => '"value"' }, [ '"arrays"', [ '"even"', [ '"nest"' ] ] ], '"end"' ];