#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper ();
use Parse::RecDescent ();
our $test_num;
sub do_test {
my $parser = $_[0];
our $text; local *text = \$_[1]; # Pass by ref.
++$test_num;
print("========================\n");
print("\n");
print("Test $test_num\n");
print("======\n");
print("\n");
print("TEXT\n");
print("----\n");
print($text);
print("\n");
print("OUTPUT\n");
print("------\n");
my $data = $parser->parse($text);
if ($data) {
print(Data::Dumper::Dumper($data));
} else {
warn("Bad text.\n");
}
print("\n");
}
my $grammar = <<'__EOI__';
########################################
# Helper Code
#
{
sub OK () { '' }
sub REJECT () { undef }
my %COMMANDS = map { $_ => $_ } qw(
print
);
# XXX Global bad.
# Should be an attribute of the parser.
my $in_code;
sub dequote {
local $_ = @_ ? $_[0] : $_;
s/^['"]//;
s/['"]$//;
s/\\(.)/$1/gs;
return $_;
}
}
########################################
# Rules
#
parse : SET_IN_JUNK parse_ { $item[2] }
parse_ : stmt_list EOF { $item[1] }
| IN_JUNK JUNK_LINE parse_ { $item[3] }
stmt_list : stmt SET_IN_CODE sep stmt_list_(s?)
{ [ $item[1], @{$item[4]} ] }
stmt_list_ : stmt sep { $item[1] }
stmt : cmd_name arg_list { [ $item[1], @{$item[2]} ] }
cmd_name : IDENT { $COMMANDS{$item[1]} }
| /[^ \t\n;]{1,20}/
{
my $idx = 1;
my $line = $itempos[$idx]{line}{from};
my $col = $itempos[$idx]{column}{from};
my $cmd = $item[$idx];
warn("$line,$col: Unknown command \"$cmd\".\n")
if ($in_code);
REJECT
}
arg_list : QUOTED_STR { [ $item[1] ] }
| {
my $idx = 1;
my $line = $itempos[$idx]{line}{from};
my $col = $itempos[$idx]{column}{from};
warn("$line,$col: Bad argument list.\n");
REJECT
}
sep : SEP
| {
my $idx = 1;
my $line = $itempos[$idx]{line}{from};
my $col = $itempos[$idx]{column}{from};
warn("$line,$col: Expecting ';'.\n");
REJECT
}
########################################
# Tokens
#
EOF : /\Z/
JUNK_LINE : /[^\n]*\n/
SEP : ';'
IDENT : /[a-zA-Z][a-zA-Z0-9-_]*/
QUOTED_STR : /'(?:[^'\\]|\\.)*'/ { dequote($item[1]) }
| /"(?:[^"\\]|\\.)*"/ { dequote($item[1]) }
########################################
# Special
#
SET_IN_JUNK : { $in_code = 0; OK }
SET_IN_CODE : { $in_code = 1; OK }
IN_JUNK : { $in_code ? REJECT : OK }
__EOI__
$::RD_ERRORS = 1;
$::RD_WARN = 1;
$::RD_HINT = 1;
$::RD_TRACE = undef;
# Make sure STDOUT and STDERR mix correctly.
select(STDOUT); $|=1;
select(STDERR); $|=1;
select(STDOUT);
my $parser = Parse::RecDescent->new($grammar)
or die("Bad Grammar.\n");
do_test($parser, <<'__EOI__');
junk junk junk
junk junk print junk junk
junk junk junk
print 'foo';
print 'bar';
__EOI__
do_test($parser, <<'__EOI__');
junk junk junk
junk junk print junk junk
junk junk junk
print 'boo';
junk junk
__EOI__
do_test($parser, <<'__EOI__');
junk junk junk
junk junk print junk junk
junk junk junk
print '\'moo\'';
__EOI__
do_test($parser, <<'__EOI__');
junk junk junk
junk junk print junk junk
junk junk junk
print 'Hello';
print;
__EOI__
print("========================\n");
####
========================
Test 1
======
TEXT
----
junk junk junk
junk junk print junk junk
junk junk junk
print 'foo';
print 'bar';
OUTPUT
------
$VAR1 = [
[
'print',
'foo'
],
[
'print',
'bar'
]
];
========================
Test 2
======
TEXT
----
junk junk junk
junk junk print junk junk
junk junk junk
print 'boo';
junk junk
OUTPUT
------
5,1: Unknown command "junk".
Bad text.
========================
Test 3
======
TEXT
----
junk junk junk
junk junk print junk junk
junk junk junk
print '\'moo\'';
OUTPUT
------
$VAR1 = [
[
'print',
'\'moo\''
]
];
========================
Test 4
======
TEXT
----
junk junk junk
junk junk print junk junk
junk junk junk
print 'Hello';
print;
OUTPUT
------
5,6: Bad argument list.
Bad text.
========================
####
For personal reference.
Not fully tested.
Change:
stmt_list : SEP(s?) stmt SET_IN_CODE stmt_list_(s?) SEP(s?)
{ [ $item[2], @{$item[4]} ] }
Change:
stmt_list_ : seps stmt { $item[2] }
Add:
seps : SEP(s)
| {
my $idx = 1;
my $line = $itempos[$idx]{line}{from};
my $col = $itempos[$idx]{column}{from};
warn("$line,$col: Expecting ';'.\n");
REJECT
}
Remove:
sep