#---------------------------------------------------------------------------- # LEXER section. Grab tokens and feed to the recognizer # my %keywords = map { $_=>undef } # Keywords for a toy language qw( do else end goto if last loop next print program then while until ), # Artificial tokens qw( number string ) ; # Operators $keywords{$$_[1]}='OP_' . uc $$_[0] for ( [lparen=>'('], [rparen=>')'], [mult=>'*'], [div=>'/'], [add=>'+'], [subtr=>'-'], [EOS=>';'], [comment=>'#'], [EQ=>'='], [NEQ=>'<>'], [LT=>'<'], [LTE=>'<='], [GT=>'>'], [GTE=>'>='], [EQADD=>'+='], [EQSUB=>'-='], [EQMUL=>'*='], [EQDIV=>'/='], [COMMA=>','], ); my $FName = shift or die "Missing filename"; open my $FH, '<', $FName; my $token; my $cnt=0; my $curpos=0; my $fl_die=0; OUTER: while (<$FH>) { s/\s+$//; printf "\n% 3u: %s\n", $., $_; pos($_)=0; while (!$fl_die) { /\G\s*/gc; $curpos = pos($_); last if $curpos>=length($_); ++$cnt; # last OUTER if $cnt>40; if (/\G([-+\/*]=?|=)/gc) { $token=tk_xform('OP', $1) } elsif (/\G([;:,])/gc) { $token=tk_xform('OP', $1) } elsif (/\G(<[=>]?|>=?)/gc) { $token=tk_xform('OP', $1) } elsif (/\G(#.*)/gc) { $token=['COMMENT', $1] } elsif (/\G(".*?")/gc) { $token=['string',$1] } elsif (/\G(\d+)/gc) { $token=['number', $1] } elsif (/\G(\w[_\w]*)/gc) { $token=tk_xform('name', $1) } else { $token=['ERROR','UNEXPECTED INPUT', substr($_,pos($_))]; ++$fl_die } print("ABEND (token #:$cnt\n") && last OUTER if $fl_die; next unless defined $token; if ($fl_trace) { print " " . (" " x $curpos) . "^"; no warnings; if ($$token[0] eq 'COMMENT') { print "comment (ignored)" } elsif (!defined $$token[1]) { print $$token[0] } else { print "$$token[0]=$$token[1]" } print "\n"; } next if $$token[0] eq 'COMMENT'; # Feed the token into the parser if (@$token < 2) { push @$token, $$token[0]; } $P->read(@$token); #print " progress: ", join(", ", map { "(".join(",",@$_).")" } @{$P->progress}), "\n"; #print " expected: ", join(", ", @{$P->terminals_expected}), "\n"; $token=undef; } } #### my $TG = Marpa::R2::Grammar->new({ start=>'FILE', actions=>'ToyLang', default_action=>'swallow', unproductive_ok=>[qw( FILE )], rules=>[ # A file contains a PROGRAM and zero or more SUBROUTINES. The # subroutine definitions may precede and/or follow the program. [ FILE=>[qw( PROGRAM name stmt_list )], 'swallow' ], #PROGRAM FILE2)], ], [ FILE=>[qw( COMMENT FILE )], ], #stmt_list PROGRAM FILE2)], ], # [ FILE=>[qw( PROGRAM name stmt_list PROGRAM FILE2)], ], # [ FILE=>[qw( SUB name stmt_list sub FILE)], ], # [ FILE2=>[qw( SUB name stmt_list sub FILE2)], ], # A statement list consists of zero or more statements followed # by END. We don't care whether or not there's an end of # statement marker before END. [ stmt_list=>[qw( END )], 'discard' ], [ stmt_list=>[qw( stmt stmt_list_2 )], 'first_on' ], [ stmt_list_2=>[qw( END )], 'discard' ], [ stmt_list_2=>[qw( OP_EOS END )] ], [ stmt_list_2=>[qw( OP_EOS stmt stmt_list_2 )], 'second_on' ], # [ stmt=>[qw( IF expr if_body )], ], [ stmt=>[qw( PRINT expr print_body )], ], [ stmt=>[qw( WHILE expr DO do_body )], ], # [ stmt=>[qw( DO do_body WHILE expr )], ], [ stmt=>[qw( name assop expr )], 'binary_op' ], [ do_body=>[qw( LOOP )], ], [ do_body=>[qw( stmt do_body_2 )], 'first_on' ], [ do_body_2=>[qw( LOOP )], ], [ do_body_2=>[qw( OP_EOS LOOP )], 'second_arg' ], [ do_body_2=>[qw( OP_EOS stmt do_body_2 )], 'second_arg' ], [ print_body=>[qw( OP_EOS )], ], [ print_body=>[qw( OP_COMMA expr print_body )], 'second_on' ], [ expr=>[qw( term )], 'first_arg' ], [ expr=>[qw( expr logop expr )], 'binary_op' ], [ term=>[qw( term addop term )], 'binary_op' ], [ term=>[qw( factor )], 'first_arg'], [ factor=>[qw( factor mulop factor )], 'binary_op'], [ factor=>[qw( name )], 'first_arg'], [ factor=>[qw( number )], 'first_arg'], [ factor=>[qw( string )], 'first_arg'], [ addop=>[qw( OP_ADD )], 'first_arg'], [ addop=>[qw( OP_SUB )], 'first_arg'], [ assop=>[qw( OP_EQ )], 'first_arg'], [ assop=>[qw( OP_EQADD )], 'first_arg'], [ logop=>[qw( OP_NEQ )], 'first_arg'], [ mulop=>[qw( OP_MUL )], ], [ mulop=>[qw( OP_DIV )], ], ], });