#----------------------------------------------------------------------------
# LEXER section.  Grab tokens and feed to the recognizer
#
# Reads the file named by the first argument line by line, echoes each line,
# splits it into tokens and passes every non-comment token to $P->read().
#
# Names used here but defined outside this section:
#   tk_xform($type, $text) - NOTE(review): presumably returns a token array
#                            ref (or undef to skip); confirm at its definition
#   $fl_trace              - when true, a trace line is printed per token
#   $P                     - the recognizer object that receives the tokens
#

# Keyword table.  Reserved words and artificial tokens are stored as keys
# with undefined values; operators (below) map to an 'OP_<NAME>' string.
my %keywords;
@keywords{
    # Keywords for a toy language
    qw( do else end goto if last loop next print program then while until ),
    # Artificial tokens
    qw( number string ),
} = ();

# Operators: each pair is [ name => lexeme ]; the table is keyed by lexeme.
for my $op (
    [ lparen  => '('  ], [ rparen => ')'  ],
    [ mult    => '*'  ], [ div    => '/'  ],
    [ add     => '+'  ], [ subtr  => '-'  ],
    [ EOS     => ';'  ], [ comment => '#' ],
    [ EQ      => '='  ], [ NEQ    => '<>' ],
    [ LT      => '<'  ], [ LTE    => '<=' ],
    [ GT      => '>'  ], [ GTE    => '>=' ],
    [ EQADD   => '+=' ], [ EQSUB  => '-=' ],
    [ EQMUL   => '*=' ], [ EQDIV  => '/=' ],
    [ COMMA   => ','  ],
) {
    my ($op_name, $lexeme) = @$op;
    $keywords{$lexeme} = 'OP_' . uc $op_name;
}

my $FName = shift or die "Missing filename";

# Fail loudly if the file cannot be read, rather than silently lexing nothing.
open my $FH, '<', $FName or die "Cannot open '$FName': $!";

my $token;
my $cnt    = 0;    # running count of tokens attempted (all lines)
my $curpos = 0;    # offset in the current line where the token starts
my $fl_die = 0;    # set when unrecognized input is met

OUTER:
while (<$FH>) {
    s/\s+$//;                          # strip trailing whitespace and newline
    printf "\n% 3u: %s\n", $., $_;     # echo the line with its line number
    pos($_) = 0;

    while (!$fl_die) {
        # Skip leading whitespace; /c keeps pos() intact when a match fails,
        # so every alternative below is tried at the same offset.
        /\G\s*/gc;
        $curpos = pos($_);
        last if $curpos >= length($_);
        ++$cnt;
        # last OUTER if $cnt>40;

        if (/\G([-+\/*]=?|=)/gc) {
            $token = tk_xform('OP', $1);
        }
        elsif (/\G([;:,()])/gc) {
            # '(' and ')' are included because the operator table defines
            # them; without them any parenthesis aborted the run.
            $token = tk_xform('OP', $1);
        }
        elsif (/\G(<[=>]?|>=?)/gc) {
            $token = tk_xform('OP', $1);
        }
        elsif (/\G(#.*)/gc) {
            $token = ['COMMENT', $1];
        }
        elsif (/\G(".*?")/gc) {
            $token = ['string', $1];
        }
        elsif (/\G(\d+)/gc) {
            $token = ['number', $1];
        }
        elsif (/\G(\w[_\w]*)/gc) {
            $token = tk_xform('name', $1);
        }
        else {
            $token = ['ERROR', 'UNEXPECTED INPUT', substr($_, pos($_))];
            ++$fl_die;
        }

        if ($fl_die) {
            # Report what could not be lexed (on STDERR), then stop reading.
            # The exit no longer depends on print's return value.
            warn "$$token[1] at line $., column $curpos: $$token[2]\n";
            print "ABEND (token #:$cnt\n";
            last OUTER;
        }

        next unless defined $token;

        if ($fl_trace) {
            # Caret placed under the token's start, followed by the token.
            print " " . (" " x $curpos) . "^";
            no warnings;
            if ($$token[0] eq 'COMMENT') {
                print "comment (ignored)";
            }
            elsif (!defined $$token[1]) {
                print $$token[0];
            }
            else {
                print "$$token[0]=$$token[1]";
            }
            print "\n";
        }

        # Comments are traced but never reach the recognizer.
        next if $$token[0] eq 'COMMENT';

        # Feed the token into the parser.  A one-element token gets its
        # type repeated so read() always receives two arguments.
        if (@$token < 2) {
            push @$token, $$token[0];
        }
        $P->read(@$token);
        #print " progress: ", join(", ", map { "(".join(",",@$_).")" } @{$P->progress}), "\n";
        #print " expected: ", join(", ", @{$P->terminals_expected}), "\n";
        $token = undef;
    }
}