use strict; use warnings; package ScriptParser; #don't pollute global namespace use Carp; # atom token is any run that doesn't include whitespace # opening or closing parenthesis - YMMV depending on syntax # details my $RE_TOKEN_ATOM = qr/[^\(\)\s]+/o; #================================================================== # Base class for all commands #================================================================== { package AnyCmd; my $CLASS = __PACKAGE__; sub new { my ($sClass, $sCmd, $iLine, $iNested) = @_; #Note: depth and line is for convenience in formatting and #printing error messages my $self = {cmd => $sCmd , depth => $iNested , line => $iLine , args => [] }; return bless($self, $sClass); } sub getArgs { my $self = shift; #my $self=$_[0] also OK here return $self->{args}; } sub getCmd { my $self = shift; #my $self=$_[0] also OK here return $self->{cmd}; } sub getDepth { my $self = shift; #my $self=$_[0] also OK here return $self->{depth}; } sub getLine { my $self = shift; #my $self=$_[0] also OK here return $self->{line}; } sub pushArg { my ($self, $xArg) = @_; push @{$self->getArgs()}, $xArg; } sub resolveArgs { my $self = shift; my $aArgs = $self->getArgs(); #an argument can be either a simple value or a variable whose #value needs to be looked up or another command. What we want #is the value after all of that is done. my $aResolved = []; foreach my $sArg (@$aArgs) { # assume anything that is a subclass of this class is a # command to be executed or a variable to look up. if (ref($sArg) && $sArg->isa($CLASS)) { $sArg = $sArg->execute(); } push @$aResolved, $sArg; } return $aResolved; } sub execute { my $self = shift; #my $self=$_[0] also OK here my $sCmd = $self->getCmd(); my $aResolved = $self->resolveArgs(); return $self->_p_doExecute($sCmd, $aResolved); } #----------------------------------------------------------- # this method is meant to be overridden by each subclass # _p_ is just my funky convention for marking protected # member that aren't meant to be accessed by any sort of # public API, just subclasses sub _p_doExecute { my ($self, $sCmd, $aResolved) = @_; #implementation goes here. each subclass has a different #implementation, but here is a dummy one: #delimiting everything in case one of the args begins or ends #with whitespace. I'm assuming that '(' is not part of any #value, but if there is some other delimiter use that instead #mainly just wanted to avoid messing escapes or quoting inside #each arg. my $sArgList = scalar(@$aResolved) ? '(' . join(') (', map { defined($_)?$_:'undef' } @$aResolved) . ')' : ''; print STDERR "Executing command: $sCmd($sArgList)\n"; return "$sCmd executed"; } } #end AnyCmd package #================================================================== # Actual Cmd definitions here #================================================================== # HERE you would define subclasses for predefined commands # e.g. global, sleep, camera control { package GlobalCmd; our @ISA=qw(AnyCmd); sub _p_doExecute { my ($self, $sCmd, $aResolved) = @_; my ($sName, $sType, $sValue) = @$aResolved; printf STDERR "Declaring variable: name=<%s>, type=<%s>, value=<%s>\n" , map {defined($_) ? $_ : 'undef' } ($sName, $sType, $sValue); } } { package SleepCmd; our @ISA=qw(AnyCmd); sub _p_doExecute { my ($self, $sCmd, $aResolved) = @_; printf STDERR "Sleeping for %d seconds\n", $aResolved->[0]; } } { package UserDefinedCmd; our @ISA=qw(AnyCmd); } #---------------------------------------------------------- # HERE you would fill up the hash with defined commands. # the hash makes it easy to check which commands are # recognized and which are not. Also it maps the command # to the class with the proper implementation of ->execute() my %DEFINED_CMDS = ( global => 'GlobalCmd' , sleep => 'SleepCmd' ); my $USER_DEFINED_CMD_CLASS='UserDefinedCmd'; #================================================================== # And now for the parsing and execution.... #================================================================== #NOTE: this is written to read from but in real code # you would probably want to pass a file handle to # parseScript() and even to runScript() as well. sub runScript { my $aCmds = parseScript(); if (! defined($aCmds)) { carp("Couldn't execute script: too many errors!!!"); return 0; } foreach my $oCmd (@$aCmds) { $oCmd->execute(); } } #---------------------------------------------------------- sub parseScript { my $aTopLevelCmds=[]; my $aNested=[]; my $iLine=0; my $iErrors=0; LINE: while (my $line = ) { chomp $line; $iLine++; # the o flag at the end tells Perl that $RE_TOKEN_ATOM will # not change so once its parsed and compiled it, it can # just reuse the result. DO NOT use if you have a variable # that changes value in your regexp # NOTE: this loop assumes each atom must be on a single line # i.e. nothing like "sle\nep" in the script. while ($line =~ /\s*(;) # $1 = start of comment |\s*(\()?\s* # $2 = start of nested cmd ($RE_TOKEN_ATOM) # $3 = capture the token |(\)\s*) # $4 = end of nested cmd /gox) { # if $1 (comment prefix) is defined, ditch this token and the # rest of the line: its just comments; next LINE if (defined($1)); # not a comment - process $2=begin, $3=atom, $4=close $iErrors += processTokens($iLine, $2,$3,$4,$aTopLevelCmds, $aNested); } #end parsing of line } #end reading input my $iIncomplete = scalar(@$aNested); if ($iIncomplete) { $iErrors += $iIncomplete; # this is why we stored the line number with each command: it helps with # debugging problems with the script carp( "Unmatched opening parenthesis!!!! for commands starting at lines:" . join(',', (map { $_->getLine() } @$aNested)) . "\n" ); } return $iErrors > 0 ? undef : $aTopLevelCmds; } #---------------------------------------------------------- # handle one token retrieved by $line sub processTokens { my ($iLine, $bStart, $sAtom, $bEnd, $aTopLevelCmds, $aNested) = @_; my $iNested = scalar(@$aNested); my $iError = 0; #if ($sAtom) { #whoops this will miss "0" if (defined($sAtom)) { if ($bStart) { # start a new command my $sCmdClass = $DEFINED_CMDS{$sAtom}; $sCmdClass = $USER_DEFINED_CMD_CLASS unless defined($sCmdClass); my $oCmd = $sCmdClass->new($sAtom, $iLine, $iNested); push @$aNested, $oCmd; } elsif (scalar(@$aNested)) { # add an arg to the current command my $oCurrentCmd = $aNested->[-1]; $oCurrentCmd->pushArg($sAtom); } else { $iError=1; carp("We've got a problem: atom <$sAtom> outside " ."parenthesis!!! in line $iLine\n"); } } elsif ($bEnd) { # complete a command if ($iNested) { #we're done with the current command my $oCompletedCmd = pop @$aNested; if ($iNested == 1) { #top level command, add it to the list push @$aTopLevelCmds, $oCompletedCmd; } else { #add it to the argument list of the command #surrounding it my $oCurrentCmd = $aNested->[-1]; $oCurrentCmd->pushArg($oCompletedCmd); } } else { $iError = 1; carp("We've got a problem: unmatched closing parenthesis " ."in line $iLine\n"); } } return $iError; } #---------------------------------------------------------- runScript(); # Data embedded in a file - a nice trick for quick and dirty testing # replace stuff below __DATA__ with the script you want to parse. __DATA__ ;;GAME GLOBALS------------------------------------------------ (global short rounds_completed 0) (global short lives 999) (global trigger_volume battlefield_volume deathbystupidness_1) (global short restart_battlefield 0) (global boolean life_titles true) (global short destroyall 0)