If what you want to do is syntax highlighting, you don't need a parser, just a tokenizer. It might make small mistakes (e.g. if you have a variable name that's identical to a keyword), but they should be tolerable.
What follows is a tokenizer written in Parse::RecDescent. That's inefficient because the whole input file must be in memory, and the whole file is processed before anything is outputed.
make_tokenizer.pl -- Run once to create JassTokenizer.pm
#!/usr/bin/perl
# To debug:
# perl -s make_tokenizer.pl -RD_HINT -RD_TRACE > make_tokenizer.out
+ 2>&1
use strict;
use warnings;
use Parse::RecDescent ();
my $grammar = <<'__EOI__';
{
my %keywords = (
map { $_ => 'keyword' } qw(
array
call
constant
debug
else
elseif
endfunction
endglobals
endif
endloop
exitwhen
extends
function
globals
if
local
loop
native
nothing
return
returns
set
takes
then
type
),
and => 'operator',
or => 'operator',
not => 'operator',
null => 'null',
false => 'bool',
true => 'bool',
);
}
tokenize : <skip: ''> token(s?) { $item[2] }
token : ident { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
# | keyword { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| ws { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| comment { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| list_sep { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| paren { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| bracket { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| operator { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| assign { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| string { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| real { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| decimal { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| hex { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| octal { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| packed { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
# | bool { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
# | null { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
| unknown { [ $item[1], $itempos[1]{offset}{from}, $itempos[
+1]{offset}{to} ] }
ident : /[a-zA-Z](?:[a-zA-Z0-9_]*[a-zA-Z0-9])?/ { ($keywords{$item[
+1]} || 'ident') }
ws : m'\s+' { $item[0] }
comment : m'//[^\n]*' { $item[0] }
list_sep : ',' { $item[0] }
paren : m'[()]' { $item[0] }
bracket : m'[\[\]]' { $item[0] }
operator : m'[-+*/]|[=!]=|>=?|<=?' { $item[0] }
assign : '=' { $item[0] }
#string : /"(?:[^\\"\n]|\\[^\n])*"/ { $item[0] }
string : /"(?:[^\\"\n]|\\[^\n])*(?:"|(?=\n))/ { $item[0] }
real : /[0-9]+\.[0-9]*/ { $item[0] }
| /\.[0-9]+/ { $item[0] }
decimal : /[1-9][0-9]*/ { $item[0] }
#hex : /\$[0-9a-fA-F]+/ { $item[0] }
# | /0[xX][0-9a-fA-F]+/ { $item[0] }
hex : /\$[0-9a-fA-F]*/ { $item[0] }
| /0[xX][0-9a-fA-F]*/ { $item[0] }
octal : /0[0-7]*/ { $item[0] }
#packed : /'[^'\n]{4}'/ { $item[0] }
packed : /'[^'\n]{0,4}'?/ { $item[0] }
unknown : /./ { $item[0] }
__EOI__
rename('JassTokenizer.pm', 'JassTokenizer.pm.bak');
Parse::RecDescent->Precompile($grammar, 'JassTokenizer')
or die("Bad grammar.\n");
script.pl
#!/usr/bin/perl
use strict;
use warnings;
use JassTokenizer ();
# To debug, until proper <error> and <error: ...> directives are added
+ to the grammar:
# perl -s script.pl -RD_HINT -RD_TRACE > script.out 2>&1
my $parser = JassTokenizer->new();
my $sample;
foreach (<<'__EOI__', <<'__EOI__')
function AsciiCharToInteger takes string char returns integer
local string charMap = " !\"#$%%&'()*+,-./0123456789:;<=>?@ABCDEFG
+HIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
local string u = SubString(char, 0, 1)
local string c
local integer i = 0
loop
set c = SubString(charMap, i, i + 1)
exitwhen c == ""
if c == u then
return i + 32
endif
set i = i + 1
endloop
return 0
endfunction
function IdStringToIdInteger takes string value returns integer
return AsciiCharToInteger(SubString(value, 0, 1)) * 0x1000000 + Asci
+iCharToInteger(SubString(value, 1, 2)) * 0x10000 + AsciiCharToInteger
+(SubString(value, 2, 3)) * 0x100 + AsciiCharToInteger(SubString(value
+, 3, 4))
endfunction
function makeAdvancedUnit takes player who, string id, location where,
+ real angle, string life, string mana, string abil returns unit
local integer unitid = S2I(id)
local integer spellmnt = StringLength(abil)/4
local unit u = null
local integer i = 0
if unitid == 0 then
set unitid = IdStringToIdInteger(id)
endif
set u = CreateUnit(who, unitid, GetLocationX(where), GetLocationY(wher
+e), angle)
loop
exitwhen i>=spellmnt
call UnitAddAbility(u, IdStringToIdInteger(SubString(abil,i*4,(i+1)*
+4)) )
set i = i + 1
endloop
if StringCase(SubString(life,StringLength(life)-1,StringLength(life) )
+,false) == "p" then
call SetUnitLifePercentBJ(u,S2R(SubString(life,0,StringLength(life))
+ ))
else
call SetUnitLifeBJ(u, S2R(life) )
endif
if StringCase(SubString(mana,StringLength(mana)-1,StringLength(mana) )
+,false) == "p" then
call SetUnitManaPercentBJ(u,S2R(SubString(mana,0,StringLength(mana))
+ ))
else
call SetUnitManaBJ(u, S2R(mana) )
endif
return u
endfunction
__EOI__
function Trig_respawn_Condition takes nothing returns boolean
return true
endfunction
function Trig_respawn_Actions takes nothing returns nothing
local location respawn_point
local integer respawn_unit
local unit u
local integer i = 0
call DisplayTextToForce( GetPlayersAll(), "TRIGSTR_013" )
loop
exitwhen i > udg_max_units
if ( GetTriggerUnit() == udg_all_monsters[i] ) then
set respawn_unit = GetUnitTypeId(GetTriggerUnit())
set respawn_point = Location( udg_unit_pos_x[i], udg_unit_
+pos_y[i] )
call DisplayTextToForce( GetPlayersAll(), "Unit: " )
call DisplayTextToForce( GetPlayersAll(), I2S( i ) )
call TriggerSleepAction( 5.00 )
set u = CreateUnitAtLoc( GetOwningPlayer( GetTriggerUnit()
+ ), respawn_unit, respawn_point, bj_UNIT_FACING )
set udg_all_monsters[i] = u
else
endif
set i = i + 1
endloop
endfunction
//====================================================================
+=======
function InitTrig_respawn takes nothing returns nothing
set gg_trg_respawn = CreateTrigger( )
call TriggerRegisterAnyUnitEventBJ( gg_trg_respawn, EVENT_PLAYER_U
+NIT_DEATH )
call TriggerAddCondition( gg_trg_respawn, Condition( function Trig
+_respawn_Condition ) )
call TriggerAddAction( gg_trg_respawn, function Trig_respawn_Actio
+ns )
endfunction
__EOI__
{
printf("Sample %d.\n", ++$sample);
my $list = $parser->tokenize($_)
or do {
print("Bad text.\n\n\n");
next;
};
printf("%-8s from %d to %d.\n", @$_)
foreach (@$list);
print("\n\n");
}
|