in reply to Re: Optimizing Regexp::Grammars' grammar (backtracking?)
in thread Optimizing Regexp::Grammars' grammar (backtracking?)

Thanks for the pointers. I've modified the Equal and LessGreater rules now to avoid backtracking, which in turn also makes the relational operators chainable. Now everything is as fast as it should be. Thanks a bunch!
my $grammar = do{
    use Regexp::Grammars;
    qr{
        ^<Answer>$

        <rule: Answer>
            <MATCH=Or>

# precedence level  2: left     =>
        <rule: Pair>
            <K=Key> =\> <V=Answer>
            (?{ $MATCH = [$MATCH{K}, $MATCH{V}] })

# precedence level  3: left     || //
        <rule: Or>
            <[X=And]> ** <[Op=(\|\||//)]>
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $term (@{$MATCH{X}}) {
                    my $op = shift @{$MATCH{Op}//=[]};
                    if    ($op eq '||') { $MATCH ||= $term }
                    elsif ($op eq '//') { $MATCH //= $term }
                }
            })

# precedence level  4: left     &&
        <rule: And>
            <[X=BitOrXor]> ** <[Op=(&&)]>
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $term (@{$MATCH{X}}) {
                    my $op = shift @{$MATCH{Op}//=[]};
                    if    ($op eq '&&') { $MATCH &&= $term }
                }
            })

# precedence level  5: left     | ^
        <rule: BitOrXor>
            <[X=BitAnd]> ** <[Op=(\||\^)]>
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $term (@{$MATCH{X}}) {
                    my $op = shift @{$MATCH{Op}//=[]};
                    if    ($op eq '|') { $MATCH = $MATCH+0 | $term }
                    elsif ($op eq '^') { $MATCH = $MATCH+0 ^ $term }
                }
            })


# precedence level  6: left     &
        <rule: BitAnd>
            <[X=Equal]> ** <[Op=(&)]>
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $term (@{$MATCH{X}}) {
                    my $op = shift @{$MATCH{Op}//=[]};
                    if    ($op eq '&') { $MATCH = $MATCH+0 & $term }
                }
            })

# precedence level  7: nonassoc == != <=> eq ne cmp
        <rule: Equal>
            # \x3c = "<", \x3e = ">"
            <[X=LessGreater]> ** <[Op=(==|!=|\x3c=\x3e|eq|ne|cmp)]>
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $term (@{$MATCH{X}}) {
                    my $op = shift @{$MATCH{Op}//=[]};
                    if    ($op eq '==' ) { $MATCH = ($MATCH ==  $term) }
                    elsif ($op eq '!=' ) { $MATCH = ($MATCH !=  $term) }
                    elsif ($op eq '<=>') { $MATCH = ($MATCH <=> $term) }
                    elsif ($op eq 'eq' ) { $MATCH = ($MATCH eq  $term) }
                    elsif ($op eq 'ne' ) { $MATCH = ($MATCH ne  $term) }
                    elsif ($op eq 'cmp') { $MATCH = ($MATCH cmp $term) }
                }
            })

# precedence level  8: nonassoc < > <= >= lt gt le ge
        <rule: LessGreater>
            # \x3c = "<", \x3e = ">"
            <[X=BitShift]> ** <[Op=(\x3c=?|\x3e=?|lt|gt|le|ge)]>
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $term (@{$MATCH{X}}) {
                    my $op = shift @{$MATCH{Op}//=[]};
                    if    ($op eq '<' ) { $MATCH = ($MATCH <  $term) }
                    elsif ($op eq '<=') { $MATCH = ($MATCH <= $term) }
                    elsif ($op eq '>' ) { $MATCH = ($MATCH >  $term) }
                    elsif ($op eq '>=') { $MATCH = ($MATCH >= $term) }
                    elsif ($op eq 'lt') { $MATCH = ($MATCH lt $term) }
                    elsif ($op eq 'gt') { $MATCH = ($MATCH gt $term) }
                    elsif ($op eq 'le') { $MATCH = ($MATCH le $term) }
                    elsif ($op eq 'ge') { $MATCH = ($MATCH ge $term) }
                }
            })

# precedence level  9: left     << >>
        <rule: BitShift>
            # \x3c = "<", \x3e = ">"
            <[X=Add]> ** <[Op=(\x3c\x3c|\x3e\x3e)]>
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $term (@{$MATCH{X}}) {
                    my $op = shift @{$MATCH{Op}//=[]};
                    if    ($op eq '>>') { $MATCH >>= $term }
                    elsif ($op eq '<<') { $MATCH <<= $term }
                }
            })

# precedence level 10: left     + - .
        <rule: Add>
            <[X=Mult]> ** <[Op=(\+|-|\.)]>
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $term (@{$MATCH{X}}) {
                    my $op = shift @{$MATCH{Op}//=[]};
                    if    ($op eq '+') { $MATCH += $term }
                    elsif ($op eq '-') { $MATCH -= $term }
                    elsif ($op eq '.') { $MATCH .= $term }
                }
            })

# precedence level 11: left     * / % x
        <rule: Mult>
            <[X=Unary]> ** <[Op=(\*|/|%|x)]>
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $term (@{$MATCH{X}}) {
                    my $op = shift @{$MATCH{Op}//=[]};
                    if    ($op eq '*') { $MATCH *= $term }
                    elsif ($op eq '/') { $MATCH /= $term }
                    elsif ($op eq '%') { $MATCH %= $term }
                    elsif ($op eq 'x') { $MATCH x= $term }
                }
            })

# precedence level 12: right    ! ~ unary+ unary-
        <rule: Unary>
            <[Op=(!|~|\+|-)]>* <X=Pow>
            (?{ $MATCH = $MATCH{X};
                if ($MATCH{Op}) {
                    for my $op (reverse @{$MATCH{Op}}) {
                        if    ($op eq '!') { $MATCH = !$MATCH }
                        elsif ($op eq '-') { $MATCH = -$MATCH }
                        elsif ($op eq '~') { $MATCH = ~($MATCH+0) }
                    }
                }
            })

# precedence level 13: right    **
        <rule: Pow>
            <[X=Subscripting]> ** <Op=(\*\*)>
            (?{ $MATCH = reduce { $b ** $a } reverse @{$MATCH{X}} })

# precedence level 14: left    hash[s], array[i]
        <rule: Subscripting>
            <[X=Term]> <[Subscript]>*
            (?{ $MATCH = shift @{$MATCH{X}};
                for my $i (@{$MATCH{Subscript}}) {
                    if (ref($MATCH) eq 'ARRAY'  ) { $MATCH = $MATCH->[$i] }
                    elsif (ref($MATCH) eq 'HASH') { $MATCH = $MATCH->{$i} }
                    else                          { $MATCH = "error: invalid hash subscripting"; last }
                }
            })

        <rule: Subscript>
              \[ <MATCH=Term> \]

# precedence level 15: left     term (variable, str/num literals, func(), (paren))
        <rule: Term>
              <MATCH=Func>
            | <MATCH=Var>
            | <MATCH=Str>
            | <MATCH=Undef>
            | <MATCH=Num>
#            | <debug:step> <MATCH=Array> <debug:off>
            | <MATCH=Array>
            | <MATCH=Hash>
            | \( <MATCH=Answer> \)

        <rule: Array>
            \[ \]
            (?{ $MATCH = [] })
          | \[ <[X=Answer]> ** (,) \]
            (?{ $MATCH = $MATCH{X} })

        <rule: Hash>
            \{ \}
            (?{ $MATCH = {} })
          | \{ <[X=Pair]> ** (,) \}
            (?{ $MATCH = { map { $_->[0] => $_->[1] } @{ $MATCH{X} } } })

        <rule: Key>
            <MATCH=(\w+)>
          | <MATCH=Answer>

        <token: Undef>
            undef
            (?{ $MATCH = undef })

        <token: Num>
            <MATCH=( [+-]? \d++ (?: \. \d++ )?+ )>

        <token: Str>
            # XXX support escapes
            <X=( "[^"]*" | '[^']*' )>
            (?{ $MATCH = substr($MATCH{X}, 1, length($MATCH{X})-2); })

        <rule: Var>
            \$ <X=(\w+)>
            (?{ $MATCH = $vars->{ $MATCH{X} } })

        <rule: Func>
            <FuncName=([A-Za-z_]\w*)> \( <[Args=Answer]> ** (,) \)
            (?{
                my $f = $MATCH{FuncName};
                my $args = $MATCH{Args};
                if    ($f eq 'length') { $MATCH = length($args->[0]) }
                elsif ($f eq 'ceil'  ) { $MATCH = POSIX::ceil($args->[0]) }
                elsif ($f eq 'floor' ) { $MATCH = POSIX::floor($args->[0]) }
                elsif ($f eq 'rand'  ) { $MATCH = rand() }
                else                   { $MATCH = "undef function $f" }
            })

    }xms
};
  • Comment on Re^2: Optimizing Regexp::Grammars' grammar (backtracking?)

Replies are listed 'Best First'.
Re^3: Optimizing Regexp::Grammars' grammar (backtracking?)
by Anonymous Monk on Apr 15, 2010 at 05:08 UTC
    Code goes in <code>$code</code> or <c>$code</c> tags, sharyanto^b^b^b^bdgarmond2, whomever you are
      Thanks for the correction, will remember that from now on. Yup, sorry for the dual identities, it's a laptop/PC thing :-)