in reply to Understanding 're' debug output
scan = r->program + 1; /* First BRANCH. */
/* Usually it is OPEN1... */
if (OP(scan) != BRANCH) { /* Only one top-level choice. */
first = scan;
/* Skip introductions and multiplicators >= 1. */
while ((OP(first) == OPEN && (sawopen = 1)) ||
/* An OR of *one* alternative - should not happen now. */
(OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
(OP(first) == PLUS) ||
(OP(first) == MINMOD) ||
/* An {n,m} with n>0 */
(PL_regkind[(U8)OP(first)] == CURLY && ARG1(first) > 0) ) {
if (OP(first) == PLUS)
sawplus = 1;
else
first += regarglen[(U8)OP(first)];
first = NEXTOPER(first);
}
/* Starting-point info. */
again:
if (PL_regkind[(U8)OP(first)] == EXACT) {
if (OP(first) == EXACT)
; /* Empty, get anchored substr later. */
else if ((OP(first) == EXACTF || OP(first) == EXACTFL))
r->regstclass = first;
}
else if (strchr((char*)PL_simple,OP(first)))
r->regstclass = first;
else if (PL_regkind[(U8)OP(first)] == BOUND ||
PL_regkind[(U8)OP(first)] == NBOUND)
r->regstclass = first;
else if (PL_regkind[(U8)OP(first)] == BOL) {
first = NEXTOPER(first);
goto again;
}
else if (OP(first) == GPOS) {
first = NEXTOPER(first);
goto again;
}
else if (!sawopen && (OP(first) == STAR &&
PL_regkind[(U8)OP(NEXTOPER(first))] == REG_ANY) &&
!(r->reganch & ROPT_ANCH) )
{
first = NEXTOPER(first);
goto again;
}
/* Scan is after the zeroth branch, first is atomic matcher. */
DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
(IV)(first - scan + 1)));
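So it looks like the loop skips certain nodes (OPEN, PLUS, MINMOD, and CURLY with a minimum greater than zero) until it reaches a matching atom. It makes sense, then, that /a+/ reports the atom as the "a", whereas /a*/ reports the atom as the "*", since STAR is not one of the skipped nodes.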
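What's weird, though, is that /.*/ makes "." the first atom instead of "*". (This appears to come from the last branch above, which steps past a STAR whose operand is REG_ANY when no OPEN was seen and the pattern is not already anchored.)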
_____________________________________________________
Jeff[japhy]Pinyan:
Perl,
regex,
and perl
hacker.
s++=END;++y(;-P)}y js++=;shajsj<++y(p-q)}?print:??;
|
|---|