while( $pattern =~ m< # Tokenize the potential regex \G # Don't let it skip bits ( # Return what we find \\x.. # A hexidecimal character | \\0[0-7]{0,3} # An octal character | \\\D # A boring escaped character | [^\[\{] # Not '[' nor '{' so treat as a token #} | \{(?=\D) # Literal but unescaped '{' #} | \{\d+,?\d*} # Bounded repetitions | \[ # '[' starts a character class \^? # '^' can go in front of the literal ']' \]? # ']' at start is taken literally (?: # Inside, there can be some subtokens [^\]] # Not '[' so isn't a subtoken | \[(?=[^.=:]) # '[' but not '[.', '[=', nor '[:' | \[ # Must be '[' of '[.', '[=', or '[:' [^\]]* # Anything but the closing ']' \] # ']' closes out subtoken )* # Any number of subtokens \] # ']' closes out the character class | (.) # Found something invalid (sets $2) ) >xsg ) { Strange *+?{} on zero-length expression