sub _decode_string { my $pos = pos; # Extract string with escaped characters m!\G((?:(?:[^\x00-\x1f\\"]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4})){0,32766})*)!gc; # segfault under 5.8.x in t/20-mojo-json.t #83 my $str = $1; #### sub _decode_string { my $pos = pos(); m{\G(?:[^\\"]+|\\.)*}gc; my $str = substr( $_, $pos, pos()-$pos ); Fail("Unclosed string",$pos) if ! m{"}gc; #### sub parse_value { if( peek( qr{['"]} ) ) { parse_string(); ... sub parse_string { my $start = get_pos(); # now a seek position, not a string offset my $str = ''; if( eat("'") ) { # Inside '...', '' is a literal ', all else is just literal: while( ! peek("'(?!')",1) ) { # ^ keep leading whitespace if( eat("''",1) ) { $str .= "'"; } else { swallow("[^']+",1,\$str,1); # fatal if no match # Can match to $Buf end ^ } } swallow("'",1); } else { swallow('"'); while( peek('[^"]',1) ) { while( peek('\\',1) ) { $str .= parse_escape(); } eat( qr{[^\\"]*}, 1, \$str, 1 ); } swallow('"',1,'',0,'Unclosed string',$start); } return $str; } #### my $FH; my $Buf; # Consume matching text, if found next: sub eat { my( $re, # Regex or string to match against next part of doc. $keep_ws, # Don't skip over whitespace first? $sv, # Append matched text to referenced scalar (if any). $to_end, # Allow match to hit end of $Buf? $peek, # Keep pos() unchanged? ) = @_; $Buf =~ /\G\s+/gc if ! $keep_ws; my $start = pos( $Buf ); $re = _compile( $re ); # Prepends \G also do { return 0 if $Buf !~ /$re/gc; } while( ! $to_end && _hit_end(\$start) ); $$sv .= substr( $Buf, $start, pos($Buf)-$start ) if $sv; pos($Buf) = $start if $peek; return 1; } # Tell me if next text matches (without consuming, except maybe whitespace): sub peek { my( $re, # Regex or string to match against next part of doc. $keep_ws, # Don't skip over whitespace first. $sv, # Append matched text to referenced scalar (if any). $to_end, # Allow match to hit end of $Buf. ) = @_; return eat( $re, $keep_ws, $sv, $to_end, 1 ); } # Consume matching text! If not found next, then die: sub swallow { my( $re, # Regex or string to match against next part of doc. $keep_ws, # Don't skip over whitespace first. $sv, # Append matched text to referenced scalar (if any). $to_end, # Allow match to hit end of $Buf. $err, # Error name if no match. @args, # Extra details for above error. ) = @_; return 1 if eat( $re, $keep_ws, $sv, $to_end ); throw( $err ||= 'Internal bug in parser', @args ); } sub _compile { my( $re ) = @_; our %Compiled; return $Compiled{$re} ||= qr/\G$re/; } # Keep sliding window of document in $Buf: sub _hit_end { my( $sv_start ) = @_; my $pos = pos( $Buf ); return 0 if $pos < length($Buf)-1024; # Darn! Match got too close to end of $Buf. if( 2048 < $$sv_start ) { # Copy to left as little of $Buf as reasonable: my $skip = $$sv_start - 1024; $$sv_start -= $skip; # This way may just keep consuming memory: # substr( $Buf, 0, $skip, '' ); $Buf = substr( $Buf, $skip ); } sysread( $FH, $Buf, 1024*1024, length($Buf) ); # Tell caller to re-do match; have it compare at same part of $Buf again: pos($Buf) = $$sv_start; return 1; }