my $FH; my $Buf; # Consume matching text, if found next: sub eat { my( $re, # Regex or string to match against next part of doc. $keep_ws, # Don't skip over whitespace first? $sv, # Append matched text to referenced scalar (if any). $to_end, # Allow match to hit end of $Buf? $peek, # Keep pos() unchanged? ) = @_; $Buf =~ /\G\s+/gc if ! $keep_ws; my $start = pos( $Buf ); $re = _compile( $re ); # Prepends \G also do { return 0 if $Buf !~ /$re/gc; } while( ! $to_end && _hit_end(\$start) ); $$sv .= substr( $Buf, $start, pos($Buf)-$start ) if $sv; pos($Buf) = $start if $peek; return 1; } # Tell me if next text matches (without consuming, except maybe whitespace): sub peek { my( $re, # Regex or string to match against next part of doc. $keep_ws, # Don't skip over whitespace first. $sv, # Append matched text to referenced scalar (if any). $to_end, # Allow match to hit end of $Buf. ) = @_; return eat( $re, $keep_ws, $sv, $to_end, 1 ); } # Consume matching text! If not found next, then die: sub swallow { my( $re, # Regex or string to match against next part of doc. $keep_ws, # Don't skip over whitespace first. $sv, # Append matched text to referenced scalar (if any). $to_end, # Allow match to hit end of $Buf. $err, # Error name if no match. @args, # Extra details for above error. ) = @_; return 1 if eat( $re, $keep_ws, $sv, $to_end ); throw( $err ||= 'Internal bug in parser', @args ); } sub _compile { my( $re ) = @_; our %Compiled; return $Compiled{$re} ||= qr/\G$re/; } # Keep sliding window of document in $Buf: sub _hit_end { my( $sv_start ) = @_; my $pos = pos( $Buf ); return 0 if $pos < length($Buf)-1024; # Darn! Match got too close to end of $Buf. if( 2048 < $$sv_start ) { # Copy to left as little of $Buf as reasonable: my $skip = $$sv_start - 1024; $$sv_start -= $skip; # This way may just keep consuming memory: # substr( $Buf, 0, $skip, '' ); $Buf = substr( $Buf, $skip ); } sysread( $FH, $Buf, 1024*1024, length($Buf) ); # Tell caller to re-do match; have it compare at same part of $Buf again: pos($Buf) = $$sv_start; return 1; }