#!/usr/bin/perl
# usage: grep.html-here-docs filename
# usage: grep.html-here-docs
# (without filename argument, runs
# against test data in the __END__ section)
# grep out all here-documents:
# prints the source filename at the top of the output.
# prints the last subroutine definition name seen
# for each here-document.
use warnings;
use strict;
use re 'eval';
my $R_SP = qq{[\x20\t]};
my $R_QUOTE = qq{[\'\"]};
my $TERM = '';
my $p = qr!
^ # at beginning of line
[^\x23]*? # match one or more non-comment chars
(?:
print ${R_SP}* # match print, one or more space
| # OR
\w+ ${R_SP}* = ${R_SP}* # assignment
)
<< ${R_SP}* # begin here-doc, zero or more spaces
(${R_QUOTE}?) # an optional quote character
(\w+) # match/capture one or more word characters
(?(1) \1 ) # if a quote was matched, look for another.
(?(2) (?{$TERM = $2}) )
!x;
my @sub;
my $new_here_doc = 1;
if (@ARGV) {
open FH, $ARGV[0] or die "error opening input: $!";
} else {
open FH, "<&DATA" or die "error duping DATA!: $!";
}
while () {
if (1 == $.) { print "[here-docs in source file: @{[@ARGV ? $ARGV[0] : '']}]\n\n" }
if (/^[ \t]*sub[ \t]*(\w+)/) {
@sub = ($1,$.);
}
if (my $s = /$p/../^${TERM}/) {
if ((1 == $s) && @sub) {
print "\n\@\@sub:$sub[0]:$sub[1]\@\@\n\n";
@sub = ();
}
if ($new_here_doc) {
print "\@\@BEGIN:$.\@\@\n";
$new_here_doc = 0;
}
print;
if ($s =~ /E0$/) {
print "\@\@END:$.\@\@\n\n";
$TERM = '';
$new_here_doc = 1;
}
}
}
close FH;
__END__
sub bar {
print <<'FOO';
blah
FOO
}
sub ralph { print <