#!/usr/bin/perl # usage: grep.html-here-docs filename # usage: grep.html-here-docs # (without filename argument, runs # against test data in the __END__ section) # grep out all here-documents: # prints the source filename at the top of the output. # prints the last subroutine definition name seen # for each here-document. use warnings; use strict; use re 'eval'; my $R_SP = qq{[\x20\t]}; my $R_QUOTE = qq{[\'\"]}; my $TERM = ''; my $p = qr! ^ # at beginning of line [^\x23]*? # match one or more non-comment chars (?: print ${R_SP}* # match print, one or more space | # OR \w+ ${R_SP}* = ${R_SP}* # assignment ) << ${R_SP}* # begin here-doc, zero or more spaces (${R_QUOTE}?) # an optional quote character (\w+) # match/capture one or more word characters (?(1) \1 ) # if a quote was matched, look for another. (?(2) (?{$TERM = $2}) ) !x; my @sub; my $new_here_doc = 1; if (@ARGV) { open FH, $ARGV[0] or die "error opening input: $!"; } else { open FH, "<&DATA" or die "error duping DATA!: $!"; } while () { if (1 == $.) { print "[here-docs in source file: @{[@ARGV ? $ARGV[0] : '']}]\n\n" } if (/^[ \t]*sub[ \t]*(\w+)/) { @sub = ($1,$.); } if (my $s = /$p/../^${TERM}/) { if ((1 == $s) && @sub) { print "\n\@\@sub:$sub[0]:$sub[1]\@\@\n\n"; @sub = (); } if ($new_here_doc) { print "\@\@BEGIN:$.\@\@\n"; $new_here_doc = 0; } print; if ($s =~ /E0$/) { print "\@\@END:$.\@\@\n\n"; $TERM = ''; $new_here_doc = 1; } } } close FH; __END__ sub bar { print <<'FOO'; blah FOO } sub ralph { print <