in reply to Re^2: Parsing ASP files
in thread Parsing ASP files

Looks like I spoke too soon.

For anyone else attempting something similar: the code above had a flaw, which is illustrated by the last line I added to the $doc test string below. The solution I came up with essentially amounts to using a more benign format for the temporary replacement string that hides the ASP code from HTML::Parser:

#!/usr/bin/perl
# Wrap every "abc" found in the plain text of an HTML document in <b>...</b>
# while leaving HTML tags and embedded ASP blocks (<% ... %>) untouched.
#
# Approach:
#   1. Replace each ASP block with a placeholder of the form
#      [[asp_pp"<entity-encoded ASP code>"asp_pp]] so the HTML parser never
#      sees the raw ASP code.
#   2. Tokenize the result with HTML::PullParser and highlight "abc" only
#      inside non-CDATA text tokens, skipping over any placeholders.
#   3. Turn the placeholders back into the original ASP blocks.
use strict;
use warnings;

use HTML::PullParser;
use HTML::Entities;

# Sample document. The ASP block on the second line contains HTML-looking
# text, and its string literal spells the closing marker as "%\>" so that
# the non-greedy match in step 1 does not stop there.
my $doc = <<'EOF';
<input value="abc" /> abc &nbsp; abc
<% abc ( '<input value="abc" /> abc &nbsp; abc <span> %\>', $abc ); %>
<input value="<%= $abc %>" abc />
EOF

# Matches one placeholder. encode_entities() turns every double quote in the
# ASP code into &quot;, so the payload between the quotes never contains a
# literal '"'. The payload may be empty (an empty "<%%>" block), hence '*'.
my $placeholder = qr/\[\[asp_pp"[^"]*"asp_pp\]\]/;

# Step 1: hide the ASP blocks behind placeholders.
$doc =~ s{<%(.*?)%>}{
    my $content = $1;
    HTML::Entities::encode_entities($content);
    '[[asp_pp"' . $content . '"asp_pp]]';
}gse;

# Step 2: request the same three values (event name, original source text,
# CDATA flag) for text events and for every other event.
my %options = map { $_ => 'event, text, is_cdata' } qw(text default);

my $p = HTML::PullParser->new( doc => $doc, %options );

my $output = '';

while ( my $token = $p->get_token ) {
    my ( $event, $text, $is_cdata ) = @{$token};

    if ( $event eq 'text' and not $is_cdata ) {

        # The placeholder alternative comes first so that an "abc" inside
        # hidden ASP code is consumed as part of the placeholder and is
        # copied through unchanged instead of being highlighted.
        $text =~ s{($placeholder)|(abc)}{ defined $1 ? $1 : "<b>$2</b>" }gse;
    }

    $output .= $text;
}

# Step 3: restore the original ASP blocks from the placeholders.
$output =~ s{\[\[asp_pp"([^"]*)"asp_pp\]\]}{
    '<%' . HTML::Entities::decode_entities($1) . '%>';
}gse;

print $output;
Output:
<input value="abc" /> <b>abc</b> &nbsp; <b>abc</b> <% abc ( '<input value="abc" /> abc &nbsp; abc <span> %\>', $abc ); %> <input value="<%= $abc %>" abc />