in reply to Re: Parsing ASP files
in thread Parsing ASP files

Awesome!

The test script is now working great:

#!/usr/bin/perl
# Demo: wrap every "abc" found in the HTML text of an ASP page in <b>...</b>
# while leaving the code inside <% ... %> blocks untouched.
#
# Approach: each ASP block is temporarily replaced by an <asp_pp> tag whose
# "content" attribute carries the entity-encoded ASP source. The document is
# then tokenized with HTML::PullParser, and the ASP block is rebuilt whenever
# an <asp_pp> start tag comes back from the parser.
#
# Known limitation: an ASP block that sits inside an HTML attribute value
# (e.g. <input value="<%= $abc %>" />) becomes a tag nested inside an
# attribute and is not handled by this approach.
use strict;
use warnings;

use HTML::PullParser;
use HTML::Entities;

my $doc = <<'EOF';
<input value="abc" /> abc &nbsp; abc
<% abc ( '<input value="abc" /> abc &nbsp; abc <span> %>', $abc ); %>
EOF

# Hide the ASP blocks. Quoted strings are matched as a unit so that a closing
# delimiter inside a string literal does not end the block early.
# Fixes relative to the first version of this pattern:
#   * the double-quoted alternative closed with a single quote, so
#     double-quoted strings were never recognised;
#   * both quoted alternatives now also accept empty string literals.
$doc =~ s{
    <%                          # opening ASP delimiter
    (                           # capture the ASP source
        (?:
            '[^']*'             # single-quoted string
          | "[^"]*"             # double-quoted string
          | \$?[\w\s\(\);,]+    # identifiers, whitespace and call punctuation
          | [^%]+               # anything else up to the next percent sign
        )+
    )
    %>                          # closing ASP delimiter
}{
    my $content = $1;
    # Void context: encodes in place, so quotes and angle brackets in the ASP
    # source cannot break out of the attribute value.
    HTML::Entities::encode_entities($content);
    qq{<asp_pp content="$content">}
}gsex;

# Ask for the same four fields on every event so that all tokens share one
# layout: [ event name, source text, tag name, attribute hash ].
my %options;
for my $event (qw( start text default )) {
    $options{$event} = "'$event', text, tagname, attr";
}

my $p = HTML::PullParser->new( doc => $doc, %options );

# The parser is fed in chunks; without this a run of text (and an "abc" in
# it) may be delivered in several pieces.
$p->unbroken_text(1);

my $output = '';
while ( my $token = $p->get_token() ) {
    my ( $event, $text, $tagname, $attr ) = @{$token};

    if ( $event eq 'text' ) {
        # Only real document text is highlighted.
        $text =~ s{abc}{<b>abc</b>}g;
    }
    elsif ( $event eq 'start' and $tagname eq 'asp_pp' ) {
        # The parser hands back the attribute value with entities decoded,
        # i.e. the original ASP source.
        $text = "<%$attr->{content}%>";
    }

    $output .= $text if defined $text;
}

print $output;

Replies are listed 'Best First'.
Re^3: Parsing ASP files
by thewebsi (Scribe) on Apr 05, 2012 at 03:47 UTC

    Looks like I spoke too soon.

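    For anyone else attempting something similar: the code above has a flaw, illustrated by the last line I added to the $doc test string below, where an ASP block sits inside an HTML attribute value. Swapping that block for an <asp_pp content="..."> tag leaves a tag, with its own quotes, nested inside the attribute, which breaks the parse. The solution I came up with essentially amounts to using a more benign format (plain bracketed text instead of a tag) for the temporary replacement string used to hide ASP code from HTML::Parser: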

    #!/usr/bin/perl
    # Demo: wrap every "abc" found in the HTML text of an ASP page in
    # <b>...</b> while leaving the code inside <% ... %> blocks untouched,
    # including blocks that sit inside an HTML attribute value.
    #
    # Approach: each ASP block is swapped for a plain-text placeholder of the
    # form [[asp_pp"<entity-encoded source>"asp_pp]]. The document is run
    # through HTML::PullParser, "abc" is highlighted in text tokens only, and
    # finally every placeholder is turned back into its ASP block.
    use strict;
    use warnings;

    use HTML::PullParser;
    use HTML::Entities;

    # Sample page, assembled line by line rather than with a heredoc so that
    # the indentation of this listing cannot leak into the data.
    # NOTE(review): the delimiter inside the quoted string is written %\> -
    # presumably so the non-greedy match below does not end the block early.
    my $doc = join '', map { "$_\n" } (
        q{<input value="abc" /> abc &nbsp; abc},
        q{<% abc ( '<input value="abc" /> abc &nbsp; abc <span> %\>', $abc ); %>},
        q{<input value="<%= $abc %>" abc />},
    );

    # Hide the ASP blocks. encode_entities (void context: in place) turns
    # every double quote in the ASP source into an entity, so the quotes of
    # the placeholder are guaranteed to delimit the payload.
    $doc =~ s{<%(.*?)%>}{
        my $content = $1;
        HTML::Entities::encode_entities($content);
        qq{[[asp_pp"$content"asp_pp]]}
    }gse;

    # Only two handlers are needed: "text" for document text and "default"
    # for everything else. Token layout: [ event, source text, is_cdata ].
    my %options;
    for my $event (qw( text default )) {
        $options{$event} = 'event, text, is_cdata';
    }

    my $p = HTML::PullParser->new( doc => $doc, %options );

    # The parser is fed in chunks; without this a placeholder could be split
    # over two text tokens and the "abc" inside it would be highlighted.
    $p->unbroken_text(1);

    my $output = '';
    while ( my $token = $p->get_token() ) {
        my ( $event, $text, $is_cdata ) = @{$token};
        next unless defined $text;

        if ( $event eq 'text' and !$is_cdata ) {
            # Placeholders are matched first and copied through unchanged, so
            # an "abc" inside hidden ASP code is never touched. The /e flag is
            # required: the replacement is an expression (the posted version
            # had its modifiers broken into "/gs +e" by line wrapping).
            $text =~ s{ ( \[\[asp_pp"[^"]*"asp_pp\]\] ) | (abc) }
                      { defined $1 ? $1 : "<b>$2</b>" }gsex;
        }

        $output .= $text;
    }

    # Restore the ASP blocks. [^"]* (not +) so that an empty block, <%%>, is
    # restored as well.
    $output =~ s{\[\[asp_pp"([^"]*)"asp_pp\]\]}{
        my $encoded = $1;
        '<%' . HTML::Entities::decode_entities($encoded) . '%>'
    }gse;

    print $output;
    Output:
    <input value="abc" /> <b>abc</b> &nbsp; <b>abc</b> <% abc ( '<input value="abc" /> abc &nbsp; abc <span> %>', $abc ); %> <input value="<%= $abc %>" abc />