in reply to Re^5: POD style regex for inline HTML elements
in thread POD style regex for inline HTML elements

Dear Anonymous Monk, it appears this will not work in Perl 5.8.8. The ?^ returns an error too.

Well, sorry about that; I forgot (5.16 is the first perl in my path). That's just the way it stringifies on the newer perls, so you can simply ditch the ^.

With actual 5.8.8 it stringifies as

(?-xism:(?-xism:(?:\<(?:(?>[^\<\>]+)|(??{$Regexp::Common::balanced [0]}))*\>)))

So here is the updated file; the tests still pass, with actual 5.8.8 too :)

#!/usr/bin/perl --
#
# Self-testing demo: expand POD-style inline markup such as
#
#     SPAN<text I<italic text>|class="span_class">
#
# into the matching inline HTML elements.  Run it directly; the two
# Test::More checks below exercise single and double nesting.
#
# Regexp::Common and Data::Dump are not loaded: the balanced-bracket
# pattern is spelled out by hand below and nothing else used them.
use strict;
use warnings;
use Test::More qw' no_plan ';

# TRACE/DEBUG are predeclared so they can be called without parentheses.
sub TRACE;
sub DEBUG;
*TRACE = *DEBUG = sub { print STDERR @_, "\n" };
#~ *TRACE = *DEBUG = sub { };    # swap this in to silence the tracing

# Declared in its own statement so the name is already known when the
# (??{ ... }) block inside the pattern is compiled.
our $re_balanced_angles;

# History of the pattern, kept for reference.  The first form is what
# Regexp::Common's balanced '<>' pattern stringifies to on newer perls;
# the (?^: groups are not understood by 5.8.8, and the extra (?: groups
# add nothing.
#~ $re_balanced_angles = qr{(?^:(?^:(?:\<(?:(?>[^\<\>]+)|(??{ $re_balanced_angles }))*\>)))};
#~ $re_balanced_angles = qr{(?:(?:(?:\<(?:(?>[^\<\>]+)|(??{ $re_balanced_angles }))*\>)))};
#~ $re_balanced_angles = qr{
#~     (?:
#~         (?:
#~             (?:
#~                 \<
#~                     (?:
#~                         (?>
#~                             [^\<\>]+
#~                         )
#~                         |
#~                         (??{ $re_balanced_angles })
#~                     )*
#~                 \>
#~             )
#~         )
#~     )}x;
## still works the same

# One "<", then any mix of bracket-free runs and nested balanced groups
# (matched by re-entering this same pattern), then the closing ">".
$re_balanced_angles = qr{
    \<
        (?:
            (?>
                [^\<\>]+
            )
            |
            (??{ $re_balanced_angles })
        )*
    \>
}x;

# Tag names recognised in front of "<...>"; matched case-sensitively.
our $allowed = join '|', qw[
    ABBR ACRONYM B BIG CITE CODE DFN EM I KBD SAMP SMALL SPAN STRONG
    SUB SUP TT VAR
];

{
    my $in = 'SPAN<text I<italic text>|class="span_class"> some more text B<bold text>.';
    my $out = shabba($in);
    my $wanted = '<span class="span_class">text <i>italic text</i></span> some more text <b>bold text</b>.';
    is( $out, $wanted, 'shabba' );
}

{
    my $in = 'SPAN<text I<italic B<and bold> text>|class="span_class"> some more text B<bold text>.';
    my $out = shabba($in);
    my $wanted = '<span class="span_class">text <i>italic <b>and bold</b> text</i></span> some more text <b>bold text</b>.';
    is( $out, $wanted, 'shabba' );
}

# shabba( $text, $dent )
#
# Convert TAG<...> markup in $text to HTML and return the result.
# $dent is the nesting depth; it is only used in trace output and
# defaults to 0.
#
# The text is scanned in a lexical copy with its own pos().  The earlier
# version scanned `local $_`; before perl 5.14 `local $_` does not strip
# magic from $_, so the recursive call made for nested markup could wipe
# out the caller's pos() and the caller would rescan from the start
# forever.
sub shabba {
    my ( $text, $dent ) = @_;
    $dent ||= 0;
    return '' unless defined $text;

    pos($text) = 0;
    my $ret = '';

    SHABBALOOP:
    while ( length($text) > pos($text) ) {

        # Whitespace is copied through unchanged.
        if ( $text =~ m{\G(\s+)}gcx ) {
            $ret .= $1;
            next SHABBALOOP;
        }

        # TAG<...>; previously:
        #~ \G( $allowed )( $RE{balanced}{-parens=>'<>'} )
        if ( $text =~ m{\G( $allowed )( $re_balanced_angles )}gcx ) {
            my ( $tag, $body ) = ( $1, $2 );
            TRACE "# $dent allowed<> { $tag ( $body ) }";
            $ret .= shabba_allowed( $tag, $body, $dent );
            next SHABBALOOP;
        }

        # A run of text that ends in whitespace.  Requiring the trailing
        # whitespace makes the run back off before a word that touches
        # "<", so a tag name is left for the branch above.
        # Patterns tried on the way here:
        #~ confusion :)
        #~ \G(\w+\b)
        #~ fail #~ \G([^<]+)(?!:$allowed)\b
        #~ \w+\b
        #~ \G([^<]+?)(?!:$allowed\<)\b
        #~ fail #~ \G([^<]+?)(?!:$allowed\<)
        #~ fail #~ \G([^<]+)(?!:$allowed\<)
        #~ fail \G([^<]+)(?!:\<)
        #~ inch #~ \G([^<]+?)(?!:\<)
        #~ \G([^<]+?\b[^<])
        #~ \G([^<]+?[^<])
        #~ FAIL #~ \G([^<]+[^<])
        #~ \G([^<]+[^<]\b)
        #~ 2same#~ \G( (?!:$allowed\<) .+ )
        #~ 2same#~ \G( .+(?!:$allowed\<) )
        if ( $text =~ m{\G([^<]+\s)}gcx ) {
            my $chunk = $1;
            TRACE "# $dent text { $chunk }";
            $ret .= shabba_text($chunk);
            next SHABBALOOP;
        }

        # An angle bracket that is not part of TAG<...>: stop and return
        # what has been converted so far.
        if ( $text =~ m{\G([\<\>])}gcx ) {
            my $stray = $1;
            TRACE "## $dent error-stray<> { $stray } at pos(@{[ pos($text) ]})";
            last SHABBALOOP;
        }

        # Anything else: copy one non-space character and try again.
        if ( $text =~ m{\G(\S)}gcx ) {
            my $char = $1;
            TRACE "# $dent inch-forward { $char }";
            $ret .= shabba_text($char);
            next SHABBALOOP;
        }
    }

    return $ret;
}

# shabba_allowed( $tag, $stuff, $dent )
#
# Render one TAG<...> construct.  $stuff is the bracketed part including
# its outer "<" and ">".  A trailing "|..." containing no angle brackets
# is copied verbatim into the opening tag as its attributes.  Content
# that still contains angle brackets is converted by shabba() one level
# deeper.  The element name is $tag lower-cased.
sub shabba_allowed {
    my ( $tag, $stuff, $dent ) = @_;
    $stuff = '' unless defined $stuff;

    # Drop the outermost brackets that were captured with the content.
    $stuff = $1 if $stuff =~ m{^<(.*)>$}s;

    my $atts = '';
    if ( $stuff =~ s{\|([^<>]+)$}{}s ) {
        $atts = " $1";    ## shabba_allowed_atts($tag,$1);
    }

    my $ret = '';
    $ret .= "<\L$tag\E" if $tag;
    $ret .= $atts;
    $ret .= ">" if $tag;

    if ( length $stuff and $stuff =~ m{[<>]} ) {
        $ret .= shabba( $stuff, ( $dent || 0 ) + 1 );    ## recurse
    }
    else {
        $ret .= $stuff;
    }

    $ret .= "</\L$tag\E>" if $tag;
    return $ret;
}

# shabba_text( @pieces )
#
# Hook for plain text: currently just concatenates its arguments.
sub shabba_text {
    return join '', @_;
}

Replies are listed 'Best First'.
Re^7: POD style regex for inline HTML elements
by Lady_Aleena (Priest) on May 17, 2014 at 23:51 UTC

    Will you please tell me what I am missing after I took all of the comments out? I'm still getting an infinite loop when these are nested with the error Use of uninitialized value in numeric gt (>) at files/lib/Base/HTML/Inline.pm line 32, <DATA> line 19.

    package Base::HTML::Inline;
    use strict;
    use warnings;
    use Exporter qw(import);
    our @EXPORT_OK = qw(inline);

    # Written by an Anonymous Monk on PerlMonks
    # (http://www.perlmonks.org/?node_id=1028699)
    #
    # Converts POD-style inline markup, e.g.
    #
    #     SPAN<text I<italic text>|class="span_class">
    #
    # into the matching inline HTML elements.
    #
    # Test::More, Regexp::Common and Data::Dump are not loaded here: none
    # of them is used by this module, and Test::More with 'no_plan' would
    # install its test-harness END handling in every program that loads
    # the module.

    # TRACE/DEBUG are predeclared so they can be called without
    # parentheses.  They print their arguments to STDERR.
    sub TRACE;
    sub DEBUG;
    *TRACE = *DEBUG = sub { print STDERR @_, "\n" };

    # Declared in its own statement so the name is already known when the
    # (??{ ... }) block inside the pattern is compiled.
    our $re_balanced_angles;

    # One "<", then any mix of bracket-free runs and nested balanced
    # groups (matched by re-entering this same pattern), then ">".
    $re_balanced_angles = qr{\<(?:(?>[^\<\>]+)|(??{ $re_balanced_angles }))*\>}x;

    # Tag names recognised in front of "<...>"; matched case-sensitively.
    our $allowed = join '|', qw[
        A ABBR ACRONYM B BIG CITE CODE DFN EM I KBD Q SAMP SMALL SPAN
        STRONG SUB SUP TT VAR
    ];

    # inline( $text, $dent )
    #
    # Convert TAG<...> markup in $text to HTML and return the result.
    # $dent is the nesting depth; it is only used in trace output and
    # defaults to 0.
    #
    # The text is scanned in a lexical copy with its own pos().  The
    # earlier version scanned `local $_`; before perl 5.14 `local $_`
    # does not strip magic from $_, so the recursive call made for nested
    # markup could wipe out the caller's pos().  The caller then warned
    # "Use of uninitialized value in numeric gt (>)" and rescanned the
    # same text from the start forever.
    sub inline {
        my ( $text, $dent ) = @_;
        $dent ||= 0;
        return '' unless defined $text;

        pos($text) = 0;
        my $ret = '';

        INLINE_LOOP:
        while ( length($text) > pos($text) ) {

            # Whitespace is copied through unchanged.
            if ( $text =~ m{\G(\s+)}gcx ) {
                $ret .= $1;
                next INLINE_LOOP;
            }

            # TAG<...> with balanced angle brackets.
            if ( $text =~ m{\G( $allowed )( $re_balanced_angles )}gcx ) {
                my ( $tag, $body ) = ( $1, $2 );
                TRACE "# $dent allowed<> { $tag ( $body ) }";
                $ret .= inline_allowed( $tag, $body, $dent );
                next INLINE_LOOP;
            }

            # A run of text that ends in whitespace.  Requiring the
            # trailing whitespace makes the run back off before a word
            # that touches "<", leaving a tag name for the branch above.
            if ( $text =~ m{\G([^<]+\s)}gcx ) {
                my $chunk = $1;
                TRACE "# $dent text { $chunk }";
                $ret .= inline_text($chunk);
                next INLINE_LOOP;
            }

            # An angle bracket that is not part of TAG<...>: stop and
            # return what has been converted so far.
            if ( $text =~ m{\G([\<\>])}gcx ) {
                my $stray = $1;
                TRACE "## $dent error-stray<> { $stray } at pos(@{[ pos($text) ]})";
                last INLINE_LOOP;
            }

            # Anything else: copy one non-space character and try again.
            if ( $text =~ m{\G(\S)}gcx ) {
                my $char = $1;
                TRACE "# $dent inch-forward { $char }";
                $ret .= inline_text($char);
                next INLINE_LOOP;
            }
        }

        return $ret;
    }

    # inline_allowed( $tag, $stuff, $dent )
    #
    # Render one TAG<...> construct.  $stuff is the bracketed part
    # including its outer "<" and ">".  A trailing "|..." containing no
    # angle brackets is copied verbatim into the opening tag as its
    # attributes.  Content that still contains angle brackets is
    # converted by inline() one level deeper.  The element name is $tag
    # lower-cased.
    sub inline_allowed {
        my ( $tag, $stuff, $dent ) = @_;
        $stuff = '' unless defined $stuff;

        # Drop the outermost brackets that were captured with the content.
        $stuff = $1 if $stuff =~ m{^<(.*)>$}s;

        my $atts = '';
        if ( $stuff =~ s{\|([^<>]+)$}{}s ) {
            $atts = " $1";    ## inline_allowed_atts($tag,$1);
        }

        my $ret = '';
        $ret .= "<\L$tag\E" if $tag;
        $ret .= $atts;
        $ret .= ">" if $tag;

        if ( length $stuff and $stuff =~ m{[<>]} ) {
            $ret .= inline( $stuff, ( $dent || 0 ) + 1 );    ## recurse
        }
        else {
            $ret .= $stuff;
        }

        $ret .= "</\L$tag\E>" if $tag;
        return $ret;
    }

    # inline_text( @pieces )
    #
    # Hook for plain text: currently just concatenates its arguments.
    sub inline_text {
        return join '', @_;
    }

    1;
    No matter how hysterical I get, my problems are not time sensitive. So, relax, have a cookie, and a very nice day!
    Lady Aleena

      Will you please tell me what I am missing after I took all of the comments out? I'm still getting an infinite loop when these are nested with the error Use of uninitialized value in numeric gt (>) at files/lib/Base/HTML/Inline.pm line 32, <DATA> line 19.

      Well, the first thing missing is a call that actually exercises the code ... word problems are hard :) When *what* is nested?