Update: Because of some shortcomings in the code below (use of '&lgt;' instead of '<' as replacement string; inability to handle multiple '<' characters per backtick group; no handling of escaped '<' characters), please see instead a later and greater version posted here. Because it may still be useful as material for archeological study, I won't try to update or delete this code, and all the caveats still apply.
I'd be inclined to think a parser approach would be best for reasons of readability/maintainability. I'm fascinated by regexes, however, so here's my final try, albeit not totally un-hairy I must admit.
use 5.010; # needs various perl version 5.10 regex extensions use warnings; use strict; use Test::More 'no_plan'; use Test::NoWarnings; my @tests = ( 'no changes to these', [ ('') x 2 ], # unchanged pair [ ('`') x 2 ], [ ('``') x 2 ], [ ('```') x 2 ], [ ('<') x 2 ], [ ('<<') x 2 ], [ ('<<<') x 2 ], [ ('``<>``<>``') x 2 ], [ ('is `not <this> one ') x 2 ], [ ('is not <this> ` one ') x 2 ], [ ('is `not ` <this> one ') x 2 ], [ ('is not <this> ` one`') x 2 ], [ ('is not ``<this> ` one`') x 2 ], [ ('is `not` <this> ` one`') x 2 ], [ ('is \\\\`not <this> one ') x 2 ], [ ('is not <this> \\\\` one ') x 2 ], [ ('is \\\\`not ` <this> one ') x 2 ], [ ('is `not \\\\` <this> one ') x 2 ], [ ('is not <this> \\\\` one `') x 2 ], [ ('is not <this> ` one \\\\`') x 2 ], [ ('``<A>``<B>``<C>``<D>`') x 2 ], 'all these should change', [ '`<A>``<B>``<C>``<D>`', '`&lgt;A>``&lgt;B>``&lgt;C>``&lgt;D>`', ], [ '`<A>``<B>``<C>``<D>``', '`&lgt;A>``&lgt;B>``&lgt;C>``&lgt;D>``', ], [ '```<A>``<B>``<C>``<D>`', '```&lgt;A>``&lgt;B>``&lgt;C>``&lgt;D>`', ], [ 'u `v <A> ` w `x <B> y` z', 'u `v &lgt;A> ` w `x &lgt;B> y` z', ], [ 'u \` `v <A> ` w `x <B> y` z', 'u \` `v &lgt;A> ` w `x &lgt;B> y` z', ], [ 'u `v \` <A> ` w `x <B> y` z', 'u `v \` &lgt;A> ` w `x &lgt;B> y` z', ], [ 'u `v <A> \` ` w `x <B> y` z', 'u `v &lgt;A> \` ` w `x &lgt;B> y` z', ], [ 'u `v <A> ` \` w `x <B> y` z', 'u `v &lgt;A> ` \` w `x &lgt;B> y` z', ], [ 'u `v <A> ` w \` `x <B> y` z', 'u `v &lgt;A> ` w \` `x &lgt;B> y` z', ], [ 'u `v <A> ` w `x \` <B> y` z', 'u `v &lgt;A> ` w `x \` &lgt;B> y` z', ], [ 'u `v <A> ` w `x <B> \` y` z', 'u `v &lgt;A> ` w `x &lgt;B> \` y` z', ], [ 'u `v <A> ` w `x <B> y` \` z', 'u `v &lgt;A> ` w `x &lgt;B> y` \` z', ], [ 'is `my <string>` that `also <this> one` too', 'is `my &lgt;string>` that `also &lgt;this> one` too', ], [ 'is \\\\` `my \\\\` <string>` that `also <this> \\\\` one` \\\\` t +oo', 'is \\\\` `my \\\\` &lgt;string>` that `also &lgt;this> \\\\` one` + \\\\` too', ], [ 'x \\\\` `y \\\\` <A>` z `v <B> \\\\` w` \\\\` x', 'x \\\\` `y \\\\` &lgt;A>` z `v &lgt;B> \\\\` w` \\\\` x', ], [ 'x y \\\\` <A>` z `v <B> \\\\` w` \\\\` x', 'x y \\\\` &lgt;A>` z `v &lgt;B> \\\\` w` \\\\` x', ], [ 'is \`my <NO> that `but <this> one` yes', 'is \`my <NO> that `but &lgt;this> one` yes', ], [ 'is \\\\`my <this> that `but <this> one` no', 'is \\\\`my &lgt;this> that `but <this> one` no', ], 'is this acceptable for < that are unbalanced?', [ 'is `this <UN <BALANCED> `ok?', 'is `this &lgt;UN <BALANCED> `ok?', ], ); FUNT: for my $func_name (qw(replace_lt_3 replace_lt_4)) { note "\n=== testing $func_name() ===\n\n"; *replace_lt = do { no strict 'refs'; *$func_name; }; VECTOR: for my $ar_vector (@tests) { if (not ref $ar_vector) { note $ar_vector; next VECTOR; } my ($string, $expected) = @$ar_vector; is replace_lt($string), $expected, qq{'$string' -> '$expected'}; } # end for VECTOR } # end for FUNT note "\n=== done testing functions ===\n\n"; done_testing; exit; # Functions UNder Test ############################################# sub replace_lt_3 { # needs 5.10+: uses possessives and \K my ($string, ) = @_; # replace < pattern in all backtick (bt) groups in a string. # a backtick group begins and ends with a ` (backtick) # that is NOT escaped. if the escape is itself escaped, # it does NOT affect the bt. so: # ` can begin or end a bt group; # \` can NOT begin or end a bt group (ordinary char); # \\` escape is escaped: bt can begin/end a bt group. # an un-unescaped backtick: a true backtick. my $tru_bt = qr{ (?<! (?<! \\) \\) ` }xmso; # any character NOT a true backtick. my $not_bt = qr{ (?! $tru_bt) . }xmso; # any character NOT a true backtick and also NOT a < (lt). my $not_bt_lt = qr{ (?! <) $not_bt }xmso; # a bt-group NOT containing an lt. my $empty_bt = qr{ (?> $tru_bt $not_bt_lt* $tru_bt) }xmso; (my $replacement = $string) =~ s{ (?: # after match starts: eat lookahead left over from below \G (?! \A) $not_bt*+ $tru_bt | # at match start: immediately begin matching pre-bt-lt stuff \A ) # match all stuff before bt-group containing an lt. (?: $not_bt*+ $empty_bt*+)*+ # match start of bt-group up to an lt. $tru_bt $not_bt_lt*+ # ignore all characters matched so far. \K # match (and replace) lt if followed by end of bt group. < (?= $not_bt* $tru_bt) } {&lgt;}xmsgo; return $replacement; } sub replace_lt_4 { # needs 5.10+ regex extensions my ($string, ) = @_; # replace < pattern in all backtick (bt) groups in a string. # a backtick group begins and ends with a ` (backtick) # that is NOT escaped. if the escape is itself escaped, # it does NOT affect the bt. so: # ` can begin or end a bt group; # \` can NOT begin or end a bt group (ordinary char); # \\` escape is escaped: bt can begin/end a bt group. (my $replacement = $string) =~ s{ (?: # after match starts: eat lookahead left over from below \G (?! \A) (?&NOT_BT)*+ (?&TRU_BT) | # at match start: immediately begin matching pre-bt-lt stuff \A ) # match all stuff before bt-group containing an lt. (?: (?&NOT_BT)*+ (?&EMPTY_BT)*+)*+ # match start of bt-group up to an lt. (?&TRU_BT) (?&NOT_BT_LT)*+ # ignore all characters matched so far. \K # match (and replace) lt if followed by end of bt group. < (?= (?&NOT_BT)* (?&TRU_BT)) (?(DEFINE) # an un-unescaped backtick: a true backtick. (?<TRU_BT> (?<! (?<! \\) \\) `) # any character NOT a true backtick. (?<NOT_BT> (?! (?&TRU_BT)) .) # any character NOT a true backtick and also NOT a < (lt). (?<NOT_BT_LT> (?! <) (?&NOT_BT)) # a bt-group NOT containing an lt. (?<EMPTY_BT> (?> (?&TRU_BT) (?&NOT_BT_LT)*+ (?&TRU_BT))) ) } {&lgt;}xmsg; return $replacement; }
Give a man a fish: <%-{-{-{-<
In reply to Re: Replacing left angle bracket with HTML entity when between two backtick characters
by AnomalousMonk
in thread Replacing left angle bracket with HTML entity when between two backtick characters
by nysus
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |