use 5.010; # needs various perl version 5.10 regex extensions use warnings; use strict; use Test::More 'no_plan'; use Test::NoWarnings; my @tests = ( 'no changes to these', [ ('') x 2 ], # unchanged pair [ ('`') x 2 ], [ ('``') x 2 ], [ ('```') x 2 ], [ ('<') x 2 ], [ ('<<') x 2 ], [ ('<<<') x 2 ], [ ('``<>``<>``') x 2 ], [ ('is `not one ') x 2 ], [ ('is not ` one ') x 2 ], [ ('is `not ` one ') x 2 ], [ ('is not ` one`') x 2 ], [ ('is not `` ` one`') x 2 ], [ ('is `not` ` one`') x 2 ], [ ('is \\\\`not one ') x 2 ], [ ('is not \\\\` one ') x 2 ], [ ('is \\\\`not ` one ') x 2 ], [ ('is `not \\\\` one ') x 2 ], [ ('is not \\\\` one `') x 2 ], [ ('is not ` one \\\\`') x 2 ], [ ('`````````') x 2 ], 'all these should change', [ '````````', '`&lgt;A>``&lgt;B>``&lgt;C>``&lgt;D>`', ], [ '`````````', '`&lgt;A>``&lgt;B>``&lgt;C>``&lgt;D>``', ], [ '``````````', '```&lgt;A>``&lgt;B>``&lgt;C>``&lgt;D>`', ], [ 'u `v ` w `x y` z', 'u `v &lgt;A> ` w `x &lgt;B> y` z', ], [ 'u \` `v ` w `x y` z', 'u \` `v &lgt;A> ` w `x &lgt;B> y` z', ], [ 'u `v \` ` w `x y` z', 'u `v \` &lgt;A> ` w `x &lgt;B> y` z', ], [ 'u `v \` ` w `x y` z', 'u `v &lgt;A> \` ` w `x &lgt;B> y` z', ], [ 'u `v ` \` w `x y` z', 'u `v &lgt;A> ` \` w `x &lgt;B> y` z', ], [ 'u `v ` w \` `x y` z', 'u `v &lgt;A> ` w \` `x &lgt;B> y` z', ], [ 'u `v ` w `x \` y` z', 'u `v &lgt;A> ` w `x \` &lgt;B> y` z', ], [ 'u `v ` w `x \` y` z', 'u `v &lgt;A> ` w `x &lgt;B> \` y` z', ], [ 'u `v ` w `x y` \` z', 'u `v &lgt;A> ` w `x &lgt;B> y` \` z', ], [ 'is `my ` that `also one` too', 'is `my &lgt;string>` that `also &lgt;this> one` too', ], [ 'is \\\\` `my \\\\` ` that `also \\\\` one` \\\\` too', 'is \\\\` `my \\\\` &lgt;string>` that `also &lgt;this> \\\\` one` \\\\` too', ], [ 'x \\\\` `y \\\\` ` z `v \\\\` w` \\\\` x', 'x \\\\` `y \\\\` &lgt;A>` z `v &lgt;B> \\\\` w` \\\\` x', ], [ 'x y \\\\` ` z `v \\\\` w` \\\\` x', 'x y \\\\` &lgt;A>` z `v &lgt;B> \\\\` w` \\\\` x', ], [ 'is \`my that `but one` yes', 'is \`my that `but &lgt;this> one` yes', ], [ 'is \\\\`my that `but one` no', 'is \\\\`my &lgt;this> that `but one` no', ], 'is this acceptable for < that are unbalanced?', [ 'is `this `ok?', 'is `this &lgt;UN `ok?', ], ); FUNT: for my $func_name (qw(replace_lt_3 replace_lt_4)) { note "\n=== testing $func_name() ===\n\n"; *replace_lt = do { no strict 'refs'; *$func_name; }; VECTOR: for my $ar_vector (@tests) { if (not ref $ar_vector) { note $ar_vector; next VECTOR; } my ($string, $expected) = @$ar_vector; is replace_lt($string), $expected, qq{'$string' -> '$expected'}; } # end for VECTOR } # end for FUNT note "\n=== done testing functions ===\n\n"; done_testing; exit; # Functions UNder Test ############################################# sub replace_lt_3 { # needs 5.10+: uses possessives and \K my ($string, ) = @_; # replace < pattern in all backtick (bt) groups in a string. # a backtick group begins and ends with a ` (backtick) # that is NOT escaped. if the escape is itself escaped, # it does NOT affect the bt. so: # ` can begin or end a bt group; # \` can NOT begin or end a bt group (ordinary char); # \\` escape is escaped: bt can begin/end a bt group. # an un-unescaped backtick: a true backtick. my $tru_bt = qr{ (? $tru_bt $not_bt_lt* $tru_bt) }xmso; (my $replacement = $string) =~ s{ (?: # after match starts: eat lookahead left over from below \G (?! \A) $not_bt*+ $tru_bt | # at match start: immediately begin matching pre-bt-lt stuff \A ) # match all stuff before bt-group containing an lt. (?: $not_bt*+ $empty_bt*+)*+ # match start of bt-group up to an lt. $tru_bt $not_bt_lt*+ # ignore all characters matched so far. \K # match (and replace) lt if followed by end of bt group. < (?= $not_bt* $tru_bt) } {&lgt;}xmsgo; return $replacement; } sub replace_lt_4 { # needs 5.10+ regex extensions my ($string, ) = @_; # replace < pattern in all backtick (bt) groups in a string. # a backtick group begins and ends with a ` (backtick) # that is NOT escaped. if the escape is itself escaped, # it does NOT affect the bt. so: # ` can begin or end a bt group; # \` can NOT begin or end a bt group (ordinary char); # \\` escape is escaped: bt can begin/end a bt group. (my $replacement = $string) =~ s{ (?: # after match starts: eat lookahead left over from below \G (?! \A) (?&NOT_BT)*+ (?&TRU_BT) | # at match start: immediately begin matching pre-bt-lt stuff \A ) # match all stuff before bt-group containing an lt. (?: (?&NOT_BT)*+ (?&EMPTY_BT)*+)*+ # match start of bt-group up to an lt. (?&TRU_BT) (?&NOT_BT_LT)*+ # ignore all characters matched so far. \K # match (and replace) lt if followed by end of bt group. < (?= (?&NOT_BT)* (?&TRU_BT)) (?(DEFINE) # an un-unescaped backtick: a true backtick. (? (? (?! (?&TRU_BT)) .) # any character NOT a true backtick and also NOT a < (lt). (? (?! <) (?&NOT_BT)) # a bt-group NOT containing an lt. (? (?> (?&TRU_BT) (?&NOT_BT_LT)*+ (?&TRU_BT))) ) } {&lgt;}xmsg; return $replacement; }