use 5.010; # needs perl version 5.10 regex extensions use warnings; use strict; use Test::More 'no_plan'; use Test::NoWarnings; my @tests = ( 'no changes to these', [ ('') x 2 ], # unchanged pair [ ('`') x 2 ], [ ('``') x 2 ], [ ('```') x 2 ], [ ('<') x 2 ], [ ('<<') x 2 ], [ ('<<<') x 2 ], [ ('``<>``<>``') x 2 ], [ ('is `not one ') x 2 ], [ ('is not ` one ') x 2 ], [ ('is `not ` one ') x 2 ], [ ('is not ` one`') x 2 ], [ ('is not `` ` one`') x 2 ], [ ('is `not` ` one`') x 2 ], [ ('is \\\\`not one ') x 2 ], [ ('is not \\\\` one ') x 2 ], [ ('is \\\\`not ` one ') x 2 ], [ ('is `not \\\\` one ') x 2 ], [ ('is not \\\\` one `') x 2 ], [ ('is not ` one \\\\`') x 2 ], [ ('`````````') x 2 ], 'all these should change', [ '````````', '`<A>``<B>``<C>``<D>`', ], [ '``', # new: multiple < in bt group '`<A><B><C><D>`', ], [ '`\\\\\`', # new: < may be escaped '`<A>\\\\\<C><D>`', ], [ '`````````', '`<A>``<B>``<C>``<D>``', ], [ '` `` \`` \\\\`` ``', '`<A> <a a>``<B> \``<C> \\\\<c c>``<D> <d d>``', ], [ '``````````', '```<A>``<B>``<C>``<D>`', ], [ 'u `v ` w `x y` z', 'u `v <A> ` w `x <B> y` z', ], [ 'u \` `v ` w `x y` z', 'u \` `v <A> ` w `x <B> y` z', ], [ 'u `v \` ` w `x y` z', 'u `v \` <A> ` w `x <B> y` z', ], [ 'u `v \` ` w `x y` z', 'u `v <A> \` ` w `x <B> y` z', ], [ 'u `v ` \` w `x y` z', 'u `v <A> ` \` w `x <B> y` z', ], [ 'u `v ` w \` `x y` z', 'u `v <A> ` w \` `x <B> y` z', ], [ 'u `v ` w `x \` y` z', 'u `v <A> ` w `x \` <B> y` z', ], [ 'u `v ` w `x \` y` z', 'u `v <A> ` w `x <B> \` y` z', ], [ 'u `v ` w `x y` \` z', 'u `v <A> ` w `x <B> y` \` z', ], [ 'is `my ` that `also one` too', 'is `my <string>` that `also <this> one` too', ], [ 'is \\\\` `my \\\\` ` that `also \\\\` one` \\\\` too', 'is \\\\` `my \\\\` <string>` that `also <this> \\\\` one` \\\\` too', ], [ 'x \\\\` `y \\\\` ` z `v \\\\` w` \\\\` x', 'x \\\\` `y \\\\` <A>` z `v <B> \\\\` w` \\\\` x', ], [ 'x y \\\\` ` z `v \\\\` w` \\\\` x', 'x y \\\\` <A>` z `v <B> \\\\` w` \\\\` x', ], [ 'is \`my that `but one` yes', 'is \`my that `but <this> one` yes', ], '(per Corion pm#1222697)', [ 'is \\\\`my that `but one` no', 'is \\\\`my <this> that `but one` no', ], 'is this acceptable for < that are unbalanced?', [ '`<\<\\\\<``\\\\<\<<`', '`<\<\\\\<``\\\\<\<<`', ], [ 'is `this `ok?', 'is `this <UN <BALANCED> `ok?', ], ); FUNT: for my $func_name (qw(replace_lt_5 replace_lt_6)) { note "\n=== testing $func_name() ===\n\n"; *replace_lt = do { no strict 'refs'; *$func_name; }; VECTOR: for my $ar_vector (@tests) { if (not ref $ar_vector) { note $ar_vector; next VECTOR; } my ($string, $expected) = @$ar_vector; is replace_lt($string), $expected, qq{'$string' -> '$expected'}; } # end for VECTOR } # end for FUNT note "\n=== done testing functions ===\n\n"; done_testing; exit; # Functions UNder Test ############################################# sub replace_lt_5 { # needs 5.10+ regex extensions # handles multiple < in bt group and escaped < my ($string, ) = @_; # replace ALL < patterns in all backtick (bt) groups in a string. # there may be multiple < patterns in a bt group. # a backtick group begins and ends with a ` (backtick) # that is NOT escaped. if the escape is itself escaped, # it does NOT affect the bt. so: # ` can begin or end a bt group; # \` can NOT begin or end a bt group (ordinary char); # \\` escape is escaped: bt can begin/end a bt group. # similarly, < (lt) patterns may be escaped and the escape may # be escaped: # < subject to replacement; # \< escaped: < NOT subject to replacement; # \\< escape is escaped: < subject to replacement. $string =~ s{ # match to just before valid lt in a valid bt group. (?: \G (?! \A) # start just after some valid lt already found (?: (?&TO_NEXT_LT_IN_THIS_BT_GROUP) | (?&TO_FIRST_LT_IN_NEXT_BT_GROUP) ) | \A (?&TO_FIRST_LT_IN_FIRST_BT_GROUP) ) # grab and replace lt. \K # ignore everything matched so far (?&TRU_LT) # replace this (?(DEFINE) # an un-unescaped backtick: may start a bt group. (? (? (? (?! (?&TRU_BT)) .) # any character NOT a true backtick and also NOT an lt. (? (?! (?&TRU_LT)) (?&NOT_BT)) # a bt-group NOT containing an lt. (? (?> (?&TRU_BT) (?&NOT_BT_LT)*+ (?&TRU_BT))) # non-bt-group-with-lt stuff to ignore. (? (?: (?&NOT_BT)*+ (?&EMPTY_BT)*+)*+) # match to first valid lt in first valid bt group. (? (?&IGNORE) # ignore non-group stuff (?&TRU_BT) (?&NOT_BT_LT)*+ # positioned just before lt (?= (?&NOT_BT)*+ (?&TRU_BT)) # rest is valid bt ) # match to first valid lt in NEXT valid bt group. (? (?&NOT_BT)*+ (?&TRU_BT) # to end of current group (?&TO_FIRST_LT_IN_FIRST_BT_GROUP) # actually in NEXT group ) # match to next valid lt in current ASSUMED-valid bt group. (? (?&NOT_BT_LT)*+) ) # end (DEFINE) } {<}xmsg; return $string; } # end sub replace_lt_5() sub replace_lt_6 { # needs 5.10+ regex extensions # handles multiple < in bt group and escaped < my ($string, ) = @_; # replace ALL < patterns in all backtick (bt) groups in a string. # there may be multiple < patterns in a bt group. # a backtick group begins and ends with a ` (backtick) # that is NOT escaped. if the escape is itself escaped, # it does NOT affect the bt. so: # ` can begin or end a bt group; # \` can NOT begin or end a bt group (ordinary char); # \\` escape is escaped: bt can begin/end a bt group. # similarly, < (lt) patterns may be escaped and the escape may # be escaped: # < subject to replacement; # \< escaped: < NOT subject to replacement; # \\< escape is escaped: < subject to replacement. # an un-unescaped backtick: may start a bt group. my $tru_bt = qr{ (? $tru_bt $not_bt_lt*+ $tru_bt) }xmso; # non-bt-group-with-lt stuff to ignore. my $ignore = qr{ (?: $not_bt*+ $empty_bt*+)*+ }xmso; # match to first valid lt in first valid bt group. my $to_first_lt_in_first_bt_group = qr{ $ignore # ignore non-group stuff $tru_bt $not_bt_lt*+ # positioned just before lt (?= $not_bt*+ $tru_bt) # rest is valid bt }xmso; # match to first valid lt in NEXT valid bt group. my $to_first_lt_in_next_bt_group = qr{ $not_bt*+ $tru_bt # to end of current group $to_first_lt_in_first_bt_group # actually in NEXT group }xmso; # match to next valid lt in current ASSUMED-valid bt group. my $to_next_lt_in_this_bt_group = qr{ $not_bt_lt*+ }xmso; $string =~ s{ # match to just before valid lt in a valid bt group. (?: \G (?! \A) # start just after some valid lt already found (?: $to_next_lt_in_this_bt_group | $to_first_lt_in_next_bt_group ) | \A $to_first_lt_in_first_bt_group ) # grab and replace lt. \K # ignore everything matched so far $tru_lt # replace this } {<}xmsgo; return $string; } # end sub replace_lt_6()