comment on

Update: Because of some shortcomings in the code below (use of '&lgt;' instead of '<' as replacement string; inability to handle multiple '<' characters per backtick group; no handling of escaped '<' characters), please see instead a later and greater version posted here. Because it may still be useful as material for archeological study, I won't try to update or delete this code, and all the caveats still apply.

I'd be inclined to think a parser approach would be best for reasons of readability/maintainability. I'm fascinated by regexes, however, so here's my final try, albeit not totally un-hairy I must admit.

use 5.010;  # needs various perl version 5.10 regex extensions

use warnings;
use strict;

use Test::More 'no_plan';
use Test::NoWarnings;

my @tests = (
  'no changes to these',
  [ ('')                        x 2 ],  # unchanged pair
  [ ('`')                       x 2 ],
  [ ('``')                      x 2 ],
  [ ('```')                     x 2 ],
  [ ('<')                       x 2 ],
  [ ('<<')                      x 2 ],
  [ ('<<<')                     x 2 ],
  [ ('``<>``<>``')              x 2 ],
  [ ('is `not   <this>   one ') x 2 ],
  [ ('is  not   <this> ` one ') x 2 ],
  [ ('is `not ` <this>   one ') x 2 ],
  [ ('is  not   <this> ` one`') x 2 ],
  [ ('is  not ``<this> ` one`') x 2 ],
  [ ('is `not`  <this> ` one`') x 2 ],
  [ ('is \\\\`not       <this>       one      ') x 2 ],
  [ ('is      not       <this> \\\\` one      ') x 2 ],
  [ ('is \\\\`not     ` <this>       one      ') x 2 ],
  [ ('is     `not \\\\` <this>       one      ') x 2 ],
  [ ('is      not       <this> \\\\` one     `') x 2 ],
  [ ('is      not       <this>     ` one \\\\`') x 2 ],
  [ ('``<A>``<B>``<C>``<D>`')                    x 2 ],

  'all these should change',
  [ '`<A>``<B>``<C>``<D>`',
    '`&lgt;A>``&lgt;B>``&lgt;C>``&lgt;D>`',
    ],
  [ '`<A>``<B>``<C>``<D>``',
    '`&lgt;A>``&lgt;B>``&lgt;C>``&lgt;D>``',
    ],
  [ '```<A>``<B>``<C>``<D>`',
    '```&lgt;A>``&lgt;B>``&lgt;C>``&lgt;D>`',
    ],
  [ 'u    `v    <A>    `    w    `x    <B>    y`    z',
    'u    `v    &lgt;A>    `    w    `x    &lgt;B>    y`    z',
    ],
  [ 'u \` `v    <A>    `    w    `x    <B>    y`    z',
    'u \` `v    &lgt;A>    `    w    `x    &lgt;B>    y`    z',
    ],
  [ 'u    `v \` <A>    `    w    `x    <B>    y`    z',
    'u    `v \` &lgt;A>    `    w    `x    &lgt;B>    y`    z',
    ],
  [ 'u    `v    <A> \` `    w    `x    <B>    y`    z',
    'u    `v    &lgt;A> \` `    w    `x    &lgt;B>    y`    z',
    ],
  [ 'u    `v    <A>    ` \` w    `x    <B>    y`    z',
    'u    `v    &lgt;A>    ` \` w    `x    &lgt;B>    y`    z',
    ],
  [ 'u    `v    <A>    `    w \` `x    <B>    y`    z',
    'u    `v    &lgt;A>    `    w \` `x    &lgt;B>    y`    z',
    ],
  [ 'u    `v    <A>    `    w    `x \` <B>    y`    z',
    'u    `v    &lgt;A>    `    w    `x \` &lgt;B>    y`    z',
    ],
  [ 'u    `v    <A>    `    w    `x    <B> \` y`    z',
    'u    `v    &lgt;A>    `    w    `x    &lgt;B> \` y`    z',
    ],
  [ 'u    `v    <A>    `    w    `x    <B>    y` \` z',
    'u    `v    &lgt;A>    `    w    `x    &lgt;B>    y` \` z',
    ],

  [ 'is `my <string>` that `also <this> one` too',
    'is `my &lgt;string>` that `also &lgt;this> one` too',
    ],
  [ 'is \\\\` `my \\\\` <string>` that `also <this> \\\\` one` \\\\` t
+oo',
    'is \\\\` `my \\\\` &lgt;string>` that `also &lgt;this> \\\\` one`
+ \\\\` too',
    ],
  [ 'x \\\\` `y \\\\` <A>` z `v <B> \\\\` w` \\\\` x',
    'x \\\\` `y \\\\` &lgt;A>` z `v &lgt;B> \\\\` w` \\\\` x',
    ],
  [ 'x        y \\\\` <A>` z `v <B> \\\\` w` \\\\` x',
    'x        y \\\\` &lgt;A>` z `v &lgt;B> \\\\` w` \\\\` x',
    ],
  [ 'is \`my <NO> that `but <this> one` yes',
    'is \`my <NO> that `but &lgt;this> one` yes',
    ],
  [ 'is \\\\`my <this> that `but <this> one` no',
    'is \\\\`my &lgt;this> that `but <this> one` no',
    ],

  'is this acceptable for < that are unbalanced?',
  [ 'is `this <UN <BALANCED> `ok?',
    'is `this &lgt;UN <BALANCED> `ok?',
    ],

  );

FUNT:
for my $func_name (qw(replace_lt_3  replace_lt_4)) {

  note "\n=== testing $func_name() ===\n\n";
  *replace_lt = do { no strict 'refs';  *$func_name; };

  VECTOR:
  for my $ar_vector (@tests) {

    if (not ref $ar_vector) {
      note $ar_vector;
      next VECTOR;
      }

    my ($string, $expected) = @$ar_vector;

    is replace_lt($string), $expected, qq{'$string' -> '$expected'};

    }  # end for VECTOR

  }  # end for FUNT

note "\n=== done testing functions ===\n\n";

done_testing;

exit;

# Functions UNder Test #############################################

sub replace_lt_3 {  # needs 5.10+: uses possessives and \K

    my ($string,
        ) = @_;

    # replace < pattern in all backtick (bt) groups in a string.
    # a backtick group begins and ends with a ` (backtick)
    # that is NOT escaped.  if the escape is itself escaped,
    # it does NOT affect the bt.  so:
    #   `     can begin or end a bt group;
    #   \`    can NOT begin or end a bt group (ordinary char);
    #   \\`   escape is escaped: bt can begin/end a bt group.

    # an un-unescaped backtick: a true backtick.
    my $tru_bt    = qr{ (?<! (?<! \\) \\) ` }xmso;
    # any character NOT a true backtick.
    my $not_bt    = qr{ (?! $tru_bt) . }xmso;
    # any character NOT a true backtick and also NOT a < (lt).
    my $not_bt_lt = qr{ (?! <) $not_bt }xmso;
    # a bt-group NOT containing an lt.
    my $empty_bt  = qr{ (?> $tru_bt $not_bt_lt* $tru_bt) }xmso;

    (my $replacement = $string) =~ s{
      (?:
        # after match starts: eat lookahead left over from below
        \G (?! \A) $not_bt*+ $tru_bt
        |
        # at match start: immediately begin matching pre-bt-lt stuff
        \A
      )
      # match all stuff before bt-group containing an lt.
      (?: $not_bt*+ $empty_bt*+)*+
      # match start of bt-group up to an lt.
      $tru_bt $not_bt_lt*+
      # ignore all characters matched so far.
      \K
      # match (and replace) lt if followed by end of bt group.
      <  (?= $not_bt* $tru_bt)
      }
      {&lgt;}xmsgo;

    return $replacement;

    }

sub replace_lt_4 {  # needs 5.10+ regex extensions

    my ($string,
        ) = @_;

    # replace < pattern in all backtick (bt) groups in a string.
    # a backtick group begins and ends with a ` (backtick)
    # that is NOT escaped.  if the escape is itself escaped,
    # it does NOT affect the bt.  so:
    #   `     can begin or end a bt group;
    #   \`    can NOT begin or end a bt group (ordinary char);
    #   \\`   escape is escaped: bt can begin/end a bt group.

    (my $replacement = $string) =~ s{
      (?:
        # after match starts: eat lookahead left over from below
        \G (?! \A) (?&NOT_BT)*+ (?&TRU_BT)
        |
        # at match start: immediately begin matching pre-bt-lt stuff
        \A
      )
      # match all stuff before bt-group containing an lt.
      (?: (?&NOT_BT)*+ (?&EMPTY_BT)*+)*+
      # match start of bt-group up to an lt.
      (?&TRU_BT) (?&NOT_BT_LT)*+
      # ignore all characters matched so far.
      \K
      # match (and replace) lt if followed by end of bt group.
      <  (?= (?&NOT_BT)* (?&TRU_BT))

      (?(DEFINE)
        # an un-unescaped backtick: a true backtick.
        (?<TRU_BT>    (?<! (?<! \\) \\) `)
        # any character NOT a true backtick.
        (?<NOT_BT>    (?! (?&TRU_BT)) .)
        # any character NOT a true backtick and also NOT a < (lt).
        (?<NOT_BT_LT> (?! <) (?&NOT_BT))
        # a bt-group NOT containing an lt.
        (?<EMPTY_BT>  (?> (?&TRU_BT) (?&NOT_BT_LT)*+ (?&TRU_BT)))
        )

      }
      {&lgt;}xmsg;

    return $replacement;

    }
[download]

Give a man a fish: <%-{-{-{-<

In reply to Re: Replacing left angle bracket with HTML entity when between two backtick characters by AnomalousMonk
in thread Replacing left angle bracket with HTML entity when between two backtick characters by nysus

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.