comment on

m{ \A (.+?) \1* (.*) (?(?{ 0 != index $1, $2 }) (*FAIL)) \z }xmsg;
[download]

... seems to do the trick for all the permutations I've seen so far. I have no idea what performance hit the (?(?{ CODE })yes-pattern) conditional construct incurs. (Update: index $1, $2 works just as well as 0 != index $1, $2, because index returns 0, which is false, exactly when $2 is a prefix of $1.)

use warnings;
use strict;


use Test::More 'no_plan';
use Test::NoWarnings;


# Hand-written test vectors. Each entry is
#   [ input string => expected repeating unit, expected trailing partial ]
# where the input is the unit repeated one or more times, followed by a
# (possibly empty) proper prefix of the unit.
my @vectors_1 = (
    [ 'abcdabc'             => 'abcd', 'abc' ],
    [ 'abcdab'              => 'abcd', 'ab'  ],
    [ 'abcda'               => 'abcd', 'a'   ],
    [ 'abcd'                => 'abcd', ''    ],
    [ 'abcdabcdabcdabcdabc' => 'abcd', 'abc' ],
    [ 'abcdabcdabcdabcdab'  => 'abcd', 'ab'  ],
    [ 'abcdabcdabcdabcda'   => 'abcd', 'a'   ],
    [ 'abcdabcdabcdabcd'    => 'abcd', ''    ],
    [ 'abcdabcdabceabcdabcdabceabcdabcdabc'
                            => 'abcdabcdabce', 'abcdabcdabc' ],
    [ 'abcdabcdabceabcdabcdabceab'
                            => 'abcdabcdabce', 'ab' ],
    [ 'abcdabcdabceabcdabcdabce'
                            => 'abcdabcdabce', '' ],
    [ 'aaaabaaaabaaaaabaaaab'
                            => 'aaaabaaaaba', 'aaaabaaaab' ],
    [ 'aaaabaaaabaaaaabaaaabaaaaabaaaab'
                            => 'aaaabaaaaba', 'aaaabaaaab' ],
    [ 'aaaabaaaabaaaaabaaaabaaaaabaaaabaaaaabaaaab'
                            => 'aaaabaaaaba', 'aaaabaaaab' ],
);

# Match every vector and check both captures against the expectations.
for my $ar_vector (@vectors_1) {
    my ($string, $expect_rep, $expect_part) = @{$ar_vector};

    # How the pattern works:
    #   capture 1  - the shortest candidate repeating unit (lazy .+?)
    #   \1*        - any further whole copies of that unit
    #   capture 2  - whatever is left before the end of the string
    # The conditional code block then rejects the candidate, via (*FAIL),
    # unless capture 2 is a prefix of capture 1: index() returns 0 (false)
    # only when capture 2 starts capture 1 or is empty, and -1 or a positive
    # offset (both true) otherwise.
    #
    # No /g modifier: the pattern is anchored with \A and \z, so it can match
    # at most once, and a plain list-context match already returns ($1, $2).
    my ($rep, $part) = $string =~ m{
        \A
        (.+?) \1*  (.*)  (?(?{ index $1, $2 }) (*FAIL))
        \z
    }xms;

    is $rep,  $expect_rep,  qq{rptd '$string' -> '$expect_rep'};
    is $part, $expect_part, qq{part '$string' -> '$expect_part'};
}


note "\n\n\n\n";

# Generated test vectors. Each entry is
#   [ base unit, [ repetition counts to try ] ]
# For every count, the loops below build the base repeated that many times
# followed by each proper prefix of the base (including the empty one), and
# check that the pattern recovers the base and the prefix.
my @vectors_2 = (
    [ 'a',            [1 .. 4] ],
    [ 'ab',           [1 .. 4] ],
    [ 'abc',          [1 .. 4] ],
    [ 'aba',          [2 .. 4] ],  # note: (aba) looks like (ab)a
    [ 'aab',          [1 .. 4] ],
    [ 'abcd',         [1 .. 4] ],
    [ 'abac',         [1 .. 4] ],
    [ 'aaab',         [1 .. 4] ],
    [ 'abcdabcdabce', [1 .. 2] ],
    [ 'aaaabaaaaba',  [2 .. 3] ],  # (aaaabaaaaba) looks like (aaaab)(aaaab)a
);

VECTORS_2:
for my $ar_vector (@vectors_2) {
    my ($base, $ar_reps) = @{$ar_vector};

    REPS:
    for my $reps (@{$ar_reps}) {
        my $repeated = $base x $reps;

        # Append every proper prefix of the base: lengths 0 .. length - 1.
        PARTS:
        for my $partial_len (0 .. length($base) - 1) {
            my $partial = substr $base, 0, $partial_len;
            my $string  = $repeated . $partial;

            # Capture 1 is the shortest candidate unit, \1* consumes further
            # whole copies, capture 2 is the remainder. The conditional code
            # block forces a (*FAIL) unless the remainder is a prefix of the
            # unit: index() is 0 (false) only in that case. The pattern is
            # anchored at both ends, so it matches at most once and needs
            # no /g modifier.
            my ($got_rep, $got_part) = $string =~ m{
                \A
                (.+?) \1*  (.*)  (?(?{ index $1, $2 }) (*FAIL))
                \z
            }xms;

            is $got_rep,  $base,    qq{repeat  '$string' -> '$base'};
            is $got_part, $partial, qq{partial '$string' -> '$partial'};
        }  # end for PARTS
    }  # end for REPS
}  # end for VECTORS_2
[download]

Update: Just noticed needless /g modifier in posted regex. Final regex should be
m{ \A (.+?) \1* (.*) (?(?{ index $1, $2 }) (*FAIL)) \z }xms
which is functionally identical (i.e., passes all tests), and which I would not expect to differ in performance in any detectable way (untested).

In reply to Re: Finding repeat sequences. by AnomalousMonk
in thread Finding repeat sequences. by BrowserUk

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&amp;`
	<		`&lt;`
	>		`&gt;`
	[		`&#91;`
	]		`&#93;`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.