m{ # set up $^R to hold a hashref and a number. # anchor the regex to the beginning of the # string this will ensure that we stop # processing once we've exhausted the string. (?{ [ {}, 0 ] }) ^ # we select a run of digits (greedy) after # some prefix of digits (non-greedy). the # run is stored in $1 \d*? (\d+) # we make sure $1 is short enough that # it COULD be repeated (a 10 character $1 # in a 15 character string is no good). # we also check to see if we've seen this # particular run of digits before. this is # analogous to grep { $seen{$_}++ } LIST. # $^R->[0], the hashref, is our "seen" hash. # if $1 has been seen, we fail via (?!), # which causes us to backtrack our (\d+). (?(?{ length($1) > (length($_) - $-[1])/2 or $^R->[0]{$1}++ })(?!)) # here we set the "count" part of $^R to 1. # if you're wondering why I don't just do # $^R->[1]++ (like I did for $^R->[0]{$1}) # I'll explain later. suffice to say, this # is the proper and safe way to do it. (?{ [ $^R->[0], 1 ] }) # we use (?>...) to prevent backtracking # inside this part. this part tracks the # number of times $1 is repeated in the # rest of the string. we only want the # most number of matches, so we don't want # to backtrack inside here. (?> # we look for the non-$1 part followed # by $1, one or more times (since we # are only interested in $1's that ARE # repeated). for each match of $1 we # find, we add 1 to $^R->[1], the safe # way. (?: \d*? \1 (?{ [ $^R->[0], $^R->[1] + 1 ] }) )+ # when we can't find anymore $1's, we # report on it. (?{ print report($1, $^R->[1]) }) ) # we force the regex to fail here, which # causes the entire thing to backtrack, # which ensures we try each substring of # the entire string. (?!) }x;