=pod

@longest = pinyan_UCS($string)

This function returns the longest substrings of a given string
that contain no repeated character. It seems rather efficient,
even though it calls C<index()> quite a bit. I found that using
a hash to figure out whether I'd seen a character had adverse
effects.

=cut

sub pinyan_UCS {
    # Returns, in order of appearance, every longest run of $text in which
    # no character occurs twice. In scalar context the caller receives the
    # number of such runs.
    my ($text) = @_;
    my $total  = length $text;

    my $best = 0;    # length of the longest run found so far
    my $pivot;       # position whose repeat cut the latest window short
    my @next_at;     # $next_at[$p]: next index holding the char at $p, or -1
    my @found;       # the runs of length $best collected so far

    my $start = 0;
    while ($start < $total) {
        # Too little text remains for a window starting here to tie the best.
        last if $total - $start < $best;

        # Begin with the widest possible window and pull its end back to the
        # nearest repeat of any character still inside it.
        my $end = $total;
        my $pos = $start;
        while ($pos < $end) {
            # A cached value is never 0 and -1 is true, so ||= caches safely.
            $next_at[$pos] ||= index($text, substr($text, $pos, 1), $pos + 1);

            my $repeat = $next_at[$pos];
            if ($repeat != -1 and $repeat < $end) {
                $end   = $repeat;
                $pivot = $pos;
            }
            $pos++;
        }

        my $width = $end - $start;
        if ($width > $best) {
            $best  = $width;
            @found = (substr($text, $start, $width));
        }
        elsif ($width == $best) {
            push @found, substr($text, $start, $width);
        }

        # Any window starting at or before $pivot would still contain the
        # repeated pair, so resume just past it. When nothing was cut,
        # $pivot still equals $start and this advances by one.
        $start = ++$pivot;
    }

    return @found;
}
In reply to Unique-Character Substring by japhy
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |