my $xor = substr($data[$d1],$s1,$L) 
        ^ substr($data[$d2],$s2,$L);

##</code><code>##

#!/your/perl/here

# find the longest common substring (contiguous) in a list of strings
#
# method:
# sort all strings by descending length
# search from longest overlap to shortest
# (the longest substring is more likely to be in the longest overlap)
# 

use strict;
use warnings;
use Benchmark;

my @timer;
push @timer, new Benchmark;

# predictable randomness
srand(atan2(1,1)*4*1000);

my @keys = qw( LENGTH STRING FIRST_INDEX FIRST_OFFSET SECOND_INDEX SECOND_OFFSET );

my @chars = qw( A G C T );

my $longest = 0;
my @data;

foreach my $i (1..300)
{
	my $d;
	foreach my $j (1..3000)
	{
		$d .= $chars[rand(@chars)];
	}
	push @data, $d;
	$longest = length($d) if ( length($d) > $longest );
}

# sort longest to shortest
@data = sort { length($b) <=> length ($a) } @data;

my %LCS;
@LCS{@keys} = (0,'',-1,-1,-1,-1);
my $L = $longest;
L:
while ( $L > $LCS{LENGTH} )
{
	push @timer, new Benchmark;
	@timer = time_delta("\nL:$L", @timer);
	D1:
	for my $d1 ( 0..$#data-1 )
	{
		# all entries are sorted by descending length
		# if this entry is too short, all the rest are as well
		# go to the next "length"
		my $d1L = length( $data[$d1] );
		next L unless ( $d1L >= $L );
		D2:
		for my $d2 ( $d1+1..$#data )
		{
			# all entries are sorted by descending length
			# if this entry is too short, all the rest are as well
			# go to the next D1 (which may be long enough)
			my $d2L = length( $data[$d2] );
			next D1 unless ( $d2L >= $L );

			# consider overlaps of length $L
			# these are starting index numbers
			# overlaps are either at index 0, 
			#   or the last $L characters
			my @s1 = (0,         $d1L - $L );
			my @s2 = ($d2L - $L, 0         );
			S:
			for my $s ( 0,1 )
			{
				my $xor = substr($data[$d1],$s1[$s],$L) ^ substr($data[$d2],$s2[$s],$L);
				my $offset = 0;
				while ( $xor =~ /([^\0]*)(\0{$LCS{LENGTH},})/gsm )
				{
					my ($first, $second) = ($1, $2);
					$offset += length($first);
					my $long = length($second);
					if ( $long > $LCS{'LENGTH'} )
					{
						@LCS{@keys} = ($long,
						               substr($data[$d1],$s1[$s]+$offset,$long), # string
						               $d1,$s1[$s]+$offset,
						               $d2,$s2[$s]+$offset);
						print "\nLCS: <$LCS{STRING}> $long\n";
					}
					$offset += length($second);
				}

			}
		}
	}
	$L--;
}

print "Longest: <$LCS{STRING}> ($LCS{LENGTH})\n";
print "Item $LCS{FIRST_INDEX}, offset $LCS{FIRST_OFFSET}\n";
print "Item $LCS{SECOND_INDEX}, offset $LCS{SECOND_OFFSET}\n";

push @timer, new Benchmark;
@timer = time_delta("total", @timer);

exit;

###########################################
# nice benchmark timestamp printer

sub time_delta
{
	my $msg = shift;
	my @t = @_;
	my $total_time = timestr(timediff(@t[-1,0]));
	my $delta_time = timestr(timediff(@t[-1,-2]));
	print "$msg\nd:$delta_time\nt:$total_time\n";
	return @t[0,-1];
}