--- Init ...
 ... completed: 2000 snippets from 200 to 400 of 6000 long DNA
Check_all 	 took 1.206	Filtered: 287 Ratio: 6.96864111498258
Check_longest 	 took 0.436	Filtered: 287 Ratio: 6.96864111498258
Faster: 2.76641019482413

--- Init ...
 ... completed: 2000 snippets from 200 to 400 of 60000 long DNA
Check_all 	 took 2.990	Filtered: 924 Ratio: 2.16450216450216
Check_longest 	 took 1.908	Filtered: 924 Ratio: 2.16450216450216
Faster: 1.56725918185693

--- Init ...
 ... completed: 2000 snippets from 200 to 400 of 600000 long DNA
Check_all 	 took 4.462	Filtered: 1785 Ratio: 1.12044817927171
Check_longest 	 took 4.058	Filtered: 1785 Ratio: 1.12044817927171
Faster: 1.09963952407683

--- Init ...
 ... completed: 2000 snippets from 200 to 400 of 6000000 long DNA
Check_all 	 took 4.601	Filtered: 1977 Ratio: 1.01163378856854
Check_longest 	 took 4.651	Filtered: 1977 Ratio: 1.01163378856854
Faster: 0.989378646160973

##</code><code>##

#! perl -slw
use strict;
use Time::HiRes qw[ time ];
use Data::Dump qw[ pp ];

use constant DEBUG => 0;

$|=1;



my (@sequences,@uniq);

for my $magnitude (0..3) {
  my $factor = 10**$magnitude;
  
  init_sequences(6000*$factor,2000,200,400);
  #init_sequences(6000,20*$factor,200,400);
  pp \@sequences if DEBUG;


  my ($res_all,$time_all) = check_all();
  pp $res_all if DEBUG;

  my ($res_longest,$time_longest) = check_longest();
  pp $res_longest if DEBUG;

  print STDERR "Faster: ", $time_all/ $time_longest;
}




# ---------
#  subs
# ---------



sub uniq{ my %x; @x{@_} = (); keys %x }


sub check_all {
  my $start = time;
  my @uniq = uniq(@sequences);
  my @result;
  chomp @uniq;

  @uniq = sort{ length $a <=> length $b } @uniq;

  my $all = join chr(0), @uniq;
  my $p = 0;

  for my $x ( @uniq ) {
    $p += 1+ length $x;
    next if 1+ index $all, $x, $p;	  ## COrrected per LanX below.
    push @result, $x ;
  }

  my $runtime=time() - $start;
  printf STDERR "Check_all \t took %.3f\t",$runtime;
  print STDERR "Filtered: ",scalar @result ," Ratio: ",@sequences /  @result;
  return \@result,$runtime;
}


sub check_longest {
  my $start = time;
  my @uniq = uniq(@sequences);
  chomp @uniq;

  @uniq = sort{ length $b <=> length $a } @uniq;

  my $longest= shift @uniq;

  for my $x ( @uniq ) {
    next if 1+ index $longest, $x;
    $longest .= "\n" . $x;
  }
  
  my $runtime=time() - $start;
  printf STDERR "Check_longest \t took %.3f\t",$runtime; 
  
  my @result=split "\n",$longest;
  print STDERR "Filtered: ",scalar @result ," Ratio: ",@sequences /  @result;

  return \@result,$runtime;

}

sub init_sequences {
  my $length_dna=shift;
  my $num=shift;
  my $min=shift;
  my $max=shift;

  print STDERR "\n--- Init ...";
  
  my $dna;
  @sequences=();    
  for (1 .. $length_dna) {
    $dna .= (qw/A C G T N/)[int(rand 5)]
  }
  print length $dna,": ",$dna if DEBUG;

  for (1 .. $num) {
    my $length= $min + int ( rand ($max -$min +1) );
    my $offset= int ( rand ($length_dna - $length +1) );
    # pp $length,$offset;
    push @sequences,  substr $dna,$offset,$length;
  }
  print STDERR " ... completed: $num snippets from $min to $max of $length_dna long DNA";
}