#! perl -slw
use strict;
use Time::HiRes qw[ time ];
use Data::Dump qw[ pp ];

use constant DEBUG => 0;

# Autoflush the currently selected handle (STDOUT); presumably so the DEBUG
# dumps stay in step with the progress lines written to STDERR.
$| = 1;

# @sequences is the shared input: init_sequences() fills it and both check_*
# subs read it.  @uniq is not used at file level in the code shown here (each
# check_* sub declares its own lexical @uniq); the declaration is kept so that
# anything else relying on it still compiles.
my (@sequences,@uniq);

# Benchmark both filters on random DNA strings of 6_000, 60_000, 600_000 and
# 6_000_000 characters, each time with 2000 snippets of 200..400 characters.
for my $magnitude (0..3) {
  my $factor = 10**$magnitude;

  init_sequences(6000*$factor,2000,200,400);
  # Alternative experiment: fixed DNA length, growing number of snippets.
  #init_sequences(6000,20*$factor,200,400);
  pp \@sequences if DEBUG;

  my ($res_all,$time_all) = check_all();
  pp $res_all if DEBUG;

  my ($res_longest,$time_longest) = check_longest();
  pp $res_longest if DEBUG;

  # How many times faster check_longest() was than check_all().  A run that
  # finishes within the timer resolution reports 0 seconds; dividing by it
  # would abort the whole benchmark with "Illegal division by zero".
  print STDERR "Faster: ",
    ( $time_longest > 0 ? $time_all / $time_longest : 'n/a' );
}




# ---------
#  subs
# ---------



# uniq(LIST): the distinct values of LIST.  Values become hash keys, so they
# come back stringified and in no particular order.  In scalar context the
# trailing `keys` yields the number of distinct values instead.
sub uniq {
  my %seen;
  $seen{$_} = undef for @_;
  return keys %seen;
}


# check_all(): filter out every sequence that occurs inside another one by
# searching a single string that holds all candidates.
#
# Reads the file-level @sequences (takes no arguments).  The sequences are
# de-duplicated, chomped, sorted shortest first and joined with chr(0).  Each
# sequence is then looked up in the part of the joined string that lies behind
# its own entry, i.e. among the sequences of equal or greater length.
#
# Prints the elapsed time, the number of survivors and the input/survivor
# ratio to STDERR.
# Returns the list ( \@result, $runtime ), $runtime being seconds as measured
# by time().
sub check_all {
  my $start = time;
  my @uniq = uniq(@sequences);
  my @result;
  chomp @uniq;

  @uniq = sort{ length $a <=> length $b } @uniq;

  # chr(0) separates the entries; assumes no sequence contains a NUL byte, so
  # a match can never straddle two entries.
  my $all = join chr(0), @uniq;
  my $p = 0;

  for my $x ( @uniq ) {
    # Move the search start past $x itself and the separator following it, so
    # that $x cannot match its own entry.
    $p += 1+ length $x;
    # index() returns -1 for "not found", hence 1+index is false exactly when
    # $x occurs in no later (longer or equally long) entry.
    next if 1+ index $all, $x, $p;
    push @result, $x ;
  }

  my $runtime=time() - $start;
  printf STDERR "Check_all \t took %.3f\t",$runtime;

  # No survivors (e.g. empty @sequences) would make the ratio a division by
  # zero and kill the script; report a placeholder instead.
  my $ratio = @result ? @sequences / @result : 'n/a';
  print STDERR "Filtered: ",scalar @result ," Ratio: ",$ratio;
  return \@result,$runtime;
}


# check_longest(): filter out every sequence that occurs inside another one by
# growing a string of the sequences kept so far.
#
# Reads the file-level @sequences (takes no arguments).  The sequences are
# de-duplicated, chomped and sorted longest first.  Starting with the longest
# one, each further sequence is appended (separated by "\n") unless it already
# occurs somewhere in the accumulated string.
#
# Prints the elapsed time, the number of survivors and the input/survivor
# ratio to STDERR.
# Returns the list ( \@result, $runtime ).  Note that $runtime is taken before
# the accumulated string is split back into @result.
sub check_longest {
  my $start = time;
  my @uniq = uniq(@sequences);
  chomp @uniq;

  @uniq = sort{ length $b <=> length $a } @uniq;

  # With no input at all, shift would yield undef and the split below would
  # warn about an uninitialized value; start from the empty string instead.
  my $longest = @uniq ? shift @uniq : '';

  for my $x ( @uniq ) {
    # 1+index is false only when index() returned -1, i.e. $x was not found.
    next if 1+ index $longest, $x;
    $longest .= "\n" . $x;
  }

  my $runtime=time() - $start;
  printf STDERR "Check_longest \t took %.3f\t",$runtime;

  my @result=split "\n",$longest;

  # No survivors would make the ratio a division by zero and kill the script;
  # report a placeholder instead.
  my $ratio = @result ? @sequences / @result : 'n/a';
  print STDERR "Filtered: ",scalar @result ," Ratio: ",$ratio;

  return \@result,$runtime;

}

# init_sequences( $length_dna, $num, $min, $max ): (re)fill the file-level
# @sequences with random test data.
#
#   $length_dna  length of the random DNA string over the letters A C G T N
#   $num         number of snippets to cut out of that string
#   $min, $max   inclusive bounds for the length of each snippet
#                (assumes $min <= $max -- not checked)
#
# Each snippet is a substring of the DNA at a random offset, so snippets may
# overlap, contain each other or be identical.  A snippet length larger than
# the DNA itself is clamped to the DNA length.  Progress is reported on STDERR.
sub init_sequences {
  my $length_dna=shift;
  my $num=shift;
  my $min=shift;
  my $max=shift;

  print STDERR "\n--- Init ...";

  my @bases = qw/A C G T N/;

  # Start from '' rather than undef so that a zero-length DNA does not trigger
  # "uninitialized value" warnings in length/substr below.
  my $dna = '';
  @sequences=();
  for (1 .. $length_dna) {
    $dna .= $bases[ int rand(scalar @bases) ];
  }
  print length $dna,": ",$dna if DEBUG;

  for (1 .. $num) {
    my $length= $min + int ( rand ($max -$min +1) );

    # A snippet cannot be longer than the DNA.  Without the clamp the offset
    # range below becomes zero or negative, and substr would then count from
    # the end of the string and return snippets of the wrong length.
    $length = $length_dna if $length > $length_dna;

    # Random start such that the whole snippet fits: 0 .. $length_dna-$length.
    my $offset= int ( rand ($length_dna - $length +1) );
    # pp $length,$offset;
    push @sequences,  substr $dna,$offset,$length;
  }
  print STDERR " ... completed: $num snippets from $min to $max of $length_dna long DNA";
}