#!/usr/bin/perl -w
use strict;
use List::Util 'shuffle';
use Data::Dumper qw(Dumper); 
#g=generation
# Current seeding sequence is HA
       my @g0= ("ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATACATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA"); #Initial generation "seeding generation"
       
       
       
    #########
    ######### Paramters
    #########
    my $viralfitness=30; # The size of the range a random number is picked from that determines how many copies each viral sequence that makes it through a round will make
    my $replication_rounds=120; 
    my $portion_decimal=50; # this number (when divided by 100 below ) provides the range of fractions to select a random portion for the next gen so for example 50 will result in a random number being selected from the range .1-.5, that number will then be mutiplied by the scalar(@new_gen) to get the array coordinates to seed the next gen
    ###########
    ###########
    ###########
   my %allgen; #hash that holds sequences
   my $i;
   my $j;
   my @population_by_gen=(); # this array keeps track of all the viruses generated in each round including the ones that dont make it to the next
   
   
   %allgen=(0 =>\@g0);
   
          for($i=1;$i<$replication_rounds;$i++) # This $i determines the number of replications 
          {
             
             my @new_gen=();
             my @last_gen=@{$allgen{$i-1}};
             my @filtered_new_gen=();
              
              foreach my $seq_string(@last_gen)
              {
                  chomp $seq_string;
                    
                 my $vf_rand=int(rand($viralfitness))+1; #The random number selected from the range specified by $viralfitness;
                 
                 for($j=0;$j<($vf_rand);$j++) #This loop determines how many copies each sequence will make of itself 
                  {
                      my $new_string;
                      my @nucleotide_array = split(//,$seq_string);        
                      foreach my $nucleotide(@nucleotide_array)
                      {
                          chomp $nucleotide;
                                  
                          my($T,$G,$C,$A);
                          #These mutations rates are just for testing now they dont reflect accurate viral mutation rates yet
                              if ($nucleotide eq 'A'){
                                     # $T=.1;$G=.2;$C=.3;$A=.7;
                                      my $score = (int rand(100000))/100000;
                                      if( $score>=00000 && $score<=.00033){ $nucleotide=~s/A/T/g;}                       
                                      if( $score>=.000334 && $score<=.00066){ $nucleotide=~s/A/G/g;} 
                                      if( $score>=.00067 && $score<=.00099){ $nucleotide=~s/A/C/g;} 
                                     # if( $score>=.300 && $score<=1.00){  Do NOTHING      } }
                                  }
                              elsif ($nucleotide eq 'T'){
                                     # $A=.1;$G=.2;$C=.3;$T=.7;
                                      my $score = (int rand(100000))/100000;
                                      if( $score>=00000 && $score<=.000333){ $nucleotide=~s/T/A/g;}                       
                                      if( $score>=.00034 && $score<=.00066){ $nucleotide=~s/T/G/g;} 
                                      if( $score>=.00067 && $score<=.00099){ $nucleotide=~s/T/C/g;} 
                                      #if( $score>=.300 && $score<=1.00){  DO NOTHING      } }
                                  }
                              elsif ($nucleotide eq 'C'){
                                     # $T=.1;$G=.2;$A=.3;$C=.7;
                                      my $score = (int rand(100000))/100000;
                                      if( $score>=00000 && $score<=.00033){ $nucleotide=~s/C/T/g;}                       
                                      if( $score>=.00034 && $score<=.00066){ $nucleotide=~s/C/G/g;} 
                                      if( $score>=.00067 && $score<=.00099){ $nucleotide=~s/C/A/g;} 
                                      #if( $score>=.300 && $score<=1.00){  #No mutation} 
                                  }
                              elsif ($nucleotide eq 'G'){
                                     # $T=.1;$A=.2;$C=.3;$G=.7;
                                      my $score = (int rand(100000))/100000;
                                      if( $score>=00000 && $score<=.00033){ $nucleotide=~s/G/T/g;}                       
                                      if( $score>=.00034 && $score<=.00066){ $nucleotide=~s/G/A/g;} 
                                      if( $score>=.00067 && $score<=.00099){ $nucleotide=~s/G/C/g;} 
                                      #if( $score>=.300 && $score<=1.00){  #No mutation} 
                                  }
                          $new_string= $new_string . $nucleotide;
                      }
                      push(@new_gen,$new_string);
                  }         
              
              }       
         
          @new_gen=shuffle(@new_gen);
         
          $population_by_gen[0]=@g0;  # the input sequences
          $population_by_gen[$i]=@new_gen;  # The current total created, including those that don't go on to replicate
  
  
            #these next conditionals prevent the new_gen from fizzling out by making a larger range to select when the numbers are low    
            #The value to be changed is the +10(or whatever) that is before the )/100        
           
            # print "Sequences:".scalar(@new_gen)."\t";
                    
             if( (scalar(@new_gen)) < 10){
               my $v3=((int(rand($portion_decimal))+10)/100)*(scalar(@new_gen)); # the added number determins the minimum of the range that a decimal can be selected from ie 20-50 =.20-.50
               
               for(my $k=0;$k<($v3);$k++){  $filtered_new_gen[$k]=$new_gen[$k];}
               
            # print "Survivors:$v3\n";
                }
             elsif( (scalar(@new_gen)) < 20){
               my $v3=((int(rand($portion_decimal))+10)/100)*(scalar(@new_gen)); # the added number determins the minimum of the range that a decimal can be selected from ie 20-50 =.20-.50
               
               for(my $k=0;$k<($v3);$k++){  $filtered_new_gen[$k]=$new_gen[$k];}
               
            # print "Survivors:$v3\n";
                }
             else{
                   my $v3=((int(rand($portion_decimal))+10)/100)*(scalar(@new_gen));
                   for(my $k=0;$k<($v3);$k++){  $filtered_new_gen[$k]=$new_gen[$k];}
                 #  print "Survivors: $v3\n";
                 }
             $allgen{$i}=\@filtered_new_gen;
        }
         
    ################## Print Block
    ################## 
           # print Dumper(\\%allgen); #Print this to view the sequences in each generation
           # print Dumper(\\@population_by_gen);  #print this to view the total progeny of each of the generations in the %allgen (inlcuding the ones that didnt go on to populate the next gen     
              
            # print @{$allgen{1}} . "\n";
          
              for my$i(0..($replication_rounds-1))
            {
                 for my $j (0..(@{$allgen{$i}}))
                 {
                    my $month=(int($i/30)+1); # the counter divided by 30 will give a decimal, the int rounds it down... therefore .2 -->0 (then the +1) yields a january
                    if ($month<10){$month="0".$month;} ## too add a zero which mat lab reads easier so 09 instead of 9 for sept
                    my $date="000".(int($month/12))."/".$month."/".($i-(($month-1)*30)); # year is set at 0000 for now.... change 000 to 200 or whatever to make it a specific year
                  
                    print "\>RandomHA ". $date ." "."\n" . $allgen{$i}[$j-1] . "\n";
                  
                  # to print a file automatically in fasta format
                    #open(FILE,">>Rand_HA_.txt");
                    #print FILE "\>RandomHA ". $date ." " ."\n" . $allgen{$i}[$j-1] . "\n";
                    #close(FILE);
                }
            }
  ###################