#!/usr/bin/perl -w use strict; use List::Util 'shuffle'; use Data::Dumper qw(Dumper); #g=generation # Current seeding sequence is HA my @g0= ("ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATACATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA"); #Initial generation "seeding generation" ######### ######### Paramters ######### my $viralfitness=30; # The size of the range a random number is picked from that determines how many copies each viral sequence that makes it through a round will make my $replication_rounds=120; my $portion_decimal=50; # this number (when divided by 100 below ) provides the range of fractions to select a random portion for the next gen so for example 50 will result in a random number being selected from the range .1-.5, that number will then be mutiplied by the scalar(@new_gen) to get the array coordinates to seed the next gen ########### ########### ########### my %allgen; #hash that holds sequences my $i; my $j; my @population_by_gen=(); # this array keeps track of all the viruses generated in each round including the ones that dont make it to the next %allgen=(0 =>\@g0); for($i=1;$i<$replication_rounds;$i++) # This $i determines the number of replications { my @new_gen=(); my @last_gen=@{$allgen{$i-1}}; my @filtered_new_gen=(); foreach my $seq_string(@last_gen) { chomp $seq_string; my $vf_rand=int(rand($viralfitness))+1; #The random number selected from the range specified by $viralfitness; for($j=0;$j<($vf_rand);$j++) #This loop determines how many copies each sequence will make of itself { my $new_string; my @nucleotide_array = split(//,$seq_string); foreach my $nucleotide(@nucleotide_array) { chomp $nucleotide; my($T,$G,$C,$A); #These mutations rates are just for testing now they dont reflect accurate viral mutation rates yet if ($nucleotide eq 'A'){ # $T=.1;$G=.2;$C=.3;$A=.7; my $score = (int rand(100000))/100000; if( $score>=00000 && $score<=.00033){ $nucleotide=~s/A/T/g;} if( $score>=.000334 && $score<=.00066){ $nucleotide=~s/A/G/g;} if( $score>=.00067 && $score<=.00099){ $nucleotide=~s/A/C/g;} # if( $score>=.300 && $score<=1.00){ Do NOTHING } } } elsif ($nucleotide eq 'T'){ # $A=.1;$G=.2;$C=.3;$T=.7; my $score = (int rand(100000))/100000; if( $score>=00000 && $score<=.000333){ $nucleotide=~s/T/A/g;} if( $score>=.00034 && $score<=.00066){ $nucleotide=~s/T/G/g;} if( $score>=.00067 && $score<=.00099){ $nucleotide=~s/T/C/g;} #if( $score>=.300 && $score<=1.00){ DO NOTHING } } } elsif ($nucleotide eq 'C'){ # $T=.1;$G=.2;$A=.3;$C=.7; my $score = (int rand(100000))/100000; if( $score>=00000 && $score<=.00033){ $nucleotide=~s/C/T/g;} if( $score>=.00034 && $score<=.00066){ $nucleotide=~s/C/G/g;} if( $score>=.00067 && $score<=.00099){ $nucleotide=~s/C/A/g;} #if( $score>=.300 && $score<=1.00){ #No mutation} } elsif ($nucleotide eq 'G'){ # $T=.1;$A=.2;$C=.3;$G=.7; my $score = (int rand(100000))/100000; if( $score>=00000 && $score<=.00033){ $nucleotide=~s/G/T/g;} if( $score>=.00034 && $score<=.00066){ $nucleotide=~s/G/A/g;} if( $score>=.00067 && $score<=.00099){ $nucleotide=~s/G/C/g;} #if( $score>=.300 && $score<=1.00){ #No mutation} } $new_string= $new_string . $nucleotide; } push(@new_gen,$new_string); } } @new_gen=shuffle(@new_gen); $population_by_gen[0]=@g0; # the input sequences $population_by_gen[$i]=@new_gen; # The current total created, including those that don't go on to replicate #these next conditionals prevent the new_gen from fizzling out by making a larger range to select when the numbers are low #The value to be changed is the +10(or whatever) that is before the )/100 # print "Sequences:".scalar(@new_gen)."\t"; if( (scalar(@new_gen)) < 10){ my $v3=((int(rand($portion_decimal))+10)/100)*(scalar(@new_gen)); # the added number determins the minimum of the range that a decimal can be selected from ie 20-50 =.20-.50 for(my $k=0;$k<($v3);$k++){ $filtered_new_gen[$k]=$new_gen[$k];} # print "Survivors:$v3\n"; } elsif( (scalar(@new_gen)) < 20){ my $v3=((int(rand($portion_decimal))+10)/100)*(scalar(@new_gen)); # the added number determins the minimum of the range that a decimal can be selected from ie 20-50 =.20-.50 for(my $k=0;$k<($v3);$k++){ $filtered_new_gen[$k]=$new_gen[$k];} # print "Survivors:$v3\n"; } else{ my $v3=((int(rand($portion_decimal))+10)/100)*(scalar(@new_gen)); for(my $k=0;$k<($v3);$k++){ $filtered_new_gen[$k]=$new_gen[$k];} # print "Survivors: $v3\n"; } $allgen{$i}=\@filtered_new_gen; } ################## Print Block ################## # print Dumper(\\%allgen); #Print this to view the sequences in each generation # print Dumper(\\@population_by_gen); #print this to view the total progeny of each of the generations in the %allgen (inlcuding the ones that didnt go on to populate the next gen # print @{$allgen{1}} . "\n"; for my$i(0..($replication_rounds-1)) { for my $j (0..(@{$allgen{$i}})) { my $month=(int($i/30)+1); # the counter divided by 30 will give a decimal, the int rounds it down... therefore .2 -->0 (then the +1) yields a january if ($month<10){$month="0".$month;} ## too add a zero which mat lab reads easier so 09 instead of 9 for sept my $date="000".(int($month/12))."/".$month."/".($i-(($month-1)*30)); # year is set at 0000 for now.... change 000 to 200 or whatever to make it a specific year print "\>RandomHA ". $date ." "."\n" . $allgen{$i}[$j-1] . "\n"; # to print a file automatically in fasta format #open(FILE,">>Rand_HA_.txt"); #print FILE "\>RandomHA ". $date ." " ."\n" . $allgen{$i}[$j-1] . "\n"; #close(FILE); } } ###################