# Reads a sequence file whose records start with a '>' header line, hands
# each record's sequence to process_nuc(), then prints a tab-separated
# matrix: one header row listing every k-mer column (k = 1..4) followed by
# one row per key of %counts.
#
# Usage: <this script> <sequence_file>
#
# NOTE(review): process_nuc() is defined outside this section; presumably it
# fills %counts, %counts_1 .. %counts_4 and %total_mono -- confirm there.
my $file1 = shift;
die "Usage: $0 <sequence_file>\n" unless defined $file1;

# Column order of the output matrix.  The mononucleotide columns are
# A, T, C, G; every longer k-mer is enumerated with A, T, G, C at each
# position (left-most position varying slowest).  That is the exact order of
# the original hand-written table, so the output columns are unchanged.
my @array = qw(A T C G);
{
    my @bases = qw(A T G C);
    my @kmers = @bases;
    for my $len (2 .. 4) {
        my @longer;
        for my $prefix (@kmers) {
            for my $base (@bases) {
                push @longer, $prefix . $base;
            }
        }
        push @array, @longer;
        @kmers = @longer;
    }
}

# State of the record currently being read.
my $name1 = "";
my $seq1  = "";

# File-scope tables; nothing in this section writes to them.
my %counts     = ();
my %counts_1   = ();
my %counts_2   = ();
my %counts_3   = ();
my %counts_4   = ();
my %total_mono = ();

# Three-argument open with a lexical handle: the file name can no longer be
# interpreted as an open mode, and $! reports why the open failed.
open(my $in_fh, '<', $file1)
    or die("Couldn't open file $file1: $!\n");

while (my $i = <$in_fh>) {
    # Skip lines without any word character (blank lines etc.).
    next unless ($i =~ /\w+/);
    chomp($i);

    if ($i =~ /^>(\S+)/) {
        # A new header: flush the previous record, if it had any sequence.
        unless ($seq1 eq "") {
            $seq1 =~ s/[^ATCG]//g;    # keep only unambiguous bases
            process_nuc($seq1, $name1);
        }
        $seq1  = "";
        $name1 = $1;
    }
    else {
        # Sequence line: upper-case and append to the current record.
        $seq1 .= uc($i);
    }
}
close $in_fh;

# Flush the last record of the file.
# NOTE(review): unlike the in-loop flush, this call also happens when no
# sequence was collected (e.g. an empty input file) -- confirm that
# process_nuc() copes with an empty sequence and name.
$seq1 =~ s/[^ATCG]//g;
process_nuc($seq1, $name1);

# Header row: "Matrix_<number of columns>" followed by every k-mer.
print join("\t", "Matrix_" . scalar(@array), @array), "\n";

my %norm_1 = ();
my %norm_2 = ();
my %norm_3 = ();
my %norm_4 = ();

foreach my $k (keys(%counts)) {
    print "$k";
    my $value = 0;

    # Mononucleotide frequencies: count divided by the record's total.
    # NOTE(review): dies with "Illegal division by zero" if $total_mono{$k}
    # is 0 or missing -- confirm process_nuc() never leaves it that way.
    $norm_1{'A'} = $counts_1{$k}{'A'} / $total_mono{$k};
    $norm_1{'T'} = $counts_1{$k}{'T'} / $total_mono{$k};
    $norm_1{'C'} = $counts_1{$k}{'C'} / $total_mono{$k};
    $norm_1{'G'} = $counts_1{$k}{'G'} / $total_mono{$k};