in reply to Question about speeding a regexp count
#!/usr/bin/perl
use strict;
use warnings;
use Dumpvalue;

# Counts how often every overlapping substring of length 1, 2 and 3 occurs
# in the sequence stored in this script's DATA section, then dumps the
# resulting hash with Dumpvalue.

# Longest substring length to tally (1 = single characters, 2 = pairs, ...).
my $MAX_LEN = 3;

# Read the whole DATA section rather than only its first line, and drop all
# whitespace, so that a sequence wrapped over several lines (or pasted with
# spaces between chunks) is counted as one continuous string and blanks are
# never tallied as if they were part of the sequence.
my $DNAstr = join '', <DATA>;
$DNAstr =~ s/\s+//g;

my %count = %{ count_substrings($DNAstr, $MAX_LEN) };

my $d = Dumpvalue->new;
$d->dumpValues(\%count);

# count_substrings($sequence, $max_len)
#
# Tallies every overlapping substring of $sequence whose length is between
# 1 and $max_len inclusive.
#
#   $sequence - string to scan; an empty string yields an empty tally.
#   $max_len  - longest substring length to count.
#
# Returns a reference to a hash mapping each substring to its number of
# occurrences.
sub count_substrings {
    my ($sequence, $max_len) = @_;

    my %tally;
    my $total = length $sequence;

    for my $start (0 .. $total - 1) {
        for my $len (1 .. $max_len) {
            # A substring that would run past the end of the sequence is
            # incomplete and must not be counted; longer ones won't fit either.
            last if $start + $len > $total;
            $tally{ substr $sequence, $start, $len }++;
        }
    }

    return \%tally;
}
Which outputs__DATA__ AGATAGCGCTGATCGAGAGCTATAGCGATCGATCGGATCGATCGGGATCTTAGCGAAAAGCTCGAT TTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGATCGATCGCGCTTAGGAAATTCC CCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGATCGCGCTAGCTCGATCGCGCGCT AGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAGCTCGCTAGCTCGATCTTCTCGA TCGCGGCTAGGAGAGCTCGAGCTTCGAGGCTGCGAGGATCGCGGAGAGGAGGATCGAGATCGGATA GAGATCGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCT GCTAGCTCGCTAGCTCGATCTTCTCGATCGCGGCTAGGAGAGCTCGAGCTTCGAGGCTGCGAGGAT CGCGGAGAGGAGGATCGAGATCGGATCGAGAGCTATAGCGATCGATCGGATCGATCGGGATCTTAG CGAAAAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGATCGATCGCG CTTAGGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGATCGCGCTAGC TCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAGCTCGCTAGC TCGATCTTCTGATAGCGCTGATCGAGAGCTATAGCGATCGATCGGATCGATCGGGATCTTAGCGAA AAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGATCGATCGCGCTTA GGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGATCGCGCTAGCTCGA TCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAGCTCGCTAGCTCGA TCTTCTCGATCGCGGCTAGGAGAGCTCGAGCTTCGAGGCTGCGAGGATCGCGGAGAGGAGGATCGA GATCGGATAGAGATCGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGC TCGCTAGCTGCTAGCTCGCTAGCTCGATCTTCTCGATCGCGGCTAGGAGAGCTCGAGCTTCGAGGC TGCGAGGATCGCGGAGAGGAGGATCGAGATCGGATCGAGAGCTATAGCGATCGATCGGATCGATCG GGATCTTAGCGAAAAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGA TCGATCGCGCTTAGGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGAT CGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAG CTCGCTAGCTCGATCTTCGATAGCGCTGATCGAGAGCTATAGCGATCGATCGGATCGATCGGGATC TTAGCGAAAAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGATCGAT CGCGCTTAGGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGATCGCGC TAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAGCTCGC TAGCTCGATCTTCTCGATCGCGGCTAGGAGAGCTCGAGCTTCGAGGCTGCGAGGATCGCGGAGAGG AGGATCGAGATCGGATAGAGATCGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGC GCGATAGCTCGCTAGCTGCTAGCTCGCTAGCTCGATCTTCTCGATCGCGGCTAGGAGAGCTCGAGC 
TTCGAGGCTGCGAGGATCGCGGAGAGGAGGATCGAGATCGGATCGAGAGCTATAGCGATCGATCGG ATCGATCGGGATCTTAGCGAAAAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGA TCGATCGATCGATCGCGCTTAGGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGG ATAGAGATCGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTA GCTGCTAGCTCGCTAGCTCGATCTTC
0 HASH(0x1824334) 'A' => 535 'AA' => 84 'AAA' => 66 'AAG' => 6 'AAT' => 12 'AG' => 223 'AGA' => 64 'AGC' => 105 'AGG' => 54 'AT' => 228 'ATA' => 81 'ATC' => 111 'ATT' => 36 'C' => 504 'CC' => 48 'CCC' => 36 'CCG' => 12 'CG' => 286 'CGA' => 124 'CGC' => 117 'CGG' => 45 'CT' => 169 'CTA' => 69 'CTC' => 54 'CTG' => 19 'CTT' => 27 'G' => 675 'GA' => 264 'GAA' => 12 'GAG' => 87 'GAT' => 165 'GC' => 270 'GCC' => 6 'GCG' => 117 'GCT' => 147 'GG' => 141 'GGA' => 72 'GGC' => 33 'GGG' => 36 'T' => 490 'TA' => 186 'TAA' => 6 'TAG' => 129 'TAT' => 51 'TC' => 186 'TCC' => 6 'TCG' => 157 'TCT' => 22 'TG' => 25 'TGA' => 4 'TGC' => 15 'TGG' => 6 'TT' => 93 'TTA' => 36 'TTC' => 21 'TTG' => 6 'TTT' => 30
|
|---|