Here is a standalone demonstration that the code I posted works just fine:
#! perl -slw
use strict;
use Time::HiRes qw[ time ];

# Count every length-N substring (k-mer) over the alphabet A, C, G, T in the
# FASTA records stored after __DATA__, then print one "KMER ::= count" line
# per possible k-mer (0 for k-mers that never occur).
#
# Usage:  script.pl [-N=<length>]
#   -N is picked up by the -s switch on the #! line and defaults to 7.
#   The -l switch on the #! line supplies the newline after each print.

# gen( $k, @alphabet )
#   Returns every string of length $k that can be built from @alphabet,
#   ordered with the first symbol varying slowest, e.g.
#       gen( 2, qw[A C] )  ->  AA AC CA CC
#   Returns the empty list when $k < 1, so a bad length cannot recurse
#   without end.
sub gen {
    my( $k, @alphabet ) = @_;

    return if $k < 1;
    return @alphabet if $k == 1;

    # Build the shorter words once and reuse them for every leading symbol.
    my @tails = gen( $k - 1, @alphabet );

    return map { my $head = $_; map { $head . $_ } @tails } @alphabet;
}

our $N //= 7;

# Reject lengths that make no sense before doing any work.
die "N must be a positive integer (got '$N')\n"
    unless $N =~ /\A[1-9][0-9]*\z/;

# Slurp the whole DATA section, upper-cased so lower-case bases are counted
# together with their upper-case forms.
my $fasta = do { local $/; uc( <DATA> ) };

# Split into records on '>'; the first line of each record is its header and
# the remaining lines are joined into one sequence string.
# Note: records that share a header overwrite one another in %seqs.
my %seqs;
for my $record ( grep { length } split '>', $fasta ) {
    my( $id, @lines ) = split "\n", $record;
    next unless defined $id;        # chunk held nothing but newlines

    my $seq = join '', @lines;
    $seq =~ tr/ \t\r//d;            # drop stray blanks and CRs (CRLF data)

    $seqs{ $id } = $seq;
}

# Only the counting loop is timed, not the parsing above.
my $start = time();

my %counts;
for my $seq ( values %seqs ) {
    # Slide a window of width $N over the sequence; the range is empty when
    # the sequence is shorter than $N.
    ++$counts{ substr $seq, $_, $N } for 0 .. length( $seq ) - $N;
}

print "Took ", time() - $start;

print "$_ ::= ", $counts{ $_ } // 0 for gen( $N, qw[A C G T] );

__DATA__
> DNA1
GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT
TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG
GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT
CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA
> DNA2
AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT
GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA
AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGC
CAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAAT
TTTATCTTTAGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACA
> DNA3
TTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCC
GCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAAC
CAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA
AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAA
ATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGC
AAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAG
> DNA4
AGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTC
CCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGT
AAATAATAGGAGCTTAAACCCCCTTATTTctaggactatgagaatcgaac
ccatccctgagaatccaaaattctccgtgccacctatcacaccccatcct
aAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTG
GTTATACCCTTCCCGTACTAATTAATCCCCTGGCCCAACCCGTCATCTAC
And some output:
C:\test>976237-2 -N=2
Took 0.000387907028198242
AA ::= 97
AC ::= 86
AG ::= 49
AT ::= 84
CA ::= 99
CC ::= 130
CG ::= 26
CT ::= 71
GA ::= 35
GC ::= 43
GG ::= 27
GT ::= 36
TA ::= 85
TC ::= 68
TG ::= 39
TT ::= 71
In reply to Re^3: counting the number of 16384 pattern matches in a large DNA sequence (100x faster?)
by BrowserUk
in thread counting the number of 16384 pattern matches in a large DNA sequence
by anonym
| For: | Use: |
|------|------|
| `&` | `&amp;` |
| `<` | `&lt;` |
| `>` | `&gt;` |
| `[` | `&#91;` |
| `]` | `&#93;` |