#! perl -slw use strict; use vars qw[$N]; use Benchmark::Timer; my $T= new Benchmark::Timer; $|++; $N ||= 10; # The number of random permutations of each string to analyse. ##my $start-seq = ; ## No need to count frequencies just to build any array. ##my @bases = split//, $start_seq; # Simulate the above two lines for testing my @bases = map{ qw[A C G T N][ rand 5 ] } 1..50_000; my $perms_count = 0; for( 1 .. $N ) { # Pick our first random 7 char string my $sub7= join'', @bases[ map{ rand @bases } 1 .. 7 ]; my $count = @bases - 7; # No. of 7 chars string to match against. $T->start( 'search' ); while( $count-- ) { printf "\r%d", $count; # print 'Checking: ', $sub7; #Unroll the match loop $perms_count++ if $sub7 =~ qr '[NT][GCATN][NG][NC][NG][NT][NG]'; $perms_count++ if $sub7 =~ qr '[NG][NT][NG][NC][NG][GCATN][NT]'; $perms_count++ if $sub7 =~ qr '[NA][GCATN][NC][NG][NC][NA][NC]'; $perms_count++ if $sub7 =~ qr '[NC][NA][NC][NG][NC][GCATN][NA]'; $perms_count++ if $sub7 =~ qr '[NT][GCATN][NG][NC][NG][NT][NG]'; $perms_count++ if $sub7 =~ qr '[NG][NT][NG][NC][NG][GCATN][NT]'; $perms_count++ if $sub7 =~ qr '[NA][GCATN][NC][NG][NC][NA][NC]'; $perms_count++ if $sub7 =~ qr '[NC][NA][NC][NG][NC][GCATN][NA]'; $perms_count++ if $sub7 =~ qr '[NT][GCATN][NG][NC][NG][NT][NG]'; $perms_count++ if $sub7 =~ qr '[NG][NT][NG][NC][NG][GCATN][NT]'; $perms_count++ if $sub7 =~ qr '[NA][GCATN][NC][NG][NC][NA][NC]'; $perms_count++ if $sub7 =~ qr '[NC][NA][NC][NG][NC][GCATN][NA]'; $perms_count++ if $sub7 =~ qr '[NT][GCATN][NG][NC][NG][NT][NG]'; $perms_count++ if $sub7 =~ qr '[NG][NT][NG][NC][NG][GCATN][NT]'; $perms_count++ if $sub7 =~ qr '[NA][GCATN][NC][NG][NC][NA][NC]'; $perms_count++ if $sub7 =~ qr '[NC][NA][NC][NG][NC][GCATN][NA]'; $perms_count++ if $sub7 =~ qr '[NT][GCATN][NG][NC][NG][NT][NG]'; $perms_count++ if $sub7 =~ qr '[NG][NT][NG][NC][NG][GCATN][NT]'; $perms_count++ if $sub7 =~ qr '[NA][GCATN][NC][NG][NC][NA][NC]'; $perms_count++ if $sub7 =~ qr '[NC][NA][NC][NG][NC][GCATN][NA]'; $perms_count++ if $sub7 =~ qr '[NT][GCATN][NG][NC][NG][NT][NG]'; $perms_count++ if $sub7 =~ qr '[NG][NT][NG][NC][NG][GCATN][NT]'; $perms_count++ if $sub7 =~ qr '[NA][GCATN][NC][NG][NC][NA][NC]'; $perms_count++ if $sub7 =~ qr '[NC][NA][NC][NG][NC][GCATN][NA]'; # Drop the first char and tack a new one on the end. $sub7 = substr( $sub7, 1 ) . $bases[ rand @bases ]; } $T->stop( 'search' ); } print 'Found: ', $perms_count, ' matches in'; $T->report; #### #! perl -slw use strict; sub p{ my @a = split '', shift; while( @_ ) { my @c= split '', shift; my @b=(); for my $c ( @c ) { push @b, $_ . $c for @a ; } @a = @b; } return @a; } my @combs = ( p( 'NT', 'GCATN', 'NG', 'NC', 'NG', 'NT', 'NG' ), p( 'NG', 'NT', 'NG', 'NC', 'NG', 'GCATN', 'NT' ), p( 'NA', 'GCATN', 'NC', 'NG', 'NC', 'NA', 'NC' ), p( 'NC', 'NA', 'NC', 'NG', 'NC', 'GCATN', 'NA' ), ); my %seen; @seen{ @combs } = (); print "The number of unique 7-char sequences that can be matched by the regexes is:", scalar keys %seen;