# Builds and tests a regex that matches a pair of comma-separated sub-strings,
# each 1 to 4 characters long, drawn from the bases A, G, C, T, with no base
# occurring more than once inside a sub-string.

use strict;
use warnings;    # append  FATAL => 'all'  to make every warning fatal

# A fixed plan would be  tests => N + 1  (Test::NoWarnings adds 1 test).
use Test::More 'no_plan';
use Test::NoWarnings;

# Alphabet of permitted characters.
my $bases = 'AGCT';

# Matches exactly one permitted base.
my $agct = qr{ [\Q$bases\E] }xms;

# Zero-width guard placed at the start of a sub-string: one negative
# lookahead per base, each rejecting a run of bases in which that base occurs
# twice.  The leading $agct* lets the repeated base start anywhere in the run,
# not only at its first character, so e.g. 'GAA' is refused as well as 'AAG'.
# Because $agct cannot match ',' the lookahead never looks past the current
# sub-string.
my $no_repeat = do {
    my $lookaheads = join ' ',
        map { qr{ (?! $agct* \Q$_\E $agct* \Q$_\E ) }xms }
        split //, $bases;
    qr{ $lookaheads }xms;
};

# One sub-string: 1 to 4 bases delimited by word boundaries.  The explicit
# group keeps '{1,4}' from being read as a hash subscript on $agct.
my $sub_string = qr{ \b $no_repeat (?: $agct ){1,4} \b }xms;

# Two sub-strings separated by a comma.  The pattern is not anchored to the
# ends of the string, so it finds a pair inside a longer string too.
my $pair = qr{ $sub_string , $sub_string }xms;

# Each array-ref vector is [ string, expected-to-match flag, description ];
# a plain string is a section heading that is only echoed via note().
VECTOR:
for my $ar_vector (
    "all the following should be accepted",
    [ 'A,G',       1, '' ],
    [ 'AG,CT',     1, '' ],
    [ 'TC,CA',     1, '' ],
    [ 'GAT,CGA',   1, '' ],
    [ 'CGAT,TG',   1, '' ],
    [ 'CGAT,CTGA', 1, '' ],
    "all the following should be rejected",
    [ '',          0, 'empty string' ],
    [ ' ',         0, 'space' ],
    [ ',',         0, 'comma, no sub-strings' ],
    [ ',G',        0, 'missing 1st sub-string' ],
    [ 'G,',        0, 'missing 2nd sub-string' ],
    [ 'ACGT',      0, 'missing comma' ],
    [ 'X,A',       0, 'incorrect character' ],
    [ 'AA,G',      0, 'repetition of character in 1st substring' ],
    [ 'ACGTA,G',   0, 'repetition of character in 1st substring' ],
    [ 'AC,GGC',    0, 'repetition of character in 2nd sub-string' ],
    [ 'AC,ACGTA',  0, 'repetition of character in 2nd sub-string' ],
    [ 'ATGA,TGG',  0, 'repetition in both sub-strings' ],
    [ 'ATCXG,AAC', 0, 'incorrect character and repetition' ],
    # Regression vectors: the repeated base is not the first character of
    # its sub-string.
    [ 'GAA,C',     0, 'repetition not involving 1st character of 1st sub-string' ],
    [ 'A,CGG',     0, 'repetition not involving 1st character of 2nd sub-string' ],
    [ 'GATA,C',    0, 'non-adjacent repetition not involving 1st character' ],
    )
{
    if (not ref $ar_vector) {
        note $ar_vector;
        next VECTOR;
    }

    my ($str, $match, $msg) = @$ar_vector;

    if ($match) {
        like $str, $pair, qq{'$str'};
    }
    else {
        unlike $str, $pair, qq{'$str': $msg};
    }
}    # end for VECTOR

####
# Recorded transcript of a run of this script, kept verbatim.  It predates the
# three regression vectors added above; with them the script is expected to
# report tests 20-22 for those vectors and 'no warnings' as test 23.

=begin comment

c:\@Work\Perl\monks\naderra>perl agct_substrings_1.pl
# all the following should be accepted
ok 1 - 'A,G'
ok 2 - 'AG,CT'
ok 3 - 'TC,CA'
ok 4 - 'GAT,CGA'
ok 5 - 'CGAT,TG'
ok 6 - 'CGAT,CTGA'
# all the following should be rejected
ok 7 - '': empty string
ok 8 - ' ': space
ok 9 - ',': comma, no sub-strings
ok 10 - ',G': missing 1st sub-string
ok 11 - 'G,': missing 2nd sub-string
ok 12 - 'ACGT': missing comma
ok 13 - 'X,A': incorrect character
ok 14 - 'AA,G': repetition of character in 1st substring
ok 15 - 'ACGTA,G': repetition of character in 1st substring
ok 16 - 'AC,GGC': repetition of character in 2nd sub-string
ok 17 - 'AC,ACGTA': repetition of character in 2nd sub-string
ok 18 - 'ATGA,TGG': repetition in both sub-strings
ok 19 - 'ATCXG,AAC': incorrect character and repetition
ok 20 - no warnings
1..20

=end comment

=cut