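The OP seems to want to match at the start of the target string with either an exact match or a match in which exactly one position of the target is an N. The code below counts the positions at which the query and the target prefix agree (the null characters in their bitwise string XOR) and accepts at most one disagreement. Note, however, that it accepts any single mismatching character, not only N — see the false-positive 'CBCGTNNN' hit in the output: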
>perl -wMstrict -le
"my @queries = qw[ GCGAT CACGT ];
 ;;
 my @targets = qw(GNGATNNN GCGANBBB CNCGTNNN CBCGTNNN );
 ;;
 for my $q (@queries) {
   for my $t (@targets) {
     my $matched = ($q ^ substr($t, 0, length $q)) =~ tr[\0][\0];
     if($matched >= (length($q) - 1)) {
       print qq{'$q' matched '$t'};
       }
     }
   }
 "
'GCGAT' matched 'GNGATNNN'
'GCGAT' matched 'GCGANBBB'
'CACGT' matched 'CNCGTNNN'
'CACGT' matched 'CBCGTNNN'
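Here's a variation that avoids matching on an arbitrary single-character mismatch (such as the 'CBCGTNNN' hit above): a single mismatch is accepted only when the XOR of the query and target characters at that position equals some query character XORed with 'N' (although the conditional logic is a bit obscure).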
>perl -wMstrict -le
"use List::MoreUtils qw(uniq);
 ;;
 my @queries = qw(GCGAT CACGTT);
 ;;
 my $n_diff =
   join '',
   uniq
   map { sprintf '\x%02x', ord($_ ^ 'N') }
   map { split // }
   @queries
   ;
 $n_diff = eval qq{ sub { return \$_[0] =~ tr/$n_diff/$n_diff/; } };
 ;;
 my @targets = qw(
   GNGATNNNHIT GCGANBBBHIT CNCGTTNNNHIT CACGTTNNNHIT
   CBCGTTNNNMISS CNNGTTNNNMISS NCACGTTNNNMISS
   );
 ;;
 for my $q (@queries) {
   my $len_q = length $q;
   TARGET:
   for my $t (@targets) {
     my $mask = $q ^ substr $t, 0, $len_q;
     my $nulls = $mask =~ tr{\0}{\0};
     next TARGET if $len_q > $nulls + 1
       or $len_q > $nulls && $n_diff->($mask) != 1
       ;
     print qq{'$q' matched '$t'};
     }
   }
 "
'GCGAT' matched 'GNGATNNNHIT'
'GCGAT' matched 'GCGANBBBHIT'
'CACGTT' matched 'CNCGTTNNNHIT'
'CACGTT' matched 'CACGTTNNNHIT'
In reply to Re^2: string match using with an N in any position
by AnomalousMonk
in thread string match using with an N in any position
by biobee07
| For: | Use: |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |