in reply to Re: simple string comparison for efficiency
in thread simple string comparison for efficiency
I was wandering around SuperSearch looking for something else when I saw this and wondered, "Are the XORs with 'N' different from the XORs between A, C, G and T?" It turns out they are, so there is no need for any of the masking in this post.
#!/usr/bin/perl
# https://perlmonks.org/?node_id=766743
#
# Shows that the bytes produced by XOR-ing two different concrete bases
# (A, C, G, T) are disjoint from the bytes produced by XOR-ing any base
# with the wildcard N, so mismatches can be counted with a single tr///
# on the XOR of the two sequences, without masking out the N positions.
use strict;
use warnings;

use Data::Dump 'dd', 'pp';

# Part 1: tabulate the XOR byte of every unordered pair of distinct symbols
# to check that N mismatches differ from non-N mismatches.
my %thexor;
for my $x ( qw( A T G C N ) ) {
    for my $y ( qw( A T G C N ) ) {
        # "lt" visits each unordered pair once and skips identical symbols.
        # Both operands are strings, so ^ is the bitwise *string* XOR.
        $thexor{ $x ^ $y } .= "$x$y " if $x lt $y;
    }
}
dd \%thexor;    # yes they are

# mismatch "\2\4\6\23\25\27"   (two different concrete bases)
# match    "\0\t\r\17\32"      (identical symbols, or one side is N)

# count_mismatches($x, $y)
#   Returns the number of positions at which the two sequences hold
#   different concrete bases; a position where either side is N matches.
#   String XOR zero-pads the shorter operand, so positions present in only
#   one sequence would otherwise go unnoticed; each such position is
#   counted as one mismatch.
sub count_mismatches {
    my ( $x, $y ) = @_;

    my $xor = $x ^ $y;

    # tr/// does not interpolate, so the mismatch bytes are spelled out.
    my $differing = ( $xor =~ tr/\2\4\6\23\25\27// );
    my $overhang  = abs( length($x) - length($y) );

    return $differing + $overhang;
}

# Part 2: apply the check to each two-line record in DATA.
{
    local $/ = '';    # paragraph mode: one blank-line-separated record per read

    while ( my $record = <DATA> ) {
        my ( $x, $y ) = split ' ', $record;

        # A record needs two sequences; XOR with undef would only warn.
        unless ( defined $y ) {
            warn "skipping incomplete record $.\n";
            next;
        }

        my $bad = count_mismatches( $x, $y );    # therefore this counts mismatches
        print "$x ^ $y => ", pp( $x ^ $y ), $bad ? ' FAIL' : ' ok', "\n";
    }
}

__DATA__
ATGNCNC
ATGACNN

ATGNCNC
TTGNNNC
Outputs:
{
  "\2" => "AC ",
  "\4" => "CG ",
  "\6" => "AG ",
  "\t" => "GN ",
  "\r" => "CN ",
  "\17" => "AN ",
  "\23" => "GT ",
  "\25" => "AT ",
  "\27" => "CT ",
  "\32" => "NT ",
}
ATGNCNC ^ ATGACNN => "\0\0\0\17\0\0\r" ok
ATGNCNC ^ TTGNNNC => "\25\0\0\0\r\0\0" FAIL
|
|---|