Here's my brute-force try. Don't know if the call-overhead is bigger than the gain? I assumed a one-byte-is-one-character relationship (no wchar's yet). HTH
Update: Moved i and sum to head of function to cope with ancient compilers (thanks to davido). Simplified sizeof() expression.use Inline C; use strict; use warnings; use Time::HiRes qw(time); my $str1 = "abcdefabc"; my $str2 = "xyzabcabc@@@"; my $now = time; my $res; $res = deltasum( $str1, $str2 ) for (1..1_000_000); print "res=$res time=", ( time() - $now ), "us\n"; #res=9 time=0.819153070449829us __END__ __C__ #define DEBUG(x) int counter_tab[256]; int deltasum(char* a, char* b) { int i; int sum = 0; DEBUG( printf("IN: (%d:%s) (%d:%s)\n", strlen(a), a, strlen(b), b); +) bzero( counter_tab, sizeof( counter_tab ) ); for ( ; *a ; ++a ) ++counter_tab[ *a ]; for ( ; *b ; ++b ) --counter_tab[ *b ]; for ( i = 0; i < 256; ++i ) { /* maybe reduced to significant range? + */ sum += abs( counter_tab[i] ); DEBUG( if ( counter_tab[i] ) printf( "'%c' (%3d) x %5d\n", i, i, c +ounter_tab[i]); ) } DEBUG( printf("OUT: %d\n", sum); ) return sum; }
Debug sample:
IN: (9:abcdefabc) (12:xyzabcabc@@@) '@' ( 64) x -3 'd' (100) x 1 'e' (101) x 1 'f' (102) x 1 'x' (120) x -1 'y' (121) x -1 'z' (122) x -1 OUT: 9
Update: Approx. 60% speed-up when limiting to [ACGTacgt], still room for improvement...
Script shall mask ignored characters from returned value by using $delta = $res & 0xfffff;. Number of chars ignored is $ign = $res >> 20; (think system / $?).... __END__ __C__ #define DEBUG(x) #define GATTACA(x) (x)=='A' || (x)=='C' || (x)=='G' || (x)=='T' || \ (x)=='a' || (x)=='c' || (x)=='g' || (x)=='t' int deltasum_acgt(char* a, char* b) { int i; int sum = 0; int tab[8] = {0}; int ignored = 0; DEBUG( printf("IN (%d:%s) (%d:%s)!\n", strlen(a), a, strlen(b), b); +) for ( ; *a ; ++a ) { if ( GATTACA( *a ) ) ++tab[ *a & 0x07 ]; else ++ignored; } for ( ; *b ; ++b ) { if ( GATTACA( *b ) ) --tab[ *b & 0x07 ]; else ++ignored; } sum += abs( tab[1] ); /* A */ sum += abs( tab[3] ); /* C */ sum += abs( tab[7] ); /* G */ sum += abs( tab[4] ); /* T */ sum += ignored << 20; /* signal ill chars */ DEBUG( for (i=0; i<7; ++i) printf("%d -> %d\n", i, tab[i]); ) DEBUG( printf("OUT: %d ign=%d delta=%d\n", sum, ignored, sum & 0xff +fff); ) return sum; }
In reply to Re: The sum of absolute differences in the counts of chars in two strings.
by Perlbotics
in thread The sum of absolute differences in the counts of chars in two strings.
by BrowserUk
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |