in reply to Challenge: CPU-optimized byte-wise or-equals (for a meter of beer)
Sadly (or maybe not), they can't compete with memchr. With gcc 3.3.3 the first one seems to come in second place. With gcc 4.1.2, the second one does.void bduggan_duff(SV *sv1, SV *sv2) { /* slightly modified avar_c_inplace */ char *sv1p, *sv2p; STRLEN sv1len, sv2len; SV *sv1_sv; STRLEN i; if (!SvROK(sv1) || !SvPOK((SV*)SvRV(sv1)) || !SvPOK(sv2)) { croak("Usage: avar_cee(\$s1, $s2)"); } sv1_sv = (SV*)SvRV(sv1); sv1p = SvPV(sv1_sv, sv1len); sv2p = SvPV(sv2, sv2len); if (sv1len != sv2len) { croak("The given strings must be of the same length"); } i = 0; switch (sv2len % 8) { case 0 : do { if (sv1p[i] == '\0') sv1p[i] = sv2p[i]; i++; case 1 : if (sv1p[i] == '\0') sv1p[i] = sv2p[i]; i++; case 2 : if (sv1p[i] == '\0') sv1p[i] = sv2p[i]; i++; case 3 : if (sv1p[i] == '\0') sv1p[i] = sv2p[i]; i++; case 4 : if (sv1p[i] == '\0') sv1p[i] = sv2p[i]; i++; case 5 : if (sv1p[i] == '\0') sv1p[i] = sv2p[i]; i++; case 6 : if (sv1p[i] == '\0') sv1p[i] = sv2p[i]; i++; case 7 : if (sv1p[i] == '\0') sv1p[i] = sv2p[i]; i++; } while (i<sv2len); } } void bduggan_duff2(SV *sv1, SV *sv2) { /* slightly modified avar_tye_c_inplace */ char *sv1p, *sv2p; + + STRLEN sv1len, sv2len; SV *sv1_sv; STRLEN i; if (!SvROK(sv1) || !SvPOK((SV*)SvRV(sv1)) || !SvPOK(sv2)) { croak("Usage: avar_cee(\$s1, $s2)"); } sv1_sv = (SV*)SvRV(sv1); sv1p = SvPV(sv1_sv, sv1len); sv2p = SvPV(sv2, sv2len); if (sv1len != sv2len) { croak("The given strings must be of the same length"); } char *pSrc = sv2p + sv2len; char *pDst = sv1p + sv2len; switch (sv2len % 8) { case 0 : do { if (!*pDst) { *pDst = *pSrc; } --pSrc; --pDst; case 1 : if (!*pDst) { *pDst = *pSrc; } --pSrc; --pDst; case 2 : if (!*pDst) { *pDst = *pSrc; } --pSrc; --pDst; case 3 : if (!*pDst) { *pDst = *pSrc; } --pSrc; --pDst; case 4 : if (!*pDst) { *pDst = *pSrc; } --pSrc; --pDst; case 5 : if (!*pDst) { *pDst = *pSrc; } --pSrc; --pDst; case 6 : if (!*pDst) { *pDst = *pSrc; } --pSrc; --pDst; case 7 : if (!*pDst) { *pDst = *pSrc; } --pSrc; --pDst; } while (sv2p <= pSrc); }
|
---|