And we have a (provisional) winner! (Needs more testing.)
Your (my C version of) tail recursive algorithm beats (my C version of) roboticus' implementation of his/graff's/hdb's/bitingduck's iterative algorithm on a 3GB buffer with a 2GB offset.
The output shows 10 elements at the beginning; either side of the offset; and at the end; before and after the swap:
C:\test\C>bufswap 402653184 268435456 size:402653184 offset;268435456 [ 0 1 2 3 4 5 6 7 8 9 ...268435446 268435 +447 268435448 268435449 268435450 268435451 268435452 268435453 26843 +5454 268435455 268435456 268435457 268435458 268435459 268435460 2684 +35461 268435462 268435463 268435464 268435465 ...402653174 402653175 +402653176 402653177 402653178 402653179 402653180 402653181 402653182 + 402653183 ] [268435456 268435457 268435458 268435459 268435460 268435461 268435462 + 268435463 268435464 268435465 ...134217718 134217719 134217720 13421 +7721 134217722 134217723 134217724 134217725 134217726 134217727 1342 +17728 134217729 134217730 134217731 134217732 134217733 134217734 134 +217735 134217736 134217737 ...268435446 268435447 268435448 268435449 + 268435450 268435451 268435452 268435453 268435454 268435455 ] iterative took 3.217107510 [ 0 1 2 3 4 5 6 7 8 9 ...268435446 268435 +447 268435448 268435449 268435450 268435451 268435452 268435453 26843 +5454 268435455 268435456 268435457 268435458 268435459 268435460 2684 +35461 268435462 268435463 268435464 268435465 ...402653174 402653175 +402653176 402653177 402653178 402653179 402653180 402653181 402653182 + 402653183 ] [268435456 268435457 268435458 268435459 268435460 268435461 268435462 + 268435463 268435464 268435465 ...134217718 134217719 134217720 13421 +7721 134217722 134217723 134217724 134217725 134217726 134217727 1342 +17728 134217729 134217730 134217731 134217732 134217733 134217734 134 +217735 134217736 134217737 ...268435446 268435447 268435448 268435449 + 268435450 268435451 268435452 268435453 268435454 268435455 ] recursive took 2.445034532
The C code (needs some sanity checks):
/* Drawn from roboticus' (1118303) & hdb's(1118256) iterative implemen +tations (Algorithm also described by graff & bitingduck) */ void xlateBuffersI( register U64 *p, const U32 size, const U32 offset +) { U32 register cnt = size, dst = 0, cmp = 0; U64 register tmp = p[ dst ]; assert( offset < size ); assert( ( (U64)p & 7ull ) == 0 ); while( cnt-- ) { U32 register src = ( dst + offset ) % size; if( src == cmp ) { p[ dst ] = tmp; dst = ++cmp; tmp = p[ dst ]; } else { p[ dst ] = p[ src ]; dst = src; } } return; } __forceinline void swapElems( register U64 *p, const U32 x, const U32 +y ) { register U64 temp = p[ x ]; p[ x ] = p[ y ]; p[ y ] = temp; } /* Adapted from anonymonk's post 1118262 */ void xlateBuffersR( U64 *p, U32 size, U32 x, U32 y ) { U32 i, savedY = y; if( y == size ) return; for( i = x; i < savedY; ++i ) { if( y == size ) y = savedY; swapElems( p, x++, y++ ); } xlateBuffersR( p, size, x, y ); }
In reply to Re^4: [OT] Swapping buffers in place.
by BrowserUk
in thread [OT] Swapping buffers in place.
by BrowserUk
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |