C:\test\C>bufswap 402653184 268435456 size:402653184 offset;268435456 [ 0 1 2 3 4 5 6 7 8 9 ...268435446 268435447 268435448 268435449 268435450 268435451 268435452 268435453 268435454 268435455 268435456 268435457 268435458 268435459 268435460 268435461 268435462 268435463 268435464 268435465 ...402653174 402653175 402653176 402653177 402653178 402653179 402653180 402653181 402653182 402653183 ] [268435456 268435457 268435458 268435459 268435460 268435461 268435462 268435463 268435464 268435465 ...134217718 134217719 134217720 134217721 134217722 134217723 134217724 134217725 134217726 134217727 134217728 134217729 134217730 134217731 134217732 134217733 134217734 134217735 134217736 134217737 ...268435446 268435447 268435448 268435449 268435450 268435451 268435452 268435453 268435454 268435455 ] iterative took 3.217107510 [ 0 1 2 3 4 5 6 7 8 9 ...268435446 268435447 268435448 268435449 268435450 268435451 268435452 268435453 268435454 268435455 268435456 268435457 268435458 268435459 268435460 268435461 268435462 268435463 268435464 268435465 ...402653174 402653175 402653176 402653177 402653178 402653179 402653180 402653181 402653182 402653183 ] [268435456 268435457 268435458 268435459 268435460 268435461 268435462 268435463 268435464 268435465 ...134217718 134217719 134217720 134217721 134217722 134217723 134217724 134217725 134217726 134217727 134217728 134217729 134217730 134217731 134217732 134217733 134217734 134217735 134217736 134217737 ...268435446 268435447 268435448 268435449 268435450 268435451 268435452 268435453 268435454 268435455 ] recursive took 2.445034532 #### /* Drawn from roboticus' (1118303) & hdb's(1118256) iterative implementations (Algorithm also described by graff & bitingduck) */ void xlateBuffersI( register U64 *p, const U32 size, const U32 offset ) { U32 register cnt = size, dst = 0, cmp = 0; U64 register tmp = p[ dst ]; assert( offset < size ); assert( ( (U64)p & 7ull ) == 0 ); while( cnt-- ) { U32 register src = ( dst + offset ) % size; if( src == cmp ) { p[ dst ] = tmp; dst = ++cmp; tmp = p[ dst ]; } else { p[ dst ] = p[ src ]; dst = src; } } return; } __forceinline void swapElems( register U64 *p, const U32 x, const U32 y ) { register U64 temp = p[ x ]; p[ x ] = p[ y ]; p[ y ] = temp; } /* Adapted from anonymonk's post 1118262 */ void xlateBuffersR( U64 *p, U32 size, U32 x, U32 y ) { U32 i, savedY = y; if( y == size ) return; for( i = x; i < savedY; ++i ) { if( y == size ) y = savedY; swapElems( p, x++, y++ ); } xlateBuffersR( p, size, x, y ); }