[0] Perl> sub steps { my($i,$m,$o) = (0,@_); print( "$i: $m $o" ), ++$i,( $m >= $o ? ($m -= $o) : ($o -= $m )) while $m and $o; };; [0] Perl> steps( 9, 6 );; 0: 9 6 1: 3 6 2: 3 3 [0] Perl> steps( 17, 9 );; 0: 17 9 1: 8 9 2: 8 1 3: 7 1 4: 6 1 5: 5 1 6: 4 1 7: 3 1 8: 2 1 9: 1 1 #### [0] Perl> sub steps { my($i,$m,$o) = (0,@_); ++$i,( $m >= $o ? ($m -= $o) : ($o -= $m )) while $m and $o; print $i; };; Subroutine steps redefined at (eval 17) line 1, line 9. [0] Perl> steps( 2**29, 2**28-1 );; 134217731 #### p$ = 80 size$ = 88 x$ = 96 y$ = 104 swaps$ = 112 _xchgBuffersR PROC ; 53 : U32 _xchgBuffersR( register U64 *p, const U32 size, register U32 x, register U32 y, U32 register swaps ) { $LN19: mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi push rdi push r12 push r13 push r14 push r15 sub rsp, 32 ; 00000020H mov rax, rcx movsxd rbx, r9d movsxd r15, r8d and eax, 7 mov esi, edx mov r13, rcx mov r14, r15 mov rdi, rbx mov QWORD PTR tv246[rsp], rax npad 8 $LL12@xchgBuffer@2: ############## **HERE** (see below) ; 54 : U32 register i; ; 55 : U32 const savedY = y; mov ebp, ebx mov r12, rdi ; 56 : ; 57 : assert( y <= size ); cmp ebx, esi jbe SHORT $LN8@xchgBuffer@2 lea rdx, OFFSET FLAT:$SG3784 lea rcx, OFFSET FLAT:$SG3785 mov r8d, 57 ; 00000039H call _wassert mov rax, QWORD PTR tv246[rsp] $LN8@xchgBuffer@2: ; 58 : assert( ( (U64)p & 7ull ) == 0 ); test rax, rax je SHORT $LN9@xchgBuffer@2 lea rdx, OFFSET FLAT:$SG3788 lea rcx, OFFSET FLAT:$SG3789 mov r8d, 58 ; 0000003aH call _wassert mov rax, QWORD PTR tv246[rsp] $LN9@xchgBuffer@2: ; 59 : ; 60 : if( y == size ) return swaps; cmp ebx, esi je SHORT $LN18@xchgBuffer@2 ############# recursion end condition jumps to **THERE** ; 61 : ; 62 : for( i = x; i < savedY; ++i ) { cmp r15d, ebx jae SHORT $LL12@xchgBuffer@2 mov eax, ebx sub eax, r15d mov edx, eax add DWORD PTR swaps$[rsp], edx add r15d, eax $LL4@xchgBuffer@2: ; 63 : if( y == size ) y = savedY; cmp ebx, esi jne SHORT $LN1@xchgBuffer@2 mov ebx, ebp mov rdi, r12 $LN1@xchgBuffer@2: ; 64 : swapElems( p, x++, y++ ); mov rax, QWORD PTR [r13+rdi*8] mov rcx, QWORD PTR [r13+r14*8] inc rdi mov QWORD PTR [r13+r14*8], rax mov QWORD PTR [r13+rdi*8-8], rcx inc r14 inc ebx sub rdx, 1 jne SHORT $LL4@xchgBuffer@2 ; 65 : ++swaps; ; 66 : } ; 67 : return _xchgBuffersR( p, size, x, y, swaps ); mov rax, QWORD PTR tv246[rsp] jmp $LL12@xchgBuffer@2 ############### and just before it, it unconditionally jumps to **HERE** $LN18@xchgBuffer@2: ############### **THERE** ; 59 : ; 60 : if( y == size ) return swaps; mov eax, DWORD PTR swaps$[rsp] ; 68 : } mov rbx, QWORD PTR [rsp+88] mov rbp, QWORD PTR [rsp+96] mov rsi, QWORD PTR [rsp+104] add rsp, 32 ; 00000020H pop r15 pop r14 pop r13 pop r12 pop rdi ret 0 _xchgBuffersR ENDP #### C:\test\C>bufswap 536870912 268435455 2 ### 2^29 2^28-1 size:536870912 offset;268435455 [ 0 1 ... 268435453 268435454 ^ 268435455 268435456 ... 536870910 536870911 ] [ 268435455 268435456 ... 536870910 536870911 ^ 0 1 ... 268435453 268435454 ] iterative: swaps:536870912 took 7.359985176 secs. [ 0 1 ... 268435453 268435454 ^ 268435455 268435456 ... 536870910 536870911 ] [ 268435455 268435456 ... 536870910 536870911 ^ 0 1 ... 268435453 268435454 ] recursive: swaps:536870911 took 3.762964774 secs. [ 0 1 ... 268435453 268435454 ^ 268435455 268435456 ... 536870910 536870911 ] [ 268435455 268435456 ... 536870910 536870911 ^ 0 1 ... 268435453 268435454 ] reversive: swaps:536870911 took 4.901475821 secs.