[0] Perl> sub steps { my($i,$m,$o) = (0,@_); print( "$i: $m $o" ), ++$i,( $m >= $o ? ($m -= $o) : ($o -= $m )) while $m and $o; };;
[0] Perl> steps( 9, 6 );;
0: 9 6
1: 3 6
2: 3 3
[0] Perl> steps( 17, 9 );;
0: 17 9
1: 8 9
2: 8 1
3: 7 1
4: 6 1
5: 5 1
6: 4 1
7: 3 1
8: 2 1
9: 1 1
####
[0] Perl> sub steps { my($i,$m,$o) = (0,@_); ++$i,( $m >= $o ? ($m -= $o) : ($o -= $m )) while $m and $o; print $i; };;
Subroutine steps redefined at (eval 17) line 1, line 9.
[0] Perl> steps( 2**29, 2**28-1 );;
134217731
####
p$ = 80
size$ = 88
x$ = 96
y$ = 104
swaps$ = 112
_xchgBuffersR PROC
; 53 : U32 _xchgBuffersR( register U64 *p, const U32 size, register U32 x, register U32 y, U32 register swaps ) {
$LN19:
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 32 ; 00000020H
mov rax, rcx
movsxd rbx, r9d
movsxd r15, r8d
and eax, 7
mov esi, edx
mov r13, rcx
mov r14, r15
mov rdi, rbx
mov QWORD PTR tv246[rsp], rax
npad 8
$LL12@xchgBuffer@2: ############## **HERE** (see below)
; 54 : U32 register i;
; 55 : U32 const savedY = y;
mov ebp, ebx
mov r12, rdi
; 56 :
; 57 : assert( y <= size );
cmp ebx, esi
jbe SHORT $LN8@xchgBuffer@2
lea rdx, OFFSET FLAT:$SG3784
lea rcx, OFFSET FLAT:$SG3785
mov r8d, 57 ; 00000039H
call _wassert
mov rax, QWORD PTR tv246[rsp]
$LN8@xchgBuffer@2:
; 58 : assert( ( (U64)p & 7ull ) == 0 );
test rax, rax
je SHORT $LN9@xchgBuffer@2
lea rdx, OFFSET FLAT:$SG3788
lea rcx, OFFSET FLAT:$SG3789
mov r8d, 58 ; 0000003aH
call _wassert
mov rax, QWORD PTR tv246[rsp]
$LN9@xchgBuffer@2:
; 59 :
; 60 : if( y == size ) return swaps;
cmp ebx, esi
je SHORT $LN18@xchgBuffer@2 ############# recursion end condition jumps to **THERE**
; 61 :
; 62 : for( i = x; i < savedY; ++i ) {
cmp r15d, ebx
jae SHORT $LL12@xchgBuffer@2
mov eax, ebx
sub eax, r15d
mov edx, eax
add DWORD PTR swaps$[rsp], edx
add r15d, eax
$LL4@xchgBuffer@2:
; 63 : if( y == size ) y = savedY;
cmp ebx, esi
jne SHORT $LN1@xchgBuffer@2
mov ebx, ebp
mov rdi, r12
$LN1@xchgBuffer@2:
; 64 : swapElems( p, x++, y++ );
mov rax, QWORD PTR [r13+rdi*8]
mov rcx, QWORD PTR [r13+r14*8]
inc rdi
mov QWORD PTR [r13+r14*8], rax
mov QWORD PTR [r13+rdi*8-8], rcx
inc r14
inc ebx
sub rdx, 1
jne SHORT $LL4@xchgBuffer@2
; 65 : ++swaps;
; 66 : }
; 67 : return _xchgBuffersR( p, size, x, y, swaps );
mov rax, QWORD PTR tv246[rsp]
jmp $LL12@xchgBuffer@2 ############### and just before it, it unconditionally jumps to **HERE**
$LN18@xchgBuffer@2: ############### **THERE**
; 59 :
; 60 : if( y == size ) return swaps;
mov eax, DWORD PTR swaps$[rsp]
; 68 : }
mov rbx, QWORD PTR [rsp+88]
mov rbp, QWORD PTR [rsp+96]
mov rsi, QWORD PTR [rsp+104]
add rsp, 32 ; 00000020H
pop r15
pop r14
pop r13
pop r12
pop rdi
ret 0
_xchgBuffersR ENDP
####
C:\test\C>bufswap 536870912 268435455 2 ### 2^29 2^28-1
size:536870912 offset;268435455
[ 0 1 ... 268435453 268435454 ^ 268435455 268435456 ... 536870910 536870911 ]
[ 268435455 268435456 ... 536870910 536870911 ^ 0 1 ... 268435453 268435454 ]
iterative: swaps:536870912 took 7.359985176 secs.
[ 0 1 ... 268435453 268435454 ^ 268435455 268435456 ... 536870910 536870911 ]
[ 268435455 268435456 ... 536870910 536870911 ^ 0 1 ... 268435453 268435454 ]
recursive: swaps:536870911 took 3.762964774 secs.
[ 0 1 ... 268435453 268435454 ^ 268435455 268435456 ... 536870910 536870911 ]
[ 268435455 268435456 ... 536870910 536870911 ^ 0 1 ... 268435453 268435454 ]
reversive: swaps:536870911 took 4.901475821 secs.