# Double $n until its low 12 bits (mask 0xfff) are all zero, i.e. until $n is
# a multiple of 4096. The loop body does not run when $n already is one.
$n <<= 1 while $n & 0xfff;
####
/* Halve n while it is even, at most 12 times: c doubles from 1 on every pass
   and the loop ends once c reaches 4096 (2^12) or bit 0 of n is set. */
for( c = 1; ( ~n & 1 ) && ( c < 4096 ); c <<= 1 ) n >>= 1;
/* Multiply the reduced n by 4096.
   NOTE(review): declarations of n, c and lcm are not shown here; the product
   may wrap if the reduced n does not fit in the type once shifted - confirm. */
lcm = n * 4096;
####
/* Double n until its low 12 bits (mask 0xfff) are clear, i.e. until n is a
   multiple of 4096; nothing happens if it already is one.
   NOTE(review): the type of n is not shown; bits shifted out of the top are
   lost - confirm the callers' value range. */
while( n & 0xfff ) n <<= 1;
/* The shifted n is used directly as lcm. */
lcm = n;
####
C:\test\C>gcm
gcm : 2132901888
gcm2: 2132901888
gcm3: 2132901888
anonyM: gcm for s=2147483648 & r=1 to 1073741824 took:33.850023994460
anonyM: gcm2 for s=2147483648 & r=1 to 1073741824 took:46.293298113614
anonyM: gcm3 for s=2147483648 & r=1 to 1073741824 took:64.208097030422
####
/*
 * gcm2 - round max down to a multiple of lcm( n, 4096 ).
 *
 * lcm is built by removing up to 12 trailing zero bits from n (the factors
 * of two that n shares with 4096) and multiplying what is left by 4096.
 *
 * max : upper bound for the result.
 * n   : value combined with 4096.
 *
 * Returns ( max / lcm ) * lcm, which is 0 whenever lcm is greater than max.
 * Returns 0 for n == 0: the bit scan reports no set bit in that case and lcm
 * would be 0, so the division could not be carried out.
 *
 * NOTE(review): n * 4096 is computed in U64 and can wrap for very large n -
 * confirm the callers' value range.
 */
U64 gcm2( U64 max, U64 n ) {
    unsigned long b;    /* the intrinsic stores the bit index through an unsigned long * */
    U64 lcm;

    /* _BitScanForward64 returns 0 and leaves b unset when n has no set bit. */
    if( !_BitScanForward64( &b, n ) ) return 0;

    n >>= min( b, 12 );     /* drop at most 12 trailing zero bits */
    lcm = n * 4096;
    return ( max / lcm ) * lcm;
}
####
; Compiler listing for the C function gcm2; the interleaved "; NN :" comments
; quote the C source lines each instruction group was generated from.
; As used below: rcx holds max on entry, rdx holds n, the result is left in rax.
; Registers written: rax, rcx, rdx, r8, r9. No stack memory is accessed.
; NOTE(review): bsf does not define its destination when the source is zero,
; and n == 0 also makes the divisor in r9 zero at the div - confirm callers
; never pass n == 0.
PUBLIC gcm2
; Function compile flags: /Ogtpy
_TEXT SEGMENT
; max$ / n$ are symbols emitted by the listing; no instruction below uses them.
max$ = 8
n$ = 16
gcm2 PROC
; 24 : U32 b;
; 25 : U64 c, lcm;
; 26 :
; 27 : _BitScanForward64( &b, n );
; rax = b = index of the lowest set bit of n
bsf rax, rdx
; keep max in r8: cl is needed below as the shift count
mov r8, rcx
; work on n in r9: rdx is needed below as the high half of the dividend
mov r9, rdx
; 28 : n >>= min( b, 12 );
; ecx = 12, replaced by b when b is below 12 (unsigned compare) => min( b, 12 )
mov ecx, 12
cmp eax, ecx
cmovb ecx, eax
; 29 : lcm = n * 4096;
; 30 : return ( max / lcm ) * lcm;
; rdx:rax = max, zero-extended to 128 bits for the unsigned divide
xor edx, edx
mov rax, r8
; r9 = n >> min( b, 12 )
shr r9, cl
; r9 = lcm = reduced n * 4096
shl r9, 12
; rax = max / lcm (the remainder lands in rdx and is not used)
div r9
; rax = ( max / lcm ) * lcm
imul rax, r9
; 31 : }
ret 0
gcm2 ENDP
####
; Compiler listing for the C function gcm; the interleaved "; NN :" comments
; quote the C source lines each instruction group was generated from.
; As used below: rcx holds max on entry, rdx holds n, the result is left in rax.
; Registers written: rax, rdx, r8, r9. No stack memory is accessed.
; r8 carries the loop variable c, r9 the working copy of n (later lcm).
PUBLIC gcm
; Function compile flags: /Ogtpy
_TEXT SEGMENT
; max$ / n$ are symbols emitted by the listing; no instruction below uses them.
max$ = 8
n$ = 16
gcm PROC
; 16 : U64 c, lcm;
; 17 :
; 18 : for( c = 1; ( ~n & 1 ) && ( c < 4096 ); c <<= 1 ) n >>= 1;
; al = low byte of n (inverted two instructions further down)
movzx eax, dl
; c = 1
mov r8d, 1
; working copy of n; rdx is needed later as the high half of the dividend
mov r9, rdx
not al
; r8b is still 1 here, so this tests bit 0 of ~n
test al, r8b
; bit 0 of ~n clear => n is odd => skip the loop entirely
je SHORT $LN1@gcm
$LL3@gcm:
; loop condition c < 4096 (unsigned); leave once c has reached 4096
cmp r8, 4096 ; 00001000H
jae SHORT $LN1@gcm
; n >>= 1
shr r9, 1
; c <<= 1, done as c + c
add r8, r8
; re-evaluate ( ~n & 1 ) on the shifted n; keep looping while n is even
movzx eax, r9b
not al
test al, 1
jne SHORT $LL3@gcm
$LN1@gcm:
; 19 : lcm = n * 4096;
shl r9, 12
; 20 : return ( max / lcm ) * lcm;
; rdx:rax = max, zero-extended to 128 bits for the unsigned divide
xor edx, edx
mov rax, rcx
; rax = max / lcm (the remainder lands in rdx and is not used)
div r9
; rax = ( max / lcm ) * lcm
imul rax, r9
; 21 : }
ret 0
gcm ENDP
####
C:\test\C>gcm
anonyM: gcm for s=2147483648 & r=1 to 1073741824 took: 33.920637491715
anonyM: gcm2 for s=2147483648 & r=1 to 1073741824 took: 46.151947659089
oiskuu: gcm for s=2147483648 & r=1 to 1073741824 took:330.492011773110