Software Optimization Guide for AMD64 Processors

25112 Rev. 3.06 September 2005

mov

eax, [esp+20]

; divisor_lo

 

 

sbb

ecx, edx

; Subtract divisor * quot. from dividend.

sbb

edx, edx

; (remainder < 0) ? 0xFFFFFFFF : 0

and

eax, edx

; (remainder < 0) ? divisor_lo : 0

and

edx, [esp+24]

; (remainder < 0) ? divisor_hi : 0

add

eax, ebx

; remainder += (remainder < 0) ? divisor : 0

pop

edi

; Restore EDI as per calling convention.

pop

ebx

; Restore EBX as per calling convention.

ret

 

; Done, return to caller.

_ullrem ENDP

64-Bit Signed Remainder Computation

; _llrem divides two signed 64-bit numbers and returns the remainder.
;
; In:       [ESP+8]:[ESP+4]   = dividend
;           [ESP+16]:[ESP+12] = divisor
; Out:      EDX:EAX = remainder of division
;
; Destroys: EAX, ECX, EDX, EFlags

 

push

ebx

; Save EBX as per calling convention.

push

esi

; Save ESI as per calling convention.

push

edi

; Save EDI as per calling convention.

mov

ecx, [esp+28]

; divisor-hi

mov

ebx, [esp+24]

; divisor-lo

mov

edx, [esp+20]

; dividend-hi

mov

eax, [esp+16]

; dividend-lo

mov

esi, edx

; sign(remainder) == sign(dividend)

sar

esi, 31

; (remainder < 0) ? -1 : 0

mov

edi, edx

; dividend-hi

sar

edi, 31

; (dividend < 0) ? -1 : 0

xor

eax, edi

; If (dividend < 0),

xor

edx, edi

; compute 1's complement of dividend.

sub

eax, edi

; If (dividend < 0),

sbb

edx, edi

; compute 2's complement of dividend.

mov

edi, ecx

; divisor-hi

sar

edi, 31

; (divisor < 0) ? -1 : 0

xor

ebx, edi

; If (divisor < 0),

xor

ecx, edi

; compute 1's complement of divisor.

sub

ebx, edi

; If (divisor < 0),

sbb

ecx, edi

; compute 2's complement of divisor.

jnz

sr_big_divisor

; divisor > 2^32 - 1

cmp

edx, ebx

; Only one division needed (ECX = 0)?

jae

sr_two_divs

; No, need two divisions.

div

ebx

; EAX = quotient_lo

mov

eax, edx

; EAX = remainder_lo

mov

edx, ecx

; EDX = remainder_hi = 0

xor

eax, esi

; If (remainder < 0),

xor

edx, esi

; compute 1's complement of result.

176

Integer Optimizations

Chapter 8

Page 192
Image 192
AMD 250 manual Bit Signed Remainder Computation, 176