25112 Rev. 3.06 September 2005

Software Optimization Guide for AMD64 Processors

64-Bit Right Shift

; Logical right shift of the 64-bit value held in EDX:EAX (EDX = high
; dword, EAX = low dword) by the count in ECX.  The SHRD/SHR pair applies
; the count modulo 32; bit 5 of the count (value 32) is then tested
; separately and, when set, the remaining 32-bit shift is done by moving
; the high dword into the low dword and clearing the high dword.
; Destroys: EFlags.
        shrd    eax, edx, cl            ; First apply shift count
        shr     edx, cl                 ;  mod 32 to EDX:EAX.
        test    ecx, 32                 ; Need to shift by another 32?
        jz      rshift_done             ; No, done.
        mov     eax, edx                ; Right shift EDX:EAX
        xor     edx, edx                ;  by 32 bits (zero fill).
rshift_done:

64-Bit Multiplication

;-----------------------------------------------------------------------
; _llmul computes the low-order half of the product of its
; arguments, two 64-bit integers.
;
; In:       [ESP+8]:[ESP+4]   = multiplicand (hi:lo)
;           [ESP+16]:[ESP+12] = multiplier   (hi:lo)
; Out:      EDX:EAX = (multiplicand * multiplier) % 2^64
; Destroys: EAX, ECX, EDX, EFlags
;
; Method:   If both high dwords are zero, a single 32x32->64 MUL gives
;           the full result.  Otherwise the result is
;             p1 + ((p2_lo + p3_lo) << 32)
;           where p1 is the full 64-bit product of the two low dwords
;           and p2_lo / p3_lo are the low 32 bits of the two hi*lo
;           cross products.  The hi*hi product is never computed.
;-----------------------------------------------------------------------
_llmul PROC
        mov     edx, [esp+8]            ; multiplicand_hi
        mov     ecx, [esp+16]           ; multiplier_hi
        or      edx, ecx                ; One operand >= 2^32?
        mov     edx, [esp+12]           ; multiplier_lo (MOV keeps the flags
        mov     eax, [esp+4]            ;  from OR); multiplicand_lo
        jnz     twomul                  ; Yes, cross products are needed.
        mul     edx                     ; multiplicand_lo * multiplier_lo
        ret                             ; Done, return to caller.

twomul:
        imul    edx, [esp+8]            ; p3_lo = multiplicand_hi * multiplier_lo
        imul    ecx, eax                ; p2_lo = multiplier_hi * multiplicand_lo
        add     ecx, edx                ; p2_lo + p3_lo
        mul     dword ptr [esp+12]      ; p1 = multiplicand_lo * multiplier_lo
        add     edx, ecx                ; p1 + p2_lo + p3_lo = result in EDX:EAX
        ret                             ; Done, return to caller.
_llmul ENDP

64-Bit Unsigned Division

;_ulldiv divides two unsigned 64-bit integers and returns the quotient.

; In:

[ESP+8]:[ESP+4] = dividend

;[ESP+16]:[ESP+12] = divisor

; Out:

EDX:EAX =

quotient of division

; Destroys: EAX, ECX,

EDX, EFlags

_ulldiv

PROC

 

push

ebx

; Save EBX as per calling convention.

mov

ecx, [esp+20]

; divisor_hi

mov

ebx, [esp+16]

; divisor_lo

Chapter 8

Integer Optimizations

171

Page 187
Image 187
AMD 250 manual: 64-Bit Right Shift, 64-Bit Multiplication, 64-Bit Unsigned Division, 171