25112 Rev. 3.06 September 2005

Software Optimization Guide for AMD64 Processors

sub

eax,

esi

; If (remainder < 0),

sbb

edx,

esi

; compute 2's complement of result.

pop

edi

 

; Restore EDI as per calling convention.

pop

esi

 

; Restore ESI as per calling convention.

pop

ebx

 

; Restore EBX as per calling convention.

ret

 

 

; Done, return to caller.

;-----------------------------------------------------------------------
; sr_two_divs -- 64-bit by 32-bit remainder using two chained DIVs.
;
; In:    EDX:EAX = dividend (hi:lo)
;        EBX     = divisor (32 bits)
; Out:   EDX:EAX = remainder (EDX is always 0 here), then control is
;        transferred to sr_makesign.
; Clobb: ECX, flags
;
; The high dword is divided first; its remainder (< EBX) becomes the
; high half of the second dividend, so the second DIV's quotient is
; guaranteed to fit in 32 bits.
;
; NOTE(review): the dispatch into this path is outside this excerpt.
; Presumably it is taken when divisor_hi == 0 but a single DIV could
; overflow, and the operands are already magnitudes -- confirm against
; the routine's entry code.
;-----------------------------------------------------------------------
sr_two_divs:
        mov     ecx, eax                ; Save dividend_lo in ECX.
        mov     eax, edx                ; Get dividend_hi.
        xor     edx, edx                ; Zero-extend it into EDX:EAX.
        div     ebx                     ; EAX = quotient_hi, EDX = intermediate remainder
        mov     eax, ecx                ; EAX = dividend_lo
        div     ebx                     ; EAX = quotient_lo, EDX = final remainder
        mov     eax, edx                ; remainder_lo
        xor     edx, edx                ; remainder_hi = 0
        jmp     sr_makesign             ; Make remainder signed.

sr_big_divisor:

 

 

sub

esp,

16

; Create four local variables (16 bytes).

mov

[esp], eax

;

dividend_lo

mov

[esp+4], ebx

;

divisor_lo

mov

[esp+8], edx

;

dividend_hi

mov

[esp+12], ecx

;

divisor_hi

mov

edi,

ecx

;

Save divisor_hi.

shr

edx,

1

;

Shift both

rcr

eax,

1

;

divisor and

ror

edi,

1

; dividend

rcr

ebx,

1

;

right by 1 bit.

bsr

ecx,

ecx

;

ECX = number of remaining shifts

shrd

ebx,

edi, cl

;

Scale down divisor and

shrd

eax,

edx, cl

;

dividend such that divisor is

shr

edx,

cl

;

less than 2^32 (that is, fits in EBX).

rol

edi,

1

;

Restore original divisor_hi.

div

ebx

 

;

Compute quotient.

mov

ebx,

[esp]

;

dividend_lo

mov

ecx,

eax

; Save quotient (dead store: ECX is reloaded with dividend_hi before it is read).

imul

edi,

eax

;

quotient * divisor high word (low only)

mul

DWORD PTR [esp+4] ;

quotient * divisor low word

add

edx,

edi

;

EDX:EAX = quotient * divisor

sub

ebx,

eax

;

dividend_lo - (quot.*divisor)_lo

mov

ecx,

[esp+8]

;

dividend_hi

sbb

ecx,

edx

;

Subtract divisor * quot. from dividend.

sbb

eax,

eax

;

remainder < 0 ? 0xffffffff : 0

mov

edx,

[esp+12]

;

divisor_hi

and

edx,

eax

;

remainder < 0 ? divisor_hi : 0

and

eax,

[esp+4]

;

remainder < 0 ? divisor_lo : 0

add

eax,

ebx

;

remainder_lo

add

edx,

ecx

;

remainder_hi

add

esp,

16

;

Remove local variables.

Chapter 8

Integer Optimizations

177

Page 193
Image 193
AMD 250 manual 177