AMD Athlon™ Processor x86 Code Optimization

22007E/0 — November 1999

Example 7 (Division):

;_ulldiv divides two unsigned 64-bit integers, and returns

;the quotient.

;INPUT: [ESP+8]:[ESP+4] dividend

;[ESP+16]:[ESP+12] divisor

;OUTPUT: EDX:EAX quotient of division

;

 

 

 

;DESTROYS: EAX,ECX,EDX,EFlags

_ulldiv

PROC

 

 

PUSH

EBX

 

;save EBX as per calling convention

MOV

ECX, [ESP+20]

;divisor_hi

MOV

EBX, [ESP+16]

;divisor_lo

MOV

EDX, [ESP+12]

;dividend_hi

MOV

EAX, [ESP+8]

;dividend_lo

TEST

ECX, ECX

 

;divisor > 2^32–1?

JNZ

$big_divisor

;yes, divisor > 2^32–1

CMP

EDX, EBX

 

;only one division needed? (ECX = 0)

JAE

$two_divs

 

;need two divisions

DIV

EBX

 

;EAX = quotient_lo

MOV

EDX, ECX

 

;EDX = quotient_hi = 0 (quotient in

 

 

 

; EDX:EAX)

POP

EBX

;restore EBX as per calling convention

RET

 

;done, return to caller

$two_divs:

 

 

MOV

ECX, EAX

;save dividend_lo in ECX

MOV

EAX, EDX

;get dividend_hi

XOR

EDX, EDX

;zero extend it into EDX:EAX

DIV

EBX

;quotient_hi in EAX

XCHG

EAX, ECX

;ECX = quotient_hi, EAX = dividend_lo

DIV

EBX

;EAX = quotient_lo

MOV

EDX, ECX

;EDX = quotient_hi (quotient in EDX:EAX)

POP

EBX

;restore EBX as per calling convention

RET

 

;done, return to caller

$big_divisor:

 

 

PUSH

EDI

 

;save EDI as per calling convention

MOV

EDI, ECX

 

;save divisor_hi

SHR

EDX, 1

 

;shift both divisor and dividend right

RCR

EAX, 1

 

; by 1 bit

ROR

EDI, 1

 

 

RCR

EBX, 1

 

 

BSR

ECX, ECX

 

;ECX = number of remaining shifts

SHRD

EBX, EDI, CL

;scale down divisor and dividend

SHRD

EAX, EDX, CL

; such that divisor is

SHR

EDX, CL

 

; less than 2^32 (i.e. fits in EBX)

ROL

EDI, 1

 

;restore original divisor_hi

DIV

EBX

 

;compute quotient

MOV

EBX, [ESP+12]

;dividend_lo

88

Efficient 64-Bit Integer Arithmetic

Page 104
Image 104
AMD x86 manual Example 7 Division, EBX, ESP+12 Dividendlo