AMD Athlon™ Processor x86 Code Optimization

22007E/0 — November 1999

Example 5:

C code:

 

 

#define PI

3.14159265358979323

 

float

x,y,xa,ya,r,res;

 

int

xs,df;

 

xs = x < 0

? 1 : 0;

 

xa = fabs(x);

 

ya = fabs(y);

 

df = (xa <

ya);

 

if (xs && df) {

 

 

res =

PI/2 + r;

}

else if (xs) { res = PI - r;

}

else if (df) {

res = PI/2 - r;

}

else {

res = r;

}

3DNow! code:

;-----------------------------------------------------------------------
; Branch-free selection of res from r, x and y (same four cases as the
; C code of this example), using MMX integer ops plus PFSUB/PFADD.
;
; in:    MM0 = r
;        MM1 = y
;        MM2 = x
; out:   MM0 = res
; uses:  sgn   - mask to extract the sign bit
;        mabs  - mask to clear the sign bit
;        npio2 - -pi/2
;        (memory constants defined elsewhere; presumably replicated in
;         both dwords if both packed lanes are used -- TODO confirm)
; clobb: MM1, MM2, MM3, MM5, MM6, MM7
;
; NOTE(review): POR only forces the sign bit on, it does not toggle it,
; so "ar = -r" holds only when r is non-negative on entry -- confirm
; against the caller.
;-----------------------------------------------------------------------
        MOVQ    MM7, sgn        ;mask to extract sign bit
        MOVQ    MM5, mabs       ;mask to clear sign bit
        PAND    MM7, MM2        ;xs = sign(x)
        PAND    MM1, MM5        ;ya = abs(y)
        PAND    MM2, MM5        ;xa = abs(x)

        ; Sign bits are cleared in xa and ya, so a signed integer
        ; compare is used to order the two magnitudes.
        MOVQ    MM6, MM1        ;ya
        PCMPGTD MM6, MM2        ;df = (xa < ya) ? 0xffffffff : 0
        PSLLD   MM6, 31         ;df = bit<31>

        MOVQ    MM5, MM7        ;xs
        PXOR    MM7, MM6        ;xs^df ? 0x80000000 : 0
        MOVQ    MM3, npio2      ;-pi/2
        PXOR    MM5, MM3        ;xs ? pi/2 : -pi/2
        PSRAD   MM6, 31         ;df ? 0xffffffff : 0
        PANDN   MM6, MM5        ;xs ? (df ? 0 : pi/2) : (df ? 0 : -pi/2)
        PFSUB   MM6, MM3        ;pr = pi/2 + (xs ? (df ? 0 : pi/2) :
                                ;             (df ? 0 : -pi/2))
        POR     MM0, MM7        ;ar = xs^df ? -r : r
        PFADD   MM0, MM6        ;res = ar + pr

64

Replace Branches with Computation in 3DNow!™ Code

Page 80
Image 80
AMD x86 manual Psrad