Software Optimization Guide for AMD64 Processors

25112 Rev. 3.06 September 2005

Example 5: C Code

#define PI 3.14159265358979323 float x, y ,xa ,ya ,r ,res; int xs, df;

xs = x < 0 ? 1 : 0; xa = fabs(x);

ya = fabs(y); df = (xa < ya); if (xs && df) {

res = PI / 2 + r;

}else if (xs) { res = PI - r;

}else if (df) { res = PI/2 - r;

}else {

res = r;

}

Example 5: 3DNow!™ Code

;-----------------------------------------------------------------------
; Branch-free 3DNow! version of the Example 5 C code.
;
; In:    MM0 = r
;        MM1 = y
;        MM2 = x
; Out:   MM0 = res
; Reads: sgn   - mask to extract the sign bit
;        mabs  - mask to clear the sign bit
;        npio2 - the constant -pi / 2
;        (all three are defined outside this excerpt)
; Clobb: MM1, MM2, MM3, MM5, MM6, MM7
;
; The result is assembled as res = ar + pr, where
;        ar = (xs ^ df) ? -r : r
;        pr = pi/2 + (df ? 0 : (xs ? pi/2 : -pi/2))
; which yields pi/2 + r, pi - r, pi/2 - r, or r for the four
; (xs, df) combinations of the C code.
;-----------------------------------------------------------------------
        movq    mm7, sgn        ; Mask to extract sign bit
        movq    mm5, mabs       ; Mask to clear sign bit
        pand    mm7, mm2        ; xs = sign(x)
        pand    mm1, mm5        ; ya = abs(y)
        pand    mm2, mm5        ; xa = abs(x)

        movq    mm6, mm1        ; ya
        pcmpgtd mm6, mm2        ; df = (xa < ya) ? 0xffffffff : 0
        pslld   mm6, 31         ; df = bit 31
        movq    mm5, mm7        ; xs (copy kept for the pi/2 term)
        pxor    mm7, mm6        ; xs ^ df ? 0x80000000 : 0

        movq    mm3, npio2      ; -pi / 2
        pxor    mm5, mm3        ; xs ? pi / 2 : -pi / 2
        psrad   mm6, 31         ; df ? 0xffffffff : 0
        pandn   mm6, mm5        ; xs ? (df ? 0 : pi / 2) : (df ? 0 : -pi / 2)
        pfsub   mm6, mm3        ; pr = pi / 2 + (xs ? (df ? 0 : pi / 2) :
                                ;                     (df ? 0 : -pi / 2))

        ; Flip (not merely set) the sign bit so that r is negated whatever
        ; its sign, as `PI - r` and `PI / 2 - r` in the C code require.
        pxor    mm0, mm7        ; ar = xs ^ df ? -r : r
        pfadd   mm0, mm6        ; res = ar + pr

140

Branch Optimizations

Chapter 6

Page 156
Image 156
AMD 250 manual Example 5 C Code, Example 5 3DNow! Code, 140