25112 Rev. 3.06 September 2005

Software Optimization Guide for AMD64 Processors

Example 3: C Code

float x, z, r, res; z = fabs(x)

if (z < 0.575) { res = r;

} else {

res = PI / 2 - 2 * r;

}

Example 3: 3DNow!™ Code

;-----------------------------------------------------------------------
; Example 3 (3DNow!): branch-free form of
;     res = (abs(x) < 0.575) ? r : (pi / 2 - 2 * r)
; In:     MM0 = x
;         MM1 = r
; Out:    MM0 = res
; Clobb:  MM1, MM2, MM3, MM7
; Data:   mabs, bnd and pio2 are memory operands defined outside this
;         listing (absolute-value mask, 0.575, and pi / 2 per the
;         comments below).
; Note:   PCMPGTD is an integer compare; it is used on float bit patterns
;         here, which presumes both operands are non-negative (z has had
;         its sign cleared, and the bound is positive).
;-----------------------------------------------------------------------
        movq    mm7, mabs       ; Mask for absolute value
        pand    mm0, mm7        ; z = abs(x)
        movq    mm2, bnd        ; 0.575
        pcmpgtd mm2, mm0        ; z < 0.575 ? 0xffffffff : 0
        movq    mm3, pio2       ; pi / 2
        movq    mm0, mm1        ; Save r.
        pfadd   mm1, mm1        ; 2 * r
        pfsubr  mm1, mm3        ; pi / 2 - 2 * r  (reverse subtract: mm3 - mm1)
        pand    mm0, mm2        ; z < 0.575 ? r : 0
        pandn   mm2, mm1        ; z < 0.575 ? 0 : pi / 2 - 2 * r
        por     mm0, mm2        ; z < 0.575 ? r : pi / 2 - 2 * r

Example 4: C Code

#define PI 3.14159265358979323 float x, z, r, res;

/* 0 <= r <= PI / 4 */ z = abs(x)

if (z < 1) { res = r;

} else {

res = PI / 2 - r;

}

Example 4: 3DNow!™ Code

;-----------------------------------------------------------------------
; Example 4 (3DNow!): branch-free form of
;     res = (abs(x) < 1) ? r : (pi / 2 - r)
; In:     MM0 = x
;         MM1 = r, with 0 <= r <= pi / 4
; Out:    MM1 = res
; Clobb:  MM0, MM4, MM5, MM6
; Data:   mabs, one and pio2 are memory operands defined outside this
;         listing (sign-clearing mask, 1.0, and pi / 2 per the comments
;         below).
; Note:   The final PFMAX replaces a mask-and-merge sequence. It relies on
;         the range of r: r >= 0 makes max(r, 0) = r, and r <= pi / 4
;         makes pi / 2 - r >= r, so max(r, pi / 2 - r) = pi / 2 - r.
;-----------------------------------------------------------------------
        movq    mm5, mabs       ; Mask to clear sign bit
        movq    mm6, one        ; 1.0
        pand    mm0, mm5        ; z = abs(x)
        pcmpgtd mm6, mm0        ; z < 1 ? 0xffffffff : 0
        movq    mm4, pio2       ; pi / 2
        pfsub   mm4, mm1        ; pi / 2 - r
        pandn   mm6, mm4        ; z < 1 ? 0 : pi / 2 - r
        pfmax   mm1, mm6        ; res = z < 1 ? r : pi / 2 - r

Chapter 6

Branch Optimizations

139

Page 155
Image 155
AMD 250 manual Example 3 C Code, Example 3 3DNow! Code, Example 4 C Code, Example 4 3DNow! Code, 139