AMD Athlon™ Processor x86 Code Optimization

22007E/0 — November 1999

Example 2:

C code:

 

/* Reference C for Example 2: z ends up as the smaller of |x| and 1/|x|. */
float x, z;

z = fabs(x);        /* fabs() from <math.h>; abs() takes an int and would drop the fraction */
if (z >= 1) {
    z = 1/z;
}

3DNow! code:

;Branch-free version of the C code above (Intel operand order: op dst, src).
;in:    MM0 = x
;out:   MM0 = z
;uses:  MM1, MM2, MM5 are overwritten
;       mabs is a memory constant defined elsewhere (0x7fffffff sign-clearing
;       mask; presumably replicated in both dwords since MOVQ loads 64 bits)

        MOVQ     MM5, mabs      ;0x7fffffff
        PAND     MM0, MM5       ;z=abs(x)  (clear the sign bit)
        PFRCP    MM2, MM0       ;1/z approx
        MOVQ     MM1, MM0       ;save z (MM0 is overwritten by the refinement)
        PFRCPIT1 MM0, MM2       ;1/z step
        PFRCPIT2 MM0, MM2       ;1/z final
        PFMIN    MM0, MM1       ;z = z < 1 ? z : 1/z  (min picks the right one, no branch)

Example 3:

C code:

 

float x,z,r,res;

 

z = fabs(x)

 

if (z < 0.575) {

 

res = r;

 

}

 

else {

 

res = PI/2 - 2*r;

 

}

3DNow! code:

;Branch-free version of the C code above (Intel operand order: op dst, src).
;Both candidate results are computed, then one is selected with a bit mask.
;in:    MM0 = x
;       MM1 = r
;out:   MM0 = res
;uses:  MM1, MM2, MM3, MM7 are overwritten
;       mabs, bnd, pio2 are memory constants defined elsewhere

        MOVQ    MM7, mabs       ;mask for absolute value
        PAND    MM0, MM7        ;z = abs(x)
        MOVQ    MM2, bnd        ;0.575
                                ;integer compare is valid here: both values are
                                ;non-negative floats, whose bit patterns order
                                ;the same way as signed integers
        PCMPGTD MM2, MM0        ;z < 0.575 ? 0xffffffff : 0
        MOVQ    MM3, pio2       ;pi/2
        MOVQ    MM0, MM1        ;save r (z is no longer needed after the compare)
        PFADD   MM1, MM1        ;2*r
        PFSUBR  MM1, MM3        ;pi/2 - 2*r  (reverse subtract: dst = src - dst)
        PAND    MM0, MM2        ;z < 0.575 ? r : 0
        PANDN   MM2, MM1        ;z < 0.575 ? 0 : pi/2 - 2*r  (dst = ~dst & src)
        POR     MM0, MM2        ;z < 0.575 ? r : pi/2 - 2 * r

62

Replace Branches with Computation in 3DNow!™ Code

Page 78
Image 78
AMD x86 manual MM5