25112 Rev. 3.06 September 2005

Software Optimization Guide for AMD64 Processors

/* Adds two fixed-point values by summing their .whole views.
   x is received by value, so accumulating into it does not affect the
   caller's copy; the updated copy is returned. */
__inline FIXED_U_16_16 fixed_add(FIXED_U_16_16 x, FIXED_U_16_16 y)
{
    x.whole += y.whole;
    return x;
}

/* Returns the .parts.intg field of x, widened to unsigned int. */
__inline unsigned int fixed_int(FIXED_U_16_16 x)
{
    unsigned int integer_part = x.parts.intg;

    return integer_part;
}

...

FIXED_U_16_16 y, z; unsigned int q;

...

label1:

/* fixed_add() writes y through its .whole member; fixed_int() then reads
   y back through the narrower .parts.intg member. */
y= fixed_add (y, z); q = fixed_int (y);

label2:

...

The object code generated for the source code between label1 and label2 typically follows one of these two variants:

; Variant 1
; y is updated with a dword store to [y]; the next load reads a dword
; starting at [y+2] and masks it down to 16 bits before storing to q.
; The arrow in the comment column links the marked instructions.
        mov     edx, DWORD PTR [z]
        mov     eax, DWORD PTR [y]      ; -+
        add     eax, edx                ;  |
        mov     DWORD PTR [y], eax      ;  |
        mov     eax, DWORD PTR [y+2]    ; <+ Address mismatch--no forwarding in LSU
        and     eax, 0FFFFh
        mov     DWORD PTR [q], eax

 

 

; Variant 2
; y is updated with a dword store to [y]; the next load is a zero-extending
; word load from [y+2], whose result is stored to q.
; The arrow in the comment column links the marked instructions.
        mov     edx, DWORD PTR [z]
        mov     eax, DWORD PTR [y]      ; -+
        add     eax, edx                ;  |
        mov     DWORD PTR [y], eax      ;  |
        movzx   eax, WORD PTR [y+2]     ; <+ Size and address mismatch--no forwarding in LSU
        mov     DWORD PTR [q], eax

 

 

Listing 6. Preferred

/* Fixed-point value that can be accessed either as one word (.whole)
   or as its two 16-bit halves (.parts). */
typedef union {
    unsigned int whole;          /* The entire value in a single access. */
    struct {
        unsigned short frac;     /* Lower 16 bits are fraction. */
        unsigned short intg;     /* Upper 16 bits are integer. */
    } parts;
} FIXED_U_16_16;

__inline FIXED_U_16_16 fixed_add(FIXED_U_16_16 x, FIXED_U_16_16 y) {

Chapter 2

C and C++ Source-Level Optimizations

23

Page 39
Image 39
AMD 250 manual Listing 6. Preferred