AMD Athlon™ Processor x86 Code Optimization

22007E/0 — November 1999

/* Function XForm performs a fully generalized 3D transform on an array of vertices pointed to by "v" and stores the transformed vertices in the location pointed to by "res". Each vertex consists of four floats. The 4x4 transform matrix is pointed to by "m". The matrix elements are also floats. The argument "numverts" indicates how many vertices have to be transformed. The computation performed for each vertex is:

res->x = v->x*m[0][0] + v->y*m[1][0] + v->z*m[2][0] + v->w*m[3][0]
res->y = v->x*m[0][1] + v->y*m[1][1] + v->z*m[2][1] + v->w*m[3][1]
res->z = v->x*m[0][2] + v->y*m[1][2] + v->z*m[2][2] + v->w*m[3][2]
res->w = v->x*m[0][3] + v->y*m[1][3] + v->z*m[2][3] + v->w*m[3][3]

*/

/* Byte offsets of the 4x4 float matrix elements relative to the matrix base
   pointer. MRC is the offset of m[R][C]: (R*4 + C) * 4 bytes, i.e. rows are
   stored contiguously and each element occupies 4 bytes. Each directive must
   be on its own line; a #define consumes the remainder of its line as the
   replacement text. */
#define M00 0
#define M01 4
#define M02 8
#define M03 12
#define M10 16
#define M11 20
#define M12 24
#define M13 28
#define M20 32
#define M21 36
#define M22 40
#define M23 44
#define M30 48
#define M31 52
#define M32 56
#define M33 60

void XForm (float *res, const float *v, const float *m, int numverts)

{

_asm {

 

 

 

 

MOV

EDX, [V]

;EDX =

source vector ptr

MOV

EAX, [M]

;EAX =

matrix ptr

MOV

EBX, [RES]

;EBX =

destination vector ptr

MOV

ECX, [NUMVERTS]

;ECX =

number of vertices to transform

;3DNow! version of fully general 3D vertex transformation.

;Optimal for AMD Athlon (completes in 16 cycles)

FEMMS

 

;clear MMX state

 

 

ALIGN

16

;for optimal branch alignment

120

Optimized Matrix Multiplication

Page 136
Image 136
AMD x86 manual Mov Ebx, Res