25112 Rev. 3.06 September 2005

Software Optimization Guide for AMD64 Processors

;x*R00+y*R10+z*R20+w*R30]

movntq [ebx-16], mm3 ; Store lower quadword of transformed vertex.

pfadd mm5, mm1 ; MM5 = [x*R03+y*R13+z*R23+w*R33,
               ;        x*R02+y*R12+z*R22+w*R32]

movntq [ebx-8], mm5 ; Store upper QWORD of transformed vertex.

dec ecx ; Decrement # of vertices to transform.

jnz rotate_vertices_loop

 

 

femms ; Clear MMX state.

sfence ; Finish all memory writes.

;==============================================================================

;INSTRUCTIONS BELOW RESTORE THE REGISTER STATE WITH WHICH THIS ROUTINE

;WAS ENTERED.

;REGISTERS EAX, ECX, EDX ARE CONSIDERED VOLATILE AND ASSUMED TO BE CHANGED

;WHILE THE REGISTERS BELOW MUST BE PRESERVED IF THE USER IS CHANGING THEM
pop edi

pop esi

pop ebx

mov esp, ebp
pop ebp

;==============================================================================
ret

_matrix_x_vector_3dnow ENDP
_TEXT ENDS

END

Chapter 9

Optimizing with SIMD Instructions

235

Page 251
Image 251
AMD 250 manual 235