AMD Athlon™ Processor x86 Code Optimization

22007E/0 — November 1999

Example 1 (Avoid):

;-----------------------------------------------------------------------
; Per-byte averaging of two 16-byte-wide rows using plain MMX
; instructions (the "avoid" variant of this example).
;
; Syntax: Intel/MASM operand order (op dst, src), 32-bit registers.
;
; Register roles (as set up below):
;   ESI = source pointer, loaded from Src_MB
;   EDI = destination pointer, loaded from Dst_MB
;   EDX = source stride, loaded from SrcStride
;   EBX = destination stride, loaded from DstStride
;   ECX = row counter, 16 iterations
;   MM7 = mask loaded from ConstFEFE
;   MM6 = mask loaded from Const0101
;
; Each iteration reads 16 bytes at [ESI] and 16 bytes at [EDI], and
; writes 16 bytes back to [EDI]:
;   result = ((src & MM7) >> 1) + ((dst & MM7) >> 1) + ((src | dst) & MM6)
; then advances ESI by EDX and EDI by EBX.
;
; NOTE(review): ConstFEFE / Const0101 are defined outside this listing.
; The comments below assume 0xFE resp. 0x01 replicated in every byte,
; which is what makes the 64-bit PSRLQ safe as a per-byte halving and
; makes the result a rounded-up byte average -- confirm at the
; definitions.
;
; Registers written: ESI, EDI, EDX, EBX, ECX, MM0-MM7.
; NOTE(review): no EMMS/FEMMS appears in this listing; verify the
; surrounding code issues one before any x87 use.
;-----------------------------------------------------------------------
        MOV     ESI, DWORD PTR Src_MB
        MOV     EDI, DWORD PTR Dst_MB
        MOV     EDX, DWORD PTR SrcStride
        MOV     EBX, DWORD PTR DstStride
        MOVQ    MM7, QWORD PTR [ConstFEFE]
        MOVQ    MM6, QWORD PTR [Const0101]
        MOV     ECX, 16                 ;row count

L1:
        ; ---- first 8 bytes of the row ----
        MOVQ    MM0, [ESI]              ;MM0=QWORD1
        MOVQ    MM1, [EDI]              ;MM1=QWORD3
        MOVQ    MM2, MM0                ;MM2 = copy of QWORD1 for lsb extraction
        MOVQ    MM3, MM1                ;MM3 = copy of QWORD3 for lsb extraction
        PAND    MM2, MM6                ;MM2 = QWORD1 & MM6
        PAND    MM3, MM6                ;MM3 = QWORD3 & MM6
        PAND    MM0, MM7                ;MM0 = QWORD1 & 0xfefefefe
        PAND    MM1, MM7                ;MM1 = QWORD3 & 0xfefefefe
        POR     MM2, MM3                ;calculate adjustment
        PSRLQ   MM0, 1                  ;MM0 = (QWORD1 & 0xfefefefe)/2
        PSRLQ   MM1, 1                  ;MM1 = (QWORD3 & 0xfefefefe)/2
        PAND    MM2, MM6                ;mask adjustment with MM6 again (both
                                        ; POR inputs were already masked)
        PADDB   MM0, MM1                ;MM0 = QWORD1/2 + QWORD3/2 w/o adjustment
        PADDB   MM0, MM2                ;add lsb adjustment
        MOVQ    [EDI], MM0              ;store result over QWORD3

        ; ---- second 8 bytes of the row ----
        MOVQ    MM4, [ESI+8]            ;MM4=QWORD2
        MOVQ    MM5, [EDI+8]            ;MM5=QWORD4
        MOVQ    MM2, MM4                ;MM2 = copy of QWORD2 for lsb extraction
        MOVQ    MM3, MM5                ;MM3 = copy of QWORD4 for lsb extraction
        PAND    MM2, MM6                ;MM2 = QWORD2 & MM6
        PAND    MM3, MM6                ;MM3 = QWORD4 & MM6
        PAND    MM4, MM7                ;MM4 = QWORD2 & 0xfefefefe
        PAND    MM5, MM7                ;MM5 = QWORD4 & 0xfefefefe
        POR     MM2, MM3                ;calculate adjustment
        PSRLQ   MM4, 1                  ;MM4 = (QWORD2 & 0xfefefefe)/2
        PSRLQ   MM5, 1                  ;MM5 = (QWORD4 & 0xfefefefe)/2
        PAND    MM2, MM6                ;mask adjustment with MM6 again (both
                                        ; POR inputs were already masked)
        PADDB   MM4, MM5                ;MM4 = QWORD2/2 + QWORD4/2 w/o adjustment
        PADDB   MM4, MM2                ;add lsb adjustment
        MOVQ    [EDI+8], MM4            ;store result over QWORD4

        ; ---- advance to the next row ----
        ADD     ESI, EDX                ;source pointer += source stride
        ADD     EDI, EBX                ;destination pointer += destination stride
        LOOP    L1                      ;decrement ECX, repeat until ECX = 0

 

124

Use 3DNow!™ PAVGUSB for MPEG-2 Motion Compensation

Page 140
Image 140
AMD x86 manual MM0=QWORD1