; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"
%include "third_party/x86inc/x86inc.asm"

;
; This file uses MMX instructions.
;
; NOTE(review): MOVNTQ (used for the two-pixel store below) is documented by
; Intel as part of the SSE extension rather than base MMX -- confirm that the
; CPU directive below and the run-time dispatch for this routine agree.
;
  SECTION_TEXT
  CPU       MMX

;-----------------------------------------------------------------------------
; Horizontally scales one row of planar YUV with linear interpolation and
; converts it to 32-bit pixels through a lookup table.
;
; C prototype as recorded in this file (six arguments):
;
;void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf,
;                                      const uint8* u_buf,
;                                      const uint8* v_buf,
;                                      uint8* rgb_buf,
;                                      ptrdiff_t width,
;                                      ptrdiff_t source_dx);
;
; NOTE(review): the code actually consumes SEVEN arguments. PROLOGUE is
; invoked for 7 arguments and the seventh (named R1) is copied into TABLEq and
; used as the base address of the conversion lookup table. The prototype above
; therefore looks stale -- confirm that the C declaration carries a trailing
; table-pointer parameter.
;
; Fixed-point conventions, as used by the code:
;   * x and source_dx are 16.16 fixed point: the Y sample index is x >> 16 and
;     the interpolation weight is x & 0xffff.
;   * U/V are indexed with x >> 17 and weighted with x & 0x1fffe, i.e. one
;     chroma sample per two luma samples.
;
; Lookup table layout, as addressed by the code (8 bytes per entry, entry index
; is an interpolated 8-bit sample):
;   table + 0    : Y contribution
;   table + 2048 : U contribution
;   table + 4096 : V contribution
; Entries are combined with PADDSW, i.e. as four signed 16-bit words.
;
; Output: two pixels (8 bytes) per loop iteration via MOVNTQ, plus one final
; 4-byte MOVD when a single pixel remains.
;
; This routine executes neither EMMS nor SFENCE before returning.
; NOTE(review): callers are presumably responsible for both -- confirm.
;
; Both the Y and the U/V interpolation always load the sample at index + 1,
; even when its weight is zero, so the source rows must be readable one byte
; past the last sample that is referenced.
;-----------------------------------------------------------------------------
%define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0

  ; Not referenced anywhere in this function: the table arrives as the seventh
  ; argument instead. The declaration is kept so the object file's symbol
  ; references stay exactly as before.
  extern    mangle(kCoefficientsRgbY)

; Parameters are in the following order:
; 1. Y plane
; 2. U plane
; 3. V plane
; 4. ARGB frame
; 5. Width
; 6. Source dx
; 7. Conversion lookup table

; x86inc PROLOGUE: 7 arguments, 7 general-purpose registers, 3 vector
; registers, followed by the names given to the argument registers.
PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1

; Scratch-register roles.
;   TABLEq    - base of the conversion lookup table (copy of argument 7)
;   Xq        - current source position, 16.16 fixed point
;   INDEXq    - integer sample index / position of the second pixel of a pair
;   COMPR*    - right-hand neighbour sample, then its weighted value
;   FRACTIONq - interpolation weight
;   COMPL*    - left-hand neighbour sample, then the blended result; this
;               reuses the register of argument 7 once TABLEq holds the copy
;
; NOTE(review): r10-r14 are hard-coded here. They must not alias any register
; that x86inc assigns to the seven named arguments on the target ABI --
; confirm against the x86inc version in use.
%define     TABLEq     r10
%define     Xq         r11
%define     INDEXq     r12
%define     COMPRd     r13d
%define     COMPRq     r13
%define     FRACTIONq  r14
%define     COMPL      R1
%define     COMPLq     R1q
%define     COMPLd     R1d

  ; Save every scratch register used below. r12-r14 are callee-saved in both
  ; the System V and Microsoft x64 ABIs; r10/r11 are volatile there, so saving
  ; them is not strictly required but is kept as in the original.
  PUSH      TABLEq
  PUSH      Xq
  PUSH      INDEXq
  PUSH      COMPRq
  PUSH      FRACTIONq

; Restores the registers saved above, in reverse order. Must be followed by
; RET.
%macro EPILOGUE 0
  POP       FRACTIONq
  POP       COMPRq
  POP       INDEXq
  POP       Xq
  POP       TABLEq
%endmacro

  ; Free the argument-7 register so it can serve as COMPL.
  mov       TABLEq, R1q

  ; From here on WIDTHq is the end bound expressed in source coordinates.
  imul      WIDTHq, SOURCE_DXq           ; source_width = width * source_dx
  xor       Xq, Xq                       ; x = 0
  cmp       SOURCE_DXq, 0x20000          ; source_dx >= 2.0 ?
  jl        .lscaleend
  mov       Xq, 0x8000                   ; x = 0.5 for 1/2 or less
  jmp       .lscaleend

.lscaleloop:
  ; Interpolate U
  mov       INDEXq, Xq
  sar       INDEXq, 0x11                 ; chroma index = x >> 17
  movzx     COMPLd, BYTE [Uq + INDEXq]
  movzx     COMPRd, BYTE [Uq + INDEXq + 1]
  mov       FRACTIONq, Xq
  and       FRACTIONq, 0x1fffe           ; weight of the right-hand sample
  imul      COMPRq, FRACTIONq
  ; FRACTION only has bits inside the mask, so XOR yields (mask - FRACTION),
  ; the weight of the left-hand sample.
  xor       FRACTIONq, 0x1fffe
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 17                   ; back to an 8-bit sample value
  movq      mm0, [TABLEq + 2048 + 8 * COMPLq]

  ; Interpolate V
  movzx     COMPLd, BYTE [Vq + INDEXq]
  movzx     COMPRd, BYTE [Vq + INDEXq + 1]
  ; Trick here: FRACTION still holds the left-hand weight from the U step, so
  ; multiply COMPL first, then flip FRACTION back for COMPR.
  ; Saves two instructions. :)
  imul      COMPLq, FRACTIONq
  xor       FRACTIONq, 0x1fffe
  imul      COMPRq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 17
  paddsw    mm0, [TABLEq + 4096 + 8 * COMPLq]  ; mm0 = U + V contribution

  ; Interpolate first Y1.
  lea       INDEXq, [Xq + SOURCE_DXq]    ; INDEXq now points to next pixel.
                                         ; Xq points to current pixel.
  mov       FRACTIONq, Xq
  sar       Xq, 0x10                     ; luma index = x >> 16
  movzx     COMPLd, BYTE [Yq + Xq]
  movzx     COMPRd, BYTE [Yq + Xq + 1]
  and       FRACTIONq, 0xffff            ; weight of the right-hand sample
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, 0xffff            ; weight of the left-hand sample
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 16
  movq      mm1, [TABLEq + 8 * COMPLq]

  ; Interpolate Y2 if available.
  cmp       INDEXq, WIDTHq
  jge       .lscalelastpixel

  lea       Xq, [INDEXq + SOURCE_DXq]    ; Xq points to next pixel.
                                         ; INDEXq points to current pixel.
  mov       FRACTIONq, INDEXq
  sar       INDEXq, 0x10
  movzx     COMPLd, BYTE [Yq + INDEXq]
  movzx     COMPRd, BYTE [Yq + INDEXq + 1]
  and       FRACTIONq, 0xffff
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, 0xffff
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 16
  movq      mm2, [TABLEq + 8 * COMPLq]

  ; Both pixels share the same chroma contribution (mm0).
  paddsw    mm1, mm0
  paddsw    mm2, mm0
  psraw     mm1, 0x6                     ; arithmetic shift of each 16-bit word
  psraw     mm2, 0x6
  packuswb  mm1, mm2                     ; saturate to bytes: two 4-byte pixels
  movntq    [ARGBq], mm1                 ; non-temporal 8-byte store
  add       ARGBq, 0x8

.lscaleend:
  cmp       Xq, WIDTHq
  jl        .lscaleloop
  jmp       .epilogue

.lscalelastpixel:
  ; Only one pixel is left: combine, narrow and store 4 bytes.
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1
  movd      [ARGBq], mm1

.epilogue:
  EPILOGUE
  RET