; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"
%include "third_party/x86inc/x86inc.asm"

;
; Syntax: NASM (Intel operand order), assembled through the x86inc macros.
; This file uses SSE and SSE2 instructions on XMM registers.
;
  SECTION_TEXT
  CPU       SSE2

;-----------------------------------------------------------------------
; void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf,
;                                  const uint8* u_buf,
;                                  const uint8* v_buf,
;                                  uint8* rgb_buf,
;                                  ptrdiff_t width,
;                                  ptrdiff_t source_dx);
;
; NOTE(review): PROLOGUE below loads SEVEN arguments; the seventh is used
; as the base pointer of the conversion table. The prototype above lists
; only six -- confirm against the C declaration.
;
; Converts one row of planar Y/U/V samples into 4-byte output pixels while
; stepping through the source by source_dx per output pixel.
;
; The source position is used as a fixed-point value:
;   luma index   = position >> 16
;   chroma index = position >> 17   (one U/V sample per two Y samples)
;
; Each table entry is 8 bytes (four 16-bit lanes). The Y, U and V entries
; for a pixel are added with signed saturation, shifted right by 6, and
; packed to unsigned bytes.
;
; Pixels are produced two at a time; a final single pixel is emitted when
; width is odd.
;-----------------------------------------------------------------------
%define SYMBOL ScaleYUVToRGB32Row_SSE2_X64
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0
  ; Declared here but not referenced by the code in this file; the table
  ; pointer actually used arrives as argument 7.
  extern    mangle(kCoefficientsRgbY)

; Arguments, in order:
;   1. Y plane
;   2. U plane
;   3. V plane
;   4. ARGB output
;   5. Width (in pixels)
;   6. Source dx (fixed-point step per output pixel)
;   7. Conversion table

PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1

; Register roles.
%define     LUTq      r10             ; conversion table base
%define     POSq      r11             ; fixed-point source position
%define     IDXq      r12             ; scratch sample index / next position
%define     SAMPLEq   R1q             ; argument 7 register, reused as scratch
%define     SAMPLEd   R1d             ; ... 32-bit view for movzx loads

; Table layout, as addressed by the loads below.
%define     LUT_ENTRY_BYTES   8       ; one movq per entry
%define     LUT_U_BASE        2048    ; byte offset of the U entries
%define     LUT_V_BASE        4096    ; byte offset of the V entries

; Shift counts.
%define     LUMA_POS_SHIFT    16      ; position -> Y sample index
%define     CHROMA_POS_SHIFT  17      ; position -> U/V sample index
%define     OUTPUT_SHIFT      6       ; applied to 16-bit sums before packing

  ; r12 is callee-saved in both the SysV and Microsoft x64 ABIs. r10/r11 are
  ; volatile in both, so saving them is conservative; it is kept as-is.
  PUSH      r10
  PUSH      r11
  PUSH      r12

  ; Move the table pointer out of R1 so R1 can serve as the sample scratch.
  mov       LUTq, R1q

  ; Start at source position 0 and enter the loop at its condition check.
  xor       POSq, POSq
  jmp       .pair_check

.pair_loop:
  ; --- Chroma shared by both pixels of the pair. ---
  mov       IDXq, POSq
  sar       IDXq, CHROMA_POS_SHIFT
  movzx     SAMPLEd, byte [Uq + IDXq]
  movq      xmm0, [LUTq + LUT_U_BASE + LUT_ENTRY_BYTES * SAMPLEq]
  movzx     SAMPLEd, byte [Vq + IDXq]
  movq      xmm1, [LUTq + LUT_V_BASE + LUT_ENTRY_BYTES * SAMPLEq]

  ; --- First luma sample. ---
  lea       IDXq, [POSq + SOURCE_DXq]     ; IDXq = position of 2nd pixel
  sar       POSq, LUMA_POS_SHIFT          ; POSq = Y index of 1st pixel
  movzx     SAMPLEd, byte [Yq + POSq]
  paddsw    xmm0, xmm1                    ; xmm0 = U + V; placed between loads
  movq      xmm1, [LUTq + LUT_ENTRY_BYTES * SAMPLEq]

  ; --- Second luma sample. ---
  lea       POSq, [IDXq + SOURCE_DXq]     ; POSq = position of next pair
  sar       IDXq, LUMA_POS_SHIFT          ; IDXq = Y index of 2nd pixel
  movzx     SAMPLEd, byte [Yq + IDXq]
  movq      xmm2, [LUTq + LUT_ENTRY_BYTES * SAMPLEq]

  ; --- Combine, scale, pack and store both pixels. ---
  paddsw    xmm1, xmm0                    ; pixel 0 = Y0 + (U + V)
  paddsw    xmm2, xmm0                    ; pixel 1 = Y1 + (U + V)
  shufps    xmm1, xmm2, 0x44              ; xmm1 = [pixel 0 | pixel 1]
  psraw     xmm1, OUTPUT_SHIFT
  packuswb  xmm1, xmm1                    ; saturate 16-bit lanes to bytes
  movq      qword [ARGBq], xmm1           ; write 8 bytes = 2 pixels
  add       ARGBq, 8

.pair_check:
  ; Continue while at least two pixels remain (WIDTH - 2 >= 0).
  sub       WIDTHq, 2
  jns       .pair_loop

  ; WIDTH is now -1 (one pixel left) or -2 (none); bit 0 tells them apart.
  and       WIDTHq, 1
  jz        .done

  ; --- Trailing single pixel: chroma. ---
  mov       IDXq, POSq
  sar       IDXq, CHROMA_POS_SHIFT
  movzx     SAMPLEd, byte [Uq + IDXq]
  movq      xmm0, [LUTq + LUT_U_BASE + LUT_ENTRY_BYTES * SAMPLEq]
  movzx     SAMPLEd, byte [Vq + IDXq]
  movq      xmm1, [LUTq + LUT_V_BASE + LUT_ENTRY_BYTES * SAMPLEq]
  paddsw    xmm0, xmm1                    ; xmm0 = U + V

  ; --- Trailing single pixel: luma, combine, store. ---
  mov       IDXq, POSq
  sar       IDXq, LUMA_POS_SHIFT
  movzx     SAMPLEd, byte [Yq + IDXq]
  movq      xmm1, [LUTq + LUT_ENTRY_BYTES * SAMPLEq]
  paddsw    xmm1, xmm0
  psraw     xmm1, OUTPUT_SHIFT
  packuswb  xmm1, xmm1
  movd      dword [ARGBq], xmm1           ; write 4 bytes = 1 pixel

.done:
  ; Restore in reverse order of the PUSHes above.
  POP       r12
  POP       r11
  POP       r10
  RET