Home | History | Annotate | Download | only in simd
      1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 ; Use of this source code is governed by a BSD-style license that can be
      3 ; found in the LICENSE file.
      4 
      5 %include "media/base/simd/media_export.asm"
      6 %include "third_party/x86inc/x86inc.asm"
      7 
      8 ;
      9 ; This file uses MMX and SSE2 instructions.
     10 ;
     11   SECTION_TEXT
     12   CPU       SSE2
     13 
     14 ; void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf,
     15 ;                                  const uint8* u_buf,
     16 ;                                  const uint8* v_buf,
     17 ;                                  uint8* rgb_buf,
     18 ;                                  ptrdiff_t width,
     19 ;                                  ptrdiff_t source_dx);
     20 %define SYMBOL ScaleYUVToRGB32Row_SSE2_X64
     21   EXPORT    SYMBOL
     22   align     function_align
     23 
     24 mangle(SYMBOL):
     25   %assign   stack_offset 0
     26   extern    mangle(kCoefficientsRgbY) ; NOTE(review): not referenced in the visible code; argument 7 supplies the table.
     27 ; Scales one YUV row horizontally (step source_dx per pixel) into 4-byte pixels.
     28 ; Parameters are in the following order:
     29 ; 1. Y plane
     30 ; 2. U plane
     31 ; 3. V plane
     32 ; 4. ARGB frame
     33 ; 5. Width
     34 ; 6. Source dx
     35 ; 7. Convert table (missing from the C prototype comment above; base of all lookups)
     36 
     37 PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1
     38 ; R1 arrives holding the table pointer and is then reused as lookup scratch (COMP).
     39 %define     TABLEq   r10
     40 %define     Xq       r11
     41 %define     INDEXq   r12
     42 %define     COMPq    R1q
     43 %define     COMPd    R1d
     44 
     45   PUSH      r10                       ; r10-r12 are restored by the POPs at .scaledone.
     46   PUSH      r11
     47   PUSH      r12
     48 
     49   mov TABLEq, R1q                     ; Keep the table base; frees R1 for use as COMP.
     50 
     51   ; Set Xq index to 0. Xq is the source position with 16 fractional bits.
     52   xor       Xq, Xq
     53   jmp       .scaleend                 ; Test WIDTH before running the first iteration.
     54 
     55 .scaleloop:                           ; Each iteration produces two output pixels.
     56   ; Read UV pixels. Both output pixels of this iteration share this U/V sample.
     57   mov       INDEXq, Xq
     58   sar       INDEXq, 17                ; Chroma index is half the luma index (Xq >> 16).
     59   movzx     COMPd, BYTE [Uq + INDEXq]
     60   movq      xmm0, [TABLEq + 2048 + 8 * COMPq] ; U entries (8 bytes each) start at byte 2048.
     61   movzx     COMPd, BYTE [Vq + INDEXq]
     62   movq      xmm1, [TABLEq + 4096 + 8 * COMPq] ; V entries (8 bytes each) start at byte 4096.
     63 
     64   ; Read first Y pixel.
     65   lea       INDEXq, [Xq + SOURCE_DXq] ; INDEXq now points to next pixel.
     66   sar       Xq, 16
     67   movzx     COMPd, BYTE [Yq + Xq]
     68   paddsw    xmm0, xmm1		      ; xmm0 = U + V (saturating). Hide an ADD after memory load.
     69   movq      xmm1, [TABLEq + 8 * COMPq] ; Y entries (8 bytes each) start at byte 0.
     70 
     71   ;  Read next Y pixel.
     72   lea       Xq, [INDEXq + SOURCE_DXq] ; Xq now points to next pixel.
     73   sar       INDEXq, 16
     74   movzx     COMPd, BYTE [Yq + INDEXq]
     75   movq      xmm2, [TABLEq + 8 * COMPq]
     76   paddsw    xmm1, xmm0                ; First pixel:  Y0 + (U + V).
     77   paddsw    xmm2, xmm0                ; Second pixel: Y1 + (U + V).
     78   shufps    xmm1, xmm2, 0x44          ; Join two pixels into one XMM register
     79   psraw     xmm1, 6                   ; Each 16-bit lane >>= 6 (presumably the table's fraction bits - confirm).
     80   packuswb  xmm1, xmm1                ; Saturate the 16-bit lanes to unsigned bytes.
     81   movq      QWORD [ARGBq], xmm1       ; Store two 4-byte pixels.
     82   add       ARGBq, 8
     83 
     84 .scaleend:
     85   sub       WIDTHq, 2
     86   jns       .scaleloop                ; Loop while at least two pixels were left.
     87 
     88   and       WIDTHq, 1                 ; odd number of pixels? (WIDTH is -1 if one is left, -2 if none, for width >= 0)
     89   jz        .scaledone
     90 
     91   ; Read U V components.
     92   mov       INDEXq, Xq
     93   sar       INDEXq, 17
     94   movzx     COMPd, BYTE [Uq + INDEXq]
     95   movq      xmm0, [TABLEq + 2048 + 8 * COMPq]
     96   movzx     COMPd, BYTE [Vq + INDEXq]
     97   movq      xmm1, [TABLEq + 4096 + 8 * COMPq]
     98   paddsw    xmm0, xmm1                ; xmm0 = U + V (saturating).
     99 
    100   ; Read one Y component.
    101   mov       INDEXq, Xq
    102   sar       INDEXq, 16
    103   movzx     COMPd, BYTE [Yq + INDEXq]
    104   movq      xmm1, [TABLEq + 8 * COMPq]
    105   paddsw    xmm1, xmm0
    106   psraw     xmm1, 6
    107   packuswb  xmm1, xmm1
    108   movd      DWORD [ARGBq], xmm1       ; Store the single remaining 4-byte pixel.
    109 
    110 .scaledone:
    111   POP       r12                       ; Restore in reverse order of the PUSHes above.
    112   POP       r11
    113   POP       r10
    114   RET
    115