Home | History | Annotate | Download | only in simd
      1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 ; Use of this source code is governed by a BSD-style license that can be
      3 ; found in the LICENSE file.
      4 
      5 %include "media/base/simd/media_export.asm"
      6 %include "third_party/x86inc/x86inc.asm"
      7 
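      8 ;
      9 ; This file uses MMX instructions, plus movntq, which was introduced with
        ; SSE (part of the x86-64 baseline, so always available to this code).
     10 ;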
     11   SECTION_TEXT
     12   CPU       MMX
     13 
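     14 ;void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf,
     15 ;                                      const uint8* u_buf,
     16 ;                                      const uint8* v_buf,
     17 ;                                      uint8* rgb_buf,
     18 ;                                      ptrdiff_t width,
     19 ;                                      ptrdiff_t source_dx,
        ;                                      const int16* convert_table);
        ; NOTE(review): the code below loads a seventh argument, the conversion
        ; lookup table. Its C name and type are not visible in this file;
        ; "const int16* convert_table" is an assumption -- confirm against the
        ; C declaration.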
     20 %define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64
     21   EXPORT    SYMBOL
     22   align     function_align
     23 
        ; Converts one row of Y/U/V samples into 32-bit output pixels while
        ; scaling horizontally with linear interpolation.
        ;
        ; The source position x and the step source_dx are used as 16.16
        ; fixed-point values: x >> 16 is the Y sample index and x & 0xffff is
        ; the blend fraction.  U and V are indexed with x >> 17, i.e. at half
        ; the Y index.  Each interpolated 8-bit value selects an 8-byte entry
        ; in the lookup table; the Y, U and V entries are summed with signed
        ; saturation, shifted right by 6 and packed to unsigned bytes.
        ;
        ; Each loop iteration emits two pixels that share one U/V contribution.
        ; When the second pixel's position is at or past width * source_dx,
        ; a single trailing pixel is written instead.
     24 mangle(SYMBOL):
     25   %assign   stack_offset 0
        ; NOTE(review): kCoefficientsRgbY is not referenced anywhere in the
        ; visible code; the table pointer used below arrives as argument 7.
     26   extern    mangle(kCoefficientsRgbY)
     27 
     28 ; Parameters are in the following order:
     29 ; 1. Y plane
     30 ; 2. U plane
     31 ; 3. V plane
     32 ; 4. ARGB frame
     33 ; 5. Width
     34 ; 6. Source dx
     35 ; 7. Conversion lookup table
     36 
        ; x86inc PROLOGUE: 7 arguments, 7 general-purpose registers, and 3 as
        ; the vector-register count, followed by the names bound to the
        ; argument registers.  The seventh argument is named R1.
     37 PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1
     38 
        ; Register roles:
        ;   TABLEq    - base of the conversion lookup table
        ;   Xq        - current fixed-point source position
        ;   INDEXq    - U/V sample index, later the next pixel's position
        ;   COMPR     - right-hand sample of the pair being blended
        ;   FRACTIONq - blend weight
        ;   COMPL     - left-hand sample, then the blended result.  It aliases
        ;               R1 (argument 7), so the table pointer is copied to
        ;               TABLEq before COMPL is first written.
     39 %define     TABLEq     r10
     40 %define     Xq         r11
     41 %define     INDEXq     r12
     42 %define     COMPRd     r13d
     43 %define     COMPRq     r13
     44 %define     FRACTIONq  r14
     45 %define     COMPL      R1
     46 %define     COMPLq     R1q
     47 %define     COMPLd     R1d
     48 
        ; r12-r14 are callee-saved under both the System V and Microsoft x64
        ; conventions; r10 and r11 are saved and restored here as well.
     49   PUSH      TABLEq
     50   PUSH      Xq
     51   PUSH      INDEXq
     52   PUSH      COMPRq
     53   PUSH      FRACTIONq
     54 
        ; Restores the five registers pushed above, in reverse order.
     55 %macro EPILOGUE 0
     56   POP       FRACTIONq
     57   POP       COMPRq
     58   POP       INDEXq
     59   POP       Xq
     60   POP       TABLEq
     61 %endmacro
     62 
        ; Move the table pointer out of R1 so R1 can serve as COMPL.
     63   mov       TABLEq, R1q
     64 
     65   imul      WIDTHq, SOURCE_DXq           ; source_width = width * source_dx
     66   xor       Xq, Xq                       ; x = 0
        ; 0x20000 is 2.0 in 16.16 fixed point: a step below 2.0 keeps x = 0.
     67   cmp       SOURCE_DXq, 0x20000
     68   jl        .lscaleend
     69   mov       Xq, 0x8000                   ; x = 0.5 for 1/2 or less
     70   jmp       .lscaleend
     71 
     72 .lscaleloop:
     73   ; Interpolate U
        ; index = x >> 17; weight = x & 0x1fffe (17-bit fraction, low bit
        ; cleared).  The left sample is weighted by the complement
        ; (weight ^ 0x1fffe) and the sum is scaled back with >> 17.
     74   mov       INDEXq, Xq
     75   sar       INDEXq, 0x11
     76   movzx     COMPLd, BYTE [Uq + INDEXq]
     77   movzx     COMPRd, BYTE [Uq + INDEXq + 1]
     78   mov       FRACTIONq, Xq
     79   and       FRACTIONq, 0x1fffe
     80   imul      COMPRq, FRACTIONq
     81   xor       FRACTIONq, 0x1fffe
     82   imul      COMPLq, FRACTIONq
     83   add       COMPLq, COMPRq
     84   shr       COMPLq, 17
        ; U entries start 2048 bytes (256 eight-byte entries) into the table.
     85   movq      mm0, [TABLEq + 2048 + 8 * COMPLq]
     86 
     87   ; Interpolate V
     88   movzx     COMPLd, BYTE [Vq + INDEXq]
     89   movzx     COMPRd, BYTE [Vq + INDEXq + 1]
     90   ; Trick here to imul COMPL first then COMPR.
     91   ; Saves two instructions. :)
        ; FRACTIONq still holds the complemented weight left over from the U
        ; step, so it is applied to the left sample first and then flipped
        ; back for the right sample.
     92   imul      COMPLq, FRACTIONq
     93   xor       FRACTIONq, 0x1fffe
     94   imul      COMPRq, FRACTIONq
     95   add       COMPLq, COMPRq
     96   shr       COMPLq, 17
        ; V entries start 4096 bytes (512 eight-byte entries) into the table;
        ; mm0 now holds the combined U + V contribution for both pixels.
     97   paddsw    mm0, [TABLEq + 4096 + 8 * COMPLq]
     98 
     99   ; Interpolate first Y1.
    100   lea       INDEXq, [Xq + SOURCE_DXq]   ; INDEXq now points to next pixel.
    101                                         ; Xq points to current pixel.
        ; Xq is reduced to the integer sample index below; the fixed-point
        ; position is kept in FRACTIONq for the weight computation.
    102   mov       FRACTIONq, Xq
    103   sar       Xq, 0x10
    104   movzx     COMPLd, BYTE [Yq + Xq]
    105   movzx     COMPRd, BYTE [Yq + Xq + 1]
    106   and       FRACTIONq, 0xffff
    107   imul      COMPRq, FRACTIONq
    108   xor       FRACTIONq, 0xffff
    109   imul      COMPLq, FRACTIONq
    110   add       COMPLq, COMPRq
    111   shr       COMPLq, 16
        ; Y entries are at the start of the table.
    112   movq      mm1, [TABLEq + 8 * COMPLq]
    113 
    114   ; Interpolate Y2 if available.
        ; Signed compare of the next position against source_width.
    115   cmp       INDEXq, WIDTHq
    116   jge       .lscalelastpixel
    117 
    118   lea       Xq, [INDEXq + SOURCE_DXq]    ; Xq points to next pixel.
    119                                          ; INDEXq points to current pixel.
    120   mov       FRACTIONq, INDEXq
    121   sar       INDEXq, 0x10
    122   movzx     COMPLd, BYTE [Yq + INDEXq]
    123   movzx     COMPRd, BYTE [Yq + INDEXq + 1]
    124   and       FRACTIONq, 0xffff
    125   imul      COMPRq, FRACTIONq
    126   xor       FRACTIONq, 0xffff
    127   imul      COMPLq, FRACTIONq
    128   add       COMPLq, COMPRq
    129   shr       COMPLq, 16
    130   movq      mm2, [TABLEq + 8 * COMPLq]
    131 
        ; Add the shared U/V contribution to both Y entries, drop 6 fractional
        ; bits, pack the two pixels to unsigned bytes with saturation and
        ; write them (8 bytes) with a non-temporal store.
    132   paddsw    mm1, mm0
    133   paddsw    mm2, mm0
    134   psraw     mm1, 0x6
    135   psraw     mm2, 0x6
    136   packuswb  mm1, mm2
    137   movntq    [ARGBq], mm1
    138   add       ARGBq, 0x8
    139 
        ; Loop test, also the entry point: continue while x < source_width.
    140 .lscaleend:
    141   cmp       Xq, WIDTHq
    142   jl        .lscaleloop
    143   jmp       .epilogue
    144 
        ; Only one pixel remains: finish it and store its 4 bytes.
    145 .lscalelastpixel:
    146   paddsw    mm1, mm0
    147   psraw     mm1, 6
    148   packuswb  mm1, mm1
    149   movd      [ARGBq], mm1
    150 
        ; NOTE(review): this label has no trailing colon, unlike the other
        ; labels in this function; NASM can warn about colon-less labels.
        ; NOTE(review): no emms is executed before returning; presumably the
        ; caller resets the MMX state -- confirm.
    151 .epilogue
    152   EPILOGUE
    153   RET
    153   RET
    154