Home | History | Annotate | Download | only in x86
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12 %include "vpx_ports/x86_abi_support.asm"
     13 
     14 ;void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
     15 ;                            short *diff, unsigned char *Predictor,
     16 ;                            int pitch);
        ;
        ; Computes a 4x4 block of 16-bit differences, diff = z - Predictor.
        ; For each of the four rows, 4 bytes are loaded from z and 4 bytes from
        ; Predictor, both are zero-extended to words, subtracted, and the four
        ; resulting shorts (8 bytes) are stored to diff.
        ;
        ;   z          source bytes; consecutive rows are src_stride bytes apart
        ;   src_stride read as a 32-bit int and sign-extended to 64 bits
        ;   diff       output; consecutive rows are pitch shorts (pitch*2 bytes) apart
        ;   Predictor  prediction bytes; consecutive rows are pitch bytes apart
        ;   pitch      read as a 32-bit int and sign-extended to 64 bits
        ;
        ; Registers written: rax, rcx, rdx, rsi, rdi, mm0, mm1, mm7
        ; (rsi and rdi are pushed in the prolog and popped in the epilog).
        ;
        ; NOTE(review): despite the _sse2 suffix only MMX registers are used, and
        ; no emms is executed before returning -- presumably the caller is expected
        ; to clear the MMX state; confirm.
        ; NOTE(review): GET_GOT/RESTORE_GOT are invoked although this routine
        ; contains no GLOBAL() reference.
     17 global sym(vp8_subtract_b_sse2_impl)
     18 sym(vp8_subtract_b_sse2_impl):
     19     push        rbp
     20     mov         rbp, rsp
     21     SHADOW_ARGS_TO_STACK 5
     22     GET_GOT     rbx
     23     push rsi
     24     push rdi
     25     ; end prolog
     26 
     27         mov     rdi,        arg(2) ;diff
     28         mov     rax,        arg(3) ;Predictor
     29         mov     rsi,        arg(0) ;z
     30         movsxd  rdx,        dword ptr arg(1);src_stride;
     31         movsxd  rcx,        dword ptr arg(4);pitch
     32         pxor    mm7,        mm7             ; mm7 = 0, used to zero-extend bytes to words
     33         ; row 0
     34         movd    mm0,        [rsi]           ; 4 source bytes
     35         movd    mm1,        [rax]           ; 4 predictor bytes
     36         punpcklbw   mm0,    mm7             ; bytes -> zero-extended words
     37         punpcklbw   mm1,    mm7
     38         psubw   mm0,        mm1             ; mm0 = source - predictor, per word
     39         movq    MMWORD PTR [rdi],      mm0  ; store 4 shorts
     40         ; row 1: z + src_stride, Predictor + pitch, diff + pitch shorts
     41         movd    mm0,        [rsi+rdx]
     42         movd    mm1,        [rax+rcx]
     43         punpcklbw   mm0,    mm7
     44         punpcklbw   mm1,    mm7
     45         psubw   mm0,        mm1
     46         movq    MMWORD PTR [rdi+rcx*2], mm0
     47         ; row 2: z + 2*src_stride, Predictor + 2*pitch, diff + 2*pitch shorts
     48         movd    mm0,        [rsi+rdx*2]
     49         movd    mm1,        [rax+rcx*2]
     50         punpcklbw   mm0,    mm7
     51         punpcklbw   mm1,    mm7
     52         psubw   mm0,        mm1
     53         movq    MMWORD PTR [rdi+rcx*4], mm0
     54         ; a factor of 3 cannot be encoded as an index scale, so rebase for row 3
     55         lea     rsi,        [rsi+rdx*2]     ; rsi = z + 2*src_stride
     56         lea     rcx,        [rcx+rcx*2]     ; rcx = 3*pitch
     57         ; row 3: z + 3*src_stride, Predictor + 3*pitch, diff + 3*pitch shorts
     58         movd    mm0,        [rsi+rdx]
     59         movd    mm1,        [rax+rcx]
     60         punpcklbw   mm0,    mm7
     61         punpcklbw   mm1,    mm7
     62         psubw   mm0,        mm1
     63         movq    MMWORD PTR [rdi+rcx*2], mm0
     64 
     65     ; begin epilog
     66     pop rdi
     67     pop rsi
     68     RESTORE_GOT
     69     UNSHADOW_ARGS
     70     pop         rbp
     71     ret
     72 
     73 
     74 ;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
        ;
        ; Computes 16 rows x 16 columns of 16-bit differences, diff = src - pred.
        ; The loop runs 8 times and handles two rows per iteration.
        ;
        ;   diff    output; 16 shorts (32 bytes) per row, rows stored back to back
        ;   src     source bytes; consecutive rows are stride bytes apart
        ;   pred    prediction bytes; consecutive rows are 16 bytes apart
        ;   stride  read as a 32-bit int and sign-extended to 64 bits
        ;
        ; All loads and stores of src, pred and diff use movdqa, which faults on
        ; addresses that are not 16-byte aligned; src, stride, pred and diff must
        ; therefore keep every accessed row 16-byte aligned.
        ;
        ; Sign reconstruction: psubb yields only the low 8 bits of each difference.
        ; XOR with 0x80 (t80) maps unsigned bytes onto signed order, so the signed
        ; compare pcmpgtb produces 0xFF exactly where pred > src, i.e. where the
        ; true difference is negative, and 0x00 elsewhere. Interleaving the low
        ; byte with that mask byte forms the sign-extended 16-bit result.
        ;
        ; Registers written: rax, rcx, rdx, rsi, rdi, xmm0-xmm7, flags
        ; (rsi and rdi are pushed in the prolog and popped before returning).
        ; NOTE(review): SAVE_XMM/RESTORE_XMM are defined in x86_abi_support.asm;
        ; presumably they preserve xmm6/xmm7 where the ABI requires it -- confirm.
     75 global sym(vp8_subtract_mby_sse2)
     76 sym(vp8_subtract_mby_sse2):
     77     push        rbp
     78     mov         rbp, rsp
     79     SHADOW_ARGS_TO_STACK 4
     80     SAVE_XMM
     81     GET_GOT     rbx
     82     push rsi
     83     push rdi
     84     ; end prolog
     85 
     86             mov         rsi,            arg(1) ;src
     87             mov         rdi,            arg(0) ;diff
     88 
     89             mov         rax,            arg(2) ;pred
     90             movsxd      rdx,            dword ptr arg(3) ;stride
     91 
     92             mov         rcx,            8      ; loop counter: 8 iterations, two rows each
     93 
     94 submby_loop:
            ; ---- first row of the pair ----
     95             movdqa      xmm0,           XMMWORD PTR [rsi]   ; src
     96             movdqa      xmm1,           XMMWORD PTR [rax]   ; pred
     97 
     98             movdqa      xmm2,           xmm0            ; keep a copy of src for the compare
     99             psubb       xmm0,           xmm1            ; low byte of src - pred
    100 
    101             pxor        xmm1,           [GLOBAL(t80)]   ;convert to signed values
    102             pxor        xmm2,           [GLOBAL(t80)]
    103             pcmpgtb     xmm1,           xmm2            ; obtain sign information (0xFF where pred > src)
    104 
    105             movdqa      xmm2,    xmm0                   ; copy of the low bytes for the upper half
    106             movdqa      xmm3,    xmm1                   ; copy of the sign mask for the upper half
    107             punpcklbw   xmm0,    xmm1            ; put sign back to subtraction (columns 0-7)
    108             punpckhbw   xmm2,    xmm3            ; put sign back to subtraction (columns 8-15)
    109 
    110             movdqa      XMMWORD PTR [rdi],   xmm0
    111             movdqa      XMMWORD PTR [rdi +16], xmm2
    112             ; ---- second row of the pair: src + stride, pred + 16 ----
    113             movdqa      xmm4,           XMMWORD PTR [rsi + rdx]
    114             movdqa      xmm5,           XMMWORD PTR [rax + 16]
    115 
    116             movdqa      xmm6,           xmm4
    117             psubb       xmm4,           xmm5
    118 
    119             pxor        xmm5,           [GLOBAL(t80)]   ;convert to signed values
    120             pxor        xmm6,           [GLOBAL(t80)]
    121             pcmpgtb     xmm5,           xmm6            ; obtain sign information
    122 
    123             movdqa      xmm6,    xmm4
    124             movdqa      xmm7,    xmm5
    125             punpcklbw   xmm4,    xmm5            ; put sign back to subtraction
    126             punpckhbw   xmm6,    xmm7            ; put sign back to subtraction
    127 
    128             movdqa      XMMWORD PTR [rdi +32], xmm4
    129             movdqa      XMMWORD PTR [rdi +48], xmm6
    130             ; advance all three pointers by two rows
    131             add         rdi,            64              ; diff: 2 rows * 16 shorts * 2 bytes
    132             add         rax,            32              ; pred: 2 rows * 16 bytes
    133             lea         rsi,            [rsi+rdx*2]     ; src: 2 * stride
    134 
    135             sub         rcx,            1
    136             jnz         submby_loop
    137 
    138     pop rdi
    139     pop rsi
    140     ; begin epilog (the two pops above already undo the prolog pushes)
    141     RESTORE_GOT
    142     RESTORE_XMM
    143     UNSHADOW_ARGS
    144     pop         rbp
    145     ret
    146 
    147 
    148 ;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
        ;
        ; Computes 16-bit differences (source - prediction) for two 8x8 blocks,
        ; first the one read from usrc, then the one read from vsrc.
        ;
        ;   diff    output base; results for usrc are written at diff + 256 shorts,
        ;           results for vsrc at diff + 320 shorts (64 shorts each,
        ;           8 shorts per row, rows stored back to back)
        ;   usrc    source bytes, 8 per row, consecutive rows stride bytes apart
        ;   vsrc    source bytes, 8 per row, consecutive rows stride bytes apart
        ;   pred    prediction base; bytes for usrc are read from pred + 256,
        ;           bytes for vsrc from pred + 320 (8 bytes per row, rows back to back)
        ;   stride  read as a 32-bit int and sign-extended to 64 bits
        ;
        ; Two source rows are loaded with movq (no alignment requirement) and
        ; merged into one 16-byte register; the matching two prediction rows are
        ; one 16-byte movdqa load. The pred loads and diff stores use movdqa, so
        ; pred + 256 and diff + 256 shorts must be 16-byte aligned.
        ;
        ; Sign reconstruction: psubb yields only the low 8 bits of each difference.
        ; XOR with 0x80 (t80) maps unsigned bytes onto signed order, so pcmpgtb
        ; produces 0xFF exactly where pred > src (negative difference). Interleaving
        ; the low byte with that mask byte forms the sign-extended 16-bit result.
        ;
        ; Registers written: rax, rcx, rdx, rsi, rdi, xmm0-xmm3
        ; (rsi and rdi are pushed in the prolog and popped in the epilog).
    149 global sym(vp8_subtract_mbuv_sse2)
    150 sym(vp8_subtract_mbuv_sse2):
    151     push        rbp
    152     mov         rbp, rsp
    153     SHADOW_ARGS_TO_STACK 5
    154     GET_GOT     rbx
    155     push rsi
    156     push rdi
    157     ; end prolog
    158 
    159             mov     rdi,        arg(0) ;diff
    160             mov     rax,        arg(3) ;pred
    161             mov     rsi,        arg(1) ;z = usrc
    162             add     rdi,        256*2  ;diff = diff + 256 (shorts)
    163             add     rax,        256    ;Predictor = pred + 256
    164             movsxd  rdx,        dword ptr arg(4) ;stride;
    165             lea     rcx,        [rdx + rdx*2]   ; rcx = 3*stride, used to address rows 3 and 7
    166 
    167             ;u
    168             ;line 0 1
    169             movq       xmm0,    MMWORD PTR [rsi]  ; src
    170             movq       xmm2,    MMWORD PTR [rsi+rdx]
    171             movdqa     xmm1,    XMMWORD PTR [rax]  ; pred
    172             punpcklqdq xmm0,    xmm2            ; xmm0 = row 0 (low 8 bytes) | row 1 (high 8 bytes)
    173 
    174             movdqa     xmm2,    xmm0            ; keep a copy of src for the compare
    175             psubb      xmm0,    xmm1            ; subtraction with sign missed
    176 
    177             pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
    178             pxor       xmm2,    [GLOBAL(t80)]
    179             pcmpgtb    xmm1,    xmm2            ; obtain sign information (0xFF where pred > src)
    180 
    181             movdqa     xmm2,    xmm0            ; copy of the low bytes for the upper half
    182             movdqa     xmm3,    xmm1            ; copy of the sign mask for the upper half
    183             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
    184             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
    185 
    186             movdqa     XMMWORD PTR [rdi],   xmm0
    187             movdqa     XMMWORD PTR [rdi +16],   xmm2
    188 
    189             ;line 2 3
    190             movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
    191             movq       xmm2,    MMWORD PTR [rsi+rcx]
    192             movdqa     xmm1,    XMMWORD PTR [rax+16]  ; pred
    193             punpcklqdq xmm0,    xmm2
    194 
    195             movdqa     xmm2,    xmm0
    196             psubb      xmm0,    xmm1            ; subtraction with sign missed
    197 
    198             pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
    199             pxor       xmm2,    [GLOBAL(t80)]
    200             pcmpgtb    xmm1,    xmm2            ; obtain sign information
    201 
    202             movdqa     xmm2,    xmm0
    203             movdqa     xmm3,    xmm1
    204             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
    205             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
    206 
    207             movdqa     XMMWORD PTR [rdi + 32],   xmm0
    208             movdqa     XMMWORD PTR [rdi + 48],   xmm2
    209 
    210             ;line 4 5
    211             lea        rsi,     [rsi + rdx*4]   ; advance src by 4 rows
    212 
    213             movq       xmm0,    MMWORD PTR [rsi]  ; src
    214             movq       xmm2,    MMWORD PTR [rsi+rdx]
    215             movdqa     xmm1,    XMMWORD PTR [rax + 32]  ; pred
    216             punpcklqdq xmm0,    xmm2
    217 
    218             movdqa     xmm2,    xmm0
    219             psubb      xmm0,    xmm1            ; subtraction with sign missed
    220 
    221             pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
    222             pxor       xmm2,    [GLOBAL(t80)]
    223             pcmpgtb    xmm1,    xmm2            ; obtain sign information
    224 
    225             movdqa     xmm2,    xmm0
    226             movdqa     xmm3,    xmm1
    227             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
    228             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
    229 
    230             movdqa     XMMWORD PTR [rdi + 64],   xmm0
    231             movdqa     XMMWORD PTR [rdi + 80],   xmm2
    232 
    233             ;line 6 7
    234             movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
    235             movq       xmm2,    MMWORD PTR [rsi+rcx]
    236             movdqa     xmm1,    XMMWORD PTR [rax+ 48]  ; pred
    237             punpcklqdq xmm0,    xmm2
    238 
    239             movdqa     xmm2,    xmm0
    240             psubb      xmm0,    xmm1            ; subtraction with sign missed
    241 
    242             pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
    243             pxor       xmm2,    [GLOBAL(t80)]
    244             pcmpgtb    xmm1,    xmm2            ; obtain sign information
    245 
    246             movdqa     xmm2,    xmm0
    247             movdqa     xmm3,    xmm1
    248             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
    249             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
    250 
    251             movdqa     XMMWORD PTR [rdi + 96],   xmm0
    252             movdqa     XMMWORD PTR [rdi + 112],  xmm2
    253 
    254             ;v
    255             mov     rsi,        arg(2) ;z = vsrc
    256             add     rdi,        64*2  ;diff = diff + 320 (shorts)
    257             add     rax,        64    ;Predictor = pred + 320
    258 
    259             ;line 0 1
    260             movq       xmm0,    MMWORD PTR [rsi]  ; src
    261             movq       xmm2,    MMWORD PTR [rsi+rdx]
    262             movdqa     xmm1,    XMMWORD PTR [rax]  ; pred
    263             punpcklqdq xmm0,    xmm2            ; xmm0 = row 0 (low 8 bytes) | row 1 (high 8 bytes)
    264 
    265             movdqa     xmm2,    xmm0
    266             psubb      xmm0,    xmm1            ; subtraction with sign missed
    267 
    268             pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
    269             pxor       xmm2,    [GLOBAL(t80)]
    270             pcmpgtb    xmm1,    xmm2            ; obtain sign information
    271 
    272             movdqa     xmm2,    xmm0
    273             movdqa     xmm3,    xmm1
    274             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
    275             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
    276 
    277             movdqa     XMMWORD PTR [rdi],   xmm0
    278             movdqa     XMMWORD PTR [rdi +16],   xmm2
    279 
    280             ;line 2 3
    281             movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
    282             movq       xmm2,    MMWORD PTR [rsi+rcx]
    283             movdqa     xmm1,    XMMWORD PTR [rax+16]  ; pred
    284             punpcklqdq xmm0,    xmm2
    285 
    286             movdqa     xmm2,    xmm0
    287             psubb      xmm0,    xmm1            ; subtraction with sign missed
    288 
    289             pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
    290             pxor       xmm2,    [GLOBAL(t80)]
    291             pcmpgtb    xmm1,    xmm2            ; obtain sign information
    292 
    293             movdqa     xmm2,    xmm0
    294             movdqa     xmm3,    xmm1
    295             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
    296             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
    297 
    298             movdqa     XMMWORD PTR [rdi + 32],   xmm0
    299             movdqa     XMMWORD PTR [rdi + 48],   xmm2
    300 
    301             ;line 4 5
    302             lea        rsi,     [rsi + rdx*4]   ; advance src by 4 rows
    303 
    304             movq       xmm0,    MMWORD PTR [rsi]  ; src
    305             movq       xmm2,    MMWORD PTR [rsi+rdx]
    306             movdqa     xmm1,    XMMWORD PTR [rax + 32]  ; pred
    307             punpcklqdq xmm0,    xmm2
    308 
    309             movdqa     xmm2,    xmm0
    310             psubb      xmm0,    xmm1            ; subtraction with sign missed
    311 
    312             pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
    313             pxor       xmm2,    [GLOBAL(t80)]
    314             pcmpgtb    xmm1,    xmm2            ; obtain sign information
    315 
    316             movdqa     xmm2,    xmm0
    317             movdqa     xmm3,    xmm1
    318             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
    319             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
    320 
    321             movdqa     XMMWORD PTR [rdi + 64],   xmm0
    322             movdqa     XMMWORD PTR [rdi + 80],   xmm2
    323 
    324             ;line 6 7
    325             movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
    326             movq       xmm2,    MMWORD PTR [rsi+rcx]
    327             movdqa     xmm1,    XMMWORD PTR [rax+ 48]  ; pred
    328             punpcklqdq xmm0,    xmm2
    329 
    330             movdqa     xmm2,    xmm0
    331             psubb      xmm0,    xmm1            ; subtraction with sign missed
    332 
    333             pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
    334             pxor       xmm2,    [GLOBAL(t80)]
    335             pcmpgtb    xmm1,    xmm2            ; obtain sign information
    336 
    337             movdqa     xmm2,    xmm0
    338             movdqa     xmm3,    xmm1
    339             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
    340             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
    341 
    342             movdqa     XMMWORD PTR [rdi + 96],   xmm0
    343             movdqa     XMMWORD PTR [rdi + 112],  xmm2
    344 
    345     ; begin epilog
    346     pop rdi
    347     pop rsi
    348     RESTORE_GOT
    349     UNSHADOW_ARGS
    350     pop         rbp
    351     ret
    352 
    353 SECTION_RODATA
        ; t80: sixteen 0x80 bytes. The routines in this file XOR byte vectors with
        ; it so that the signed byte compare pcmpgtb orders them as unsigned values.
        ; It is used as a direct memory operand of pxor, hence the 16-byte alignment.
    354 align 16
    355 t80:
    356     times 16 db 0x80
    357