Home | History | Annotate | Download | only in x86
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12 %include "vpx_ports/x86_abi_support.asm"
     13 
     14 ;void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
     15 ;                            short *diff, unsigned char *Predictor,
     16 ;                            int pitch);
     17 global sym(vp8_subtract_b_sse2_impl) PRIVATE
     18 sym(vp8_subtract_b_sse2_impl):
     19     push        rbp
     20     mov         rbp, rsp
     21     SHADOW_ARGS_TO_STACK 5
     22     GET_GOT     rbx
     23     push rsi
     24     push rdi
     25     ; end prolog
     26 
     27         mov     rdi,        arg(2) ;diff
     28         mov     rax,        arg(3) ;Predictor
     29         mov     rsi,        arg(0) ;z
     30         movsxd  rdx,        dword ptr arg(1);src_stride;
     31         movsxd  rcx,        dword ptr arg(4);pitch
     32         pxor    mm7,        mm7
     33 
     34         movd    mm0,        [rsi]
     35         movd    mm1,        [rax]
     36         punpcklbw   mm0,    mm7
     37         punpcklbw   mm1,    mm7
     38         psubw   mm0,        mm1
     39         movq    MMWORD PTR [rdi],      mm0
     40 
     41         movd    mm0,        [rsi+rdx]
     42         movd    mm1,        [rax+rcx]
     43         punpcklbw   mm0,    mm7
     44         punpcklbw   mm1,    mm7
     45         psubw   mm0,        mm1
     46         movq    MMWORD PTR [rdi+rcx*2], mm0
     47 
     48         movd    mm0,        [rsi+rdx*2]
     49         movd    mm1,        [rax+rcx*2]
     50         punpcklbw   mm0,    mm7
     51         punpcklbw   mm1,    mm7
     52         psubw   mm0,        mm1
     53         movq    MMWORD PTR [rdi+rcx*4], mm0
     54 
     55         lea     rsi,        [rsi+rdx*2]
     56         lea     rcx,        [rcx+rcx*2]
     57 
     58         movd    mm0,        [rsi+rdx]
     59         movd    mm1,        [rax+rcx]
     60         punpcklbw   mm0,    mm7
     61         punpcklbw   mm1,    mm7
     62         psubw   mm0,        mm1
     63         movq    MMWORD PTR [rdi+rcx*2], mm0
     64 
     65     ; begin epilog
     66     pop rdi
     67     pop rsi
     68     RESTORE_GOT
     69     UNSHADOW_ARGS
     70     pop         rbp
     71     ret
     72 
     73 
     74 ;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
     75 ;unsigned char *pred, int pred_stride)
     76 global sym(vp8_subtract_mby_sse2) PRIVATE
     77 sym(vp8_subtract_mby_sse2):
     78     push        rbp
     79     mov         rbp, rsp
     80     SHADOW_ARGS_TO_STACK 5
     81     GET_GOT     rbx
     82     push rsi
     83     push rdi
     84     ; end prolog
     85 
     86     mov         rdi,        arg(0)          ;diff
     87     mov         rsi,        arg(1)          ;src
     88     movsxd      rdx,        dword ptr arg(2);src_stride
     89     mov         rax,        arg(3)          ;pred
     90     movdqa      xmm4,       [GLOBAL(t80)]
     91     push        rbx
     92     mov         rcx,        8               ; do two lines at one time
     93     movsxd      rbx,        dword ptr arg(4);pred_stride
     94 
     95 .submby_loop:
     96     movdqa      xmm0,       [rsi]           ; src
     97     movdqa      xmm1,       [rax]           ; pred
     98 
     99     movdqa      xmm2,       xmm0
    100     psubb       xmm0,       xmm1
    101 
    102     pxor        xmm1,       xmm4            ;convert to signed values
    103     pxor        xmm2,       xmm4
    104     pcmpgtb     xmm1,       xmm2            ; obtain sign information
    105 
    106     movdqa      xmm2,       xmm0
    107     punpcklbw   xmm0,       xmm1            ; put sign back to subtraction
    108     punpckhbw   xmm2,       xmm1            ; put sign back to subtraction
    109 
    110     movdqa      xmm3,       [rsi + rdx]
    111     movdqa      xmm5,       [rax + rbx]
    112 
    113     lea         rsi,        [rsi+rdx*2]
    114     lea         rax,        [rax+rbx*2]
    115 
    116     movdqa      [rdi],      xmm0
    117     movdqa      [rdi +16],  xmm2
    118 
    119     movdqa      xmm1,       xmm3
    120     psubb       xmm3,       xmm5
    121 
    122     pxor        xmm5,       xmm4            ;convert to signed values
    123     pxor        xmm1,       xmm4
    124     pcmpgtb     xmm5,       xmm1            ; obtain sign information
    125 
    126     movdqa      xmm1,       xmm3
    127     punpcklbw   xmm3,       xmm5            ; put sign back to subtraction
    128     punpckhbw   xmm1,       xmm5            ; put sign back to subtraction
    129 
    130     movdqa      [rdi +32],  xmm3
    131     movdqa      [rdi +48],  xmm1
    132 
    133     add         rdi,        64
    134     dec         rcx
    135     jnz         .submby_loop
    136 
    137     pop rbx
    138     pop rdi
    139     pop rsi
    140     ; begin epilog
    141     RESTORE_GOT
    142     UNSHADOW_ARGS
    143     pop         rbp
    144     ret
    145 
    146 ;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
    147 ;                         int src_stride, unsigned char *upred,
    148 ;                         unsigned char *vpred, int pred_stride)
    149 global sym(vp8_subtract_mbuv_sse2) PRIVATE
    150 sym(vp8_subtract_mbuv_sse2):
    151     push        rbp
    152     mov         rbp, rsp
    153     SHADOW_ARGS_TO_STACK 7
    154     GET_GOT     rbx
    155     push rsi
    156     push rdi
    157     ; end prolog
    158 
    159     movdqa      xmm4,       [GLOBAL(t80)]
    160     mov         rdi,        arg(0)          ;diff
    161     mov         rsi,        arg(1)          ;usrc
    162     movsxd      rdx,        dword ptr arg(3);src_stride;
    163     mov         rax,        arg(4)          ;upred
    164     add         rdi,        256*2           ;diff = diff + 256 (shorts)
    165     mov         rcx,        4
    166     push        rbx
    167     movsxd      rbx,        dword ptr arg(6);pred_stride
    168 
    169     ;u
    170 .submbu_loop:
    171     movq        xmm0,       [rsi]           ; src
    172     movq        xmm2,       [rsi+rdx]       ; src -- next line
    173     movq        xmm1,       [rax]           ; pred
    174     movq        xmm3,       [rax+rbx]       ; pred -- next line
    175     lea         rsi,        [rsi + rdx*2]
    176     lea         rax,        [rax + rbx*2]
    177 
    178     punpcklqdq  xmm0,       xmm2
    179     punpcklqdq  xmm1,       xmm3
    180 
    181     movdqa      xmm2,       xmm0
    182     psubb       xmm0,       xmm1            ; subtraction with sign missed
    183 
    184     pxor        xmm1,       xmm4            ;convert to signed values
    185     pxor        xmm2,       xmm4
    186     pcmpgtb     xmm1,       xmm2            ; obtain sign information
    187 
    188     movdqa      xmm2,       xmm0
    189     movdqa      xmm3,       xmm1
    190     punpcklbw   xmm0,       xmm1            ; put sign back to subtraction
    191     punpckhbw   xmm2,       xmm3            ; put sign back to subtraction
    192 
    193     movdqa      [rdi],      xmm0            ; store difference
    194     movdqa      [rdi +16],  xmm2            ; store difference
    195     add         rdi,        32
    196     sub         rcx, 1
    197     jnz         .submbu_loop
    198 
    199     mov         rsi,        arg(2)          ;vsrc
    200     mov         rax,        arg(5)          ;vpred
    201     mov         rcx,        4
    202 
    203     ;v
    204 .submbv_loop:
    205     movq        xmm0,       [rsi]           ; src
    206     movq        xmm2,       [rsi+rdx]       ; src -- next line
    207     movq        xmm1,       [rax]           ; pred
    208     movq        xmm3,       [rax+rbx]       ; pred -- next line
    209     lea         rsi,        [rsi + rdx*2]
    210     lea         rax,        [rax + rbx*2]
    211 
    212     punpcklqdq  xmm0,       xmm2
    213     punpcklqdq  xmm1,       xmm3
    214 
    215     movdqa      xmm2,       xmm0
    216     psubb       xmm0,       xmm1            ; subtraction with sign missed
    217 
    218     pxor        xmm1,       xmm4            ;convert to signed values
    219     pxor        xmm2,       xmm4
    220     pcmpgtb     xmm1,       xmm2            ; obtain sign information
    221 
    222     movdqa      xmm2,       xmm0
    223     movdqa      xmm3,       xmm1
    224     punpcklbw   xmm0,       xmm1            ; put sign back to subtraction
    225     punpckhbw   xmm2,       xmm3            ; put sign back to subtraction
    226 
    227     movdqa      [rdi],      xmm0            ; store difference
    228     movdqa      [rdi +16],  xmm2            ; store difference
    229     add         rdi,        32
    230     sub         rcx, 1
    231     jnz         .submbv_loop
    232 
    233     pop         rbx
    234     ; begin epilog
    235     pop rdi
    236     pop rsi
    237     RESTORE_GOT
    238     UNSHADOW_ARGS
    239     pop         rbp
    240     ret
    241 
    242 SECTION_RODATA
    243 align 16
    244 t80:
    245     times 16 db 0x80
    246