Home | History | Annotate | Download | only in x86
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12 %include "vpx_ports/x86_abi_support.asm"
     13 
     14 ;void vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride,
     15 ;                            short *diff, unsigned char *Predictor,
     16 ;                            int pitch);
     17 global sym(vp8_subtract_b_mmx_impl) PRIVATE
     18 sym(vp8_subtract_b_mmx_impl):
     19     push        rbp
     20     mov         rbp, rsp
     21     SHADOW_ARGS_TO_STACK 5
     22     push rsi
     23     push rdi
     24     ; end prolog
     25 
     26 
     27         mov     rdi,        arg(2) ;diff
     28         mov     rax,        arg(3) ;Predictor
     29         mov     rsi,        arg(0) ;z
     30         movsxd  rdx,        dword ptr arg(1);src_stride;
     31         movsxd  rcx,        dword ptr arg(4);pitch
     32         pxor    mm7,        mm7
     33 
     34         movd    mm0,        [rsi]
     35         movd    mm1,        [rax]
     36         punpcklbw   mm0,    mm7
     37         punpcklbw   mm1,    mm7
     38         psubw   mm0,        mm1
     39         movq    [rdi],      mm0
     40 
     41 
     42         movd    mm0,        [rsi+rdx]
     43         movd    mm1,        [rax+rcx]
     44         punpcklbw   mm0,    mm7
     45         punpcklbw   mm1,    mm7
     46         psubw   mm0,        mm1
     47         movq    [rdi+rcx*2],mm0
     48 
     49 
     50         movd    mm0,        [rsi+rdx*2]
     51         movd    mm1,        [rax+rcx*2]
     52         punpcklbw   mm0,    mm7
     53         punpcklbw   mm1,    mm7
     54         psubw   mm0,        mm1
     55         movq    [rdi+rcx*4],        mm0
     56 
     57         lea     rsi,        [rsi+rdx*2]
     58         lea     rcx,        [rcx+rcx*2]
     59 
     60 
     61 
     62         movd    mm0,        [rsi+rdx]
     63         movd    mm1,        [rax+rcx]
     64         punpcklbw   mm0,    mm7
     65         punpcklbw   mm1,    mm7
     66         psubw   mm0,        mm1
     67         movq    [rdi+rcx*2],        mm0
     68 
     69     ; begin epilog
     70     pop rdi
     71     pop rsi
     72     UNSHADOW_ARGS
     73     pop         rbp
     74     ret
     75 
     76 ;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
     77 ;unsigned char *pred, int pred_stride)
     78 global sym(vp8_subtract_mby_mmx) PRIVATE
     79 sym(vp8_subtract_mby_mmx):
     80     push        rbp
     81     mov         rbp, rsp
     82     SHADOW_ARGS_TO_STACK 5
     83     push rsi
     84     push rdi
     85     ; end prolog
     86 
     87     mov         rdi,        arg(0)          ;diff
     88     mov         rsi,        arg(1)          ;src
     89     movsxd      rdx,        dword ptr arg(2);src_stride
     90     mov         rax,        arg(3)          ;pred
     91     push        rbx
     92     movsxd      rbx,        dword ptr arg(4);pred_stride
     93 
     94     pxor        mm0,        mm0
     95     mov         rcx,        16
     96 
     97 
     98 .submby_loop:
     99     movq        mm1,        [rsi]
    100     movq        mm3,        [rax]
    101 
    102     movq        mm2,        mm1
    103     movq        mm4,        mm3
    104 
    105     punpcklbw   mm1,        mm0
    106     punpcklbw   mm3,        mm0
    107 
    108     punpckhbw   mm2,        mm0
    109     punpckhbw   mm4,        mm0
    110 
    111     psubw       mm1,        mm3
    112     psubw       mm2,        mm4
    113 
    114     movq        [rdi],      mm1
    115     movq        [rdi+8],    mm2
    116 
    117     movq        mm1,        [rsi+8]
    118     movq        mm3,        [rax+8]
    119 
    120     movq        mm2,        mm1
    121     movq        mm4,        mm3
    122 
    123     punpcklbw   mm1,        mm0
    124     punpcklbw   mm3,        mm0
    125 
    126     punpckhbw   mm2,        mm0
    127     punpckhbw   mm4,        mm0
    128 
    129     psubw       mm1,        mm3
    130     psubw       mm2,        mm4
    131 
    132     movq        [rdi+16],   mm1
    133     movq        [rdi+24],   mm2
    134     add         rdi,        32
    135     lea         rax,        [rax+rbx]
    136     lea         rsi,        [rsi+rdx]
    137     dec         rcx
    138     jnz         .submby_loop
    139 
    140     pop rbx
    141     pop rdi
    142     pop rsi
    143     ; begin epilog
    144     UNSHADOW_ARGS
    145     pop         rbp
    146     ret
    147 
    148 
    149 ;vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
    150 ;                         int src_stride, unsigned char *upred,
    151 ;                         unsigned char *vpred, int pred_stride)
    152 
    153 global sym(vp8_subtract_mbuv_mmx) PRIVATE
    154 sym(vp8_subtract_mbuv_mmx):
    155     push        rbp
    156     mov         rbp, rsp
    157     SHADOW_ARGS_TO_STACK 7
    158     push rsi
    159     push rdi
    160     ; end prolog
    161 
    162     mov         rdi,        arg(0)          ;diff
    163     mov         rsi,        arg(1)          ;usrc
    164     movsxd      rdx,        dword ptr arg(3);src_stride;
    165     mov         rax,        arg(4)          ;upred
    166     add         rdi,        256*2           ;diff = diff + 256 (shorts)
    167     mov         rcx,        8
    168     push        rbx
    169     movsxd      rbx,        dword ptr arg(6);pred_stride
    170 
    171     pxor        mm7,        mm7
    172 
    173 .submbu_loop:
    174     movq        mm0,        [rsi]
    175     movq        mm1,        [rax]
    176     movq        mm3,        mm0
    177     movq        mm4,        mm1
    178     punpcklbw   mm0,        mm7
    179     punpcklbw   mm1,        mm7
    180     punpckhbw   mm3,        mm7
    181     punpckhbw   mm4,        mm7
    182     psubw       mm0,        mm1
    183     psubw       mm3,        mm4
    184     movq        [rdi],      mm0
    185     movq        [rdi+8],    mm3
    186     add         rdi, 16
    187     add         rsi, rdx
    188     add         rax, rbx
    189 
    190     dec         rcx
    191     jnz         .submbu_loop
    192 
    193     mov         rsi,        arg(2)          ;vsrc
    194     mov         rax,        arg(5)          ;vpred
    195     mov         rcx,        8
    196 
    197 .submbv_loop:
    198     movq        mm0,        [rsi]
    199     movq        mm1,        [rax]
    200     movq        mm3,        mm0
    201     movq        mm4,        mm1
    202     punpcklbw   mm0,        mm7
    203     punpcklbw   mm1,        mm7
    204     punpckhbw   mm3,        mm7
    205     punpckhbw   mm4,        mm7
    206     psubw       mm0,        mm1
    207     psubw       mm3,        mm4
    208     movq        [rdi],      mm0
    209     movq        [rdi+8],    mm3
    210     add         rdi, 16
    211     add         rsi, rdx
    212     add         rax, rbx
    213 
    214     dec         rcx
    215     jnz         .submbv_loop
    216 
    217     pop         rbx
    218     ; begin epilog
    219     pop rdi
    220     pop rsi
    221     UNSHADOW_ARGS
    222     pop         rbp
    223     ret
    224