Home | History | Annotate | Download | only in x86
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12 %include "vpx_ports/x86_abi_support.asm"
     13 
     14 ;unsigned int vp8_sad16x16_wmt(
     15 ;    unsigned char *src_ptr,
     16 ;    int  src_stride,
     17 ;    unsigned char *ref_ptr,
     18 ;    int  ref_stride)
     19 global sym(vp8_sad16x16_wmt)
     20 sym(vp8_sad16x16_wmt):
     21     push        rbp
     22     mov         rbp, rsp
     23     SHADOW_ARGS_TO_STACK 4
     24     push        rsi
     25     push        rdi
     26     ; end prolog
     27 
     28         mov             rsi,        arg(0) ;src_ptr
     29         mov             rdi,        arg(2) ;ref_ptr
     30 
     31         movsxd          rax,        dword ptr arg(1) ;src_stride
     32         movsxd          rdx,        dword ptr arg(3) ;ref_stride
     33 
     34         lea             rcx,        [rsi+rax*8]
     35 
     36         lea             rcx,        [rcx+rax*8]
     37         pxor            xmm7,       xmm7
     38 
     39 x16x16sad_wmt_loop:
     40 
     41         movq            xmm0,       QWORD PTR [rsi]
     42         movq            xmm2,       QWORD PTR [rsi+8]
     43 
     44         movq            xmm1,       QWORD PTR [rdi]
     45         movq            xmm3,       QWORD PTR [rdi+8]
     46 
     47         movq            xmm4,       QWORD PTR [rsi+rax]
     48         movq            xmm5,       QWORD PTR [rdi+rdx]
     49 
     50 
     51         punpcklbw       xmm0,       xmm2
     52         punpcklbw       xmm1,       xmm3
     53 
     54         psadbw          xmm0,       xmm1
     55         movq            xmm6,       QWORD PTR [rsi+rax+8]
     56 
     57         movq            xmm3,       QWORD PTR [rdi+rdx+8]
     58         lea             rsi,        [rsi+rax*2]
     59 
     60         lea             rdi,        [rdi+rdx*2]
     61         punpcklbw       xmm4,       xmm6
     62 
     63         punpcklbw       xmm5,       xmm3
     64         psadbw          xmm4,       xmm5
     65 
     66         paddw           xmm7,       xmm0
     67         paddw           xmm7,       xmm4
     68 
     69         cmp             rsi,        rcx
     70         jne             x16x16sad_wmt_loop
     71 
     72         movq            xmm0,       xmm7
     73         psrldq          xmm7,       8
     74 
     75         paddw           xmm0,       xmm7
     76         movq            rax,        xmm0
     77 
     78     ; begin epilog
     79     pop rdi
     80     pop rsi
     81     UNSHADOW_ARGS
     82     pop         rbp
     83     ret
     84 
     85 ;unsigned int vp8_sad8x16_wmt(
     86 ;    unsigned char *src_ptr,
     87 ;    int  src_stride,
     88 ;    unsigned char *ref_ptr,
     89 ;    int  ref_stride,
     90 ;    int  max_err)
     91 global sym(vp8_sad8x16_wmt)
     92 sym(vp8_sad8x16_wmt):
     93     push        rbp
     94     mov         rbp, rsp
     95     SHADOW_ARGS_TO_STACK 5
     96     push        rbx
     97     push        rsi
     98     push        rdi
     99     ; end prolog
    100 
    101         mov             rsi,        arg(0) ;src_ptr
    102         mov             rdi,        arg(2) ;ref_ptr
    103 
    104         movsxd          rbx,        dword ptr arg(1) ;src_stride
    105         movsxd          rdx,        dword ptr arg(3) ;ref_stride
    106 
    107         lea             rcx,        [rsi+rbx*8]
    108 
    109         lea             rcx,        [rcx+rbx*8]
    110         pxor            mm7,        mm7
    111 
    112 x8x16sad_wmt_loop:
    113 
    114         movq            rax,        mm7
    115         cmp             rax,        arg(4)
    116         jg              x8x16sad_wmt_early_exit
    117 
    118         movq            mm0,        QWORD PTR [rsi]
    119         movq            mm1,        QWORD PTR [rdi]
    120 
    121         movq            mm2,        QWORD PTR [rsi+rbx]
    122         movq            mm3,        QWORD PTR [rdi+rdx]
    123 
    124         psadbw          mm0,        mm1
    125         psadbw          mm2,        mm3
    126 
    127         lea             rsi,        [rsi+rbx*2]
    128         lea             rdi,        [rdi+rdx*2]
    129 
    130         paddw           mm7,        mm0
    131         paddw           mm7,        mm2
    132 
    133         cmp             rsi,        rcx
    134         jne             x8x16sad_wmt_loop
    135 
    136         movq            rax,        mm7
    137 
    138 x8x16sad_wmt_early_exit:
    139 
    140     ; begin epilog
    141     pop         rdi
    142     pop         rsi
    143     pop         rbx
    144     UNSHADOW_ARGS
    145     pop         rbp
    146     ret
    147 
    148 
    149 ;unsigned int vp8_sad8x8_wmt(
    150 ;    unsigned char *src_ptr,
    151 ;    int  src_stride,
    152 ;    unsigned char *ref_ptr,
    153 ;    int  ref_stride)
    154 global sym(vp8_sad8x8_wmt)
    155 sym(vp8_sad8x8_wmt):
    156     push        rbp
    157     mov         rbp, rsp
    158     SHADOW_ARGS_TO_STACK 5
    159     push        rbx
    160     push        rsi
    161     push        rdi
    162     ; end prolog
    163 
    164         mov             rsi,        arg(0) ;src_ptr
    165         mov             rdi,        arg(2) ;ref_ptr
    166 
    167         movsxd          rbx,        dword ptr arg(1) ;src_stride
    168         movsxd          rdx,        dword ptr arg(3) ;ref_stride
    169 
    170         lea             rcx,        [rsi+rbx*8]
    171         pxor            mm7,        mm7
    172 
    173 x8x8sad_wmt_loop:
    174 
    175         movq            rax,        mm7
    176         cmp             rax,        arg(4)
    177         jg              x8x8sad_wmt_early_exit
    178 
    179         movq            mm0,        QWORD PTR [rsi]
    180         movq            mm1,        QWORD PTR [rdi]
    181 
    182         psadbw          mm0,        mm1
    183         lea             rsi,        [rsi+rbx]
    184 
    185         add             rdi,        rdx
    186         paddw           mm7,        mm0
    187 
    188         cmp             rsi,        rcx
    189         jne             x8x8sad_wmt_loop
    190 
    191         movq            rax,        mm7
    192 x8x8sad_wmt_early_exit:
    193 
    194     ; begin epilog
    195     pop         rdi
    196     pop         rsi
    197     pop         rbx
    198     UNSHADOW_ARGS
    199     pop         rbp
    200     ret
    201 
    202 ;unsigned int vp8_sad4x4_wmt(
    203 ;    unsigned char *src_ptr,
    204 ;    int  src_stride,
    205 ;    unsigned char *ref_ptr,
    206 ;    int  ref_stride)
    207 global sym(vp8_sad4x4_wmt)
    208 sym(vp8_sad4x4_wmt):
    209     push        rbp
    210     mov         rbp, rsp
    211     SHADOW_ARGS_TO_STACK 4
    212     push        rsi
    213     push        rdi
    214     ; end prolog
    215 
    216         mov             rsi,        arg(0) ;src_ptr
    217         mov             rdi,        arg(2) ;ref_ptr
    218 
    219         movsxd          rax,        dword ptr arg(1) ;src_stride
    220         movsxd          rdx,        dword ptr arg(3) ;ref_stride
    221 
    222         movd            mm0,        DWORD PTR [rsi]
    223         movd            mm1,        DWORD PTR [rdi]
    224 
    225         movd            mm2,        DWORD PTR [rsi+rax]
    226         movd            mm3,        DWORD PTR [rdi+rdx]
    227 
    228         punpcklbw       mm0,        mm2
    229         punpcklbw       mm1,        mm3
    230 
    231         psadbw          mm0,        mm1
    232         lea             rsi,        [rsi+rax*2]
    233 
    234         lea             rdi,        [rdi+rdx*2]
    235         movd            mm4,        DWORD PTR [rsi]
    236 
    237         movd            mm5,        DWORD PTR [rdi]
    238         movd            mm6,        DWORD PTR [rsi+rax]
    239 
    240         movd            mm7,        DWORD PTR [rdi+rdx]
    241         punpcklbw       mm4,        mm6
    242 
    243         punpcklbw       mm5,        mm7
    244         psadbw          mm4,        mm5
    245 
    246         paddw           mm0,        mm4
    247         movq            rax,        mm0
    248 
    249     ; begin epilog
    250     pop rdi
    251     pop rsi
    252     UNSHADOW_ARGS
    253     pop         rbp
    254     ret
    255 
    256 
    257 ;unsigned int vp8_sad16x8_wmt(
    258 ;    unsigned char *src_ptr,
    259 ;    int  src_stride,
    260 ;    unsigned char *ref_ptr,
    261 ;    int  ref_stride)
    262 global sym(vp8_sad16x8_wmt)
    263 sym(vp8_sad16x8_wmt):
    264     push        rbp
    265     mov         rbp, rsp
    266     SHADOW_ARGS_TO_STACK 5
    267     push        rbx
    268     push        rsi
    269     push        rdi
    270     ; end prolog
    271 
    272 
    273         mov             rsi,        arg(0) ;src_ptr
    274         mov             rdi,        arg(2) ;ref_ptr
    275 
    276         movsxd          rbx,        dword ptr arg(1) ;src_stride
    277         movsxd          rdx,        dword ptr arg(3) ;ref_stride
    278 
    279         lea             rcx,        [rsi+rbx*8]
    280         pxor            mm7,        mm7
    281 
    282 x16x8sad_wmt_loop:
    283 
    284         movq            rax,        mm7
    285         cmp             rax,        arg(4)
    286         jg              x16x8sad_wmt_early_exit
    287 
    288         movq            mm0,        QWORD PTR [rsi]
    289         movq            mm2,        QWORD PTR [rsi+8]
    290 
    291         movq            mm1,        QWORD PTR [rdi]
    292         movq            mm3,        QWORD PTR [rdi+8]
    293 
    294         movq            mm4,        QWORD PTR [rsi+rbx]
    295         movq            mm5,        QWORD PTR [rdi+rdx]
    296 
    297         psadbw          mm0,        mm1
    298         psadbw          mm2,        mm3
    299 
    300         movq            mm1,        QWORD PTR [rsi+rbx+8]
    301         movq            mm3,        QWORD PTR [rdi+rdx+8]
    302 
    303         psadbw          mm4,        mm5
    304         psadbw          mm1,        mm3
    305 
    306         lea             rsi,        [rsi+rbx*2]
    307         lea             rdi,        [rdi+rdx*2]
    308 
    309         paddw           mm0,        mm2
    310         paddw           mm4,        mm1
    311 
    312         paddw           mm7,        mm0
    313         paddw           mm7,        mm4
    314 
    315         cmp             rsi,        rcx
    316         jne             x16x8sad_wmt_loop
    317 
    318         movq            rax,        mm7
    319 
    320 x16x8sad_wmt_early_exit:
    321 
    322     ; begin epilog
    323     pop         rdi
    324     pop         rsi
    325     pop         rbx
    326     UNSHADOW_ARGS
    327     pop         rbp
    328     ret
    329