;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

     14 ;void copy_mem16x16_sse2(
     15 ;    unsigned char *src,
     16 ;    int src_stride,
     17 ;    unsigned char *dst,
     18 ;    int dst_stride
     19 ;    )
     20 global sym(vp8_copy_mem16x16_sse2) PRIVATE
     21 sym(vp8_copy_mem16x16_sse2):
     22     push        rbp
     23     mov         rbp, rsp
     24     SHADOW_ARGS_TO_STACK 4
     25     push        rsi
     26     push        rdi
     27     ; end prolog
     28 
     29         mov         rsi,        arg(0) ;src;
     30         movdqu      xmm0,       [rsi]
     31 
     32         movsxd      rax,        dword ptr arg(1) ;src_stride;
     33         mov         rdi,        arg(2) ;dst;
     34 
     35         movdqu      xmm1,       [rsi+rax]
     36         movdqu      xmm2,       [rsi+rax*2]
     37 
     38         movsxd      rcx,        dword ptr arg(3) ;dst_stride
     39         lea         rsi,        [rsi+rax*2]
     40 
     41         movdqa      [rdi],      xmm0
     42         add         rsi,        rax
     43 
     44         movdqa      [rdi+rcx],  xmm1
     45         movdqa      [rdi+rcx*2],xmm2
     46 
     47         lea         rdi,        [rdi+rcx*2]
     48         movdqu      xmm3,       [rsi]
     49 
     50         add         rdi,        rcx
     51         movdqu      xmm4,       [rsi+rax]
     52 
     53         movdqu      xmm5,       [rsi+rax*2]
     54         lea         rsi,        [rsi+rax*2]
     55 
     56         movdqa      [rdi],  xmm3
     57         add         rsi,        rax
     58 
     59         movdqa      [rdi+rcx],  xmm4
     60         movdqa      [rdi+rcx*2],xmm5
     61 
     62         lea         rdi,        [rdi+rcx*2]
     63         movdqu      xmm0,       [rsi]
     64 
     65         add         rdi,        rcx
     66         movdqu      xmm1,       [rsi+rax]
     67 
     68         movdqu      xmm2,       [rsi+rax*2]
     69         lea         rsi,        [rsi+rax*2]
     70 
     71         movdqa      [rdi],      xmm0
     72         add         rsi,        rax
     73 
     74         movdqa      [rdi+rcx],  xmm1
     75 
     76         movdqa      [rdi+rcx*2],    xmm2
     77         movdqu      xmm3,       [rsi]
     78 
     79         movdqu      xmm4,       [rsi+rax]
     80         lea         rdi,        [rdi+rcx*2]
     81 
     82         add         rdi,        rcx
     83         movdqu      xmm5,       [rsi+rax*2]
     84 
     85         lea         rsi,        [rsi+rax*2]
     86         movdqa      [rdi],  xmm3
     87 
     88         add         rsi,        rax
     89         movdqa      [rdi+rcx],  xmm4
     90 
     91         movdqa      [rdi+rcx*2],xmm5
     92         movdqu      xmm0,       [rsi]
     93 
     94         lea         rdi,        [rdi+rcx*2]
     95         movdqu      xmm1,       [rsi+rax]
     96 
     97         add         rdi,        rcx
     98         movdqu      xmm2,       [rsi+rax*2]
     99 
    100         lea         rsi,        [rsi+rax*2]
    101         movdqa      [rdi],      xmm0
    102 
    103         movdqa      [rdi+rcx],  xmm1
    104         movdqa      [rdi+rcx*2],xmm2
    105 
    106         movdqu      xmm3,       [rsi+rax]
    107         lea         rdi,        [rdi+rcx*2]
    108 
    109         movdqa      [rdi+rcx],  xmm3
    110 
    111     ; begin epilog
    112     pop rdi
    113     pop rsi
    114     UNSHADOW_ARGS
    115     pop         rbp
    116     ret
    117