Home | History | Annotate | Download | only in neon
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT |vp8_memcpy_neon|
     13 
     14     ARM
     15     REQUIRE8
     16     PRESERVE8
     17 
     18     AREA ||.text||, CODE, READONLY, ALIGN=2
     ;=========================================
     ; void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
     ;
     ; Copies sz bytes from src_ptr to dst_ptr with NEON loads/stores:
     ;   1) whole 256-byte blocks,
     ;   2) whole 16-byte chunks of what is left,
     ;   3) the final 0..15 bytes one at a time.
     ; sz <= 0 copies nothing. Element size is 8 bits, so no alignment
     ; beyond byte alignment is required of either pointer.
     ;
     ; ABI:   AAPCS (ARM state)
     ; In:    r0 = dst_ptr, r1 = src_ptr, r2 = sz
     ; Out:   none
     ; Clobb: r0, r1, r3, r12, q0-q3, q8-q15, flags
     ;        (q4-q7 / d8-d15 are callee-saved and deliberately NOT used)
     ; Stack: none
     ;=========================================
|vp8_memcpy_neon| PROC
    ;pld                [r1]                        ;preload pred data
    ;pld                [r1, #128]
    ;pld                [r1, #256]
    ;pld                [r1, #384]

    cmp             r2, #0
    ble             done_copy_neon_loop             ;sz <= 0: nothing to copy

    movs            r12, r2, lsr #8                 ;r12 = number of whole 256-byte blocks
    beq             tail_copy_neon                  ;sz < 256: the block loop must not run at all

    ; Invariant: r12 = 256-byte blocks still to copy, always > 0 on entry.
    ; Loads run one register pair ahead of the stores. q0-q3 are reloaded
    ; for the last 64 bytes (after their first contents have been stored)
    ; so that the callee-saved q4-q7 are never touched.
memcpy_neon_loop
    vld1.8          {q0, q1}, [r1]!                 ;load src data
    subs            r12, r12, #1                    ;flags stay live until the bne (NEON ld/st leave them alone)
    vld1.8          {q2, q3}, [r1]!
    vst1.8          {q0, q1}, [r0]!                 ;copy to dst_ptr
    vld1.8          {q8, q9}, [r1]!
    vst1.8          {q2, q3}, [r0]!
    vld1.8          {q10, q11}, [r1]!
    vst1.8          {q8, q9}, [r0]!
    vld1.8          {q12, q13}, [r1]!
    vst1.8          {q10, q11}, [r0]!
    vld1.8          {q14, q15}, [r1]!
    vst1.8          {q12, q13}, [r0]!
    vld1.8          {q0, q1}, [r1]!                 ;q0/q1 already stored above: safe to reuse
    vst1.8          {q14, q15}, [r0]!
    vld1.8          {q2, q3}, [r1]!                 ;q2/q3 already stored above: safe to reuse
    vst1.8          {q0, q1}, [r0]!
    vst1.8          {q2, q3}, [r0]!

    ;pld                [r1]                        ;preload pred data -- need to adjust for real device
    ;pld                [r1, #128]
    ;pld                [r1, #256]
    ;pld                [r1, #384]

    bne             memcpy_neon_loop

tail_copy_neon
    ands            r3, r2, #0xff                   ;r3 = bytes left after the 256-byte blocks (0..255)
    beq             done_copy_neon_loop

    movs            r12, r3, lsr #4                 ;r12 = number of whole 16-byte chunks in the tail
    beq             byte_copy_neon                  ;fewer than 16 bytes left

    ; Invariant: r12 = 16-byte chunks still to copy, always > 0 on entry.
    ; Counting chunks (rather than subtracting 16 from a byte count and
    ; testing for exactly zero) keeps the loop bounded for any sz.
extra_copy_neon_loop
    vld1.8          {q0}, [r1]!                     ;load src data
    subs            r12, r12, #1
    vst1.8          {q0}, [r0]!
    bne             extra_copy_neon_loop

byte_copy_neon
    ands            r3, r3, #0x0f                   ;r3 = final bytes that do not fill a 16-byte chunk
    beq             done_copy_neon_loop

    ; Invariant: r3 = single bytes still to copy, always > 0 on entry.
byte_copy_neon_loop
    ldrb            r12, [r1], #1
    subs            r3, r3, #1
    strb            r12, [r0], #1
    bne             byte_copy_neon_loop

done_copy_neon_loop
    bx              lr
    ENDP
     67 
     68     END
     69