Home | History | Annotate | Download | only in neon
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT |vp8_memcpy_partial_neon|                ;make the routine visible to the linker / C callers
     13 
     14     ARM                                             ;assemble what follows as ARM (A32) instructions
     15     REQUIRE8                                        ;this code requires 8-byte stack alignment
     16     PRESERVE8                                       ;this code preserves 8-byte stack alignment
     17 
     18     AREA ||.text||, CODE, READONLY, ALIGN=2         ;read-only code section, aligned to 2^2 = 4 bytes
     19 ;=========================================
     20 ;this is not a full memcpy function!!!
     21 ;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
     22 ;                             int sz);
     23 |vp8_memcpy_partial_neon| PROC
     24     ;pld                [r1]                        ;preload pred data
     25     ;pld                [r1, #128]
     26     ;pld                [r1, #256]
     27     ;pld                [r1, #384]
     28 
     29     mov             r12, r2, lsr #8                 ;copy 256 bytes data at one time
     30 
     31 memcpy_neon_loop
     32     vld1.8          {q0, q1}, [r1]!                 ;load src data
     33     subs            r12, r12, #1
     34     vld1.8          {q2, q3}, [r1]!
     35     vst1.8          {q0, q1}, [r0]!                 ;copy to dst_ptr
     36     vld1.8          {q4, q5}, [r1]!
     37     vst1.8          {q2, q3}, [r0]!
     38     vld1.8          {q6, q7}, [r1]!
     39     vst1.8          {q4, q5}, [r0]!
     40     vld1.8          {q8, q9}, [r1]!
     41     vst1.8          {q6, q7}, [r0]!
     42     vld1.8          {q10, q11}, [r1]!
     43     vst1.8          {q8, q9}, [r0]!
     44     vld1.8          {q12, q13}, [r1]!
     45     vst1.8          {q10, q11}, [r0]!
     46     vld1.8          {q14, q15}, [r1]!
     47     vst1.8          {q12, q13}, [r0]!
     48     vst1.8          {q14, q15}, [r0]!
     49 
     50     ;pld                [r1]                        ;preload pred data -- need to adjust for real device
     51     ;pld                [r1, #128]
     52     ;pld                [r1, #256]
     53     ;pld                [r1, #384]
     54 
     55     bne             memcpy_neon_loop
     56 
     57     ands            r3, r2, #0xff                   ;extra copy
     58     beq             done_copy_neon_loop
     59 
     60 extra_copy_neon_loop
     61     vld1.8          {q0}, [r1]!                 ;load src data
     62     subs            r3, r3, #16
     63     vst1.8          {q0}, [r0]!
     64     bne             extra_copy_neon_loop
     65 
     66 done_copy_neon_loop
     67     bx              lr
     68     ENDP
     69 
     70     END
     71