Home | History | Annotate | Download | only in neon
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_recon4b_neon|
     13     ARM
     14     REQUIRE8
     15     PRESERVE8
     16 
     17     AREA ||.text||, CODE, READONLY, ALIGN=2
     18 
     ;-----------------------------------------------------------------------
     ; vp8_recon4b_neon(unsigned char *pred_ptr, short *diff_ptr,
     ;                  unsigned char *dst_ptr, int stride)
     ;
     ; Adds 64 16-bit difference values to 64 unsigned 8-bit prediction
     ; values, saturates every sum to the unsigned 8-bit range and stores the
     ; result as four rows of 16 bytes.
     ;
     ; ABI:   AAPCS (ARM state), 8-byte stack alignment kept
     ; In:    r0 = pred_ptr   64 consecutive bytes are read
     ;        r1 = diff_ptr   64 consecutive halfwords (128 bytes) are read
     ;        r2 = dst_ptr    row k (k = 0..3) is written at dst_ptr + k*stride
     ;        r3 = stride     byte distance between destination rows
     ; Out:   no result register is set up; r0 holds a scratch address on exit
     ; Clobb: r0, r1, r2, q0-q3, q8-q15
     ; Saved: d8-d15 (q4-q7) are used as scratch, so they are pushed on entry
     ;        and popped before returning, as the AAPCS requires of a callee.
     ; Stack: 64 bytes (the d8-d15 save area)
     ;
     ; All loads complete before the first store, exactly as in the
     ; unsaved-register version of this routine.
     ;-----------------------------------------------------------------------

|vp8_recon4b_neon| PROC
    vpush           {d8-d15}            ;q4-q7 are callee-saved; 64 bytes keeps sp 8-byte aligned

    vld1.u8         {q12, q13}, [r0]!   ;pred bytes  0..31
    vld1.16         {q8, q9}, [r1]!     ;diff values 0..15
    vld1.u8         {q14, q15}, [r0]    ;pred bytes 32..63
    vld1.16         {q10, q11}, [r1]!   ;diff values 16..31

    vmovl.u8        q0, d24             ;zero-extend pred from 8 to 16 bits
    vmovl.u8        q1, d25
    vmovl.u8        q2, d26
    vmovl.u8        q3, d27
    vmovl.u8        q4, d28
    vmovl.u8        q5, d29
    vmovl.u8        q6, d30
    vld1.16         {q12, q13}, [r1]!   ;diff values 32..47 (d24-d27 already consumed)
    vmovl.u8        q7, d31
    vld1.16         {q14, q15}, [r1]    ;diff values 48..63 (d28-d31 already consumed)

    vadd.s16        q0, q0, q8          ;pred + diff, 16-bit lanes
    vadd.s16        q1, q1, q9
    vadd.s16        q2, q2, q10
    vadd.s16        q3, q3, q11
    vadd.s16        q4, q4, q12
    vadd.s16        q5, q5, q13
    vadd.s16        q6, q6, q14
    vadd.s16        q7, q7, q15

    vqmovun.s16     d0, q0              ;narrow with unsigned saturation (clamp to 0..255)
    vqmovun.s16     d1, q1              ;q0 = row 0
    vqmovun.s16     d2, q2
    vqmovun.s16     d3, q3              ;q1 = row 1
    vqmovun.s16     d4, q4
    vqmovun.s16     d5, q5              ;q2 = row 2
    vqmovun.s16     d6, q6
    vqmovun.s16     d7, q7              ;q3 = row 3
    add             r0, r2, r3          ;r0 = dst_ptr + stride

    vst1.u8         {q0}, [r2]          ;row 0 -> dst_ptr
    vst1.u8         {q1}, [r0], r3      ;row 1 -> dst_ptr + stride; r0 advances one row
    add             r2, r0, r3          ;r2 = dst_ptr + 3*stride
    vst1.u8         {q2}, [r0]          ;row 2 -> dst_ptr + 2*stride
    vst1.u8         {q3}, [r2], r3      ;row 3 -> dst_ptr + 3*stride

    vpop            {d8-d15}            ;restore the caller's q4-q7
    bx              lr

    ENDP
     69     END
     70