;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


     12     EXPORT  |vp8_recon_b_neon|
     13     ARM
     14     REQUIRE8
     15     PRESERVE8
     16 
     17     AREA ||.text||, CODE, READONLY, ALIGN=2
     18 
     19 ; r0    unsigned char  *pred_ptr,
     20 ; r1    short *diff_ptr,
     21 ; r2    unsigned char *dst_ptr,
     22 ; r3    int stride
     23 
     24 |vp8_recon_b_neon| PROC
     25     mov             r12, #16
     26 
     27     vld1.u8         {d28}, [r0], r12    ;load 4 data/line from pred_ptr
     28     vld1.16         {q10, q11}, [r1]!   ;load data from diff_ptr
     29     vld1.u8         {d29}, [r0], r12
     30     vld1.16         {q11, q12}, [r1]!
     31     vld1.u8         {d30}, [r0], r12
     32     vld1.16         {q12, q13}, [r1]!
     33     vld1.u8         {d31}, [r0], r12
     34     vld1.16         {q13}, [r1]
     35 
     36     vmovl.u8        q0, d28             ;modify Pred data from 8 bits to 16 bits
     37     vmovl.u8        q1, d29             ;Pred data in d0, d2, d4, d6
     38     vmovl.u8        q2, d30
     39     vmovl.u8        q3, d31
     40 
     41     vadd.s16        d0, d0, d20         ;add Diff data and Pred data together
     42     vadd.s16        d2, d2, d22
     43     vadd.s16        d4, d4, d24
     44     vadd.s16        d6, d6, d26
     45 
     46     vqmovun.s16     d0, q0              ;CLAMP() saturation
     47     vqmovun.s16     d1, q1
     48     vqmovun.s16     d2, q2
     49     vqmovun.s16     d3, q3
     50     add             r1, r2, r3
     51 
     52     vst1.32         {d0[0]}, [r2]       ;store result
     53     vst1.32         {d1[0]}, [r1], r3
     54     add             r2, r1, r3
     55     vst1.32         {d2[0]}, [r1]
     56     vst1.32         {d3[0]}, [r2], r3
     57 
     58     bx             lr
     59 
     60     ENDP
     61     END
     62