Home | History | Annotate | Download | only in neon
      1 ;
      2 ;  Copyright (c) 2010 The Webm project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license and patent
      5 ;  grant that can be found in the LICENSE file in the root of the source
      6 ;  tree. All contributing project authors may be found in the AUTHORS
      7 ;  file in the root of the source tree.
      8 ;
      9 
     10 
     11     EXPORT  |idct_dequant_dc_0_2x_neon|
     12     ARM
     13     REQUIRE8
     14     PRESERVE8
     15 
     16     AREA ||.text||, CODE, READONLY, ALIGN=2
     17 ;void idct_dequant_dc_0_2x_neon(short *dc, unsigned char *pre,
     18 ;                               unsigned char *dst, int stride);
     19 ; r0  *dc
     20 ; r1  *pre
     21 ; r2  *dst
     22 ; r3  stride
     23 |idct_dequant_dc_0_2x_neon| PROC
     24     ldr             r0, [r0]                ; *dc
     25     mov             r12, #16
     26 
     27     vld1.32         {d2[0]}, [r1], r12      ; lo
     28     vld1.32         {d2[1]}, [r1], r12
     29     vld1.32         {d4[0]}, [r1], r12
     30     vld1.32         {d4[1]}, [r1]
     31     sub             r1, r1, #44
     32     vld1.32         {d8[0]}, [r1], r12      ; hi
     33     vld1.32         {d8[1]}, [r1], r12
     34     vld1.32         {d10[0]}, [r1], r12
     35     vld1.32         {d10[1]}, [r1]
     36 
     37     sxth            r1, r0                  ; lo *dc
     38     add             r1, r1, #4
     39     asr             r1, r1, #3
     40     vdup.16         q0, r1
     41     sxth            r0, r0, ror #16         ; hi *dc
     42     add             r0, r0, #4
     43     asr             r0, r0, #3
     44     vdup.16         q3, r0
     45 
     46     vaddw.u8        q1, q0, d2              ; lo
     47     vaddw.u8        q2, q0, d4
     48     vaddw.u8        q4, q3, d8              ; hi
     49     vaddw.u8        q5, q3, d10
     50 
     51     vqmovun.s16     d2, q1                  ; lo
     52     vqmovun.s16     d4, q2
     53     vqmovun.s16     d8, q4                  ; hi
     54     vqmovun.s16     d10, q5
     55 
     56     add             r0, r2, #4
     57     vst1.32         {d2[0]}, [r2], r3       ; lo
     58     vst1.32         {d2[1]}, [r2], r3
     59     vst1.32         {d4[0]}, [r2], r3
     60     vst1.32         {d4[1]}, [r2]
     61     vst1.32         {d8[0]}, [r0], r3       ; hi
     62     vst1.32         {d8[1]}, [r0], r3
     63     vst1.32         {d10[0]}, [r0], r3
     64     vst1.32         {d10[1]}, [r0]
     65 
     66     bx             lr
     67 
     68     ENDP           ;|idct_dequant_dc_0_2x_neon|
     69     END
     70