;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_dequant_idct_add_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
;void vp8_dequant_idct_add_neon(short *input, short *dq,
;                               unsigned char *dest, int stride)
; r0    short *input,
; r1    short *dq,
; r2    unsigned char *dest
; r3    int stride
;
; Multiplies the 16 coefficients at input element-wise by the 16 values
; at dq, runs a two-pass 4x4 inverse transform on the products, adds the
; result to the 4x4 block of bytes at dest (rows 'stride' bytes apart),
; saturates to unsigned 8 bit and writes the block back to dest.
; The 32 bytes at input are overwritten with zeros.
;
; Syntax: armasm, ARM (A32) state, NEON.
; Clobbers: r1, r2, r3, r12, q0-q3, q8-q12, q14, q15.
; Only caller-saved NEON registers are used as scratch: q4-q7 (d8-d15)
; must be preserved by a callee under the AAPCS, so they are never
; touched here and no stack save/restore is needed.
;-----------------------------------------------------------------------

|vp8_dequant_idct_add_neon| PROC
    vld1.16         {q8, q9}, [r0]          ; 16 coefficients
    vld1.16         {q10, q11}, [r1]        ; 16 dequantisation factors

    add             r1, r2, r3              ; r1 = dest + stride
    lsl             r3, #1                  ; 2x stride

    ; Fetch the four destination rows (4 bytes each) into q12.
    ; r2 walks rows 0 and 2, r1 walks rows 1 and 3.
    vld1.32         {d24[0]}, [r2], r3
    vld1.32         {d24[1]}, [r1], r3
    vld1.32         {d25[0]}, [r2]
    vld1.32         {d25[1]}, [r1]

    adr             r12, cospi8sqrt2minus1  ; pointer to the first constant

    vmul.i16        q1, q8, q10             ;input for short_idct4x4llm_neon
    vmul.i16        q2, q9, q11

;|short_idct4x4llm_neon| PROC
    ; One 8-byte load picks up both constant words, which must therefore
    ; stay adjacent in the pool:
    ;   d0[0], d0[1] = 0x4e7b (cospi8sqrt2minus1)
    ;   d0[2], d0[3] = 0x8a8c (sinpi8sqrt2)
    vld1.16         {d0}, [r12]
    vswp            d3, d4                  ;q2(vp[4] vp[12])

    ; ---- first pass ----
    ; vqdmulh returns the high half of the doubled product; the shift
    ; right by one below removes the doubling, and adding the multiplicand
    ; back supplies the "+1" of the cos constant and compensates for
    ; 0x8a8c being negative when read as a signed halfword.
    vqdmulh.s16     q3, q2, d0[2]           ; sinpi8sqrt2 products
    vqdmulh.s16     q8, q2, d0[0]           ; cospi8sqrt2minus1 products

    vqadd.s16       d20, d2, d3             ;a1
    vqsub.s16       d21, d2, d3             ;b1

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ;c1
    vqadd.s16       d19, d7, d16            ;d1

    vqadd.s16       d2, d20, d19            ; a1 + d1
    vqadd.s16       d3, d21, d18            ; b1 + c1
    vqsub.s16       d4, d21, d18            ; b1 - c1
    vqsub.s16       d5, d20, d19            ; a1 - d1

    ; 4x4 transpose of the 16-bit elements held in d2..d5
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

; memset(input, 0, 32) -- 32bytes
    vmov.i16        q14, #0

    ; ---- second pass: same butterfly on the transposed data ----
    vswp            d3, d4
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ;a1
    vqsub.s16       d21, d2, d3             ;b1

    vmov            q15, q14                ; second half of the zero block

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ;c1
    vqadd.s16       d19, d7, d16            ;d1

    vqadd.s16       d2, d20, d19
    vqadd.s16       d3, d21, d18
    vqsub.s16       d4, d21, d18
    vqsub.s16       d5, d20, d19

    vst1.16         {q14, q15}, [r0]        ; clear the 32-byte input block

    ; rounding shift: (x + 4) >> 3
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; add the destination bytes loaded at entry (widened to 16 bit)
    vaddw.u8        q1, q1, d24
    vaddw.u8        q2, q2, d25

    ; the post-incremented loads left r2/r1 one double-stride ahead;
    ; step back so they address rows 0 and 1 again
    sub             r2, r2, r3
    sub             r1, r1, r3

    ; narrow to bytes with unsigned saturation
    vqmovun.s16     d0, q1
    vqmovun.s16     d1, q2

    vst1.32         {d0[0]}, [r2], r3
    vst1.32         {d0[1]}, [r1], r3
    vst1.32         {d1[0]}, [r2]
    vst1.32         {d1[1]}, [r1]

    bx              lr

    ENDP           ; |vp8_dequant_idct_add_neon|

; Constant Pool
; Each 16-bit constant is stored twice per word. The two words are read
; with a single 64-bit load above, so keep them contiguous and in order.
cospi8sqrt2minus1 DCD 0x4e7b4e7b            ; 0x4e7b = 20091
sinpi8sqrt2       DCD 0x8a8c8a8c            ; 0x8a8c = 35468

    END