;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

    EXPORT |vp8_subtract_b_neon|
    EXPORT |vp8_subtract_mby_neon|
    EXPORT |vp8_subtract_mbuv_neon|

    INCLUDE asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;------------------------------------------------------------------------------
;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch)
;
; Writes a 4x4 block of 16-bit differences (source - prediction).
;
; In:    r0 = be, r1 = bd, r2 = pitch (predictor row stride in bytes; the
;        diff rows are written pitch 16-bit elements apart)
; Uses:  r3 = source pointer, r4 = be->src offset, r5 = diff pointer,
;        r6 = source stride, r7 = predictor pointer
; Clobb: r2, r3, d0-d7, q10-q13 (r4-r7 are saved and restored)
;------------------------------------------------------------------------------
|vp8_subtract_b_neon| PROC

    stmfd           sp!, {r4-r7}

    ldr             r3, [r0, #vp8_block_base_src]
    ldr             r4, [r0, #vp8_block_src]
    ldr             r5, [r0, #vp8_block_src_diff]
    ldr             r3, [r3]
    ldr             r6, [r0, #vp8_block_src_stride]
    add             r3, r3, r4      ; src = *base_src + src
    ldr             r7, [r1, #vp8_blockd_predictor]

    ; Four rows; 8 bytes are loaded per row but only the low 4 results are
    ; stored below.
    vld1.8          {d0}, [r3], r6  ;load src
    vld1.8          {d1}, [r7], r2  ;load pred
    vld1.8          {d2}, [r3], r6
    vld1.8          {d3}, [r7], r2
    vld1.8          {d4}, [r3], r6
    vld1.8          {d5}, [r7], r2
    vld1.8          {d6}, [r3], r6
    vld1.8          {d7}, [r7], r2

    vsubl.u8        q10, d0, d1     ; widen to 16 bit: src - pred
    vsubl.u8        q11, d2, d3
    vsubl.u8        q12, d4, d5
    vsubl.u8        q13, d6, d7

    mov             r2, r2, lsl #1  ; diff stride in bytes = pitch * sizeof(short)

    vst1.16         {d20}, [r5], r2 ;store diff (low 4 lanes of each row)
    vst1.16         {d22}, [r5], r2
    vst1.16         {d24}, [r5], r2
    vst1.16         {d26}, [r5], r2

    ldmfd           sp!, {r4-r7}
    bx              lr

    ENDP


;==========================================
;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride)
;
; Writes 16 rows x 16 columns of 16-bit differences (src - pred).
; src rows are 'stride' bytes apart; pred and diff are read/written
; contiguously.
;
; In:    r0 = diff, r1 = src, r2 = pred, r3 = stride
; Clobb: r0-r2, r12, flags, q0-q3, q8-q15
;        (d8-d15 are callee-saved under the AAPCS and are preserved here)
;==========================================
|vp8_subtract_mby_neon| PROC
    vpush           {d8-d15}        ; q4-q7 are used below; 64 bytes keeps sp 8-byte aligned

    mov             r12, #4         ; 4 iterations x 4 rows = 16 rows

subtract_mby_loop
    vld1.8          {q0}, [r1], r3  ;load src
    vld1.8          {q1}, [r2]!     ;load pred
    vld1.8          {q2}, [r1], r3
    vld1.8          {q3}, [r2]!
    vld1.8          {q4}, [r1], r3
    vld1.8          {q5}, [r2]!
    vld1.8          {q6}, [r1], r3
    vld1.8          {q7}, [r2]!

    vsubl.u8        q8, d0, d2      ; row 0, columns 0-7
    vsubl.u8        q9, d1, d3      ; row 0, columns 8-15
    vsubl.u8        q10, d4, d6
    vsubl.u8        q11, d5, d7
    vsubl.u8        q12, d8, d10
    vsubl.u8        q13, d9, d11
    vsubl.u8        q14, d12, d14
    vsubl.u8        q15, d13, d15

    vst1.16         {q8}, [r0]!     ;store diff
    vst1.16         {q9}, [r0]!
    vst1.16         {q10}, [r0]!
    vst1.16         {q11}, [r0]!
    vst1.16         {q12}, [r0]!
    vst1.16         {q13}, [r0]!
    vst1.16         {q14}, [r0]!
    vst1.16         {q15}, [r0]!

    subs            r12, r12, #1
    bne             subtract_mby_loop

    vpop            {d8-d15}
    bx              lr
    ENDP

;=================================
;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;
; Writes two 8x8 planes of 16-bit differences (src - pred), U first and then
; V, starting at diff + 256 and reading the prediction from pred + 256.
; usrc/vsrc rows are 'stride' bytes apart; pred and diff are read/written
; contiguously, with V continuing directly after U.
;
; In:    r0 = diff, r1 = usrc, r2 = vsrc, r3 = pred, [sp] = stride
; Clobb: r0-r3, r12, d0-d7, q8-q15
;        (d8-d15 are callee-saved under the AAPCS and are preserved here)
;=================================
|vp8_subtract_mbuv_neon| PROC
    ldr             r12, [sp]       ; stride: read before vpush moves sp
    vpush           {d8-d15}        ; d8-d15 are used below; 64 bytes keeps sp 8-byte aligned

;u
    add             r0, r0, #512    ;   short *udiff = diff + 256;
    add             r3, r3, #256    ;   unsigned char *upred = pred + 256;

    vld1.8          {d0}, [r1], r12 ;load src
    vld1.8          {d1}, [r3]!     ;load pred
    vld1.8          {d2}, [r1], r12
    vld1.8          {d3}, [r3]!
    vld1.8          {d4}, [r1], r12
    vld1.8          {d5}, [r3]!
    vld1.8          {d6}, [r1], r12
    vld1.8          {d7}, [r3]!
    vld1.8          {d8}, [r1], r12
    vld1.8          {d9}, [r3]!
    vld1.8          {d10}, [r1], r12
    vld1.8          {d11}, [r3]!
    vld1.8          {d12}, [r1], r12
    vld1.8          {d13}, [r3]!
    vld1.8          {d14}, [r1], r12
    vld1.8          {d15}, [r3]!

    vsubl.u8        q8, d0, d1
    vsubl.u8        q9, d2, d3
    vsubl.u8        q10, d4, d5
    vsubl.u8        q11, d6, d7
    vsubl.u8        q12, d8, d9
    vsubl.u8        q13, d10, d11
    vsubl.u8        q14, d12, d13
    vsubl.u8        q15, d14, d15

    vst1.16         {q8}, [r0]!     ;store diff
    vst1.16         {q9}, [r0]!
    vst1.16         {q10}, [r0]!
    vst1.16         {q11}, [r0]!
    vst1.16         {q12}, [r0]!
    vst1.16         {q13}, [r0]!
    vst1.16         {q14}, [r0]!
    vst1.16         {q15}, [r0]!

;v
    vld1.8          {d0}, [r2], r12 ;load src
    vld1.8          {d1}, [r3]!     ;load pred
    vld1.8          {d2}, [r2], r12
    vld1.8          {d3}, [r3]!
    vld1.8          {d4}, [r2], r12
    vld1.8          {d5}, [r3]!
    vld1.8          {d6}, [r2], r12
    vld1.8          {d7}, [r3]!
    vld1.8          {d8}, [r2], r12
    vld1.8          {d9}, [r3]!
    vld1.8          {d10}, [r2], r12
    vld1.8          {d11}, [r3]!
    vld1.8          {d12}, [r2], r12
    vld1.8          {d13}, [r3]!
    vld1.8          {d14}, [r2], r12
    vld1.8          {d15}, [r3]!

    vsubl.u8        q8, d0, d1
    vsubl.u8        q9, d2, d3
    vsubl.u8        q10, d4, d5
    vsubl.u8        q11, d6, d7
    vsubl.u8        q12, d8, d9
    vsubl.u8        q13, d10, d11
    vsubl.u8        q14, d12, d13
    vsubl.u8        q15, d14, d15

    vst1.16         {q8}, [r0]!     ;store diff
    vst1.16         {q9}, [r0]!
    vst1.16         {q10}, [r0]!
    vst1.16         {q11}, [r0]!
    vst1.16         {q12}, [r0]!
    vst1.16         {q13}, [r0]!
    vst1.16         {q14}, [r0]!
    vst1.16         {q15}, [r0]!

    vpop            {d8-d15}
    bx              lr
    ENDP

    END