;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;

    EXPORT  |vp8_dequant_idct_add_v6|

    AREA    |.text|, CODE, READONLY

;-----------------------------------------------------------------------
; void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *pred,
;                              unsigned char *dest, int pitch, int stride)
;
; Purpose (three stages, all operating on the 16 shorts at input):
;   1. Multiply each of the 16 coefficients by the matching dq entry,
;      writing the low 16 bits of every product back in place.
;   2. Run a two-pass 4x4 inverse transform on those values using the
;      16.16 fixed-point constants in the literal pool below.
;   3. Add 4 and shift right by 3, add the result to bytes read from
;      pred, clamp to 0..255 and store 4 bytes per row to dest.
;   Finally the 32 bytes at input are overwritten with zero.
;
; In:   r0 = input   (16 shorts, read and written)
;       r1 = dq      (16 shorts, read only)
;       r2 = pred    (advanced by pitch after each 4-byte read)
;       r3 = dest    (advanced by stride after each 4-byte write)
;       pitch, stride are the 5th/6th arguments, passed on the stack:
;         sp + 36 / sp + 40 after the 9-register push below,
;         sp + 40 / sp + 44 once the extra 4-byte dest slot is reserved.
;
; Stack: 36 bytes of saved registers + one 4-byte slot holding dest.
; Saved/restored: r4-r11, lr.  Scratch: r0-r3, r12, flags.
;
; Notation in the comments: [hi : lo] is a register viewed as two packed
; 16-bit lanes.  Pred and dest are accessed as whole words and the byte
; lane mapping described below is the little-endian one.
; NOTE(review): word loads/stores on pred/dest presumably rely on the
; callers providing suitably aligned pointers (or on unaligned access
; being enabled) - confirm against the call sites.
;-----------------------------------------------------------------------

|vp8_dequant_idct_add_v6| PROC
    stmdb   sp!, {r4-r11, lr}       ; save 9 registers (36 bytes)

    ldr     r4, [r0]                ; r4 = first two input coefficients [in1 : in0]
    ldr     r5, [r1], #4            ; r5 = first two dq factors [dq1 : dq0]; dq += 4 bytes

    sub     sp, sp, #4              ; reserve one word ...
    str     r3, [sp]                ; ... to keep dest, because r3 is reused as a constant below

    mov     r12, #4                 ; 4 iterations x 4 coefficients = 16 coefficients

; --- Stage 1: in-place dequantisation --------------------------------
; Each iteration handles two packed pairs.  Loads for the next pair are
; issued before the stores of the current one.
vp8_dequant_add_loop
    smulbb  r6, r4, r5              ; r6 = in.lo * dq.lo
    smultt  r7, r4, r5              ; r7 = in.hi * dq.hi

    ldr     r4, [r0, #4]            ; next input pair (r0 has not been advanced yet)
    ldr     r5, [r1], #4            ; next dq pair

    strh    r6, [r0], #2            ; store low 16 bits of each product back over the input
    strh    r7, [r0], #2

    smulbb  r6, r4, r5              ; second pair of this iteration
    smultt  r7, r4, r5

    subs    r12, r12, #1            ; count down; Z is consumed by ldrne/bne below
                                    ; (the instructions in between do not set flags)

    ldrne   r4, [r0, #4]            ; preload for the next iteration; skipped on the final pass,
    ldrne   r5, [r1], #4            ; where it would address data beyond the 16 entries processed

    strh    r6, [r0], #2
    strh    r7, [r0], #2

    bne     vp8_dequant_add_loop

    sub     r0, r0, #32             ; rewind to the start of the 16 coefficients
    mov     r1, r0                  ; r1 = write pointer for pass 1 (dq pointer is no longer needed)

; --- Stage 2, pass 1: short_idct4x4llm_v6_dual ------------------------
; Two iterations; each one processes two 16-bit lanes at once.  Per
; iteration the four operand words sit at byte offsets 0, 8, 16 and 24
; from r0 (called x0..x3 below), i.e. one word from each 8-byte row if
; the block is laid out as 4 rows of 4 shorts.
;   r3 = cospi8sqrt2minus1, r4 = sinpi8sqrt2 (smulw* yields (const * lane) >> 16)
    ldr     r3, cospi8sqrt2minus1
    ldr     r4, sinpi8sqrt2
    ldr     r6, [r0, #8]            ; r6 = x1 for the first iteration
    mov     r5, #2                  ; iteration count
vp8_dequant_idct_loop1_v6
    ldr     r12, [r0, #24]          ; r12 = x3
    ldr     r14, [r0, #16]          ; r14 = x2
    smulwt  r9, r3, r6              ; cos * x1.hi
    smulwb  r7, r3, r6              ; cos * x1.lo
    smulwt  r10, r4, r6             ; sin * x1.hi
    smulwb  r8, r4, r6              ; sin * x1.lo
    pkhbt   r7, r7, r9, lsl #16     ; r7 = packed cos * x1
    smulwt  r11, r3, r12            ; cos * x3.hi
    pkhbt   r8, r8, r10, lsl #16    ; r8 = packed sin * x1
    uadd16  r6, r6, r7              ; r6 = x1 + cos * x1
    smulwt  r7, r4, r12             ; sin * x3.hi
    smulwb  r9, r3, r12             ; cos * x3.lo
    smulwb  r10, r4, r12            ; sin * x3.lo
    subs    r5, r5, #1              ; loop counter; Z is used by ldrne/bne further down
    pkhbt   r9, r9, r11, lsl #16    ; r9 = packed cos * x3
    ldr     r11, [r0], #4           ; r11 = x0; advance r0 to the next pair of lanes
    pkhbt   r10, r10, r7, lsl #16   ; r10 = packed sin * x3
    uadd16  r7, r12, r9             ; x3 + cos * x3
    usub16  r7, r8, r7              ; c1 = sin * x1 - (x3 + cos * x3)
    uadd16  r6, r6, r10             ; d1 = (x1 + cos * x1) + sin * x3
    uadd16  r10, r11, r14           ; a1 = x0 + x2
    usub16  r8, r11, r14            ; b1 = x0 - x2
    uadd16  r9, r10, r6             ; a1 + d1  -> offset 0
    usub16  r10, r10, r6            ; a1 - d1  -> offset 24
    uadd16  r6, r8, r7              ; b1 + c1  -> offset 8
    usub16  r7, r8, r7              ; b1 - c1  -> offset 16
    str     r6, [r1, #8]
    ldrne   r6, [r0, #8]            ; preload x1 for the second iteration only
    str     r7, [r1, #16]
    str     r10, [r1, #24]
    str     r9, [r1], #4            ; results overwrite the inputs; advance write pointer
    bne     vp8_dequant_idct_loop1_v6

; --- Stage 2, pass 2 + Stage 3: transform, round, add pred, store ------
; Two iterations; each reads 4 consecutive words (8 shorts) and emits
; two 4-byte output rows.  With A = first 4 shorts and B = next 4 shorts
; of the 8 read, lanes are kept as [A : B] throughout.
    mov     r5, #2                  ; iteration count
    sub     r0, r1, #8              ; r1 advanced 8 bytes in pass 1 -> back to block start
vp8_dequant_idct_loop2_v6
    ldr     r6, [r0], #4            ; r6 = [A1 : A0]
    ldr     r7, [r0], #4            ; r7 = [A3 : A2]
    ldr     r8, [r0], #4            ; r8 = [B1 : B0]
    ldr     r9, [r0], #4            ; r9 = [B3 : B2]
    smulwt  r1, r3, r6              ; cos * A1
    smulwt  r12, r4, r6             ; sin * A1
    smulwt  lr, r3, r8              ; cos * B1
    smulwt  r10, r4, r8             ; sin * B1
    pkhbt   r11, r8, r6, lsl #16    ; r11 = [A0 : B0]
    pkhbt   r1, lr, r1, lsl #16     ; r1  = [cos*A1 : cos*B1]
    pkhbt   r12, r10, r12, lsl #16  ; r12 = [sin*A1 : sin*B1]
    pkhtb   r6, r6, r8, asr #16     ; r6  = [A1 : B1]
    uadd16  r6, r1, r6              ; r6  = x1 + cos * x1
    pkhbt   lr, r9, r7, lsl #16     ; lr  = [A2 : B2]
    uadd16  r10, r11, lr            ; a1 = x0 + x2
    usub16  lr, r11, lr             ; b1 = x0 - x2 (kept in lr / r14)
    pkhtb   r8, r7, r9, asr #16     ; r8  = [A3 : B3]
    subs    r5, r5, #1              ; loop counter; Z is consumed by the bne at the bottom,
                                    ; none of the instructions in between set N/Z/C/V
    smulwt  r1, r3, r8              ; cos * A3
    smulwb  r7, r3, r8              ; cos * B3
    smulwt  r11, r4, r8             ; sin * A3
    smulwb  r9, r4, r8              ; sin * B3
    pkhbt   r1, r7, r1, lsl #16     ; r1  = [cos*A3 : cos*B3]
    uadd16  r8, r1, r8              ; r8  = x3 + cos * x3
    pkhbt   r11, r9, r11, lsl #16   ; r11 = [sin*A3 : sin*B3]
    usub16  r1, r12, r8             ; c1 = sin * x1 - (x3 + cos * x3)
    uadd16  r8, r11, r6             ; d1 = sin * x3 + (x1 + cos * x1)
    ldr     r9, c0x00040004         ; r9 = 4 in each lane (added before the >> 3 below)
    ldr     r12, [sp, #40]          ; r12 = pitch
    uadd16  r6, r10, r8             ; out0 = a1 + d1
    usub16  r7, r10, r8             ; out3 = a1 - d1
    uadd16  r7, r7, r9              ; out3 += 4
    uadd16  r6, r6, r9              ; out0 += 4
    uadd16  r10, r14, r1            ; out1 = b1 + c1
    usub16  r1, r14, r1             ; out2 = b1 - c1
    uadd16  r10, r10, r9            ; out1 += 4
    uadd16  r1, r1, r9              ; out2 += 4

    ; Row A lives in the high lanes: shift them down by 3 and regroup as
    ; [out3 : out1] and [out2 : out0] to line up with uxtb16 byte lanes.
    ldr     r11, [r2], r12          ; r11 = 4 pred bytes; pred += pitch
    mov     r8, r7, asr #3          ; high lane = A.out3 >> 3
    pkhtb   r9, r8, r10, asr #19    ; r9 = [A.out3 >> 3 : A.out1 >> 3]
    mov     r8, r1, asr #3          ; high lane = A.out2 >> 3
    pkhtb   r8, r8, r6, asr #19     ; r8 = [A.out2 >> 3 : A.out0 >> 3]
    uxtb16  lr, r11, ror #8         ; lr = [pred byte 3 : pred byte 1]
    qadd16  r9, r9, lr              ; add prediction (saturating 16-bit add)
    uxtb16  lr, r11                 ; lr = [pred byte 2 : pred byte 0]
    qadd16  r8, r8, lr
    usat16  r9, #8, r9              ; clamp each lane to 0..255
    usat16  r8, #8, r8
    orr     r9, r8, r9, lsl #8      ; r9 = four output bytes for row A (byte n = column n)

    ; Row B lives in the low lanes: move them to the high lanes first,
    ; then repeat the same shift / add / clamp / pack sequence.
    ldr     r11, [r2], r12          ; r11 = next 4 pred bytes; pred += pitch
    ldr     lr, [sp]                ; lr = current dest pointer (saved slot)
    ldr     r12, [sp, #44]          ; r12 = stride
    mov     r7, r7, lsl #16         ; B.out3 -> high lane
    mov     r1, r1, lsl #16         ; B.out2 -> high lane
    mov     r10, r10, lsl #16       ; B.out1 -> high lane
    mov     r6, r6, lsl #16         ; B.out0 -> high lane
    mov     r7, r7, asr #3
    pkhtb   r7, r7, r10, asr #19    ; r7 = [B.out3 >> 3 : B.out1 >> 3]
    mov     r1, r1, asr #3
    pkhtb   r1, r1, r6, asr #19     ; r1 = [B.out2 >> 3 : B.out0 >> 3]
    uxtb16  r8, r11, ror #8         ; r8 = [pred byte 3 : pred byte 1]
    qadd16  r7, r7, r8
    uxtb16  r8, r11                 ; r8 = [pred byte 2 : pred byte 0]
    qadd16  r1, r1, r8
    usat16  r7, #8, r7              ; clamp each lane to 0..255
    usat16  r1, #8, r1
    orr     r1, r1, r7, lsl #8      ; r1 = four output bytes for row B
    str     r9, [lr], r12           ; store row A; dest += stride
    str     r1, [lr], r12           ; store row B; dest += stride
    str     lr, [sp]                ; keep the advanced dest for the next iteration
    bne     vp8_dequant_idct_loop2_v6

; --- Clear the coefficient block (inline equivalent of vpx_memset) -----
    sub     r0, r0, #32             ; r0 advanced 2 x 16 bytes in pass 2 -> back to block start
    add     sp, sp, #4              ; release the dest slot

    mov     r12, #0
    str     r12, [r0]               ; zero all 32 bytes (16 shorts) at input
    str     r12, [r0, #4]
    str     r12, [r0, #8]
    str     r12, [r0, #12]
    str     r12, [r0, #16]
    str     r12, [r0, #20]
    str     r12, [r0, #24]
    str     r12, [r0, #28]

    ldmia   sp!, {r4 - r11, pc}     ; restore saved registers and return (saved lr -> pc)
    ENDP    ; |vp8_dequant_idct_add_v6|

; Constant Pool
; The two transform constants are 16.16 fixed-point multipliers:
;   0x4E7B = 20091, 20091 / 65536 ~= 0.3066
;   0x8A8C = 35468, 35468 / 65536 ~= 0.5412
; c0x00040004 holds the value 4 in both 16-bit lanes.
cospi8sqrt2minus1 DCD 0x00004E7B
sinpi8sqrt2       DCD 0x00008A8C
c0x00040004       DCD 0x00040004

    END