;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_fast_quantize_b_armv6|

    INCLUDE asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA    ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_fast_quantize_b_armv6
;
; Quantizes and dequantizes the 16 halfword coefficients of one block
; (four per loop pass, four passes), then works out the end-of-block
; value and stores it in the BLOCKD.
;
; In:    r0 = BLOCK *b   - coeff, quant_fast and round pointers are read
;        r1 = BLOCKD *d  - qcoeff, dqcoeff and dequant pointers are read,
;                          eob is written
; Saved: r1, r4-r11 are pushed on entry and popped on exit.
; Used:  r0, r2, r3, r12 and lr as scratch.
;
; Register roles inside the loop:
;        r3 = coeff ptr       r4 = quant_fast ptr   r5 = round ptr
;        r6 = qcoeff ptr      r7 = dqcoeff ptr      r8 = dequant ptr
;        r2 = pass counter / flag source
;        r1 = nonzero flags; bit w is set when qcoeff halfword 2w or
;             2w+1 is nonzero (w = 0..7)
;-----------------------------------------------------------------------
|vp8_fast_quantize_b_armv6| PROC
    stmfd   sp!, {r1, r4-r11, lr}           ; r1 ends up at [sp, #0]

    ldr     r3, [r0, #vp8_block_coeff]      ; input coefficients
    ldr     r4, [r0, #vp8_block_quant_fast] ; quantizer multipliers
    ldr     r5, [r0, #vp8_block_round]      ; rounding terms
    ldr     r6, [r1, #vp8_blockd_qcoeff]    ; quantized output
    ldr     r7, [r1, #vp8_blockd_dqcoeff]   ; dequantized output
    ldr     r8, [r1, #vp8_blockd_dequant]   ; dequantizer multipliers

    ldr     r2, loop_count          ; r2 = 0x1000000. It is shifted left by
                                    ; two with 'lsls' once per pass and hits
                                    ; zero after the fourth pass; shifted
                                    ; right it also yields the flag bit for
                                    ; the coefficient pair being written.

    mov     r1, #0                  ; no nonzero pairs seen yet

    ; PART 1: quantize and dequantize, four coefficients per pass
quantize_loop
    ldr     r9, [r3], #4            ; coefficients   [z1 | z0]
    ldr     r10, [r5], #4           ; rounding       [rnd1 | rnd0]
    ldr     r11, [r4], #4           ; quantizers     [q1 | q0]

    ssat16  lr, #1, r9              ; sign masks [sz1 | sz0]: -1 if z < 0, else 0
    eor     r9, r9, lr              ; z ^ sz in each halfword
    ssub16  r9, r9, lr              ; (z ^ sz) - sz = |z|
    sadd16  r9, r9, r10             ; [|z1|+rnd1 | |z0|+rnd0]

    ldr     r12, [r3], #4           ; coefficients   [z3 | z2]

    smulbb  r0, r9, r11             ; (|z0|+rnd0) * q0, 32-bit product
    smultt  r9, r9, r11             ; (|z1|+rnd1) * q1, 32-bit product

    ldr     r10, [r5], #4           ; rounding       [rnd3 | rnd2]

    ssat16  r11, #1, r12            ; sign masks [sz3 | sz2]
    eor     r12, r12, r11           ; z ^ sz in each halfword
    pkhtb   r0, r9, r0, asr #16     ; keep the high halves: [y1 | y0]
    ldr     r9, [r4], #4            ; quantizers     [q3 | q2]
    ssub16  r12, r12, r11           ; (z ^ sz) - sz = |z|

    sadd16  r12, r12, r10           ; [|z3|+rnd3 | |z2|+rnd2]

    eor     r0, r0, lr              ; start putting the sign back on y1, y0

    smulbb  r10, r12, r9            ; (|z2|+rnd2) * q2, 32-bit product
    smultt  r12, r12, r9            ; (|z3|+rnd3) * q3, 32-bit product

    ssub16  r0, r0, lr              ; signed result [x1 | x0] = (y ^ sz) - sz

    cmp     r0, #0                  ; is either of x1, x0 nonzero?
    orrne   r1, r1, r2, lsr #24     ; yes: set this pair's (even) flag bit

    str     r0, [r6], #4            ; qcoeff <- [x1 | x0]
    ldr     r9, [r8], #4            ; dequantizers   [dq1 | dq0]

    pkhtb   r10, r12, r10, asr #16  ; keep the high halves: [y3 | y2]
    eor     r10, r10, r11           ; start putting the sign back on y3, y2
    ssub16  r10, r10, r11           ; signed result [x3 | x2] = (y ^ sz) - sz

    cmp     r10, #0                 ; is either of x3, x2 nonzero?
    orrne   r1, r1, r2, lsr #23     ; yes: set this pair's (odd) flag bit

    str     r10, [r6], #4           ; qcoeff <- [x3 | x2]
    ldr     r11, [r8], #4           ; dequantizers   [dq3 | dq2]

    smulbb  r12, r0, r9             ; x0 * dq0
    smultt  r0, r0, r9              ; x1 * dq1

    smulbb  r9, r10, r11            ; x2 * dq2
    smultt  r10, r10, r11           ; x3 * dq3

    lsls    r2, r2, #2              ; advance the counter; Z is set after the
                                    ; fourth pass and is not touched by the
                                    ; stores and add below
    strh    r12, [r7, #0]           ; dqcoeff[0] = low halfword of x0*dq0
    strh    r0, [r7, #2]            ; dqcoeff[1] = low halfword of x1*dq1
    strh    r9, [r7, #4]            ; dqcoeff[2] = low halfword of x2*dq2
    strh    r10, [r7, #6]           ; dqcoeff[3] = low halfword of x3*dq3
    add     r7, r7, #8              ; step the dqcoeff pointer by 8 bytes
    bne     quantize_loop

    ; PART 2: find the end-of-block value.
    ; qcoeff is scanned from scan position i = 15 downwards; rc is the
    ; halfword index into qcoeff (byte offset 2*rc) visited at position i.
    ; NOTE(review): the i -> rc mapping is presumably the VP8 zig-zag scan
    ; order; the table itself is not visible in this file.
    ; lr holds i + 1 for the position currently being tested.
    mov     lr, #0                  ; eob stays 0 if nothing is nonzero
    cmp     r1, #0                  ; any flag bit set?
    ldr     r11, [sp, #0]           ; r11 = BLOCKD pointer pushed on entry
    beq     store_eob               ; all coefficients quantized to zero

    ldr     r0, [r11, #vp8_blockd_qcoeff]   ; r0 = start of qcoeff again

    ; Use the flag bits to enter the scan as late as possible: each
    ; entry point skips only positions whose pair flag was already
    ; tested clear.
    tst     r1, #0x80               ; qcoeff[14], qcoeff[15]
    bne     eob_scan_from_15
    tst     r1, #0x20               ; qcoeff[10], qcoeff[11]
    bne     eob_scan_from_13
    tst     r1, #0x8                ; qcoeff[6], qcoeff[7]
    bne     eob_scan_from_12
    tst     r1, #0x40               ; qcoeff[12], qcoeff[13]
    bne     eob_scan_from_10
    tst     r1, #0x10               ; qcoeff[8], qcoeff[9]
    bne     eob_scan_from_8
    tst     r1, #0x2                ; qcoeff[2], qcoeff[3]
    bne     eob_scan_from_6
    tst     r1, #0x4                ; qcoeff[4], qcoeff[5]
    bne     eob_scan_from_4
    b       eob_scan_from_1

eob_scan_from_15
    ldrh    r2, [r0, #30]           ; i=15 -> rc=15
    mov     lr, #16
    cmp     r2, #0
    bne     store_eob

    ldrh    r3, [r0, #28]           ; i=14 -> rc=14
    mov     lr, #15
    cmp     r3, #0
    bne     store_eob

eob_scan_from_13
    ldrh    r2, [r0, #22]           ; i=13 -> rc=11
    mov     lr, #14
    cmp     r2, #0
    bne     store_eob

eob_scan_from_12
    ldrh    r3, [r0, #14]           ; i=12 -> rc=7
    mov     lr, #13
    cmp     r3, #0
    bne     store_eob

    ldrh    r2, [r0, #20]           ; i=11 -> rc=10
    mov     lr, #12
    cmp     r2, #0
    bne     store_eob

eob_scan_from_10
    ldrh    r3, [r0, #26]           ; i=10 -> rc=13
    mov     lr, #11
    cmp     r3, #0
    bne     store_eob

    ldrh    r2, [r0, #24]           ; i=9 -> rc=12
    mov     lr, #10
    cmp     r2, #0
    bne     store_eob

eob_scan_from_8
    ldrh    r3, [r0, #18]           ; i=8 -> rc=9
    mov     lr, #9
    cmp     r3, #0
    bne     store_eob

    ldrh    r2, [r0, #12]           ; i=7 -> rc=6
    mov     lr, #8
    cmp     r2, #0
    bne     store_eob

eob_scan_from_6
    ldrh    r3, [r0, #6]            ; i=6 -> rc=3
    mov     lr, #7
    cmp     r3, #0
    bne     store_eob

    ldrh    r2, [r0, #4]            ; i=5 -> rc=2
    mov     lr, #6
    cmp     r2, #0
    bne     store_eob

eob_scan_from_4
    ldrh    r3, [r0, #10]           ; i=4 -> rc=5
    mov     lr, #5
    cmp     r3, #0
    bne     store_eob

    ldrh    r2, [r0, #16]           ; i=3 -> rc=8
    mov     lr, #4
    cmp     r2, #0
    bne     store_eob

    ldrh    r3, [r0, #8]            ; i=2 -> rc=4
    mov     lr, #3
    cmp     r3, #0
    bne     store_eob

eob_scan_from_1
    ldrh    r2, [r0, #2]            ; i=1 -> rc=1
    mov     lr, #2
    cmp     r2, #0
    bne     store_eob

    mov     lr, #1                  ; i=0 -> rc=0: not loaded or tested; a
                                    ; flag was set and positions 1..15 are
                                    ; all zero, so eob = 1

store_eob
    str     lr, [r11, #vp8_blockd_eob]      ; write eob into the BLOCKD
    ldmfd   sp!, {r1, r4-r11, pc}           ; restore registers and return

    ENDP

; Initial value of the pass counter / flag source loaded into r2 above.
loop_count
    DCD     0x1000000

    END
