;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT |vp8cx_pack_tokens_into_partitions_armv5|

    INCLUDE asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA    |.text|, CODE, READONLY

;------------------------------------------------------------------------------
; vp8cx_pack_tokens_into_partitions_armv5
;
; Packs the token lists of every partition into the output buffer with the
; boolean coder, one partition after another, and records the partition
; sizes in front of the partition data.
;
; Arguments
;   r0      VP8_COMP *cpi
;   r1      unsigned char *cx_data
;   r2      int num_part
;   r3      *size
;   s0      vp8_coef_encodings
;   s1      vp8_extra_bits,
;   s2      const vp8_tree_index *     (referred to below as vp8_coef_tree)
;
; Stack frame (after "push {r4-r11, lr}" = 36 bytes and "sub sp, #44")
;   [sp, #0]    stop            end of the token list being packed
;   [sp, #4]    b->tree         extra-bits tree of the current token
;   [sp, #8]    size            pointer passed in r3
;   [sp, #12]   mb_rows left    row counter for the current partition
;   [sp, #16]   tokenlist       address of the token list being packed
;   [sp, #20]   num_part
;   [sp, #24]   cx_data         where the next 3-byte partition size goes
;   [sp, #28]   i               partition index
;   [sp, #32]   cpi->tp_list + i
;   [sp, #36]   mb_rows
;   [sp, #40]   ptr             start of the current partition's data
;   [sp, #80]   s0  vp8_coef_encodings
;   [sp, #84]   s1  vp8_extra_bits
;   [sp, #88]   s2  vp8_coef_tree
;
; Register roles while a partition is being encoded
;   r0      address of the vp8_writer inside cpi (cpi + vp8_comp_bc2)
;   r1      p, the token currently being packed
;   r2      lowvalue
;   r3      count
;   r5      range
;   others  scratch
; lowvalue, range and count live in registers only; this routine stores just
; the value, pos and buffer members of the writer.
;------------------------------------------------------------------------------
|vp8cx_pack_tokens_into_partitions_armv5| PROC
    push    {r4-r11, lr}
    sub     sp, sp, #44

    ; Compute address of cpi->common.mb_rows
    ldr     r4, _VP8_COMP_common_
    ldr     r6, _VP8_COMMON_MBrows_
    add     r4, r0, r4

    ldr     r5, [r4, r6]                ; load up mb_rows

    str     r5, [sp, #36]               ; save mb_rows
    str     r1, [sp, #24]               ; save cx_data
    str     r2, [sp, #20]               ; save num_part
    str     r3, [sp, #8]                ; save *size

    ; *size = 3*(num_part -1 );
    sub     r2, r2, #1                  ; num_part - 1
    add     r2, r2, r2, lsl #1          ; 3*(num_part - 1)
    str     r2, [r3]

    add     r2, r2, r1                  ; cx_data + *size
    str     r2, [sp, #40]               ; ptr

    ldr     r4, _VP8_COMP_tplist_
    add     r4, r0, r4
    ldr     r7, [r4, #0]                ; dereference cpi->tp_list
    str     r7, [sp, #32]               ; store start of cpi->tp_list

    ldr     r11, _VP8_COMP_bc2_         ; load up vp8_writer out of cpi
    add     r0, r0, r11

    mov     r11, #0
    str     r11, [sp, #28]              ; i

    ; On every entry: r11 = i and r7 = cpi->tp_list + i
numparts_loop
    ldr     r10, [sp, #40]              ; ptr
    ldr     r5,  [sp, #36]              ; move mb_rows to the counting section
    sub     r5, r5, r11                 ; move start point with each partition
                                        ; mb_rows starts at i
    str     r5,  [sp, #12]

    ; Reset all of the VP8 Writer data for each partition that
    ; is processed.
    ; start_encode
    mov     r2, #0                      ; vp8_writer_lowvalue
    mov     r5, #255                    ; vp8_writer_range
    mvn     r3, #23                     ; vp8_writer_count (= -24)

    str     r2,  [r0, #vp8_writer_value]
    str     r2,  [r0, #vp8_writer_pos]
    str     r10, [r0, #vp8_writer_buffer]

    ; On every entry: r7 = token list to pack
mb_row_loop

    ldr     r1, [r7, #tokenlist_start]
    ldr     r9, [r7, #tokenlist_stop]
    str     r9, [sp, #0]                ; save stop for later comparison
    str     r7, [sp, #16]               ; tokenlist address for next time

    b       check_p_lt_stop

    ; actual work gets done here!

while_p_lt_stop
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r4, [sp, #80]               ; vp8_coef_encodings
    mov     lr, #0
    add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t
                                        ; (8 bytes per entry)
    ldr     r9, [r1, #tokenextra_context_tree]   ; pp

    ldrb    r7, [r1, #tokenextra_skip_eob_node]

    ldr     r6, [r4, #vp8_token_value]  ; v
    ldr     r8, [r4, #vp8_token_len]    ; n

    ; vp8 specific skip_eob_node
    cmp     r7, #0
    movne   lr, #2                      ; i = 2
    subne   r8, r8, #1                  ; --n

    rsb     r4, r8, #32                 ; 32-n
    ldr     r10, [sp, #88]              ; vp8_coef_tree

    ; v is kept in r12 during the token pack loop
    lsl     r12, r6, r4                 ; r12 = v << 32 - n

    ; loop start
token_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp [i>>1]
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb"
    lsls    r12, r12, #1                ; bb = v >> n
    ; Rd must differ from Rm before ARMv6, hence r7 as first source operand.
    ; mul without the S suffix leaves the carry (bb) untouched.
    mul     r4, r7, r4                  ; ((range-1) * pp[i>>1]))

    ; bb can only be 0 or 1.  So only execute this statement
    ; if bb == 1, otherwise it will act like i + 0
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Carry propagation:
    ;   while (x >= 0 && w->buffer[x] == 0xff) { w->buffer[x] = 0; x--; }
    ;   w->buffer[x] += 1;
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    cmp     r4, #0
    blt     token_zero_while_done       ; x < 0: do not read through a stale base
    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmp     r11, #0xff
    beq     token_zero_while_loop
token_zero_while_done

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8
    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

    ; r10 is used earlier in the loop, but r10 is used as
    ; temp variable here.  So after r10 is used, reload
    ; vp8_coef_tree_dcd into r10
    ldr     r10, [sp, #88]              ; vp8_coef_tree

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     token_loop

    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r7, [sp, #84]               ; vp8_extra_bits
    ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    ;  element.  Here sizeof (vp8_extra_bit_struct) == 16
    add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t

    ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    cmp     r4, #0
    beq     skip_extra_bits

    ; if( b->base_val)
    ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    cmp     r8, #0                      ; if( L)
    beq     no_extra_bits

    ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    asr     r7, lr, #1                  ; v=e>>1

    ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    str     r10, [sp, #4]               ; b->tree

    rsb     r4, r8, #32                 ; 32-L
    lsl     r12, r7, r4                 ; r12 = v << (32-L)

    mov     lr, #0                      ; i = 0

extra_bits_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1
    lsls    r12, r12, #1                ; v >> n
    ; Rd must differ from Rm before ARMv6, hence r7 as first source operand.
    mul     r4, r7, r4                  ; (range-1) * pp[i>>1]
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    add     r4, r7, r4, lsr #8          ; split = 1 +  (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     extra_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset= shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     extra_high_bit_not_set

    ; Carry propagation, same shape as in token_loop
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       extra_zero_while_start
extra_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
extra_zero_while_start
    cmp     r4, #0
    blt     extra_zero_while_done       ; x < 0: do not read through a stale base
    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmp     r11, #0xff
    beq     extra_zero_while_loop
extra_zero_while_done

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
extra_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8
    strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    ldr     r10, [sp, #4]               ; b->tree (r10 was used as a temp)
extra_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     extra_bits_loop             ; while (n)

    ; Encode the low bit of e with an even split.
no_extra_bits
    ; lr was reused as the tree index above, so e has to be fetched again.
    ; Same halfword load and named offset as the first read of p->Extra;
    ; only bit 0 of e is consumed below.
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    add     r4, r5, #1                  ; range + 1
    tst     lr, #1
    lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    addne   r2, r2, r4                  ; lowvalue += split
    subne   r4, r5, r4                  ; range = range-split
    tst     r2, #0x80000000             ; lowvalue & 0x80000000
    lsl     r5, r4, #1                  ; range <<= 1
    beq     end_high_bit_not_set

    ; Carry propagation; here r6 is the buffer base and r7 the zero byte
    ldr     r4, [r0, #vp8_writer_pos]
    mov     r7, #0
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       end_zero_while_start
end_zero_while_loop
    strb    r7, [r6, r4]                ; w->buffer[x] = 0
    sub     r4, r4, #1                  ; x--
end_zero_while_start
    cmp     r4, #0
    blt     end_zero_while_done         ; x < 0: do not read through a stale base
    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r12, [r6, r4]
    cmp     r12, #0xff
    beq     end_zero_while_loop
end_zero_while_done

    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r7, [r6, r4]                ; w->buffer[x]
    add     r7, r7, #1
    strb    r7, [r6, r4]                ; w->buffer[x] + 1
end_high_bit_not_set
    adds    r3, r3, #1                  ; ++count
    lsl     r2, r2, #1                  ; lowvalue  <<= 1
    bne     end_count_zero

    ; count reached zero: emit one byte and restart count at -8
    ldr     r4, [r0, #vp8_writer_pos]
    mvn     r3, #7                      ; count = -8
    ldr     r7, [r0, #vp8_writer_buffer]
    lsr     r6, r2, #24                 ; lowvalue >> 24
    add     r12, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r12, [r0, #vp8_writer_pos]
    strb    r6, [r7, r4]                ; w->buffer[w->pos++]
end_count_zero
skip_extra_bits
    add     r1, r1, #TOKENEXTRA_SZ      ; ++p
check_p_lt_stop
    ldr     r4, [sp, #0]                ; stop
    cmp     r1, r4                      ; while( p < stop)
    bcc     while_p_lt_stop

    ; Step num_parts token lists ahead and take num_parts off the row counter.
    ldr     r10, [sp, #20]              ; num_parts
    mov     r1, #TOKENLIST_SZ
    mul     r1, r10, r1                 ; num_parts * TOKENLIST_SZ

    ldr     r6, [sp, #12]               ; mb_rows
    ldr     r7, [sp, #16]               ; tokenlist address
    subs    r6, r6, r10
    add     r7, r7, r1                  ; next element in the array
    str     r6, [sp, #12]
    bgt     mb_row_loop

    ; stop_encode: push 32 zero bits through the coder at probability 128
    mov     r12, #32

stop_encode_loop
    sub     r7, r5, #1                  ; range-1

    mov     r4, r7, lsl #7              ; ((range-1) * 128)

    mov     r7, #1
    add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * 128) >> 8)

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero_se      ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set_se

    ; Carry propagation, same shape as in token_loop
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start_se
token_zero_while_loop_se
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start_se
    cmp     r4, #0
    blt     token_zero_while_done_se    ; x < 0: do not read through a stale base
    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmp     r11, #0xff
    beq     token_zero_while_loop_se
token_zero_while_done_se

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set_se
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8
    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

token_count_lt_zero_se
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r12, r12, #1
    bne     stop_encode_loop

    ldr     r10, [sp, #8]               ; *size
    ldr     r11, [r10]
    ldr     r4,  [r0, #vp8_writer_pos]  ; w->pos
    add     r11, r11, r4                ; *size += w->pos
    str     r11, [r10]

    ldr     r9, [sp, #20]               ; num_parts
    sub     r9, r9, #1
    ldr     r10, [sp, #28]              ; i
    cmp     r10, r9                     ; if(i<(num_part - 1))
    bge     skip_write_partition

    ldr     r12, [sp, #40]              ; ptr
    add     r12, r12, r4                ; ptr += w->pos
    str     r12, [sp, #40]

    ; Store w->pos at cx_data as three bytes, least significant byte first
    ldr     r9, [sp, #24]               ; cx_data
    mov     r8, r4, asr #8
    strb    r4, [r9, #0]
    strb    r8, [r9, #1]
    mov     r4, r4, asr #16
    strb    r4, [r9, #2]

    add     r9, r9, #3                  ; cx_data += 3
    str     r9, [sp, #24]

skip_write_partition

    ldr     r11, [sp, #28]              ; i
    ldr     r10, [sp, #20]              ; num_parts

    add     r11, r11, #1                ; i++
    str     r11, [sp, #28]

    ldr     r7, [sp, #32]               ; cpi->tp_list[i]
    mov     r1, #TOKENLIST_SZ
    add     r7, r7, r1                  ; next element in cpi->tp_list
    str     r7, [sp, #32]               ; cpi->tp_list[i+1]

    cmp     r10, r11
    bgt     numparts_loop


    add     sp, sp, #44
    pop     {r4-r11, pc}
    ENDP

; Literal pool: structure offsets defined in asm_enc_offsets.asm
_VP8_COMP_common_
    DCD     vp8_comp_common
_VP8_COMMON_MBrows_
    DCD     vp8_common_mb_rows
_VP8_COMP_tplist_
    DCD     vp8_comp_tplist
_VP8_COMP_bc2_
    DCD     vp8_comp_bc2

    END