Home | History | Annotate | Download | only in armv5te
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT |vp8cx_pack_tokens_into_partitions_armv5|   ; entry point defined in this file
     13     IMPORT |vp8_validate_buffer_arm|                   ; external routine called by the VALIDATE_POS macro
     14 
     15     INCLUDE vp8_asm_enc_offsets.asm                    ; presumably defines the vp8_writer_*/tokenextra_*/tokenlist_* symbols used below -- confirm
     16 
     17     ARM                                                ; assemble as ARM (not Thumb) instructions
     18     REQUIRE8                                           ; code requires 8-byte stack alignment
     19     PRESERVE8                                          ; code preserves 8-byte stack alignment
     20 
     21     AREA    |.text|, CODE, READONLY
     22 
     23     ; macro for validating write buffer position
            ;   Calls vp8_validate_buffer_arm with
            ;     r0 = $start, r1 = $pos,
            ;     r2 = word at [writer, #vp8_writer_buffer_end],
            ;     r3 = word at [writer, #vp8_writer_error].
            ;   r0-r3, r12 and lr are saved before and restored after the call,
            ;   so the caller's registers are unchanged; the condition flags
            ;   are not saved.
     24     ; needs vp8_writer in r0
     25     ; start shall not be in r1
            ;   ($start is copied after r1 has already been overwritten with
            ;   $pos.  For the same reason neither $start nor $pos may be r2
            ;   or r3: both are loaded before the arguments are copied.)
     26     MACRO
     27     VALIDATE_POS $start, $pos
     28     push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
     29     ldr  r2, [r0, #vp8_writer_buffer_end]
     30     ldr  r3, [r0, #vp8_writer_error]
     31     mov  r1, $pos
     32     mov  r0, $start              ; r0 (writer) is overwritten last
     33     bl   vp8_validate_buffer_arm
     34     pop  {r0-r3, r12, lr}
     35     MEND
     36 
        ;----------------------------------------------------------------------
        ; vp8cx_pack_tokens_into_partitions_armv5
        ;
        ; For each partition i in [0, num_part):
        ;   - selects the writer at cpi + vp8_comp_bc + (i + 1) * vp8_writer_sz
        ;     and resets it to write at the current output pointer (ptr);
        ;   - walks the token lists tp_list[i], tp_list[i + num_part], ...
        ;     while the remaining-row counter (mb_rows - i, reduced by
        ;     num_part per row) is > 0, packing every token in [start, stop):
        ;     the token tree bits, the optional extra bits, and one final
        ;     half-probability bit taken from bit 0 of p->Extra;
        ;   - writes 32 zero bits at probability 128 (stop_encode_loop);
        ;   - advances ptr by the writer's pos.
        ;
        ; The prologue pushes ten registers (40 bytes) and reserves 40 bytes of
        ; locals, so the stack arguments are read at [sp, #80], [sp, #84] and
        ; [sp, #88].
        ;
        ; Local frame:
        ;   [sp, #0]   stop pointer of the current token list
        ;   [sp, #4]   b->tree of the current extra-bits entry
        ;   [sp, #8]   cx_data_end
        ;   [sp, #12]  remaining-row counter for the current partition
        ;   [sp, #16]  address of the current token list entry
        ;   [sp, #20]  num_part
        ;   [sp, #24]  ptr (output position of the current partition)
        ;   [sp, #28]  i (partition index)
        ;   [sp, #32]  token list entry of the first row of the current partition
        ;   [sp, #36]  mb_rows
        ;
        ; Registers held across the packing loops:
        ;   r0 writer, r1 p (token cursor), r2 lowvalue, r3 count, r5 range.
        ;   lowvalue, range and count live only in registers here; the writer
        ;   fields stored by this routine are pos, buffer and buffer_end.
        ;----------------------------------------------------------------------
     37 ; r0 VP8_COMP *cpi
     38 ; r1 unsigned char *cx_data
     39 ; r2 const unsigned char *cx_data_end
     40 ; r3 int num_part
     41 ; s0 vp8_coef_encodings
     42 ; s1 vp8_extra_bits,
     43 ; s2 const vp8_tree_index *
     44 
     45 |vp8cx_pack_tokens_into_partitions_armv5| PROC
     46     push    {r4-r12, lr}
     47     sub     sp, sp, #40
     48 
     49     ; Compute address of cpi->common.mb_rows
     50     ldr     r4, _VP8_COMP_common_
     51     ldr     r6, _VP8_COMMON_MBrows_
     52     add     r4, r0, r4
     53 
     54     ldr     r5, [r4, r6]                ; load up mb_rows
     55 
     56     str     r5, [sp, #36]               ; save mb_rows
     57     str     r1, [sp, #24]               ; save ptr = cx_data
     58     str     r3, [sp, #20]               ; save num_part
     59     str     r2, [sp, #8]                ; save cx_data_end
     60 
     61     ldr     r4, _VP8_COMP_tplist_
     62     add     r4, r0, r4
     63     ldr     r7, [r4, #0]                ; dereference cpi->tp_list
     64     str     r7, [sp, #32]               ; store start of cpi->tp_list
     65 
     66     ldr     r11, _VP8_COMP_bc_          ; load up vp8_writer out of cpi
     67     add     r0, r0, r11
     68 
     69     mov     r11, #0
     70     str     r11, [sp, #28]              ; i
     71 
            ; On entry to numparts_loop: r0 = previous writer, r7 = token list
            ; entry for this partition's first row, r11 = i.
     72 numparts_loop
     73     ldr     r2, _vp8_writer_sz_         ; load up sizeof(vp8_writer)
     74     add     r0, r2                      ; bc[i + 1]
     75 
     76     ldr     r10, [sp, #24]              ; ptr
     77     ldr     r5,  [sp, #36]              ; move mb_rows to the counting section
     78     subs    r5, r5, r11                 ; move start point with each partition
     79                                         ; mb_rows starts at i
     80     str     r5,  [sp, #12]
     81 
     82     ; Reset all of the VP8 Writer data for each partition that
     83     ; is processed.
     84     ; start_encode
     85 
     86     ldr     r3, [sp, #8]
     87     str     r3, [r0, #vp8_writer_buffer_end]
     88 
     89     mov     r2, #0                      ; vp8_writer_lowvalue
     90     mov     r5, #255                    ; vp8_writer_range
     91     mvn     r3, #23                     ; vp8_writer_count
     92 
     93     str     r2,  [r0, #vp8_writer_pos]
     94     str     r10, [r0, #vp8_writer_buffer]
     95 
            ; The flags tested here are still those of "subs r5, r5, r11"
            ; above; nothing in between updates them.
     96     ble     end_partition               ; if (mb_rows <= 0) end partition
     97 
     98 mb_row_loop
     99 
    100     ldr     r1, [r7, #tokenlist_start]
    101     ldr     r9, [r7, #tokenlist_stop]
    102     str     r9, [sp, #0]                ; save stop for later comparison
    103     str     r7, [sp, #16]               ; tokenlist address for next time
    104 
    105     b       check_p_lt_stop
    106 
    107     ; actual work gets done here!
    108 
    109 while_p_lt_stop
    110     ldrb    r6, [r1, #tokenextra_token] ; t
    111     ldr     r4, [sp, #80]               ; vp8_coef_encodings
    112     mov     lr, #0
    113     add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t
    114     ldr     r9, [r1, #tokenextra_context_tree]   ; pp
    115 
    116     ldrb    r7, [r1, #tokenextra_skip_eob_node]
    117 
    118     ldr     r6, [r4, #vp8_token_value]  ; v
    119     ldr     r8, [r4, #vp8_token_len]    ; n
    120 
    121     ; vp8 specific skip_eob_node
    122     cmp     r7, #0
    123     movne   lr, #2                      ; i = 2
    124     subne   r8, r8, #1                  ; --n
    125 
    126     rsb     r4, r8, #32                 ; 32-n
    127     ldr     r10, [sp, #88]              ; vp8_coef_tree
    128 
    129     ; v is kept in r12 during the token pack loop
    130     lsl     r12, r6, r4                ; r12 = v << 32 - n
    131 
    132 ; loop start
    133 token_loop
    134     ldrb    r4, [r9, lr, asr #1]        ; pp [i>>1]
    135     sub     r7, r5, #1                  ; range-1
    136 
    137     ; Decisions are made based on the bit value shifted
    138     ; off of v, so set a flag here based on this.
    139     ; This value is referred to as "bb"
    140     lsls    r12, r12, #1                ; bb = v >> n
    141     mul     r6, r4, r7                  ; ((range-1) * pp[i>>1]))
    142 
    143     ; bb can only be 0 or 1.  So only execute this statement
    144     ; if bb == 1, otherwise it will act like i + 0
    145     addcs   lr, lr, #1                  ; i + bb
    146 
    147     mov     r7, #1
    148     ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    149     add     r4, r7, r6, lsr #8          ; 1 + (((range-1) * pp[i>>1]) >> 8)
    150 
    151     addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    152     subcs   r4, r5, r4                  ; if  (bb) range = range-split
    153 
    154     ; Counting the leading zeros is used to normalize range.
    155     clz     r6, r4
    156     sub     r6, r6, #24                 ; shift
    157 
    158     ; Flag is set on the sum of count.  This flag is used later
    159     ; to determine if count >= 0
    160     adds    r3, r3, r6                  ; count += shift
    161     lsl     r5, r4, r6                  ; range <<= shift
    162     bmi     token_count_lt_zero         ; if(count >= 0)
    163 
    164     sub     r6, r6, r3                  ; offset = shift - count
    165     sub     r4, r6, #1                  ; offset-1
    166     lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    167     bpl     token_high_bit_not_set
    168 
    169     ldr     r4, [r0, #vp8_writer_pos]   ; x
    170     sub     r4, r4, #1                  ; x = w->pos-1
    171     b       token_zero_while_start
    172 token_zero_while_loop
    173     mov     r10, #0
    174     strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    175     sub     r4, r4, #1                  ; x--
            ; while (x >= 0 && w->buffer[x] == 0xff): every access below is
            ; conditional on x >= 0, so r7 is never dereferenced before it has
            ; been loaded with w->buffer.  When x < 0 the beq sees Z clear
            ; from "cmp r4, #0" and falls through.
    176 token_zero_while_start
    177     cmp     r4, #0
    178     ldrge   r7, [r0, #vp8_writer_buffer]
    179     ldrgeb  r11, [r7, r4]               ; read w->buffer[x] only when x >= 0
    180     cmpge   r11, #0xff
    181     beq     token_zero_while_loop
    182 
            ; NOTE(review): the increment below is unconditional, so x is
            ; assumed to be >= 0 when the loop exits -- confirm.
    183     ldr     r7, [r0, #vp8_writer_buffer]
    184     ldrb    r10, [r7, r4]               ; w->buffer[x]
    185     add     r10, r10, #1
    186     strb    r10, [r7, r4]               ; w->buffer[x] + 1
    187 token_high_bit_not_set
    188     rsb     r4, r6, #24                 ; 24-offset
    189     ldr     r10, [r0, #vp8_writer_buffer]
    190     lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    191     ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    192     lsl     r2, r2, r6                  ; lowvalue <<= offset
    193     mov     r6, r3                      ; shift = count
    194     add     r11, r4, #1                 ; w->pos++
    195     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    196     str     r11, [r0, #vp8_writer_pos]
    197     sub     r3, r3, #8                  ; count -= 8
    198 
    199     VALIDATE_POS r10, r11               ; validate_buffer at pos
    200 
    201     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
    202 
    203     ; r10 is used earlier in the loop, but r10 is used as
    204     ; temp variable here.  So after r10 is used, reload
    205     ; vp8_coef_tree_dcd into r10
    206     ldr     r10, [sp, #88]              ; vp8_coef_tree
    207 
    208 token_count_lt_zero
    209     lsl     r2, r2, r6                  ; lowvalue <<= shift
    210 
    211     subs    r8, r8, #1                  ; --n
    212     bne     token_loop
    213 
    214     ldrb    r6, [r1, #tokenextra_token] ; t
    215     ldr     r7, [sp, #84]                ; vp8_extra_bits
    216     ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    217     ;  element.  Here vp8_extra_bit_struct == 16
    218     add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t
    219 
    220     ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    221     cmp     r4, #0
    222     beq     skip_extra_bits
    223 
    224 ;   if( b->base_val)
    225     ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    226     ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    227     cmp     r8, #0                      ; if( L)
    228     beq     no_extra_bits
    229 
    230     ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    231     asr     r7, lr, #1                  ; v=e>>1
    232 
    233     ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    234     str     r10, [sp, #4]               ; b->tree
    235 
    236     rsb     r4, r8, #32
    237     lsl     r12, r7, r4
    238 
    239     mov     lr, #0                      ; i = 0
    240 
    241 extra_bits_loop
    242     ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    243     sub     r7, r5, #1                  ; range-1
    244     lsls    r12, r12, #1                ; v >> n
    245     mul     r6, r4, r7                  ; (range-1) * pp[i>>1]
    246     addcs   lr, lr, #1                  ; i + bb
    247 
    248     mov     r7, #1
    249     ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    250     add     r4, r7, r6, lsr #8          ; split = 1 +  (((range-1) * pp[i>>1]) >> 8)
    251 
    252     addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    253     subcs   r4, r5, r4                  ; if  (bb) range = range-split
    254 
    255     clz     r6, r4
    256     sub     r6, r6, #24
    257 
    258     adds    r3, r3, r6                  ; count += shift
    259     lsl     r5, r4, r6                  ; range <<= shift
    260     bmi     extra_count_lt_zero         ; if(count >= 0)
    261 
    262     sub     r6, r6, r3                  ; offset= shift - count
    263     sub     r4, r6, #1                  ; offset-1
    264     lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    265     bpl     extra_high_bit_not_set
    266 
    267     ldr     r4, [r0, #vp8_writer_pos]   ; x
    268     sub     r4, r4, #1                  ; x = w->pos - 1
    269     b       extra_zero_while_start
    270 extra_zero_while_loop
    271     mov     r10, #0
    272     strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    273     sub     r4, r4, #1                  ; x--
    274 extra_zero_while_start
    275     cmp     r4, #0
    276     ldrge   r7, [r0, #vp8_writer_buffer]
    277     ldrgeb  r11, [r7, r4]               ; read w->buffer[x] only when x >= 0
    278     cmpge   r11, #0xff
    279     beq     extra_zero_while_loop
    280 
    281     ldr     r7, [r0, #vp8_writer_buffer]
    282     ldrb    r10, [r7, r4]
    283     add     r10, r10, #1
    284     strb    r10, [r7, r4]
    285 extra_high_bit_not_set
    286     rsb     r4, r6, #24                 ; 24-offset
    287     ldr     r10, [r0, #vp8_writer_buffer]
    288     lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    289     ldr     r4, [r0, #vp8_writer_pos]
    290     lsl     r2, r2, r6                  ; lowvalue <<= offset
    291     mov     r6, r3                      ; shift = count
    292     add     r11, r4, #1                 ; w->pos++
    293     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    294     str     r11, [r0, #vp8_writer_pos]
    295     sub     r3, r3, #8                  ; count -= 8
    296 
    297     VALIDATE_POS r10, r11               ; validate_buffer at pos
    298 
    299     strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    300     ldr     r10, [sp, #4]               ; b->tree
    301 extra_count_lt_zero
    302     lsl     r2, r2, r6
    303 
    304     subs    r8, r8, #1                  ; --n
    305     bne     extra_bits_loop             ; while (n)
    306 
            ; lr was reused as the tree index in extra_bits_loop, so e is
            ; reloaded.  The load matches the one before the loop (halfword,
            ; symbolic offset); only bit 0 of e is tested here.
    307 no_extra_bits
    308     ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    309     add     r4, r5, #1                  ; range + 1
    310     tst     lr, #1
    311     lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    312     addne   r2, r2, r4                  ; lowvalue += split
    313     subne   r4, r5, r4                  ; range = range-split
    314     tst     r2, #0x80000000             ; lowvalue & 0x80000000
    315     lsl     r5, r4, #1                  ; range <<= 1
    316     beq     end_high_bit_not_set
    317 
    318     ldr     r4, [r0, #vp8_writer_pos]
    319     mov     r7, #0
    320     sub     r4, r4, #1
    321     b       end_zero_while_start
    322 end_zero_while_loop
    323     strb    r7, [r6, r4]
    324     sub     r4, r4, #1                  ; x--
    325 end_zero_while_start
    326     cmp     r4, #0
    327     ldrge   r6, [r0, #vp8_writer_buffer]
    328     ldrgeb  r12, [r6, r4]               ; read w->buffer[x] only when x >= 0
    329     cmpge   r12, #0xff
    330     beq     end_zero_while_loop
    331 
    332     ldr     r6, [r0, #vp8_writer_buffer]
    333     ldrb    r7, [r6, r4]
    334     add     r7, r7, #1
    335     strb    r7, [r6, r4]
    336 end_high_bit_not_set
    337     adds    r3, r3, #1                  ; ++count
    338     lsl     r2, r2, #1                  ; lowvalue  <<= 1
    339     bne     end_count_zero
    340 
    341     ldr     r4, [r0, #vp8_writer_pos]
    342     mvn     r3, #7                      ; count = -8
    343     ldr     r7, [r0, #vp8_writer_buffer]
    344     lsr     r6, r2, #24                 ; lowvalue >> 24
    345     add     r12, r4, #1                 ; w->pos++
    346     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    347     str     r12, [r0, #vp8_writer_pos]
    348 
    349     VALIDATE_POS r7, r12                ; validate_buffer at pos
    350 
    351     strb    r6, [r7, r4]
    352 end_count_zero
    353 skip_extra_bits
    354     add     r1, r1, #TOKENEXTRA_SZ      ; ++p
    355 check_p_lt_stop
    356     ldr     r4, [sp, #0]                ; stop
    357     cmp     r1, r4                      ; while( p < stop)
    358     bcc     while_p_lt_stop
    359 
            ; Advance to the next row of this partition: step num_parts
            ; entries forward in the token list array and take num_parts off
            ; the remaining-row counter.
    360     ldr     r10, [sp, #20]              ; num_parts
    361     mov     r1, #TOKENLIST_SZ
    362     mul     r1, r10, r1
    363 
    364     ldr     r6, [sp, #12]               ; mb_rows
    365     ldr     r7, [sp, #16]               ; tokenlist address
    366     subs    r6, r6, r10
    367     add     r7, r7, r1                  ; next element in the array
    368     str     r6, [sp, #12]
    369     bgt     mb_row_loop
    370 
    371 end_partition
    372     mov     r12, #32
    373 
            ; Writes r12 (= 32) zero bits with split = 1 + (((range-1) * 128) >> 8).
    374 stop_encode_loop
    375     sub     r7, r5, #1                  ; range-1
    376 
    377     mov     r4, r7, lsl #7              ; ((range-1) * 128)
    378 
    379     mov     r7, #1
    380     add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * 128) >> 8)
    381 
    382     ; Counting the leading zeros is used to normalize range.
    383     clz     r6, r4
    384     sub     r6, r6, #24                 ; shift
    385 
    386     ; Flag is set on the sum of count.  This flag is used later
    387     ; to determine if count >= 0
    388     adds    r3, r3, r6                  ; count += shift
    389     lsl     r5, r4, r6                  ; range <<= shift
    390     bmi     token_count_lt_zero_se      ; if(count >= 0)
    391 
    392     sub     r6, r6, r3                  ; offset = shift - count
    393     sub     r4, r6, #1                  ; offset-1
    394     lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    395     bpl     token_high_bit_not_set_se
    396 
    397     ldr     r4, [r0, #vp8_writer_pos]   ; x
    398     sub     r4, r4, #1                  ; x = w->pos-1
    399     b       token_zero_while_start_se
    400 token_zero_while_loop_se
    401     mov     r10, #0
    402     strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    403     sub     r4, r4, #1                  ; x--
    404 token_zero_while_start_se
    405     cmp     r4, #0
    406     ldrge   r7, [r0, #vp8_writer_buffer]
    407     ldrgeb  r11, [r7, r4]               ; read w->buffer[x] only when x >= 0
    408     cmpge   r11, #0xff
    409     beq     token_zero_while_loop_se
    410 
    411     ldr     r7, [r0, #vp8_writer_buffer]
    412     ldrb    r10, [r7, r4]               ; w->buffer[x]
    413     add     r10, r10, #1
    414     strb    r10, [r7, r4]               ; w->buffer[x] + 1
    415 token_high_bit_not_set_se
    416     rsb     r4, r6, #24                 ; 24-offset
    417     ldr     r10, [r0, #vp8_writer_buffer]
    418     lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    419     ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    420     lsl     r2, r2, r6                  ; lowvalue <<= offset
    421     mov     r6, r3                      ; shift = count
    422     add     r11, r4, #1                 ; w->pos++
    423     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    424     str     r11, [r0, #vp8_writer_pos]
    425     sub     r3, r3, #8                  ; count -= 8
    426 
    427     VALIDATE_POS r10, r11               ; validate_buffer at pos
    428 
    429     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
    430 
    431 token_count_lt_zero_se
    432     lsl     r2, r2, r6                  ; lowvalue <<= shift
    433 
    434     subs    r12, r12, #1
    435     bne     stop_encode_loop
    436 
    437     ldr     r4,  [r0, #vp8_writer_pos]  ; w->pos
    438     ldr     r12, [sp, #24]              ; ptr
    439     add     r12, r12, r4                ; ptr += w->pos
    440     str     r12, [sp, #24]
    441 
    442     ldr     r11, [sp, #28]              ; i
    443     ldr     r10, [sp, #20]              ; num_parts
    444 
    445     add     r11, r11, #1                ; i++
    446     str     r11, [sp, #28]
    447 
    448     ldr     r7, [sp, #32]               ; cpi->tp_list[i]
    449     mov     r1, #TOKENLIST_SZ
    450     add     r7, r7, r1                  ; next element in cpi->tp_list
    451     str     r7, [sp, #32]               ; cpi->tp_list[i+1]
    452 
    453     cmp     r10, r11
    454     bgt     numparts_loop
    455 
    456     add     sp, sp, #40
    457     pop     {r4-r12, pc}
    458     ENDP
    459 
        ; Literal words read by the procedure above with "ldr rX, <label>".
        ; Each holds a structure offset or size; the DCD operands are not
        ; defined in this file (presumably they come from
        ; vp8_asm_enc_offsets.asm -- confirm).
    460 _VP8_COMP_common_
    461     DCD     vp8_comp_common             ; added to cpi to reach cpi->common
    462 _VP8_COMMON_MBrows_
    463     DCD     vp8_common_mb_rows          ; offset of mb_rows from the address computed above
    464 _VP8_COMP_tplist_
    465     DCD     vp8_comp_tplist             ; added to cpi to reach the tp_list pointer
    466 _VP8_COMP_bc_
    467     DCD     vp8_comp_bc                 ; added to cpi to reach the first vp8_writer
    468 _vp8_writer_sz_
    469     DCD     vp8_writer_sz               ; step from one vp8_writer to the next
    470 
    471     END
    472