Home | History | Annotate | Download | only in armv5te
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
     13 
     14     INCLUDE asm_enc_offsets.asm
     15 
     16     ARM
     17     REQUIRE8
     18     PRESERVE8
     19 
     20     AREA    |.text|, CODE, READONLY
     21 
     22 ; r0 VP8_COMP *cpi
     23 ; r1 unsigned char *cx_data
     24 ; r2 int num_part
     25 ; r3 *size
     26 ; s0 vp8_coef_encodings
     27 ; s1 vp8_extra_bits,
     28 ; s2 const vp8_tree_index *,
     29 
     30 |vp8cx_pack_tokens_into_partitions_armv5| PROC
     31     push    {r4-r11, lr}
     32     sub     sp, sp, #44
     33 
     34     ; Compute address of cpi->common.mb_rows
     35     ldr     r4, _VP8_COMP_common_
     36     ldr     r6, _VP8_COMMON_MBrows_
     37     add     r4, r0, r4
     38 
     39     ldr     r5, [r4, r6]                ; load up mb_rows
     40 
     41     str     r5, [sp, #36]               ; save mb_rows
     42     str     r1, [sp, #24]               ; save cx_data
     43     str     r2, [sp, #20]               ; save num_part
     44     str     r3, [sp, #8]                ; save *size
     45 
     46     ; *size = 3*(num_part -1 );
     47     sub     r2, r2, #1                  ; num_part - 1
     48     add     r2, r2, r2, lsl #1          ; 3*(num_part - 1)
     49     str     r2, [r3]
     50 
     51     add     r2, r2, r1                  ; cx_data + *size
     52     str     r2, [sp, #40]               ; ptr
     53 
     54     ldr     r4, _VP8_COMP_tplist_
     55     add     r4, r0, r4
     56     ldr     r7, [r4, #0]                ; dereference cpi->tp_list
     57     str     r7, [sp, #32]               ; store start of cpi->tp_list
     58 
     59     ldr     r11, _VP8_COMP_bc2_         ; load up vp8_writer out of cpi
     60     add     r0, r0, r11
     61 
     62     mov     r11, #0
     63     str     r11, [sp, #28]              ; i
     64 
     65 numparts_loop
     66     ldr     r10, [sp, #40]              ; ptr
     67     ldr     r5,  [sp, #36]              ; move mb_rows to the counting section
     68     sub     r5, r5, r11                 ; move start point with each partition
     69                                         ; mb_rows starts at i
     70     str     r5,  [sp, #12]
     71 
     72     ; Reset all of the VP8 Writer data for each partition that
     73     ; is processed.
     74     ; start_encode
     75     mov     r2, #0                      ; vp8_writer_lowvalue
     76     mov     r5, #255                    ; vp8_writer_range
     77     mvn     r3, #23                     ; vp8_writer_count
     78 
     79     str     r2,  [r0, #vp8_writer_value]
     80     str     r2,  [r0, #vp8_writer_pos]
     81     str     r10, [r0, #vp8_writer_buffer]
     82 
     83 mb_row_loop
     84 
     85     ldr     r1, [r7, #tokenlist_start]
     86     ldr     r9, [r7, #tokenlist_stop]
     87     str     r9, [sp, #0]                ; save stop for later comparison
     88     str     r7, [sp, #16]               ; tokenlist address for next time
     89 
     90     b       check_p_lt_stop
     91 
     92     ; actual work gets done here!
     93 
     94 while_p_lt_stop
     95     ldrb    r6, [r1, #tokenextra_token] ; t
     96     ldr     r4, [sp, #80]               ; vp8_coef_encodings
     97     mov     lr, #0
     98     add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t
     99     ldr     r9, [r1, #tokenextra_context_tree]   ; pp
    100 
    101     ldrb    r7, [r1, #tokenextra_skip_eob_node]
    102 
    103     ldr     r6, [r4, #vp8_token_value]  ; v
    104     ldr     r8, [r4, #vp8_token_len]    ; n
    105 
    106     ; vp8 specific skip_eob_node
    107     cmp     r7, #0
    108     movne   lr, #2                      ; i = 2
    109     subne   r8, r8, #1                  ; --n
    110 
    111     rsb     r4, r8, #32                 ; 32-n
    112     ldr     r10, [sp, #88]              ; vp8_coef_tree
    113 
    114     ; v is kept in r12 during the token pack loop
    115     lsl     r12, r6, r4                ; r12 = v << 32 - n
    116 
    117 ; loop start
    118 token_loop
    119     ldrb    r4, [r9, lr, asr #1]        ; pp [i>>1]
    120     sub     r7, r5, #1                  ; range-1
    121 
    122     ; Decisions are made based on the bit value shifted
    123     ; off of v, so set a flag here based on this.
    124     ; This value is refered to as "bb"
    125     lsls    r12, r12, #1                ; bb = v >> n
    126     mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
    127 
    128     ; bb can only be 0 or 1.  So only execute this statement
    129     ; if bb == 1, otherwise it will act like i + 0
    130     addcs   lr, lr, #1                  ; i + bb
    131 
    132     mov     r7, #1
    133     ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    134     add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * pp[i>>1]) >> 8)
    135 
    136     addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    137     subcs   r4, r5, r4                  ; if  (bb) range = range-split
    138 
    139     ; Counting the leading zeros is used to normalize range.
    140     clz     r6, r4
    141     sub     r6, r6, #24                 ; shift
    142 
    143     ; Flag is set on the sum of count.  This flag is used later
    144     ; to determine if count >= 0
    145     adds    r3, r3, r6                  ; count += shift
    146     lsl     r5, r4, r6                  ; range <<= shift
    147     bmi     token_count_lt_zero         ; if(count >= 0)
    148 
    149     sub     r6, r6, r3                  ; offset = shift - count
    150     sub     r4, r6, #1                  ; offset-1
    151     lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    152     bpl     token_high_bit_not_set
    153 
    154     ldr     r4, [r0, #vp8_writer_pos]   ; x
    155     sub     r4, r4, #1                  ; x = w->pos-1
    156     b       token_zero_while_start
    157 token_zero_while_loop
    158     mov     r10, #0
    159     strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    160     sub     r4, r4, #1                  ; x--
    161 token_zero_while_start
    162     cmp     r4, #0
    163     ldrge   r7, [r0, #vp8_writer_buffer]
    164     ldrb    r11, [r7, r4]
    165     cmpge   r11, #0xff
    166     beq     token_zero_while_loop
    167 
    168     ldr     r7, [r0, #vp8_writer_buffer]
    169     ldrb    r10, [r7, r4]               ; w->buffer[x]
    170     add     r10, r10, #1
    171     strb    r10, [r7, r4]               ; w->buffer[x] + 1
    172 token_high_bit_not_set
    173     rsb     r4, r6, #24                 ; 24-offset
    174     ldr     r10, [r0, #vp8_writer_buffer]
    175     lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    176     ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    177     lsl     r2, r2, r6                  ; lowvalue <<= offset
    178     mov     r6, r3                      ; shift = count
    179     add     r11, r4, #1                 ; w->pos++
    180     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    181     str     r11, [r0, #vp8_writer_pos]
    182     sub     r3, r3, #8                  ; count -= 8
    183     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
    184 
    185     ; r10 is used earlier in the loop, but r10 is used as
    186     ; temp variable here.  So after r10 is used, reload
    187     ; vp8_coef_tree_dcd into r10
    188     ldr     r10, [sp, #88]              ; vp8_coef_tree
    189 
    190 token_count_lt_zero
    191     lsl     r2, r2, r6                  ; lowvalue <<= shift
    192 
    193     subs    r8, r8, #1                  ; --n
    194     bne     token_loop
    195 
    196     ldrb    r6, [r1, #tokenextra_token] ; t
    197     ldr     r7, [sp, #84]                ; vp8_extra_bits
    198     ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    199     ;  element.  Here vp8_extra_bit_struct == 16
    200     add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t
    201 
    202     ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    203     cmp     r4, #0
    204     beq     skip_extra_bits
    205 
    206 ;   if( b->base_val)
    207     ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    208     ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    209     cmp     r8, #0                      ; if( L)
    210     beq     no_extra_bits
    211 
    212     ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    213     asr     r7, lr, #1                  ; v=e>>1
    214 
    215     ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    216     str     r10, [sp, #4]               ; b->tree
    217 
    218     rsb     r4, r8, #32
    219     lsl     r12, r7, r4
    220 
    221     mov     lr, #0                      ; i = 0
    222 
    223 extra_bits_loop
    224     ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    225     sub     r7, r5, #1                  ; range-1
    226     lsls    r12, r12, #1                ; v >> n
    227     mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
    228     addcs   lr, lr, #1                  ; i + bb
    229 
    230     mov     r7, #1
    231     ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    232     add     r4, r7, r4, lsr #8          ; split = 1 +  (((range-1) * pp[i>>1]) >> 8)
    233 
    234     addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    235     subcs   r4, r5, r4                  ; if  (bb) range = range-split
    236 
    237     clz     r6, r4
    238     sub     r6, r6, #24
    239 
    240     adds    r3, r3, r6                  ; count += shift
    241     lsl     r5, r4, r6                  ; range <<= shift
    242     bmi     extra_count_lt_zero         ; if(count >= 0)
    243 
    244     sub     r6, r6, r3                  ; offset= shift - count
    245     sub     r4, r6, #1                  ; offset-1
    246     lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    247     bpl     extra_high_bit_not_set
    248 
    249     ldr     r4, [r0, #vp8_writer_pos]   ; x
    250     sub     r4, r4, #1                  ; x = w->pos - 1
    251     b       extra_zero_while_start
    252 extra_zero_while_loop
    253     mov     r10, #0
    254     strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    255     sub     r4, r4, #1                  ; x--
    256 extra_zero_while_start
    257     cmp     r4, #0
    258     ldrge   r7, [r0, #vp8_writer_buffer]
    259     ldrb    r11, [r7, r4]
    260     cmpge   r11, #0xff
    261     beq     extra_zero_while_loop
    262 
    263     ldr     r7, [r0, #vp8_writer_buffer]
    264     ldrb    r10, [r7, r4]
    265     add     r10, r10, #1
    266     strb    r10, [r7, r4]
    267 extra_high_bit_not_set
    268     rsb     r4, r6, #24                 ; 24-offset
    269     ldr     r10, [r0, #vp8_writer_buffer]
    270     lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    271     ldr     r4, [r0, #vp8_writer_pos]
    272     lsl     r2, r2, r6                  ; lowvalue <<= offset
    273     mov     r6, r3                      ; shift = count
    274     add     r11, r4, #1                 ; w->pos++
    275     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    276     str     r11, [r0, #vp8_writer_pos]
    277     sub     r3, r3, #8                  ; count -= 8
    278     strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    279     ldr     r10, [sp, #4]               ; b->tree
    280 extra_count_lt_zero
    281     lsl     r2, r2, r6
    282 
    283     subs    r8, r8, #1                  ; --n
    284     bne     extra_bits_loop             ; while (n)
    285 
    286 no_extra_bits
    287     ldr     lr, [r1, #4]                ; e = p->Extra
    288     add     r4, r5, #1                  ; range + 1
    289     tst     lr, #1
    290     lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    291     addne   r2, r2, r4                  ; lowvalue += split
    292     subne   r4, r5, r4                  ; range = range-split
    293     tst     r2, #0x80000000             ; lowvalue & 0x80000000
    294     lsl     r5, r4, #1                  ; range <<= 1
    295     beq     end_high_bit_not_set
    296 
    297     ldr     r4, [r0, #vp8_writer_pos]
    298     mov     r7, #0
    299     sub     r4, r4, #1
    300     b       end_zero_while_start
    301 end_zero_while_loop
    302     strb    r7, [r6, r4]
    303     sub     r4, r4, #1                  ; x--
    304 end_zero_while_start
    305     cmp     r4, #0
    306     ldrge   r6, [r0, #vp8_writer_buffer]
    307     ldrb    r12, [r6, r4]
    308     cmpge   r12, #0xff
    309     beq     end_zero_while_loop
    310 
    311     ldr     r6, [r0, #vp8_writer_buffer]
    312     ldrb    r7, [r6, r4]
    313     add     r7, r7, #1
    314     strb    r7, [r6, r4]
    315 end_high_bit_not_set
    316     adds    r3, r3, #1                  ; ++count
    317     lsl     r2, r2, #1                  ; lowvalue  <<= 1
    318     bne     end_count_zero
    319 
    320     ldr     r4, [r0, #vp8_writer_pos]
    321     mvn     r3, #7
    322     ldr     r7, [r0, #vp8_writer_buffer]
    323     lsr     r6, r2, #24                 ; lowvalue >> 24
    324     add     r12, r4, #1                 ; w->pos++
    325     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    326     str     r12, [r0, #0x10]
    327     strb    r6, [r7, r4]
    328 end_count_zero
    329 skip_extra_bits
    330     add     r1, r1, #TOKENEXTRA_SZ      ; ++p
    331 check_p_lt_stop
    332     ldr     r4, [sp, #0]                ; stop
    333     cmp     r1, r4                      ; while( p < stop)
    334     bcc     while_p_lt_stop
    335 
    336     ldr     r10, [sp, #20]              ; num_parts
    337     mov     r1, #TOKENLIST_SZ
    338     mul     r1, r10, r1
    339 
    340     ldr     r6, [sp, #12]               ; mb_rows
    341     ldr     r7, [sp, #16]               ; tokenlist address
    342     subs    r6, r6, r10
    343     add     r7, r7, r1                  ; next element in the array
    344     str     r6, [sp, #12]
    345     bgt     mb_row_loop
    346 
    347     mov     r12, #32
    348 
    349 stop_encode_loop
    350     sub     r7, r5, #1                  ; range-1
    351 
    352     mov     r4, r7, lsl #7              ; ((range-1) * 128)
    353 
    354     mov     r7, #1
    355     add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * 128) >> 8)
    356 
    357     ; Counting the leading zeros is used to normalize range.
    358     clz     r6, r4
    359     sub     r6, r6, #24                 ; shift
    360 
    361     ; Flag is set on the sum of count.  This flag is used later
    362     ; to determine if count >= 0
    363     adds    r3, r3, r6                  ; count += shift
    364     lsl     r5, r4, r6                  ; range <<= shift
    365     bmi     token_count_lt_zero_se      ; if(count >= 0)
    366 
    367     sub     r6, r6, r3                  ; offset = shift - count
    368     sub     r4, r6, #1                  ; offset-1
    369     lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    370     bpl     token_high_bit_not_set_se
    371 
    372     ldr     r4, [r0, #vp8_writer_pos]   ; x
    373     sub     r4, r4, #1                  ; x = w->pos-1
    374     b       token_zero_while_start_se
    375 token_zero_while_loop_se
    376     mov     r10, #0
    377     strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    378     sub     r4, r4, #1                  ; x--
    379 token_zero_while_start_se
    380     cmp     r4, #0
    381     ldrge   r7, [r0, #vp8_writer_buffer]
    382     ldrb    r11, [r7, r4]
    383     cmpge   r11, #0xff
    384     beq     token_zero_while_loop_se
    385 
    386     ldr     r7, [r0, #vp8_writer_buffer]
    387     ldrb    r10, [r7, r4]               ; w->buffer[x]
    388     add     r10, r10, #1
    389     strb    r10, [r7, r4]               ; w->buffer[x] + 1
    390 token_high_bit_not_set_se
    391     rsb     r4, r6, #24                 ; 24-offset
    392     ldr     r10, [r0, #vp8_writer_buffer]
    393     lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    394     ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    395     lsl     r2, r2, r6                  ; lowvalue <<= offset
    396     mov     r6, r3                      ; shift = count
    397     add     r11, r4, #1                 ; w->pos++
    398     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    399     str     r11, [r0, #vp8_writer_pos]
    400     sub     r3, r3, #8                  ; count -= 8
    401     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
    402 
    403 token_count_lt_zero_se
    404     lsl     r2, r2, r6                  ; lowvalue <<= shift
    405 
    406     subs    r12, r12, #1
    407     bne     stop_encode_loop
    408 
    409     ldr     r10, [sp, #8]               ; *size
    410     ldr     r11, [r10]
    411     ldr     r4,  [r0, #vp8_writer_pos]  ; w->pos
    412     add     r11, r11, r4                ; *size += w->pos
    413     str     r11, [r10]
    414 
    415     ldr     r9, [sp, #20]               ; num_parts
    416     sub     r9, r9, #1
    417     ldr     r10, [sp, #28]              ; i
    418     cmp     r10, r9                     ; if(i<(num_part - 1))
    419     bge     skip_write_partition
    420 
    421     ldr     r12, [sp, #40]              ; ptr
    422     add     r12, r12, r4                ; ptr += w->pos
    423     str     r12, [sp, #40]
    424 
    425     ldr     r9, [sp, #24]               ; cx_data
    426     mov     r8, r4, asr #8
    427     strb    r4, [r9, #0]
    428     strb    r8, [r9, #1]
    429     mov     r4, r4, asr #16
    430     strb    r4, [r9, #2]
    431 
    432     add     r9, r9, #3                  ; cx_data += 3
    433     str     r9, [sp, #24]
    434 
    435 skip_write_partition
    436 
    437     ldr     r11, [sp, #28]              ; i
    438     ldr     r10, [sp, #20]              ; num_parts
    439 
    440     add     r11, r11, #1                ; i++
    441     str     r11, [sp, #28]
    442 
    443     ldr     r7, [sp, #32]               ; cpi->tp_list[i]
    444     mov     r1, #TOKENLIST_SZ
    445     add     r7, r7, r1                  ; next element in cpi->tp_list
    446     str     r7, [sp, #32]               ; cpi->tp_list[i+1]
    447 
    448     cmp     r10, r11
    449     bgt     numparts_loop
    450 
    451 
    452     add     sp, sp, #44
    453     pop     {r4-r11, pc}
    454     ENDP
    455 
    456 _VP8_COMP_common_
    457     DCD     vp8_comp_common
    458 _VP8_COMMON_MBrows_
    459     DCD     vp8_common_mb_rows
    460 _VP8_COMP_tplist_
    461     DCD     vp8_comp_tplist
    462 _VP8_COMP_bc2_
    463     DCD     vp8_comp_bc2
    464 
    465     END
    466