Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_fast_quantize_b_armv6|     ; make the routine's symbol visible to other objects
     13 
     14     INCLUDE asm_enc_offsets.asm             ; presumably defines the vp8_block_* / vp8_blockd_* offsets used below - TODO confirm
     15 
     16     ARM                                     ; assemble the following code as ARM instructions
     17     REQUIRE8                                ; this code requires 8-byte stack alignment
     18     PRESERVE8                               ; this code preserves 8-byte stack alignment
     19 
     20     AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code section, aligned to 2^2 = 4 bytes
     21 ; Quantize and dequantize 16 coefficients (4 per pass of 'loop'), then store eob.
     22 ; r0    BLOCK *b    (coeff, quant_fast and round pointers are loaded from it)
     23 ; r1    BLOCKD *d   (qcoeff, dqcoeff and dequant pointers are loaded; eob is stored)
     24 |vp8_fast_quantize_b_armv6| PROC
     25     stmfd   sp!, {r1, r4-r11, lr}   ; r1 (BLOCKD *d) is saved as well; PART 2 reloads it from [sp, #0]
     26 
     27     ldr     r3, [r0, #vp8_block_coeff]      ; r3 = coeff (read, post-incremented in loop)
     28     ldr     r4, [r0, #vp8_block_quant_fast] ; r4 = quant_fast (read, post-incremented in loop)
     29     ldr     r5, [r0, #vp8_block_round]      ; r5 = round (read, post-incremented in loop)
     30     ldr     r6, [r1, #vp8_blockd_qcoeff]    ; r6 = qcoeff (written, post-incremented in loop)
     31     ldr     r7, [r1, #vp8_blockd_dqcoeff]   ; r7 = dqcoeff (written, advanced by 8 per pass)
     32     ldr     r8, [r1, #vp8_blockd_dequant]   ; r8 = dequant (read, post-incremented in loop)
     33 
     34     ldr     r2, loop_count          ; r2 = 0x1000000. 'lsls #2' per pass makes it
     35                                     ; reach zero after four passes, and the same
     36                                     ; moving bit, shifted right by 24 or 23, gives
     37                                     ; the flag that marks a nonzero quantized pair.
     38 
     39     mov     r1, #0                  ; r1 = flags for nonzero quantized pairs (one bit per pair)
     40 
     41     ; PART 1: quantization and dequantization loop (two pairs, i.e. four coefficients, per pass)
     42 loop
     43     ldr     r9, [r3], #4            ; [z1 | z0]
     44     ldr     r10, [r5], #4           ; [r1 | r0]  (round values, not the registers)
     45     ldr     r11, [r4], #4           ; [q1 | q0]
     46 
     47     ssat16  lr, #1, r9              ; [sz1 | sz0]: -1 where z < 0, else 0
     48     eor     r9, r9, lr              ; [z1 ^ sz1 | z0 ^ sz0]
     49     ssub16  r9, r9, lr              ; x = (z ^ sz) - sz, per halfword
     50     sadd16  r9, r9, r10             ; [x1+r1 | x0+r0]
     51 
     52     ldr     r12, [r3], #4           ; [z3 | z2]
     53 
     54     smulbb  r0, r9, r11             ; [(x0+r0)*q0]
     55     smultt  r9, r9, r11             ; [(x1+r1)*q1]
     56 
     57     ldr     r10, [r5], #4           ; [r3 | r2]  (round values, not the registers)
     58 
     59     ssat16  r11, #1, r12            ; [sz3 | sz2]: -1 where z < 0, else 0
     60     eor     r12, r12, r11           ; [z3 ^ sz3 | z2 ^ sz2]
     61     pkhtb   r0, r9, r0, asr #16     ; [y1 | y0]: upper 16 bits of each product
     62     ldr     r9, [r4], #4            ; [q3 | q2]
     63     ssub16  r12, r12, r11           ; x = (z ^ sz) - sz, per halfword
     64 
     65     sadd16  r12, r12, r10           ; [x3+r3 | x2+r2]
     66 
     67     eor     r0, r0, lr              ; [(y1 ^ sz1) | (y0 ^ sz0)]
     68 
     69     smulbb  r10, r12, r9            ; [(x2+r2)*q2]
     70     smultt  r12, r12, r9            ; [(x3+r3)*q3]
     71 
     72     ssub16  r0, r0, lr              ; x = (y ^ sz) - sz  (sign of z applied to y)
     73 
     74     cmp     r0, #0                  ; is the whole pair [x1 | x0] zero?
     75     orrne   r1, r1, r2, lsr #24     ; if not, set flag 0x01/0x04/0x10/0x40 (by pass)
     76 
     77     str     r0, [r6], #4            ; store pair [x1 | x0] to qcoeff, advance 4 bytes
     78     ldr     r9, [r8], #4            ; [dq1 | dq0]
     79 
     80     pkhtb   r10, r12, r10, asr #16  ; [y3 | y2]: upper 16 bits of each product
     81     eor     r10, r10, r11           ; [(y3 ^ sz3) | (y2 ^ sz2)]
     82     ssub16  r10, r10, r11           ; x = (y ^ sz) - sz  (sign of z applied to y)
     83 
     84     cmp     r10, #0                 ; is the whole pair [x3 | x2] zero?
     85     orrne   r1, r1, r2, lsr #23     ; if not, set flag 0x02/0x08/0x20/0x80 (by pass)
     86 
     87     str     r10, [r6], #4           ; store pair [x3 | x2] to qcoeff, advance 4 bytes
     88     ldr     r11, [r8], #4           ; [dq3 | dq2]
     89 
     90     smulbb  r12, r0, r9             ; [x0*dq0]
     91     smultt  r0, r0, r9              ; [x1*dq1]
     92 
     93     smulbb  r9, r10, r11            ; [x2*dq2]
     94     smultt  r10, r10, r11           ; [x3*dq3]
     95 
     96     lsls    r2, r2, #2              ; update loop counter; Z is set once the bit is shifted out
     97     strh    r12, [r7, #0]           ; dqcoeff[0] = [x0*dq0]  (low 16 bits stored)
     98     strh    r0, [r7, #2]            ; dqcoeff[1] = [x1*dq1]
     99     strh    r9, [r7, #4]            ; dqcoeff[2] = [x2*dq2]
    100     strh    r10, [r7, #6]           ; dqcoeff[3] = [x3*dq3]
    101     add     r7, r7, #8              ; advance dqcoeff by 8 bytes (the 4 halfwords just stored)
    102     bne     loop                    ; uses Z from lsls: the strh/add above do not update flags
    103 
    104     ; PART 2: check position for eob...
    105     mov     lr, #0                  ; init eob
    106     cmp     r1, #0                  ; any nonzero coeffs after quantization?
    107     ldr     r11, [sp, #0]           ; r11 = BLOCKD pointer saved on entry (flags untouched)
    108     beq     end                     ; skip eob calculations if all zero (eob stays 0)
    109 
    110     ldr     r0, [r11, #vp8_blockd_qcoeff] ; r0 = qcoeff base again (r6 was advanced by the loop)
    111 
    112     ; check shortcut for nonzero qcoeffs: flag bit k covers qcoeff[2k] and qcoeff[2k+1]
    113     tst    r1, #0x80                ; qcoeff[14] or qcoeff[15] nonzero?
    114     bne    quant_coeff_15_14
    115     tst    r1, #0x20                ; qcoeff[10] or qcoeff[11] nonzero?
    116     bne    quant_coeff_13_11
    117     tst    r1, #0x8                 ; qcoeff[6] or qcoeff[7] nonzero?
    118     bne    quant_coeff_12_7
    119     tst    r1, #0x40                ; qcoeff[12] or qcoeff[13] nonzero?
    120     bne    quant_coeff_10_9
    121     tst    r1, #0x10                ; qcoeff[8] or qcoeff[9] nonzero?
    122     bne    quant_coeff_8_3
    123     tst    r1, #0x2                 ; qcoeff[2] or qcoeff[3] nonzero?
    124     bne    quant_coeff_6_5
    125     tst    r1, #0x4                 ; qcoeff[4] or qcoeff[5] nonzero?
    126     bne    quant_coeff_4_2
    127     b      quant_coeff_1_0          ; otherwise only the flag for qcoeff[0]/qcoeff[1] is set
    128     ; Scan entries: rc = qcoeff index tested (byte offset 2*rc), i = scan position; eob = i + 1
    129 quant_coeff_15_14
    130     ldrh    r2, [r0, #30]       ; rc=15, i=15
    131     mov     lr, #16             ; eob candidate = i + 1
    132     cmp     r2, #0
    133     bne     end                 ; nonzero: lr already holds eob
    134 
    135     ldrh    r3, [r0, #28]       ; rc=14, i=14
    136     mov     lr, #15
    137     cmp     r3, #0
    138     bne     end
    139 
    140 quant_coeff_13_11
    141     ldrh    r2, [r0, #22]       ; rc=11, i=13
    142     mov     lr, #14
    143     cmp     r2, #0
    144     bne     end
    145 
    146 quant_coeff_12_7
    147     ldrh    r3, [r0, #14]       ; rc=7,  i=12
    148     mov     lr, #13
    149     cmp     r3, #0
    150     bne     end
    151 
    152     ldrh    r2, [r0, #20]       ; rc=10, i=11
    153     mov     lr, #12
    154     cmp     r2, #0
    155     bne     end
    156 
    157 quant_coeff_10_9
    158     ldrh    r3, [r0, #26]       ; rc=13, i=10
    159     mov     lr, #11
    160     cmp     r3, #0
    161     bne     end
    162 
    163     ldrh    r2, [r0, #24]       ; rc=12, i=9
    164     mov     lr, #10
    165     cmp     r2, #0
    166     bne     end
    167 
    168 quant_coeff_8_3
    169     ldrh    r3, [r0, #18]       ; rc=9,  i=8
    170     mov     lr, #9
    171     cmp     r3, #0
    172     bne     end
    173 
    174     ldrh    r2, [r0, #12]       ; rc=6,  i=7
    175     mov     lr, #8
    176     cmp     r2, #0
    177     bne     end
    178 
    179 quant_coeff_6_5
    180     ldrh    r3, [r0, #6]        ; rc=3,  i=6
    181     mov     lr, #7
    182     cmp     r3, #0
    183     bne     end
    184 
    185     ldrh    r2, [r0, #4]        ; rc=2,  i=5
    186     mov     lr, #6
    187     cmp     r2, #0
    188     bne     end
    189 
    190 quant_coeff_4_2
    191     ldrh    r3, [r0, #10]       ; rc=5,  i=4
    192     mov     lr, #5
    193     cmp     r3, #0
    194     bne     end
    195 
    196     ldrh    r2, [r0, #16]       ; rc=8,  i=3
    197     mov     lr, #4
    198     cmp     r2, #0
    199     bne     end
    200 
    201     ldrh    r3, [r0, #8]        ; rc=4,  i=2
    202     mov     lr, #3
    203     cmp     r3, #0
    204     bne     end
    205 
    206 quant_coeff_1_0
    207     ldrh    r2, [r0, #2]        ; rc=1,  i=1
    208     mov     lr, #2
    209     cmp     r2, #0
    210     bne     end
    211 
    212     mov     lr, #1              ; rc=0,  i=0 (not loaded or tested: eob = 1)
    213 
    214 end
    215     str     lr, [r11, #vp8_blockd_eob] ; store eob (lr) into the BLOCKD
    216     ldmfd   sp!, {r1, r4-r11, pc}   ; restore saved registers; loading pc returns to the caller
    217 
    218     ENDP
    219 
    220 loop_count
    221     DCD     0x1000000               ; initial counter loaded into r2: bit 24 set, so four 'lsls #2' steps shift it out
    222 
    223     END
    224 
    225