Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license and patent
      5 ;  grant that can be found in the LICENSE file in the root of the source
      6 ;  tree. All contributing project authors may be found in the AUTHORS
      7 ;  file in the root of the source tree.
      8 ;
      9 
     10 
     11     EXPORT |vp8_dequant_dc_idct_add_v6|     ; make the entry point visible to other objects
     12 
     13     AREA |.text|, CODE, READONLY            ; everything below goes in a read-only code area
     14 
     15 ;void vp8_dequant_dc_idct_add_v6(short *input, short *dq, unsigned char *pred,
     16 ; unsigned char *dest, int pitch, int stride, int Dc)
        ;
        ; Steps performed on one 4x4 block (16 halfwords = 32 bytes at input):
        ;   1. input[0] = Dc; input[i] = input[i] * dq[i] for i = 1..15 (in place,
        ;      signed 16x16 multiplies, low 16 bits kept).
        ;   2. First transform pass down the columns, two columns per iteration,
        ;      written back in place.
        ;   3. Second transform pass along the rows, two rows per iteration, with
        ;      +4 rounding and an arithmetic >>3.
        ;   4. Each result is added to the matching pred byte, clamped to 0..255 and
        ;      stored to dest, one 4-byte word per row.
        ;   5. The 32 bytes at input are cleared to zero before returning.
        ;
     17 ; r0 = input   (coefficients; overwritten, then zeroed)
     18 ; r1 = dq      (dequant factors; dq[0] is loaded but never used)
     19 ; r2 = pred    (read one word per row, advanced by pitch)
     20 ; r3 = dest    (written one word per row, advanced by stride)
     21 ; sp + 36 = pitch  ; offsets are relative to sp after the 9-register push;
     22 ; sp + 40 = stride ; add 4 once the extra "sub sp, sp, #4" has been done
     23 ; sp + 44 = Dc     ; (pitch = sp + 40, stride = sp + 44, Dc = sp + 48)
     24 
     25 
     26 |vp8_dequant_dc_idct_add_v6| PROC
     27     stmdb   sp!, {r4-r11, lr}       ; push 9 registers (36 bytes)
     28 
     29     ldr     r6, [sp, #44]           ; r6 = Dc
     30 
     31     ldr     r4, [r0]                ;input[0], input[1]
     32     ldr     r5, [r1], #4            ;dq[0], dq[1]; r1 += 4
     33 
     34     sub     sp, sp, #4              ; one stack word for the running dest pointer
     35     str     r3, [sp]                ; [sp] = dest
     36 
     37     smultt  r7, r4, r5              ; r7 = input[1] * dq[1]; no input[0]*dq[0], Dc is used instead
     38 
     39     ldr     r4, [r0, #4]            ;input[2], input[3]
     40     ldr     r5, [r1], #4            ;dq[2], dq[3]
     41 
     42     strh    r6, [r0], #2            ; input[0] = low 16 bits of Dc
     43     strh    r7, [r0], #2            ; input[1] = dequantised value
     44 
     45     smulbb  r6, r4, r5              ; r6 = input[2] * dq[2]
     46     smultt  r7, r4, r5              ; r7 = input[3] * dq[3]
     47 
     48     ldr     r4, [r0, #4]            ;input[4], input[5] (r0 is now input + 4)
     49     ldr     r5, [r1], #4            ;dq[4], dq[5]
     50 
     51     strh    r6, [r0], #2            ; store input[2]
     52     strh    r7, [r0], #2            ; store input[3]
     53 
     54     mov     r12, #3                 ; 3 passes x 4 coefficients = the remaining 12
     55 
     56 vp8_dequant_dc_add_loop
        ; On entry r4/r5 hold the next coefficient pair and dq pair; r0 points at
        ; the slot where that pair's products will be stored.
     57     smulbb  r6, r4, r5              ; low halfword product
     58     smultt  r7, r4, r5              ; high halfword product
     59 
     60     ldr     r4, [r0, #4]            ; following coefficient pair
     61     ldr     r5, [r1], #4            ; following dq pair
     62 
     63     strh    r6, [r0], #2            ; write back first product of the pair
     64     strh    r7, [r0], #2            ; write back second product of the pair
     65 
     66     smulbb  r6, r4, r5              ; second pair of this pass, low halfword
     67     smultt  r7, r4, r5              ; second pair of this pass, high halfword
     68 
     69     subs    r12, r12, #1            ; flags consumed by ldrne and bne below
     70 
     71     ldrne   r4, [r0, #4]            ; preload next pair; skipped on the final pass,
     72     ldrne   r5, [r1], #4            ; so nothing past coefficient 15 is read
     73 
     74     strh    r6, [r0], #2            ; write back
     75     strh    r7, [r0], #2            ; write back
     76 
     77     bne     vp8_dequant_dc_add_loop
     78 
     79     sub     r0, r0, #32             ; r0 was advanced by 32; rewind to start of block
     80     mov     r1, r0                  ; r1 = write pointer for the first pass (in place)
     81 
     82 ; short_idct4x4llm_v6_dual
        ; First pass. Each register holds two packed halfwords (two adjacent
        ; columns). Rows are 8 bytes apart. Below, "cos" is the constant in r3,
        ; "sin" the constant in r4, and smulw* gives (constant * halfword) >> 16.
     83     ldr     r3, cospi8sqrt2minus1   ; r3 = 0x00004E7B
     84     ldr     r4, sinpi8sqrt2         ; r4 = 0x00008A8C
     85     ldr     r6, [r0, #8]            ; r6 = row 1 of the first column pair
     86     mov     r5, #2                  ; two column pairs
     87 vp8_dequant_dc_idct_loop1_v6
     88     ldr     r12, [r0, #24]          ; r12 = row 3
     89     ldr     r14, [r0, #16]          ; r14 = row 2
     90     smulwt  r9, r3, r6              ; cos * row1 (high halfword)
     91     smulwb  r7, r3, r6              ; cos * row1 (low halfword)
     92     smulwt  r10, r4, r6             ; sin * row1 (high halfword)
     93     smulwb  r8, r4, r6              ; sin * row1 (low halfword)
     94     pkhbt   r7, r7, r9, lsl #16     ; r7 = packed cos * row1
     95     smulwt  r11, r3, r12            ; cos * row3 (high halfword)
     96     pkhbt   r8, r8, r10, lsl #16    ; r8 = packed sin * row1
     97     uadd16  r6, r6, r7              ; r6 = row1 + cos * row1
     98     smulwt  r7, r4, r12             ; sin * row3 (high halfword)
     99     smulwb  r9, r3, r12             ; cos * row3 (low halfword)
    100     smulwb  r10, r4, r12            ; sin * row3 (low halfword)
    101     subs    r5, r5, #1              ; flags consumed by ldrne and bne below
    102     pkhbt   r9, r9, r11, lsl #16    ; r9 = packed cos * row3
    103     ldr     r11, [r0], #4           ; r11 = row 0; r0 moves to the next column pair
    104     pkhbt   r10, r10, r7, lsl #16   ; r10 = packed sin * row3
    105     uadd16  r7, r12, r9             ; r7 = row3 + cos * row3
    106     usub16  r7, r8, r7              ; c1 = sin * row1 - (row3 + cos * row3)
    107     uadd16  r6, r6, r10             ; d1 = (row1 + cos * row1) + sin * row3
    108     uadd16  r10, r11, r14           ; a1 = row0 + row2
    109     usub16  r8, r11, r14            ; b1 = row0 - row2
    110     uadd16  r9, r10, r6             ; a1 + d1 -> row 0
    111     usub16  r10, r10, r6            ; a1 - d1 -> row 3
    112     uadd16  r6, r8, r7              ; b1 + c1 -> row 1
    113     usub16  r7, r8, r7              ; b1 - c1 -> row 2
    114     str     r6, [r1, #8]            ; store row 1
    115     ldrne   r6, [r0, #8]            ; preload row 1 of the next column pair (not on last pass)
    116     str     r7, [r1, #16]           ; store row 2
    117     str     r10, [r1, #24]          ; store row 3
    118     str     r9, [r1], #4            ; store row 0; r1 moves to the next column pair
    119     bne     vp8_dequant_dc_idct_loop1_v6
    120 
        ; Second pass. Each iteration loads two whole rows, A (r6, r7) then
        ; B (r8, r9), and repacks them so that row A sits in the high halfword
        ; and row B in the low halfword of every working register ({A : B}).
        ; Subscripts 0..3 are the column index within the row.
    121     mov     r5, #2                  ; two row pairs
    122     sub     r0, r1, #8              ; r1 was advanced by 8 in the first pass; r0 = start of block
    123 vp8_dequant_dc_idct_loop2_v6
    124     ldr     r6, [r0], #4            ; r6 = {A1 : A0}
    125     ldr     r7, [r0], #4            ; r7 = {A3 : A2}
    126     ldr     r8, [r0], #4            ; r8 = {B1 : B0}
    127     ldr     r9, [r0], #4            ; r9 = {B3 : B2}
    128     smulwt  r1, r3, r6              ; cos * A1
    129     smulwt  r12, r4, r6             ; sin * A1
    130     smulwt  lr, r3, r8              ; cos * B1
    131     smulwt  r10, r4, r8             ; sin * B1
    132     pkhbt   r11, r8, r6, lsl #16    ; r11 = {A0 : B0}
    133     pkhbt   r1, lr, r1, lsl #16     ; r1  = {cos*A1 : cos*B1}
    134     pkhbt   r12, r10, r12, lsl #16  ; r12 = {sin*A1 : sin*B1}
    135     pkhtb   r6, r6, r8, asr #16     ; r6  = {A1 : B1}
    136     uadd16  r6, r1, r6              ; r6  = x1 + cos * x1
    137     pkhbt   lr, r9, r7, lsl #16     ; lr  = {A2 : B2}
    138     uadd16  r10, r11, lr            ; a1 = x0 + x2
    139     usub16  lr, r11, lr             ; b1 = x0 - x2 (kept in lr/r14)
    140     pkhtb   r8, r7, r9, asr #16     ; r8  = {A3 : B3}
    141     subs    r5, r5, #1              ; flags survive to the bne at the end of the loop
    142     smulwt  r1, r3, r8              ; cos * A3
    143     smulwb  r7, r3, r8              ; cos * B3
    144     smulwt  r11, r4, r8             ; sin * A3
    145     smulwb  r9, r4, r8              ; sin * B3
    146     pkhbt   r1, r7, r1, lsl #16     ; r1  = {cos*A3 : cos*B3}
    147     uadd16  r8, r1, r8              ; r8  = x3 + cos * x3
    148     pkhbt   r11, r9, r11, lsl #16   ; r11 = {sin*A3 : sin*B3}
    149     usub16  r1, r12, r8             ; c1 = sin * x1 - (x3 + cos * x3)
    150     uadd16  r8, r11, r6             ; d1 = sin * x3 + (x1 + cos * x1)
    151     ldr     r9, c0x00040004         ; r9 = 4 in each halfword (rounding term for >> 3)
    152     ldr     r12, [sp, #40]          ; r12 = pitch (pred row step)
    153     uadd16  r6, r10, r8             ; a1 + d1  (output column 0)
    154     usub16  r7, r10, r8             ; a1 - d1  (output column 3)
    155     uadd16  r7, r7, r9              ; + 4
    156     uadd16  r6, r6, r9              ; + 4
    157     uadd16  r10, r14, r1            ; b1 + c1  (output column 1)
    158     usub16  r1, r14, r1             ; b1 - c1  (output column 2)
    159     uadd16  r10, r10, r9            ; + 4
    160     uadd16  r1, r1, r9              ; + 4
    161     ldr     r11, [r2], r12          ; r11 = 4 pred bytes for row A; pred += pitch
        ; Row A lives in the high halfwords: asr #3 / asr #19 extract it with >> 3.
    162     mov     r8, r7, asr #3          ; high halfword = row A column 3, >> 3
    163     pkhtb   r9, r8, r10, asr #19    ; r9 = {A col 3 : A col 1}
    164     mov     r8, r1, asr #3          ; high halfword = row A column 2, >> 3
    165     pkhtb   r8, r8, r6, asr #19     ; r8 = {A col 2 : A col 0}
    166     uxtb16  lr, r11, ror #8         ; lr = {pred byte 3 : pred byte 1}
    167     qadd16  r9, r9, lr              ; add prediction to columns 3 and 1
    168     uxtb16  lr, r11                 ; lr = {pred byte 2 : pred byte 0}
    169     qadd16  r8, r8, lr              ; add prediction to columns 2 and 0
    170     usat16  r9, #8, r9              ; clamp each halfword to 0..255
    171     usat16  r8, #8, r8              ; clamp each halfword to 0..255
    172     orr     r9, r8, r9, lsl #8      ; r9 = row A pixels, column 0 in the lowest byte
    173     ldr     r11, [r2], r12          ; r11 = 4 pred bytes for row B; pred += pitch
    174     ldr     lr, [sp]                ; lr = current dest pointer
    175     ldr     r12, [sp, #44]          ; r12 = stride (dest row step)
        ; Row B lives in the low halfwords: shift it up, then reuse the same extraction.
    176     mov     r7, r7, lsl #16         ; row B column 3 to the high halfword
    177     mov     r1, r1, lsl #16         ; row B column 2
    178     mov     r10, r10, lsl #16       ; row B column 1
    179     mov     r6, r6, lsl #16         ; row B column 0
    180     mov     r7, r7, asr #3          ; >> 3
    181     pkhtb   r7, r7, r10, asr #19    ; r7 = {B col 3 : B col 1}
    182     mov     r1, r1, asr #3          ; >> 3
    183     pkhtb   r1, r1, r6, asr #19     ; r1 = {B col 2 : B col 0}
    184     uxtb16  r8, r11, ror #8         ; r8 = {pred byte 3 : pred byte 1}
    185     qadd16  r7, r7, r8              ; add prediction to columns 3 and 1
    186     uxtb16  r8, r11                 ; r8 = {pred byte 2 : pred byte 0}
    187     qadd16  r1, r1, r8              ; add prediction to columns 2 and 0
    188     usat16  r7, #8, r7              ; clamp each halfword to 0..255
    189     usat16  r1, #8, r1              ; clamp each halfword to 0..255
    190     orr     r1, r1, r7, lsl #8      ; r1 = row B pixels, column 0 in the lowest byte
    191     str     r9, [lr], r12           ; write row A to dest; dest += stride
    192     str     r1, [lr], r12           ; write row B to dest; dest += stride
    193     str     lr, [sp]                ; save dest pointer for the next iteration
    194     bne     vp8_dequant_dc_idct_loop2_v6   ; uses the flags set by the subs above
    195 
    196 ; vpx_memset
        ; Inlined clear of the 32-byte coefficient block.
    197     sub     r0, r0, #32             ; the second pass advanced r0 by 32; rewind to start of block
    198     add     sp, sp, #4              ; release the dest-pointer stack word
    199 
    200     mov     r12, #0
    201     str     r12, [r0]               ; eight word stores zero all 16 halfwords
    202     str     r12, [r0, #4]
    203     str     r12, [r0, #8]
    204     str     r12, [r0, #12]
    205     str     r12, [r0, #16]
    206     str     r12, [r0, #20]
    207     str     r12, [r0, #24]
    208     str     r12, [r0, #28]
    209 
    210     ldmia   sp!, {r4 - r11, pc}     ; restore saved registers and return (saved lr -> pc)
    211     ENDP    ; |vp8_dequant_dc_idct_add_v6|
    212 
    213 ; Constant Pool
        ; Literal words loaded PC-relative by name from the routine above. The
        ; two transform constants are used as the 32-bit operand of smulwb/smulwt,
        ; which return (constant * halfword) >> 16, so they act as 16-bit
        ; fractions (value / 65536).
    214 cospi8sqrt2minus1 DCD 0x00004E7B   ; 20091 ~= (sqrt(2) * cos(pi/8) - 1) * 65536
    215 sinpi8sqrt2       DCD 0x00008A8C   ; 35468 ~= sqrt(2) * sin(pi/8) * 65536
    216 c0x00040004       DCD 0x00040004   ; 4 in each halfword; added with uadd16 before the >> 3
    217 
    218     END                            ; end of the assembly source
    219