Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license and patent
      5 ;  grant that can be found in the LICENSE file in the root of the source
      6 ;  tree. All contributing project authors may be found in the AUTHORS
      7 ;  file in the root of the source tree.
      8 ;
      9 
     10     EXPORT |vp8_dequant_idct_add_v6|
     11 
     12     AREA |.text|, CODE, READONLY
     13 ;void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *pred,
     14 ; unsigned char *dest, int pitch, int stride)
     15 ; r0 = input
     16 ; r1 = dq
     17 ; r2 = pred
     18 ; r3 = dest
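     19 ; sp + 36 = pitch   (after the 9-register push; sp + 40 once the 4-byte dest slot is allocated)
     20 ; sp + 40 = stride  (after the 9-register push; sp + 44 once the 4-byte dest slot is allocated)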
     21 
     22 
     23 |vp8_dequant_idct_add_v6| PROC     ; dequantize input in place, two-pass IDCT, add pred, store to dest, zero input
     24     stmdb   sp!, {r4-r11, lr}       ; push 9 registers (36 bytes); popped by the ldmia at the end
     25 ; Preload the first input/dq word pair; each 32-bit word holds two 16-bit values.
     26     ldr     r4, [r0]                ; r4 = first two input coefficients
     27     ldr     r5, [r1], #4            ; r5 = first two dq factors, r1 += 4
     28 ; Park dest in a stack slot: r3 is reused below to hold an IDCT constant.
     29     sub     sp, sp, #4
     30     str     r3, [sp]                ; [sp] = dest; pitch/stride are now at sp+40 / sp+44
     31 
     32     mov     r12, #4                 ; 4 iterations x 4 coefficients = 16 coefficients
     33 ; Dequantization: input[i] * dq[i], low 16 bits of each product written back over input[i].
     34 vp8_dequant_add_loop
     35     smulbb  r6, r4, r5              ; r6 = lo(input) * lo(dq)
     36     smultt  r7, r4, r5              ; r7 = hi(input) * hi(dq)
     37 
     38     ldr     r4, [r0, #4]            ; next input word (r0 has not been advanced yet)
     39     ldr     r5, [r1], #4            ; next dq word, r1 += 4
     40 
     41     strh    r6, [r0], #2            ; store the two products back in place, r0 += 4 in total
     42     strh    r7, [r0], #2
     43 
     44     smulbb  r6, r4, r5              ; same again for the second word of this iteration
     45     smultt  r7, r4, r5
     46 
     47     subs    r12, r12, #1            ; sets the flags used by the ldrne pair and the bne below
     48 
     49     ldrne   r4, [r0, #4]            ; preload for the next iteration; skipped on the last pass
     50     ldrne   r5, [r1], #4
     51 
     52     strh    r6, [r0], #2
     53     strh    r7, [r0], #2
     54 
     55     bne     vp8_dequant_add_loop
     56 
     57     sub     r0, r0, #32             ; rewind r0 to the first of the 16 coefficients (32 bytes)
     58     mov     r1, r0                  ; first pass runs in place: r0 = read pointer, r1 = write pointer
     59 ; First pass. smulwb/smulwt give (32-bit constant * lo/hi halfword) >> 16; all add/sub are per 16-bit lane.
     60 ; short_idct4x4llm_v6_dual: two packed lanes per iteration; w0/w8/w16/w24 = words at byte offsets +0/+8/+16/+24
     61     ldr     r3, cospi8sqrt2minus1   ; r3 = 0x00004E7B from the constant pool
     62     ldr     r4, sinpi8sqrt2         ; r4 = 0x00008A8C from the constant pool
     63     ldr     r6, [r0, #8]            ; r6 = w8
     64     mov     r5, #2                  ; two iterations cover all four words of each group
     65 vp8_dequant_idct_loop1_v6
     66     ldr     r12, [r0, #24]          ; r12 = w24
     67     ldr     r14, [r0, #16]          ; r14 = w16
     68     smulwt  r9, r3, r6              ; (r3 * hi(w8)) >> 16
     69     smulwb  r7, r3, r6              ; (r3 * lo(w8)) >> 16
     70     smulwt  r10, r4, r6             ; (r4 * hi(w8)) >> 16
     71     smulwb  r8, r4, r6              ; (r4 * lo(w8)) >> 16
     72     pkhbt   r7, r7, r9, lsl #16     ; r7 = packed (r3 * w8) >> 16
     73     smulwt  r11, r3, r12            ; (r3 * hi(w24)) >> 16
     74     pkhbt   r8, r8, r10, lsl #16    ; r8 = packed (r4 * w8) >> 16
     75     uadd16  r6, r6, r7              ; r6 = w8 + ((r3 * w8) >> 16)
     76     smulwt  r7, r4, r12             ; (r4 * hi(w24)) >> 16
     77     smulwb  r9, r3, r12             ; (r3 * lo(w24)) >> 16
     78     smulwb  r10, r4, r12            ; (r4 * lo(w24)) >> 16
     79     subs    r5, r5, #1              ; loop counter; flags are consumed by the ldrne and bne below
     80     pkhbt   r9, r9, r11, lsl #16    ; r9 = packed (r3 * w24) >> 16
     81     ldr     r11, [r0], #4           ; r11 = w0, r0 += 4
     82     pkhbt   r10, r10, r7, lsl #16   ; r10 = packed (r4 * w24) >> 16
     83     uadd16  r7, r12, r9             ; r7 = w24 + ((r3 * w24) >> 16)
     84     usub16  r7, r8, r7              ; c1 = ((r4 * w8) >> 16) - (w24 + ((r3 * w24) >> 16))
     85     uadd16  r6, r6, r10             ; d1 = (w8 + ((r3 * w8) >> 16)) + ((r4 * w24) >> 16)
     86     uadd16  r10, r11, r14           ; a1 = w0 + w16
     87     usub16  r8, r11, r14            ; b1 = w0 - w16
     88     uadd16  r9, r10, r6             ; a1 + d1
     89     usub16  r10, r10, r6            ; a1 - d1
     90     uadd16  r6, r8, r7              ; b1 + c1
     91     usub16  r7, r8, r7              ; b1 - c1
     92     str     r6, [r1, #8]            ; offset +8  <- b1 + c1
     93     ldrne   r6, [r0, #8]            ; preload w8 for the second iteration (r0 already advanced)
     94     str     r7, [r1, #16]           ; offset +16 <- b1 - c1
     95     str     r10, [r1, #24]          ; offset +24 <- a1 - d1
     96     str     r9, [r1], #4            ; offset +0  <- a1 + d1, r1 += 4
     97     bne     vp8_dequant_idct_loop1_v6
     98 ; Second pass: per iteration, A = first 4 coefficients read, B = next 4; A0..A3/B0..B3 packed as {hi: A, lo: B}.
     99     mov     r5, #2                  ; two iterations, two output rows each
    100     sub     r0, r1, #8              ; r1 advanced 8 bytes in the first pass; r0 = start of the coefficients
    101 vp8_dequant_idct_loop2_v6
    102     ldr     r6, [r0], #4            ; r6 = {hi: A1, lo: A0}
    103     ldr     r7, [r0], #4            ; r7 = {hi: A3, lo: A2}
    104     ldr     r8, [r0], #4            ; r8 = {hi: B1, lo: B0}
    105     ldr     r9, [r0], #4            ; r9 = {hi: B3, lo: B2}
    106     smulwt  r1, r3, r6              ; (r3 * A1) >> 16; r1 is scratch from here on
    107     smulwt  r12, r4, r6             ; (r4 * A1) >> 16
    108     smulwt  lr, r3, r8              ; (r3 * B1) >> 16
    109     smulwt  r10, r4, r8             ; (r4 * B1) >> 16
    110     pkhbt   r11, r8, r6, lsl #16    ; r11 = {A0, B0}
    111     pkhbt   r1, lr, r1, lsl #16     ; r1  = {(r3*A1)>>16, (r3*B1)>>16}
    112     pkhbt   r12, r10, r12, lsl #16  ; r12 = {(r4*A1)>>16, (r4*B1)>>16}
    113     pkhtb   r6, r6, r8, asr #16     ; r6  = {A1, B1}
    114     uadd16  r6, r1, r6              ; r6  = x1 + ((r3 * x1) >> 16), x = A and B lanes
    115     pkhbt   lr, r9, r7, lsl #16     ; lr  = {A2, B2}
    116     uadd16  r10, r11, lr            ; a1 = x0 + x2
    117     usub16  lr, r11, lr             ; b1 = x0 - x2 (kept in lr/r14)
    118     pkhtb   r8, r7, r9, asr #16     ; r8  = {A3, B3}
    119     subs    r5, r5, #1              ; loop counter; flags are consumed by the bne at the end of the loop
    120     smulwt  r1, r3, r8              ; (r3 * A3) >> 16
    121     smulwb  r7, r3, r8              ; (r3 * B3) >> 16
    122     smulwt  r11, r4, r8             ; (r4 * A3) >> 16
    123     smulwb  r9, r4, r8              ; (r4 * B3) >> 16
    124     pkhbt   r1, r7, r1, lsl #16     ; r1  = packed (r3 * x3) >> 16
    125     uadd16  r8, r1, r8              ; r8  = x3 + ((r3 * x3) >> 16)
    126     pkhbt   r11, r9, r11, lsl #16   ; r11 = packed (r4 * x3) >> 16
    127     usub16  r1, r12, r8             ; c1 = ((r4 * x1) >> 16) - (x3 + ((r3 * x3) >> 16))
    128     uadd16  r8, r11, r6             ; d1 = ((r4 * x3) >> 16) + (x1 + ((r3 * x1) >> 16))
    129     ldr     r9, c0x00040004         ; r9 = 4 in each halfword: rounding term for the >> 3 below
    130     ldr     r12, [sp, #40]          ; r12 = pitch (step between pred rows)
    131     uadd16  r6, r10, r8             ; element 0 = a1 + d1
    132     usub16  r7, r10, r8             ; element 3 = a1 - d1
    133     uadd16  r7, r7, r9              ; + 4
    134     uadd16  r6, r6, r9              ; + 4
    135     uadd16  r10, r14, r1            ; element 1 = b1 + c1
    136     usub16  r1, r14, r1             ; element 2 = b1 - c1
    137     uadd16  r10, r10, r9            ; + 4
    138     uadd16  r1, r1, r9              ; + 4
    139     ldr     r11, [r2], r12          ; r11 = 4 pred bytes for row A, pred += pitch
    140     mov     r8, r7, asr #3          ; hi half = A element 3 >> 3
    141     pkhtb   r9, r8, r10, asr #19    ; r9 = {A element 3 >> 3, A element 1 >> 3}
    142     mov     r8, r1, asr #3          ; hi half = A element 2 >> 3
    143     pkhtb   r8, r8, r6, asr #19     ; r8 = {A element 2 >> 3, A element 0 >> 3}
    144     uxtb16  lr, r11, ror #8         ; lr = pred bytes 3 and 1, zero-extended to halfwords
    145     qadd16  r9, r9, lr              ; residual + pred for elements 3 and 1
    146     uxtb16  lr, r11                 ; lr = pred bytes 2 and 0
    147     qadd16  r8, r8, lr              ; residual + pred for elements 2 and 0
    148     usat16  r9, #8, r9              ; clamp each halfword to 0..255
    149     usat16  r8, #8, r8
    150     orr     r9, r8, r9, lsl #8      ; r9 = row A output, bytes in element order 0,1,2,3
    151     ldr     r11, [r2], r12          ; r11 = 4 pred bytes for row B, pred += pitch
    152     ldr     lr, [sp]                ; lr = current dest pointer (from the stack slot)
    153     ldr     r12, [sp, #44]          ; r12 = stride (step between dest rows)
    154     mov     r7, r7, lsl #16         ; move the row-B (lo) halfwords into the hi position ...
    155     mov     r1, r1, lsl #16
    156     mov     r10, r10, lsl #16
    157     mov     r6, r6, lsl #16         ; ... so the row-A shift/pack sequence can be repeated for row B
    158     mov     r7, r7, asr #3
    159     pkhtb   r7, r7, r10, asr #19    ; r7 = {B element 3 >> 3, B element 1 >> 3}
    160     mov     r1, r1, asr #3
    161     pkhtb   r1, r1, r6, asr #19     ; r1 = {B element 2 >> 3, B element 0 >> 3}
    162     uxtb16  r8, r11, ror #8         ; pred bytes 3 and 1
    163     qadd16  r7, r7, r8
    164     uxtb16  r8, r11                 ; pred bytes 2 and 0
    165     qadd16  r1, r1, r8
    166     usat16  r7, #8, r7              ; clamp each halfword to 0..255
    167     usat16  r1, #8, r1
    168     orr     r1, r1, r7, lsl #8      ; r1 = row B output, bytes in element order 0,1,2,3
    169     str     r9, [lr], r12           ; write row A to dest, dest += stride
    170     str     r1, [lr], r12           ; write row B to dest, dest += stride
    171     str     lr, [sp]                ; save the advanced dest pointer for the next iteration
    172     bne     vp8_dequant_idct_loop2_v6
    173 
    174 ; vpx_memset equivalent: clear the 16 coefficients (32 bytes) at input
    175     sub     r0, r0, #32             ; the second pass advanced r0 by 32 bytes; back to the start
    176     add     sp, sp, #4              ; release the dest stack slot
    177 
    178     mov     r12, #0
    179     str     r12, [r0]
    180     str     r12, [r0, #4]
    181     str     r12, [r0, #8]
    182     str     r12, [r0, #12]
    183     str     r12, [r0, #16]
    184     str     r12, [r0, #20]
    185     str     r12, [r0, #24]
    186     str     r12, [r0, #28]
    187 
    188     ldmia   sp!, {r4 - r11, pc}     ; restore r4-r11 and return by loading the saved lr into pc
    189     ENDP    ; |vp8_dequant_idct_add_v6|
    190 
    191 ; Constant Pool: 32-bit literals loaded by label (ldr rN, <label>) in vp8_dequant_idct_add_v6
    192 cospi8sqrt2minus1 DCD 0x00004E7B    ; 20091 ~= (sqrt(2)*cos(pi/8) - 1) * 2^16; multiplied via smulw (>> 16)
    193 sinpi8sqrt2       DCD 0x00008A8C    ; 35468 ~= sqrt(2)*sin(pi/8) * 2^16; multiplied via smulw (>> 16)
    194 c0x00040004       DCD 0x00040004    ; 4 in each halfword: rounding term added before the >> 3
    195 
    196     END
    197