Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license and patent
      5 ;  grant that can be found in the LICENSE file in the root of the source
      6 ;  tree. All contributing project authors may be found in the AUTHORS
      7 ;  file in the root of the source tree.
      8 ;
      9 
     10     EXPORT |vp8_dequant_idct_add_v6|
     11 
     12     AREA |.text|, CODE, READONLY
     13 ;void vp8_dequant_idct_add_v6(short *input, short *dq,
     14 ;                             unsigned char *dest, int stride)
     15 ; r0 = input
     16 ; r1 = dq
     17 ; r2 = dest
     18 ; r3 = stride
     19 
     20 |vp8_dequant_idct_add_v6| PROC
        ; Dequantize a block of 16 16-bit values, run a two-pass 4x4 inverse
        ; transform on it, add the result to a 4x4 byte block and clear the input.
        ;
        ; In:   r0 = pointer to 16 16-bit values (32 bytes), read and overwritten
        ;       r1 = pointer to 16 16-bit multipliers (32 bytes), read only
        ;       r2 = pointer to the byte block that is read, updated and written
        ;            (4 bytes per row, 4 rows)
        ;       r3 = byte distance between consecutive rows at r2
        ; Out:  the 4x4 bytes at r2 are updated; the 32 bytes at r0 are zeroed.
        ; Regs: r4-r11 are saved and restored; r0-r3, r12 and lr are used as scratch.
        ; Stack: 9 saved words plus 1 word holding the stride.
        ;
        ; Steps:
        ;   1. in[i] = low 16 bits of in[i] * dq[i], for i = 0..15, in place.
        ;   2. Pass 1 combines the words at byte offsets +0/+8/+16/+24, two
        ;      16-bit lanes per word, in place (rows of a row-major 4x4 block,
        ;      presumably -- layout inferred from the 8-byte step).
        ;   3. Pass 2 combines four consecutive 16-bit values at a time, adds 4,
        ;      shifts right by 3, adds the bytes at r2, clamps to 0..255 and stores.
        ;   4. The 32 bytes at r0 are set to zero.
        ;
        ; Notation used below: C = cospi8sqrt2minus1, S = sinpi8sqrt2, and
        ; "C*v" / "S*v" mean (constant * v) >> 16 as produced by smulwb/smulwt.
        ; uadd16/usub16 add/subtract the two 16-bit lanes independently and do
        ; not touch N/Z/C/V, so the Z flag set by each subs is still valid at
        ; the ldrne/bne that follow it.
        ;
        ; NOTE(review): all three pointers are accessed with word ldr/str;
        ; presumably callers keep them (and the stride) 4-byte aligned or the
        ; target allows unaligned word access -- confirm against the callers.
     21     stmdb   sp!, {r4-r11, lr}       ; save r4-r11 and the return address
     22 
     23     ldr     r4, [r0]                ; r4 = first two input values (packed 16-bit pair)
     24     ldr     r5, [r1], #4            ; r5 = first two dq values; r1 += 4
     25 
     26     sub     sp, sp, #4              ; one stack word for the stride ...
     27     str     r3, [sp]                ; ... because r3 is reused for the constant C below
     28 
     29     mov     r12, #4                 ; 4 iterations x 4 values = 16 values
     30 
        ; Step 1: multiply each input value by its dq value, in place.
        ; Each iteration handles two packed pairs and preloads the next pair.
     31 vp8_dequant_add_loop
     32     smulbb  r6, r4, r5              ; r6 = input.lo * dq.lo
     33     smultt  r7, r4, r5              ; r7 = input.hi * dq.hi
     34 
     35     ldr     r4, [r0, #4]            ; second input pair (r0 has not advanced yet)
     36     ldr     r5, [r1], #4            ; second dq pair; r1 += 4
     37 
     38     strh    r6, [r0], #2            ; store the low 16 bits of each product over
     39     strh    r7, [r0], #2            ; the two input values just consumed; r0 += 4
     40 
     41     smulbb  r6, r4, r5              ; same for the second pair
     42     smultt  r7, r4, r5
     43 
     44     subs    r12, r12, #1            ; sets Z for the ldrne and bne below
     45 
     46     ldrne   r4, [r0, #4]            ; preload the next iteration's first pair, only if
     47     ldrne   r5, [r1], #4            ; there is one (nothing past the 32 bytes is read)
     48 
     49     strh    r6, [r0], #2
     50     strh    r7, [r0], #2
     51 
     52     bne     vp8_dequant_add_loop
     53 
     54     sub     r0, r0, #32             ; r0 advanced 32 bytes above; back to the block start
     55     mov     r1, r0                  ; r1 = write pointer for pass 1 (in place)
     56 
     57 ; short_idct4x4llm_v6_dual
        ; Step 2 (pass 1): x0..x3 are the words at offsets +0/+8/+16/+24 from r0.
        ; Each word holds two 16-bit lanes, so two iterations cover all four lanes.
        ;   a = x0 + x2              b = x0 - x2
        ;   c = S*x1 - (x3 + C*x3)   d = (x1 + C*x1) + S*x3
        ;   +0 <- a + d   +8 <- b + c   +16 <- b - c   +24 <- a - d
     58     ldr     r3, cospi8sqrt2minus1   ; r3 = C (kept through both passes)
     59     ldr     r4, sinpi8sqrt2         ; r4 = S (kept through both passes)
     60     ldr     r6, [r0, #8]            ; r6 = x1 for the first iteration
     61     mov     r5, #2                  ; 2 iterations
     62 vp8_dequant_idct_loop1_v6
     63     ldr     r12, [r0, #24]          ; r12 = x3
     64     ldr     r14, [r0, #16]          ; r14 = x2
     65     smulwt  r9, r3, r6              ; C*x1.hi
     66     smulwb  r7, r3, r6              ; C*x1.lo
     67     smulwt  r10, r4, r6             ; S*x1.hi
     68     smulwb  r8, r4, r6              ; S*x1.lo
     69     pkhbt   r7, r7, r9, lsl #16     ; r7 = C*x1 (both lanes repacked)
     70     smulwt  r11, r3, r12            ; C*x3.hi
     71     pkhbt   r8, r8, r10, lsl #16    ; r8 = S*x1
     72     uadd16  r6, r6, r7              ; r6 = x1 + C*x1
     73     smulwt  r7, r4, r12             ; S*x3.hi
     74     smulwb  r9, r3, r12             ; C*x3.lo
     75     smulwb  r10, r4, r12            ; S*x3.lo
     76     subs    r5, r5, #1              ; loop count; Z is consumed by the ldrne/bne below
     77     pkhbt   r9, r9, r11, lsl #16    ; r9 = C*x3
     78     ldr     r11, [r0], #4           ; r11 = x0; r0 += 4 (next two lanes)
     79     pkhbt   r10, r10, r7, lsl #16   ; r10 = S*x3
     80     uadd16  r7, r12, r9             ; r7 = x3 + C*x3
     81     usub16  r7, r8, r7              ; c = S*x1 - (x3 + C*x3)
     82     uadd16  r6, r6, r10             ; d = (x1 + C*x1) + S*x3
     83     uadd16  r10, r11, r14           ; a = x0 + x2
     84     usub16  r8, r11, r14            ; b = x0 - x2
     85     uadd16  r9, r10, r6             ; a + d
     86     usub16  r10, r10, r6            ; a - d
     87     uadd16  r6, r8, r7              ; b + c
     88     usub16  r7, r8, r7              ; b - c
     89     str     r6, [r1, #8]            ; +8  <- b + c
     90     ldrne   r6, [r0, #8]            ; next iteration's x1 (r0 already advanced by 4)
     91     str     r7, [r1, #16]           ; +16 <- b - c
     92     str     r10, [r1, #24]          ; +24 <- a - d
     93     str     r9, [r1], #4            ; +0  <- a + d; r1 += 4
     94     bne     vp8_dequant_idct_loop1_v6
     95 
        ; Step 3 (pass 2): each iteration loads 16 bytes = two groups of four
        ; 16-bit values, A (first 8 bytes) and B (next 8 bytes), each y0..y3.
        ; Values are repacked so that one register holds the same yN of both
        ; groups: A in the high halfword, B in the low halfword.
        ;   a = y0 + y2              b = y0 - y2
        ;   c = S*y1 - (y3 + C*y3)   d = (y1 + C*y1) + S*y3
        ;   o0 = a + d   o1 = b + c   o2 = b - c   o3 = a - d
        ; Each oN gets +4, an arithmetic >> 3, the matching byte from r2 added,
        ; and is clamped to 0..255. Group A goes to the row at r2, group B to
        ; the row at r2 + stride.
     96     mov     r5, #2                  ; 2 iterations
     97     sub     r0, r1, #8              ; r1 advanced 8 bytes in pass 1; r0 = block start
     98 vp8_dequant_idct_loop2_v6
     99     ldr     r6, [r0], #4            ; r6 = A: y0 (lo), y1 (hi)
    100     ldr     r7, [r0], #4            ; r7 = A: y2 (lo), y3 (hi)
    101     ldr     r8, [r0], #4            ; r8 = B: y0 (lo), y1 (hi)
    102     ldr     r9, [r0], #4            ; r9 = B: y2 (lo), y3 (hi)
    103     smulwt  r1, r3, r6              ; C * A.y1
    104     smulwt  r12, r4, r6             ; S * A.y1
    105     smulwt  lr, r3, r8              ; C * B.y1
    106     smulwt  r10, r4, r8             ; S * B.y1
    107     pkhbt   r11, r8, r6, lsl #16    ; r11 = y0   {hi: A, lo: B}
    108     pkhbt   r1, lr, r1, lsl #16     ; r1  = C*y1 {hi: A, lo: B}
    109     pkhbt   r12, r10, r12, lsl #16  ; r12 = S*y1 {hi: A, lo: B}
    110     pkhtb   r6, r6, r8, asr #16     ; r6  = y1   {hi: A, lo: B}
    111     uadd16  r6, r1, r6              ; r6  = y1 + C*y1
    112     pkhbt   lr, r9, r7, lsl #16     ; lr  = y2   {hi: A, lo: B}
    113     uadd16  r10, r11, lr            ; a = y0 + y2
    114     usub16  lr, r11, lr             ; b = y0 - y2 (lr is r14, read again below)
    115     pkhtb   r8, r7, r9, asr #16     ; r8  = y3   {hi: A, lo: B}
    116     subs    r5, r5, #1              ; loop count; Z is consumed by the final bne
    117     smulwt  r1, r3, r8              ; C * A.y3
    118     smulwb  r7, r3, r8              ; C * B.y3
    119     smulwt  r11, r4, r8             ; S * A.y3
    120     smulwb  r9, r4, r8              ; S * B.y3
    121     pkhbt   r1, r7, r1, lsl #16     ; r1  = C*y3 {hi: A, lo: B}
    122     uadd16  r8, r1, r8              ; r8  = y3 + C*y3
    123     pkhbt   r11, r9, r11, lsl #16   ; r11 = S*y3 {hi: A, lo: B}
    124     usub16  r1, r12, r8             ; c = S*y1 - (y3 + C*y3)
    125     uadd16  r8, r11, r6             ; d = S*y3 + (y1 + C*y1)
    126     ldr     r9, c0x00040004         ; r9 = 4 in each lane (rounding for the >> 3)
    127     ldr     r12, [sp]               ; get stride from stack
    128     uadd16  r6, r10, r8             ; o0 = a + d
    129     usub16  r7, r10, r8             ; o3 = a - d
    130     uadd16  r7, r7, r9              ; o3 += 4
    131     uadd16  r6, r6, r9              ; o0 += 4
    132     uadd16  r10, r14, r1            ; o1 = b + c
    133     usub16  r1, r14, r1             ; o2 = b - c
    134     uadd16  r10, r10, r9            ; o1 += 4
    135     uadd16  r1, r1, r9              ; o2 += 4
    136     ldr     r11, [r2]               ; load the 4 bytes of row A from dst
    137     mov     r8, r7, asr #3          ; high halfword = A.o3 >> 3
    138     pkhtb   r9, r8, r10, asr #19    ; r9 = {hi: A.o3 >> 3, lo: A.o1 >> 3}
    139     mov     r8, r1, asr #3          ; high halfword = A.o2 >> 3
    140     pkhtb   r8, r8, r6, asr #19     ; r8 = {hi: A.o2 >> 3, lo: A.o0 >> 3}
    141     uxtb16  lr, r11, ror #8         ; lr = dst bytes 1 and 3, zero-extended to 16 bits
    142     qadd16  r9, r9, lr              ; o1 + byte1, o3 + byte3
    143     uxtb16  lr, r11                 ; lr = dst bytes 0 and 2
    144     qadd16  r8, r8, lr              ; o0 + byte0, o2 + byte2
    145     usat16  r9, #8, r9              ; clamp both lanes to 0..255
    146     usat16  r8, #8, r8
    147     orr     r9, r8, r9, lsl #8      ; r9 = the 4 result bytes of row A, in byte order 0..3
    148     ldr     r11, [r2, r12]          ; load the 4 bytes of row B from dst + stride
    149     mov     r7, r7, lsl #16         ; move the B lanes (low halfwords) into the
    150     mov     r1, r1, lsl #16         ; high halfwords so the same shift-and-pack
    151     mov     r10, r10, lsl #16       ; sequence as for row A can be reused
    152     mov     r6, r6, lsl #16
    153     mov     r7, r7, asr #3          ; high halfword = B.o3 >> 3
    154     pkhtb   r7, r7, r10, asr #19    ; r7 = {hi: B.o3 >> 3, lo: B.o1 >> 3}
    155     mov     r1, r1, asr #3          ; high halfword = B.o2 >> 3
    156     pkhtb   r1, r1, r6, asr #19     ; r1 = {hi: B.o2 >> 3, lo: B.o0 >> 3}
    157     uxtb16  r8, r11, ror #8         ; dst bytes 1 and 3 of row B
    158     qadd16  r7, r7, r8
    159     uxtb16  r8, r11                 ; dst bytes 0 and 2 of row B
    160     qadd16  r1, r1, r8
    161     usat16  r7, #8, r7              ; clamp both lanes to 0..255
    162     usat16  r1, #8, r1
    163     orr     r1, r1, r7, lsl #8      ; r1 = the 4 result bytes of row B
    164     str     r9, [r2], r12           ; store row A; r2 += stride
    165     str     r1, [r2], r12           ; store row B; r2 += stride
    166     bne     vp8_dequant_idct_loop2_v6
    167 
    168 ; vpx_memset -- done inline: clear the 32-byte input block
    169     sub     r0, r0, #32             ; r0 advanced 32 bytes in pass 2; back to the block start
    170     add     sp, sp, #4              ; release the stride slot
    171 
    172     mov     r12, #0
    173     str     r12, [r0]               ; eight word stores zero all 16 values
    174     str     r12, [r0, #4]
    175     str     r12, [r0, #8]
    176     str     r12, [r0, #12]
    177     str     r12, [r0, #16]
    178     str     r12, [r0, #20]
    179     str     r12, [r0, #24]
    180     str     r12, [r0, #28]
    181 
    182     ldmia   sp!, {r4 - r11, pc}     ; restore r4-r11 and return (saved lr is loaded into pc)
    183     ENDP    ; |vp8_dequant_idct_add_v6|
    184 
    185 ; Constant Pool
        ; Read by label with `ldr rN, <label>`. The two multipliers are 16.16
        ; fixed-point values used with smulwb/smulwt, which return
        ; (32-bit constant * 16-bit value) >> 16.
    186 cospi8sqrt2minus1 DCD 0x00004E7B    ; 20091 = round((sqrt(2)*cos(pi/8) - 1) * 65536)
    187 sinpi8sqrt2       DCD 0x00008A8C    ; 35468 = round(sqrt(2)*sin(pi/8) * 65536)
    188 c0x00040004       DCD 0x00040004    ; 4 in each 16-bit lane: rounding term added before the >> 3
    189 
    190     END
    191