Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
      12     EXPORT  |vp8_recon_b_armv6|      ; 4 pixels per row, 4 rows
      13     EXPORT  |vp8_recon2b_armv6|      ; 8 pixels per row, 4 rows
      14     EXPORT  |vp8_recon4b_armv6|      ; 16 pixels per row, 4 rows
      15 
      16     AREA    |.text|, CODE, READONLY  ; name this block of code
      17 prd     RN  r0                       ; prediction pointer (pred_ptr argument)
      18 dif     RN  r1                       ; pointer to the 16-bit differences (dif_ptr argument)
      19 dst     RN  r2                       ; destination pointer (dst_ptr argument)
      20 stride      RN  r3                   ; amount added to dst after each row
     21 
      22 ;void vp8_recon_b_armv6(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride)
      23 ; R0 unsigned char *pred_ptr   (register alias: prd)
      24 ; R1 short *diff_ptr           (register alias: dif)
      25 ; R2 unsigned char *dst_ptr    (register alias: dst)
      26 ; R3 int stride                (register alias: stride)
      27 
      28 ; Description:
      29 ; Works through four rows of four pixels, fully unrolled.  For each row the
      30 ; four 16-bit Diff values are added to the four Pred bytes, every sum is
         ; clamped to 0..255 and the four resulting bytes are stored to Dst.
         ; Per row, Pred advances by 16 bytes and Dst by stride; Diff advances by
         ; 32 bytes between rows.
      31 
      32 ; Restrictions :
      33 ; all buffers are expected to be 4 byte aligned coming in and
      34 ; going out.
         ; NOTE(review): the lane comments below (byte 0 / halfword 0 in the low
         ; bits of a loaded word) assume little-endian loads - confirm for target.
      35 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
      36 ;
      37 ;
      38 ;
      39 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
      40 |vp8_recon_b_armv6| PROC
      41     stmdb   sp!, {r4 - r9, lr}      ; save r4-r9 and lr (r5 is not used below)
      42 
      43     ;row 0: pixels 0, 1, 2, 3
      44     ldr     r4, [prd], #16          ; 3 | 2 | 1 | 0   (4 pred bytes, then prd += 16)
      45     ldr     r6, [dif, #0]           ;     1 |     0
      46     ldr     r7, [dif, #4]           ;     3 |     2
      47 
      48     pkhbt   r8, r6, r7, lsl #16     ;     2 |     0   (even-numbered diffs)
      49     pkhtb   r9, r7, r6, asr #16     ;     3 |     1   (odd-numbered diffs)
      50 
      51     uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 1 | 0
      52     uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
      53 
      54     usat16  r8, #8, r8              ; clamp both halfword sums to 0..255
      55     usat16  r9, #8, r9
      56     add     dif, dif, #32           ; diffs for the next row start 32 bytes on
      57     orr     r8, r8, r9, lsl #8      ; merge odd and even results: 3 | 2 | 1 | 0
      58 
      59     str     r8, [dst], stride       ; write the row, then dst += stride
      60 
      61     ;row 1: pixels 0, 1, 2, 3
      62     ldr     r4, [prd], #16          ; 3 | 2 | 1 | 0
      63 ;;  ldr     r6, [dif, #8]           ;     1 |     0
      64 ;;  ldr     r7, [dif, #12]          ;     3 |     2
      65     ldr     r6, [dif, #0]           ;     1 |     0
      66     ldr     r7, [dif, #4]           ;     3 |     2
      67 
      68     pkhbt   r8, r6, r7, lsl #16     ;     2 |     0
      69     pkhtb   r9, r7, r6, asr #16     ;     3 |     1
      70 
      71     uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 1 | 0
      72     uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
      73 
      74     usat16  r8, #8, r8              ; clamp to 0..255
      75     usat16  r9, #8, r9
      76     add     dif, dif, #32           ; step to the next row of diffs
      77     orr     r8, r8, r9, lsl #8      ; 3 | 2 | 1 | 0
      78 
      79     str     r8, [dst], stride       ; write the row, then dst += stride
      80 
      81     ;row 2: pixels 0, 1, 2, 3
      82     ldr     r4, [prd], #16          ; 3 | 2 | 1 | 0
      83 ;;  ldr     r6, [dif, #16]          ;     1 |     0
      84 ;;  ldr     r7, [dif, #20]          ;     3 |     2
      85     ldr     r6, [dif, #0]           ;     1 |     0
      86     ldr     r7, [dif, #4]           ;     3 |     2
      87 
      88     pkhbt   r8, r6, r7, lsl #16     ;     2 |     0
      89     pkhtb   r9, r7, r6, asr #16     ;     3 |     1
      90 
      91     uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 1 | 0
      92     uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
      93 
      94     usat16  r8, #8, r8              ; clamp to 0..255
      95     usat16  r9, #8, r9
      96     add     dif, dif, #32           ; step to the next row of diffs
      97     orr     r8, r8, r9, lsl #8      ; 3 | 2 | 1 | 0
      98 
      99     str     r8, [dst], stride       ; write the row, then dst += stride
     100 
     101     ;row 3: pixels 0, 1, 2, 3
     102     ldr     r4, [prd], #16          ; 3 | 2 | 1 | 0
     103 ;;  ldr     r6, [dif, #24]          ;     1 |     0
     104 ;;  ldr     r7, [dif, #28]          ;     3 |     2
     105     ldr     r6, [dif, #0]           ;     1 |     0
     106     ldr     r7, [dif, #4]           ;     3 |     2
     107 
     108     pkhbt   r8, r6, r7, lsl #16     ;     2 |     0
     109     pkhtb   r9, r7, r6, asr #16     ;     3 |     1
     110 
     111     uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 1 | 0
     112     uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
     113 
     114     usat16  r8, #8, r8              ; clamp to 0..255
     115     usat16  r9, #8, r9
     116     orr     r8, r8, r9, lsl #8      ; 3 | 2 | 1 | 0
     117 
     118     str     r8, [dst], stride       ; write the last row (the dst update is not used afterwards)
     119 
     120     ldmia   sp!, {r4 - r9, pc}      ; restore r4-r9 and return by loading the saved lr into pc
     121 
     122     ENDP    ; |vp8_recon_b_armv6|
    123 
    124 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    125 ;
    126 ;
    127 ;
    128 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
     129 ; R0 char  *pred_ptr
     130 ; R1 short *dif_ptr
     131 ; R2 char  *dst_ptr
     132 ; R3 int stride
         ;
         ; Description:
         ; Runs a four-pass loop; each pass produces one 16-pixel row as four groups
         ; of four pixels.  For every group the four 16-bit differences are added to
         ; four prediction bytes, each sum is clamped to 0..255 and the four bytes
         ; are stored at dst + 0 / 4 / 8 / 12.  Per pass prd advances 16 bytes
         ; (4 per group), dif advances 32 bytes and dst advances by stride.
         ; lr is used as the pass counter; it is saved on entry and the return is
         ; made through the saved copy.
     133 |vp8_recon4b_armv6| PROC
     134     stmdb   sp!, {r4 - r9, lr}      ; save r4-r9 and lr (r5 is not used below)
     135 
     136     mov     lr, #4                  ; four rows to process
     137 
     138 recon4b_loop                        ; one pass per row
     139     ;pixels 0, 1, 2, 3 of this row
     140     ldr     r4, [prd], #4           ; 3 | 2 | 1 | 0   (4 pred bytes, then prd += 4)
     141     ldr     r6, [dif, #0]           ;     1 |     0
     142     ldr     r7, [dif, #4]           ;     3 |     2
     143 
     144     pkhbt   r8, r6, r7, lsl #16     ;     2 |     0   (even-numbered diffs)
     145     pkhtb   r9, r7, r6, asr #16     ;     3 |     1   (odd-numbered diffs)
     146 
     147     uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 1 | 0
     148     uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
     149 
     150     usat16  r8, #8, r8              ; clamp both halfword sums to 0..255
     151     usat16  r9, #8, r9
     152     orr     r8, r8, r9, lsl #8      ; merge odd and even results: 3 | 2 | 1 | 0
     153 
     154     str     r8, [dst]               ; bytes 0..3 of the row
     155 
     156     ;pixels 4, 5, 6, 7 of this row
     157     ldr     r4, [prd], #4           ; next 4 pred bytes
     158 ;;  ldr     r6, [dif, #32]
     159 ;;  ldr     r7, [dif, #36]
     160     ldr     r6, [dif, #8]           ; diffs 5 | 4
     161     ldr     r7, [dif, #12]          ; diffs 7 | 6
     162 
     163     pkhbt   r8, r6, r7, lsl #16     ; even-numbered diffs
     164     pkhtb   r9, r7, r6, asr #16     ; odd-numbered diffs
     165 
     166     uxtab16 r8, r8, r4              ; add pred bytes 0 and 2 of the word
     167     uxtab16 r9, r9, r4, ror #8      ; add pred bytes 1 and 3 of the word
     168     usat16  r8, #8, r8              ; clamp to 0..255
     169     usat16  r9, #8, r9
     170     orr     r8, r8, r9, lsl #8      ; merge back into four bytes
     171 
     172     str     r8, [dst, #4]           ; bytes 4..7 of the row
     173 
     174     ;pixels 8, 9, 10, 11 of this row
     175     ldr     r4, [prd], #4           ; next 4 pred bytes
     176 ;;  ldr     r6, [dif, #64]
     177 ;;  ldr     r7, [dif, #68]
     178     ldr     r6, [dif, #16]          ; diffs 9 | 8
     179     ldr     r7, [dif, #20]          ; diffs 11 | 10
     180 
     181     pkhbt   r8, r6, r7, lsl #16     ; even-numbered diffs
     182     pkhtb   r9, r7, r6, asr #16     ; odd-numbered diffs
     183 
     184     uxtab16 r8, r8, r4              ; add pred bytes 0 and 2 of the word
     185     uxtab16 r9, r9, r4, ror #8      ; add pred bytes 1 and 3 of the word
     186     usat16  r8, #8, r8              ; clamp to 0..255
     187     usat16  r9, #8, r9
     188     orr     r8, r8, r9, lsl #8      ; merge back into four bytes
     189 
     190     str     r8, [dst, #8]           ; bytes 8..11 of the row
     191 
     192     ;pixels 12, 13, 14, 15 of this row
     193     ldr     r4, [prd], #4           ; last 4 pred bytes of the row
     194 ;;  ldr     r6, [dif, #96]
     195 ;;  ldr     r7, [dif, #100]
     196     ldr     r6, [dif, #24]          ; diffs 13 | 12
     197     ldr     r7, [dif, #28]          ; diffs 15 | 14
     198 
     199     pkhbt   r8, r6, r7, lsl #16     ; even-numbered diffs
     200     pkhtb   r9, r7, r6, asr #16     ; odd-numbered diffs
     201 
     202     uxtab16 r8, r8, r4              ; add pred bytes 0 and 2 of the word
     203     uxtab16 r9, r9, r4, ror #8      ; add pred bytes 1 and 3 of the word
     204     usat16  r8, #8, r8              ; clamp to 0..255
     205     usat16  r9, #8, r9
     206     orr     r8, r8, r9, lsl #8      ; merge back into four bytes
     207 
     208     str     r8, [dst, #12]          ; bytes 12..15 of the row
     209 
     210     add     dst, dst, stride        ; next destination row
     211 ;;  add     dif, dif, #8
     212     add     dif, dif, #32           ; next row of diffs (16 halfwords consumed)
     213 
     214     subs    lr, lr, #1              ; one row done; sets Z when none remain
     215     bne     recon4b_loop
     216 
     217     ldmia   sp!, {r4 - r9, pc}      ; restore r4-r9 and return by loading the saved lr into pc
     218 
     219     ENDP    ; |vp8_recon4b_armv6|
    220 
    221 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    222 ;
    223 ;
    224 ;
    225 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
     226 ; R0 char  *pred_ptr
     227 ; R1 short *dif_ptr
     228 ; R2 char  *dst_ptr
     229 ; R3 int stride
         ;
         ; Description:
         ; Runs a four-pass loop; each pass produces one 8-pixel row as two groups
         ; of four pixels.  For every group the four 16-bit differences are added to
         ; four prediction bytes, each sum is clamped to 0..255 and the four bytes
         ; are stored at dst + 0 / 4.  Per pass prd advances 8 bytes (4 per group),
         ; dif advances 16 bytes and dst advances by stride.
         ; lr is used as the pass counter; it is saved on entry and the return is
         ; made through the saved copy.
     230 |vp8_recon2b_armv6| PROC
     231     stmdb   sp!, {r4 - r9, lr}      ; save r4-r9 and lr (r5 is not used below)
     232 
     233     mov     lr, #4                  ; four rows to process
     234 
     235 recon2b_loop                        ; one pass per row
     236     ;pixels 0, 1, 2, 3 of this row
     237     ldr     r4, [prd], #4           ; 3 | 2 | 1 | 0   (4 pred bytes, then prd += 4)
     238     ldr     r6, [dif, #0]           ;     1 |     0
     239     ldr     r7, [dif, #4]           ;     3 |     2
     240 
     241     pkhbt   r8, r6, r7, lsl #16     ;     2 |     0   (even-numbered diffs)
     242     pkhtb   r9, r7, r6, asr #16     ;     3 |     1   (odd-numbered diffs)
     243 
     244     uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 1 | 0
     245     uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
     246     usat16  r8, #8, r8              ; clamp both halfword sums to 0..255
     247     usat16  r9, #8, r9
     248     orr     r8, r8, r9, lsl #8      ; merge odd and even results: 3 | 2 | 1 | 0
     249 
     250     str     r8, [dst]               ; bytes 0..3 of the row
     251 
     252     ;pixels 4, 5, 6, 7 of this row
     253     ldr     r4, [prd], #4           ; next 4 pred bytes
     254 ;;  ldr     r6, [dif, #32]
     255 ;;  ldr     r7, [dif, #36]
     256     ldr     r6, [dif, #8]           ; diffs 5 | 4
     257     ldr     r7, [dif, #12]          ; diffs 7 | 6
     258 
     259     pkhbt   r8, r6, r7, lsl #16     ; even-numbered diffs
     260     pkhtb   r9, r7, r6, asr #16     ; odd-numbered diffs
     261 
     262     uxtab16 r8, r8, r4              ; add pred bytes 0 and 2 of the word
     263     uxtab16 r9, r9, r4, ror #8      ; add pred bytes 1 and 3 of the word
     264     usat16  r8, #8, r8              ; clamp to 0..255
     265     usat16  r9, #8, r9
     266     orr     r8, r8, r9, lsl #8      ; merge back into four bytes
     267 
     268     str     r8, [dst, #4]           ; bytes 4..7 of the row
     269 
     270     add     dst, dst, stride        ; next destination row
     271 ;;  add     dif, dif, #8
     272     add     dif, dif, #16           ; next row of diffs (8 halfwords consumed)
     273 
     274     subs    lr, lr, #1              ; one row done; sets Z when none remain
     275     bne     recon2b_loop
     276 
     277     ldmia   sp!, {r4 - r9, pc}      ; restore r4-r9 and return by loading the saved lr into pc
     278 
     279     ENDP    ; |vp8_recon2b_armv6|
    280 
    281     END
    282