Home | History | Annotate | Download | only in armv6
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_copy_mem16x16_v6|
     13     ; ARM
     14     ; REQUIRE8
     15     ; PRESERVE8
     16 
     17     AREA    Block, CODE, READONLY ; name this block of code
     18 ;void copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
     19 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
     20 |vp8_copy_mem16x16_v6| PROC
     21     stmdb       sp!, {r4 - r7}
     22     ;push   {r4-r7}
     23 
     24     ;preload
     25     pld     [r0, #31]                ; preload for next 16x16 block
     26 
     27     ands    r4, r0, #15
     28     beq     copy_mem16x16_fast
     29 
     30     ands    r4, r0, #7
     31     beq     copy_mem16x16_8
     32 
     33     ands    r4, r0, #3
     34     beq     copy_mem16x16_4
     35 
     36     ;copy one byte each time
     37     ldrb    r4, [r0]
     38     ldrb    r5, [r0, #1]
     39     ldrb    r6, [r0, #2]
     40     ldrb    r7, [r0, #3]
     41 
     42     mov     r12, #16
     43 
     44 copy_mem16x16_1_loop
     45     strb    r4, [r2]
     46     strb    r5, [r2, #1]
     47     strb    r6, [r2, #2]
     48     strb    r7, [r2, #3]
     49 
     50     ldrb    r4, [r0, #4]
     51     ldrb    r5, [r0, #5]
     52     ldrb    r6, [r0, #6]
     53     ldrb    r7, [r0, #7]
     54 
     55     subs    r12, r12, #1
     56 
     57     strb    r4, [r2, #4]
     58     strb    r5, [r2, #5]
     59     strb    r6, [r2, #6]
     60     strb    r7, [r2, #7]
     61 
     62     ldrb    r4, [r0, #8]
     63     ldrb    r5, [r0, #9]
     64     ldrb    r6, [r0, #10]
     65     ldrb    r7, [r0, #11]
     66 
     67     strb    r4, [r2, #8]
     68     strb    r5, [r2, #9]
     69     strb    r6, [r2, #10]
     70     strb    r7, [r2, #11]
     71 
     72     ldrb    r4, [r0, #12]
     73     ldrb    r5, [r0, #13]
     74     ldrb    r6, [r0, #14]
     75     ldrb    r7, [r0, #15]
     76 
     77     add     r0, r0, r1
     78 
     79     strb    r4, [r2, #12]
     80     strb    r5, [r2, #13]
     81     strb    r6, [r2, #14]
     82     strb    r7, [r2, #15]
     83 
     84     add     r2, r2, r3
     85 
     86     ldrneb  r4, [r0]
     87     ldrneb  r5, [r0, #1]
     88     ldrneb  r6, [r0, #2]
     89     ldrneb  r7, [r0, #3]
     90 
     91     pld     [r0, #31]               ; preload for next 16x16 block
     92 
     93     bne     copy_mem16x16_1_loop
     94 
     95     ldmia       sp!, {r4 - r7}
     96     ;pop        {r4-r7}
     97     mov     pc, lr
     98 
     99 ;copy 4 bytes each time
    100 copy_mem16x16_4
    101     ldr     r4, [r0]
    102     ldr     r5, [r0, #4]
    103     ldr     r6, [r0, #8]
    104     ldr     r7, [r0, #12]
    105 
    106     mov     r12, #16
    107 
    108 copy_mem16x16_4_loop
    109     subs    r12, r12, #1
    110     add     r0, r0, r1
    111 
    112     str     r4, [r2]
    113     str     r5, [r2, #4]
    114     str     r6, [r2, #8]
    115     str     r7, [r2, #12]
    116 
    117     add     r2, r2, r3
    118 
    119     ldrne   r4, [r0]
    120     ldrne   r5, [r0, #4]
    121     ldrne   r6, [r0, #8]
    122     ldrne   r7, [r0, #12]
    123 
    124     pld     [r0, #31]               ; preload for next 16x16 block
    125 
    126     bne     copy_mem16x16_4_loop
    127 
    128     ldmia       sp!, {r4 - r7}
    129     ;pop        {r4-r7}
    130     mov     pc, lr
    131 
    132 ;copy 8 bytes each time
    133 copy_mem16x16_8
    134     sub     r1, r1, #16
    135     sub     r3, r3, #16
    136 
    137     mov     r12, #16
    138 
    139 copy_mem16x16_8_loop
    140     ldmia   r0!, {r4-r5}
    141     ;ldm        r0, {r4-r5}
    142     ldmia   r0!, {r6-r7}
    143 
    144     add     r0, r0, r1
    145 
    146     stmia   r2!, {r4-r5}
    147     subs    r12, r12, #1
    148     ;stm        r2, {r4-r5}
    149     stmia   r2!, {r6-r7}
    150 
    151     add     r2, r2, r3
    152 
    153     pld     [r0, #31]               ; preload for next 16x16 block
    154     bne     copy_mem16x16_8_loop
    155 
    156     ldmia       sp!, {r4 - r7}
    157     ;pop        {r4-r7}
    158     mov     pc, lr
    159 
    160 ;copy 16 bytes each time
    161 copy_mem16x16_fast
    162     ;sub        r1, r1, #16
    163     ;sub        r3, r3, #16
    164 
    165     mov     r12, #16
    166 
    167 copy_mem16x16_fast_loop
    168     ldmia   r0, {r4-r7}
    169     ;ldm        r0, {r4-r7}
    170     add     r0, r0, r1
    171 
    172     subs    r12, r12, #1
    173     stmia   r2, {r4-r7}
    174     ;stm        r2, {r4-r7}
    175     add     r2, r2, r3
    176 
    177     pld     [r0, #31]               ; preload for next 16x16 block
    178     bne     copy_mem16x16_fast_loop
    179 
    180     ldmia       sp!, {r4 - r7}
    181     ;pop        {r4-r7}
    182     mov     pc, lr
    183 
    184     ENDP  ; |vp8_copy_mem16x16_v6|
    185 
    186     END
    187