Home | History | Annotate | Download | only in neon
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_yv12_copy_frame_func_neon|
     13     ARM
     14     REQUIRE8
     15     PRESERVE8
     16 
     17     INCLUDE asm_com_offsets.asm
     18 
     19     AREA ||.text||, CODE, READONLY, ALIGN=2
     20 
     21 ;void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
     22 
     23 |vp8_yv12_copy_frame_func_neon| PROC
     24     push            {r4 - r11, lr}
     25     vpush           {d8 - d15}
     26 
     27     sub             sp, sp, #16
     28 
     29     ;Copy Y plane
     30     ldr             r8, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
     31     ldr             r9, [r1, #yv12_buffer_config_u_buffer]       ;srcptr1
     32     ldr             r10, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
     33     ldr             r11, [r1, #yv12_buffer_config_v_buffer]      ;srcptr1
     34 
     35     ldr             r4, [r0, #yv12_buffer_config_y_height]
     36     ldr             r5, [r0, #yv12_buffer_config_y_width]
     37     ldr             r6, [r0, #yv12_buffer_config_y_stride]
     38     ldr             r7, [r1, #yv12_buffer_config_y_stride]
     39     ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
     40     ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
     41 
     42     str             r8, [sp]
     43     str             r9, [sp, #4]
     44     str             r10, [sp, #8]
     45     str             r11, [sp, #12]
     46 
     47     ; copy two rows at one time
     48     mov             lr, r4, lsr #1
     49 
     50 cp_src_to_dst_height_loop
     51     mov             r8, r2
     52     mov             r9, r3
     53     add             r10, r2, r6
     54     add             r11, r3, r7
     55     mov             r12, r5, lsr #7
     56 
     57 cp_src_to_dst_width_loop
     58     vld1.8          {q0, q1}, [r8]!
     59     vld1.8          {q8, q9}, [r10]!
     60     vld1.8          {q2, q3}, [r8]!
     61     vld1.8          {q10, q11}, [r10]!
     62     vld1.8          {q4, q5}, [r8]!
     63     vld1.8          {q12, q13}, [r10]!
     64     vld1.8          {q6, q7}, [r8]!
     65     vld1.8          {q14, q15}, [r10]!
     66 
     67     subs            r12, r12, #1
     68 
     69     vst1.8          {q0, q1}, [r9]!
     70     vst1.8          {q8, q9}, [r11]!
     71     vst1.8          {q2, q3}, [r9]!
     72     vst1.8          {q10, q11}, [r11]!
     73     vst1.8          {q4, q5}, [r9]!
     74     vst1.8          {q12, q13}, [r11]!
     75     vst1.8          {q6, q7}, [r9]!
     76     vst1.8          {q14, q15}, [r11]!
     77 
     78     bne             cp_src_to_dst_width_loop
     79 
     80     subs            lr, lr, #1
     81     add             r2, r2, r6, lsl #1
     82     add             r3, r3, r7, lsl #1
     83 
     84     bne             cp_src_to_dst_height_loop
     85 
     86     ands            r10, r5, #0x7f                  ;check to see if extra copy is needed
     87     sub             r11, r5, r10
     88     ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
     89     ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
     90     bne             extra_cp_src_to_dst_width
     91 end_of_cp_src_to_dst
     92 
     93 ;Copy U & V planes
     94     ldr             r2, [sp]        ;srcptr1
     95     ldr             r3, [sp, #4]        ;dstptr1
     96     mov             r4, r4, lsr #1                  ;src uv_height
     97     mov             r5, r5, lsr #1                  ;src uv_width
     98     mov             r6, r6, lsr #1                  ;src uv_stride
     99     mov             r7, r7, lsr #1                  ;dst uv_stride
    100 
    101     mov             r1, #2
    102 
    103 cp_uv_loop
    104 
    105     ;copy two rows at one time
    106     mov             lr, r4, lsr #1
    107 
    108 cp_src_to_dst_height_uv_loop
    109     mov             r8, r2
    110     mov             r9, r3
    111     add             r10, r2, r6
    112     add             r11, r3, r7
    113     mov             r12, r5, lsr #6
    114 
    115 cp_src_to_dst_width_uv_loop
    116     vld1.8          {q0, q1}, [r8]!
    117     vld1.8          {q8, q9}, [r10]!
    118     vld1.8          {q2, q3}, [r8]!
    119     vld1.8          {q10, q11}, [r10]!
    120 
    121     subs            r12, r12, #1
    122 
    123     vst1.8          {q0, q1}, [r9]!
    124     vst1.8          {q8, q9}, [r11]!
    125     vst1.8          {q2, q3}, [r9]!
    126     vst1.8          {q10, q11}, [r11]!
    127 
    128     bne             cp_src_to_dst_width_uv_loop
    129 
    130     subs            lr, lr, #1
    131     add             r2, r2, r6, lsl #1
    132     add             r3, r3, r7, lsl #1
    133 
    134     bne             cp_src_to_dst_height_uv_loop
    135 
    136     ands            r10, r5, #0x3f                  ;check to see if extra copy is needed
    137     sub             r11, r5, r10
    138     ldr             r2, [sp]        ;srcptr1
    139     ldr             r3, [sp, #4]        ;dstptr1
    140     bne             extra_cp_src_to_dst_uv_width
    141 end_of_cp_src_to_dst_uv
    142 
    143     subs            r1, r1, #1
    144 
    145     addne               sp, sp, #8
    146 
    147     ldrne               r2, [sp]        ;srcptr1
    148     ldrne               r3, [sp, #4]        ;dstptr1
    149 
    150     bne             cp_uv_loop
    151 
    152     add             sp, sp, #8
    153 
    154     vpop            {d8 - d15}
    155     pop             {r4 - r11, pc}
    156 
    157 ;=============================
    158 extra_cp_src_to_dst_width
    159     add             r2, r2, r11
    160     add             r3, r3, r11
    161     add             r0, r8, r6
    162     add             r11, r9, r7
    163 
    164     mov             lr, r4, lsr #1
    165 extra_cp_src_to_dst_height_loop
    166     mov             r8, r2
    167     mov             r9, r3
    168     add             r0, r8, r6
    169     add             r11, r9, r7
    170 
    171     mov             r12, r10
    172 
    173 extra_cp_src_to_dst_width_loop
    174     vld1.8          {q0}, [r8]!
    175     vld1.8          {q1}, [r0]!
    176 
    177     subs            r12, r12, #16
    178 
    179     vst1.8          {q0}, [r9]!
    180     vst1.8          {q1}, [r11]!
    181     bne             extra_cp_src_to_dst_width_loop
    182 
    183     subs            lr, lr, #1
    184 
    185     add             r2, r2, r6, lsl #1
    186     add             r3, r3, r7, lsl #1
    187 
    188     bne             extra_cp_src_to_dst_height_loop
    189 
    190     b               end_of_cp_src_to_dst
    191 
    192 ;=================================
    193 extra_cp_src_to_dst_uv_width
    194     add             r2, r2, r11
    195     add             r3, r3, r11
    196     add             r0, r8, r6
    197     add             r11, r9, r7
    198 
    199     mov             lr, r4, lsr #1
    200 extra_cp_src_to_dst_height_uv_loop
    201     mov             r8, r2
    202     mov             r9, r3
    203     add             r0, r8, r6
    204     add             r11, r9, r7
    205 
    206     mov             r12, r10
    207 
    208 extra_cp_src_to_dst_width_uv_loop
    209     vld1.8          {d0}, [r8]!
    210     vld1.8          {d1}, [r0]!
    211 
    212     subs            r12, r12, #8
    213 
    214     vst1.8          {d0}, [r9]!
    215     vst1.8          {d1}, [r11]!
    216     bne             extra_cp_src_to_dst_width_uv_loop
    217 
    218     subs            lr, lr, #1
    219 
    220     add             r2, r2, r6, lsl #1
    221     add             r3, r3, r7, lsl #1
    222 
    223     bne             extra_cp_src_to_dst_height_uv_loop
    224 
    225     b               end_of_cp_src_to_dst_uv
    226 
    227     ENDP
    228     END
    229