;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


     12     EXPORT  |vp8_yv12_copy_frame_func_neon|
     13     ARM
     14     REQUIRE8
     15     PRESERVE8
     16 
     17     INCLUDE vpx_scale_asm_offsets.asm
     18 
     19     AREA ||.text||, CODE, READONLY, ALIGN=2
     20 
     21 ;void vp8_yv12_copy_frame_func_neon(const YV12_BUFFER_CONFIG *src_ybc,
     22 ;                                   YV12_BUFFER_CONFIG *dst_ybc);
     23 
     24 |vp8_yv12_copy_frame_func_neon| PROC
     25     push            {r4 - r11, lr}
     26     vpush           {d8 - d15}
     27 
     28     sub             sp, sp, #16
     29 
     30     ;Copy Y plane
     31     ldr             r8, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
     32     ldr             r9, [r1, #yv12_buffer_config_u_buffer]       ;srcptr1
     33     ldr             r10, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
     34     ldr             r11, [r1, #yv12_buffer_config_v_buffer]      ;srcptr1
     35 
     36     ldr             r4, [r0, #yv12_buffer_config_y_height]
     37     ldr             r5, [r0, #yv12_buffer_config_y_width]
     38     ldr             r6, [r0, #yv12_buffer_config_y_stride]
     39     ldr             r7, [r1, #yv12_buffer_config_y_stride]
     40     ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
     41     ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
     42 
     43     str             r8, [sp]
     44     str             r9, [sp, #4]
     45     str             r10, [sp, #8]
     46     str             r11, [sp, #12]
     47 
     48     ; copy two rows at one time
     49     mov             lr, r4, lsr #1
     50 
     51 cp_src_to_dst_height_loop
     52     mov             r8, r2
     53     mov             r9, r3
     54     add             r10, r2, r6
     55     add             r11, r3, r7
     56     movs            r12, r5, lsr #7
     57     ble             extra_cp_needed   ; y_width < 128
     58 
     59 cp_src_to_dst_width_loop
     60     vld1.8          {q0, q1}, [r8]!
     61     vld1.8          {q8, q9}, [r10]!
     62     vld1.8          {q2, q3}, [r8]!
     63     vld1.8          {q10, q11}, [r10]!
     64     vld1.8          {q4, q5}, [r8]!
     65     vld1.8          {q12, q13}, [r10]!
     66     vld1.8          {q6, q7}, [r8]!
     67     vld1.8          {q14, q15}, [r10]!
     68 
     69     subs            r12, r12, #1
     70 
     71     vst1.8          {q0, q1}, [r9]!
     72     vst1.8          {q8, q9}, [r11]!
     73     vst1.8          {q2, q3}, [r9]!
     74     vst1.8          {q10, q11}, [r11]!
     75     vst1.8          {q4, q5}, [r9]!
     76     vst1.8          {q12, q13}, [r11]!
     77     vst1.8          {q6, q7}, [r9]!
     78     vst1.8          {q14, q15}, [r11]!
     79 
     80     bne             cp_src_to_dst_width_loop
     81 
     82     subs            lr, lr, #1
     83     add             r2, r2, r6, lsl #1
     84     add             r3, r3, r7, lsl #1
     85 
     86     bne             cp_src_to_dst_height_loop
     87 
     88 extra_cp_needed
     89     ands            r10, r5, #0x7f                  ;check to see if extra copy is needed
     90     sub             r11, r5, r10
     91     ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
     92     ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
     93     bne             extra_cp_src_to_dst_width
     94 end_of_cp_src_to_dst
     95 
     96 ;Copy U & V planes
     97     ldr             r2, [sp]        ;srcptr1
     98     ldr             r3, [sp, #4]        ;dstptr1
     99     mov             r4, r4, lsr #1                  ;src uv_height
    100     mov             r5, r5, lsr #1                  ;src uv_width
    101     mov             r6, r6, lsr #1                  ;src uv_stride
    102     mov             r7, r7, lsr #1                  ;dst uv_stride
    103 
    104     mov             r1, #2
    105 
    106 cp_uv_loop
    107 
    108     ;copy two rows at one time
    109     mov             lr, r4, lsr #1
    110 
    111 cp_src_to_dst_height_uv_loop
    112     mov             r8, r2
    113     mov             r9, r3
    114     add             r10, r2, r6
    115     add             r11, r3, r7
    116     movs            r12, r5, lsr #6
    117     ble             extra_uv_cp_needed
    118 
    119 cp_src_to_dst_width_uv_loop
    120     vld1.8          {q0, q1}, [r8]!
    121     vld1.8          {q8, q9}, [r10]!
    122     vld1.8          {q2, q3}, [r8]!
    123     vld1.8          {q10, q11}, [r10]!
    124 
    125     subs            r12, r12, #1
    126 
    127     vst1.8          {q0, q1}, [r9]!
    128     vst1.8          {q8, q9}, [r11]!
    129     vst1.8          {q2, q3}, [r9]!
    130     vst1.8          {q10, q11}, [r11]!
    131 
    132     bne             cp_src_to_dst_width_uv_loop
    133 
    134     subs            lr, lr, #1
    135     add             r2, r2, r6, lsl #1
    136     add             r3, r3, r7, lsl #1
    137 
    138     bne             cp_src_to_dst_height_uv_loop
    139 
    140 extra_uv_cp_needed
    141     ands            r10, r5, #0x3f                  ;check to see if extra copy is needed
    142     sub             r11, r5, r10
    143     ldr             r2, [sp]        ;srcptr1
    144     ldr             r3, [sp, #4]        ;dstptr1
    145     bne             extra_cp_src_to_dst_uv_width
    146 end_of_cp_src_to_dst_uv
    147 
    148     subs            r1, r1, #1
    149 
    150     addne               sp, sp, #8
    151 
    152     ldrne               r2, [sp]        ;srcptr1
    153     ldrne               r3, [sp, #4]        ;dstptr1
    154 
    155     bne             cp_uv_loop
    156 
    157     add             sp, sp, #8
    158 
    159     vpop            {d8 - d15}
    160     pop             {r4 - r11, pc}
    161 
    162 ;=============================
    163 extra_cp_src_to_dst_width
    164     add             r2, r2, r11
    165     add             r3, r3, r11
    166     add             r0, r8, r6
    167     add             r11, r9, r7
    168 
    169     mov             lr, r4, lsr #1
    170 extra_cp_src_to_dst_height_loop
    171     mov             r8, r2
    172     mov             r9, r3
    173     add             r0, r8, r6
    174     add             r11, r9, r7
    175 
    176     mov             r12, r10
    177 
    178 extra_cp_src_to_dst_width_loop
    179     vld1.8          {q0}, [r8]!
    180     vld1.8          {q1}, [r0]!
    181 
    182     subs            r12, r12, #16
    183 
    184     vst1.8          {q0}, [r9]!
    185     vst1.8          {q1}, [r11]!
    186     bne             extra_cp_src_to_dst_width_loop
    187 
    188     subs            lr, lr, #1
    189 
    190     add             r2, r2, r6, lsl #1
    191     add             r3, r3, r7, lsl #1
    192 
    193     bne             extra_cp_src_to_dst_height_loop
    194 
    195     b               end_of_cp_src_to_dst
    196 
    197 ;=================================
    198 extra_cp_src_to_dst_uv_width
    199     add             r2, r2, r11
    200     add             r3, r3, r11
    201     add             r0, r8, r6
    202     add             r11, r9, r7
    203 
    204     mov             lr, r4, lsr #1
    205 extra_cp_src_to_dst_height_uv_loop
    206     mov             r8, r2
    207     mov             r9, r3
    208     add             r0, r8, r6
    209     add             r11, r9, r7
    210 
    211     mov             r12, r10
    212 
    213 extra_cp_src_to_dst_width_uv_loop
    214     vld1.8          {d0}, [r8]!
    215     vld1.8          {d1}, [r0]!
    216 
    217     subs            r12, r12, #8
    218 
    219     vst1.8          {d0}, [r9]!
    220     vst1.8          {d1}, [r11]!
    221     bne             extra_cp_src_to_dst_width_uv_loop
    222 
    223     subs            lr, lr, #1
    224 
    225     add             r2, r2, r6, lsl #1
    226     add             r3, r3, r7, lsl #1
    227 
    228     bne             extra_cp_src_to_dst_height_uv_loop
    229 
    230     b               end_of_cp_src_to_dst_uv
    231 
    232     ENDP
    233     END
    234