;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


     12     EXPORT  |vp8_yv12_copy_src_frame_func_neon|
     13     ARM
     14     REQUIRE8
     15     PRESERVE8
     16 
     17     INCLUDE vpx_scale_asm_offsets.asm
     18 
     19     AREA ||.text||, CODE, READONLY, ALIGN=2
     20 ;Note: This function is used to copy source data in src_buffer[i] at beginning
     21 ;of the encoding. The buffer has a width and height of cpi->oxcf.Width and
     22 ;cpi->oxcf.Height, which can be ANY numbers(NOT always multiples of 16 or 4).
     23 
     24 ;void vp8_yv12_copy_src_frame_func_neon(const YV12_BUFFER_CONFIG *src_ybc,
     25 ;                                       YV12_BUFFER_CONFIG *dst_ybc);
     26 
     27 |vp8_yv12_copy_src_frame_func_neon| PROC
     28     push            {r4 - r11, lr}
     29     vpush           {d8 - d15}
     30 
     31     ;Copy Y plane
     32     ldr             r4, [r0, #yv12_buffer_config_y_height]
     33     ldr             r5, [r0, #yv12_buffer_config_y_width]
     34     ldr             r6, [r0, #yv12_buffer_config_y_stride]
     35     ldr             r7, [r1, #yv12_buffer_config_y_stride]
     36     ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
     37     ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
     38 
     39     add             r10, r2, r6             ;second row src
     40     add             r11, r3, r7             ;second row dst
     41     mov             r6, r6, lsl #1
     42     mov             r7, r7, lsl #1
     43     sub             r6, r6, r5              ;adjust stride
     44     sub             r7, r7, r5
     45 
     46     ; copy two rows at one time
     47     mov             lr, r4, lsr #1
     48 
     49 cp_src_to_dst_height_loop
     50     mov             r12, r5
     51 
     52 cp_width_128_loop
     53     vld1.8          {q0, q1}, [r2]!
     54     vld1.8          {q4, q5}, [r10]!
     55     vld1.8          {q2, q3}, [r2]!
     56     vld1.8          {q6, q7}, [r10]!
     57     vld1.8          {q8, q9}, [r2]!
     58     vld1.8          {q12, q13}, [r10]!
     59     vld1.8          {q10, q11}, [r2]!
     60     vld1.8          {q14, q15}, [r10]!
     61     sub             r12, r12, #128
     62     cmp             r12, #128
     63     vst1.8          {q0, q1}, [r3]!
     64     vst1.8          {q4, q5}, [r11]!
     65     vst1.8          {q2, q3}, [r3]!
     66     vst1.8          {q6, q7}, [r11]!
     67     vst1.8          {q8, q9}, [r3]!
     68     vst1.8          {q12, q13}, [r11]!
     69     vst1.8          {q10, q11}, [r3]!
     70     vst1.8          {q14, q15}, [r11]!
     71     bhs             cp_width_128_loop
     72 
     73     cmp             r12, #0
     74     beq             cp_width_done
     75 
     76 cp_width_8_loop
     77     vld1.8          {d0}, [r2]!
     78     vld1.8          {d1}, [r10]!
     79     sub             r12, r12, #8
     80     cmp             r12, #8
     81     vst1.8          {d0}, [r3]!
     82     vst1.8          {d1}, [r11]!
     83     bhs             cp_width_8_loop
     84 
     85     cmp             r12, #0
     86     beq             cp_width_done
     87 
     88 cp_width_1_loop
     89     ldrb            r8, [r2], #1
     90     subs            r12, r12, #1
     91     strb            r8, [r3], #1
     92     ldrb            r8, [r10], #1
     93     strb            r8, [r11], #1
     94     bne             cp_width_1_loop
     95 
     96 cp_width_done
     97     subs            lr, lr, #1
     98     add             r2, r2, r6
     99     add             r3, r3, r7
    100     add             r10, r10, r6
    101     add             r11, r11, r7
    102     bne             cp_src_to_dst_height_loop
    103 
    104 ;copy last line for Y if y_height is odd
    105     tst             r4, #1
    106     beq             cp_width_done_1
    107     mov             r12, r5
    108 
    109 cp_width_128_loop_1
    110     vld1.8          {q0, q1}, [r2]!
    111     vld1.8          {q2, q3}, [r2]!
    112     vld1.8          {q8, q9}, [r2]!
    113     vld1.8          {q10, q11}, [r2]!
    114     sub             r12, r12, #128
    115     cmp             r12, #128
    116     vst1.8          {q0, q1}, [r3]!
    117     vst1.8          {q2, q3}, [r3]!
    118     vst1.8          {q8, q9}, [r3]!
    119     vst1.8          {q10, q11}, [r3]!
    120     bhs             cp_width_128_loop_1
    121 
    122     cmp             r12, #0
    123     beq             cp_width_done_1
    124 
    125 cp_width_8_loop_1
    126     vld1.8          {d0}, [r2]!
    127     sub             r12, r12, #8
    128     cmp             r12, #8
    129     vst1.8          {d0}, [r3]!
    130     bhs             cp_width_8_loop_1
    131 
    132     cmp             r12, #0
    133     beq             cp_width_done_1
    134 
    135 cp_width_1_loop_1
    136     ldrb            r8, [r2], #1
    137     subs            r12, r12, #1
    138     strb            r8, [r3], #1
    139     bne             cp_width_1_loop_1
    140 cp_width_done_1
    141 
    142 ;Copy U & V planes
    143     ldr             r4, [r0, #yv12_buffer_config_uv_height]
    144     ldr             r5, [r0, #yv12_buffer_config_uv_width]
    145     ldr             r6, [r0, #yv12_buffer_config_uv_stride]
    146     ldr             r7, [r1, #yv12_buffer_config_uv_stride]
    147     ldr             r2, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
    148     ldr             r3, [r1, #yv12_buffer_config_u_buffer]       ;dstptr1
    149 
    150     add             r10, r2, r6             ;second row src
    151     add             r11, r3, r7             ;second row dst
    152     mov             r6, r6, lsl #1
    153     mov             r7, r7, lsl #1
    154     sub             r6, r6, r5              ;adjust stride
    155     sub             r7, r7, r5
    156 
    157     mov             r9, #2
    158 
    159 cp_uv_loop
    160     ;copy two rows at one time
    161     mov             lr, r4, lsr #1
    162 
    163 cp_src_to_dst_height_uv_loop
    164     mov             r12, r5
    165 
    166 cp_width_uv_64_loop
    167     vld1.8          {q0, q1}, [r2]!
    168     vld1.8          {q4, q5}, [r10]!
    169     vld1.8          {q2, q3}, [r2]!
    170     vld1.8          {q6, q7}, [r10]!
    171     sub             r12, r12, #64
    172     cmp             r12, #64
    173     vst1.8          {q0, q1}, [r3]!
    174     vst1.8          {q4, q5}, [r11]!
    175     vst1.8          {q2, q3}, [r3]!
    176     vst1.8          {q6, q7}, [r11]!
    177     bhs             cp_width_uv_64_loop
    178 
    179     cmp             r12, #0
    180     beq             cp_width_uv_done
    181 
    182 cp_width_uv_8_loop
    183     vld1.8          {d0}, [r2]!
    184     vld1.8          {d1}, [r10]!
    185     sub             r12, r12, #8
    186     cmp             r12, #8
    187     vst1.8          {d0}, [r3]!
    188     vst1.8          {d1}, [r11]!
    189     bhs             cp_width_uv_8_loop
    190 
    191     cmp             r12, #0
    192     beq             cp_width_uv_done
    193 
    194 cp_width_uv_1_loop
    195     ldrb            r8, [r2], #1
    196     subs            r12, r12, #1
    197     strb            r8, [r3], #1
    198     ldrb            r8, [r10], #1
    199     strb            r8, [r11], #1
    200     bne             cp_width_uv_1_loop
    201 
    202 cp_width_uv_done
    203     subs            lr, lr, #1
    204     add             r2, r2, r6
    205     add             r3, r3, r7
    206     add             r10, r10, r6
    207     add             r11, r11, r7
    208     bne             cp_src_to_dst_height_uv_loop
    209 
    210 ;copy last line for U & V if uv_height is odd
    211     tst             r4, #1
    212     beq             cp_width_uv_done_1
    213     mov             r12, r5
    214 
    215 cp_width_uv_64_loop_1
    216     vld1.8          {q0, q1}, [r2]!
    217     vld1.8          {q2, q3}, [r2]!
    218     sub             r12, r12, #64
    219     cmp             r12, #64
    220     vst1.8          {q0, q1}, [r3]!
    221     vst1.8          {q2, q3}, [r3]!
    222     bhs             cp_width_uv_64_loop_1
    223 
    224     cmp             r12, #0
    225     beq             cp_width_uv_done_1
    226 
    227 cp_width_uv_8_loop_1
    228     vld1.8          {d0}, [r2]!
    229     sub             r12, r12, #8
    230     cmp             r12, #8
    231     vst1.8          {d0}, [r3]!
    232     bhs             cp_width_uv_8_loop_1
    233 
    234     cmp             r12, #0
    235     beq             cp_width_uv_done_1
    236 
    237 cp_width_uv_1_loop_1
    238     ldrb            r8, [r2], #1
    239     subs            r12, r12, #1
    240     strb            r8, [r3], #1
    241     bne             cp_width_uv_1_loop_1
    242 cp_width_uv_done_1
    243 
    244     subs            r9, r9, #1
    245     ldrne           r2, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
    246     ldrne           r3, [r1, #yv12_buffer_config_v_buffer]      ;dstptr1
    247     ldrne           r10, [r0, #yv12_buffer_config_uv_stride]
    248     ldrne           r11, [r1, #yv12_buffer_config_uv_stride]
    249 
    250     addne           r10, r2, r10                ;second row src
    251     addne           r11, r3, r11                ;second row dst
    252 
    253     bne             cp_uv_loop
    254 
    255     vpop            {d8 - d15}
    256     pop             {r4 - r11, pc}
    257 
    258     ENDP
    259     END
    260