Home | History | Annotate | Download | only in neon
      1 ;
      2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3 ;
      4 ;  Use of this source code is governed by a BSD-style license
      5 ;  that can be found in the LICENSE file in the root of the source
      6 ;  tree. An additional intellectual property rights grant can be found
      7 ;  in the file PATENTS.  All contributing project authors may
      8 ;  be found in the AUTHORS file in the root of the source tree.
      9 ;
     10 
     11 
     12     EXPORT  |vp8_yv12_extend_frame_borders_neon|
     13     ARM
     14     REQUIRE8
     15     PRESERVE8
     16 
     17     INCLUDE asm_com_offsets.asm
     18 
     19     AREA ||.text||, CODE, READONLY, ALIGN=2
     20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
     21 ;Note: this is a VP8 function, which has border=32 and 16 (any value other than 16 takes the
     22 ; border=32 path). Internal y_width and y_height are always multiples of 16.
     23 ;In: r0 = ybf. Replicates the edge pixels of the Y, U and V planes outwards into their borders.
     24 |vp8_yv12_extend_frame_borders_neon| PROC
     25     push            {r4 - r10, lr}          ;save the core registers used below, plus the return address
     26     vpush           {d8 - d15}              ;q4-q7 are overwritten below
     27 
     28     ;No need to load y_width, since: y_width = y_stride - 2*border
     29     ldr             r3, [r0, #yv12_buffer_config_border]         ;r3 = border
     30     ldr             r1, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
     31     ldr             r4, [r0, #yv12_buffer_config_y_height]       ;r4 = y_height
     32     ldr             lr, [r0, #yv12_buffer_config_y_stride]       ;lr = y_stride
     33 
     34     cmp             r3, #16
     35     beq             b16_extend_frame_borders
     36 
     37 ;=======================
     38 b32_extend_frame_borders
     39 ;border = 32
     40 ;=======================
     41 ;Border copy for Y plane
     42 ;copy the left and right most columns out
     43     sub             r5, r1, r3              ;destptr1 (start of the left border)
     44     add             r6, r1, lr
     45     sub             r6, r6, r3, lsl #1      ;destptr2 (start of the right border)
     46     sub             r2, r6, #1              ;srcptr2 (last pixel of the row)
     47 
     48     ;Do four rows at one time
     49     mov             r12, r4, lsr #2         ;r12 = y_height / 4
     50 
     51 copy_left_right_y
     52     vld1.8          {d0[], d1[]}, [r1], lr      ;left pixel of row 0, replicated to all 16 lanes
     53     vld1.8          {d4[], d5[]}, [r2], lr      ;right pixel of row 0, replicated
     54     vld1.8          {d8[], d9[]}, [r1], lr      ;row 1, left
     55     vld1.8          {d12[], d13[]}, [r2], lr    ;row 1, right
     56     vld1.8          {d16[], d17[]},  [r1], lr   ;row 2, left
     57     vld1.8          {d20[], d21[]}, [r2], lr    ;row 2, right
     58     vld1.8          {d24[], d25[]}, [r1], lr    ;row 3, left
     59     vld1.8          {d28[], d29[]}, [r2], lr    ;row 3, right
     60 
     61     vmov            q1, q0                  ;duplicate each value so a register pair covers 32 bytes
     62     vmov            q3, q2
     63     vmov            q5, q4
     64     vmov            q7, q6
     65     vmov            q9, q8
     66     vmov            q11, q10
     67     vmov            q13, q12
     68     vmov            q15, q14
     69 
     70     subs            r12, r12, #1
     71 
     72     vst1.8          {q0, q1}, [r5], lr      ;32 bytes into the left border, then step one row down
     73     vst1.8          {q2, q3}, [r6], lr      ;32 bytes into the right border, then step one row down
     74     vst1.8          {q4, q5}, [r5], lr
     75     vst1.8          {q6, q7}, [r6], lr
     76     vst1.8          {q8, q9}, [r5], lr
     77     vst1.8          {q10, q11}, [r6], lr
     78     vst1.8          {q12, q13}, [r5], lr
     79     vst1.8          {q14, q15}, [r6], lr
     80 
     81     bne             copy_left_right_y       ;flags come from the subs above
     82 
     83 ;Now copy the top and bottom source lines into each line of the respective borders
     84     ldr             r7, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
     85     mul             r8, r3, lr              ;r8 = border * y_stride
     86 
     87     movs            r12, lr, lsr #7         ;r12 = full 128-byte chunks per row; Z set when there are none
     88 
     89     sub             r6, r1, r3              ;destptr2 (first bottom-border row; r1 is one row past the image)
     90     sub             r2, r6, lr              ;srcptr2 (last image row, left border included)
     91     sub             r1, r7, r3              ;srcptr1 (first image row, left border included)
     92     sub             r5, r1, r8              ;destptr1 (first top-border row)
     93     beq             check_extra_y           ;y_stride < 128: skip the bottom-tested loop, use the tail copy only
     94 copy_top_bottom_y
     95     vld1.8          {q0, q1}, [r1]!         ;q0-q7  = next 128 bytes of the top source row
     96     vld1.8          {q8, q9}, [r2]!         ;q8-q15 = next 128 bytes of the bottom source row
     97     vld1.8          {q2, q3}, [r1]!
     98     vld1.8          {q10, q11}, [r2]!
     99     vld1.8          {q4, q5}, [r1]!
    100     vld1.8          {q12, q13}, [r2]!
    101     vld1.8          {q6, q7}, [r1]!
    102     vld1.8          {q14, q15}, [r2]!
    103 
    104     mov             r7, r3                  ;r7 = number of border rows to fill
    105 
    106 top_bottom_32
    107     subs            r7, r7, #1
    108 
    109     vst1.8          {q0, q1}, [r5]!
    110     vst1.8          {q8, q9}, [r6]!
    111     vst1.8          {q2, q3}, [r5]!
    112     vst1.8          {q10, q11}, [r6]!
    113     vst1.8          {q4, q5}, [r5]!
    114     vst1.8          {q12, q13}, [r6]!
    115     vst1.8          {q6, q7}, [r5]!
    116     vst1.8          {q14, q15}, [r6]!
    117 
    118     add             r5, r5, lr
    119     sub             r5, r5, #128            ;same columns, next top-border row
    120     add             r6, r6, lr
    121     sub             r6, r6, #128            ;same columns, next bottom-border row
    122 
    123     bne             top_bottom_32
    124 
    125     sub             r5, r1, r8              ;back to the first top-border row, next chunk of columns
    126     add             r6, r2, lr              ;back to the first bottom-border row, next chunk of columns
    127 
    128     subs            r12, r12, #1
    129     bne             copy_top_bottom_y
    130 check_extra_y
    131     mov             r7, lr, lsr #4              ;check to see if extra copy is needed
    132     ands            r7, r7, #0x7                ;r7 = number of 16-byte chunks left over in the row
    133     bne             extra_top_bottom_y
    134 end_of_border_copy_y
    135 
    136 ;Border copy for U, V planes
    137     ldr             r1, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
    138     mov             lr, lr, lsr #1              ;uv_stride
    139     mov             r3, r3, lsr #1              ;border
    140     mov             r4, r4, lsr #1              ;uv_height
    141     mov             r8, r8, lsr #2              ;r8 = uv border * uv_stride
    142 
    143     mov             r10, #2                     ;two passes: U plane, then V plane
    144 
    145 ;copy the left and right most columns out
    146 border_copy_uv
    147     sub             r5, r1, r3              ;destptr1
    148     add             r6, r1, lr
    149     sub             r6, r6, r3, lsl #1      ;destptr2
    150     sub             r2, r6, #1              ;srcptr2
    151 
    152     mov             r7, r1                  ;keep the plane start for the top/bottom copy below
    153 
    154     ;Do eight rows at one time
    155     mov             r12, r4, lsr #3         ;r12 = uv_height / 8
    156 
    157 copy_left_right_uv
    158     vld1.8          {d0[], d1[]}, [r1], lr      ;even q registers: left pixel of rows 0-7, replicated
    159     vld1.8          {d2[], d3[]}, [r2], lr      ;odd q registers: right pixel of rows 0-7, replicated
    160     vld1.8          {d4[], d5[]}, [r1], lr
    161     vld1.8          {d6[], d7[]}, [r2], lr
    162     vld1.8          {d8[], d9[]},  [r1], lr
    163     vld1.8          {d10[], d11[]}, [r2], lr
    164     vld1.8          {d12[], d13[]}, [r1], lr
    165     vld1.8          {d14[], d15[]}, [r2], lr
    166     vld1.8          {d16[], d17[]}, [r1], lr
    167     vld1.8          {d18[], d19[]}, [r2], lr
    168     vld1.8          {d20[], d21[]}, [r1], lr
    169     vld1.8          {d22[], d23[]}, [r2], lr
    170     vld1.8          {d24[], d25[]},  [r1], lr
    171     vld1.8          {d26[], d27[]}, [r2], lr
    172     vld1.8          {d28[], d29[]}, [r1], lr
    173     vld1.8          {d30[], d31[]}, [r2], lr
    174 
    175     subs            r12, r12, #1
    176 
    177     vst1.8          {q0}, [r5], lr          ;16 bytes into the left border, then step one row down
    178     vst1.8          {q1}, [r6], lr          ;16 bytes into the right border, then step one row down
    179     vst1.8          {q2}, [r5], lr
    180     vst1.8          {q3}, [r6], lr
    181     vst1.8          {q4}, [r5], lr
    182     vst1.8          {q5}, [r6], lr
    183     vst1.8          {q6}, [r5], lr
    184     vst1.8          {q7}, [r6], lr
    185     vst1.8          {q8}, [r5], lr
    186     vst1.8          {q9}, [r6], lr
    187     vst1.8          {q10}, [r5], lr
    188     vst1.8          {q11}, [r6], lr
    189     vst1.8          {q12}, [r5], lr
    190     vst1.8          {q13}, [r6], lr
    191     vst1.8          {q14}, [r5], lr
    192     vst1.8          {q15}, [r6], lr
    193 
    194     bne             copy_left_right_uv      ;flags come from the subs above
    195 
    196 ;Now copy the top and bottom source lines into each line of the respective borders
    197     movs            r12, lr, lsr #6         ;r12 = full 64-byte chunks per row; Z set when there are none
    198 
    199     sub             r6, r1, r3              ;destptr2
    200     sub             r2, r6, lr              ;srcptr2
    201     sub             r1, r7, r3              ;srcptr1
    202     sub             r5, r1, r8              ;destptr1
    203     beq             check_extra_uv          ;uv_stride < 64: skip the bottom-tested loop, use the tail copy only
    204 copy_top_bottom_uv
    205     vld1.8          {q0, q1}, [r1]!         ;q0-q3  = next 64 bytes of the top source row
    206     vld1.8          {q8, q9}, [r2]!         ;q8-q11 = next 64 bytes of the bottom source row
    207     vld1.8          {q2, q3}, [r1]!
    208     vld1.8          {q10, q11}, [r2]!
    209 
    210     mov             r7, r3                  ;r7 = number of border rows to fill
    211 
    212 top_bottom_16
    213     subs            r7, r7, #1
    214 
    215     vst1.8          {q0, q1}, [r5]!
    216     vst1.8          {q8, q9}, [r6]!
    217     vst1.8          {q2, q3}, [r5]!
    218     vst1.8          {q10, q11}, [r6]!
    219 
    220     add             r5, r5, lr
    221     sub             r5, r5, #64             ;same columns, next top-border row
    222     add             r6, r6, lr
    223     sub             r6, r6, #64             ;same columns, next bottom-border row
    224 
    225     bne             top_bottom_16
    226 
    227     sub             r5, r1, r8              ;back to the first top-border row, next chunk of columns
    228     add             r6, r2, lr              ;back to the first bottom-border row, next chunk of columns
    229 
    230     subs            r12, r12, #1
    231     bne             copy_top_bottom_uv
    232 check_extra_uv
    233     mov             r7, lr, lsr #3              ;check to see if extra copy is needed
    234     ands            r7, r7, #0x7                ;r7 = number of 8-byte chunks left over in the row
    235     bne             extra_top_bottom_uv
    236 
    237 end_of_border_copy_uv
    238     subs            r10, r10, #1
    239     ldrne           r1, [r0, #yv12_buffer_config_v_buffer]       ;srcptr1
    240     bne             border_copy_uv          ;second pass handles the V plane
    241 
    242     vpop            {d8 - d15}
    243     pop             {r4 - r10, pc}          ;restore registers and return
    244 
    245 ;;;;;;;;;;;;;;;;;;;;;;
    246 ;extra copy part for Y
    247 extra_top_bottom_y
    248     vld1.8          {q0}, [r1]!             ;next 16 bytes of the top source row
    249     vld1.8          {q2}, [r2]!             ;next 16 bytes of the bottom source row
    250 
    251     mov             r9, r3, lsr #3          ;r9 = border / 8; each pass below writes 8 rows
    252 
    253 extra_top_bottom_32
    254     subs            r9, r9, #1
    255 
    256     vst1.8          {q0}, [r5], lr
    257     vst1.8          {q2}, [r6], lr
    258     vst1.8          {q0}, [r5], lr
    259     vst1.8          {q2}, [r6], lr
    260     vst1.8          {q0}, [r5], lr
    261     vst1.8          {q2}, [r6], lr
    262     vst1.8          {q0}, [r5], lr
    263     vst1.8          {q2}, [r6], lr
    264     vst1.8          {q0}, [r5], lr
    265     vst1.8          {q2}, [r6], lr
    266     vst1.8          {q0}, [r5], lr
    267     vst1.8          {q2}, [r6], lr
    268     vst1.8          {q0}, [r5], lr
    269     vst1.8          {q2}, [r6], lr
    270     vst1.8          {q0}, [r5], lr
    271     vst1.8          {q2}, [r6], lr
    272     bne             extra_top_bottom_32
    273 
    274     sub             r5, r1, r8              ;first top-border row, next 16 columns
    275     add             r6, r2, lr              ;first bottom-border row, next 16 columns
    276     subs            r7, r7, #1
    277     bne             extra_top_bottom_y
    278 
    279     b               end_of_border_copy_y
    280 
    281 ;extra copy part for UV
    282 extra_top_bottom_uv
    283     vld1.8          {d0}, [r1]!             ;next 8 bytes of the top source row
    284     vld1.8          {d8}, [r2]!             ;next 8 bytes of the bottom source row
    285 
    286     mov             r9, r3, lsr #3          ;r9 = uv border / 8; each pass below writes 8 rows
    287 
    288 extra_top_bottom_16
    289     subs            r9, r9, #1
    290 
    291     vst1.8          {d0}, [r5], lr
    292     vst1.8          {d8}, [r6], lr
    293     vst1.8          {d0}, [r5], lr
    294     vst1.8          {d8}, [r6], lr
    295     vst1.8          {d0}, [r5], lr
    296     vst1.8          {d8}, [r6], lr
    297     vst1.8          {d0}, [r5], lr
    298     vst1.8          {d8}, [r6], lr
    299     vst1.8          {d0}, [r5], lr
    300     vst1.8          {d8}, [r6], lr
    301     vst1.8          {d0}, [r5], lr
    302     vst1.8          {d8}, [r6], lr
    303     vst1.8          {d0}, [r5], lr
    304     vst1.8          {d8}, [r6], lr
    305     vst1.8          {d0}, [r5], lr
    306     vst1.8          {d8}, [r6], lr
    307     bne             extra_top_bottom_16
    308 
    309     sub             r5, r1, r8              ;first top-border row, next 8 columns
    310     add             r6, r2, lr              ;first bottom-border row, next 8 columns
    311     subs            r7, r7, #1
    312     bne             extra_top_bottom_uv
    313 
    314     b               end_of_border_copy_uv
    315 
    316 
    317 ;=======================
    318 b16_extend_frame_borders
    319 ;border = 16
    320 ;=======================
    321 ;Border copy for Y plane
    322 ;copy the left and right most columns out
    323     sub             r5, r1, r3              ;destptr1 (start of the left border)
    324     add             r6, r1, lr
    325     sub             r6, r6, r3, lsl #1      ;destptr2 (start of the right border)
    326     sub             r2, r6, #1              ;srcptr2 (last pixel of the row)
    327 
    328     ;Do four rows at one time
    329     mov             r12, r4, lsr #2         ;r12 = y_height / 4
    330 
    331 copy_left_right_y_b16
    332     vld1.8          {d0[], d1[]}, [r1], lr      ;left pixel of row 0, replicated to all 16 lanes
    333     vld1.8          {d4[], d5[]}, [r2], lr      ;right pixel of row 0, replicated
    334     vld1.8          {d8[], d9[]}, [r1], lr      ;row 1, left
    335     vld1.8          {d12[], d13[]}, [r2], lr    ;row 1, right
    336     vld1.8          {d16[], d17[]},  [r1], lr   ;row 2, left
    337     vld1.8          {d20[], d21[]}, [r2], lr    ;row 2, right
    338     vld1.8          {d24[], d25[]}, [r1], lr    ;row 3, left
    339     vld1.8          {d28[], d29[]}, [r2], lr    ;row 3, right
    340 
    341     subs            r12, r12, #1
    342 
    343     vst1.8          {q0}, [r5], lr          ;16 bytes into the left border, then step one row down
    344     vst1.8          {q2}, [r6], lr          ;16 bytes into the right border, then step one row down
    345     vst1.8          {q4}, [r5], lr
    346     vst1.8          {q6}, [r6], lr
    347     vst1.8          {q8}, [r5], lr
    348     vst1.8          {q10}, [r6], lr
    349     vst1.8          {q12}, [r5], lr
    350     vst1.8          {q14}, [r6], lr
    351 
    352     bne             copy_left_right_y_b16   ;flags come from the subs above
    353 
    354 ;Now copy the top and bottom source lines into each line of the respective borders
    355     ldr             r7, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    356     mul             r8, r3, lr              ;r8 = border * y_stride
    357 
    358     movs            r12, lr, lsr #7         ;r12 = full 128-byte chunks per row; Z set when there are none
    359 
    360     sub             r6, r1, r3              ;destptr2 (first bottom-border row; r1 is one row past the image)
    361     sub             r2, r6, lr              ;srcptr2 (last image row, left border included)
    362     sub             r1, r7, r3              ;srcptr1 (first image row, left border included)
    363     sub             r5, r1, r8              ;destptr1 (first top-border row)
    364     beq             check_extra_y_b16       ;y_stride < 128: skip the bottom-tested loop, use the tail copy only
    365 copy_top_bottom_y_b16
    366     vld1.8          {q0, q1}, [r1]!         ;q0-q7  = next 128 bytes of the top source row
    367     vld1.8          {q8, q9}, [r2]!         ;q8-q15 = next 128 bytes of the bottom source row
    368     vld1.8          {q2, q3}, [r1]!
    369     vld1.8          {q10, q11}, [r2]!
    370     vld1.8          {q4, q5}, [r1]!
    371     vld1.8          {q12, q13}, [r2]!
    372     vld1.8          {q6, q7}, [r1]!
    373     vld1.8          {q14, q15}, [r2]!
    374 
    375     mov             r7, r3                  ;r7 = number of border rows to fill
    376 
    377 top_bottom_16_b16
    378     subs            r7, r7, #1
    379 
    380     vst1.8          {q0, q1}, [r5]!
    381     vst1.8          {q8, q9}, [r6]!
    382     vst1.8          {q2, q3}, [r5]!
    383     vst1.8          {q10, q11}, [r6]!
    384     vst1.8          {q4, q5}, [r5]!
    385     vst1.8          {q12, q13}, [r6]!
    386     vst1.8          {q6, q7}, [r5]!
    387     vst1.8          {q14, q15}, [r6]!
    388 
    389     add             r5, r5, lr
    390     sub             r5, r5, #128            ;same columns, next top-border row
    391     add             r6, r6, lr
    392     sub             r6, r6, #128            ;same columns, next bottom-border row
    393 
    394     bne             top_bottom_16_b16
    395 
    396     sub             r5, r1, r8              ;back to the first top-border row, next chunk of columns
    397     add             r6, r2, lr              ;back to the first bottom-border row, next chunk of columns
    398 
    399     subs            r12, r12, #1
    400     bne             copy_top_bottom_y_b16
    401 check_extra_y_b16
    402     mov             r7, lr, lsr #4              ;check to see if extra copy is needed
    403     ands            r7, r7, #0x7                ;r7 = number of 16-byte chunks left over in the row
    404     bne             extra_top_bottom_y_b16
    405 end_of_border_copy_y_b16
    406 
    407 ;Border copy for U, V planes
    408     ldr             r1, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
    409     mov             lr, lr, lsr #1              ;uv_stride
    410     mov             r3, r3, lsr #1              ;border
    411     mov             r4, r4, lsr #1              ;uv_height
    412     mov             r8, r8, lsr #2              ;r8 = uv border * uv_stride
    413 
    414     mov             r10, #2                     ;two passes: U plane, then V plane
    415 
    416 ;copy the left and right most columns out
    417 border_copy_uv_b16
    418     sub             r5, r1, r3              ;destptr1
    419     add             r6, r1, lr
    420     sub             r6, r6, r3, lsl #1      ;destptr2
    421     sub             r2, r6, #1              ;srcptr2
    422 
    423     mov             r7, r1                  ;keep the plane start for the top/bottom copy below
    424 
    425     ;Do eight rows at one time
    426     mov             r12, r4, lsr #3         ;r12 = uv_height / 8
    427 
    428 copy_left_right_uv_b16
    429     vld1.8          {d0[]}, [r1], lr        ;d0, d4, ... d28: left pixel of rows 0-7, replicated to 8 lanes
    430     vld1.8          {d2[]}, [r2], lr        ;d2, d6, ... d30: right pixel of rows 0-7, replicated to 8 lanes
    431     vld1.8          {d4[]}, [r1], lr
    432     vld1.8          {d6[]}, [r2], lr
    433     vld1.8          {d8[]},  [r1], lr
    434     vld1.8          {d10[]}, [r2], lr
    435     vld1.8          {d12[]}, [r1], lr
    436     vld1.8          {d14[]}, [r2], lr
    437     vld1.8          {d16[]}, [r1], lr
    438     vld1.8          {d18[]}, [r2], lr
    439     vld1.8          {d20[]}, [r1], lr
    440     vld1.8          {d22[]}, [r2], lr
    441     vld1.8          {d24[]},  [r1], lr
    442     vld1.8          {d26[]}, [r2], lr
    443     vld1.8          {d28[]}, [r1], lr
    444     vld1.8          {d30[]}, [r2], lr
    445 
    446     subs            r12, r12, #1
    447 
    448     vst1.8          {d0}, [r5], lr          ;8 bytes into the left border, then step one row down
    449     vst1.8          {d2}, [r6], lr          ;8 bytes into the right border, then step one row down
    450     vst1.8          {d4}, [r5], lr
    451     vst1.8          {d6}, [r6], lr
    452     vst1.8          {d8}, [r5], lr
    453     vst1.8          {d10}, [r6], lr
    454     vst1.8          {d12}, [r5], lr
    455     vst1.8          {d14}, [r6], lr
    456     vst1.8          {d16}, [r5], lr
    457     vst1.8          {d18}, [r6], lr
    458     vst1.8          {d20}, [r5], lr
    459     vst1.8          {d22}, [r6], lr
    460     vst1.8          {d24}, [r5], lr
    461     vst1.8          {d26}, [r6], lr
    462     vst1.8          {d28}, [r5], lr
    463     vst1.8          {d30}, [r6], lr
    464 
    465     bne             copy_left_right_uv_b16  ;flags come from the subs above
    466 
    467 ;Now copy the top and bottom source lines into each line of the respective borders
    468     movs            r12, lr, lsr #6         ;r12 = full 64-byte chunks per row; Z set when there are none
    469 
    470     sub             r6, r1, r3              ;destptr2
    471     sub             r2, r6, lr              ;srcptr2
    472     sub             r1, r7, r3              ;srcptr1
    473     sub             r5, r1, r8              ;destptr1
    474     beq             check_extra_uv_b16      ;uv_stride < 64: skip the bottom-tested loop, use the tail copy only
    475 copy_top_bottom_uv_b16
    476     vld1.8          {q0, q1}, [r1]!         ;q0-q3  = next 64 bytes of the top source row
    477     vld1.8          {q8, q9}, [r2]!         ;q8-q11 = next 64 bytes of the bottom source row
    478     vld1.8          {q2, q3}, [r1]!
    479     vld1.8          {q10, q11}, [r2]!
    480 
    481     mov             r7, r3                  ;r7 = number of border rows to fill
    482 
    483 top_bottom_8_b16
    484     subs            r7, r7, #1
    485 
    486     vst1.8          {q0, q1}, [r5]!
    487     vst1.8          {q8, q9}, [r6]!
    488     vst1.8          {q2, q3}, [r5]!
    489     vst1.8          {q10, q11}, [r6]!
    490 
    491     add             r5, r5, lr
    492     sub             r5, r5, #64             ;same columns, next top-border row
    493     add             r6, r6, lr
    494     sub             r6, r6, #64             ;same columns, next bottom-border row
    495 
    496     bne             top_bottom_8_b16
    497 
    498     sub             r5, r1, r8              ;back to the first top-border row, next chunk of columns
    499     add             r6, r2, lr              ;back to the first bottom-border row, next chunk of columns
    500 
    501     subs            r12, r12, #1
    502     bne             copy_top_bottom_uv_b16
    503 check_extra_uv_b16
    504     mov             r7, lr, lsr #3              ;check to see if extra copy is needed
    505     ands            r7, r7, #0x7                ;r7 = number of 8-byte chunks left over in the row
    506     bne             extra_top_bottom_uv_b16
    507 
    508 end_of_border_copy_uv_b16
    509     subs            r10, r10, #1
    510     ldrne           r1, [r0, #yv12_buffer_config_v_buffer]       ;srcptr1
    511     bne             border_copy_uv_b16      ;second pass handles the V plane
    512 
    513     vpop            {d8-d15}
    514     pop             {r4 - r10, pc}          ;restore registers and return
    515 
    516 ;;;;;;;;;;;;;;;;;;;;;;
    517 ;extra copy part for Y
    518 extra_top_bottom_y_b16
    519     vld1.8          {q0}, [r1]!             ;next 16 bytes of the top source row
    520     vld1.8          {q2}, [r2]!             ;next 16 bytes of the bottom source row
    521 
    522     mov             r9, r3, lsr #3          ;r9 = border / 8; each pass below writes 8 rows
    523 
    524 extra_top_bottom_16_b16
    525     subs            r9, r9, #1
    526 
    527     vst1.8          {q0}, [r5], lr
    528     vst1.8          {q2}, [r6], lr
    529     vst1.8          {q0}, [r5], lr
    530     vst1.8          {q2}, [r6], lr
    531     vst1.8          {q0}, [r5], lr
    532     vst1.8          {q2}, [r6], lr
    533     vst1.8          {q0}, [r5], lr
    534     vst1.8          {q2}, [r6], lr
    535     vst1.8          {q0}, [r5], lr
    536     vst1.8          {q2}, [r6], lr
    537     vst1.8          {q0}, [r5], lr
    538     vst1.8          {q2}, [r6], lr
    539     vst1.8          {q0}, [r5], lr
    540     vst1.8          {q2}, [r6], lr
    541     vst1.8          {q0}, [r5], lr
    542     vst1.8          {q2}, [r6], lr
    543     bne             extra_top_bottom_16_b16
    544 
    545     sub             r5, r1, r8              ;first top-border row, next 16 columns
    546     add             r6, r2, lr              ;first bottom-border row, next 16 columns
    547     subs            r7, r7, #1
    548     bne             extra_top_bottom_y_b16
    549 
    550     b               end_of_border_copy_y_b16
    551 
    552 ;extra copy part for UV
    553 extra_top_bottom_uv_b16
    554     vld1.8          {d0}, [r1]!             ;next 8 bytes of the top source row
    555     vld1.8          {d8}, [r2]!             ;next 8 bytes of the bottom source row
    556 
    557     mov             r9, r3, lsr #3          ;r9 = uv border / 8; each pass below writes 8 rows
    558 
    559 extra_top_bottom_8_b16
    560     subs            r9, r9, #1
    561 
    562     vst1.8          {d0}, [r5], lr
    563     vst1.8          {d8}, [r6], lr
    564     vst1.8          {d0}, [r5], lr
    565     vst1.8          {d8}, [r6], lr
    566     vst1.8          {d0}, [r5], lr
    567     vst1.8          {d8}, [r6], lr
    568     vst1.8          {d0}, [r5], lr
    569     vst1.8          {d8}, [r6], lr
    570     vst1.8          {d0}, [r5], lr
    571     vst1.8          {d8}, [r6], lr
    572     vst1.8          {d0}, [r5], lr
    573     vst1.8          {d8}, [r6], lr
    574     vst1.8          {d0}, [r5], lr
    575     vst1.8          {d8}, [r6], lr
    576     vst1.8          {d0}, [r5], lr
    577     vst1.8          {d8}, [r6], lr
    578     bne             extra_top_bottom_8_b16
    579 
    580     sub             r5, r1, r8              ;first top-border row, next 8 columns
    581     add             r6, r2, lr              ;first bottom-border row, next 8 columns
    582     subs            r7, r7, #1
    583     bne             extra_top_bottom_uv_b16
    584 
    585     b               end_of_border_copy_uv_b16
    586 
    587     ENDP
    588     END
    589