// Home | History | Annotate | Download | only in armv8   (source-browser navigation text, not part of the assembly source)
      1 //******************************************************************************
      2 //*
      3 //* Copyright (C) 2015 The Android Open Source Project
      4 //*
      5 //* Licensed under the Apache License, Version 2.0 (the "License");
      6 //* you may not use this file except in compliance with the License.
      7 //* You may obtain a copy of the License at:
      8 //*
      9 //* http://www.apache.org/licenses/LICENSE-2.0
     10 //*
     11 //* Unless required by applicable law or agreed to in writing, software
     12 //* distributed under the License is distributed on an "AS IS" BASIS,
     13 //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 //* See the License for the specific language governing permissions and
     15 //* limitations under the License.
     16 //*
     17 //*****************************************************************************
     18 //* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 //*/
     20 ///**
     21 //******************************************************************************
     22 //* @file
     23 //*  ih264_intra_pred_luma_4x4_av8.s
     24 //*
     25 //* @brief
     26 //*  Contains function definitions for intra 4x4 Luma prediction .
     27 //*
     28 //* @author
     29 //*  Ittiam
     30 //*
     31 //* @par List of Functions:
     32 //*
     33 //*  -ih264_intra_pred_luma_4x4_mode_vert_av8
     34 //*  -ih264_intra_pred_luma_4x4_mode_horz_av8
     35 //*  -ih264_intra_pred_luma_4x4_mode_dc_av8
     36 //*  -ih264_intra_pred_luma_4x4_mode_diag_dl_av8
     37 //*  -ih264_intra_pred_luma_4x4_mode_diag_dr_av8
     38 //*  -ih264_intra_pred_luma_4x4_mode_vert_r_av8
     39 //*  -ih264_intra_pred_luma_4x4_mode_horz_d_av8
     40 //*  -ih264_intra_pred_luma_4x4_mode_vert_l_av8
     41 //*  -ih264_intra_pred_luma_4x4_mode_horz_u_av8
     42 //*
     43 //* @remarks
     44 //*  None
     45 //*
     46 //*******************************************************************************
     47 //*/
     48 
     49 ///* All the functions here are replicated from ih264_intra_pred_filters.c
     50 //
     51 
     52 ///**
     53 ///**
     54 ///**
     55 //
     56 
     57 .text
     58 .p2align 2
     59 .include "ih264_neon_macros.s"
     60 
     61 
     62 
     63 
     64 ///**
     65 //*******************************************************************************
     66 //*
     67 //*ih264_intra_pred_luma_4x4_mode_vert
     68 //*
     69 //* @brief
     70 //*  Perform Intra prediction for  luma_4x4 mode:vertical
     71 //*
     72 //* @par Description:
     73 //* Perform Intra prediction for  luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
     74 //*
     75 //* @param[in] pu1_src
     76 //*  UWORD8 pointer to the source
     77 //*
     78 //* @param[out] pu1_dst
     79 //*  UWORD8 pointer to the destination
     80 //*
     81 //* @param[in] src_strd
     82 //*  integer source stride
     83 //*
     84 //* @param[in] dst_strd
     85 //*  integer destination stride
     86 //*
     87 //* @param[in] ui_neighboravailability
     88 //* availability of neighbouring pixels(Not used in this function)
     89 //*
     90 //* @returns
     91 //*
     92 //* @remarks
     93 //*  None
     94 //*
     95 //*******************************************************************************
     96 //void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
     97 //                                        UWORD8 *pu1_dst,
     98 //                                        WORD32 src_strd,
     99 //                                        WORD32 dst_strd,
    100 //                                        WORD32 ui_neighboravailability)
    101 
    102 //**************Variables Vs Registers*****************************************
    103 //    x0 => *pu1_src
    104 //    x1 => *pu1_dst
    105 //    x2 =>  src_strd
    106 //    x3 =>  dst_strd
    107 //   x4 =>  ui_neighboravailability
    108 
    .global ih264_intra_pred_luma_4x4_mode_vert_av8

// Vertical 4x4 prediction: the four bytes at pu1_src + 5 are copied to each
// of the four destination rows.
//   x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd; x2 and x4 are not read.
ih264_intra_pred_luma_4x4_mode_vert_av8:

    push_v_regs                         // macro from ih264_neon_macros.s (body not visible here)
    sxtw      x3, w3                    // dst_strd is a WORD32: bits 63:32 of x3 are
                                        // unspecified on entry, so widen before post-indexing

    add       x0, x0, #5                // x0 -> the 4 pixels replicated down the block
    ld1       {v0.s}[0], [x0]

    st1       {v0.s}[0], [x1], x3       // row 0
    st1       {v0.s}[0], [x1], x3       // row 1
    st1       {v0.s}[0], [x1], x3       // row 2
    st1       {v0.s}[0], [x1], x3       // row 3

    pop_v_regs
    ret
    125 
    126 
    127 
    128 
    129 
    130 ///******************************************************************************
    131 
    132 
    133 ///**
    134 //*******************************************************************************
    135 //*
    136 //*ih264_intra_pred_luma_4x4_mode_horz
    137 //*
    138 //* @brief
    139 //*  Perform Intra prediction for  luma_4x4 mode:horizontal
    140 //*
    141 //* @par Description:
    142 //*  Perform Intra prediction for  luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
    143 //*
    144 //* @param[in] pu1_src
    145 //*  UWORD8 pointer to the source
    146 //*
    147 //* @param[out] pu1_dst
    148 //*  UWORD8 pointer to the destination
    149 //*
    150 //* @param[in] src_strd
    151 //*  integer source stride
    152 //*
    153 //* @param[in] dst_strd
    154 //*  integer destination stride
    155 //*
    156 //* @param[in] ui_neighboravailability
    157 //* availability of neighbouring pixels(Not used in this function)
    158 //*
    159 //* @returns
    160 //*
    161 //* @remarks
    162 //*  None
    163 //*
    164 //*******************************************************************************
    165 //*/
    166 //void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
    167 //                                         UWORD8 *pu1_dst,
    168 //                                         WORD32 src_strd,
    169 //                                         WORD32 dst_strd,
    170 //                                         WORD32 ui_neighboravailability)
    171 //**************Variables Vs Registers*****************************************
    172 //    x0 => *pu1_src
    173 //    x1 => *pu1_dst
    174 //    x2 =>  src_strd
    175 //    x3 =>  dst_strd
    176 //   x4 =>  ui_neighboravailability
    177 
    178 
    179 
    .global ih264_intra_pred_luma_4x4_mode_horz_av8

// Horizontal 4x4 prediction: each destination row is one byte of
// pu1_src[0..3] replicated four times. pu1_src[3] fills row 0 and
// pu1_src[0] fills row 3.
//   x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd; x2 and x4 are not read.
ih264_intra_pred_luma_4x4_mode_horz_av8:

    push_v_regs                         // macro from ih264_neon_macros.s (body not visible here)
    sxtw      x3, w3                    // dst_strd is a WORD32: bits 63:32 of x3 are
                                        // unspecified on entry, so widen before post-indexing

    ld1       {v1.s}[0], [x0]           // v1.b[0..3] = pu1_src[0..3]
    dup       v0.8b, v1.b[3]
    dup       v2.8b, v1.b[2]
    dup       v3.8b, v1.b[1]
    dup       v4.8b, v1.b[0]

    st1       {v0.s}[0], [x1], x3       // row 0 <- pu1_src[3]
    st1       {v2.s}[0], [x1], x3       // row 1 <- pu1_src[2]
    st1       {v3.s}[0], [x1], x3       // row 2 <- pu1_src[1]
    st1       {v4.s}[0], [x1], x3       // row 3 <- pu1_src[0]

    pop_v_regs
    ret
    198 
    199 
    200 
    201 
    202 
    203 
    204 
    205 ///******************************************************************************
    206 
    207 
    208 ///**
    209 //*******************************************************************************
    210 //*
    211 //*ih264_intra_pred_luma_4x4_mode_dc
    212 //*
    213 //* @brief
    214 //*  Perform Intra prediction for  luma_4x4 mode:DC
    215 //*
    216 //* @par Description:
    217 //*  Perform Intra prediction for  luma_4x4 mode:DC ,described in sec 8.3.1.2.3
    218 //*
    219 //* @param[in] pu1_src
    220 //*  UWORD8 pointer to the source
    221 //*
    222 //* @param[out] pu1_dst
    223 //*  UWORD8 pointer to the destination
    224 //*
    225 //* @param[in] src_strd
    226 //*  integer source stride
    227 //*
    228 //* @param[in] dst_strd
    229 //*  integer destination stride
    230 //*
    231 //* @param[in] ui_neighboravailability
    232 //*  availability of neighbouring pixels
    233 //*
    234 //* @returns
    235 //*
    236 //* @remarks
    237 //*  None
    238 //*
    239 //*******************************************************************************/
    240 //void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
    241 //                                       UWORD8 *pu1_dst,
    242 //                                       WORD32 src_strd,
    243 //                                       WORD32 dst_strd,
    244 //                                       WORD32 ui_neighboravailability)
    245 
    246 //**************Variables Vs Registers*****************************************
    247 //    x0 => *pu1_src
    248 //    x1 => *pu1_dst
    249 //    x2 =>  src_strd
    250 //    x3 =>  dst_strd
    251 //   x4 =>  ui_neighboravailability
    252 
    253 
    254 
    .global ih264_intra_pred_luma_4x4_mode_dc_av8

// DC 4x4 prediction: fills the whole block with a single value selected by
// ui_neighboravailability (bit 0x01 = left, bit 0x04 = top):
//   left and top : (sum of the 8 neighbours + 4) >> 3
//   only one side: (sum of its 4 neighbours + 2) >> 2
//   neither      : 128
// Left neighbours are read from pu1_src[0..3], top neighbours from
// pu1_src[5..8].
//   x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd, w4 = ui_neighboravailability
//   x2 (src_strd) is not read.
ih264_intra_pred_luma_4x4_mode_dc_av8:

    push_v_regs                         // macro from ih264_neon_macros.s (body not visible here)
    sxtw      x3, w3                    // dst_strd is a WORD32: bits 63:32 of x3 are
                                        // unspecified on entry, so widen before post-indexing

    tst       w4, #0x01
    beq       top_available             // LEFT NOT AVAILABLE

    // ldrb zero-extends, so the byte sums need no further extension.
    ldrb      w5, [x0, #3]
    ldrb      w6, [x0, #2]
    ldrb      w7, [x0, #1]
    ldrb      w8, [x0]
    add       w5, w5, w6
    add       w7, w7, w8
    add       w5, w5, w7                // w5 = sum of the four left neighbours
    tst       w4, #0x04
    beq       left_available            // TOP NOT AVAILABLE

    // BOTH LEFT AND TOP AVAILABLE
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       w6, w6, w7
    add       w8, w8, w9
    add       w5, w5, w6
    add       w5, w5, w8                // w5 = left sum + top sum
    add       w5, w5, #4
    lsr       w5, w5, #3
    b         1f

top_available:                          // LEFT NOT AVAILABLE: CHECK TOP
    tst       w4, #0x04
    beq       none_available

    // ONLY TOP AVAILABLE
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       w5, w6, w7
    add       w8, w8, w9
    add       w5, w5, w8                // w5 = sum of the four top neighbours
    add       w5, w5, #2
    lsr       w5, w5, #2
    b         1f

left_available:                         // ONLY LEFT AVAILABLE (w5 already holds the left sum)
    add       w5, w5, #2
    lsr       w5, w5, #2
    b         1f

none_available:                         // NONE AVAILABLE
    mov       w5, #128

1:                                      // common tail: broadcast w5 and write 4 rows
    dup       v0.8b, w5
    st1       {v0.s}[0], [x1], x3
    st1       {v0.s}[0], [x1], x3
    st1       {v0.s}[0], [x1], x3
    st1       {v0.s}[0], [x1], x3

end_func:

    pop_v_regs
    ret
    356 
    357 
    358 
    359 
    360 
    361 
    362 
    363 ///**
    364 //*******************************************************************************
    365 //*
    366 //*ih264_intra_pred_luma_4x4_mode_diag_dl
    367 //*
    368 //* @brief
    369 //*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Left
    370 //*
    371 //* @par Description:
    372 //*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
    373 //*
    374 //* @param[in] pu1_src
    375 //*  UWORD8 pointer to the source
    376 //*
    377 //* @param[out] pu1_dst
    378 //*  UWORD8 pointer to the destination
    379 //*
    380 //* @param[in] src_strd
    381 //*  integer source stride
    382 //*
    383 //* @param[in] dst_strd
    384 //*  integer destination stride
    385 //*
    386 //* @param[in] ui_neighboravailability
    387 //*  availability of neighbouring pixels
    388 //*
    389 //* @returns
    390 //*
    391 //* @remarks
    392 //*  None
    393 //*
    394 //*******************************************************************************/
    395 //void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
    396 //                                            UWORD8 *pu1_dst,
    397 //                                            WORD32 src_strd,
    398 //                                              WORD32 dst_strd,
    399 //                                              WORD32 ui_neighboravailability)
    400 
    401 //**************Variables Vs Registers*****************************************
    402 //    x0 => *pu1_src
    403 //    x1 => *pu1_dst
    404 //    x2 =>  src_strd
    405 //    x3 =>  dst_strd
    406 //   x4 =>  ui_neighboravailability
    407 
    408 
    .global ih264_intra_pred_luma_4x4_mode_diag_dl_av8

// Diagonal-down-left 4x4 prediction.
// With p[0..7] = pu1_src[5..12] and p[8] taken as p[7]:
//   f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2        (i = 0..6)
// Destination row r receives f[r .. r+3].
//   x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd; x2 and x4 are not read.
ih264_intra_pred_luma_4x4_mode_diag_dl_av8:

    push_v_regs                         // macro from ih264_neon_macros.s (body not visible here)
    sxtw      x3, w3                    // dst_strd is a WORD32: bits 63:32 of x3 are
                                        // unspecified on entry, so widen before it is used
                                        // as a 64-bit offset (also feeds x5 below)

    add       x0, x0, #5
    sub       x5, x3, #2                // x5 = step from column 2 of one row to column 0 of the next
    add       x6, x0, #7                // x6 -> p[7]
    ld1       {v0.8b}, [x0]             // v0 = p[0..7]
    ext       v1.8b, v0.8b, v0.8b, #1   // v1 = p[1..7], (wrapped lane)
    ext       v2.8b, v0.8b, v0.8b, #2   // v2 = p[2..7], (wrapped lanes)
    ld1       {v2.b}[6], [x6]           // lane 6 of v2 <- p[7], so f[6] = p[6] + 3*p[7]
    uaddl     v20.8h, v0.8b, v1.8b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // p[i] + 2*p[i+1] + p[i+2]
    sqrshrun  v3.8b, v24.8h, #2         // v3 = f[0..7] (lane 7 is never stored)
    ext       v4.8b, v3.8b, v3.8b, #1   // v4 = f[1..]

    st1       {v3.s}[0], [x1], x3       // row 0 = f[0..3]
    st1       {v4.s}[0], [x1], x3       // row 1 = f[1..4]
    st1       {v3.h}[1], [x1], #2       // row 2 = f[2..3],
    st1       {v3.h}[2], [x1], x5       //         f[4..5]
    st1       {v4.h}[1], [x1], #2       // row 3 = f[3..4],
    st1       {v4.h}[2], [x1]           //         f[5..6]

end_func_diag_dl:

    pop_v_regs
    ret
    441 
    442 
    443 
    444 
    445 
    446 
    447 
    448 
    449 
    450 ///**
    451 //*******************************************************************************
    452 //*
    453 //*ih264_intra_pred_luma_4x4_mode_diag_dr
    454 //*
    455 //* @brief
    456 //* Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Right
    457 //*
    458 //* @par Description:
    459 //*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
    460 //*
    461 //* @param[in] pu1_src
    462 //*  UWORD8 pointer to the source
    463 //*
    464 //* @param[out] pu1_dst
    465 //*  UWORD8 pointer to the destination
    466 //*
    467 //* @param[in] src_strd
    468 //*  integer source stride
    469 //*
    470 //* @param[in] dst_strd
    471 //*  integer destination stride
    472 //*
    473 //* @param[in] ui_neighboravailability
    474 //*  availability of neighbouring pixels
    475 //*
    476 //* @returns
    477 //*
    478 //* @remarks
    479 //*  None
    480 //*
    481 //*******************************************************************************/
    482 //void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
    483 //                                            UWORD8 *pu1_dst,
    484 //                                            WORD32 src_strd,
    485 //                                              WORD32 dst_strd,
    486 //                                              WORD32 ui_neighboravailability)
    487 
    488 //**************Variables Vs Registers*****************************************
    489 //    x0 => *pu1_src
    490 //    x1 => *pu1_dst
    491 //    x2 =>  src_strd
    492 //    x3 =>  dst_strd
    493 //   x4 =>  ui_neighboravailability
    494 
    495 
    .global ih264_intra_pred_luma_4x4_mode_diag_dr_av8

// Diagonal-down-right 4x4 prediction.
// With p[0..8] = pu1_src[0..8]:
//   f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2        (i = 0..6)
// Destination row r receives f[3-r .. 6-r].
//   x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd; x2 and x4 are not read.
ih264_intra_pred_luma_4x4_mode_diag_dr_av8:

    push_v_regs                         // macro from ih264_neon_macros.s (body not visible here)
    sxtw      x3, w3                    // dst_strd is a WORD32: bits 63:32 of x3 are
                                        // unspecified on entry, so widen before it is used
                                        // as a 64-bit offset (also feeds x5 below)

    ld1       {v0.8b}, [x0]             // v0 = p[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = p[1..8]
    ext       v2.8b, v1.8b, v1.8b, #1   // v2 = p[2..8], (wrapped lane)
    uaddl     v20.8h, v0.8b, v1.8b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // p[i] + 2*p[i+1] + p[i+2]
    sqrshrun  v3.8b, v24.8h, #2         // v3 = f[0..7] (lane 7 is never stored)
    ext       v4.8b, v3.8b, v3.8b, #1   // v4 = f[1..]

    sub       x5, x3, #2                // x5 = step from column 2 of one row to column 0 of the next
    st1       {v4.h}[1], [x1], #2       // row 0 = f[3..4],
    st1       {v4.h}[2], [x1], x5       //         f[5..6]
    st1       {v3.h}[1], [x1], #2       // row 1 = f[2..3],
    st1       {v3.h}[2], [x1], x5       //         f[4..5]
    st1       {v4.s}[0], [x1], x3       // row 2 = f[1..4]
    st1       {v3.s}[0], [x1], x3       // row 3 = f[0..3]

end_func_diag_dr:
    pop_v_regs
    ret
    526 
    527 
    528 
    529 
    530 
    531 
    532 
    533 ///**
    534 //*******************************************************************************
    535 //*
    536 //*ih264_intra_pred_luma_4x4_mode_vert_r
    537 //*
    538 //* @brief
    539 //* Perform Intra prediction for  luma_4x4 mode:Vertical_Right
    540 //*
    541 //* @par Description:
    542 //*   Perform Intra prediction for  luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
    543 //*
    544 //* @param[in] pu1_src
    545 //*  UWORD8 pointer to the source
    546 //*
    547 //* @param[out] pu1_dst
    548 //*  UWORD8 pointer to the destination
    549 //*
    550 //* @param[in] src_strd
    551 //*  integer source stride
    552 //*
    553 //* @param[in] dst_strd
    554 //*  integer destination stride
    555 //*
    556 //* @param[in] ui_neighboravailability
    557 //*  availability of neighbouring pixels
    558 //*
    559 //* @returns
    560 //*
    561 //* @remarks
    562 //*  None
    563 //*
    564 //*******************************************************************************/
    565 //void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
    566 //                                            UWORD8 *pu1_dst,
    567 //                                            WORD32 src_strd,
    568 //                                              WORD32 dst_strd,
    569 //                                              WORD32 ui_neighboravailability)
    570 
    571 //**************Variables Vs Registers*****************************************
    572 //    x0 => *pu1_src
    573 //    x1 => *pu1_dst
    574 //    x2 =>  src_strd
    575 //    x3 =>  dst_strd
    576 //   x4 =>  ui_neighboravailability
    577 
    578 
    .global ih264_intra_pred_luma_4x4_mode_vert_r_av8

// Vertical-right 4x4 prediction.
// With p[0..8] = pu1_src[0..8]:
//   a[i] = (p[i] + p[i+1] + 1) >> 1
//   f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2
// Rows written:
//   row 0 = a[4], a[5], a[6], a[7]
//   row 1 = f[3], f[4], f[5], f[6]
//   row 2 = f[2], a[4], a[5], a[6]
//   row 3 = f[1], f[3], f[4], f[5]
//   x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd; x2 and x4 are not read.
ih264_intra_pred_luma_4x4_mode_vert_r_av8:

    push_v_regs                         // macro from ih264_neon_macros.s (body not visible here)
    sxtw      x3, w3                    // dst_strd is a WORD32: bits 63:32 of x3 are
                                        // unspecified on entry, so widen before it is used
                                        // as a 64-bit offset (also feeds x8 below)

    ld1       {v0.8b}, [x0]             // v0 = p[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = p[1..8]
    ext       v2.8b, v1.8b, v1.8b, #1   // v2 = p[2..8], (wrapped lane)
    uaddl     v20.8h, v0.8b, v1.8b      // p[i] + p[i+1]
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // p[i] + 2*p[i+1] + p[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[0..7]
    sqrshrun  v3.8b, v24.8h, #2         // v3 = f[0..7] (lane 7 is never stored)
    ext       v5.8b, v3.8b, v3.8b, #3   // v5 = f[3..]

    st1       {v4.s}[1], [x1], x3       // row 0 = a[4..7]
    st1       {v5.s}[0], [x1], x3       // row 1 = f[3..6]

    sub       x8, x3, #3                // x8 = step from column 3 of one row to column 0 of the next
    st1       {v3.b}[2], [x1], #1       // row 2 = f[2],
    st1       {v4.h}[2], [x1], #2       //         a[4..5],
    st1       {v4.b}[6], [x1], x8       //         a[6]
    st1       {v3.b}[1], [x1], #1       // row 3 = f[1],
    st1       {v5.h}[0], [x1], #2       //         f[3..4],
    st1       {v5.b}[2], [x1]           //         f[5]

end_func_vert_r:
    pop_v_regs
    ret
    613 
    614 
    615 
    616 
    617 
    618 ///**
    619 //*******************************************************************************
    620 //*
    621 //*ih264_intra_pred_luma_4x4_mode_horz_d
    622 //*
    623 //* @brief
    624 //* Perform Intra prediction for  luma_4x4 mode:Horizontal_Down
    625 //*
    626 //* @par Description:
    627 //*   Perform Intra prediction for  luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
    628 //*
    629 //* @param[in] pu1_src
    630 //*  UWORD8 pointer to the source
    631 //*
    632 //* @param[out] pu1_dst
    633 //*  UWORD8 pointer to the destination
    634 //*
    635 //* @param[in] src_strd
    636 //*  integer source stride
    637 //*
    638 //* @param[in] dst_strd
    639 //*  integer destination stride
    640 //*
    641 //* @param[in] ui_neighboravailability
    642 //*  availability of neighbouring pixels
    643 //*
    644 //* @returns
    645 //*
    646 //* @remarks
    647 //*  None
    648 //*
    649 //*******************************************************************************/
    650 //void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
    651 //                                            UWORD8 *pu1_dst,
    652 //                                            WORD32 src_strd,
    653 //                                              WORD32 dst_strd,
    654 //                                              WORD32 ui_neighboravailability)
    655 
    656 //**************Variables Vs Registers*****************************************
    657 //    x0 => *pu1_src
    658 //    x1 => *pu1_dst
    659 //    x2 =>  src_strd
    660 //    x3 =>  dst_strd
    661 //   x4 =>  ui_neighboravailability
    662 
    663 
    .global ih264_intra_pred_luma_4x4_mode_horz_d_av8

// Horizontal-down 4x4 prediction.
// With p[0..8] = pu1_src[0..8]:
//   a[i] = (p[i] + p[i+1] + 1) >> 1
//   f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2
// Rows written:
//   row 0 = a[3], f[3], f[4], f[5]
//   row 1 = a[2], f[2], a[3], f[3]
//   row 2 = a[1], f[1], a[2], f[2]
//   row 3 = a[0], f[0], a[1], f[1]
// Only caller-saved SIMD registers (v0-v7, v16-v24) are written.
//   x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd; x2 and x4 are not read.
ih264_intra_pred_luma_4x4_mode_horz_d_av8:

    push_v_regs                         // macro from ih264_neon_macros.s (body not visible here)
    sxtw      x3, w3                    // dst_strd is a WORD32: bits 63:32 of x3 are
                                        // unspecified on entry, so widen before it is used
                                        // as a 64-bit offset (also feeds x5 below)

    ld1       {v0.8b}, [x0]             // v0 = p[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = p[1..8]
    ext       v2.8b, v1.8b, v0.8b, #1   // v2 = p[2..8], p[0] (last lane is never used in a stored result)
    uaddl     v20.8h, v0.8b, v1.8b      // p[i] + p[i+1]
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // p[i] + 2*p[i+1] + p[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[0..7]
    sqrshrun  v5.8b, v24.8h, #2         // v5 = f[0..7]
    trn1      v16.8b, v4.8b, v5.8b      // v16 = a0 f0 a2 f2 a4 f4 a6 f6
    trn2      v17.8b, v4.8b, v5.8b      // v17 = a1 f1 a3 f3 a5 f5 a7 f7

    sub       x5, x3, #2                // x5 = step from column 2 of one row to column 0 of the next
    st1       {v17.h}[1], [x1], #2      // row 0 = a[3] f[3],
    st1       {v5.h}[2], [x1], x5       //         f[4] f[5]
    st1       {v16.h}[1], [x1], #2      // row 1 = a[2] f[2],
    st1       {v17.h}[1], [x1], x5      //         a[3] f[3]
    st1       {v17.h}[0], [x1], #2      // row 2 = a[1] f[1],
    st1       {v16.h}[1], [x1], x5      //         a[2] f[2]
    st1       {v16.h}[0], [x1], #2      // row 3 = a[0] f[0],
    st1       {v17.h}[0], [x1], x5      //         a[1] f[1]

end_func_horz_d:
    pop_v_regs
    ret
    698 
    699 
    700 
    701 
    702 
    703 
    704 
    705 ///**
    706 //*******************************************************************************
    707 //*
    708 //*ih264_intra_pred_luma_4x4_mode_vert_l
    709 //*
    710 //* @brief
    711 //*  Perform Intra prediction for  luma_4x4 mode:Vertical_Left
    712 //*
    713 //* @par Description:
    714 //*   Perform Intra prediction for  luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
    715 //*
    716 //* @param[in] pu1_src
    717 //*  UWORD8 pointer to the source
    718 //*
    719 //* @param[out] pu1_dst
    720 //*  UWORD8 pointer to the destination
    721 //*
    722 //* @param[in] src_strd
    723 //*  integer source stride
    724 //*
    725 //* @param[in] dst_strd
    726 //*  integer destination stride
    727 //*
    728 //* @param[in] ui_neighboravailability
    729 //*  availability of neighbouring pixels
    730 //*
    731 //* @returns
    732 //*
    733 //* @remarks
    734 //*  None
    735 //*
    736 //*******************************************************************************/
    737 //void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
    738 //                                            UWORD8 *pu1_dst,
    739 //                                            WORD32 src_strd,
    740 //                                              WORD32 dst_strd,
    741 //                                              WORD32 ui_neighboravailability)
    742 
    743 //**************Variables Vs Registers*****************************************
    744 //    x0 => *pu1_src
    745 //    x1 => *pu1_dst
    746 //    x2 =>  src_strd
    747 //    x3 =>  dst_strd
    748 //   x4 =>  ui_neighboravailability
    749 
    750 
    .global ih264_intra_pred_luma_4x4_mode_vert_l_av8

// Vertical-left 4x4 prediction.
// With q[0..8] = pu1_src[4..12]:
//   a[i] = (q[i] + q[i+1] + 1) >> 1
//   f[i] = (q[i] + 2*q[i+1] + q[i+2] + 2) >> 2
// Rows written:
//   row 0 = a[1..4]
//   row 1 = f[1..4]
//   row 2 = a[2..5]
//   row 3 = f[2..5]
// Only caller-saved SIMD registers (v0-v7, v16-v24) are written.
//   x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd; x2 and x4 are not read.
ih264_intra_pred_luma_4x4_mode_vert_l_av8:

    push_v_regs                         // macro from ih264_neon_macros.s (body not visible here)
    sxtw      x3, w3                    // dst_strd is a WORD32: bits 63:32 of x3 are
                                        // unspecified on entry, so widen before post-indexing

    add       x0, x0, #4
    ld1       {v0.8b}, [x0]             // v0 = q[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = q[1..8]
    ext       v2.8b, v1.8b, v0.8b, #1   // v2 = q[2..8], q[0] (last lane is never used in a stored result)
    uaddl     v20.8h, v0.8b, v1.8b      // q[i] + q[i+1]
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // q[i] + 2*q[i+1] + q[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[0..7]
    sqrshrun  v5.8b, v24.8h, #2         // v5 = f[0..7]

    ext       v6.8b, v4.8b, v4.8b, #1   // v6  = a[1..]
    ext       v7.8b, v5.8b, v5.8b, #1   // v7  = f[1..]
    ext       v16.8b, v4.8b, v4.8b, #2  // v16 = a[2..]
    ext       v17.8b, v5.8b, v5.8b, #2  // v17 = f[2..]

    st1       {v6.s}[0], [x1], x3       // row 0 = a[1..4]
    st1       {v7.s}[0], [x1], x3       // row 1 = f[1..4]
    st1       {v16.s}[0], [x1], x3      // row 2 = a[2..5]
    st1       {v17.s}[0], [x1], x3      // row 3 = f[2..5]

end_func_vert_l:
    pop_v_regs
    ret
    780 
    781 
    782 
    783 
    784 
    785 
    786 
    787 ///**
    788 //*******************************************************************************
    789 //*
    790 //*ih264_intra_pred_luma_4x4_mode_horz_u
    791 //*
    792 //* @brief
    793 //*     Perform Intra prediction for  luma_4x4 mode:Horizontal_Up
    794 //*
    795 //* @par Description:
    796 //*      Perform Intra prediction for  luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
    797 //*
    798 //* @param[in] pu1_src
    799 //*  UWORD8 pointer to the source
    800 //*
    801 //* @param[out] pu1_dst
    802 //*  UWORD8 pointer to the destination
    803 //*
    804 //* @param[in] src_strd
    805 //*  integer source stride
    806 //*
    807 //* @param[in] dst_strd
    808 //*  integer destination stride
    809 //*
    810 //* @param[in] ui_neighboravailability
    811 //*  availability of neighbouring pixels
    812 //*
    813 //* @returns
    814 //*
    815 //* @remarks
    816 //*  None
    817 //*
    818 //*******************************************************************************/
    819 //void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
    820 //                                           UWORD8 *pu1_dst,
    821 //                                           WORD32 src_strd,
    822 //                                             WORD32 dst_strd,
    823 //                                             WORD32 ui_neighboravailability)
    824 
    825 //**************Variables Vs Registers*****************************************
    826 //    x0 => *pu1_src
    827 //    x1 => *pu1_dst
    828 //    x2 =>  src_strd
    829 //    x3 =>  dst_strd
    830 //   x4 =>  ui_neighboravailability
    831 
    832 
    .global ih264_intra_pred_luma_4x4_mode_horz_u_av8

// Horizontal-up 4x4 prediction.
// With p[0..7] = pu1_src[0..7]:
//   a[i] = (p[i] + p[i+1] + 1) >> 1
//   f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2
//   g    = (3*p[0] + p[1] + 2) >> 2
// Rows written:
//   row 0 = a[2], f[1], a[1], f[0]
//   row 1 = a[1], f[0], a[0], g
//   row 2 = a[0], g,    p[0], p[0]
//   row 3 = p[0], p[0], p[0], p[0]
// Only caller-saved SIMD registers (v0-v7, v16-v24) are written.
//   x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd; x2 and x4 are not read.
ih264_intra_pred_luma_4x4_mode_horz_u_av8:

    push_v_regs                         // macro from ih264_neon_macros.s (body not visible here)
    sxtw      x3, w3                    // dst_strd is a WORD32: bits 63:32 of x3 are
                                        // unspecified on entry, so widen before it is used
                                        // as a 64-bit offset (also feeds x5 below)

    ld1       {v0.8b}, [x0]             // v0 = p[0..7]
    dup       v7.8b, v0.b[0]            // v7 = p[0] in every lane
    ext       v1.8b, v0.8b, v0.8b, #1   // v1 = p[1..7], p[0]   (taken before lane 7 of v0 is patched)
    mov       v0.b[7], v0.b[0]          // v0 = p[0..6], p[0]   -> lane 7 of the sums yields g
    ext       v2.8b, v1.8b, v1.8b, #1   // v2 = p[2..7], p[0], p[1]
    uaddl     v20.8h, v0.8b, v1.8b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[0..6], (lane 7 unused)
    sqrshrun  v5.8b, v24.8h, #2         // v5 = f[0..6], g
    ext       v6.8b, v5.8b, v4.8b, #1   // v6 = f[1..6], g, a[0]

    st1       {v4.b}[2], [x1], #1       // row 0, column 0 = a[2]
    st1       {v6.b}[0], [x1], #1       // row 0, column 1 = f[1]

    trn1      v16.8b, v6.8b, v5.8b      // v16 = f1 f0 f3 f2 ...
    trn2      v17.8b, v6.8b, v5.8b      // v17 = ... a0 g   (halfword lane 3)
    trn2      v18.8b, v4.8b, v16.8b     // v18 = a1 f0 ...  (halfword lane 0)

    sub       x5, x3, #2                // x5 = step from column 2 of one row to column 0 of the next
    st1       {v18.h}[0], [x1], x5      // row 0, columns 2-3 = a[1] f[0]
    st1       {v18.h}[0], [x1], #2      // row 1 = a[1] f[0],
    st1       {v17.h}[3], [x1], x5      //         a[0] g
    st1       {v17.h}[3], [x1], #2      // row 2 = a[0] g,
    st1       {v7.h}[3], [x1], x5       //         p[0] p[0]
    st1       {v7.s}[0], [x1], x3       // row 3 = p[0] x 4

end_func_horz_u:
    pop_v_regs
    ret
    874 
    875 
    876 
    877