Home | History | Annotate | Download | only in arm
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2015 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 @**
     21 @******************************************************************************
     22 @* @file
     23 @*  ih264_intra_pred_luma_8x8_a9q.s
     24 @*
     25 @* @brief
     26 @*  Contains function definitions for intra 8x8 luma prediction.
     27 @*
     28 @* @author
     29 @*  Ittiam
     30 @*
     31 @* @par List of Functions:
     32 @*
     33 @*  -ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
     34 @*  -ih264_intra_pred_luma_8x8_mode_vert_a9q
     35 @*  -ih264_intra_pred_luma_8x8_mode_horz_a9q
     36 @*  -ih264_intra_pred_luma_8x8_mode_dc_a9q
     37 @*  -ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
     38 @*  -ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
     39 @*  -ih264_intra_pred_luma_8x8_mode_vert_r_a9q
     40 @*  -ih264_intra_pred_luma_8x8_mode_horz_d_a9q
     41 @*  -ih264_intra_pred_luma_8x8_mode_vert_l_a9q
     42 @*  -ih264_intra_pred_luma_8x8_mode_horz_u_a9q
     43 @*
     44 @* @remarks
     45 @*  None
     46 @*
     47 @*******************************************************************************
     48 @*
     49 
     50 @* All the functions here are replicated from ih264_intra_pred_filters.c
     51 @
     52 
     53 .text
     54 .p2align 2                              @ align to 2^2 = 4 bytes
     55 
     56     .extern ih264_gai1_intrapred_luma_8x8_horz_u
     57 .hidden ih264_gai1_intrapred_luma_8x8_horz_u
     58 scratch_intrapred_addr_8x8:             @ literal word: link-time offset from label scrlb8x8l2 to the external table
     59     .long ih264_gai1_intrapred_luma_8x8_horz_u -  scrlb8x8l2 - 8   @ scrlb8x8l2 is defined outside this excerpt; the -8 presumably cancels the ARM-state PC read-ahead at that label -- TODO confirm at the use site
     60 
     61 @**
     62 @*******************************************************************************
     63 @*
     64 @*ih264_intra_pred_luma_8x8_mode_ref_filtering
     65 @*
     66 @* @brief
     67 @* Reference sample filtering process for Intra_8x8 sample prediction
     68 @*
     69 @* @par Description:
     70 @*  Perform the reference sample filtering process for Intra_8x8 sample prediction, described in sec 8.3.2.2.1
     71 @*
     72 @* @param[in] pu1_src
     73 @*  UWORD8 pointer to the source
     74 @*
     75 @* @param[out] pu1_dst
     76 @*  UWORD8 pointer to the destination
     77 @*
     78 @* @param[in] src_strd
     79 @*  integer source stride [Not used]
     80 @*
     81 @* @param[in] dst_strd
     82 @*  integer destination stride[Not used]
     83 @*
     84 @* @param[in] ui_neighboravailability
     85 @*  availability of neighbouring pixels[Not used]
     86 @*
     87 @* @returns
     88 @*
     89 @* @remarks
     90 @*  None
     91 @*
     92 @*******************************************************************************
     93 @void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src,
     94 @                                                 UWORD8 *pu1_dst)
     95 
     96 @**************Variables Vs Registers*****************************************
     97 @   r0 => *pu1_src
     98 @   r1 => *pu1_dst
     99 
    100 
    101     .global ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
    102 
    103 ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:
    104 @ Smooths the 25 neighbour bytes at r0 with a (1,2,1)/4 filter and writes 25 bytes at r1; the two end samples use (3,1)/4.
    105     stmfd         sp!, {r4-r12, r14}    @ save core registers and lr (r4-r12 are not modified below)
    106     vpush         {d8-d15}              @ q4-q7 (d8-d15) are written below, so preserve them for the caller
    107 
    108     vld1.u8       {q0}, [r0]!           @ q0 = src[0..15]; r0 = src + 16
    109     vld1.u8       {q1}, [r0]            @ q1 = src[16..31] (only src[16..24] reach the stored output)
    110     add           r0, r0, #8            @ r0 = &src[24]
    111     vext.8        q2, q0, q1, #1        @ q2 = src[1..16]
    112     vext.8        q3, q1, q1, #1        @ q3 = src[17..31], src[16] (d6 = src[17..24])
    113     vext.8        q4, q2, q3, #1        @ q4 = src[2..17]
    114     vext.8        q5, q3, q3, #1        @ d10 = src[18..25]
    115     vld1.8        {d10[7]}, [r0]        @ LOADING SRC[24] AGAIN INTO THE LAST LANE FOR p'[ 15, -1 ] = ( p[ 14, -1 ] + 3 * p[ 15, -1 ] + 2 ) >> 2
    116     vaddl.u8      q10, d0, d4           @ q10 = src[i] + src[i+1], i = 0..7 (widened to 16 bit)
    117     vaddl.u8      q7, d0, d0            @    SPECIAL CASE FOR p'[ -1 ,7 ] = ( p[ -1, 6 ] + 3 * p[ -1, 7 ] + 2 ) >> 2
    118     vadd.u16      q7, q10, q7           @ q7 = 3*src[i] + src[i+1]; only lane 0 is stored
    119     vaddl.u8      q11, d1, d5           @ q11 = src[i] + src[i+1], i = 8..15
    120     vqrshrun.s16  d14, q7, #2           @ d14 = (q7 + 2) >> 2, narrowed to bytes
    121     vaddl.u8      q12, d4, d8           @ q12 = src[i+1] + src[i+2], i = 0..7
    122     vaddl.u8      q13, d5, d9           @ q13 = src[i+1] + src[i+2], i = 8..15
    123     vst1.8        {d14[0]}, [r1]!       @ dst[0] = (3*src[0] + src[1] + 2) >> 2; r1 += 1
    124     vadd.u16      q12, q10, q12         @ q12 = src[i] + 2*src[i+1] + src[i+2], i = 0..7
    125     vadd.u16      q13, q11, q13         @ q13 = same sum, i = 8..15
    126     vaddl.u8      q9, d2, d6            @ q9 = src[16..23] + src[17..24]
    127     vaddl.u8      q8, d6, d10           @ q8 = src[17..24] + {src[18..24], src[24]}
    128     vqrshrun.s16  d4, q12, #2           @ d4 = filtered values for dst[1..8]
    129     vqrshrun.s16  d5, q13, #2           @ d5 = filtered values for dst[9..16]
    130     vadd.u16      q6, q8, q9            @ q6 = 1-2-1 sums for dst[17..24]; last lane = src[23] + 3*src[24]
    131     vst1.8        {q2}, [r1]!           @ store dst[1..16]; r1 += 16
    132     vqrshrun.s16  d6, q6, #2            @ d6 = (q6 + 2) >> 2
    133     vst1.8        {d6}, [r1]            @ store dst[17..24]
    134 
    135 
    136 end_func_ref_filt:
    137 
    138     vpop          {d8-d15}              @ restore the NEON registers saved on entry
    139     ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack
    140 
    141 
    142 
    143 
    144 
    145 
    146 @**
    147 @*******************************************************************************
    148 @*
    149 @*ih264_intra_pred_luma_8x8_mode_vert
    150 @*
    151 @* @brief
    152 @*   Perform Intra prediction for  luma_8x8 mode:vertical
    153 @*
    154 @* @par Description:
    155 @* Perform Intra prediction for  luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
    156 @*
    157 @* @param[in] pu1_src
    158 @*  UWORD8 pointer to the source
    159 @*
    160 @* @param[out] pu1_dst
    161 @*  UWORD8 pointer to the destination
    162 @*
    163 @* @param[in] src_strd
    164 @*  integer source stride
    165 @*
    166 @* @param[in] dst_strd
    167 @*  integer destination stride
    168 @*
    169 @* @param[in] ui_neighboravailability
    170 @* availability of neighbouring pixels(Not used in this function)
    171 @*
    172 @* @returns
    173 @*
    174 @* @remarks
    175 @*  None
    176 @*
    177 @*******************************************************************************
    178 @void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
    179 @                                        UWORD8 *pu1_dst,
    180 @                                        WORD32 src_strd,
    181 @                                        WORD32 dst_strd,
    182 @                                        WORD32 ui_neighboravailability)
    183 
    184 @**************Variables Vs Registers*****************************************
    185 @   r0 => *pu1_src
    186 @   r1 => *pu1_dst
    187 @   r2 =>  src_strd
    188 @   r3 =>  dst_strd
    189 @   [sp] => ui_neighboravailability (5th argument, passed on the stack; never read by this function)
    190 
    191 
    192     .global ih264_intra_pred_luma_8x8_mode_vert_a9q
    193 
    194 ih264_intra_pred_luma_8x8_mode_vert_a9q:
    195     @ Copies the 8 bytes at pu1_src + 9 into each of the 8 destination rows.
    196     @ Leaf routine: only r0, r1, r3 and d0 are touched (all caller-saved), so nothing is pushed.
    197 
    198     add           r0, r0, #9            @ r0 = &pu1_src[9]
    199     vld1.8        d0, [r0]              @ d0 = pu1_src[9..16]
    200 
    201     vst1.8        d0, [r1], r3          @ row 0; r1 += dst_strd after every store
    202     vst1.8        d0, [r1], r3          @ row 1
    203     vst1.8        d0, [r1], r3          @ row 2
    204     vst1.8        d0, [r1], r3          @ row 3
    205     vst1.8        d0, [r1], r3          @ row 4
    206     vst1.8        d0, [r1], r3          @ row 5
    207     vst1.8        d0, [r1], r3          @ row 6
    208     vst1.8        d0, [r1], r3          @ row 7
    209 
    210     bx            lr                    @ return; sp and the callee-saved registers were never modified
    211 
    212 
    213 
    214 
    215 
    216 @******************************************************************************
    217 
    218 
    219 @**
    220 @*******************************************************************************
    221 @*
    222 @*ih264_intra_pred_luma_8x8_mode_horz
    223 @*
    224 @* @brief
    225 @*  Perform Intra prediction for  luma_8x8 mode:horizontal
    226 @*
    227 @* @par Description:
    228 @*  Perform Intra prediction for  luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2
    229 @*
    230 @* @param[in] pu1_src
    231 @*  UWORD8 pointer to the source
    232 @*
    233 @* @param[out] pu1_dst
    234 @*  UWORD8 pointer to the destination
    235 @*
    236 @* @param[in] src_strd
    237 @*  integer source stride
    238 @*
    239 @* @param[in] dst_strd
    240 @*  integer destination stride
    241 @*
    242 @* @param[in] ui_neighboravailability
    243 @* availability of neighbouring pixels(Not used in this function)
    244 @*
    245 @* @returns
    246 @*
    247 @* @remarks
    248 @*  None
    249 @*
    250 @*******************************************************************************
    251 @*
    252 @void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
    253 @                                         UWORD8 *pu1_dst,
    254 @                                         WORD32 src_strd,
    255 @                                         WORD32 dst_strd,
    256 @                                         WORD32 ui_neighboravailability)
    257 @**************Variables Vs Registers*****************************************
    258 @   r0 => *pu1_src
    259 @   r1 => *pu1_dst
    260 @   r2 =>  src_strd
    261 @   r3 =>  dst_strd
    262 @   [sp] => ui_neighboravailability (5th argument, passed on the stack; never read by this function)
    263 
    264 
    265     .global ih264_intra_pred_luma_8x8_mode_horz_a9q
    266 
    267 ih264_intra_pred_luma_8x8_mode_horz_a9q:
    268     @ Row k of the 8x8 block is filled with the single byte pu1_src[7 - k], k = 0..7.
    269     @ Leaf routine: only r0-r3 and d0-d2 are touched (all caller-saved), so nothing is pushed.
    270 
    271     vld1.u8       {d0}, [r0]            @ d0 = pu1_src[0..7]
    272     mov           r2, #6                @ r2 = rows still to be produced inside the loop (2 per pass)
    273 
    274     vdup.u8       d1, d0[7]             @ d1 = pu1_src[7] in every lane
    275     vdup.u8       d2, d0[6]             @ d2 = pu1_src[6] in every lane
    276     vst1.8        {d1}, [r1], r3        @ row 0; r1 += dst_strd
    277 
    278 loop_8x8_horz:
    279     vext.8        d0, d0, d0, #6        @ rotate so the next two samples land in lanes 7 and 6
    280     vst1.8        {d2}, [r1], r3        @ odd row prepared in the previous step
    281     vdup.u8       d1, d0[7]             @ d1 = sample for the next even row
    282     subs          r2, #2                @ two more rows accounted for; sets Z when done
    283     vdup.u8       d2, d0[6]             @ d2 = sample for the next odd row
    284     vst1.8        {d1}, [r1], r3        @ even row
    285     bne           loop_8x8_horz         @ NEON instructions do not alter the flags set by subs
    286 
    287     @ d2 already holds pu1_src[0]; no further rotation of d0 is needed
    288     vst1.8        {d2}, [r1], r3        @ row 7
    289 
    290     bx            lr                    @ return; sp and the callee-saved registers were never modified
    291 
    292 
    293 
    294 
    295 
    296 @******************************************************************************
    297 
    298 
    299 @**
    300 @*******************************************************************************
    301 @*
    302 @*ih264_intra_pred_luma_8x8_mode_dc
    303 @*
    304 @* @brief
    305 @*  Perform Intra prediction for  luma_8x8 mode:DC
    306 @*
    307 @* @par Description:
    308 @*  Perform Intra prediction for  luma_8x8 mode:DC ,described in sec 8.3.2.2.3
    309 @*
    310 @* @param[in] pu1_src
    311 @*  UWORD8 pointer to the source
    312 @*
    313 @* @param[out] pu1_dst
    314 @*  UWORD8 pointer to the destination
    315 @*
    316 @* @param[in] src_strd
    317 @*  integer source stride
    318 @*
    319 @* @param[in] dst_strd
    320 @*  integer destination stride
    321 @*
    322 @* @param[in] ui_neighboravailability
    323 @*  availability of neighbouring pixels
    324 @*
    325 @* @returns
    326 @*
    327 @* @remarks
    328 @*  None
    329 @*
    330 @*******************************************************************************
    331 @void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
    332 @                                       UWORD8 *pu1_dst,
    333 @                                       WORD32 src_strd,
    334 @                                       WORD32 dst_strd,
    335 @                                       WORD32 ui_neighboravailability)
    336 
    337 @**************Variables Vs Registers*****************************************
    338 @   r0 => *pu1_src
    339 @   r1 => *pu1_dst
    340 @   r2 =>  src_strd
    341 @   r3 =>  dst_strd
    342 @   r4 =>  ui_neighboravailability
    343 
    344 
    345     .global ih264_intra_pred_luma_8x8_mode_dc_a9q
    346 
    347 ih264_intra_pred_luma_8x8_mode_dc_a9q:
    348 @ Fills all 8 rows with one byte: the rounded mean of the available neighbours, or 128 when neither bit 0x01 nor bit 0x04 is set.
    349     stmfd         sp!, {r4, r14}        @store register values to stack
    350     ldr           r4, [sp, #8]          @r4 =>  ui_neighboravailability (5th argument, just above the 8 bytes pushed here)
    351 
    352     ands          r2, r4, #0x01         @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE
    353     beq           top_available
    354     ands          r2, r4, #0x04         @CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
    355     beq           left_available
    356 
    357     vld1.u8       {d0}, [r0]            @BOTH LEFT AND TOP AVAILABLE: d0 = src[0..7]
    358     add           r0, r0, #9            @ r0 = &src[9]
    359     vld1.u8       {d1}, [r0]            @ d1 = src[9..16]
    360     vpaddl.u8     q0, q0                @ 16 bytes -> 8 pairwise 16-bit sums
    361     vadd.u16      d0, d0, d1            @ fold the two halves: 4 partial sums
    362     vpaddl.u16    d0, d0                @ 4 -> 2 partial sums (32 bit)
    363     vpaddl.u32    d0, d0                @ 2 -> 1 total of all 16 samples (64 bit)
    364     vqrshrun.s16  d0, q0, #4            @ byte lane 0 = (sum + 8) >> 4; the other lanes are ignored
    365     vdup.u8       d0, d0[0]             @ broadcast the DC value to all 8 lanes
    366     b             str_pred
    367 
    368 top_available:                          @ONLY TOP AVAILABLE
    369     ands          r2, r4, #0x04         @CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE
    370     beq           none_available
    371 
    372     add           r0, r0, #9            @ r0 = &src[9]
    373     vld1.u8       {d0}, [r0]            @ d0 = src[9..16]
    374     vpaddl.u8     d0, d0                @ 8 bytes -> 4 sums
    375     vpaddl.u16    d0, d0                @ 4 -> 2 sums
    376     vpaddl.u32    d0, d0                @ 2 -> 1 total of the 8 samples
    377     vqrshrun.s16  d0, q0, #3            @ byte lane 0 = (sum + 4) >> 3; d1 is not set on this path and its lanes are ignored
    378     vdup.u8       d0, d0[0]             @ broadcast the DC value
    379     b             str_pred
    380 
    381 left_available:                         @ONLY LEFT AVAILABLE
    382     vld1.u8       {d0}, [r0]            @ d0 = src[0..7]
    383     vpaddl.u8     d0, d0                @ 8 bytes -> 4 sums
    384     vpaddl.u16    d0, d0                @ 4 -> 2 sums
    385     vpaddl.u32    d0, d0                @ 2 -> 1 total of the 8 samples
    386     vqrshrun.s16  d0, q0, #3            @ byte lane 0 = (sum + 4) >> 3; d1 is not set on this path and its lanes are ignored
    387     vdup.u8       d0, d0[0]             @ broadcast the DC value
    388     b             str_pred
    389 
    390 none_available:                         @NONE AVAILABLE
    391     vmov.u8       q0, #128              @ every byte of d0 (and d1) = 128
    392 
    393 str_pred:                               @ all paths arrive with the prediction byte replicated in d0
    394     vst1.8        {d0}, [r1], r3        @ row 0; r1 += dst_strd after every store
    395     vst1.8        {d0}, [r1], r3        @ row 1
    396     vst1.8        {d0}, [r1], r3        @ row 2
    397     vst1.8        {d0}, [r1], r3        @ row 3
    398     vst1.8        {d0}, [r1], r3        @ row 4
    399     vst1.8        {d0}, [r1], r3        @ row 5
    400     vst1.8        {d0}, [r1], r3        @ row 6
    401     vst1.8        {d0}, [r1], r3        @ row 7
    402 
    403     ldmfd         sp!, {r4, pc}         @Restoring registers from stack
    404 
    405 
    406 
    407 
    408 
    409 
    410 @**
    411 @*******************************************************************************
    412 @*
    413 @*ih264_intra_pred_luma_8x8_mode_diag_dl
    414 @*
    415 @* @brief
    416 @*  Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Left
    417 @*
    418 @* @par Description:
    419 @*  Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4
    420 @*
    421 @* @param[in] pu1_src
    422 @*  UWORD8 pointer to the source
    423 @*
    424 @* @param[out] pu1_dst
    425 @*  UWORD8 pointer to the destination
    426 @*
    427 @* @param[in] src_strd
    428 @*  integer source stride
    429 @*
    430 @* @param[in] dst_strd
    431 @*  integer destination stride
    432 @*
    433 @* @param[in] ui_neighboravailability
    434 @*  availability of neighbouring pixels
    435 @*
    436 @* @returns
    437 @*
    438 @* @remarks
    439 @*  None
    440 @*
    441 @*******************************************************************************
    442 @void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
    443 @                                            UWORD8 *pu1_dst,
    444 @                                            WORD32 src_strd,
    445 @                                            WORD32 dst_strd,
    446 @                                            WORD32 ui_neighboravailability)
    447 
    448 @**************Variables Vs Registers*****************************************
    449 @   r0 => *pu1_src
    450 @   r1 => *pu1_dst
    451 @   r2 =>  src_strd
    452 @   r3 =>  dst_strd
    453 @   [sp] => ui_neighboravailability (5th argument, passed on the stack; never read by this function)
    454 
    455     .global ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
    456 
    457 ih264_intra_pred_luma_8x8_mode_diag_dl_a9q:
    458 @ With t[i] = src[9 + i] and P[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2, row r of the output is P[r .. r+7].
    459     stmfd         sp!, {r4-r12, r14}    @store register values to stack (only r5 and r6 are written below)
    460 
    461     add           r0, r0, #9            @ r0 = &src[9] = &t[0]
    462     sub           r5, r3, #4            @ r5 = dst_strd - 4: row step after a 4-byte post-incremented store
    463     add           r6, r0, #15           @ r6 = &src[24] = &t[15]
    464     vld1.8        {q0}, [r0]            @ q0 = t[0..15]
    465     vext.8        q2, q0, q0, #2        @ q2 = t[2..15], t[0], t[1]
    466     vext.8        q1, q0, q0, #1        @ q1 = t[1..15], t[0]
    467     vld1.8        {d5[6]}, [r6]         @ byte 14 of q2 = t[15], so P[14] = (t[14] + 3*t[15] + 2) >> 2
    468     @ q1 = q0 shifted to left once
    469     @ q2 = q1 shifted to left once
    470     vaddl.u8      q10, d0, d2           @Adding for FILT121: t[i] + t[i+1], i = 0..7
    471     vaddl.u8      q11, d1, d3           @ t[i] + t[i+1], i = 8..15
    472     vaddl.u8      q12, d2, d4           @ t[i+1] + t[i+2], i = 0..7
    473     vaddl.u8      q13, d3, d5           @ t[i+1] + t[i+2], i = 8..15
    474     vadd.u16      q12, q10, q12         @ t[i] + 2*t[i+1] + t[i+2], i = 0..7
    475     vadd.u16      q13, q11, q13         @ same, i = 8..15 (lane 15 mixes wrapped bytes and is never stored)
    476 
    477     vqrshrun.s16  d4, q12, #2           @ d4 = P[0..7]
    478     vqrshrun.s16  d5, q13, #2           @ d5 = P[8..15]
    479     @Q2 has all FILT121 values
    480     vst1.8        {d4}, [r1], r3        @ row 0 = P[0..7]
    481     vext.8        q9, q2, q2, #1        @ q9 = P rotated left by 1
    482     vext.8        q8, q9, q9, #1        @ q8 = P rotated left by 2
    483     vst1.8        {d18}, [r1], r3       @ row 1 = P[1..8]
    484     vext.8        q15, q8, q8, #1       @ q15 = P rotated left by 3
    485     vst1.8        {d16}, [r1], r3       @ row 2 = P[2..9]
    486     vst1.8        {d30}, [r1], r3       @ row 3 = P[3..10]
    487     vst1.32       {d4[1]}, [r1]!        @ row 4, bytes 0..3 = P[4..7]; r1 += 4
    488     vst1.32       {d5[0]}, [r1], r5     @ row 4, bytes 4..7 = P[8..11]; r1 moves to the next row
    489     vst1.32       {d18[1]}, [r1]!       @ row 5, bytes 0..3 = P[5..8]
    490     vst1.32       {d19[0]}, [r1], r5    @ row 5, bytes 4..7 = P[9..12]
    491     vst1.32       {d16[1]}, [r1]!       @ row 6, bytes 0..3 = P[6..9]
    492     vst1.32       {d17[0]}, [r1], r5    @ row 6, bytes 4..7 = P[10..13]
    493     vst1.32       {d30[1]}, [r1]!       @ row 7, bytes 0..3 = P[7..10]
    494     vst1.32       {d31[0]}, [r1], r5    @ row 7, bytes 4..7 = P[11..14]
    495 
    496 
    497 end_func_diag_dl:
    498     ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack
    499 
    500 
    501 
    502 
    503 @**
    504 @*******************************************************************************
    505 @*
    506 @*ih264_intra_pred_luma_8x8_mode_diag_dr
    507 @*
    508 @* @brief
    509 @* Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Right
    510 @*
    511 @* @par Description:
    512 @*  Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5
    513 @*
    514 @* @param[in] pu1_src
    515 @*  UWORD8 pointer to the source
    516 @*
    517 @* @param[out] pu1_dst
    518 @*  UWORD8 pointer to the destination
    519 @*
    520 @* @param[in] src_strd
    521 @*  integer source stride
    522 @*
    523 @* @param[in] dst_strd
    524 @*  integer destination stride
    525 @*
    526 @* @param[in] ui_neighboravailability
    527 @*  availability of neighbouring pixels
    528 @*
    529 @* @returns
    530 @*
    531 @* @remarks
    532 @*  None
    533 @*
    534 @*******************************************************************************
    535 @void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
    536 @                                            UWORD8 *pu1_dst,
    537 @                                            WORD32 src_strd,
    538 @                                            WORD32 dst_strd,
    539 @                                            WORD32 ui_neighboravailability)
    540 
    541 @**************Variables Vs Registers*****************************************
    542 @   r0 => *pu1_src
    543 @   r1 => *pu1_dst
    544 @   r2 =>  src_strd
    545 @   r3 =>  dst_strd
    546 @   [sp] => ui_neighboravailability (5th argument, passed on the stack; never read by this function)
    547 
    548 
    549     .global ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
    550 
    551 ih264_intra_pred_luma_8x8_mode_diag_dr_a9q:
    552 @ With P[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2, row r of the output is P[7-r .. 14-r].
    553     stmfd         sp!, {r4-r12, r14}    @store register values to stack (only r5 is written below)
    554 
    555 
    556     vld1.u8       {q0}, [r0]            @ q0 = src[0..15]
    557     add           r0, r0, #1            @ r0 = &src[1]
    558     vld1.u8       {q1}, [r0]            @ q1 = src[1..16]
    559     vext.8        q2, q1, q1, #1        @ q2 = src[2..16], src[1]
    560     @ q1 = q0 shifted to left once
    561     @ q2 = q1 shifted to left once
    562     vaddl.u8      q10, d0, d2           @Adding for FILT121: src[i] + src[i+1], i = 0..7
    563     vaddl.u8      q11, d1, d3           @ src[i] + src[i+1], i = 8..15
    564     vaddl.u8      q12, d2, d4           @ src[i+1] + src[i+2], i = 0..7
    565     vaddl.u8      q13, d3, d5           @ src[i+1] + src[i+2], i = 8..15
    566     vadd.u16      q12, q10, q12         @ src[i] + 2*src[i+1] + src[i+2], i = 0..7
    567     vadd.u16      q13, q11, q13         @ same, i = 8..15 (lane 15 uses the wrapped byte and is never stored)
    568     vqrshrun.s16  d4, q12, #2           @ d4 = P[0..7]
    569     vqrshrun.s16  d5, q13, #2           @ d5 = P[8..15]
    570     @Q2 has all FILT121 values
    571     sub           r5, r3, #4            @ r5 = dst_strd - 4: row step after a 4-byte post-incremented store
    572     vext.8        q9, q2, q2, #15       @ q9 = P rotated right by 1 (d19 = P[7..14])
    573     vst1.8        {d19}, [r1], r3       @ row 0 = P[7..14]
    574     vext.8        q8, q9, q9, #15       @ q8 = P rotated right by 2 (d17 = P[6..13])
    575     vst1.8        {d17}, [r1], r3       @ row 1 = P[6..13]
    576     vext.8        q15, q8, q8, #15      @ q15 = P rotated right by 3 (d31 = P[5..12])
    577     vst1.8        {d31}, [r1], r3       @ row 2 = P[5..12]
    578     vst1.32       {d4[1]}, [r1]!        @ row 3, bytes 0..3 = P[4..7]; r1 += 4
    579     vst1.32       {d5[0]}, [r1], r5     @ row 3, bytes 4..7 = P[8..11]; r1 moves to the next row
    580     vst1.32       {d18[1]}, [r1]!       @ row 4, bytes 0..3 = P[3..6]
    581     vst1.32       {d19[0]}, [r1], r5    @ row 4, bytes 4..7 = P[7..10]
    582     vst1.32       {d16[1]}, [r1]!       @ row 5, bytes 0..3 = P[2..5]
    583     vst1.32       {d17[0]}, [r1], r5    @ row 5, bytes 4..7 = P[6..9]
    584     vst1.32       {d30[1]}, [r1]!       @ row 6, bytes 0..3 = P[1..4]
    585     vst1.32       {d31[0]}, [r1], r5    @ row 6, bytes 4..7 = P[5..8]
    586     vst1.8        {d4}, [r1], r3        @ row 7 = P[0..7]
    587 
    588 end_func_diag_dr:
    589     ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack
    590 
    591 
    592 
    593 
    594 @**
    595 @*******************************************************************************
    596 @*
    597 @*ih264_intra_pred_luma_8x8_mode_vert_r
    598 @*
    599 @* @brief
    600 @* Perform Intra prediction for  luma_8x8 mode:Vertical_Right
    601 @*
    602 @* @par Description:
    603 @*   Perform Intra prediction for  luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6
    604 @*
    605 @* @param[in] pu1_src
    606 @*  UWORD8 pointer to the source
    607 @*
    608 @* @param[out] pu1_dst
    609 @*  UWORD8 pointer to the destination
    610 @*
    611 @* @param[in] src_strd
    612 @*  integer source stride
    613 @*
    614 @* @param[in] dst_strd
    615 @*  integer destination stride
    616 @*
    617 @* @param[in] ui_neighboravailability
    618 @*  availability of neighbouring pixels
    619 @*
    620 @* @returns
    621 @*
    622 @* @remarks
    623 @*  None
    624 @*
    625 @*******************************************************************************
    626 @void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
    627 @                                            UWORD8 *pu1_dst,
    628 @                                            WORD32 src_strd,
    629 @                                            WORD32 dst_strd,
    630 @                                            WORD32 ui_neighboravailability)
    631 
    632 @**************Variables Vs Registers*****************************************
    633 @   r0 => *pu1_src
    634 @   r1 => *pu1_dst
    635 @   r2 =>  src_strd
    636 @   r3 =>  dst_strd
    637 @   [sp] => ui_neighboravailability (5th argument, passed on the stack; never read by this function)
    638 
    639 
    640     .global ih264_intra_pred_luma_8x8_mode_vert_r_a9q
    641 
    642 ih264_intra_pred_luma_8x8_mode_vert_r_a9q:
    643 @ Notation below: A[i] = (src[i] + src[i+1] + 1) >> 1 and F[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2.
    644     stmfd         sp!, {r4-r12, r14}    @store register values to stack (only r5 and r6 are written below)
    645 
    646     vld1.u8       {q0}, [r0]            @ q0 = src[0..15]
    647     add           r0, r0, #1            @ r0 = &src[1]
    648     vld1.u8       {q1}, [r0]            @ q1 = src[1..16]
    649     vext.8        q2, q1, q1, #1        @ q2 = src[2..16], src[1]
    650     @ q1 = q0 shifted to left once
    651     @ q2 = q1 shifted to left once
    652     vaddl.u8      q10, d0, d2           @ src[i] + src[i+1], i = 0..7
    653     vaddl.u8      q11, d1, d3           @ src[i] + src[i+1], i = 8..15
    654     vaddl.u8      q12, d2, d4           @ src[i+1] + src[i+2], i = 0..7
    655     vaddl.u8      q13, d3, d5           @ src[i+1] + src[i+2], i = 8..15
    656     vadd.u16      q12, q10, q12         @ 1-2-1 sums, i = 0..7
    657     vadd.u16      q13, q11, q13         @ 1-2-1 sums, i = 8..15 (lane 15 uses the wrapped byte and is never stored)
    658 
    659     vqrshrun.s16  d4, q10, #1           @ d4 = A[0..7]
    660     vqrshrun.s16  d5, q11, #1           @ d5 = A[8..15]
    661     vqrshrun.s16  d6, q12, #2           @ d6 = F[0..7]
    662     vqrshrun.s16  d7, q13, #2           @ d7 = F[8..15]
    663     @Q2 has all FILT11 values
    664     @Q3 has all FILT121 values
    665     sub           r5, r3, #6            @ r5 = dst_strd - 6: row step after 2 + 4 post-incremented bytes
    666     sub           r6, r3, #4            @ r6 = dst_strd - 4: row step after 4 post-incremented bytes
    667     vst1.8        {d5}, [r1], r3        @ row 0 = A[8..15]
    668     vext.8        q9, q3, q3, #15       @ q9 = F rotated right by 1 (d19 = F[7..14])
    669     vmov.8        q11, q9               @ keep a copy: vuzp below overwrites q9 (d23 = F[7..14])
    670     vext.8        q8, q2, q2, #1        @ q8 = A rotated left by 1 (d17 = A[9..15], A[0])
    671     vst1.8        {d19}, [r1], r3       @row 1 = F[7..14]
    672 
    673     vmov.8        q15, q8               @ keep a copy: vuzp below overwrites q8 (d31 = A[9..15], A[0])
    674     vext.8        q10, q2, q2, #15      @ q10 = A rotated right by 1 (d21 = A[7..14])
    675     vuzp.8        q8, q9                @ d16 = A[1,3,..,15]; d17 = F[15], F[1,3,..,13]; d18 = A[2,4,..,14], A[0]; d19 = F[0,2,..,14]
    676     @row 2
    677     vext.8        q14, q8, q8, #1       @ d29 = F[1,3,5,7,9,11,13], A[1]
    678     vst1.8        {d21}, [r1]           @ row 2 draft = A[7..14] (no pointer update)
    679     vst1.8        {d6[6]}, [r1], r3     @ overwrite byte 0 with F[6]: row 2 = F[6], A[8..14]
    680     @row 3 = F[5], F[7], F[8..13]
    681 
    682     vst1.16       {d29[1]}, [r1]!       @ bytes 0..1 = F[5], F[7]
    683     vst1.32       {d7[0]}, [r1]!        @ bytes 2..5 = F[8..11]
    684     vst1.16       {d7[2]}, [r1], r5     @ bytes 6..7 = F[12], F[13]; r1 moves to the next row
    685 @row 4 = F[4], F[6], A[8..13]
    686     vst1.16       {d19[1]}, [r1]!       @ bytes 0..1 = F[4], F[6]
    687     vst1.32       {d5[0]}, [r1]!        @ bytes 2..5 = A[8..11]
    688     vst1.16       {d5[2]}, [r1], r5     @ bytes 6..7 = A[12], A[13]
    689 
    690 @row 5 = F[3], F[5], F[7..12]
    691     vext.8        q13, q9, q9, #1       @ prepared for row 6: d27 = F[2,4,6,8,10,12,14], A[2]
    692     vst1.16       {d17[1]}, [r1]!       @ bytes 0..1 = F[3], F[5]
    693     vst1.32       {d23[0]}, [r1]!       @ bytes 2..5 = F[7..10]
    694     vst1.16       {d23[2]}, [r1], r5    @ bytes 6..7 = F[11], F[12]
    695 
    696 
    697 @row 6 = F[2], F[4], F[6], A[8..12]
    698     vst1.16       {d27[0]}, [r1]!       @ bytes 0..1 = F[2], F[4]
    699     vst1.8        {d27[2]}, [r1]!       @ byte 2 = F[6]
    700     vst1.8        {d5[0]}, [r1]!        @ byte 3 = A[8]
    701     vst1.32       {d31[0]}, [r1], r6    @ bytes 4..7 = A[9..12]; r1 moves to the next row
    702 @row 7 = F[1], F[3], F[5], F[7], F[8..11]
    703     vst1.32       {d29[0]}, [r1]!       @ bytes 0..3 = F[1], F[3], F[5], F[7]
    704     vst1.32       {d7[0]}, [r1]!        @ bytes 4..7 = F[8..11]
    705 
    706 
    707 
    708 end_func_vert_r:
    709     ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack
    710 
    711 
    712 
    713 
    714 @**
    715 @*******************************************************************************
    716 @*
    717 @*ih264_intra_pred_luma_8x8_mode_horz_d
    718 @*
    719 @* @brief
    720 @* Perform Intra prediction for  luma_8x8 mode:Horizontal_Down
    721 @*
    722 @* @par Description:
    723 @*   Perform Intra prediction for  luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.7
    724 @*
    725 @* @param[in] pu1_src
    726 @*  UWORD8 pointer to the source
    727 @*
    728 @* @param[out] pu1_dst
    729 @*  UWORD8 pointer to the destination
    730 @*
    731 @* @param[in] src_strd
    732 @*  integer source stride
    733 @*
    734 @* @param[in] dst_strd
    735 @*  integer destination stride
    736 @*
    737 @* @param[in] ui_neighboravailability
    738 @*  availability of neighbouring pixels
    739 @*
    740 @* @returns
    741 @*
    742 @* @remarks
    743 @*  None
    744 @*
    745 @*******************************************************************************
    746 @void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
    747 @                                            UWORD8 *pu1_dst,
    748 @                                            WORD32 src_strd,
    749 @                                            WORD32 dst_strd,
    750 @                                            WORD32 ui_neighboravailability)
    751 
    752 @**************Variables Vs Registers*****************************************
    753 @   r0 => *pu1_src
    754 @   r1 => *pu1_dst
    755 @   r2 =>  src_strd
    756 @   r3 =>  dst_strd
    757 @   [sp] => ui_neighboravailability (5th argument, passed on the stack; never read by this function)
    758 
    759     .global ih264_intra_pred_luma_8x8_mode_horz_d_a9q
    760 
    761 ih264_intra_pred_luma_8x8_mode_horz_d_a9q:
    762 @ Notation below: A[i] = (src[i] + src[i+1] + 1) >> 1, F[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2, P(k) = byte pair {A[k], F[k]}.
    763     stmfd         sp!, {r4-r12, r14}    @store register values to stack (only r5 and r6 are written below)
    764     vpush         {d8-d15}              @ q4-q7 (d8-d15) are written below, so preserve them for the caller
    765 
    766     vld1.u8       {q0}, [r0]            @ q0 = src[0..15]
    767     add           r0, r0, #1            @ r0 = &src[1]
    768     vld1.u8       {q1}, [r0]            @ q1 = src[1..16]
    769     vext.8        q2, q1, q1, #1        @ q2 = src[2..16], src[1]
    770     @ q1 = q0 shifted to left once
    771     @ q2 = q1 shifted to left once
    772     vaddl.u8      q10, d0, d2           @ src[i] + src[i+1], i = 0..7
    773     vaddl.u8      q11, d1, d3           @ src[i] + src[i+1], i = 8..15
    774     vaddl.u8      q12, d2, d4           @ src[i+1] + src[i+2], i = 0..7
    775     vaddl.u8      q13, d3, d5           @ src[i+1] + src[i+2], i = 8..15
    776     vadd.u16      q12, q10, q12         @ 1-2-1 sums, i = 0..7
    777     vadd.u16      q13, q11, q13         @ 1-2-1 sums, i = 8..15 (lane 15 uses the wrapped byte and is never stored)
    778 
    779     vqrshrun.s16  d4, q10, #1           @ d4 = A[0..7]
    780     vqrshrun.s16  d5, q11, #1           @ d5 = A[8..15]
    781     vqrshrun.s16  d6, q12, #2           @ d6 = F[0..7]
    782     vqrshrun.s16  d7, q13, #2           @ d7 = F[8..15]
    783     @Q2 has all FILT11 values
    784     @Q3 has all FILT121 values
    785     vmov.8        q4, q2                @ q4 = A (working copy)
    786     vmov.8        q5, q3                @ q5 = F (working copy)
    787     sub           r6, r3, #6            @ r6 = dst_strd - 6: row step after 6 post-incremented bytes
    788     vtrn.8        q4, q5                @ q4 = P(0), P(2), .., P(14); q5 = P(1), P(3), .., P(15)
    789     vmov.8        q6, q4                @ q6 = even pairs (working copy)
    790     vmov.8        q7, q5                @ q7 = odd pairs (working copy)
    791     sub           r5, r3, #4            @ r5 = dst_strd - 4: row step after 4 post-incremented bytes
    792     vtrn.16       q6, q7                @ d12 = P(0), P(1), P(4), P(5); d14 = P(2), P(3), P(6), P(7)
    793     vext.8        q8, q3, q3, #14       @ q8 = F rotated right by 2 (d17 = F[6..13])
    794     @ROW 0 = P(7), F[8..13]
    795     vst1.8        {d17}, [r1]           @ draft = F[6..13] (no pointer update)
    796     vst1.16       {d10[3]}, [r1], r3    @ overwrite bytes 0..1 with P(7); r1 moves to the next row
    797 
    798     @ROW 1 = P(6), P(7), F[8..11]
    799     vst1.32       {d14[1]}, [r1]!       @ bytes 0..3 = P(6), P(7)
    800     vst1.32       {d7[0]}, [r1], r5     @ bytes 4..7 = F[8..11]
    801     @ROW 2 = P(5), P(6), P(7), F[8], F[9]
    802     vst1.16       {d10[2]}, [r1]!       @ bytes 0..1 = P(5)
    803     vst1.32       {d14[1]}, [r1]!       @ bytes 2..5 = P(6), P(7)
    804     vst1.16       {d7[0]}, [r1], r6     @ bytes 6..7 = F[8], F[9]
    805     @ROW 3 = P(4), P(5), P(6), P(7)
    806     vst1.32       {d12[1]}, [r1]!       @ bytes 0..3 = P(4), P(5)
    807     vst1.32       {d14[1]}, [r1], r5    @ bytes 4..7 = P(6), P(7)
    808     @ROW 4 = P(3), P(4), P(5), P(6)
    809     vst1.16       {d14[1]}, [r1]!       @ bytes 0..1 = P(3)
    810     vst1.32       {d12[1]}, [r1]!       @ bytes 2..5 = P(4), P(5)
    811     vst1.16       {d14[2]}, [r1], r6    @ bytes 6..7 = P(6)
    812     @ROW 5 = P(2), P(3), P(4), P(5)
    813     vst1.32       {d14[0]}, [r1]!       @ bytes 0..3 = P(2), P(3)
    814     vst1.32       {d12[1]}, [r1], r5    @ bytes 4..7 = P(4), P(5)
    815     @ROW 6 = P(1), P(2), P(3), P(4)
    816     vst1.16       {d10[0]}, [r1]!       @ bytes 0..1 = P(1)
    817     vst1.16       {d8[1]}, [r1]!        @ bytes 2..3 = P(2)
    818     vst1.16       {d14[1]}, [r1]!       @ bytes 4..5 = P(3)
    819     vst1.16       {d12[2]}, [r1], r6    @ bytes 6..7 = P(4)
    820     @ROW 7 = P(0), P(1), P(2), P(3)
    821     vst1.32       {d12[0]}, [r1]!       @ bytes 0..3 = P(0), P(1)
    822     vst1.32       {d14[0]}, [r1], r5    @ bytes 4..7 = P(2), P(3)
    823 
    824 end_func_horz_d:
    825     vpop          {d8-d15}              @ restore the NEON registers saved on entry
    826     ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack
    827 
    828 
    829 
    830 
    831 
    832 @**
    833 @*******************************************************************************
    834 @*
    835 @*ih264_intra_pred_luma_8x8_mode_vert_l
    836 @*
    837 @* @brief
    838 @*  Perform Intra prediction for  luma_8x8 mode:Vertical_Left
    839 @*
    840 @* @par Description:
    841 @*   Perform Intra prediction for  luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8
    842 @*
    843 @* @param[in] pu1_src
    844 @*  UWORD8 pointer to the source
    845 @*
    846 @* @param[out] pu1_dst
    847 @*  UWORD8 pointer to the destination
    848 @*
    849 @* @param[in] src_strd
    850 @*  integer source stride
    851 @*
    852 @* @param[in] dst_strd
    853 @*  integer destination stride
    854 @*
    855 @* @param[in] ui_neighboravailability
    856 @*  availability of neighbouring pixels
    857 @*
    858 @* @returns
    859 @*
    860 @* @remarks
    861 @*  None
    862 @*
    863 @*******************************************************************************
    864 @void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
    865 @                                            UWORD8 *pu1_dst,
    866 @                                            WORD32 src_strd,
    867 @                                            WORD32 dst_strd,
    868 @                                            WORD32 ui_neighboravailability)
    869 
    870 @**************Variables Vs Registers*****************************************
    871 @   r0 => *pu1_src
    872 @   r1 => *pu1_dst
    873 @   r2 =>  src_strd
    874 @   r3 =>  dst_strd
    875 @   [sp] => ui_neighboravailability (5th argument, passed on the stack; never read by this function)
    876 
    877 
    878     .global ih264_intra_pred_luma_8x8_mode_vert_l_a9q
    879 
    880 ih264_intra_pred_luma_8x8_mode_vert_l_a9q:
    881 @ With t[i] = src[9 + i], A[i] = (t[i] + t[i+1] + 1) >> 1 and F[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2: row 2k = A[k .. k+7], row 2k+1 = F[k .. k+7].
    882     stmfd         sp!, {r4-r12, r14}    @store register values to stack (r4-r12 are not modified below)
    883     vpush         {d8-d15}              @ q4-q7 (d8-d15) are written below, so preserve them for the caller
    884 
    885     add           r0, r0, #9            @ r0 = &src[9] = &t[0]
    886     vld1.u8       {q0}, [r0]            @ q0 = t[0..15]
    887     add           r0, r0, #1            @ r0 = &t[1]
    888     vld1.u8       {q1}, [r0]            @ q1 = t[1..16]
    889     vext.8        q2, q1, q1, #1        @ q2 = t[2..16], t[1]
    890     vaddl.u8      q10, d0, d2           @ t[i] + t[i+1], i = 0..7
    891     vaddl.u8      q11, d1, d3           @ t[i] + t[i+1], i = 8..15
    892     vaddl.u8      q12, d2, d4           @ t[i+1] + t[i+2], i = 0..7
    893     vaddl.u8      q13, d3, d5           @ t[i+1] + t[i+2], i = 8..15
    894     vadd.u16      q12, q10, q12         @ 1-2-1 sums, i = 0..7
    895     vadd.u16      q13, q11, q13         @ 1-2-1 sums, i = 8..15 (lane 15 uses the wrapped byte and is never stored)
    896 
    897     vqrshrun.s16  d4, q10, #1           @ d4 = A[0..7]
    898     vqrshrun.s16  d5, q11, #1           @ d5 = A[8..15]
    899     vqrshrun.s16  d6, q12, #2           @ d6 = F[0..7]
    900     vext.8        q4, q2, q2, #1        @ q4 = A rotated left by 1 (d8 = A[1..8])
    901     vqrshrun.s16  d7, q13, #2           @ d7 = F[8..15]
    902     @Q2 has all FILT11 values
    903     @Q3 has all FILT121 values
    904 
    905     vext.8        q5, q3, q3, #1        @ q5 = F rotated left by 1 (d10 = F[1..8])
    906     @ROW 0,1
    907     vst1.8        {d4}, [r1], r3        @ row 0 = A[0..7]
    908     vst1.8        {d6}, [r1], r3        @ row 1 = F[0..7]
    909 
    910     vext.8        q6, q4, q4, #1        @ q6 = A rotated left by 2 (d12 = A[2..9])
    911     vext.8        q7, q5, q5, #1        @ q7 = F rotated left by 2 (d14 = F[2..9])
    912     @ROW 2,3
    913     vst1.8        {d8}, [r1], r3        @ row 2 = A[1..8]
    914     vst1.8        {d10}, [r1], r3       @ row 3 = F[1..8]
    915 
    916     vext.8        q8, q6, q6, #1        @ q8 = A rotated left by 3 (d16 = A[3..10])
    917     vext.8        q9, q7, q7, #1        @ q9 = F rotated left by 3 (d18 = F[3..10])
    918     @ROW 4,5
    919     vst1.8        {d12}, [r1], r3       @ row 4 = A[2..9]
    920     vst1.8        {d14}, [r1], r3       @ row 5 = F[2..9]
    921     @ROW 6,7
    922     vst1.8        {d16}, [r1], r3       @ row 6 = A[3..10]
    923     vst1.8        {d18}, [r1], r3       @ row 7 = F[3..10]
    924 
    925 end_func_vert_l:
    926     vpop          {d8-d15}              @ restore the NEON registers saved on entry
    927     ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack
    928 
    929 
    930 
    931 
    932 
    933 @**
    934 @*******************************************************************************
    935 @*
    936 @*ih264_intra_pred_luma_8x8_mode_horz_u
    937 @*
    938 @* @brief
    939 @*     Perform Intra prediction for  luma_8x8 mode:Horizontal_Up
    940 @*
    941 @* @par Description:
    942 @*      Perform intra prediction for luma 8x8 mode Horizontal_Up, described in sec 8.3.2.2.10 (Intra_8x8_Horizontal_Up) of the H.264 spec
    943 @*
    944 @* @param[in] pu1_src
    945 @*  UWORD8 pointer to the source
    946 @*
    947 @* @param[out] pu1_dst
    948 @*  UWORD8 pointer to the destination
    949 @*
    950 @* @param[in] src_strd
    951 @*  integer source stride
    952 @*
    953 @* @param[in] dst_strd
    954 @*  integer destination stride
    955 @*
    956 @* @param[in] ui_neighboravailability
    957 @*  availability of neighbouring pixels
    958 @*
    959 @* @returns
    960 @*
    961 @* @remarks
    962 @*  None
    963 @*
    964 @*******************************************************************************
    965 @void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
    966 @                                           UWORD8 *pu1_dst,
    967 @                                           WORD32 src_strd,
    968 @                                           WORD32 dst_strd,
    969 @                                           WORD32 ui_neighboravailability)
    970 
    971 @**************Variables Vs Registers*****************************************
    972 @   r0 => *pu1_src
    973 @   r1 => *pu1_dst
    974 @   r2 =>  src_strd
    975 @   r3 =>  dst_strd
    976 @   ui_neighboravailability is the 5th argument (passed on the stack under AAPCS, not in r4); it is not read by this function
    977 
    978     .global ih264_intra_pred_luma_8x8_mode_horz_u_a9q
    979 @ Horizontal_Up 8x8 luma prediction: writes 8 rows of 8 bytes to pu1_dst (r1), advancing by dst_strd (r3) per row.
    980 ih264_intra_pred_luma_8x8_mode_horz_u_a9q:
    981 @ Only r0, r1, r3 and scratch r12 are used; src_strd (r2) and the 5th argument are never read in this routine.
    982     stmfd         sp!, {r4-r12, r14}    @Save r4-r12 and lr on the stack
    983     vpush         {d8-d15}              @Save d8-d15: q5-q7 are overwritten below
    984 @ Notation below: s[i] = byte at pu1_src + i (presumably the left-neighbour column - confirm with caller's buffer layout)
    985     vld1.u8       {q0}, [r0]            @q0 = s[0..15]
    986     vld1.u8       {d1[7]}, [r0]         @Overwrite lane 15 of q0 with s[0], so FILT11 lane 15 below evaluates to s[0]
    987     vext.8        q1, q0, q0, #1        @q1 = q0 rotated by one byte
    988     vext.8        q2, q1, q1, #1        @q2 = q0 rotated by two bytes
    989     @ LOADING V TABLE: r12 = pc-relative address of the 16 vtbl indices
    990     ldr           r12, scratch_intrapred_addr_8x8   @literal defined outside this function; presumably an offset relative to the add below - confirm
    991 scrlb8x8l2:
    992     add           r12, r12, pc          @Position dependent: do not move this instruction relative to scrlb8x8l2
    993     vaddl.u8      q10, d0, d2           @q10[i] = q0[i] + q1[i], i = 0..7  (widened to 16 bit)
    994     vaddl.u8      q11, d1, d3           @q11[i] = q0[i] + q1[i], i = 8..15
    995     vaddl.u8      q12, d2, d4           @q12[i] = q1[i] + q2[i], i = 0..7
    996     vaddl.u8      q13, d3, d5           @q13[i] = q1[i] + q2[i], i = 8..15
    997     vadd.u16      q12, q10, q12         @q12[i] = q0[i] + 2*q1[i] + q2[i], i = 0..7
    998     vadd.u16      q13, q11, q13         @q13[i] = same 1-2-1 sum for i = 8..15
    999     vld1.u8       {q5}, [r12]           @q5 (d10, d11) = 16 byte indices for the two vtbl lookups
   1000     vqrshrun.s16  d4, q10, #1           @d4 = FILT11[0..7]   = (sum + 1) >> 1, saturated to 8 bit
   1001     vqrshrun.s16  d5, q11, #1           @d5 = FILT11[8..15]
   1002     vqrshrun.s16  d6, q12, #2           @d6 = FILT121[0..7]  = (sum + 2) >> 2, saturated to 8 bit
   1003     vqrshrun.s16  d7, q13, #2           @d7 = FILT121[8..15]
   1004     @Q2 has all FILT11 values
   1005     @Q3 has all FILT121 values
   1006     vtbl.u8       d12, {q2, q3}, d10    @d12 = bytes picked from the 32-byte set {FILT11, FILT121} by indices d10
   1007     vdup.u8       q7, d5[7]             @q7 = FILT11 lane 15 (= s[0], see lane load above) in every byte; used as fill
   1008     vtbl.u8       d13, {q2, q3}, d11    @d13 = bytes picked by indices d11; q6 = d12:d13 is the 16-byte prediction sequence
   1009     vext.8        q8, q6, q7, #2        @q8  = q6 shifted by 2 bytes, fill bytes from q7 appended
   1010     vext.8        q9, q8, q7, #2        @q9  = q6 shifted by 4 bytes, fill appended
   1011     vst1.8        {d12}, [r1], r3       @row 0 = q6[0..7];  r1 += dst_strd
   1012     vext.8        q10, q9, q7, #2       @q10 = q6 shifted by 6 bytes, fill appended
   1013     vst1.8        {d16}, [r1], r3       @row 1 = q6[2..9]
   1014     vst1.8        {d18}, [r1], r3       @row 2 = q6[4..11]
   1015     vst1.8        {d20}, [r1], r3       @row 3 = q6[6..13]
   1016     vst1.8        {d13}, [r1], r3       @row 4 = q6[8..15]
   1017     vst1.8        {d17}, [r1], r3       @row 5 = q6[10..15] + 2 fill bytes
   1018     vst1.8        {d19}, [r1], r3       @row 6 = q6[12..15] + 4 fill bytes
   1019     vst1.8        {d21}, [r1], r3       @row 7 = q6[14..15] + 6 fill bytes
   1020 
   1021 
   1022 end_func_horz_u:
   1023     vpop          {d8-d15}              @Restore d8-d15 saved on entry
   1024     ldmfd         sp!, {r4-r12, pc}     @Restore r4-r12 and return (saved lr is popped into pc)
   1025 
   1026 
   1027 
   1028 
   1029 
   1030 
   1031 
   1032 
   1033