Home | History | Annotate | Download | only in arm
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2015 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 @**
     21 @******************************************************************************
     22 @* @file
     23 @*  ih264_intra_pred_luma_16x16_a9q.s
     24 @*
     25 @* @brief
     26 @*  Contains function definitions for intra 16x16 Luma prediction .
     27 @*
     28 @* @author
     29 @*  Ittiam
     30 @*
     31 @* @par List of Functions:
     32 @*
     33 @*  - ih264_intra_pred_luma_16x16_mode_vert_a9q()
     34 @*  - ih264_intra_pred_luma_16x16_mode_horz_a9q()
     35 @*  - ih264_intra_pred_luma_16x16_mode_dc_a9q()
     36 @*  - ih264_intra_pred_luma_16x16_mode_plane_a9q()
     37 @*
     38 @* @remarks
     39 @*  None
     40 @*
     41 @*******************************************************************************
     42 @*
     43 
     44 @* All the functions here are replicated from ih264_intra_pred_filters.c
     45 @
     46 
     47 @**
     48 @**
     49 @**
     50 @
     51 
     52 .text
     53 .p2align 2
     54 
     55 
    @ Coefficient table used by the plane-mode routine. It is only declared
    @ here (.extern); .hidden gives the symbol hidden ELF visibility.
    .extern ih264_gai1_intrapred_luma_plane_coeffs
.hidden ih264_gai1_intrapred_luma_plane_coeffs
    @ Literal word holding the table's position relative to (scrlbl1 + 8).
    @ The plane routine loads this word and, at label scrlbl1, adds pc to it
    @ to obtain the table address without an absolute relocation.
    @ NOTE(review): the "- 8" presumably matches the ARM-state rule that pc
    @ reads as the current instruction address + 8 -- confirm before ever
    @ assembling this file for Thumb.
scratch_intrapred_addr1:
    .long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8
     60 @**
     61 @*******************************************************************************
     62 @*
     63 @*ih264_intra_pred_luma_16x16_mode_vert
     64 @*
     65 @* @brief
     66 @*   Perform Intra prediction for  luma_16x16 mode:vertical
     67 @*
     68 @* @par Description:
     69 @* Perform Intra prediction for  luma_16x16 mode:Vertical ,described in sec 8.3.3.1
     70 @*
     71 @* @param[in] pu1_src
     72 @*  UWORD8 pointer to the source
     73 @*
     74 @* @param[out] pu1_dst
     75 @*  UWORD8 pointer to the destination
     76 @*
     77 @* @param[in] src_strd
     78 @*  integer source stride
     79 @*
     80 @* @param[in] dst_strd
     81 @*  integer destination stride
     82 @*
     83 @* @param[in] ui_neighboravailability
     84 @* availability of neighbouring pixels(Not used in this function)
     85 @*
     86 @* @returns
     87 @*
     88 @* @remarks
     89 @*  None
     90 @*
     91 @*******************************************************************************
     92 @void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
     93 @                                        UWORD8 *pu1_dst,
     94 @                                        WORD32 src_strd,
     95 @                                        WORD32 dst_strd,
     96 @                                        WORD32 ui_neighboravailability)
     97 
     98 @**************Variables Vs Registers*****************************************
     99 @   r0 => *pu1_src
    100 @   r1 => *pu1_dst
    101 @   r2 =>  src_strd
    102 @   r3 =>  dst_strd
@   stack => ui_neighboravailability (5th argument; never read by this routine)
    104 
    105 
    .global ih264_intra_pred_luma_16x16_mode_vert_a9q

@ Vertical 16x16 luma prediction.
@ Loads the 16 bytes at pu1_src + 17 once and writes them to 16 consecutive
@ destination rows.
@
@ In:       r0 = pu1_src
@           r1 = pu1_dst
@           r3 = dst_strd (byte distance between destination rows)
@           src_strd (r2) and ui_neighboravailability (stack) are not read.
@ Out:      16 rows of 16 bytes written starting at pu1_dst.
@ Clobbers: r0, r1, q0.
@
@ This is a leaf routine that only touches r0, r1, r3 and q0, so it needs no
@ stack frame: nothing is spilled and it returns with bx lr.
ih264_intra_pred_luma_16x16_mode_vert_a9q:

    add           r0, r0, #17           @ r0 -> the 16 bytes replicated down the block
    vld1.8        {q0}, [r0]

    .rept         16                    @ one 16-byte store per destination row
    vst1.8        {q0}, [r1], r3
    .endr

    bx            lr
    133 
    134 
    135 
    136 
    137 
    138 @******************************************************************************
    139 
    140 
    141 @**
    142 @*******************************************************************************
    143 @*
    144 @*ih264_intra_pred_luma_16x16_mode_horz
    145 @*
    146 @* @brief
    147 @*  Perform Intra prediction for  luma_16x16 mode:horizontal
    148 @*
    149 @* @par Description:
    150 @*  Perform Intra prediction for  luma_16x16 mode:horizontal ,described in sec 8.3.3.2
    151 @*
    152 @* @param[in] pu1_src
    153 @*  UWORD8 pointer to the source
    154 @*
    155 @* @param[out] pu1_dst
    156 @*  UWORD8 pointer to the destination
    157 @*
    158 @* @param[in] src_strd
    159 @*  integer source stride
    160 @*
    161 @* @param[in] dst_strd
    162 @*  integer destination stride
    163 @*
    164 @* @param[in] ui_neighboravailability
    165 @* availability of neighbouring pixels(Not used in this function)
    166 @*
    167 @* @returns
    168 @*
    169 @* @remarks
    170 @*  None
    171 @*
    172 @*******************************************************************************
    173 @*
    174 @void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
    175 @                                         UWORD8 *pu1_dst,
    176 @                                         WORD32 src_strd,
    177 @                                         WORD32 dst_strd,
    178 @                                         WORD32 ui_neighboravailability)
    179 @**************Variables Vs Registers*****************************************
    180 @   r0 => *pu1_src
    181 @   r1 => *pu1_dst
    182 @   r2 =>  src_strd
    183 @   r3 =>  dst_strd
@   stack => ui_neighboravailability (5th argument; never read by this routine)
    185 
    .global ih264_intra_pred_luma_16x16_mode_horz_a9q

@ Horizontal 16x16 luma prediction.
@ Reads the 16 bytes at pu1_src and fills destination row y with the single
@ byte pu1_src[15 - y] (row 0 uses src[15], row 15 uses src[0]).
@
@ In:       r0 = pu1_src
@           r1 = pu1_dst
@           r3 = dst_strd (byte distance between destination rows)
@           src_strd arrives in r2 but is not read; r2 is reused as the
@           loop counter. ui_neighboravailability (stack) is not read.
@ Out:      16 rows of 16 bytes written starting at pu1_dst.
@ Clobbers: r1, r2, q0-q2, flags.
@
@ Leaf routine using only caller-saved registers, so no stack frame is set
@ up and it returns with bx lr.
ih264_intra_pred_luma_16x16_mode_horz_a9q:

    vld1.8        {q0}, [r0]            @ q0 = src[0..15]
    mov           r2, #14               @ rows handled by the loop, two per pass

    vdup.8        q1, d1[7]             @ row 0 value  = src[15]
    vdup.8        q2, d1[6]             @ row 1 value  = src[14] (stored in the loop)
    vst1.8        {q1}, [r1], r3        @ row 0

loop_16x16_horz:
    vext.8        q0, q0, q0, #14       @ rotate q0 so lanes 15/14 hold the next two source bytes
    vst1.8        {q2}, [r1], r3        @ odd row prepared on the previous pass
    vdup.8        q1, d1[7]             @ next even row value
    subs          r2, r2, #2            @ NEON ops below leave the flags untouched
    vdup.8        q2, d1[6]             @ next odd row value (stored on the next pass / after the loop)
    vst1.8        {q1}, [r1], r3        @ even row
    bne           loop_16x16_horz

    vst1.8        {q2}, [r1], r3        @ row 15 = src[0]

    bx            lr
    212 
    213 
    214 
    215 
    216 @******************************************************************************
    217 
    218 
    219 @**
    220 @*******************************************************************************
    221 @*
    222 @*ih264_intra_pred_luma_16x16_mode_dc
    223 @*
    224 @* @brief
    225 @*  Perform Intra prediction for  luma_16x16 mode:DC
    226 @*
    227 @* @par Description:
    228 @*  Perform Intra prediction for  luma_16x16 mode:DC ,described in sec 8.3.3.3
    229 @*
    230 @* @param[in] pu1_src
    231 @*  UWORD8 pointer to the source
    232 @*
    233 @* @param[out] pu1_dst
    234 @*  UWORD8 pointer to the destination
    235 @*
    236 @* @param[in] src_strd
    237 @*  integer source stride
    238 @*
    239 @* @param[in] dst_strd
    240 @*  integer destination stride
    241 @*
    242 @* @param[in] ui_neighboravailability
    243 @*  availability of neighbouring pixels
    244 @*
    245 @* @returns
    246 @*
    247 @* @remarks
    248 @*  None
    249 @*
    250 @*******************************************************************************
    251 @void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
    252 @                                       UWORD8 *pu1_dst,
    253 @                                       WORD32 src_strd,
    254 @                                       WORD32 dst_strd,
    255 @                                       WORD32 ui_neighboravailability)
    256 
    257 @**************Variables Vs Registers*****************************************
    258 @   r0 => *pu1_src
    259 @   r1 => *pu1_dst
    260 @   r2 =>  src_strd
    261 @   r3 =>  dst_strd
    262 @   r4 =>  ui_neighboravailability
    263 
    .global ih264_intra_pred_luma_16x16_mode_dc_a9q

@ DC 16x16 luma prediction.
@ Fills the 16x16 destination block with one value chosen from
@ ui_neighboravailability (bit 0x01 = left, bit 0x04 = top):
@   left and top : (sum(src[0..15]) + sum(src[17..32]) + 16) >> 5
@   top only     : (sum(src[17..32]) + 8) >> 4
@   left only    : (sum(src[0..15])  + 8) >> 4
@   neither      : 128
@
@ In:       r0 = pu1_src
@           r1 = pu1_dst
@           r3 = dst_strd (byte distance between destination rows)
@           [sp] = ui_neighboravailability (5th argument)
@           src_strd (r2) is not read.
@ Out:      16 rows of 16 bytes written starting at pu1_dst.
@ Clobbers: r0, r1, q0, q1, flags (r4 is saved and restored).
ih264_intra_pred_luma_16x16_mode_dc_a9q:

    stmfd         sp!, {r4, r14}        @ two words pushed, so the stacked argument is now at sp + 8
    ldr           r4, [sp, #8]          @ r4 = ui_neighboravailability

    tst           r4, #0x01             @ left available?
    beq           top_available         @ no: only top (or nothing) can contribute
    tst           r4, #0x04             @ top available as well?
    beq           left_available        @ no: left neighbours only

    @ Both neighbours available: average 32 pixels.
    vld1.8        {q0}, [r0]            @ left: src[0..15]
    add           r0, r0, #17
    vpaddl.u8     q0, q0                @ 16 x u8 -> 8 x u16 partial sums
    vld1.8        {q1}, [r0]            @ top: src[17..32]
    vpaddl.u8     q1, q1
    vadd.u16      q0, q0, q1
    vadd.u16      d0, d0, d1
    vpaddl.u16    d0, d0
    vpaddl.u32    d0, d0                @ low 16 bits of d0 now hold the 32-pixel total
    vqrshrun.s16  d0, q0, #5            @ byte 0 = (total + 16) >> 5
    vdup.8        q0, d0[0]
    b             str_pred

top_available:                          @ left is not available
    tst           r4, #0x04             @ top available?
    beq           none_available
    add           r0, r0, #17           @ r0 -> top: src[17..32]; share the 16-pixel reduction below

left_available:                         @ exactly one neighbour run available; r0 points at its 16 bytes
    vld1.8        {q0}, [r0]
    vpaddl.u8     q0, q0
    vadd.u16      d0, d0, d1
    vpaddl.u16    d0, d0
    vpaddl.u32    d0, d0                @ low 16 bits of d0 now hold the 16-pixel total
    vqrshrun.s16  d0, q0, #4            @ byte 0 = (total + 8) >> 4
    vdup.8        q0, d0[0]
    b             str_pred

none_available:                         @ no neighbours: fixed mid-grey value
    vmov.i8       q0, #128

str_pred:
    .rept         16                    @ one 16-byte store per destination row
    vst1.8        {q0}, [r1], r3
    .endr

    ldmfd         sp!, {r4, pc}         @ restore r4 and return
    335 
    336 
    337 
    338 
    339 
    340 @******************************************************************************
    341 
    342 
    343 @**
    344 @*******************************************************************************
    345 @*
    346 @*ih264_intra_pred_luma_16x16_mode_plane
    347 @*
    348 @* @brief
    349 @*  Perform Intra prediction for  luma_16x16 mode:PLANE
    350 @*
    351 @* @par Description:
    352 @*  Perform Intra prediction for  luma_16x16 mode:PLANE ,described in sec 8.3.3.4
    353 @*
    354 @* @param[in] pu1_src
    355 @*  UWORD8 pointer to the source
    356 @*
    357 @* @param[out] pu1_dst
    358 @*  UWORD8 pointer to the destination
    359 @*
    360 @* @param[in] src_strd
    361 @*  integer source stride
    362 @*
    363 @* @param[in] dst_strd
    364 @*  integer destination stride
    365 @*
    366 @* @param[in] ui_neighboravailability
    367 @*  availability of neighbouring pixels
    368 @*
    369 @* @returns
    370 @*
    371 @* @remarks
    372 @*  None
    373 @*
    374 @*******************************************************************************
    375 @void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
    376 @                                        UWORD8 *pu1_dst,
    377 @                                        WORD32 src_strd,
    378 @                                        WORD32 dst_strd,
    379 @                                        WORD32 ui_neighboravailability)
    380 
    381 @**************Variables Vs Registers*****************************************
    382 @   r0 => *pu1_src
    383 @   r1 => *pu1_dst
    384 @   r2 =>  src_strd
    385 @   r3 =>  dst_strd
@   stack => ui_neighboravailability (5th argument; never read by this routine, which uses r4 as a -1 step constant)
    387 
@ Plane 16x16 luma prediction.
@ Below, src[n] means the byte at pu1_src + n on entry and coef[n] the n-th
@ byte of ih264_gai1_intrapred_luma_plane_coeffs.
@   H   = sum over i = 0..7 of coef[i] * (src[25 + i] - src[23 - i])
@   V   = sum over k = 1..8 of k * (src[8 - k] - src[8 + k])
@   i_a = 16 * (src[0] + src[32])
@   i_b = (5 * H + 32) >> 6
@   i_c = (5 * V + 32) >> 6
@ Output row y, column x is the unsigned-saturated, rounded >> 5 of
@   i_a - 8 * i_b - 7 * i_c + coef[x] * i_b + y * i_c.
@ NOTE(review): coef[] is defined in another file; presumably it holds
@ 1..16 so that coef[x] - 8 == x - 7 -- confirm against the table.
@
@ In:       r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd.
@           src_strd (r2) and ui_neighboravailability (stack) are not read.
@ Scalar and NEON instructions are interleaved throughout; the order is
@ deliberate, so keep it when editing.
    .global ih264_intra_pred_luma_16x16_mode_plane_a9q
ih264_intra_pred_luma_16x16_mode_plane_a9q:

    stmfd         sp!, {r4-r10, r12, lr}

    mov           r2, r1                @ r2 = pu1_dst (r1 is reused as a source pointer)
    add           r1, r0, #17           @ r1 = pu1_src + 17
    add           r0, r0, #15           @ r0 = pu1_src + 15

    mov           r8, #9                @ post-increment applied by the first vld1 below
    sub           r1, r1, #1            @ r1 = pu1_src + 16
    mov           r10, r1               @ r10 -> src[16] (top_left), read later for the k = 8 term of V
    mov           r4, #-1               @ r4 = -1: step for the descending byte walk
    vld1.32       d2, [r1], r8          @ d2 = src[16..23]; r1 = pu1_src + 25
    ldr           r7, scratch_intrapred_addr1   @ r7 = stored pc-relative offset of the coefficient table
scrlbl1:
    add           r7, r7, pc            @ r7 = address of ih264_gai1_intrapred_luma_plane_coeffs

    vld1.32       d0, [r1]              @ d0 = src[25..32]
    vrev64.8      d2, d2                @ reverse bytes: d2 = src[23], src[22], ..., src[16]
    vld1.32       {q3}, [r7]            @ q3 = coef[0..15]
    vsubl.u8      q0, d0, d2            @ q0[i] = src[25 + i] - src[23 - i], widened to 16 bits
    vmovl.u8      q8, d6                @ q8 = coef[0..7] widened to 16 bits
    vmul.s16      q0, q0, q8            @ weight each horizontal difference
    vmovl.u8      q9, d7                @ q9 = coef[8..15] widened to 16 bits

    add           r7, r0, r4, lsl #3    @ r7 = pu1_src + 7 (walks down towards src[0])
    sub           r0, r7, r4, lsl #1    @ r0 = pu1_src + 9 (walks up towards src[15])
    rsb           lr, r4, #0x0          @ lr = +1: step for the ascending byte walk

    vpadd.s16     d0, d0, d1            @ begin pairwise reduction of the weighted differences (H)

    ldrb          r8, [r7], r4          @ src[7]
    ldrb          r9, [r0], lr          @ src[9]

    vpaddl.s16    d0, d0
    sub           r12, r8, r9           @ V  = 1 * (src[7] - src[9])

    ldrb          r8, [r7], r4          @ src[6]

    vpaddl.s32    d0, d0                @ lane 0 of d0 now carries H
    ldrb          r9, [r0], lr          @ src[10]
    sub           r8, r8, r9
    vshl.s32      d2, d0, #2            @ 4 * H
    add           r12, r12, r8, lsl #1  @ V += 2 * (src[6] - src[10])

    vadd.s32      d0, d0, d2            @ 5 * H
    ldrb          r8, [r7], r4          @ src[5]
    ldrb          r9, [r0], lr          @ src[11]
    vrshr.s32     d0, d0, #6            @ i_b = D0[0] = (5 * H + 32) >> 6
    sub           r8, r8, r9
    ldrb          r5, [r7], r4          @ src[4]
    add           r8, r8, r8, lsl #1    @ 3 * (src[5] - src[11])

    vdup.16       q2, d0[0]             @ q2 = i_b in every 16-bit lane
    add           r12, r12, r8          @ V += 3 * (src[5] - src[11])
    ldrb          r9, [r0], lr          @ src[12]
    vmul.s16      q0, q2, q8            @ q0[x] = i_b * coef[x],     x = 0..7
    sub           r5, r5, r9
    vmul.s16      q1, q2, q9            @ q1[x] = i_b * coef[8 + x], x = 0..7
    add           r12, r12, r5, lsl #2  @ V += 4 * (src[4] - src[12])

    ldrb          r8, [r7], r4          @ src[3]
    ldrb          r9, [r0], lr          @ src[13]
    sub           r8, r8, r9
    ldrb          r5, [r7], r4          @ src[2]
    add           r8, r8, r8, lsl #2    @ 5 * (src[3] - src[13])
    ldrb          r6, [r0], lr          @ src[14]
    add           r12, r12, r8          @ V += 5 * (src[3] - src[13])
    ldrb          r8, [r7], r4          @ src[1]
    ldrb          r9, [r0], lr          @ src[15]

    sub           r5, r5, r6            @ src[2] - src[14]
    sub           r8, r8, r9            @ src[1] - src[15]
    add           r5, r5, r5, lsl #1    @ 3 * (src[2] - src[14])
    rsb           r8, r8, r8, lsl #3    @ 7 * (src[1] - src[15])
    add           r12, r12, r5, lsl #1  @ V += 6 * (src[2] - src[14])
    ldrb          r5, [r7], r4          @ src[0]
    ldrb          r6, [r10]             @ src[16] (top_left)
    add           r12, r12, r8          @ V += 7 * (src[1] - src[15])
    sub           r9, r5, r6            @ src[0] - src[16]
    ldrb          r6, [r1, #7]          @ src[32] (r1 = pu1_src + 25)
    add           r12, r12, r9, lsl #3  @ V += 8 * (src[0] - src[16]); r12 = V, scaled into i_c below
    add           r8, r5, r6            @ src[0] + src[32]

    add           r12, r12, r12, lsl #2 @ 5 * V
    lsl           r8, r8, #4            @ i_a = r8 = 16 * (src[0] + src[32])

    add           r12, r12, #0x20       @ + 32 for rounding
    lsr           r12, r12, #6          @ i_c; logical shift of a possibly negative value -- only the low
                                        @ 16 bits are consumed (vdup.16 below), where lsr #6 and asr #6 agree

    vshl.s16      q14, q2, #3           @ 8 * i_b
    vdup.16       q3, r12               @ q3 = i_c in every lane (per-row increment)

    vdup.16       q15, r8               @ i_a
    vshl.s16      q13, q3, #3           @ 8 * i_c
    vsub.s16      q15, q15, q14         @ i_a - 8 * i_b
    vsub.s16      q15, q15, q13         @ i_a - 8 * i_b - 8 * i_c
    vadd.s16      q14, q15, q3          @ i_a - 8 * i_b - 7 * i_c  (row 0 base)

    mov           r0, #14               @ rows handled by the loop, two per pass
    vadd.s16      q13, q14, q0          @ row 0, columns 0..7  (before rounding)
    vadd.s16      q14, q14, q1          @ row 0, columns 8..15 (before rounding)
    vqrshrun.s16  d20, q13, #5          @ q10 = row 0 pixels: rounded >> 5, saturated to 0..255
    vqrshrun.s16  d21, q14, #5

    @ Each pass stores the row already held in q10, builds the next row in
    @ q11 and stores it, then builds the following row in q10.
loop_16x16_plane:

    vadd.s16      q13, q13, q3          @ step one row down: add i_c
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d22, q13, #5
    vst1.32       {q10}, [r2], r3       @ store the pending row
    vqrshrun.s16  d23, q14, #5

    vadd.s16      q13, q13, q3
    subs          r0, #2                @ flags survive the NEON instructions until bne
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d20, q13, #5
    vst1.32       {q11}, [r2], r3
    vqrshrun.s16  d21, q14, #5
    bne           loop_16x16_plane

    vadd.s16      q13, q13, q3          @ final row (15)
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d22, q13, #5
    vst1.32       {q10}, [r2], r3       @ row 14
    vqrshrun.s16  d23, q14, #5
    vst1.32       {q11}, [r2], r3       @ row 15

    ldmfd         sp!, {r4-r10, r12, pc}
    518 
    519 
    520 
    521