Home | History | Annotate | Download | only in arm
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2015 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 @**
     21 @******************************************************************************
     22 @* @file
     23 @*  ih264_intra_pred_chroma_a9q.s
     24 @*
     25 @* @brief
     26 @*  Contains function definitions for intra chroma prediction .
     27 @*
     28 @* @author
     29 @*  Ittiam
     30 @*
     31 @* @par List of Functions:
     32 @*
     33 @*  - ih264_intra_pred_chroma_8x8_mode_horz_a9q()
     34 @*  - ih264_intra_pred_chroma_8x8_mode_vert_a9q()
     35 @*  - ih264_intra_pred_chroma_8x8_mode_dc_a9q()
     36 @*  - ih264_intra_pred_chroma_8x8_mode_plane_a9q()
     37 @*
     38 @* @remarks
     39 @*  None
     40 @*
     41 @*******************************************************************************
     42 @*
     43 
     44 @* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
     45 @
     46 
     47 .text
     48 .p2align 2
     49 @ Coefficient tables read by the plane predictor; both are declared external to this file.
     50     .extern ih264_gai1_intrapred_chroma_plane_coeffs1
     51 .hidden ih264_gai1_intrapred_chroma_plane_coeffs1
     52     .extern ih264_gai1_intrapred_chroma_plane_coeffs2
     53 .hidden ih264_gai1_intrapred_chroma_plane_coeffs2
     54 scratch_chroma_intrapred_addr1:         @ offset word loaded just before label scrlblc1
     55     .long ih264_gai1_intrapred_chroma_plane_coeffs1 - scrlblc1 - 8    @ table address relative to the pc value read at scrlblc1 (instruction address + 8; assumes ARM state)
     56 @ Same scheme for the second table: the word is added to pc at label scrlblc2.
     57 scratch_intrapred_chroma_plane_addr1:
     58     .long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8    @ table address relative to the pc value read at scrlblc2
     59 @**
     60 @*******************************************************************************
     61 @*
     62 @*ih264_intra_pred_chroma_8x8_mode_dc
     63 @*
     64 @* @brief
     65 @*     Perform Intra prediction for  chroma_8x8 mode:DC
     66 @*
     67 @* @par Description:
     68 @*    Perform Intra prediction for  chroma_8x8 mode:DC ,described in sec 8.3.4.1
     69 @*
     70 @* @param[in] pu1_src
     71 @*  UWORD8 pointer to the source containing alternate U and V samples
     72 @*
     73 @* @param[out] pu1_dst
     74 @*  UWORD8 pointer to the destination with alternate U and V samples
     75 @*
     76 @* @param[in] src_strd
     77 @*  integer source stride
     78 @*
     79 @* @param[in] dst_strd
     80 @*  integer destination stride
     81 @*
     82 @* @param[in] ui_neighboravailability
     83 @*  availability of neighbouring pixels
     84 @*
     85 @* @returns
     86 @*
     87 @* @remarks
     88 @*  None
     89 @*
     90 @*******************************************************************************
     91 @void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
     92 @                                        UWORD8 *pu1_dst,
     93 @                                        WORD32 src_strd,
     94 @                                        WORD32 dst_strd,
     95 @                                        WORD32 ui_neighboravailability)
     96 
     97 @**************Variables Vs Registers*****************************************
     98 @   r0 => *pu1_src
     99 @   r1 => *pu1_dst
    100 @   r2 =>  src_strd
    101 @   r3 =>  dst_strd
    102 @   r4 =>  ui_neighboravailability
    103 
    104     .global ih264_intra_pred_chroma_8x8_mode_dc_a9q
    105 @ DC prediction: writes 8 rows of 16 bytes to pu1_dst, filled with neighbour averages chosen by the availability flags.
    106 ih264_intra_pred_chroma_8x8_mode_dc_a9q:
    107 @ Left neighbours are read from pu1_src[0..15] and top neighbours from pu1_src[18..33]; r2 (src_strd) is reused as scratch.
    108     stmfd         sp!, {r4, r14}        @save r4 and the return address
    109     ldr           r4, [sp, #8]          @r4 =>  ui_neighboravailability (first stack argument, 8 bytes above sp after the push)
    110     vpush         {d8-d15}              @preserve d8-d15, which are written below
    111
    112     ands          r2, r4, #0x01         @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE
    113     beq           top_available
    114     ands          r2, r4, #0x04         @CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
    115     beq           left_available
    116
    117     vld1.u8       {q0}, [r0]            @BOTH LEFT AND TOP AVAILABLE: q0 = 16 left bytes
    118     add           r0, r0, #18           @r0 -> pu1_src + 18, start of the top neighbours
    119     vld1.u8       {q1}, [r0]            @q1 = 16 top bytes
    120     vaddl.u8      q2, d1, d2            @q2 = left[8..15] + top[0..7], widened to 16 bits
    121     vaddl.u8      q3, d0, d3            @q3 = left[0..7] + top[8..15], widened to 16 bits
    122     vmovl.u8      q1, d3                @q1 = top[8..15] widened to 16 bits
    123     vmovl.u8      q0, d0                @q0 = left[0..7] widened to 16 bits
    124
    125     vadd.u16      d12, d4, d5           @fold each 8-lane vector to 4 lanes; lanes 4 apart share the same chroma component
    126     vadd.u16      d13, d2, d3
    127     vadd.u16      d15, d6, d7
    128     vadd.u16      d14, d0, d1
    129
    130     vpadd.u32     d12, d12, d15         @32-bit pairwise add sums the two 16-bit U/V pairs of each operand at once
    131     vpadd.u32     d14, d13, d14
    132     vqrshrun.s16  d12, q6, #3           @8-sample sums: round and divide by 8 (16-bit lanes 0 and 1 of d12 are used below)
    133     vqrshrun.s16  d14, q7, #2           @4-sample sums: round and divide by 4 (16-bit lanes 0 and 1 of d14 are used below)
    134     vdup.u16      d8, d12[0]            @rows 0-3, bytes 0-7 : mean of left[8..15] and top[0..7]
    135     vdup.u16      d9, d14[0]            @rows 0-3, bytes 8-15: mean of top[8..15]
    136     vdup.u16      d10, d14[1]           @rows 4-7, bytes 0-7 : mean of left[0..7]
    137     vdup.u16      d11, d12[1]           @rows 4-7, bytes 8-15: mean of left[0..7] and top[8..15]
    138     b             str_pred
    139
    140 top_available:                          @ONLY TOP AVAILABLE
    141     ands          r2, r4, #0x04         @CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE
    142     beq           none_available
    143
    144     add           r0, r0, #18           @r0 -> pu1_src + 18, start of the top neighbours
    145     vld1.u8       {q0}, [r0]            @q0 = 16 top bytes
    146     vmovl.u8      q1, d0                @widen both halves to 16 bits
    147     vmovl.u8      q2, d1
    148     vadd.u16      d0, d2, d3            @d0 = 4-lane partial sums of top[0..7]
    149     vadd.u16      d1, d4, d5            @d1 = 4-lane partial sums of top[8..15]
    150     vpaddl.u32    q0, q0                @complete the per-component sums in the two low 16-bit lanes of d0 and d1
    151     vqrshrun.s16  d0, q0, #2            @round and divide the 4-sample sums by 4
    152     vdup.u16      d8, d0[0]             @bytes 0-7 of each row : mean of top[0..7]
    153     vdup.u16      d9, d0[2]             @bytes 8-15 of each row: mean of top[8..15]
    154     vmov          q5, q4                @rows 4-7 use the same values as rows 0-3
    155     b             str_pred
    156
    157 left_available:                         @ONLY LEFT AVAILABLE
    158     vld1.u8       {q0}, [r0]            @q0 = 16 left bytes
    159     vmovl.u8      q1, d0                @widen both halves to 16 bits
    160     vmovl.u8      q2, d1
    161     vadd.u16      d0, d2, d3            @d0 = 4-lane partial sums of left[0..7]
    162     vadd.u16      d1, d4, d5            @d1 = 4-lane partial sums of left[8..15]
    163     vpaddl.u32    q0, q0                @complete the per-component sums in the two low 16-bit lanes of d0 and d1
    164     vqrshrun.s16  d0, q0, #2            @round and divide the 4-sample sums by 4
    165     vdup.u16      q5, d0[0]             @rows 4-7: mean of left[0..7]
    166     vdup.u16      q4, d0[2]             @rows 0-3: mean of left[8..15]
    167     b             str_pred
    168
    169 none_available:                         @NONE AVAILABLE
    170     vmov.u8       q4, #128              @every output byte is 128
    171     vmov.u8       q5, #128
    172
    173 str_pred:
    174     vst1.8        {q4}, [r1], r3        @rows 0-3: store q4, advancing pu1_dst by dst_strd each time
    175     vst1.8        {q4}, [r1], r3
    176     vst1.8        {q4}, [r1], r3
    177     vst1.8        {q4}, [r1], r3
    178     vst1.8        {q5}, [r1], r3        @rows 4-7: store q5
    179     vst1.8        {q5}, [r1], r3
    180     vst1.8        {q5}, [r1], r3
    181     vst1.8        {q5}, [r1], r3
    182
    183     vpop          {d8-d15}              @restore d8-d15
    184     ldmfd         sp!, {r4, pc}         @Restoring registers from stack
    185 
    186 
    187 
    188 @******************************************************************************
    189 
    190 
    191 @**
    192 @*******************************************************************************
    193 @*
    194 @*ih264_intra_pred_chroma_8x8_mode_horz
    195 @*
    196 @* @brief
    197 @*  Perform Intra prediction for  chroma_8x8 mode:Horizontal
    198 @*
    199 @* @par Description:
    200 @*   Perform Intra prediction for  chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2
    201 @*
    202 @* @param[in] pu1_src
    203 @* UWORD8 pointer to the source containing alternate U and V samples
    204 @*
    205 @* @param[out] pu1_dst
    206 @*  UWORD8 pointer to the destination with alternate U and V samples
    207 @*
    208 @* @param[in] src_strd
    209 @*  integer source stride
    210 @*
    211 @* @param[in] dst_strd
    212 @*  integer destination stride
    213 @*
    214 @* @param[in] ui_neighboravailability
    215 @* availability of neighbouring pixels(Not used in this function)
    216 @*
    217 @* @returns
    218 @*
    219 @* @remarks
    220 @*  None
    221 @*
    222 @*******************************************************************************
    223 @*
    224 @void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
    225 @                                         UWORD8 *pu1_dst,
    226 @                                         WORD32 src_strd,
    227 @                                         WORD32 dst_strd,
    228 @                                         WORD32 ui_neighboravailability)
    229 @**************Variables Vs Registers*****************************************
    230 @   r0 => *pu1_src
    231 @   r1 => *pu1_dst
    232 @   r2 =>  src_strd
    233 @   r3 =>  dst_strd
    234 @   r4 =>  ui_neighboravailability
    235 
    236 
    237     .global ih264_intra_pred_chroma_8x8_mode_horz_a9q
    238 @ Horizontal prediction: output row k is the 2-byte pair at pu1_src[14-2k..15-2k] repeated across 16 bytes.
    239 ih264_intra_pred_chroma_8x8_mode_horz_a9q:
    240
    241     stmfd         sp!, {r14}            @store register values to stack
    242
    243     vld1.u8       {q0}, [r0]            @q0 = pu1_src[0..15]
    244     mov           r2, #6                @r2 = loop counter (src_strd is overwritten): 3 iterations of 2 rows
    245
    246     vdup.u16      q1, d1[3]             @q1 = row 0 value: pair at src[14..15]
    247     vdup.u16      q2, d1[2]             @q2 = row 1 value: pair at src[12..13]
    248     vst1.8        {q1}, [r1], r3        @store row 0, advance pu1_dst by dst_strd
    249
    250 loop_8x8_horz:
    251     vext.8        q0, q0, q0, #12       @rotate q0 by 4 bytes so the next two lower-addressed pairs sit in d1[3] and d1[2]
    252     vst1.8        {q2}, [r1], r3        @store the odd row prepared before this iteration
    253     vdup.u16      q1, d1[3]             @value for the next even row
    254     subs          r2, #2                @two rows are stored per iteration
    255     vdup.u16      q2, d1[2]             @value for the next odd row
    256     vst1.8        {q1}, [r1], r3        @store the even row
    257     bne           loop_8x8_horz
    258
    259     vext.8        q0, q0, q0, #12       @NOTE(review): q0 is not read again before the return, so this rotate appears redundant
    260     vst1.8        {q2}, [r1], r3        @store row 7
    261
    262     ldmfd         sp!, {pc}             @restoring registers from stack
    263 
    264 
    265 
    266 
    267 @**
    268 @*******************************************************************************
    269 @*
    270 @*ih264_intra_pred_chroma_8x8_mode_vert
    271 @*
    272 @* @brief
    273 @*   Perform Intra prediction for  chroma_8x8 mode:vertical
    274 @*
    275 @* @par Description:
    276 @*Perform Intra prediction for  chroma_8x8 mode:vertical ,described in sec 8.3.4.3
    277 @*
    278 @* @param[in] pu1_src
    279 @* UWORD8 pointer to the source containing alternate U and V samples
    280 @*
    281 @* @param[out] pu1_dst
    282 @*   UWORD8 pointer to the destination with alternate U and V samples
    283 @*
    284 @* @param[in] src_strd
    285 @*  integer source stride
    286 @*
    287 @* @param[in] dst_strd
    288 @*  integer destination stride
    289 @*
    290 @* @param[in] ui_neighboravailability
    291 @* availability of neighbouring pixels(Not used in this function)
    292 @*
    293 @* @returns
    294 @*
    295 @* @remarks
    296 @*  None
    297 @*
    298 @*******************************************************************************
    299 @void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src,
    300 @                                        UWORD8 *pu1_dst,
    301 @                                        WORD32 src_strd,
    302 @                                        WORD32 dst_strd,
    303 @                                        WORD32 ui_neighboravailability)
    304 
    305 @**************Variables Vs Registers*****************************************
    306 @   r0 => *pu1_src
    307 @   r1 => *pu1_dst
    308 @   r2 =>  src_strd
    309 @   r3 =>  dst_strd
    310 @   r4 =>  ui_neighboravailability
    311 
    312 
    313     .global ih264_intra_pred_chroma_8x8_mode_vert_a9q
    314 @ Vertical prediction: copies the 16 bytes at pu1_src + 18 into each of the 8 output rows.
    315 ih264_intra_pred_chroma_8x8_mode_vert_a9q:
    316
    317     stmfd         sp!, {r14}            @only the return address is saved: the body touches just r0, r1, r3 and q0
    318
    319     add           r0, r0, #18           @r0 -> pu1_src + 18, start of the top neighbours
    320     vld1.8        {q0}, [r0]            @q0 = 16 top bytes
    321
    322     vst1.8        {q0}, [r1], r3        @store the same 16 bytes to 8 rows, advancing pu1_dst by dst_strd each time
    323     vst1.8        {q0}, [r1], r3
    324     vst1.8        {q0}, [r1], r3
    325     vst1.8        {q0}, [r1], r3
    326     vst1.8        {q0}, [r1], r3
    327     vst1.8        {q0}, [r1], r3
    328     vst1.8        {q0}, [r1], r3
    329     vst1.8        {q0}, [r1], r3
    330
    331     ldmfd         sp!, {pc}             @return by popping the saved return address into pc
    332 
    333 
    334 
    335 
    336 @******************************************************************************
    337 
    338 
    339 @**
    340 @*******************************************************************************
    341 @*
    342 @*ih264_intra_pred_chroma_8x8_mode_plane
    343 @*
    344 @* @brief
    345 @*   Perform Intra prediction for  chroma_8x8 mode:PLANE
    346 @*
    347 @* @par Description:
    348 @*  Perform Intra prediction for  chroma_8x8 mode:PLANE ,described in sec 8.3.4.4
    349 @*
    350 @* @param[in] pu1_src
    351 @*  UWORD8 pointer to the source containing alternate U and V samples
    352 @*
    353 @* @param[out] pu1_dst
    354 @*  UWORD8 pointer to the destination with alternate U and V samples
    355 @*
    356 @* @param[in] src_strd
    357 @*  integer source stride
    358 @*
    359 @* @param[in] dst_strd
    360 @*  integer destination stride
    361 @*
    362 @* @param[in] ui_neighboravailability
    363 @*  availability of neighbouring pixels(Not used in this function)
    364 @*
    365 @* @returns
    366 @*
    367 @* @remarks
    368 @*  None
    369 @*
    370 @*******************************************************************************
    371 @void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
    372 @                                        UWORD8 *pu1_dst,
    373 @                                        WORD32 src_strd,
    374 @                                        WORD32 dst_strd,
    375 @                                        WORD32 ui_neighboravailability)
    376 
    377 @**************Variables Vs Registers*****************************************
    378 @   r0 => *pu1_src
    379 @   r1 => *pu1_dst
    380 @   r2 =>  src_strd
    381 @   r3 =>  dst_strd
    382 @   r4 =>  ui_neighboravailability
    383 
    384     .global ih264_intra_pred_chroma_8x8_mode_plane_a9q
    385 ih264_intra_pred_chroma_8x8_mode_plane_a9q:
    386 @ Plane prediction: writes 8 rows of 16 bytes to pu1_dst, each byte = sat8((offset + column term + row term + 16) >> 5).
    387     stmfd         sp!, {r4-r10, r12, lr}    @save core registers and the return address
    388     vpush         {d8-d15}              @preserve d8-d15, which are written below
    389
    390     vld1.32       d0, [r0]              @d0 = pu1_src[0..7]
    391     add           r10, r0, #10
    392     vld1.32       d1, [r10]             @d1 = pu1_src[10..17]
    393     add           r10, r10, #6
    394     vrev64.16     d5, d0                @d5 = d0 with its four 2-byte pairs in reverse order
    395     vld1.32       d2, [r10]!            @d2 = pu1_src[16..23]; write-back advances r10 by 8
    396     add           r10, r10, #2
    397     vrev64.16     d7, d2                @d7 = d2 with its four 2-byte pairs in reverse order
    398     vld1.32       d3, [r10]             @d3 = pu1_src[26..33]
    399     sub           r5, r3, #8            @r5 = dst_strd - 8; NOTE(review): r5 is not read again in this function
    400     ldr           r12, scratch_chroma_intrapred_addr1    @r12 = PC-relative offset of the coeffs1 table
    401 scrlblc1:
    402     add           r12, r12, pc          @r12 = address of ih264_gai1_intrapred_chroma_plane_coeffs1
    403     vsubl.u8      q5, d5, d1            @q5 = reversed src[0..7] - src[10..17], as 16-bit lanes
    404     vld1.64       {q4}, [r12]           @q4 = eight 16-bit multipliers from the coeffs1 table (values are defined elsewhere)
    405     vsubl.u8      q6, d3, d7            @q6 = src[26..33] - reversed src[16..23], as 16-bit lanes
    406     vmul.s16      q7, q5, q4            @weight both difference vectors by the table
    407     vmul.s16      q8, q6, q4
    408     vuzp.16       q7, q8                @split by component: q7 = even lanes, q8 = odd lanes (d14/d16 from q5, d15/d17 from q6)
    409 @ Two rounds of pairwise adds leave the sum of the four lanes in every lane of each d register.
    410     vpadd.s16     d14, d14
    411     vpadd.s16     d15, d15
    412     vpadd.s16     d16, d16
    413     vpadd.s16     d17, d17
    414     vpadd.s16     d14, d14
    415     vpadd.s16     d15, d15
    416     vpadd.s16     d16, d16
    417     vpadd.s16     d17, d17
    418
    419     mov           r6, #34
    420     vdup.16       q9, r6                @q9 = 34 in every 16-bit lane
    421
    422     vmull.s16     q11, d14, d18         @32-bit products: weighted sum * 34
    423     vmull.s16     q12, d15, d18
    424     vmull.s16     q13, d16, d18
    425     vmull.s16     q14, d17, d18
    426
    427     vrshrn.s32    d10, q11, #6          @d10 = (34 * sum + 32) >> 6 : even component, from src[0..17] differences
    428     vrshrn.s32    d12, q12, #6          @d12 = same scaling        : even component, from src[16..33] differences
    429     vrshrn.s32    d13, q13, #6          @d13 = same scaling        : odd component, from src[0..17] differences
    430     vrshrn.s32    d14, q14, #6          @d14 = same scaling        : odd component, from src[16..33] differences
    431
    432
    433     ldrb          r6, [r0], #1          @r6 = src[0]; r0 advances by 1
    434     add           r10, r0, #31          @r10 -> src[32] (r0 is already src + 1)
    435     ldrb          r8, [r0], #1          @r8 = src[1]
    436     ldrb          r7, [r10], #1         @r7 = src[32]
    437     ldrb          r9, [r10], #1         @r9 = src[33]
    438
    439     add           r6, r6, r7
    440     add           r8, r8, r9
    441     lsl           r6, r6, #4            @r6 = 16 * (src[0] + src[32])
    442     lsl           r8, r8, #4            @r8 = 16 * (src[1] + src[33])
    443
    444     vdup.16       q0, r6                @offset, even component
    445     vdup.16       q1, r8                @offset, odd component
    446     vdup.16       q2, d12[0]            @column multiplier, even component
    447     vdup.16       q3, d10[0]            @row multiplier, even component
    448
    449     vdup.16       q12, d14[0]           @column multiplier, odd component
    450     vdup.16       q13, d13[0]           @row multiplier, odd component
    451     vzip.16       q2, q12               @interleave even/odd components so lanes match the alternating output bytes
    452     vzip.16       q3, q13
    453     vzip.16       q0, q1
    454
    455     ldr           r12, scratch_intrapred_chroma_plane_addr1    @r12 = PC-relative offset of the coeffs2 table
    456 scrlblc2:
    457     add           r12, r12, pc          @r12 = address of ih264_gai1_intrapred_chroma_plane_coeffs2
    458     vld1.64       {q4}, [r12]           @q4 = eight 16-bit multipliers from the coeffs2 table (values are defined elsewhere)
    459     vmov.16       q5, q4
    460     vmov          q11, q4               @q11 keeps the unzipped table; lane k is the multiplier for row k
    461     vzip.16       q4, q5                @q4 = table lanes 0-3 each duplicated, q5 = table lanes 4-7 each duplicated
    462
    463     vmul.s16      q6, q2, q4            @column term for bytes 0-7 of a row
    464     vmul.s16      q8, q2, q5            @column term for bytes 8-15 of a row
    465     vadd.s16      q6, q0, q6            @q6 = offset + column term (bytes 0-7), constant for all rows
    466     vadd.s16      q8, q0, q8            @q8 = offset + column term (bytes 8-15), constant for all rows
    467
    468 @ Per row: row term = q3 * table lane k, added to q6/q8, then rounded >> 5 and saturated to bytes; rows are computed two at a time.
    469     vdup.16       q10, d22[0]           @row 0 multiplier
    470     vmul.s16      q2, q3, q10           @row 0 term
    471     vdup.16       q15, d22[1]           @row 1 multiplier
    472     vmul.s16      q9, q3, q10           @same product as q2
    473     vmul.s16      q7, q3, q15           @row 1 term
    474     vmul.s16      q4, q3, q15           @same product as q7
    475     vadd.s16      q12, q6, q2
    476     vadd.s16      q0, q8, q9
    477     vadd.s16      q1, q6, q7
    478     vqrshrun.s16  d28, q12, #5          @bytes 0-7 of row 0
    479     vadd.s16      q13, q8, q4
    480     vqrshrun.s16  d29, q0, #5           @bytes 8-15 of row 0
    481     vdup.16       q10, d22[2]           @row 2 multiplier
    482     vst1.8        {q14}, [r1], r3       @store row 0, advance pu1_dst by dst_strd
    483     vqrshrun.s16  d28, q1, #5
    484     vqrshrun.s16  d29, q13, #5
    485     vmul.s16      q2, q3, q10
    486     vmul.s16      q9, q3, q10
    487     vst1.8        {q14}, [r1], r3       @store row 1
    488     vadd.s16      q12, q6, q2
    489     vadd.s16      q0, q8, q9
    490     vdup.16       q15, d22[3]           @row 3 multiplier
    491     vqrshrun.s16  d28, q12, #5
    492     vqrshrun.s16  d29, q0, #5
    493     vmul.s16      q7, q3, q15
    494     vmul.s16      q4, q3, q15
    495     vst1.8        {q14}, [r1], r3       @store row 2
    496     vadd.s16      q1, q6, q7
    497     vadd.s16      q13, q8, q4
    498     vdup.16       q10, d23[0]           @row 4 multiplier
    499     vqrshrun.s16  d28, q1, #5
    500     vqrshrun.s16  d29, q13, #5
    501     vmul.s16      q2, q3, q10
    502     vmul.s16      q9, q3, q10
    503     vst1.8        {q14}, [r1], r3       @store row 3
    504     vadd.s16      q12, q6, q2
    505     vadd.s16      q0, q8, q9
    506     vdup.16       q15, d23[1]           @row 5 multiplier
    507     vqrshrun.s16  d28, q12, #5
    508     vqrshrun.s16  d29, q0, #5
    509     vmul.s16      q7, q3, q15
    510     vmul.s16      q4, q3, q15
    511     vst1.8        {q14}, [r1], r3       @store row 4
    512     vadd.s16      q1, q6, q7
    513     vadd.s16      q13, q8, q4
    514     vdup.16       q10, d23[2]           @row 6 multiplier
    515     vqrshrun.s16  d28, q1, #5
    516     vqrshrun.s16  d29, q13, #5
    517     vmul.s16      q2, q3, q10
    518     vmul.s16      q9, q3, q10
    519     vst1.8        {q14}, [r1], r3       @store row 5
    520     vadd.s16      q12, q6, q2
    521     vadd.s16      q0, q8, q9
    522     vdup.16       q15, d23[3]           @row 7 multiplier
    523     vqrshrun.s16  d28, q12, #5
    524     vqrshrun.s16  d29, q0, #5
    525     vmul.s16      q7, q3, q15
    526     vmul.s16      q4, q3, q15
    527     vst1.8        {q14}, [r1], r3       @store row 6
    528     vadd.s16      q1, q6, q7
    529     vadd.s16      q13, q8, q4
    530     vqrshrun.s16  d28, q1, #5
    531     vqrshrun.s16  d29, q13, #5
    532     vst1.8        {q14}, [r1], r3       @store row 7
    533
    534
    535
    536 end_func_plane:
    537
    538     vpop          {d8-d15}              @restore d8-d15
    539     ldmfd         sp!, {r4-r10, r12, pc}    @restore core registers and return
    540 
    541 
    542 
    543 
    544