Home | History | Annotate | Download | only in arm
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2015 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 
     21 @/**
     22 @******************************************************************************
     23 @*
     24 @* @brief :Evaluate best intra chroma mode (among VERT, HORZ and DC )
     25 @*                and do the prediction.
     26 @*
     27 @* @par Description
     28 @*   This function evaluates the first three intra chroma modes (DC, HORZ and VERT),
     29 @*   computes the SAD of each, and writes the prediction of the best mode to pu1_dst.
     30 @*
     31 @* @param[in] pu1_src
     32 @*  UWORD8 pointer to the source
     33 @*
     34 @* @param[in] pu1_ngbr_pels
     35 @*  UWORD8 pointer to neighbouring pels
     36 @*
     37 @* @param[out] pu1_dst
     38 @*  UWORD8 pointer to the destination
     39 @*
     40 @* @param[in] src_strd
     41 @*  integer source stride
     42 @*
     43 @* @param[in] dst_strd
     44 @*  integer destination stride
     45 @*
     46 @* @param[in] u4_n_avblty
     47 @* availability of neighbouring pixels
     48 @*
     49 @* @param[out] u4_intra_mode
     50 @* Pointer to the variable in which best mode is returned
     51 @*
     52 @* @param[out] pu4_sadmin
     53 @* Pointer to the variable in which minimum sad is returned
     54 @*
     55 @* @param[in] u4_valid_intra_modes
     56 @* Says what all modes are valid
     57 @*
     58 @*
     59 @* @return      none
     60 @*
     61 @******************************************************************************
     62 @*/
     63 @
     64 @void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
     65 @                                      UWORD8 *pu1_ngbr_pels_i16,
     66 @                                      UWORD8 *pu1_dst,
     67 @                                      UWORD32 src_strd,
     68 @                                      UWORD32 dst_strd,
     69 @                                      WORD32 u4_n_avblty,
     70 @                                      UWORD32 *u4_intra_mode,
     71 @                                      WORD32 *pu4_sadmin,
     72 @                                      UWORD32 u4_valid_intra_modes)
     73 @
     74 .text
     75 .p2align 2
     76 @ Entry point: evaluates the DC, HORZ and VERT chroma modes on 8 rows of 16 bytes.
     77     .global ih264e_evaluate_intra_chroma_modes_a9q
     78 
     79 ih264e_evaluate_intra_chroma_modes_a9q:
     80 
     81 @r0 = pu1_src,
     82 @r1 = pu1_ngbr_pels_i16,
     83 @r2 = pu1_dst,
     84 @r3 = src_strd,
     85 @r4 = dst_strd            (stack argument, loaded just before the mode selection),
     86 @r5 = u4_n_avblty         (stack argument, loaded right after the register save),
     87 @r6 = u4_intra_mode       (stack argument, loaded just before the mode selection; scratch before that),
     88 @r7 = pu4_sadmin          (stack argument, loaded just before the mode selection; scratch before that)
     89 @ Flow: build the four DC values -> accumulate VERT/HORZ/DC SADs over 8 rows ->
     90 @ mask out modes not enabled in u4_valid_intra_modes -> store the minimum SAD,
     91 @ the chosen mode (0 = DC, 1 = HORZ, 2 = VERT) and the 8 predicted rows of 16 bytes.
     92     stmfd         sp!, {r4-r12, r14}    @ save r4-r12 and lr: 10 words = 40 bytes
     93     @-----------------------
     94     ldr           r5, [sp, #44]         @r5 = u4_n_avblty (stack argument; 40 bytes of saved registers + 4)
     95     @-------------------------
     96     mov           r12, r1               @ r12 = start of neighbour buffer (r1 is post-incremented below)
     97     vpush         {d8-d15}              @ save d8-d15 (64 bytes); later stack-argument offsets include these 64 bytes
     98     vld1.32       {q4}, [r1]!           @ q4 = neighbour bytes 0-15 (left column; row 0 is the last byte pair)
     99     add           r1, r1, #2            @ skip the 2 bytes between the two groups (presumably the top-left pel pair - confirm)
    100     vld1.32       {q5}, [r1]!           @ q5 = neighbour bytes 18-33 (top row)
    101 
    102     vuzp.u8       q4, q5                @ split even/odd bytes: d8/d9 = even bytes of left/top, d10/d11 = odd bytes of left/top
    103 
    104     vpaddl.u8     d8, d8                @ pairwise widening add: 8 x u8 -> 4 x u16
    105     vpadd.u16     d8, d8                @ d8 = {sum of bytes 0-3, sum of bytes 4-7} repeated twice (left, even bytes)
    106 
    107     vpaddl.u8     d9, d9
    108     vpadd.u16     d9, d9                @ same two 4-byte sums for top, even bytes
    109 
    110     vpaddl.u8     d10, d10
    111     vpadd.u16     d10, d10              @ same two 4-byte sums for left, odd bytes
    112 
    113     vpaddl.u8     d11, d11
    114 
    115     and           r7, r5, #5            @ r7 = availability bits 0 (left) and 2 (top)
    116     vpadd.u16     d11, d11              @ same two 4-byte sums for top, odd bytes
    117     subs          r8, r7, #5            @ both left and top available?
    118     beq           all_available
    119     subs          r8, r7, #4            @ only top available?
    120     beq           top_available
    121     subs          r8, r7, #1            @ only left available?
    122     beq           left_available
    123     mov           r10, #128             @ neither available: every DC byte is 128
    124     vdup.8        q14, r10              @ q14 = DC prediction for rows 0-3
    125     vdup.8        q15, r10              @ q15 = DC prediction for rows 4-7
    126     b             sad
    127 
    128 all_available:
    129     vzip.u16      q4, q5                @ re-interleave the sums: q4 = left sums, q5 = top sums (even/odd byte sums adjacent)
    130     vext.16       q6, q4, q4, #2        @ q6 = left sums rotated by one pair, so the rows 0-3 sum lines up with the first top sum
    131     vadd.u16      q7, q5, q6            @ q7 = top sum + left sum (8 pels each)
    132     vqrshrn.u16   d14, q7, #3           @ d14 = rounded (top + left) / 8
    133     vqrshrn.u16   d15, q4, #2           @ d15 = rounded left sum / 4
    134     vqrshrn.u16   d16, q5, #2           @ d16 = rounded top sum / 4
    135     vdup.16       d28, d14[0]           @ rows 0-3, bytes 0-7 : top (first half) + left (rows 0-3)
    136     vdup.16       d29, d16[1]           @ rows 0-3, bytes 8-15: top (second half) only
    137     vdup.16       d30, d15[0]           @ rows 4-7, bytes 0-7 : left (rows 4-7) only
    138     vdup.16       d31, d14[1]           @ rows 4-7, bytes 8-15: top (second half) + left (rows 4-7)
    139     b             sad
    140 top_available:
    141     vzip.u16      q4, q5                @ q5 = top sums with even/odd byte sums adjacent
    142     vqrshrn.u16   d16, q5, #2           @ d16 = rounded top sum / 4
    143     vdup.16       d28, d16[0]           @ rows 0-3, bytes 0-7 : first half of top
    144     vdup.16       d29, d16[1]           @ rows 0-3, bytes 8-15: second half of top
    145     vdup.16       d30, d16[0]           @ rows 4-7 use the same two values
    146     vdup.16       d31, d16[1]
    147     b             sad
    148 left_available:
    149     vzip.u16      q4, q5                @ q4 = left sums with even/odd byte sums adjacent
    150     vqrshrn.u16   d16, q4, #2           @ d16 = rounded left sum / 4
    151     vdup.16       d28, d16[3]           @ rows 0-3: average of left rows 0-3 (upper half of the left group)
    152     vdup.16       d29, d16[3]
    153     vdup.16       d30, d16[2]           @ rows 4-7: average of left rows 4-7 (lower half of the left group)
    154     vdup.16       d31, d16[2]
    155 
    156 @ SAD stage. Accumulators (8 x u16 each): q8/q9 = VERT, q13/q7 = HORZ, q11/q12 = DC.
    157 sad:
    158     vld1.32       {q4}, [r12]!          @ q4 = left column again (the sums above overwrote it)
    159     sub           r8, r12, #2           @ r8 -> last byte pair of the left group = left pels of row 0
    160     add           r12, r12, #2          @ skip the 2 bytes in front of the top row
    161     vld1.32       {q5}, [r12]!          @ q5 = top row again
    162     add           r12, r0, r3, lsl  #2  @ r12 = pu1_src + 4 * src_strd (source row 4)
    163     sub           r10, r8, #8           @ r10 -> left pels of row 4
    164     vld1.32       {q0}, [r0], r3        @ q0 = source row 0; r0 advances by src_strd
    165     ldrh          r9, [r8]              @ r9 = left byte pair of row 0
    166     vdup.16       q10, r9               @ row 0: HORZ prediction = left pair repeated 8 times
    167 
    168     @/vertical row 0;
    169     vabdl.u8      q8, d0, d10           @ start VERT accumulators with |src - top|
    170     vabdl.u8      q9, d1, d11
    171     sub           r8, r8, #2            @ r8 -> left pels of row 1
    172     vld1.32       {q1}, [r12], r3       @ q1 = source row 4; r12 advances by src_strd
    173 
    174     @/HORZ row 0;
    175     vabdl.u8      q13, d0, d20          @ start HORZ accumulators with |src - left|
    176     vabdl.u8      q7, d1, d21
    177     ldrh          r9, [r10]             @ r9 = left byte pair of row 4
    178     @/dc row 0;
    179     vabdl.u8      q11, d0, d28          @ start DC accumulators; rows 0-3 compare against d28/d29
    180     vabdl.u8      q12, d1, d29
    181 
    182 
    183     vdup.16       q10, r9               @ row 4: HORZ prediction
    184     @/vertical row 4;
    185     vabal.u8      q8, d2, d10
    186     vabal.u8      q9, d3, d11
    187     sub           r10, r10, #2          @ r10 -> left pels of row 5
    188 
    189     @/HORZ row 4;
    190     vabal.u8      q13, d2, d20
    191     vabal.u8      q7, d3, d21
    192     @/dc row 4;
    193     vabal.u8      q11, d2, d30          @ rows 4-7 compare against d30/d31
    194     vabal.u8      q12, d3, d31
    195 
    196     mov           r11, #3               @ 3 more iterations: row pairs (1,5), (2,6), (3,7)
    197 
    198 loop:
    199     vld1.32       {q0}, [r0], r3        @ q0 = source row i
    200     ldrh          r9, [r8]              @ r9 = left byte pair of row i
    201 
    202 
    203     @/vertical row i;
    204     vabal.u8      q8, d0, d10
    205     vabal.u8      q9, d1, d11
    206 
    207     vdup.16       q10, r9               @ row i: HORZ prediction
    208     vld1.32       {q1}, [r12], r3       @ q1 = source row i+4
    209     sub           r8, r8, #2            @ step to the left pels of the next row
    210     @/HORZ row i;
    211     vabal.u8      q13, d0, d20
    212     vabal.u8      q7, d1, d21
    213     ldrh          r9, [r10]             @ r9 = left byte pair of row i+4
    214     @/dc row i;
    215     vabal.u8      q11, d0, d28
    216     vabal.u8      q12, d1, d29
    217     sub           r10, r10, #2          @ step to the left pels of the next row
    218 
    219     vdup.16       q10, r9               @ row i+4: HORZ prediction
    220     @/vertical row i+4;
    221     vabal.u8      q8, d2, d10
    222     vabal.u8      q9, d3, d11
    223     subs          r11, r11, #1          @ sets the flags tested by bne below (NEON ops leave them untouched)
    224 
    225     @/HORZ row i+4;
    226     vabal.u8      q13, d2, d20
    227     vabal.u8      q7, d3, d21
    228     @/dc row i+4;
    229     vabal.u8      q11, d2, d30
    230     vabal.u8      q12, d3, d31
    231     bne           loop
    232 
    233 
    234 @ Horizontal reduction: fold each mode's 16 u16 partial sums into one scalar.
    235 @-------------------------------------------
    236 
    237     vadd.i16      q9, q9, q8            @/VERT
    238     vadd.i16      q7, q13, q7           @/HORZ
    239     vadd.i16      q12, q11, q12         @/DC
    240     vadd.i16      d18, d19, d18         @/VERT
    241     vadd.i16      d14, d15, d14         @/HORZ
    242     vadd.i16      d24, d24, d25         @/DC
    243     vpaddl.u16    d18, d18              @/VERT
    244     vpaddl.u16    d14, d14              @/HORZ
    245     vpaddl.u16    d24, d24              @/DC
    246     vpaddl.u32    d18, d18              @/VERT
    247     vpaddl.u32    d14, d14              @/HORZ
    248     vpaddl.u32    d24, d24              @/DC
    249 
    250 
    251 
    252     vmov.u32      r8, d18[0]            @ r8  = VERT SAD
    253     vmov.u32      r9, d14[0]            @ r9  = HORZ SAD
    254     vmov.u32      r10, d24[0]           @ r10 = DC SAD
    255 
    256     mov           r11, #1
    257 @-----------------------
    258     ldr           r0, [sp, #120]        @ u4_valid_intra_modes (40 + 64 bytes saved, 5th stack argument)
    259 @--------------------------------------------
    260 
    261 
    262     lsl           r11 , #30             @ r11 = 1 << 30: SAD assigned to a mode that is not enabled
    263 
    264     ands          r7, r0, #04           @ bit 2 clear -> VERT not enabled
    265     moveq         r8, r11
    266 
    267     ands          r6, r0, #02           @ bit 1 clear -> HORZ not enabled
    268     moveq         r9, r11
    269 
    270     ands          r6, r0, #01           @ bit 0 clear -> DC not enabled
    271     moveq         r10, r11
    272 
    273 
    274     @---------------------------
    275     ldr           r4, [sp, #104]        @r4 = dst_strd,
    276     ldr           r6, [sp, #112]        @r6 = u4_intra_mode (pointer; the chosen mode is stored through it)
    277     ldr           r7, [sp, #116]        @r7 = pu4_sadmin
    278 
    279     @ Selection (signed compares): DC wins ties, then HORZ, then VERT.
    280 
    281     cmp           r10, r9
    282     bgt           not_dc                @ DC SAD > HORZ SAD: DC cannot win
    283     cmp           r10, r8
    284     bgt           do_vert               @ DC <= HORZ but DC > VERT: VERT wins
    285 
    286     @/----------------------
    287     @DO DC PREDICTION
    288     str           r10 , [r7]            @MIN SAD
    289     mov           r10, #0               @ mode 0 = DC
    290     str           r10 , [r6]            @ MODE
    291     b             do_dc_vert            @ q14/q15 already hold the DC rows
    292     @-----------------------------
    293 
    294 not_dc:
    295     cmp           r9, r8
    296     bgt           do_vert               @ HORZ SAD > VERT SAD: VERT wins
    297     @/----------------------
    298     @DO HORIZONTAL
    299     @ q4 holds the left column (row 0 in its last 16-bit lane); each row is its left pair repeated.
    300     vdup.16       q10, d9[3]            @/HORIZONTAL VALUE ROW=0;
    301     str           r9 , [r7]             @MIN SAD
    302     mov           r9, #1                @ mode 1 = HORZ
    303     vdup.16       q11, d9[2]            @/HORIZONTAL VALUE ROW=1;
    304     str           r9 , [r6]             @ MODE
    305     vdup.16       q12, d9[1]            @/HORIZONTAL VALUE ROW=2;
    306     vst1.32       {d20, d21} , [r2], r4 @ row 0; r2 advances by dst_strd
    307     vdup.16       q13, d9[0]            @/HORIZONTAL VALUE ROW=3;
    308     vst1.32       {d22, d23} , [r2], r4 @ row 1
    309     vdup.16       q14, d8[3]            @/HORIZONTAL VALUE ROW=4;
    310     vst1.32       {d24, d25} , [r2], r4 @ row 2
    311     vdup.16       q15, d8[2]            @/HORIZONTAL VALUE ROW=5;
    312     vst1.32       {d26, d27} , [r2], r4 @ row 3
    313     vdup.16       q1, d8[1]             @/HORIZONTAL VALUE ROW=6;
    314     vst1.32       {d28, d29} , [r2], r4 @ row 4
    315     vdup.16       q2, d8[0]             @/HORIZONTAL VALUE ROW=7;
    316     vst1.32       {d30, d31} , [r2], r4 @ row 5
    317     vst1.32       {d2, d3} , [r2], r4   @ row 6
    318     vst1.32       {d4, d5} , [r2], r4   @ row 7
    319     b             end_func
    320 
    321 do_vert:
    322     @DO VERTICAL PREDICTION
    323     str           r8 , [r7]             @MIN SAD
    324     mov           r8, #2                @ mode 2 = VERT
    325     str           r8 , [r6]             @ MODE
    326     vmov          q15, q5               @ every row is a copy of the top row held in q5
    327     vmov          q14, q5
    328 
    329 do_dc_vert:
    330     vst1.32       {d28, d29} , [r2], r4 @ row 0 (rows 0-3 come from q14)
    331     vst1.32       {d28, d29} , [r2], r4 @ row 1
    332     vst1.32       {d28, d29} , [r2], r4 @ row 2
    333     vst1.32       {d28, d29} , [r2], r4 @ row 3
    334     vst1.32       {d30, d31} , [r2], r4 @ row 4 (rows 4-7 come from q15)
    335     vst1.32       {d30, d31} , [r2], r4 @ row 5
    336     vst1.32       {d30, d31} , [r2], r4 @ row 6
    337     vst1.32       {d30, d31} , [r2], r4 @ row 7
    338 
    339 
    340 end_func:
    341     vpop          {d8-d15}              @ restore d8-d15 saved at entry
    342     ldmfd         sp!, {r4-r12, pc}     @ restore r4-r12 and return (saved lr is popped into pc)
    343 
    344 
    345 
    346