Home | History | Annotate | Download | only in arm
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2015 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 
     21 @/**
     22 @******************************************************************************
     23 @*
     24 @* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC )
     25 @*                and do the prediction.
     26 @*
     27 @* @par Description
     28 @*   This function evaluates the first three intra 16x16 modes (VERT, HORZ and DC),
     29 @*   computes the SAD of each, and writes the prediction of the best mode to pu1_dst.
     30 @*
     31 @* @param[in] pu1_src
     32 @*  UWORD8 pointer to the source
     33 @*
     34 @* @param[in] pu1_ngbr_pels_i16
     35 @*  UWORD8 pointer to neighbouring pels
     36 @*
     37 @* @param[out] pu1_dst
     38 @*  UWORD8 pointer to the destination
     39 @*
     40 @* @param[in] src_strd
     41 @*  integer source stride
     42 @*
     43 @* @param[in] dst_strd
     44 @*  integer destination stride
     45 @*
     46 @* @param[in] u4_n_avblty
     47 @* availability of neighbouring pixels
     48 @*
     49 @* @param[out] u4_intra_mode
     50 @*  Pointer to the variable in which best mode is returned
     51 @*
     52 @* @param[out] pu4_sadmin
     53 @*  Pointer to the variable in which minimum sad is returned
     54 @*
     55 @* @param[in] u4_valid_intra_modes
     56 @*  Bit mask of the modes that may be selected (bit 0: VERT, bit 1: HORZ, bit 2: DC)
     57 @*
     58 @*
     59 @* @return      none
     60 @*
     61 @******************************************************************************
     62 @*/
     63 @
     64 @void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
     65 @                                      UWORD8 *pu1_ngbr_pels_i16,
     66 @                                      UWORD8 *pu1_dst,
     67 @                                      UWORD32 src_strd,
     68 @                                      UWORD32 dst_strd,
     69 @                                      WORD32 u4_n_avblty,
     70 @                                      UWORD32 *u4_intra_mode,
     71 @                                      WORD32 *pu4_sadmin,
     72 @                                      UWORD32 u4_valid_intra_modes)
     73 @
     74 .text
     75 .p2align 2                              @ align the entry point to 4 bytes
     76 
     77     .global ih264e_evaluate_intra16x16_modes_a9q
     78 
     79 ih264e_evaluate_intra16x16_modes_a9q:
     80 @ Register roles. r0-r3 are used as passed in; everything else is read from the stack.
     81 @r0 = pu1_src            (post-incremented by src_strd on every row load)
     82 @r1 = pu1_ngbr_pels_i16  (reused as the row counter once the neighbours are loaded)
     83 @r2 = pu1_dst            (post-incremented by dst_strd on every row store)
     84 @r3 = src_strd,
     85 @r4 = dst_strd           (holds the DC bias first; loaded from the stack before the stores)
     86 @r5 = u4_n_avblty        (loaded from the stack right after the register save)
     87 @r6 = u4_intra_mode      (walks the left neighbours first; mode pointer loaded later)
     88 @r7 = pu4_sadmin         (scratch first; result pointer loaded later)
     89 @ Flow: build the DC value from the neighbours, accumulate the VERT/HORZ/DC SADs over
     90 @ the 16 source rows, mask out the modes that are not valid, then write the winning
     91 @ prediction, its SAD and its mode index (0 = VERT, 1 = HORZ, 2 = DC).
     92     stmfd         sp!, {r4-r12, r14}    @ save 10 core registers (40 bytes), stack arguments now start at sp + 40
     93     ldr           r5, [sp, #44]         @ r5 = u4_n_avblty, the second stack argument (40 + 4)
     94 
     95 
     96     vpush         {d8-d15}              @ save q4-q7 (64 bytes), stack arguments now start at sp + 104
     97     vld1.32       {q4}, [r1]!           @ q4 = ngbr[0..15], used as the left column (index 15 is row 0), r1 += 16
     98     sub           r6, r1, #1            @ r6 = &ngbr[15], the left pel compared against row 0
     99     add           r1, r1, #1            @ step over ngbr[16] (presumably the top-left pel - confirm with the caller)
    100     mov           r10, #0               @ r10 = left-available flag
    101     vld1.32       {q5}, [r1]!           @ q5 = ngbr[17..32], used as the top row (vertical predictor)
    102     mov           r11, #0               @ r11 = top-available flag
    103     mov           r4, #0                @ r4 = DC bias, stays 0 unless no neighbour is available
    104     @/* Left available ???? */
    105     ands          r7, r5, #01           @ bit 0 of u4_n_avblty
    106     movne         r10, #1
    107 
    108     @/* Top  available ???? */
    109     ands          r8, r5, #04           @ bit 2 of u4_n_avblty
    110     lsl           r9, r10, #3           @ r9 = 8 * left, does not update the flags set by the ands above
    111     movne         r11, #1               @ still conditional on the top-availability test
    112     lsl           r12, r11, #3          @ r12 = 8 * top
    113     adds          r8, r9, r12           @ r8 = rounding term (0, 8 or 16), Z is set when nothing is available
    114 
    115 
    116     @/* None available :( */
    117     moveq         r4, #128              @ no neighbours: 128 is added to the shifted sum below
    118 
    119 
    120 
    121 @/* Finding the DC value */
    122     @----------------------
    123     vaddl.u8      q15, d8, d9           @ widen to u16 and add the two halves of the left column
    124 
    125     vaddl.u8      q14, d10, d11         @ same for the top row
    126 
    127     vadd.u16      q15, q14, q15         @ 8 partial sums covering all 32 neighbour bytes
    128     @ VLD1.32  {q2},[r0],r3;row 2
    129     vadd.u16      d30, d31, d30         @ 4 partial sums
    130     vpadd.u16     d30, d30              @ 2 partial sums
    131     @ VLD1.32  {q3},[r0],r3 ;row 3
    132     vpadd.u16     d30, d30              @ d30[0] = sum of all 32 neighbour bytes
    133     @---------------------
    134 @ NOTE(review): left and top are both summed regardless of the availability flags, so
    135 @ unavailable neighbours presumably have to be zero-filled by the caller - confirm.
    136     vmov.u16      r7, d30[0]            @ r7 = neighbour sum
    137     add           r7, r7, r8            @ add the rounding term
    138     add           r11, r11, #3
    139     add           r8, r10, r11          @ r8 = shift = 3 + left + top (5 with both, 4 with one, 3 with none)
    140 
    141     lsr           r7, r8                @ r7 = (sum + rounding) >> shift
    142     add           r7, r4, r7            @ add the 128 bias when no neighbour is available
    143     vld1.32       {q0}, [r0], r3        @ source row 0, r0 advances by src_strd
    144     vdup.8        q15, r7               @ q15 = DC value in all 16 lanes
    145 
    146 @/* computing SADs for all three modes*/
    147     ldrb          r7, [r6]              @ left pel for row 0
    148     vdup.8        q10, r7               @/HORIZONTAL VALUE ROW=0;
    149     @/vertical row 0;
    150     vabdl.u8      q8, d0, d10           @ q8/q9 = VERT accumulators (u16 lanes), started with |src - top|
    151     vabdl.u8      q9, d1, d11
    152     sub           r6, r6, #1            @ the next row uses the previous neighbour byte
    153     @/HORZ row 0;
    154     vabdl.u8      q13, d0, d20          @ q13/q14 = HORZ accumulators
    155     vabdl.u8      q14, d1, d21
    156     mov           r1, #15               @ 15 rows are left for the loop
    157     @/dc row 0;
    158     vabdl.u8      q11, d0, d30          @ q11/q12 = DC accumulators
    159     vabdl.u8      q12, d1, d31
    160 
    161 
    162 loop:                                   @ one iteration per remaining source row (rows 1..15)
    163     vld1.32       {q1}, [r0], r3        @row i
    164     @/dc row i;
    165     vabal.u8      q11, d2, d30
    166     ldrb          r7, [r6]              @ left pel for this row
    167     vabal.u8      q12, d3, d31
    168 
    169     @/vertical row i;
    170     vabal.u8      q8, d2, d10
    171     vdup.8        q10, r7               @/HORIZONTAL VALUE ROW=i;
    172     sub           r6, r6, #1            @ walk backwards through the left column
    173     vabal.u8      q9, d3, d11
    174 
    175     subs          r1, r1, #1            @ sets the flags tested by the bne below
    176     @/HORZ row i;
    177     vabal.u8      q13, d2, d20          @ the NEON accumulates leave the condition flags untouched
    178     vabal.u8      q14, d3, d21
    179     bne           loop
    180 
    181     @------------------------------------------------------------------------------
    182 @ Horizontal reduction of the three accumulator pairs; at most 64 * 255 per u16 lane before widening.
    183     vadd.i16      q9, q9, q8            @/VERT
    184     vadd.i16      d18, d19, d18         @/VERT
    185     vpaddl.u16    d18, d18              @/VERT
    186     vadd.i16      q14, q13, q14         @/HORZ
    187     vadd.i16      d28, d29, d28         @/HORZ
    188     vpaddl.u32    d18, d18              @/VERT
    189     vpaddl.u16    d28, d28              @/HORZ
    190 
    191     vpaddl.u32    d28, d28              @/HORZ
    192     vmov.u32      r8, d18[0]            @ r8 = VERT SAD
    193     vadd.i16      q12, q11, q12         @/DC
    194     vmov.u32      r9, d28[0]            @ r9 = HORZ SAD
    195     mov           r11, #1
    196     vadd.i16      d24, d24, d25         @/DC
    197     lsl           r11 , #30             @ r11 = 1 << 30, the SAD substituted for a mode that is not valid
    198 
    199     @-----------------------
    200     ldr           r0, [sp, #120]        @ u4_valid_intra_modes (40 + 64 + 16)
    201     @--------------------------------------------
    202     ands          r7, r0, #01           @ vert mode valid? (bit 0)
    203     moveq         r8, r11
    204     vpaddl.u16    d24, d24              @/DC
    205 
    206     ands          r6, r0, #02           @ horz mode valid? (bit 1)
    207     moveq         r9, r11
    208     vpaddl.u32    d24, d24              @/DC
    209 
    210     vmov.u32      r10, d24[0]           @ r10 = DC SAD
    211 @--------------------------------
    212     ldr           r4, [sp, #104]        @r4 = dst_strd,
    213     ldr           r7, [sp, #116]        @r7 = pu4_sadmin
    214 @----------------------------------------------
    215     ands          r6, r0, #04           @ dc mode valid? (bit 2)
    216     moveq         r10, r11
    217 
    218     @---------------------------
    219     ldr           r6, [sp, #112]        @ r6 = u4_intra_mode (pointer the mode index is stored through)
    220     @--------------------------
    221 @ Mode selection with signed compares: ties go to VERT, then HORZ, then DC.
    222     cmp           r8, r9
    223     bgt           not_vert              @ VERT SAD > HORZ SAD
    224     cmp           r8, r10
    225     bgt           do_dc                 @ VERT SAD > DC SAD
    226 
    227     @/----------------------
    228     @DO VERTICAL PREDICTION (also reached, with SAD = 1 << 30, when no mode is valid)
    229     str           r8 , [r7]             @MIN SAD
    230     mov           r8, #0
    231     str           r8 , [r6]             @ MODE = 0 (VERT)
    232     vmov          q15, q5               @ every output row is the top row, so reuse the 16 stores below
    233 
    234     b             do_dc_vert
    235     @-----------------------------
    236 not_vert:
    237     cmp           r9, r10
    238     bgt           do_dc                 @ HORZ SAD > DC SAD
    239 
    240     @/----------------------
    241     @DO HORIZONTAL: row i is filled with left pel ngbr[15 - i]; broadcasts are interleaved with the stores
    242     vdup.8        q5, d9[7]             @ row 0
    243     str           r9 , [r7]             @MIN SAD
    244     vdup.8        q6, d9[6]             @ row 1
    245     mov           r9, #1
    246     vdup.8        q7, d9[5]             @ row 2
    247     vst1.32       {d10, d11} , [r2], r4 @ row 0
    248     vdup.8        q8, d9[4]             @ row 3
    249     str           r9 , [r6]             @ MODE = 1 (HORZ)
    250     vdup.8        q9, d9[3]             @ row 4
    251     vst1.32       {d12, d13} , [r2], r4 @ row 1
    252     vdup.8        q10, d9[2]            @ row 5
    253     vst1.32       {d14, d15} , [r2], r4 @ row 2
    254     vdup.8        q11, d9[1]            @ row 6
    255     vst1.32       {d16, d17} , [r2], r4 @ row 3
    256     vdup.8        q12, d9[0]            @ row 7
    257     vst1.32       {d18, d19} , [r2], r4 @ row 4
    258     vdup.8        q13, d8[7]            @ row 8
    259     vst1.32       {d20, d21} , [r2], r4 @ row 5
    260     vdup.8        q14, d8[6]            @ row 9
    261     vst1.32       {d22, d23} , [r2], r4 @ row 6
    262     vdup.8        q15, d8[5]            @ row 10
    263     vst1.32       {d24, d25} , [r2], r4 @ row 7
    264     vdup.8        q1, d8[4]             @ row 11
    265     vst1.32       {d26, d27} , [r2], r4 @ row 8
    266     vdup.8        q2, d8[3]             @ row 12
    267     vst1.32       {d28, d29} , [r2], r4 @ row 9
    268     vdup.8        q3, d8[2]             @ row 13
    269     vst1.32       {d30, d31}, [r2], r4  @ row 10
    270     vdup.8        q5, d8[1]             @ row 14 (q5 is free again: row 0 was stored above)
    271     vst1.32       {d2, d3} , [r2], r4   @ row 11
    272     vdup.8        q6, d8[0]             @ row 15 (q6 is free again: row 1 was stored above)
    273     vst1.32       {d4, d5} , [r2], r4   @ row 12
    274 
    275     vst1.32       {d6, d7} , [r2], r4   @ row 13
    276 
    277     vst1.32       {d10, d11} , [r2], r4 @ row 14
    278 
    279     vst1.32       {d12, d13} , [r2], r4 @ row 15
    280     b             end_func
    281 
    282 
    283     @/-----------------------------
    284 
    285 do_dc: @/---------------------------------
    286     @DO DC (q15 still holds the DC value broadcast before the SAD loop)
    287     str           r10 , [r7]            @MIN SAD
    288     mov           r10, #2
    289     str           r10 , [r6]            @ MODE = 2 (DC)
    290 do_dc_vert:                             @ shared by DC and VERT: store q15 to all 16 rows
    291     vst1.32       {d30, d31}, [r2], r4  @ row 0
    292     vst1.32       {d30, d31}, [r2], r4  @ row 1
    293     vst1.32       {d30, d31}, [r2], r4  @ row 2
    294     vst1.32       {d30, d31}, [r2], r4  @ row 3
    295     vst1.32       {d30, d31}, [r2], r4  @ row 4
    296     vst1.32       {d30, d31}, [r2], r4  @ row 5
    297     vst1.32       {d30, d31}, [r2], r4  @ row 6
    298     vst1.32       {d30, d31}, [r2], r4  @ row 7
    299     vst1.32       {d30, d31}, [r2], r4  @ row 8
    300     vst1.32       {d30, d31}, [r2], r4  @ row 9
    301     vst1.32       {d30, d31}, [r2], r4  @ row 10
    302     vst1.32       {d30, d31}, [r2], r4  @ row 11
    303     vst1.32       {d30, d31}, [r2], r4  @ row 12
    304     vst1.32       {d30, d31}, [r2], r4  @ row 13
    305     vst1.32       {d30, d31}, [r2], r4  @ row 14
    306     vst1.32       {d30, d31}, [r2], r4  @ row 15
    307     @/------------------
    308 end_func:
    309     vpop          {d8-d15}              @ restore q4-q7 saved at entry
    310     ldmfd         sp!, {r4-r12, pc}     @ restore the core registers and return (saved r14 is loaded into pc)
    311 
    312 
    313