Home | History | Annotate | Download | only in arm
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2015 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 
     21 @******************************************************************************
     22 @*
     23 @* @brief
     24 @*  This file contains the definition of the combing artifact check (CAC) routine
     25 @*
     26 @* @author
     27 @*  Ittiam
     28 @*
     29 @* @par List of Functions:
     30 @*  - ideint_cac_8x8_a9()
     31 @*
     32 @* @remarks
     33 @*  None
     34 @*
     35 @*******************************************************************************
     36 
     37 
     38 @******************************************************************************
     39 @*
     40 @*  @brief Calculates Combing Artifact
     41 @*
     42 @*  @par   Description
     43 @*   This function performs the combing artifact check (CAC) on the two given fields
     44 @*
     45 @* @param[in] pu1_top
     46 @*  UWORD8 pointer to top field
     47 @*
     48 @* @param[in] pu1_bot
     49 @*  UWORD8 pointer to bottom field
     50 @*
     51 @* @param[in] top_strd
     52 @*  Top field stride
     53 @*
     54 @* @param[in] bot_strd
     55 @*  Bottom field stride
     56 @*
     57 @* @returns
     58 @*  1 if a combing artifact is detected in either 4-column half of the block, 0 otherwise
     59 @*
     60 @* @remarks
     61 @*
     62 @******************************************************************************
     63 
     64     .global ideint_cac_8x8_a9
     65 
     66 ideint_cac_8x8_a9:
     67 
     68     stmfd       sp!,    {r4-r10, lr}
     69 
     70     @ Load first row of top
     71     vld1.u8     d28,    [r0],   r2
     72 
     73     @ Load first row of bottom
     74     vld1.u8     d29,    [r1],   r3
     75 
     76     @ Load second row of top
     77     vld1.u8     d30,    [r0],   r2
     78 
     79     @ Load second row of bottom
     80     vld1.u8     d31,    [r1],   r3
     81 
     82 
     83     @ Calculate row based adj and alt values
     84     @ Get row sums
     85     vpaddl.u8   q0,     q14
     86 
     87     vpaddl.u8   q1,     q15
     88 
     89     vpaddl.u16  q0,     q0
     90 
     91     vpaddl.u16  q1,     q1
     92 
     93     @ Both q0 and q1 have four 32 bit sums corresponding to first 4 rows
     94     @ Pack q0 and q1 into a single register (sum does not exceed 16bits)
     95 
     96     vshl.u32    q8,     q1,     #16
     97     vorr.u32    q8,     q0,     q8
     98     @ q8 now contains 8 sums
     99 
    100     @ Load third row of top
    101     vld1.u8     d24,    [r0],   r2
    102 
    103     @ Load third row of bottom
    104     vld1.u8     d25,    [r1],   r3
    105 
    106     @ Load fourth row of top
    107     vld1.u8     d26,    [r0],   r2
    108 
    109     @ Load fourth row of bottom
    110     vld1.u8     d27,    [r1],   r3
    111 
    112     @ Get row sums
    113     vpaddl.u8   q2,     q12
    114 
    115     vpaddl.u8   q3,     q13
    116 
    117     vpaddl.u16  q2,     q2
    118 
    119     vpaddl.u16  q3,     q3
    120     @ Both q2 and q3 have four 32 bit sums corresponding to last 4 rows
    121     @ Pack q2 and q3 into a single register (sum does not exceed 16bits)
    122 
    123     vshl.u32    q9,     q3,     #16
    124     vorr.u32    q9,     q2,     q9
    125     @ q9 now contains 8 sums
    126 
    127     @ Compute absolute diff between top and bottom row sums
    128     vabd.u16    d16,    d16,    d17
    129     vabd.u16    d17,    d18,    d19
    130 
    131     @ RSUM_CSUM_THRESH
    132     vmov.u16    q9,     #20
    133 
    134     @ Eliminate values smaller than RSUM_CSUM_THRESH
    135     vcge.u16    q10,    q8,     q9
    136     vand.u16    q10,    q8,     q10
    137     @ q10 now contains 8 absolute diff of sums above the threshold
    138 
    139 
    140     @ Compute adj
    141     vadd.u16    d20,    d20,    d21
    142 
    143     @ d20 has four adj values for two sub-blocks
    144 
    145     @ Compute alt
    146     vabd.u32    q0,     q0,     q1
    147     vabd.u32    q2,     q2,     q3
    148 
    149     vadd.u32    q0,     q0,     q2
    150     vadd.u32    d21,    d0,     d1
    151     @ d21 has two values for two sub-blocks
    152 
    153 
    154     @ Calculate column based adj and alt values
    155 
    156     vrhadd.u8   q0,     q14,    q15
    157     vrhadd.u8   q1,     q12,    q13
    158     vrhadd.u8   q0,     q0,     q1
    159 
    160     vabd.u8     d0,     d0,     d1
    161 
    162     @ RSUM_CSUM_THRESH >> 2
    163     vmov.u8     d9,     #5
    164 
    165     @ Eliminate values smaller than RSUM_CSUM_THRESH >> 2
    166     vcge.u8     d1,     d0,     d9
    167     vand.u8     d0,     d0,     d1
    168     @ d0 now contains 8 absolute diff of sums above the threshold
    169 
    170 
    171     vpaddl.u8   d0,     d0
    172     vshl.u16    d0,     d0,     #2
    173 
    174     @ Add row based adj
    175     vadd.u16    d20,    d0,     d20
    176 
    177     vpaddl.u16  d20,    d20
    178     @ d20 now contains 2 adj values
    179 
    180 
    181     vrhadd.u8   d0,     d28,    d29
    182     vrhadd.u8   d2,     d24,    d25
    183     vrhadd.u8   d0,     d0,     d2
    184 
    185     vrhadd.u8   d1,     d30,    d31
    186     vrhadd.u8   d3,     d26,    d27
    187     vrhadd.u8   d1,     d1,     d3
    188 
    189     vabd.u8     d0,     d0,     d1
    190     vpaddl.u8   d0,     d0
    191 
    192     vshl.u16    d0,     d0,     #2
    193     vpaddl.u16  d0,     d0
    194     vadd.u32    d21,    d0,     d21
    195 
    196 
    197     @ d21 now contains 2 alt values
    198 
    199     @ SAD_BIAS_MULT_SHIFT
    200     vshr.u32    d0,     d21,    #3
    201     vadd.u32    d21,    d21,    d0
    202 
    203     @ SAD_BIAS_ADDITIVE >> 1
    204     vmov.u32    d0,     #4
    205     vadd.u32    d21,    d21,    d0
    206 
    207     vclt.u32    d0,     d21,    d20
    208     vpaddl.u32  d0,     d0
    209 
    210     vmov.u32    r0,     d0[0]
    211     cmp         r0,     #0
    212     movne       r0,     #1
    213     ldmfd       sp!,    {r4-r10, pc}
    214