Home | History | Annotate | Download | only in arm
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2015 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 
     21 @******************************************************************************
     22 @*
     23 @* @brief
     24 @*  This file contains definitions of routines for spatial filter
     25 @*
     26 @* @author
     27 @*  Ittiam
     28 @*
     29 @* @par List of Functions:
     30 @*  - ideint_spatial_filter_a9()
     31 @*
     32 @* @remarks
     33 @*  None
     34 @*
     35 @*******************************************************************************
     36 
     37 
     38 @******************************************************************************
     39 @*
     40 @*  @brief Performs spatial filtering
     41 @*
     42 @*  @par   Description
     43 @*   This function performs edge adaptive spatial filtering on an 8x8 block
     44 @*
     45 @* @param[in] pu1_src
     46 @*  UWORD8 pointer to the source
     47 @*
     48 @* @param[out] pu1_out
     49 @*  UWORD8 pointer to the destination
     50 @*
     51 @* @param[in] src_strd
     52 @*  source stride
     53 @*
     54 @* @param[in] out_strd
     55 @*  destination stride
     56 @*
     57 @* @returns
     58 @*  None
     59 @*
     60 @* @remarks
     61 @*
     62 @******************************************************************************
     63 
     64     .global ideint_spatial_filter_a9
     65 
     66 ideint_spatial_filter_a9:
     67 
     68     stmfd       sp!,    {r4-r10, lr}
     69 
     70     vmov.u16    q8,     #0
     71     vmov.u16    q9,     #0
     72     vmov.u16    q10,    #0
     73 
     74     @ Backup r0
     75     mov         r10,    r0
     76 
     77     @ Load from &pu1_row_1[0]
     78     sub         r5,     r0,     #1
     79     vld1.8      d0,     [r0],   r2
     80 
     81     @ Load from &pu1_row_1[-1]
     82     vld1.8      d1,     [r5]
     83     add         r5,     r5,     #2
     84 
     85     @ Load from &pu1_row_1[1]
     86     vld1.8      d2,     [r5]
     87 
     88     @ Number of rows
     89     mov         r4,     #4
     90 
     91     @ EDGE_BIAS_0
     92     vmov.u32    d30,    #5
     93 
     94     @ EDGE_BIAS_1
     95     vmov.u32    d31,    #7
     96 
     97 detect_edge:
     98     @ Load from &pu1_row_2[0]
     99     sub         r5,     r0,     #1
    100     vld1.8      d3,     [r0],   r2
    101 
    102     @ Load from &pu1_row_2[-1]
    103     vld1.8      d4,     [r5]
    104     add         r5,     r5,     #2
    105 
    106     @ Load from &pu1_row_2[1]
    107     vld1.8      d5,     [r5]
    108 
    109     @ Calculate absolute differences
    110     @ pu1_row_1[i] - pu1_row_2[i]
    111     vabal.u8    q8,     d0,     d3
    112 
    113     @ pu1_row_1[i - 1] - pu1_row_2[i + 1]
    114     vabal.u8    q9,     d1,     d5
    115 
    116     @ pu1_row_1[i + 1] - pu1_row_2[i - 1]
    117     vabal.u8    q10,    d4,     d2
    118 
    119     vmov        d0,     d3
    120     vmov        d1,     d4
    121     vmov        d2,     d5
    122 
    123     subs        r4,     r4,     #1
    124     bgt         detect_edge
    125 
    126     @ Calculate sum of absolute differeces for each edge
    127     vpadd.u16   d16,    d16,    d17
    128     vpadd.u16   d18,    d18,    d19
    129     vpadd.u16   d20,    d20,    d21
    130 
    131     vpaddl.u16  d16,    d16
    132     vpaddl.u16  d18,    d18
    133     vpaddl.u16  d20,    d20
    134 
    135     @ adiff[0] *= EDGE_BIAS_0;
    136     vmul.u32    d16,    d16,    d30
    137 
    138     @ adiff[1] *= EDGE_BIAS_1;
    139     vmul.u32    d18,    d18,    d31
    140 
    141     @ adiff[2] *= EDGE_BIAS_1;
    142     vmul.u32    d20,    d20,    d31
    143 
    144     @ Move the differences to ARM registers
    145 
    146 
    147     @ Compute shift for first half of the block
    148 compute_shift_1:
    149     vmov.u32    r5,     d16[0]
    150     vmov.u32    r6,     d18[0]
    151     vmov.u32    r7,     d20[0]
    152 
    153     @ Compute shift
    154     mov         r8,     #0
    155 
    156     @ adiff[2] <= adiff[1]
    157     cmp         r7,     r6
    158     bgt         dir_45_gt_135_1
    159 
    160     @ adiff[2] <= adiff[0]
    161     cmp         r7,     r5
    162     movle       r8,     #1
    163 
    164     b           compute_shift_2
    165 dir_45_gt_135_1:
    166 
    167     @ adiff[1] <= adiff[0]
    168     cmp         r6,     r5
    169     @ Move -1 if less than or equal to
    170     mvnle       r8,     #0
    171 
    172 
    173 compute_shift_2:
    174     @ Compute shift for first half of the block
    175     vmov.u32    r5,     d16[1]
    176     vmov.u32    r6,     d18[1]
    177     vmov.u32    r7,     d20[1]
    178 
    179     @ Compute shift
    180     mov         r9,     #0
    181 
    182     @ adiff[2] <= adiff[1]
    183     cmp         r7,     r6
    184     bgt         dir_45_gt_135_2
    185 
    186     @ adiff[2] <= adiff[0]
    187     cmp         r7,     r5
    188     movle       r9,     #1
    189 
    190     b           interpolate
    191 dir_45_gt_135_2:
    192 
    193     @ adiff[1] <= adiff[0]
    194     cmp         r6,     r5
    195 
    196     @ Move -1 if less than or equal to
    197     mvnle       r9,     #0
    198 
    199 interpolate:
    200     add         r4,     r10,    r8
    201     add         r5,     r10,    r2
    202     sub         r5,     r5,     r8
    203 
    204     add         r10,    r10,    #4
    205     add         r6,     r10,    r9
    206     add         r7,     r10,    r2
    207     sub         r7,     r7,     r9
    208     mov         r8,     #4
    209 
    210 filter_loop:
    211     vld1.u32    d0[0],  [r4],   r2
    212     vld1.u32    d2[0],  [r5],   r2
    213 
    214     vld1.u32    d0[1],  [r6],   r2
    215     vld1.u32    d2[1],  [r7],   r2
    216 
    217     vrhadd.u8   d4,     d0,     d2
    218     vst1.u32    d4,     [r1],   r3
    219 
    220     subs        r8,     #1
    221     bgt         filter_loop
    222 
    223     ldmfd       sp!,    {r4-r10, pc}
    224