Home | History | Annotate | Download | only in armv8
      1 //******************************************************************************
      2 //*
      3 //* Copyright (C) 2015 The Android Open Source Project
      4 //*
      5 //* Licensed under the Apache License, Version 2.0 (the "License");
      6 //* you may not use this file except in compliance with the License.
      7 //* You may obtain a copy of the License at:
      8 //*
      9 //* http://www.apache.org/licenses/LICENSE-2.0
     10 //*
     11 //* Unless required by applicable law or agreed to in writing, software
     12 //* distributed under the License is distributed on an "AS IS" BASIS,
     13 //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 //* See the License for the specific language governing permissions and
     15 //* limitations under the License.
     16 //*
     17 //*****************************************************************************
     18 //* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 //*/
     20 
     21 //******************************************************************************
     22 //*
     23 //* @brief
     24 //*  This file contains definitions of routines for spatial filter
     25 //*
     26 //* @author
     27 //*  Ittiam
     28 //*
     29 //* @par List of Functions:
     30 //*  - ideint_spatial_filter_av8()
     31 //*
     32 //* @remarks
     33 //*  None
     34 //*
     35 //*******************************************************************************
     36 
     37 
     38 //******************************************************************************
     39 //*
     40 //*  @brief Performs spatial filtering
     41 //*
     42 //*  @par   Description
     43 //*   This function performs edge-adaptive spatial filtering on an 8x8 block
     44 //*
     45 //* @param[in] pu1_src
     46 //*  UWORD8 pointer to the source
     47 //*
     48 //* @param[in] pu1_out
     49 //*  UWORD8 pointer to the destination
     50 //*
     51 //* @param[in] src_strd
     52 //*  source stride
     53 //*
     54 //* @param[in] out_strd
     55 //*  destination stride
     56 //*
     57 //* @returns
     58 //*     None
     59 //*
     60 //* @remarks
     61 //*
     62 //******************************************************************************
     63 
     .global ideint_spatial_filter_av8

//------------------------------------------------------------------------------
// ideint_spatial_filter_av8
//
// Edge-adaptive spatial interpolation. The 8-pixel-wide block is handled as
// two 4-pixel-wide halves. For each half, sums of absolute differences between
// vertically adjacent source rows are accumulated along three directions
// (vertical and the two diagonals), biased, and compared to choose a horizontal
// shift of -1, 0 or +1. Four output rows are then produced as the rounded
// average of an upper row read at (+shift) and the row below read at (-shift).
//
// ABI:   AAPCS64, leaf function, no stack usage
// In:    x0 = pu1_src   pointer to the first source row
//        x1 = pu1_out   pointer to the destination
//        w2 = src_strd  source stride in bytes
//        w3 = out_strd  destination stride in bytes
// Out:   none
// Reads: columns -1..8 of five consecutive source rows
// Writes: 8 bytes in each of four destination rows
// Clobb: x0-x12, v0-v5, v16, v18, v20, v30, v31, flags
//
// NOTE(review): the strides are assumed to be declared as 32-bit signed
// integers on the C side - confirm against the prototype. Under AAPCS64 the
// upper 32 bits of such an argument register are unspecified, so both strides
// are sign-extended before they are used as 64-bit post-index offsets.
//------------------------------------------------------------------------------
ideint_spatial_filter_av8:

    // Make the strides valid 64-bit offsets (see NOTE above)
    sxtw    x2,         w2
    sxtw    x3,         w3

    // Per-direction accumulators, 8 x 16-bit lanes (one lane per column)
    movi    v16.8h,     #0              // adiff[0]: row_1[i]     vs row_2[i]
    movi    v18.8h,     #0              // adiff[1]: row_1[i - 1] vs row_2[i + 1]
    movi    v20.8h,     #0              // adiff[2]: row_1[i + 1] vs row_2[i - 1]

    // Keep the block origin; x0 is advanced row by row during detection
    mov     x10,        x0

    // First row: centre, left-neighbour and right-neighbour views
    sub     x5,         x0,         #1
    ld1     {v0.8b},    [x0],       x2  // v0 = pu1_row_1[0..7]
    ld1     {v1.8b},    [x5]            // v1 = pu1_row_1[-1..6]
    add     x5,         x5,         #2
    ld1     {v2.8b},    [x5]            // v2 = pu1_row_1[1..8]

    // Number of row pairs to compare
    mov     x4,         #4

    movi    v30.2s,     #5              // EDGE_BIAS_0
    movi    v31.2s,     #7              // EDGE_BIAS_1

.Ldetect_edge:
    // Next row: centre, left-neighbour and right-neighbour views
    sub     x5,         x0,         #1
    ld1     {v3.8b},    [x0],       x2  // v3 = pu1_row_2[0..7]
    ld1     {v4.8b},    [x5]            // v4 = pu1_row_2[-1..6]
    add     x5,         x5,         #2
    ld1     {v5.8b},    [x5]            // v5 = pu1_row_2[1..8]

    // Accumulate absolute differences along the three directions
    uabal   v16.8h,     v0.8b,      v3.8b
    uabal   v18.8h,     v1.8b,      v5.8b
    uabal   v20.8h,     v2.8b,      v4.8b

    // The row just loaded becomes the upper row of the next pair
    mov     v0.8b,      v3.8b
    mov     v1.8b,      v4.8b
    mov     v2.8b,      v5.8b

    subs    x4,         x4,         #1
    b.gt    .Ldetect_edge

    // Reduce each accumulator to two 32-bit sums:
    // lane 0 = columns 0..3 (first half), lane 1 = columns 4..7 (second half)
    addp    v16.8h,     v16.8h,     v16.8h
    addp    v18.8h,     v18.8h,     v18.8h
    addp    v20.8h,     v20.8h,     v20.8h

    uaddlp  v16.2s,     v16.4h
    uaddlp  v18.2s,     v18.4h
    uaddlp  v20.2s,     v20.4h

    // adiff[0] *= EDGE_BIAS_0; adiff[1] *= EDGE_BIAS_1; adiff[2] *= EDGE_BIAS_1
    mul     v16.2s,     v16.2s,     v30.2s
    mul     v18.2s,     v18.2s,     v31.2s
    mul     v20.2s,     v20.2s,     v31.2s

    // Shift selection, identical for both halves:
    //   if (adiff[2] <= adiff[1])  shift = (adiff[2] <= adiff[0]) ?  1 : 0
    //   else                       shift = (adiff[1] <= adiff[0]) ? -1 : 0

    // First half of the block -> x8
    smov    x5,         v16.s[0]        // adiff[0]
    smov    x6,         v18.s[0]        // adiff[1]
    smov    x7,         v20.s[0]        // adiff[2]

    cmp     x7,         x5
    cset    x8,         le              // candidate +1 / 0
    cmp     x6,         x5
    csetm   x11,        le              // candidate -1 / 0
    cmp     x7,         x6
    csel    x8,         x8,         x11,        le

    // Second half of the block -> x9
    smov    x5,         v16.s[1]        // adiff[0]
    smov    x6,         v18.s[1]        // adiff[1]
    smov    x7,         v20.s[1]        // adiff[2]

    cmp     x7,         x5
    cset    x9,         le              // candidate +1 / 0
    cmp     x6,         x5
    csetm   x11,        le              // candidate -1 / 0
    cmp     x7,         x6
    csel    x9,         x9,         x11,        le

    // Source pointers for the directional average.
    // First half:  x4 = src + shift,      x5 = src + src_strd - shift
    add     x4,         x10,        x8
    add     x5,         x10,        x2
    sub     x5,         x5,         x8

    // Second half: x6 = src + 4 + shift,  x7 = src + 4 + src_strd - shift
    add     x10,        x10,        #4
    add     x6,         x10,        x9
    add     x7,         x10,        x2
    sub     x7,         x7,         x9

    // Number of output rows
    mov     x12,        #4

.Lfilter_loop:
    // Lane 0 carries the first half, lane 1 the second half
    ld1     {v0.s}[0],  [x4],       x2
    ld1     {v2.s}[0],  [x5],       x2

    ld1     {v0.s}[1],  [x6],       x2
    ld1     {v2.s}[1],  [x7],       x2

    // Rounded average of the upper and lower rows, 8 pixels at once
    urhadd  v4.8b,      v0.8b,      v2.8b
    st1     {v4.2s},    [x1],       x3

    subs    x12,        x12,        #1
    b.gt    .Lfilter_loop

    ret
    226