Home | History | Annotate | Download | only in x86
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 *******************************************************************************
     22 * @file
     23 *  ideint_utils.c
     24 *
     25 * @brief
     26 *  This file contains the definitions of the core  processing of the de
     27 * interlacer.
     28 *
     29 * @author
     30 *  Ittiam
     31 *
     32 * @par List of Functions:
     33 *   ideint_spatial_filter_ssse3()
     34 *
     35 * @remarks
     36 *  None
     37 *
     38 *******************************************************************************
     39 */
     40 /*****************************************************************************/
     41 /* File Includes                                                             */
     42 /*****************************************************************************/
     43 /* System include files */
     44 #include <stdio.h>
     45 #include <stdint.h>
     46 #include <string.h>
     47 #include <stdlib.h>
     48 #include <assert.h>
     49 #include <immintrin.h>
     50 
     51 /* User include files */
     52 #include "icv_datatypes.h"
     53 #include "icv_macros.h"
     54 #include "icv_platform_macros.h"
     55 #include "icv.h"
     56 #include "icv_variance.h"
     57 #include "icv_sad.h"
     58 #include "ideint.h"
     59 #include "ideint_defs.h"
     60 #include "ideint_structs.h"
     61 #include "ideint_utils.h"
     62 #include "ideint_cac.h"
     63 
     64 /**
     65 *******************************************************************************
     66 *
     67 * @brief
     68 *  Performs spatial edge adaptive filtering
     69 *
     70 * @par   Description
     71 *  Performs spatial edge adaptive filtering by detecting edge direction
     72 *
     73 * @param[in] pu1_src
     74 *  Source buffer
     75 *
     76 * @param[in] pu1_out
     77 *  Destination buffer
     78 *
     79 * @param[in] src_strd
     80 *  Source stride
     81 *
     82 * @param[in] out_strd
     83 *  Destination stride
     84 
     85 * @returns
     86 * None
     87 *
     88 * @remarks
     89 *
     90 *******************************************************************************
     91 */
     92 void ideint_spatial_filter_ssse3(UWORD8 *pu1_src,
     93                            UWORD8 *pu1_out,
     94                            WORD32 src_strd,
     95                            WORD32 out_strd)
     96 {
     97     WORD32 i;
     98 
     99     WORD32 adiff[6];
    100     WORD32 *pi4_diff;
    101     WORD32 shifts[2];
    102     WORD32 dir_45_le_90, dir_45_le_135, dir_135_le_90;
    103 
    104     __m128i row1_0, row1_m1, row1_p1;
    105     __m128i row2_0, row2_m1, row2_p1;
    106     __m128i diff, diffs[3];
    107     __m128i zero;
    108 
    109     /*****************************************************************/
    110     /* Direction detection                                           */
    111     /*****************************************************************/
    112 
    113     zero = _mm_setzero_si128();
    114     diffs[0] = _mm_setzero_si128();
    115     diffs[1]  = _mm_setzero_si128();
    116     diffs[2] = _mm_setzero_si128();
    117 
    118     /* Load source */
    119     row1_m1 = _mm_loadl_epi64((__m128i *) (pu1_src - 1));
    120     row1_0  = _mm_loadl_epi64((__m128i *) (pu1_src));
    121     row1_p1 = _mm_loadl_epi64((__m128i *) (pu1_src + 1));
    122     pu1_src += src_strd;
    123 
    124     /* Unpack to 16 bits */
    125     row1_m1 = _mm_unpacklo_epi8(row1_m1, zero);
    126     row1_0  = _mm_unpacklo_epi8(row1_0,  zero);
    127     row1_p1 = _mm_unpacklo_epi8(row1_p1, zero);
    128 
    129     /*****************************************************************/
    130     /* Calculating the difference along each of the 3 directions.    */
    131     /*****************************************************************/
    132     for(i = 0; i < SUB_BLK_HT; i ++)
    133     {
    134         row2_m1 = _mm_loadl_epi64((__m128i *) (pu1_src - 1));
    135         row2_0  = _mm_loadl_epi64((__m128i *) (pu1_src));
    136         row2_p1 = _mm_loadl_epi64((__m128i *) (pu1_src + 1));
    137         pu1_src += src_strd;
    138 
    139         /* Unpack to 16 bits */
    140         row2_m1 = _mm_unpacklo_epi8(row2_m1, zero);
    141         row2_0  = _mm_unpacklo_epi8(row2_0,  zero);
    142         row2_p1 = _mm_unpacklo_epi8(row2_p1, zero);
    143 
    144         diff    = _mm_sad_epu8(row1_0, row2_0);
    145         diffs[0]  = _mm_add_epi64(diffs[0], diff);
    146 
    147         diff    = _mm_sad_epu8(row1_m1, row2_p1);
    148         diffs[1] = _mm_add_epi64(diffs[1], diff);
    149 
    150         diff    = _mm_sad_epu8(row1_p1, row2_m1);
    151         diffs[2]  = _mm_add_epi64(diffs[2], diff);
    152 
    153         row1_m1 = row2_m1;
    154         row1_0 = row2_0;
    155         row1_p1 = row2_p1;
    156     }
    157     /* Revert pu1_src increment */
    158     pu1_src -= (SUB_BLK_HT + 1) * src_strd;
    159 
    160 
    161     adiff[0] = _mm_cvtsi128_si32(diffs[0]);
    162     adiff[1] = _mm_cvtsi128_si32(diffs[1]);
    163     adiff[2] = _mm_cvtsi128_si32(diffs[2]);
    164     adiff[3] = _mm_cvtsi128_si32(_mm_srli_si128(diffs[0], 8));
    165     adiff[4] = _mm_cvtsi128_si32(_mm_srli_si128(diffs[1], 8));
    166     adiff[5] = _mm_cvtsi128_si32(_mm_srli_si128(diffs[2], 8));
    167     pi4_diff = adiff;
    168 
    169     for(i = 0; i < 2; i++)
    170     {
    171         /*****************************************************************/
    172         /* Applying bias, to make the diff comparision more robust.      */
    173         /*****************************************************************/
    174         pi4_diff[0] *= EDGE_BIAS_0;
    175         pi4_diff[1] *= EDGE_BIAS_1;
    176         pi4_diff[2] *= EDGE_BIAS_1;
    177 
    178         /*****************************************************************/
    179         /* comapring the diffs */
    180         /*****************************************************************/
    181         dir_45_le_90  = (pi4_diff[2] <= pi4_diff[0]);
    182         dir_45_le_135 = (pi4_diff[2] <= pi4_diff[1]);
    183         dir_135_le_90 = (pi4_diff[1] <= pi4_diff[0]);
    184 
    185         /*****************************************************************/
    186         /* Direction selection. */
    187         /*****************************************************************/
    188         shifts[i] = 0;
    189         if(1 == dir_45_le_135)
    190         {
    191             if(1 == dir_45_le_90)
    192                 shifts[i] = 1;
    193         }
    194         else
    195         {
    196             if(1 == dir_135_le_90)
    197                 shifts[i] = -1;
    198         }
    199         pi4_diff += 3;
    200     }
    201     /*****************************************************************/
    202     /* Directional interpolation */
    203     /*****************************************************************/
    204     for(i = 0; i < SUB_BLK_HT / 2; i++)
    205     {
    206         __m128i dst;
    207         __m128i row1, row2;
    208 
    209         UWORD32 *pu4_row1th, *pu4_row1tl;
    210         UWORD32 *pu4_row2th, *pu4_row2tl;
    211         UWORD32 *pu4_row1bh, *pu4_row1bl;
    212         UWORD32 *pu4_row2bh, *pu4_row2bl;
    213 
    214         pu4_row1th  = (UWORD32 *)(pu1_src + shifts[0]);
    215         pu4_row1tl  = (UWORD32 *)(pu1_src + SUB_BLK_WD + shifts[1]);
    216 
    217         pu1_src += src_strd;
    218         pu4_row2th  = (UWORD32 *)(pu1_src + shifts[0]);
    219         pu4_row2tl  = (UWORD32 *)(pu1_src + SUB_BLK_WD + shifts[1]);
    220 
    221         pu4_row1bh  = (UWORD32 *)(pu1_src - shifts[0]);
    222         pu4_row1bl  = (UWORD32 *)(pu1_src + SUB_BLK_WD - shifts[1]);
    223 
    224         pu1_src += src_strd;
    225         pu4_row2bh  = (UWORD32 *)(pu1_src - shifts[0]);
    226         pu4_row2bl  = (UWORD32 *)(pu1_src + SUB_BLK_WD - shifts[1]);
    227 
    228         row1 = _mm_set_epi32(*pu4_row1tl, *pu4_row1th, *pu4_row2tl, *pu4_row2th);
    229         row2 = _mm_set_epi32(*pu4_row1bl, *pu4_row1bh, *pu4_row2bl, *pu4_row2bh);
    230 
    231         dst = _mm_avg_epu8(row1, row2);
    232 
    233         _mm_storel_epi64((__m128i *)pu1_out, _mm_srli_si128(dst, 8));
    234         pu1_out += out_strd;
    235 
    236         _mm_storel_epi64((__m128i *)pu1_out, dst);
    237         pu1_out += out_strd;
    238     }
    239 }
    240 
    241