Home | History | Annotate | Download | only in x86
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 *******************************************************************************
     22 * @file
     23 *  ideint_cac_ssse3.c
     24 *
     25 * @brief
     26 *  This file includes the definition of the combing artifact check function
     27 * of the de-interlacer and some variants of it.
     28 *
     29 * @author
     30 *  Ittiam
     31 *
     32 * @par List of Functions:
     33 *  cac_4x8()
     34 *  ideint_cac()
     35 *
     36 * @remarks
     37 *  In the de-interlacer workspace, cac is not a separate assembly module, as
     38 * it comes along with the de_int_decision() function. But in the C model, to
     39 * keep things cleaner, it was made a separate function during the cac
     40 * experiments, long after the assembly was written by Mudit.
     41 *
     42 *******************************************************************************
     43 */
     44 /*****************************************************************************/
     45 /* File Includes                                                             */
     46 /*****************************************************************************/
     47 /* System include files */
     48 #include <stdio.h>
     49 #include <stdint.h>
     50 #include <string.h>
     51 #include <stdlib.h>
     52 #include <immintrin.h>
     53 
     54 /* User include files */
     55 #include "icv_datatypes.h"
     56 #include "icv_macros.h"
     57 #include "icv.h"
     58 #include "icv_variance.h"
     59 #include "icv_sad.h"
     60 #include "ideint.h"
     61 #include "ideint_defs.h"
     62 #include "ideint_structs.h"
     63 #include "ideint_cac.h"
     64 
     65 /**
     66 *******************************************************************************
     67 *
     68 * @brief
     69 * Combing artifact check function for 8x8 block
     70 *
     71 * @par   Description
     72 * Determines CAC for 8x8 block by calling 8x4 CAC function
     73 *
     74 * @param[in] pu1_top
     75 *  Top field
     76 *
     77 * @param[in] pu1_bot
     78 *  Bottom field
     79 *
     80 * @param[in] top_strd
     81 *  Top field Stride
     82 *
     83 * @param[in] bot_strd
     84 *  Bottom field stride
     85 *
     86 * @returns
     87 * combing artifact flag (1 = detected, 0 = not detected)
     88 *
     89 * @remarks
     90 *
     91 *******************************************************************************
     92 */
     93 WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top,
     94                             UWORD8 *pu1_bot,
     95                             WORD32 top_strd,
     96                             WORD32 bot_strd)
     97 {
     98     WORD32 ca;        /* combing artifact result                          */
     99     WORD32 i;
    100     WORD32 adj[2] = {0};
    101     WORD32 alt[2] = {0};
    102     WORD32 sum_1, sum_2, sum_3, sum_4;
    103     WORD32 sum_diff, diff_sum;
    104 
    105     __m128i top[4];
    106     __m128i bot[4];
    107     __m128i sum_t[4];
    108     __m128i sum_b[4];
    109     __m128i zero;
    110 
    111 
    112     zero = _mm_setzero_si128();
    113 
    114     for(i = 0; i < 4; i++)
    115     {
    116         /* Load top */
    117         top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top));
    118         pu1_top += top_strd;
    119 
    120         /* Load bottom */
    121         bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot));
    122         pu1_bot += bot_strd;
    123 
    124         /* Unpack */
    125         top[i] = _mm_unpacklo_epi8(top[i], zero);
    126         bot[i] = _mm_unpacklo_epi8(bot[i], zero);
    127 
    128         /* Compute row sums */
    129         sum_t[i]  = _mm_sad_epu8(top[i], zero);
    130         sum_b[i]  = _mm_sad_epu8(bot[i], zero);
    131     }
    132 
    133     /* Compute row based alt and adj */
    134     for(i = 0; i < 4; i += 2)
    135     {
    136         sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]);
    137         sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]);
    138         sum_diff = ABS_DIF(sum_1, sum_2);
    139         if(sum_diff >= RSUM_CSUM_THRESH)
    140             adj[0] += sum_diff;
    141 
    142         sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]);
    143         sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]);
    144         sum_diff = ABS_DIF(sum_3, sum_4);
    145         if(sum_diff >= RSUM_CSUM_THRESH)
    146             adj[0] += sum_diff;
    147 
    148         alt[0] += ABS_DIF(sum_1, sum_3);
    149         alt[0] += ABS_DIF(sum_2, sum_4);
    150 
    151         sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8));
    152         sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8));
    153         sum_diff = ABS_DIF(sum_1, sum_2);
    154         if(sum_diff >= RSUM_CSUM_THRESH)
    155             adj[1] += sum_diff;
    156 
    157         sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8));
    158         sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8));
    159         sum_diff = ABS_DIF(sum_3, sum_4);
    160         if(sum_diff >= RSUM_CSUM_THRESH)
    161             adj[1] += sum_diff;
    162 
    163         alt[1] += ABS_DIF(sum_1, sum_3);
    164         alt[1] += ABS_DIF(sum_2, sum_4);
    165     }
    166 
    167     /* Compute column based adj */
    168     {
    169         __m128i avg1, avg2;
    170         __m128i top_avg, bot_avg;
    171         __m128i min, max, diff, thresh;
    172         __m128i mask;
    173         avg1 = _mm_avg_epu8(top[0], top[1]);
    174         avg2 = _mm_avg_epu8(top[2], top[3]);
    175         top_avg = _mm_avg_epu8(avg1, avg2);
    176 
    177         avg1 = _mm_avg_epu8(bot[0], bot[1]);
    178         avg2 = _mm_avg_epu8(bot[2], bot[3]);
    179         bot_avg = _mm_avg_epu8(avg1, avg2);
    180 
    181         min = _mm_min_epu8(top_avg, bot_avg);
    182         max = _mm_max_epu8(top_avg, bot_avg);
    183 
    184         diff = _mm_sub_epi16(max, min);
    185         thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1);
    186 
    187         mask = _mm_cmpgt_epi16(diff, thresh);
    188         diff = _mm_and_si128(diff, mask);
    189 
    190         diff_sum = _mm_extract_epi16(diff, 0);
    191         diff_sum += _mm_extract_epi16(diff, 1);
    192         diff_sum += _mm_extract_epi16(diff, 2);
    193         diff_sum += _mm_extract_epi16(diff, 3);
    194 
    195         adj[0] += diff_sum << 2;
    196 
    197         diff_sum = _mm_extract_epi16(diff, 4);
    198         diff_sum += _mm_extract_epi16(diff, 5);
    199         diff_sum += _mm_extract_epi16(diff, 6);
    200         diff_sum += _mm_extract_epi16(diff, 7);
    201 
    202         adj[1] += diff_sum << 2;
    203 
    204     }
    205 
    206     /* Compute column based alt */
    207     {
    208         __m128i avg1, avg2;
    209         __m128i even_avg, odd_avg, diff;
    210         avg1 = _mm_avg_epu8(top[0], bot[0]);
    211         avg2 = _mm_avg_epu8(top[2], bot[2]);
    212         even_avg = _mm_avg_epu8(avg1, avg2);
    213 
    214         avg1 = _mm_avg_epu8(top[1], bot[1]);
    215         avg2 = _mm_avg_epu8(top[3], bot[3]);
    216         odd_avg = _mm_avg_epu8(avg1, avg2);
    217 
    218         diff = _mm_sad_epu8(even_avg, odd_avg);
    219 
    220 
    221         diff_sum = _mm_cvtsi128_si32(diff);
    222         alt[0] += diff_sum << 2;
    223 
    224         diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8));
    225         alt[1] += diff_sum << 2;
    226 
    227     }
    228     alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
    229     alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
    230 
    231     ca    = (alt[0] < adj[0]);
    232     ca   |= (alt[1] < adj[1]);
    233 
    234     return ca;
    235 }
    236 
    237