1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /** 21 ******************************************************************************* 22 * @file 23 * ideint_cac_ssse3.c 24 * 25 * @brief 26 * This file include the definitions of the combing artifact check function 27 * of the de-interlacer and some variant of that. 28 * 29 * @author 30 * Ittiam 31 * 32 * @par List of Functions: 33 * cac_4x8() 34 * ideint_cac() 35 * 36 * @remarks 37 * In the de-interlacer workspace, cac is not a seperate assembly module as 38 * it comes along with the de_int_decision() function. But in C-Model, to 39 * keep the things cleaner, it was made to be a separate function during 40 * cac experiments long after the assembly was written by Mudit. 41 * 42 ******************************************************************************* 43 */ 44 /*****************************************************************************/ 45 /* File Includes */ 46 /*****************************************************************************/ 47 /* System include files */ 48 #include <stdio.h> 49 #include <stdint.h> 50 #include <string.h> 51 #include <stdlib.h> 52 #include <immintrin.h> 53 54 /* User include files */ 55 #include "icv_datatypes.h" 56 #include "icv_macros.h" 57 #include "icv.h" 58 #include "icv_variance.h" 59 #include "icv_sad.h" 60 #include "ideint.h" 61 #include "ideint_defs.h" 62 #include "ideint_structs.h" 63 #include "ideint_cac.h" 64 65 /** 66 ******************************************************************************* 67 * 68 * @brief 69 * Combing artifact check function for 8x8 block 70 * 71 * @par Description 72 * Determines CAC for 8x8 block by calling 8x4 CAC function 73 * 74 * @param[in] pu1_top 75 * Top field 76 * 77 * @param[in] pu1_bot 78 * Bottom field 79 * 80 * @param[in] top_strd 81 * Top field Stride 82 * 83 * @param[in] bot_strd 84 * Bottom field stride 85 * 86 * @returns 87 * combing artifact flag (1 = detected, 0 = not detected) 88 * 89 * @remarks 90 * 91 ******************************************************************************* 92 */ 93 WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top, 94 UWORD8 *pu1_bot, 95 WORD32 top_strd, 96 WORD32 bot_strd) 97 { 98 WORD32 ca; /* combing artifact result */ 99 WORD32 i; 100 WORD32 adj[2] = {0}; 101 WORD32 alt[2] = {0}; 102 WORD32 sum_1, sum_2, sum_3, sum_4; 103 WORD32 sum_diff, diff_sum; 104 105 __m128i top[4]; 106 __m128i bot[4]; 107 __m128i sum_t[4]; 108 __m128i sum_b[4]; 109 __m128i zero; 110 111 112 zero = _mm_setzero_si128(); 113 114 for(i = 0; i < 4; i++) 115 { 116 /* Load top */ 117 top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top)); 118 pu1_top += top_strd; 119 120 /* Load bottom */ 121 bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot)); 122 pu1_bot += bot_strd; 123 124 /* Unpack */ 125 top[i] = _mm_unpacklo_epi8(top[i], zero); 126 bot[i] = _mm_unpacklo_epi8(bot[i], zero); 127 128 /* Compute row sums */ 129 sum_t[i] = _mm_sad_epu8(top[i], zero); 130 sum_b[i] = _mm_sad_epu8(bot[i], zero); 131 } 132 133 /* Compute row based alt and adj */ 134 for(i = 0; i < 4; i += 2) 135 { 136 sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]); 137 sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]); 138 sum_diff = ABS_DIF(sum_1, sum_2); 139 if(sum_diff >= RSUM_CSUM_THRESH) 140 adj[0] += sum_diff; 141 142 sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]); 143 sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]); 144 sum_diff = ABS_DIF(sum_3, sum_4); 145 if(sum_diff >= RSUM_CSUM_THRESH) 146 adj[0] += sum_diff; 147 148 alt[0] += ABS_DIF(sum_1, sum_3); 149 alt[0] += ABS_DIF(sum_2, sum_4); 150 151 sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8)); 152 sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8)); 153 sum_diff = ABS_DIF(sum_1, sum_2); 154 if(sum_diff >= RSUM_CSUM_THRESH) 155 adj[1] += sum_diff; 156 157 sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8)); 158 sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8)); 159 sum_diff = ABS_DIF(sum_3, sum_4); 160 if(sum_diff >= RSUM_CSUM_THRESH) 161 adj[1] += sum_diff; 162 163 alt[1] += ABS_DIF(sum_1, sum_3); 164 alt[1] += ABS_DIF(sum_2, sum_4); 165 } 166 167 /* Compute column based adj */ 168 { 169 __m128i avg1, avg2; 170 __m128i top_avg, bot_avg; 171 __m128i min, max, diff, thresh; 172 __m128i mask; 173 avg1 = _mm_avg_epu8(top[0], top[1]); 174 avg2 = _mm_avg_epu8(top[2], top[3]); 175 top_avg = _mm_avg_epu8(avg1, avg2); 176 177 avg1 = _mm_avg_epu8(bot[0], bot[1]); 178 avg2 = _mm_avg_epu8(bot[2], bot[3]); 179 bot_avg = _mm_avg_epu8(avg1, avg2); 180 181 min = _mm_min_epu8(top_avg, bot_avg); 182 max = _mm_max_epu8(top_avg, bot_avg); 183 184 diff = _mm_sub_epi16(max, min); 185 thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1); 186 187 mask = _mm_cmpgt_epi16(diff, thresh); 188 diff = _mm_and_si128(diff, mask); 189 190 diff_sum = _mm_extract_epi16(diff, 0); 191 diff_sum += _mm_extract_epi16(diff, 1); 192 diff_sum += _mm_extract_epi16(diff, 2); 193 diff_sum += _mm_extract_epi16(diff, 3); 194 195 adj[0] += diff_sum << 2; 196 197 diff_sum = _mm_extract_epi16(diff, 4); 198 diff_sum += _mm_extract_epi16(diff, 5); 199 diff_sum += _mm_extract_epi16(diff, 6); 200 diff_sum += _mm_extract_epi16(diff, 7); 201 202 adj[1] += diff_sum << 2; 203 204 } 205 206 /* Compute column based alt */ 207 { 208 __m128i avg1, avg2; 209 __m128i even_avg, odd_avg, diff; 210 avg1 = _mm_avg_epu8(top[0], bot[0]); 211 avg2 = _mm_avg_epu8(top[2], bot[2]); 212 even_avg = _mm_avg_epu8(avg1, avg2); 213 214 avg1 = _mm_avg_epu8(top[1], bot[1]); 215 avg2 = _mm_avg_epu8(top[3], bot[3]); 216 odd_avg = _mm_avg_epu8(avg1, avg2); 217 218 diff = _mm_sad_epu8(even_avg, odd_avg); 219 220 221 diff_sum = _mm_cvtsi128_si32(diff); 222 alt[0] += diff_sum << 2; 223 224 diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8)); 225 alt[1] += diff_sum << 2; 226 227 } 228 alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1); 229 alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1); 230 231 ca = (alt[0] < adj[0]); 232 ca |= (alt[1] < adj[1]); 233 234 return ca; 235 } 236 237