1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /** 21 ******************************************************************************* 22 * @file 23 * ideint_utils.c 24 * 25 * @brief 26 * This file contains the definitions of the core processing of the de 27 * interlacer. 28 * 29 * @author 30 * Ittiam 31 * 32 * @par List of Functions: 33 * ideint_spatial_filter_ssse3() 34 * 35 * @remarks 36 * None 37 * 38 ******************************************************************************* 39 */ 40 /*****************************************************************************/ 41 /* File Includes */ 42 /*****************************************************************************/ 43 /* System include files */ 44 #include <stdio.h> 45 #include <stdint.h> 46 #include <string.h> 47 #include <stdlib.h> 48 #include <assert.h> 49 #include <immintrin.h> 50 51 /* User include files */ 52 #include "icv_datatypes.h" 53 #include "icv_macros.h" 54 #include "icv_platform_macros.h" 55 #include "icv.h" 56 #include "icv_variance.h" 57 #include "icv_sad.h" 58 #include "ideint.h" 59 #include "ideint_defs.h" 60 #include "ideint_structs.h" 61 #include "ideint_utils.h" 62 #include "ideint_cac.h" 63 64 /** 65 ******************************************************************************* 66 * 67 * @brief 68 * Performs spatial edge adaptive filtering 69 * 70 * @par Description 71 * Performs spatial edge adaptive filtering by detecting edge direction 72 * 73 * @param[in] pu1_src 74 * Source buffer 75 * 76 * @param[in] pu1_out 77 * Destination buffer 78 * 79 * @param[in] src_strd 80 * Source stride 81 * 82 * @param[in] out_strd 83 * Destination stride 84 85 * @returns 86 * None 87 * 88 * @remarks 89 * 90 ******************************************************************************* 91 */ 92 void ideint_spatial_filter_ssse3(UWORD8 *pu1_src, 93 UWORD8 *pu1_out, 94 WORD32 src_strd, 95 WORD32 out_strd) 96 { 97 WORD32 i; 98 99 WORD32 adiff[6]; 100 WORD32 *pi4_diff; 101 WORD32 shifts[2]; 102 WORD32 dir_45_le_90, dir_45_le_135, dir_135_le_90; 103 104 __m128i row1_0, row1_m1, row1_p1; 105 __m128i row2_0, row2_m1, row2_p1; 106 __m128i diff, diffs[3]; 107 __m128i zero; 108 109 /*****************************************************************/ 110 /* Direction detection */ 111 /*****************************************************************/ 112 113 zero = _mm_setzero_si128(); 114 diffs[0] = _mm_setzero_si128(); 115 diffs[1] = _mm_setzero_si128(); 116 diffs[2] = _mm_setzero_si128(); 117 118 /* Load source */ 119 row1_m1 = _mm_loadl_epi64((__m128i *) (pu1_src - 1)); 120 row1_0 = _mm_loadl_epi64((__m128i *) (pu1_src)); 121 row1_p1 = _mm_loadl_epi64((__m128i *) (pu1_src + 1)); 122 pu1_src += src_strd; 123 124 /* Unpack to 16 bits */ 125 row1_m1 = _mm_unpacklo_epi8(row1_m1, zero); 126 row1_0 = _mm_unpacklo_epi8(row1_0, zero); 127 row1_p1 = _mm_unpacklo_epi8(row1_p1, zero); 128 129 /*****************************************************************/ 130 /* Calculating the difference along each of the 3 directions. */ 131 /*****************************************************************/ 132 for(i = 0; i < SUB_BLK_HT; i ++) 133 { 134 row2_m1 = _mm_loadl_epi64((__m128i *) (pu1_src - 1)); 135 row2_0 = _mm_loadl_epi64((__m128i *) (pu1_src)); 136 row2_p1 = _mm_loadl_epi64((__m128i *) (pu1_src + 1)); 137 pu1_src += src_strd; 138 139 /* Unpack to 16 bits */ 140 row2_m1 = _mm_unpacklo_epi8(row2_m1, zero); 141 row2_0 = _mm_unpacklo_epi8(row2_0, zero); 142 row2_p1 = _mm_unpacklo_epi8(row2_p1, zero); 143 144 diff = _mm_sad_epu8(row1_0, row2_0); 145 diffs[0] = _mm_add_epi64(diffs[0], diff); 146 147 diff = _mm_sad_epu8(row1_m1, row2_p1); 148 diffs[1] = _mm_add_epi64(diffs[1], diff); 149 150 diff = _mm_sad_epu8(row1_p1, row2_m1); 151 diffs[2] = _mm_add_epi64(diffs[2], diff); 152 153 row1_m1 = row2_m1; 154 row1_0 = row2_0; 155 row1_p1 = row2_p1; 156 } 157 /* Revert pu1_src increment */ 158 pu1_src -= (SUB_BLK_HT + 1) * src_strd; 159 160 161 adiff[0] = _mm_cvtsi128_si32(diffs[0]); 162 adiff[1] = _mm_cvtsi128_si32(diffs[1]); 163 adiff[2] = _mm_cvtsi128_si32(diffs[2]); 164 adiff[3] = _mm_cvtsi128_si32(_mm_srli_si128(diffs[0], 8)); 165 adiff[4] = _mm_cvtsi128_si32(_mm_srli_si128(diffs[1], 8)); 166 adiff[5] = _mm_cvtsi128_si32(_mm_srli_si128(diffs[2], 8)); 167 pi4_diff = adiff; 168 169 for(i = 0; i < 2; i++) 170 { 171 /*****************************************************************/ 172 /* Applying bias, to make the diff comparision more robust. */ 173 /*****************************************************************/ 174 pi4_diff[0] *= EDGE_BIAS_0; 175 pi4_diff[1] *= EDGE_BIAS_1; 176 pi4_diff[2] *= EDGE_BIAS_1; 177 178 /*****************************************************************/ 179 /* comapring the diffs */ 180 /*****************************************************************/ 181 dir_45_le_90 = (pi4_diff[2] <= pi4_diff[0]); 182 dir_45_le_135 = (pi4_diff[2] <= pi4_diff[1]); 183 dir_135_le_90 = (pi4_diff[1] <= pi4_diff[0]); 184 185 /*****************************************************************/ 186 /* Direction selection. */ 187 /*****************************************************************/ 188 shifts[i] = 0; 189 if(1 == dir_45_le_135) 190 { 191 if(1 == dir_45_le_90) 192 shifts[i] = 1; 193 } 194 else 195 { 196 if(1 == dir_135_le_90) 197 shifts[i] = -1; 198 } 199 pi4_diff += 3; 200 } 201 /*****************************************************************/ 202 /* Directional interpolation */ 203 /*****************************************************************/ 204 for(i = 0; i < SUB_BLK_HT / 2; i++) 205 { 206 __m128i dst; 207 __m128i row1, row2; 208 209 UWORD32 *pu4_row1th, *pu4_row1tl; 210 UWORD32 *pu4_row2th, *pu4_row2tl; 211 UWORD32 *pu4_row1bh, *pu4_row1bl; 212 UWORD32 *pu4_row2bh, *pu4_row2bl; 213 214 pu4_row1th = (UWORD32 *)(pu1_src + shifts[0]); 215 pu4_row1tl = (UWORD32 *)(pu1_src + SUB_BLK_WD + shifts[1]); 216 217 pu1_src += src_strd; 218 pu4_row2th = (UWORD32 *)(pu1_src + shifts[0]); 219 pu4_row2tl = (UWORD32 *)(pu1_src + SUB_BLK_WD + shifts[1]); 220 221 pu4_row1bh = (UWORD32 *)(pu1_src - shifts[0]); 222 pu4_row1bl = (UWORD32 *)(pu1_src + SUB_BLK_WD - shifts[1]); 223 224 pu1_src += src_strd; 225 pu4_row2bh = (UWORD32 *)(pu1_src - shifts[0]); 226 pu4_row2bl = (UWORD32 *)(pu1_src + SUB_BLK_WD - shifts[1]); 227 228 row1 = _mm_set_epi32(*pu4_row1tl, *pu4_row1th, *pu4_row2tl, *pu4_row2th); 229 row2 = _mm_set_epi32(*pu4_row1bl, *pu4_row1bh, *pu4_row2bl, *pu4_row2bh); 230 231 dst = _mm_avg_epu8(row1, row2); 232 233 _mm_storel_epi64((__m128i *)pu1_out, _mm_srli_si128(dst, 8)); 234 pu1_out += out_strd; 235 236 _mm_storel_epi64((__m128i *)pu1_out, dst); 237 pu1_out += out_strd; 238 } 239 } 240 241