1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_itrans_recon_8x8.c 22 * 23 * @brief 24 * Contains function definitions for inverse transform and reconstruction 8x8 25 * 26 * 27 * @author 28 * 100470 29 * 30 * @par List of Functions: 31 * - ihevc_itrans_recon_8x8() 32 * 33 * @remarks 34 * None 35 * 36 ******************************************************************************* 37 */ 38 #include <stdio.h> 39 #include <string.h> 40 #include "ihevc_typedefs.h" 41 #include "ihevc_macros.h" 42 #include "ihevc_platform_macros.h" 43 #include "ihevc_defs.h" 44 #include "ihevc_trans_tables.h" 45 #include "ihevc_itrans_recon.h" 46 #include "ihevc_func_selector.h" 47 #include "ihevc_trans_macros.h" 48 49 /** 50 ******************************************************************************* 51 * 52 * @brief 53 * This function performs Inverse transform and reconstruction for 8x8 54 * input block 55 * 56 * @par Description: 57 * Performs inverse transform and adds the prediction data and clips output 58 * to 8 bit 59 * 60 * @param[in] pi2_src 61 * Input 8x8 coefficients 62 * 63 * @param[in] pi2_tmp 64 * Temporary 8x8 buffer for storing inverse 65 * 66 * transform 67 * 1st stage output 68 * 69 * @param[in] pu1_pred 70 * Prediction 8x8 block 71 * 72 * @param[out] pu1_dst 73 * Output 8x8 block 74 * 75 * @param[in] src_strd 76 * Input stride 77 * 78 * @param[in] pred_strd 79 * Prediction stride 80 * 81 * @param[in] dst_strd 82 * Output Stride 83 * 84 * @param[in] shift 85 * Output shift 86 * 87 * @param[in] zero_cols 88 * Zero columns in pi2_src 89 * 90 * @returns Void 91 * 92 * @remarks 93 * None 94 * 95 ******************************************************************************* 96 */ 97 98 void ihevc_itrans_recon_8x8(WORD16 *pi2_src, 99 WORD16 *pi2_tmp, 100 UWORD8 *pu1_pred, 101 UWORD8 *pu1_dst, 102 WORD32 src_strd, 103 WORD32 pred_strd, 104 WORD32 dst_strd, 105 WORD32 zero_cols, 106 WORD32 zero_rows) 107 { 108 WORD32 j, k; 109 WORD32 e[4], o[4]; 110 WORD32 ee[2], eo[2]; 111 WORD32 add; 112 WORD32 shift; 113 WORD16 *pi2_tmp_orig; 114 WORD32 trans_size; 115 WORD32 zero_rows_2nd_stage = zero_cols; 116 WORD32 row_limit_2nd_stage; 117 118 trans_size = TRANS_SIZE_8; 119 120 pi2_tmp_orig = pi2_tmp; 121 122 if((zero_cols & 0xF0) == 0xF0) 123 row_limit_2nd_stage = 4; 124 else 125 row_limit_2nd_stage = TRANS_SIZE_8; 126 127 128 if((zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */ 129 { 130 /************************************************************************************************/ 131 /**********************************START - IT_RECON_8x8******************************************/ 132 /************************************************************************************************/ 133 134 /* Inverse Transform 1st stage */ 135 shift = IT_SHIFT_STAGE_1; 136 add = 1 << (shift - 1); 137 138 for(j = 0; j < row_limit_2nd_stage; j++) 139 { 140 /* Checking for Zero Cols */ 141 if((zero_cols & 1) == 1) 142 { 143 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 144 } 145 else 146 { 147 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 148 for(k = 0; k < 4; k++) 149 { 150 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd] 151 + g_ai2_ihevc_trans_8[3][k] 152 * pi2_src[3 * src_strd]; 153 } 154 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]; 155 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]; 156 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]; 157 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]; 158 159 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 160 e[0] = ee[0] + eo[0]; 161 e[3] = ee[0] - eo[0]; 162 e[1] = ee[1] + eo[1]; 163 e[2] = ee[1] - eo[1]; 164 for(k = 0; k < 4; k++) 165 { 166 pi2_tmp[k] = 167 CLIP_S16(((e[k] + o[k] + add) >> shift)); 168 pi2_tmp[k + 4] = 169 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); 170 } 171 } 172 pi2_src++; 173 pi2_tmp += trans_size; 174 zero_cols = zero_cols >> 1; 175 } 176 177 pi2_tmp = pi2_tmp_orig; 178 179 /* Inverse Transform 2nd stage */ 180 shift = IT_SHIFT_STAGE_2; 181 add = 1 << (shift - 1); 182 if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ 183 { 184 for(j = 0; j < trans_size; j++) 185 { 186 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 187 for(k = 0; k < 4; k++) 188 { 189 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] 190 + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size]; 191 } 192 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]; 193 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]; 194 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]; 195 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]; 196 197 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 198 e[0] = ee[0] + eo[0]; 199 e[3] = ee[0] - eo[0]; 200 e[1] = ee[1] + eo[1]; 201 e[2] = ee[1] - eo[1]; 202 for(k = 0; k < 4; k++) 203 { 204 WORD32 itrans_out; 205 itrans_out = 206 CLIP_S16(((e[k] + o[k] + add) >> shift)); 207 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 208 itrans_out = 209 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); 210 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); 211 } 212 pi2_tmp++; 213 pu1_pred += pred_strd; 214 pu1_dst += dst_strd; 215 } 216 } 217 else /* All rows of output of 1st stage are non-zero */ 218 { 219 for(j = 0; j < trans_size; j++) 220 { 221 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 222 for(k = 0; k < 4; k++) 223 { 224 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] 225 + g_ai2_ihevc_trans_8[3][k] 226 * pi2_tmp[3 * trans_size] 227 + g_ai2_ihevc_trans_8[5][k] 228 * pi2_tmp[5 * trans_size] 229 + g_ai2_ihevc_trans_8[7][k] 230 * pi2_tmp[7 * trans_size]; 231 } 232 233 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size] 234 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size]; 235 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size] 236 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size]; 237 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0] 238 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size]; 239 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0] 240 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size]; 241 242 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 243 e[0] = ee[0] + eo[0]; 244 e[3] = ee[0] - eo[0]; 245 e[1] = ee[1] + eo[1]; 246 e[2] = ee[1] - eo[1]; 247 for(k = 0; k < 4; k++) 248 { 249 WORD32 itrans_out; 250 itrans_out = 251 CLIP_S16(((e[k] + o[k] + add) >> shift)); 252 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 253 itrans_out = 254 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); 255 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); 256 } 257 pi2_tmp++; 258 pu1_pred += pred_strd; 259 pu1_dst += dst_strd; 260 } 261 } 262 /************************************************************************************************/ 263 /************************************END - IT_RECON_8x8******************************************/ 264 /************************************************************************************************/ 265 } 266 else /* All rows of input are non-zero */ 267 { 268 /************************************************************************************************/ 269 /**********************************START - IT_RECON_8x8******************************************/ 270 /************************************************************************************************/ 271 272 /* Inverse Transform 1st stage */ 273 shift = IT_SHIFT_STAGE_1; 274 add = 1 << (shift - 1); 275 276 for(j = 0; j < row_limit_2nd_stage; j++) 277 { 278 /* Checking for Zero Cols */ 279 if((zero_cols & 1) == 1) 280 { 281 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 282 } 283 else 284 { 285 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 286 for(k = 0; k < 4; k++) 287 { 288 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd] 289 + g_ai2_ihevc_trans_8[3][k] 290 * pi2_src[3 * src_strd] 291 + g_ai2_ihevc_trans_8[5][k] 292 * pi2_src[5 * src_strd] 293 + g_ai2_ihevc_trans_8[7][k] 294 * pi2_src[7 * src_strd]; 295 } 296 297 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd] 298 + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd]; 299 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd] 300 + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd]; 301 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0] 302 + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd]; 303 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0] 304 + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd]; 305 306 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 307 e[0] = ee[0] + eo[0]; 308 e[3] = ee[0] - eo[0]; 309 e[1] = ee[1] + eo[1]; 310 e[2] = ee[1] - eo[1]; 311 for(k = 0; k < 4; k++) 312 { 313 pi2_tmp[k] = 314 CLIP_S16(((e[k] + o[k] + add) >> shift)); 315 pi2_tmp[k + 4] = 316 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); 317 } 318 } 319 pi2_src++; 320 pi2_tmp += trans_size; 321 zero_cols = zero_cols >> 1; 322 } 323 324 pi2_tmp = pi2_tmp_orig; 325 326 /* Inverse Transform 2nd stage */ 327 shift = IT_SHIFT_STAGE_2; 328 add = 1 << (shift - 1); 329 if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ 330 { 331 for(j = 0; j < trans_size; j++) 332 { 333 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 334 for(k = 0; k < 4; k++) 335 { 336 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] 337 + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size]; 338 } 339 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]; 340 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]; 341 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]; 342 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]; 343 344 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 345 e[0] = ee[0] + eo[0]; 346 e[3] = ee[0] - eo[0]; 347 e[1] = ee[1] + eo[1]; 348 e[2] = ee[1] - eo[1]; 349 for(k = 0; k < 4; k++) 350 { 351 WORD32 itrans_out; 352 itrans_out = 353 CLIP_S16(((e[k] + o[k] + add) >> shift)); 354 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 355 itrans_out = 356 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); 357 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); 358 } 359 pi2_tmp++; 360 pu1_pred += pred_strd; 361 pu1_dst += dst_strd; 362 } 363 } 364 else /* All rows of output of 1st stage are non-zero */ 365 { 366 for(j = 0; j < trans_size; j++) 367 { 368 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 369 for(k = 0; k < 4; k++) 370 { 371 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] 372 + g_ai2_ihevc_trans_8[3][k] 373 * pi2_tmp[3 * trans_size] 374 + g_ai2_ihevc_trans_8[5][k] 375 * pi2_tmp[5 * trans_size] 376 + g_ai2_ihevc_trans_8[7][k] 377 * pi2_tmp[7 * trans_size]; 378 } 379 380 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size] 381 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size]; 382 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size] 383 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size]; 384 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0] 385 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size]; 386 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0] 387 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size]; 388 389 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 390 e[0] = ee[0] + eo[0]; 391 e[3] = ee[0] - eo[0]; 392 e[1] = ee[1] + eo[1]; 393 e[2] = ee[1] - eo[1]; 394 for(k = 0; k < 4; k++) 395 { 396 WORD32 itrans_out; 397 itrans_out = 398 CLIP_S16(((e[k] + o[k] + add) >> shift)); 399 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 400 itrans_out = 401 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); 402 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); 403 } 404 pi2_tmp++; 405 pu1_pred += pred_strd; 406 pu1_dst += dst_strd; 407 } 408 } 409 /************************************************************************************************/ 410 /************************************END - IT_RECON_8x8******************************************/ 411 /************************************************************************************************/ 412 } 413 } 414 415