1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_itrans_recon.c 22 * 23 * @brief 24 * Contains function definitions for inverse transform and reconstruction 25 * 26 * 27 * @author 28 * 100470 29 * 30 * @par List of Functions: 31 * - ihevc_itrans_recon_4x4_ttype1() 32 * - ihevc_itrans_recon_4x4() 33 * 34 * @remarks 35 * None 36 * 37 ******************************************************************************* 38 */ 39 #include <stdio.h> 40 #include <string.h> 41 #include "ihevc_typedefs.h" 42 #include "ihevc_macros.h" 43 #include "ihevc_platform_macros.h" 44 #include "ihevc_defs.h" 45 #include "ihevc_trans_tables.h" 46 #include "ihevc_itrans_recon.h" 47 #include "ihevc_func_selector.h" 48 #include "ihevc_trans_macros.h" 49 50 /* All the functions here are replicated from ihevc_itrans.c and modified to */ 51 /* include reconstruction */ 52 53 /** 54 ******************************************************************************* 55 * 56 * @brief 57 * This function performs Inverse transform type 1 (DST) and reconstruction 58 * for 4x4 input block 59 * 60 * @par Description: 61 * Performs inverse transform and adds the prediction data and clips output 62 * to 8 bit 63 * 64 * @param[in] pi2_src 65 * Input 4x4 coefficients 66 * 67 * @param[in] pi2_tmp 68 * Temporary 4x4 buffer for storing inverse 69 * 70 * transform 71 * 1st stage output 72 * 73 * @param[in] pu1_pred 74 * Prediction 4x4 block 75 * 76 * @param[out] pu1_dst 77 * Output 4x4 block 78 * 79 * @param[in] src_strd 80 * Input stride 81 * 82 * @param[in] pred_strd 83 * Prediction stride 84 * 85 * @param[in] dst_strd 86 * Output Stride 87 * 88 * @param[in] zero_cols 89 * Zero columns in pi2_src 90 * 91 * @returns Void 92 * 93 * @remarks 94 * None 95 * 96 ******************************************************************************* 97 */ 98 99 void ihevc_itrans_recon_4x4_ttype1(WORD16 *pi2_src, 100 WORD16 *pi2_tmp, 101 UWORD8 *pu1_pred, 102 UWORD8 *pu1_dst, 103 WORD32 src_strd, 104 WORD32 pred_strd, 105 WORD32 dst_strd, 106 WORD32 zero_cols, 107 WORD32 zero_rows) 108 { 109 WORD32 i, c[4]; 110 WORD32 add; 111 WORD32 shift; 112 WORD16 *pi2_tmp_orig; 113 WORD32 trans_size; 114 UNUSED(zero_rows); 115 trans_size = TRANS_SIZE_4; 116 117 pi2_tmp_orig = pi2_tmp; 118 119 /* Inverse Transform 1st stage */ 120 shift = IT_SHIFT_STAGE_1; 121 add = 1 << (shift - 1); 122 123 for(i = 0; i < trans_size; i++) 124 { 125 /* Checking for Zero Cols */ 126 if((zero_cols & 1) == 1) 127 { 128 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 129 } 130 else 131 { 132 // Intermediate Variables 133 c[0] = pi2_src[0] + pi2_src[2 * src_strd]; 134 c[1] = pi2_src[2 * src_strd] + pi2_src[3 * src_strd]; 135 c[2] = pi2_src[0] - pi2_src[3 * src_strd]; 136 c[3] = 74 * pi2_src[src_strd]; 137 138 pi2_tmp[0] = 139 CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift); 140 pi2_tmp[1] = 141 CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift); 142 pi2_tmp[2] = 143 CLIP_S16((74 * (pi2_src[0] - pi2_src[2 * src_strd] + pi2_src[3 * src_strd]) + add) >> shift); 144 pi2_tmp[3] = 145 CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift); 146 } 147 pi2_src++; 148 pi2_tmp += trans_size; 149 zero_cols = zero_cols >> 1; 150 } 151 152 pi2_tmp = pi2_tmp_orig; 153 154 /* Inverse Transform 2nd stage */ 155 shift = IT_SHIFT_STAGE_2; 156 add = 1 << (shift - 1); 157 158 for(i = 0; i < trans_size; i++) 159 { 160 WORD32 itrans_out; 161 // Intermediate Variables 162 c[0] = pi2_tmp[0] + pi2_tmp[2 * trans_size]; 163 c[1] = pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]; 164 c[2] = pi2_tmp[0] - pi2_tmp[3 * trans_size]; 165 c[3] = 74 * pi2_tmp[trans_size]; 166 167 itrans_out = 168 CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift); 169 pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0])); 170 itrans_out = 171 CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift); 172 pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1])); 173 itrans_out = 174 CLIP_S16((74 * (pi2_tmp[0] - pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]) + add) >> shift); 175 pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2])); 176 itrans_out = 177 CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift); 178 pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3])); 179 pi2_tmp++; 180 pu1_pred += pred_strd; 181 pu1_dst += dst_strd; 182 } 183 } 184 185 /** 186 ******************************************************************************* 187 * 188 * @brief 189 * This function performs Inverse transform and reconstruction for 4x4 190 * input block 191 * 192 * @par Description: 193 * Performs inverse transform and adds the prediction data and clips output 194 * to 8 bit 195 * 196 * @param[in] pi2_src 197 * Input 4x4 coefficients 198 * 199 * @param[in] pi2_tmp 200 * Temporary 4x4 buffer for storing inverse 201 * 202 * transform 203 * 1st stage output 204 * 205 * @param[in] pu1_pred 206 * Prediction 4x4 block 207 * 208 * @param[out] pu1_dst 209 * Output 4x4 block 210 * 211 * @param[in] src_strd 212 * Input stride 213 * 214 * @param[in] pred_strd 215 * Prediction stride 216 * 217 * @param[in] dst_strd 218 * Output Stride 219 * 220 * @param[in] shift 221 * Output shift 222 * 223 * @param[in] zero_cols 224 * Zero columns in pi2_src 225 * 226 * @returns Void 227 * 228 * @remarks 229 * None 230 * 231 ******************************************************************************* 232 */ 233 234 void ihevc_itrans_recon_4x4(WORD16 *pi2_src, 235 WORD16 *pi2_tmp, 236 UWORD8 *pu1_pred, 237 UWORD8 *pu1_dst, 238 WORD32 src_strd, 239 WORD32 pred_strd, 240 WORD32 dst_strd, 241 WORD32 zero_cols, 242 WORD32 zero_rows) 243 244 { 245 WORD32 j; 246 WORD32 e[2], o[2]; 247 WORD32 add; 248 WORD32 shift; 249 WORD16 *pi2_tmp_orig; 250 WORD32 trans_size; 251 UNUSED(zero_rows); 252 trans_size = TRANS_SIZE_4; 253 254 pi2_tmp_orig = pi2_tmp; 255 256 /* Inverse Transform 1st stage */ 257 shift = IT_SHIFT_STAGE_1; 258 add = 1 << (shift - 1); 259 260 for(j = 0; j < trans_size; j++) 261 { 262 /* Checking for Zero Cols */ 263 if((zero_cols & 1) == 1) 264 { 265 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 266 } 267 else 268 { 269 270 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 271 o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_src[src_strd] 272 + g_ai2_ihevc_trans_4[3][0] * pi2_src[3 * src_strd]; 273 o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_src[src_strd] 274 + g_ai2_ihevc_trans_4[3][1] * pi2_src[3 * src_strd]; 275 e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_src[0] 276 + g_ai2_ihevc_trans_4[2][0] * pi2_src[2 * src_strd]; 277 e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_src[0] 278 + g_ai2_ihevc_trans_4[2][1] * pi2_src[2 * src_strd]; 279 280 pi2_tmp[0] = 281 CLIP_S16(((e[0] + o[0] + add) >> shift)); 282 pi2_tmp[1] = 283 CLIP_S16(((e[1] + o[1] + add) >> shift)); 284 pi2_tmp[2] = 285 CLIP_S16(((e[1] - o[1] + add) >> shift)); 286 pi2_tmp[3] = 287 CLIP_S16(((e[0] - o[0] + add) >> shift)); 288 289 } 290 pi2_src++; 291 pi2_tmp += trans_size; 292 zero_cols = zero_cols >> 1; 293 } 294 295 pi2_tmp = pi2_tmp_orig; 296 297 /* Inverse Transform 2nd stage */ 298 shift = IT_SHIFT_STAGE_2; 299 add = 1 << (shift - 1); 300 301 for(j = 0; j < trans_size; j++) 302 { 303 WORD32 itrans_out; 304 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 305 o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_tmp[trans_size] 306 + g_ai2_ihevc_trans_4[3][0] * pi2_tmp[3 * trans_size]; 307 o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_tmp[trans_size] 308 + g_ai2_ihevc_trans_4[3][1] * pi2_tmp[3 * trans_size]; 309 e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_tmp[0] 310 + g_ai2_ihevc_trans_4[2][0] * pi2_tmp[2 * trans_size]; 311 e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_tmp[0] 312 + g_ai2_ihevc_trans_4[2][1] * pi2_tmp[2 * trans_size]; 313 314 itrans_out = 315 CLIP_S16(((e[0] + o[0] + add) >> shift)); 316 pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0])); 317 itrans_out = 318 CLIP_S16(((e[1] + o[1] + add) >> shift)); 319 pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1])); 320 itrans_out = 321 CLIP_S16(((e[1] - o[1] + add) >> shift)); 322 pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2])); 323 itrans_out = 324 CLIP_S16(((e[0] - o[0] + add) >> shift)); 325 pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3])); 326 327 pi2_tmp++; 328 pu1_pred += pred_strd; 329 pu1_dst += dst_strd; 330 331 } 332 } 333 334