1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevcd_iquant_itrans_recon_ctb.c 22 * 23 * @brief 24 * Contains functions for inverse quantization, inverse transform and recon 25 * 26 * @author 27 * Ittiam 28 * 29 * @par List of Functions: 30 * - ihevcd_iquant_itrans_recon_ctb() 31 * 32 * @remarks 33 * None 34 * 35 ******************************************************************************* 36 */ 37 /*****************************************************************************/ 38 /* File Includes */ 39 /*****************************************************************************/ 40 #include <stdio.h> 41 #include <stddef.h> 42 #include <stdlib.h> 43 #include <string.h> 44 45 #include "ihevc_typedefs.h" 46 #include "iv.h" 47 #include "ivd.h" 48 #include "ihevcd_cxa.h" 49 50 #include "ihevc_defs.h" 51 #include "ihevc_debug.h" 52 #include "ihevc_structs.h" 53 #include "ihevc_cabac_tables.h" 54 #include "ihevc_macros.h" 55 #include "ihevc_platform_macros.h" 56 57 #include "ihevcd_defs.h" 58 #include "ihevcd_function_selector.h" 59 #include "ihevcd_structs.h" 60 #include "ihevcd_error.h" 61 #include "ihevcd_bitstream.h" 62 #include "ihevc_common_tables.h" 63 64 /* Intra pred includes */ 65 #include "ihevc_intra_pred.h" 66 67 /* Inverse transform common module includes */ 68 #include "ihevc_trans_tables.h" 69 #include "ihevc_trans_macros.h" 70 #include "ihevc_itrans_recon.h" 71 #include "ihevc_recon.h" 72 #include "ihevc_chroma_itrans_recon.h" 73 #include "ihevc_chroma_recon.h" 74 75 /* Decoder includes */ 76 #include "ihevcd_common_tables.h" 77 #include "ihevcd_iquant_itrans_recon_ctb.h" 78 #include "ihevcd_debug.h" 79 #include "ihevcd_profile.h" 80 #include "ihevcd_statistics.h" 81 #include "ihevcd_itrans_recon_dc.h" 82 83 static const UWORD32 gau4_ihevcd_4_bit_reverse[] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; 84 85 86 /* Globals */ 87 static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] = 88 { IP_FUNC_MODE_0, /* Mode 0 */ 89 IP_FUNC_MODE_1, /* Mode 1 */ 90 IP_FUNC_MODE_2, /* Mode 2 */ 91 IP_FUNC_MODE_3TO9, /* Mode 3 */ 92 IP_FUNC_MODE_3TO9, /* Mode 4 */ 93 IP_FUNC_MODE_3TO9, /* Mode 5 */ 94 IP_FUNC_MODE_3TO9, /* Mode 6 */ 95 IP_FUNC_MODE_3TO9, /* Mode 7 */ 96 IP_FUNC_MODE_3TO9, /* Mode 8 */ 97 IP_FUNC_MODE_3TO9, /* Mode 9 */ 98 IP_FUNC_MODE_10, /* Mode 10 */ 99 IP_FUNC_MODE_11TO17, /* Mode 11 */ 100 IP_FUNC_MODE_11TO17, /* Mode 12 */ 101 IP_FUNC_MODE_11TO17, /* Mode 13 */ 102 IP_FUNC_MODE_11TO17, /* Mode 14 */ 103 IP_FUNC_MODE_11TO17, /* Mode 15 */ 104 IP_FUNC_MODE_11TO17, /* Mode 16 */ 105 IP_FUNC_MODE_11TO17, /* Mode 17 */ 106 IP_FUNC_MODE_18_34, /* Mode 18 */ 107 IP_FUNC_MODE_19TO25, /* Mode 19 */ 108 IP_FUNC_MODE_19TO25, /* Mode 20 */ 109 IP_FUNC_MODE_19TO25, /* Mode 21 */ 110 IP_FUNC_MODE_19TO25, /* Mode 22 */ 111 IP_FUNC_MODE_19TO25, /* Mode 23 */ 112 IP_FUNC_MODE_19TO25, /* Mode 24 */ 113 IP_FUNC_MODE_19TO25, /* Mode 25 */ 114 IP_FUNC_MODE_26, /* Mode 26 */ 115 IP_FUNC_MODE_27TO33, /* Mode 27 */ 116 IP_FUNC_MODE_27TO33, /* Mode 26 */ 117 IP_FUNC_MODE_27TO33, /* Mode 29 */ 118 IP_FUNC_MODE_27TO33, /* Mode 30 */ 119 IP_FUNC_MODE_27TO33, /* Mode 31 */ 120 IP_FUNC_MODE_27TO33, /* Mode 32 */ 121 IP_FUNC_MODE_27TO33, /* Mode 33 */ 122 IP_FUNC_MODE_18_34, /* Mode 34 */ 123 }; 124 125 126 const WORD16 *g_ai2_ihevc_trans_tables[] = 127 { &g_ai2_ihevc_trans_dst_4[0][0], 128 &g_ai2_ihevc_trans_4[0][0], 129 &g_ai2_ihevc_trans_8[0][0], 130 &g_ai2_ihevc_trans_16[0][0], 131 &g_ai2_ihevc_trans_32[0][0] 132 }; 133 134 135 /*****************************************************************************/ 136 /* Function Prototypes */ 137 /*****************************************************************************/ 138 /* Returns number of ai2_level read from ps_sblk_coeff */ 139 UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff, 140 WORD32 log2_trans_size, 141 UWORD8 *pu1_tu_coeff_data, 142 WORD16 *pi2_dequant_matrix, 143 WORD32 qp_rem, 144 WORD32 qp_div, 145 TRANSFORM_TYPE e_trans_type, 146 WORD32 trans_quant_bypass, 147 UWORD32 *pu4_zero_cols, 148 UWORD32 *pu4_zero_rows, 149 UWORD32 *pu4_coeff_type, 150 WORD16 *pi2_coeff_value) 151 { 152 /* Generating coeffs from coeff-map */ 153 WORD32 i; 154 WORD16 *pi2_sblk_ptr; 155 WORD32 subblk_pos_x, subblk_pos_y; 156 WORD32 sblk_scan_idx, coeff_raster_idx; 157 WORD32 sblk_non_zero_coeff_idx; 158 tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data; 159 UWORD8 u1_num_coded_sblks, u1_scan_type; 160 UWORD8 *pu1_new_tu_coeff_data; 161 WORD32 trans_size; 162 WORD32 xs, ys; 163 WORD32 trans_skip; 164 WORD16 iquant_out; 165 WORD32 shift_iq; 166 { 167 WORD32 bit_depth; 168 169 bit_depth = 8 + 0; 170 shift_iq = bit_depth + log2_trans_size - 5; 171 } 172 trans_size = (1 << log2_trans_size); 173 174 /* First byte points to number of coded blocks */ 175 u1_num_coded_sblks = *pu1_tu_coeff_data++; 176 177 /* Next byte points to scan type */ 178 u1_scan_type = *pu1_tu_coeff_data++; 179 /* 0th bit has trans_skip */ 180 trans_skip = u1_scan_type & 1; 181 u1_scan_type >>= 1; 182 183 pi2_sblk_ptr = pi2_tu_coeff; 184 185 /* Initially all columns are assumed to be zero */ 186 *pu4_zero_cols = 0xFFFFFFFF; 187 /* Initially all rows are assumed to be zero */ 188 *pu4_zero_rows = 0xFFFFFFFF; 189 190 ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data); 191 192 if(trans_skip) 193 memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16)); 194 195 STATS_INIT_SBLK_AND_COEFF_POS(); 196 197 /* DC only case */ 198 if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks) 199 && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos) 200 && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map)) 201 { 202 *pu4_coeff_type = 1; 203 204 if(!trans_quant_bypass) 205 { 206 if(4 == trans_size) 207 { 208 IQUANT_4x4(iquant_out, 209 ps_tu_sblk_coeff_data->ai2_level[0], 210 pi2_dequant_matrix[0] 211 * g_ihevc_iquant_scales[qp_rem], 212 shift_iq, qp_div); 213 } 214 else 215 { 216 IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0], 217 pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem], 218 shift_iq, qp_div); 219 } 220 if(trans_skip) 221 iquant_out = (iquant_out + 16) >> 5; 222 } 223 else 224 { 225 /* setting the column to zero */ 226 for(i = 0; i < trans_size; i++) 227 *(pi2_tu_coeff + i * trans_size) = 0; 228 229 iquant_out = ps_tu_sblk_coeff_data->ai2_level[0]; 230 } 231 *pi2_coeff_value = iquant_out; 232 *pi2_tu_coeff = iquant_out; 233 *pu4_zero_cols &= ~0x1; 234 *pu4_zero_rows &= ~0x1; 235 ps_tu_sblk_coeff_data = 236 (void *)&ps_tu_sblk_coeff_data->ai2_level[1]; 237 238 STATS_UPDATE_COEFF_COUNT(); 239 STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), 0, 0); 240 STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip)); 241 return ((UWORD8 *)ps_tu_sblk_coeff_data); 242 } 243 else 244 { 245 *pu4_coeff_type = 0; 246 /* In case of trans skip, memset has already happened */ 247 if(!trans_skip) 248 memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16)); 249 } 250 251 for(i = 0; i < u1_num_coded_sblks; i++) 252 { 253 UWORD32 u4_sig_coeff_map; 254 subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF; 255 subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8; 256 257 STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y); 258 259 subblk_pos_x = subblk_pos_x * MIN_TU_SIZE; 260 subblk_pos_y = subblk_pos_y * MIN_TU_SIZE; 261 262 pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size 263 + subblk_pos_x; 264 265 //*pu4_zero_cols &= ~(0xF << subblk_pos_x); 266 267 sblk_non_zero_coeff_idx = 0; 268 u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map; 269 //for(sblk_scan_idx = (31 - CLZ(u4_sig_coeff_map)); sblk_scan_idx >= 0; sblk_scan_idx--) 270 sblk_scan_idx = 31; 271 do 272 { 273 WORD32 clz = CLZ(u4_sig_coeff_map); 274 275 sblk_scan_idx -= clz; 276 /* when clz is 31, u4_sig_coeff_map << (clz+1) might result in unknown behaviour in some cases */ 277 /* Hence either use SHL which takes care of handling these issues based on platform or shift in two stages */ 278 u4_sig_coeff_map = u4_sig_coeff_map << clz; 279 /* Copying coeffs and storing in reverse order */ 280 { 281 STATS_UPDATE_COEFF_COUNT(); 282 coeff_raster_idx = 283 gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx]; 284 285 xs = coeff_raster_idx & 0x3; 286 ys = coeff_raster_idx >> 2; 287 288 if(!trans_quant_bypass) 289 { 290 if(4 == trans_size) 291 { 292 IQUANT_4x4(iquant_out, 293 ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx], 294 pi2_dequant_matrix[(subblk_pos_x + xs) 295 + (subblk_pos_y + ys) 296 * trans_size] 297 * g_ihevc_iquant_scales[qp_rem], 298 shift_iq, qp_div); 299 sblk_non_zero_coeff_idx++; 300 } 301 else 302 { 303 IQUANT(iquant_out, 304 ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx], 305 pi2_dequant_matrix[(subblk_pos_x + xs) 306 + (subblk_pos_y + ys) 307 * trans_size] 308 * g_ihevc_iquant_scales[qp_rem], 309 shift_iq, qp_div); 310 sblk_non_zero_coeff_idx++; 311 } 312 313 if(trans_skip) 314 iquant_out = (iquant_out + 16) >> 5; 315 } 316 else 317 { 318 iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++]; 319 } 320 *pu4_zero_cols &= ~(0x1 << (subblk_pos_x + xs)); 321 *pu4_zero_rows &= ~(0x1 << (subblk_pos_y + ys)); 322 *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out; 323 } 324 sblk_scan_idx--; 325 u4_sig_coeff_map <<= 1; 326 327 }while(u4_sig_coeff_map); 328 /* Updating the sblk pointer */ 329 ps_tu_sblk_coeff_data = 330 (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx]; 331 } 332 333 STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip)); 334 335 pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data; 336 337 return pu1_new_tu_coeff_data; 338 } 339 340 WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc, 341 tu_t *ps_tu, 342 UWORD32 *pu4_intra_nbr_avail, 343 WORD16 i2_pic_width_in_luma_samples, 344 UWORD8 i1_constrained_intra_pred_flag, 345 WORD32 trans_size, 346 WORD32 ctb_size) 347 { 348 sps_t *ps_sps; 349 UWORD8 u1_bot_lt_avail, u1_left_avail, u1_top_avail, u1_top_rt_avail, 350 u1_top_lt_avail; 351 WORD32 x_cur, y_cur, x_nbr, y_nbr; 352 UWORD8 *pu1_nbr_intra_flag; 353 UWORD8 *pu1_pic_intra_flag; 354 UWORD8 top_right, top, top_left, left, bot_left; 355 WORD32 intra_pos; 356 WORD32 num_8_blks, num_8_blks_in_bits; 357 WORD32 numbytes_row = (i2_pic_width_in_luma_samples + 63) / 64; 358 WORD32 cur_x, cur_y; 359 WORD32 i; 360 WORD32 nbr_flags; 361 362 ps_sps = ps_proc->ps_sps; 363 cur_x = ps_tu->b4_pos_x; 364 cur_y = ps_tu->b4_pos_y; 365 366 u1_bot_lt_avail = (pu4_intra_nbr_avail[1 + cur_y + trans_size / MIN_TU_SIZE] 367 >> (31 - (1 + cur_x - 1))) & 1; 368 u1_left_avail = (pu4_intra_nbr_avail[1 + cur_y] >> (31 - (1 + cur_x - 1))) 369 & 1; 370 u1_top_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] >> (31 - (1 + cur_x))) 371 & 1; 372 u1_top_rt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] 373 >> (31 - (1 + cur_x + trans_size / MIN_TU_SIZE))) & 1; 374 u1_top_lt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] 375 >> (31 - (1 + cur_x - 1))) & 1; 376 377 x_cur = ps_proc->i4_ctb_x * ctb_size + cur_x * MIN_TU_SIZE; 378 y_cur = ps_proc->i4_ctb_y * ctb_size + cur_y * MIN_TU_SIZE; 379 380 pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag; 381 382 /* WORD32 nbr_flags as below MSB --> LSB */ 383 /* Top-Left | Top-Right | Top | Left | Bottom-Left 384 * 1 4 4 4 4 385 */ 386 bot_left = 0; 387 left = 0; 388 top_right = 0; 389 top = 0; 390 top_left = 0; 391 392 num_8_blks = trans_size > 4 ? trans_size / 8 : 1; 393 num_8_blks_in_bits = ((1 << num_8_blks) - 1); 394 395 if(i1_constrained_intra_pred_flag) 396 { 397 /* TODO: constrained intra pred not tested */ 398 if(u1_bot_lt_avail) 399 { 400 x_nbr = x_cur - 1; 401 y_nbr = y_cur + trans_size; 402 403 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 404 + x_nbr / 64; 405 intra_pos = ((x_nbr / 8) % 8); 406 for(i = 0; i < num_8_blks; i++) 407 { 408 bot_left |= ((*(pu1_nbr_intra_flag + i * numbytes_row) 409 >> intra_pos) & 1) << i; 410 } 411 bot_left &= num_8_blks_in_bits; 412 } 413 if(u1_left_avail) 414 { 415 x_nbr = x_cur - 1; 416 y_nbr = y_cur; 417 418 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 419 + x_nbr / 64; 420 intra_pos = ((x_nbr / 8) % 8); 421 422 for(i = 0; i < num_8_blks; i++) 423 { 424 left |= ((*(pu1_nbr_intra_flag + i * numbytes_row) >> intra_pos) 425 & 1) << i; 426 } 427 left &= num_8_blks_in_bits; 428 } 429 if(u1_top_avail) 430 { 431 x_nbr = x_cur; 432 y_nbr = y_cur - 1; 433 434 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 435 + x_nbr / 64; 436 intra_pos = ((x_nbr / 8) % 8); 437 438 top = (*pu1_nbr_intra_flag >> intra_pos); 439 top &= num_8_blks_in_bits; 440 /* 441 for(i=0;i<num_8_blks;i++) 442 { 443 top |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i; 444 } 445 */ 446 } 447 if(u1_top_rt_avail) 448 { 449 x_nbr = x_cur + trans_size; 450 y_nbr = y_cur - 1; 451 452 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 453 + x_nbr / 64; 454 intra_pos = ((x_nbr / 8) % 8); 455 456 top_right = (*pu1_nbr_intra_flag >> intra_pos); 457 top_right &= num_8_blks_in_bits; 458 /* 459 for(i=0;i<num_8_blks;i++) 460 { 461 top_right |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i; 462 } 463 */ 464 } 465 if(u1_top_lt_avail) 466 { 467 x_nbr = x_cur - 1; 468 y_nbr = y_cur - 1; 469 470 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 471 + x_nbr / 64; 472 intra_pos = ((x_nbr / 8) % 8); 473 474 top_left = (*pu1_nbr_intra_flag >> intra_pos) & 1; 475 } 476 } 477 else 478 { 479 if(u1_top_avail) 480 top = 0xF; 481 if(u1_top_rt_avail) 482 top_right = 0xF; 483 if(u1_bot_lt_avail) 484 bot_left = 0xF; 485 if(u1_left_avail) 486 left = 0xF; 487 if(u1_top_lt_avail) 488 top_left = 0x1; 489 } 490 491 /* Handling incomplete CTBs */ 492 { 493 WORD32 pu_size_limit = MIN(trans_size, 8); 494 WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples 495 - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) 496 - (ps_tu->b4_pos_x * MIN_TU_SIZE) 497 - (1 << (ps_tu->b3_size + 2)); 498 /* ctb_size_top gives number of valid pixels remaining in the current row */ 499 WORD32 ctb_size_top = MIN(ctb_size, cols_remaining); 500 WORD32 ctb_size_top_bits = (1 << (ctb_size_top / pu_size_limit)) - 1; 501 502 WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples 503 - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) 504 - (ps_tu->b4_pos_y * MIN_TU_SIZE) 505 - (1 << (ps_tu->b3_size + 2)); 506 /* ctb_size_bot gives number of valid pixels remaining in the current column */ 507 WORD32 ctb_size_bot = MIN(ctb_size, rows_remaining); 508 WORD32 ctb_size_bot_bits = (1 << (ctb_size_bot / pu_size_limit)) - 1; 509 510 top_right &= ctb_size_top_bits; 511 bot_left &= ctb_size_bot_bits; 512 } 513 514 /* Top-Left | Top-Right | Top | Left | Bottom-Left 515 * 1 4 4 4 4 516 */ 517 518 /* 519 nbr_flags = (top_left << 16) | (gau4_ihevcd_4_bit_reverse[top_right] << 12) | (gau4_ihevcd_4_bit_reverse[top] << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4) 520 | gau4_ihevcd_4_bit_reverse[bot_left]; 521 */ 522 nbr_flags = (top_left << 16) | (top_right << 12) | (top << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4) 523 | gau4_ihevcd_4_bit_reverse[bot_left]; 524 525 526 return nbr_flags; 527 528 } 529 530 WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) 531 { 532 WORD16 *pi2_scaling_mat; 533 UWORD8 *pu1_y_dst_ctb; 534 UWORD8 *pu1_uv_dst_ctb; 535 WORD32 ctb_size; 536 codec_t *ps_codec; 537 slice_header_t *ps_slice_hdr; 538 tu_t *ps_tu; 539 WORD16 *pi2_ctb_coeff; 540 WORD32 tu_cnt; 541 WORD16 *pi2_tu_coeff; 542 WORD16 *pi2_tmp; 543 WORD32 pic_strd; 544 WORD32 luma_nbr_flags; 545 WORD32 chroma_nbr_flags = 0; 546 UWORD8 u1_luma_pred_mode_first_tu = 0; 547 /* Pointers for generating 2d coeffs from coeff-map */ 548 UWORD8 *pu1_tu_coeff_data; 549 /* nbr avail map for CTB */ 550 /* 1st bit points to neighbor (left/top_left/bot_left) */ 551 /* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */ 552 UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE 553 + 2 /* Top nbr + bot nbr */]; UWORD32 554 top_avail_bits; 555 sps_t *ps_sps; 556 pps_t *ps_pps; 557 WORD32 intra_flag; 558 UWORD8 *pu1_pic_intra_flag; 559 /*************************************************************************/ 560 /* Contanis scaling matrix offset in the following order in a 1D buffer */ 561 /* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V */ 562 /* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V */ 563 /* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V */ 564 /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */ 565 /* Intra 16x16 Y, 16x16 U, 16x16 V */ 566 /* Inter 16x16 Y, 16x16 U, 16x16 V */ 567 /* Intra 32x32 Y */ 568 /* Inter 32x32 Y */ 569 /*************************************************************************/ 570 /* Only first 20 entries are used. Array is extended to avoid out of bound 571 reads. Skip CUs (64x64) read this table, but don't really use the value */ 572 static const WORD32 scaling_mat_offset[] = 573 { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992, 574 1248, 1504, 1760, 2016, 3040, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 575 576 PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED(); 577 578 ps_sps = ps_proc->ps_sps; 579 ps_pps = ps_proc->ps_pps; 580 ps_slice_hdr = ps_proc->ps_slice_hdr; 581 ps_codec = ps_proc->ps_codec; 582 583 pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma; 584 pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma; 585 586 pi2_ctb_coeff = ps_proc->pi2_invscan_out; 587 588 ctb_size = (1 << ps_sps->i1_log2_ctb_size); 589 pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data; 590 591 pic_strd = ps_codec->i4_strd; 592 593 pi2_tmp = ps_proc->pi2_itrans_intrmd_buf; 594 595 pi2_tu_coeff = pi2_ctb_coeff; 596 597 ps_tu = ps_proc->ps_tu; 598 599 if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag)) 600 { 601 pi2_scaling_mat = ps_pps->pi2_scaling_mat; 602 } 603 else 604 { 605 pi2_scaling_mat = ps_sps->pi2_scaling_mat; 606 } 607 608 { 609 /* Updating the initial availability map */ 610 WORD32 i; 611 UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail, 612 u1_top_ctb_avail; 613 614 u1_left_ctb_avail = ps_proc->u1_left_ctb_avail; 615 u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail; 616 u1_top_ctb_avail = ps_proc->u1_top_ctb_avail; 617 u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail; 618 619 /* Initializing the availability array */ 620 memset(au4_intra_nbr_avail, 0, 621 (MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32)); 622 /* Initializing the availability array with CTB level availability flags */ 623 { 624 WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size); 625 WORD32 ctb_size_left = MIN(ctb_size, rows_remaining); 626 for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++) 627 { 628 au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31); 629 } 630 } 631 au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31) 632 >> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */ 633 634 au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31); 635 636 { 637 WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size); 638 WORD32 ctb_size_top = MIN(ctb_size, cols_remaining); 639 WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE)); 640 641 /* ctb_size_top gives number of valid pixels remaining in the current row */ 642 /* Since we need pattern of 1's starting from the MSB, an additional shift */ 643 /* is needed */ 644 shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE); 645 646 top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1) 647 << shift; 648 } 649 au4_intra_nbr_avail[0] |= ( 650 (u1_top_ctb_avail == 1) ? top_avail_bits : 0x0); 651 /* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */ 652 653 } 654 655 /* Applying Inverse transform on all the TU's in CTB */ 656 for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++) 657 { 658 WORD32 transform_skip_flag = 0; 659 WORD32 transform_skip_flag_v = 0; 660 WORD32 num_comp, c_idx, func_idx; 661 WORD32 src_strd, pred_strd, dst_strd; 662 WORD32 qp_div = 0, qp_rem = 0; 663 WORD32 qp_div_v = 0, qp_rem_v = 0; 664 UWORD32 zero_cols = 0, zero_cols_v = 0; 665 UWORD32 zero_rows = 0, zero_rows_v = 0; 666 UWORD32 coeff_type = 0, coeff_type_v = 0; 667 WORD16 i2_coeff_value, i2_coeff_value_v; 668 WORD32 trans_size = 0; 669 TRANSFORM_TYPE e_trans_type; 670 WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2; 671 WORD32 log2_trans_size; 672 WORD32 chroma_qp_idx; 673 WORD16 *pi2_src = NULL, *pi2_src_v = NULL; 674 UWORD8 *pu1_pred = NULL, *pu1_pred_v = NULL; 675 UWORD8 *pu1_dst = NULL, *pu1_dst_v = NULL; 676 WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL; 677 WORD32 tu_x, tu_y; 678 WORD32 tu_y_offset, tu_uv_offset; 679 WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset; 680 UWORD8 u1_cbf = 0, u1_cbf_v = 0, u1_luma_pred_mode, u1_chroma_pred_mode; 681 WORD32 luma_nbr_flags_4x4[4]; 682 WORD32 offset; 683 WORD32 pcm_flag; 684 WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); 685 /* If 420SP_VU is chroma format, pred and dst pointer */ 686 /* will be added +1 to point to U */ 687 WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu; 688 /* If 420SP_VU is chroma format, pred and dst pointer */ 689 /* will be added U offset of +1 and subtracted 2 */ 690 /* to point to V */ 691 WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu; 692 693 tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */ 694 tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */ 695 { 696 WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x); 697 WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y); 698 699 WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64; 700 701 pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag; 702 pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row; 703 pu1_pic_intra_flag += (tu_abs_x >> 6); 704 705 intra_flag = *pu1_pic_intra_flag; 706 intra_flag &= (1 << ((tu_abs_x >> 3) % 8)); 707 } 708 709 u1_luma_pred_mode = ps_tu->b6_luma_intra_mode; 710 u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx; 711 712 if(u1_chroma_pred_mode != 7) 713 num_comp = 2; /* Y and UV */ 714 else 715 num_comp = 1; /* Y */ 716 717 718 pcm_flag = 0; 719 720 if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE)) 721 { 722 UWORD8 *pu1_buf; 723 UWORD8 *pu1_y_dst = pu1_y_dst_ctb; 724 UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb; 725 WORD32 i, j; 726 tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data; 727 WORD32 cb_size = 1 << (ps_tu->b3_size + 2); 728 729 /* trans_size is used to update availability after reconstruction */ 730 trans_size = cb_size; 731 732 pcm_flag = 1; 733 734 tu_y_offset = tu_x + tu_y * pic_strd; 735 pu1_y_dst += tu_x + tu_y * pic_strd; 736 pu1_uv_dst += tu_x + (tu_y >> 1) * pic_strd; 737 738 /* First byte points to number of coded blocks */ 739 pu1_tu_coeff_data++; 740 741 /* Next byte points to scan type */ 742 pu1_tu_coeff_data++; 743 744 ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data; 745 746 pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0]; 747 { 748 749 for(i = 0; i < cb_size; i++) 750 { 751 //pu1_y_dst[i * pic_strd + j] = *pu1_buf++; 752 memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size); 753 pu1_buf += cb_size; 754 } 755 756 pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset; 757 758 /* U */ 759 for(i = 0; i < cb_size / 2; i++) 760 { 761 for(j = 0; j < cb_size / 2; j++) 762 { 763 pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++; 764 } 765 } 766 767 pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset; 768 769 /* V */ 770 for(i = 0; i < cb_size / 2; i++) 771 { 772 for(j = 0; j < cb_size / 2; j++) 773 { 774 pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++; 775 } 776 } 777 } 778 779 pu1_tu_coeff_data = pu1_buf; 780 781 } 782 783 784 785 786 787 for(c_idx = 0; c_idx < num_comp; c_idx++) 788 { 789 if(0 == pcm_flag) 790 { 791 /* Initializing variables */ 792 pred_strd = pic_strd; 793 dst_strd = pic_strd; 794 795 if(c_idx == 0) /* Y */ 796 { 797 log2_y_trans_size_minus_2 = ps_tu->b3_size; 798 trans_size = 1 << (log2_y_trans_size_minus_2 + 2); 799 log2_trans_size = log2_y_trans_size_minus_2 + 2; 800 801 tu_y_offset = tu_x + tu_y * pic_strd; 802 803 pi2_src = pi2_tu_coeff; 804 pu1_pred = pu1_y_dst_ctb + tu_y_offset; 805 pu1_dst = pu1_y_dst_ctb + tu_y_offset; 806 807 /* Calculating scaling matrix offset */ 808 offset = log2_y_trans_size_minus_2 * 6 809 + (!intra_flag) 810 * ((log2_y_trans_size_minus_2 811 == 3) ? 1 : 3) 812 + c_idx; 813 pi2_dequant_matrix = pi2_scaling_mat 814 + scaling_mat_offset[offset]; 815 816 src_strd = trans_size; 817 818 /* 4x4 transform Luma in INTRA mode is DST */ 819 if(log2_y_trans_size_minus_2 == 0 && intra_flag) 820 { 821 func_idx = log2_y_trans_size_minus_2; 822 e_trans_type = DST_4x4; 823 } 824 else 825 { 826 func_idx = log2_y_trans_size_minus_2 + 1; 827 e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1); 828 } 829 830 qp_div = ps_tu->b7_qp / 6; 831 qp_rem = ps_tu->b7_qp % 6; 832 833 u1_cbf = ps_tu->b1_y_cbf; 834 835 transform_skip_flag = pu1_tu_coeff_data[1] & 1; 836 /* Unpacking coeffs */ 837 if(1 == u1_cbf) 838 { 839 pu1_tu_coeff_data = ihevcd_unpack_coeffs( 840 pi2_src, log2_y_trans_size_minus_2 + 2, 841 pu1_tu_coeff_data, pi2_dequant_matrix, 842 qp_rem, qp_div, e_trans_type, 843 ps_tu->b1_transquant_bypass, &zero_cols, 844 &zero_rows, &coeff_type, 845 &i2_coeff_value); 846 } 847 } 848 else /* UV interleaved */ 849 { 850 /* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */ 851 if(ps_tu->b3_size == 0) 852 { 853 /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma postion has to be (luma pos x- 4,luma pos y- 4) */ 854 log2_uv_trans_size_minus_2 = ps_tu->b3_size; 855 tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / 2) * pic_strd; 856 } 857 else 858 { 859 log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1; 860 tu_uv_offset = tu_x + (tu_y >> 1) * pic_strd; 861 } 862 trans_size = 1 << (log2_uv_trans_size_minus_2 + 2); 863 log2_trans_size = log2_uv_trans_size_minus_2 + 2; 864 865 pi2_src = pi2_tu_coeff; 866 pi2_src_v = pi2_tu_coeff + trans_size * trans_size; 867 pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ 868 pu1_pred_v = pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/ 869 pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ 870 pu1_dst_v = pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/ 871 872 /*TODO: Add support for choosing different tables for U and V, 873 * change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes 874 */ 875 /* Calculating scaling matrix offset */ 876 /* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since 877 * max uv trans size is 16x16 878 */ 879 offset = log2_uv_trans_size_minus_2 * 6 880 + (!intra_flag) * 3 + c_idx; 881 pi2_dequant_matrix = pi2_scaling_mat 882 + scaling_mat_offset[offset]; 883 pi2_dequant_matrix_v = pi2_scaling_mat 884 + scaling_mat_offset[offset + 1]; 885 886 src_strd = trans_size; 887 888 func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/ 889 890 /* Handle error cases where 64x64 TU is signalled which results in 32x32 chroma. 891 * By limiting func_idx to 7, max of 16x16 chroma is called */ 892 func_idx = MIN(func_idx, 7); 893 894 e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1); 895 /* QP for U */ 896 i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset; 897 i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset; 898 u1_cbf = ps_tu->b1_cb_cbf; 899 900 chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset 901 + i1_chroma_slice_qp_offset; 902 chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57); 903 qp_div = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6; 904 qp_rem = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6; 905 906 /* QP for V */ 907 i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset; 908 i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset; 909 u1_cbf_v = ps_tu->b1_cr_cbf; 910 911 chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset 912 + i1_chroma_slice_qp_offset; 913 chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57); 914 qp_div_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6; 915 qp_rem_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6; 916 917 /* Unpacking coeffs */ 918 transform_skip_flag = pu1_tu_coeff_data[1] & 1; 919 if(1 == u1_cbf) 920 { 921 pu1_tu_coeff_data = ihevcd_unpack_coeffs( 922 pi2_src, log2_uv_trans_size_minus_2 + 2, 923 pu1_tu_coeff_data, pi2_dequant_matrix, 924 qp_rem, qp_div, e_trans_type, 925 ps_tu->b1_transquant_bypass, &zero_cols, 926 &zero_rows, &coeff_type, 927 &i2_coeff_value); 928 } 929 930 transform_skip_flag_v = pu1_tu_coeff_data[1] & 1; 931 if(1 == u1_cbf_v) 932 { 933 pu1_tu_coeff_data = ihevcd_unpack_coeffs( 934 pi2_src_v, log2_uv_trans_size_minus_2 + 2, 935 pu1_tu_coeff_data, pi2_dequant_matrix_v, 936 qp_rem_v, qp_div_v, e_trans_type, 937 ps_tu->b1_transquant_bypass, &zero_cols_v, 938 &zero_rows_v, &coeff_type_v, &i2_coeff_value_v); 939 } 940 } 941 /***************************************************************/ 942 /****************** Intra Prediction **************************/ 943 /***************************************************************/ 944 if(intra_flag) /* Intra */ 945 { 946 /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actaul size needed, 947 au1_ref_sub_out size is kept as multiple of 8, 948 so that SIMD functions can load 64 bits */ 949 UWORD8 au1_ref_sub_out[(MAX_TU_SIZE * 2 * 2) + 8]; 950 UWORD8 *pu1_top_left, *pu1_top, *pu1_left; 951 WORD32 luma_pred_func_idx, chroma_pred_func_idx; 952 953 /* Get the neighbour availability flags */ 954 /* Done for only Y */ 955 if(c_idx == 0) 956 { 957 /* Get neighbor availability for Y only */ 958 luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc, 959 ps_tu, 960 au4_intra_nbr_avail, 961 ps_sps->i2_pic_width_in_luma_samples, 962 ps_pps->i1_constrained_intra_pred_flag, 963 trans_size, 964 ctb_size); 965 966 if(trans_size == 4) 967 luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags; 968 969 if((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0)) 970 { 971 chroma_nbr_flags = luma_nbr_flags; 972 } 973 974 /* Initializing nbr pointers */ 975 pu1_top = pu1_pred - pic_strd; 976 pu1_left = pu1_pred - 1; 977 pu1_top_left = pu1_pred - pic_strd - 1; 978 979 /* call reference array substitution */ 980 if(luma_nbr_flags == 0x1ffff) 981 ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr( 982 pu1_top_left, 983 pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1); 984 else 985 ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr( 986 pu1_top_left, 987 pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1); 988 989 /* call reference filtering */ 990 ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr( 991 au1_ref_sub_out, trans_size, 992 au1_ref_sub_out, 993 u1_luma_pred_mode, ps_sps->i1_strong_intra_smoothing_enable_flag); 994 995 /* use the look up to get the function idx */ 996 luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode]; 997 998 /* call the intra prediction function */ 999 ps_codec->apf_intra_pred_luma[luma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred, pred_strd, trans_size, u1_luma_pred_mode); 1000 } 1001 else 1002 { 1003 /* In case of yuv420sp_vu, prediction happens as usual. */ 1004 /* So point the pu1_pred pointer to original prediction pointer */ 1005 UWORD8 *pu1_pred_orig = pu1_pred - chroma_yuv420sp_vu_u_offset; 1006 1007 /* Top-Left | Top-Right | Top | Left | Bottom-Left 1008 * 1 4 4 4 4 1009 * 1010 * Generating chroma_nbr_flags depending upon the transform size */ 1011 if(ps_tu->b3_size == 0) 1012 { 1013 /* Take TL,T,L flags of First luma 4x4 block */ 1014 chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0); 1015 /* Take TR flags of Second luma 4x4 block */ 1016 chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000); 1017 /* Take BL flags of Third luma 4x4 block */ 1018 chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F); 1019 } 1020 1021 /* Initializing nbr pointers */ 1022 pu1_top = pu1_pred_orig - pic_strd; 1023 pu1_left = pu1_pred_orig - 2; 1024 pu1_top_left = pu1_pred_orig - pic_strd - 2; 1025 1026 /* Chroma pred mode derivation from luma pred mode */ 1027 { 1028 tu_t *ps_tu_tmp = ps_tu; 1029 while(!ps_tu_tmp->b1_first_tu_in_cu) 1030 { 1031 ps_tu_tmp--; 1032 } 1033 u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode; 1034 } 1035 if(4 == u1_chroma_pred_mode) 1036 u1_chroma_pred_mode = u1_luma_pred_mode_first_tu; 1037 else 1038 { 1039 u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode]; 1040 1041 if(u1_chroma_pred_mode == 1042 u1_luma_pred_mode_first_tu) 1043 { 1044 u1_chroma_pred_mode = INTRA_ANGULAR(34); 1045 } 1046 } 1047 1048 /* call the chroma reference array substitution */ 1049 ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr( 1050 pu1_top_left, 1051 pu1_top, pu1_left, pic_strd, trans_size, chroma_nbr_flags, au1_ref_sub_out, 1); 1052 1053 /* use the look up to get the function idx */ 1054 chroma_pred_func_idx = 1055 g_i4_ip_funcs[u1_chroma_pred_mode]; 1056 1057 /* call the intra prediction function */ 1058 ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred_orig, pred_strd, trans_size, u1_chroma_pred_mode); 1059 } 1060 } 1061 1062 /* Updating number of transform types */ 1063 STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx); 1064 1065 /* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */ 1066 if(1 == u1_cbf) 1067 { 1068 if(ps_tu->b1_transquant_bypass || transform_skip_flag) 1069 { 1070 /* Recon */ 1071 ps_codec->apf_recon[func_idx](pi2_src, pu1_pred, pu1_dst, 1072 src_strd, pred_strd, dst_strd, 1073 zero_cols); 1074 } 1075 else 1076 { 1077 1078 /* Updating coded number of transform types(excluding trans skip and trans quant skip) */ 1079 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0); 1080 1081 /* iQuant , iTrans and Recon */ 1082 if((0 == coeff_type)) 1083 { 1084 ps_codec->apf_itrans_recon[func_idx](pi2_src, pi2_tmp, 1085 pu1_pred, pu1_dst, 1086 src_strd, pred_strd, 1087 dst_strd, zero_cols, 1088 zero_rows); 1089 } 1090 else /* DC only */ 1091 { 1092 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1); 1093 ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred, pu1_dst, 1094 pred_strd, dst_strd, 1095 log2_trans_size, 1096 i2_coeff_value); 1097 } 1098 } 1099 } 1100 /* IQ, IT and Recon for V */ 1101 if(c_idx != 0) 1102 { 1103 if(1 == u1_cbf_v) 1104 { 1105 if(ps_tu->b1_transquant_bypass || transform_skip_flag_v) 1106 { 1107 /* Recon */ 1108 ps_codec->apf_recon[func_idx](pi2_src_v, pu1_pred_v, 1109 pu1_dst_v, src_strd, 1110 pred_strd, dst_strd, 1111 zero_cols_v); 1112 } 1113 else 1114 { 1115 /* Updating number of transform types */ 1116 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0); 1117 1118 /* iQuant , iTrans and Recon */ 1119 if((0 == coeff_type_v)) 1120 { 1121 ps_codec->apf_itrans_recon[func_idx](pi2_src_v, 1122 pi2_tmp, 1123 pu1_pred_v, 1124 pu1_dst_v, 1125 src_strd, 1126 pred_strd, 1127 dst_strd, 1128 zero_cols_v, 1129 zero_rows_v); 1130 } 1131 else /* DC only */ 1132 { 1133 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1); 1134 ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred_v, pu1_dst_v, 1135 pred_strd, dst_strd, 1136 log2_trans_size, 1137 i2_coeff_value_v); 1138 } 1139 } 1140 } 1141 } 1142 } 1143 1144 /* Neighbor availability inside CTB */ 1145 /* 1bit per 4x4. Indicates whether that 4x4 block has been reconstructed(avialable) */ 1146 /* Used for neighbor availability in intra pred */ 1147 if(c_idx == 0) 1148 { 1149 WORD32 i; 1150 WORD32 trans_in_min_tu; 1151 UWORD32 cur_tu_in_bits; 1152 UWORD32 cur_tu_avail_flag; 1153 1154 trans_in_min_tu = trans_size / MIN_TU_SIZE; 1155 cur_tu_in_bits = (1 << trans_in_min_tu) - 1; 1156 cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu); 1157 1158 cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1); 1159 1160 for(i = 0; i < trans_in_min_tu; i++) 1161 au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |= 1162 cur_tu_avail_flag; 1163 } 1164 } 1165 } 1166 ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data; 1167 1168 return ps_proc->i4_ctb_tu_cnt; 1169 } 1170 1171