1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevcd_iquant_itrans_recon_ctb.c 22 * 23 * @brief 24 * Contains functions for inverse quantization, inverse transform and recon 25 * 26 * @author 27 * Ittiam 28 * 29 * @par List of Functions: 30 * - ihevcd_iquant_itrans_recon_ctb() 31 * 32 * @remarks 33 * None 34 * 35 ******************************************************************************* 36 */ 37 /*****************************************************************************/ 38 /* File Includes */ 39 /*****************************************************************************/ 40 #include <stdio.h> 41 #include <stddef.h> 42 #include <stdlib.h> 43 #include <string.h> 44 45 #include "ihevc_typedefs.h" 46 #include "iv.h" 47 #include "ivd.h" 48 #include "ihevcd_cxa.h" 49 50 #include "ihevc_defs.h" 51 #include "ihevc_debug.h" 52 #include "ihevc_structs.h" 53 #include "ihevc_cabac_tables.h" 54 #include "ihevc_macros.h" 55 #include "ihevc_platform_macros.h" 56 57 #include "ihevcd_defs.h" 58 #include "ihevcd_function_selector.h" 59 #include "ihevcd_structs.h" 60 #include "ihevcd_error.h" 61 #include "ihevcd_bitstream.h" 62 #include "ihevc_common_tables.h" 63 64 /* Intra pred includes */ 65 #include "ihevc_intra_pred.h" 66 67 /* Inverse transform common module includes */ 68 #include "ihevc_trans_tables.h" 69 #include "ihevc_trans_macros.h" 70 #include "ihevc_itrans_recon.h" 71 #include "ihevc_recon.h" 72 #include "ihevc_chroma_itrans_recon.h" 73 #include "ihevc_chroma_recon.h" 74 75 /* Decoder includes */ 76 #include "ihevcd_common_tables.h" 77 #include "ihevcd_iquant_itrans_recon_ctb.h" 78 #include "ihevcd_debug.h" 79 #include "ihevcd_profile.h" 80 #include "ihevcd_statistics.h" 81 #include "ihevcd_itrans_recon_dc.h" 82 83 const UWORD32 gau4_ihevcd_4_bit_reverse[] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; 84 85 86 /* Globals */ 87 WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] = 88 { IP_FUNC_MODE_0, /* Mode 0 */ 89 IP_FUNC_MODE_1, /* Mode 1 */ 90 IP_FUNC_MODE_2, /* Mode 2 */ 91 IP_FUNC_MODE_3TO9, /* Mode 3 */ 92 IP_FUNC_MODE_3TO9, /* Mode 4 */ 93 IP_FUNC_MODE_3TO9, /* Mode 5 */ 94 IP_FUNC_MODE_3TO9, /* Mode 6 */ 95 IP_FUNC_MODE_3TO9, /* Mode 7 */ 96 IP_FUNC_MODE_3TO9, /* Mode 8 */ 97 IP_FUNC_MODE_3TO9, /* Mode 9 */ 98 IP_FUNC_MODE_10, /* Mode 10 */ 99 IP_FUNC_MODE_11TO17, /* Mode 11 */ 100 IP_FUNC_MODE_11TO17, /* Mode 12 */ 101 IP_FUNC_MODE_11TO17, /* Mode 13 */ 102 IP_FUNC_MODE_11TO17, /* Mode 14 */ 103 IP_FUNC_MODE_11TO17, /* Mode 15 */ 104 IP_FUNC_MODE_11TO17, /* Mode 16 */ 105 IP_FUNC_MODE_11TO17, /* Mode 17 */ 106 IP_FUNC_MODE_18_34, /* Mode 18 */ 107 IP_FUNC_MODE_19TO25, /* Mode 19 */ 108 IP_FUNC_MODE_19TO25, /* Mode 20 */ 109 IP_FUNC_MODE_19TO25, /* Mode 21 */ 110 IP_FUNC_MODE_19TO25, /* Mode 22 */ 111 IP_FUNC_MODE_19TO25, /* Mode 23 */ 112 IP_FUNC_MODE_19TO25, /* Mode 24 */ 113 IP_FUNC_MODE_19TO25, /* Mode 25 */ 114 IP_FUNC_MODE_26, /* Mode 26 */ 115 IP_FUNC_MODE_27TO33, /* Mode 27 */ 116 IP_FUNC_MODE_27TO33, /* Mode 26 */ 117 IP_FUNC_MODE_27TO33, /* Mode 29 */ 118 IP_FUNC_MODE_27TO33, /* Mode 30 */ 119 IP_FUNC_MODE_27TO33, /* Mode 31 */ 120 IP_FUNC_MODE_27TO33, /* Mode 32 */ 121 IP_FUNC_MODE_27TO33, /* Mode 33 */ 122 IP_FUNC_MODE_18_34, /* Mode 34 */ 123 }; 124 125 126 const WORD16 *g_ai2_ihevc_trans_tables[] = 127 { &g_ai2_ihevc_trans_dst_4[0][0], 128 &g_ai2_ihevc_trans_4[0][0], 129 &g_ai2_ihevc_trans_8[0][0], 130 &g_ai2_ihevc_trans_16[0][0], 131 &g_ai2_ihevc_trans_32[0][0] 132 }; 133 134 135 /*****************************************************************************/ 136 /* Function Prototypes */ 137 /*****************************************************************************/ 138 /* Returns number of ai2_level read from ps_sblk_coeff */ 139 UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff, 140 WORD32 log2_trans_size, 141 UWORD8 *pu1_tu_coeff_data, 142 WORD16 *pi2_dequant_matrix, 143 WORD32 qp_rem, 144 WORD32 qp_div, 145 TRANSFORM_TYPE e_trans_type, 146 WORD32 trans_quant_bypass, 147 UWORD32 *pu4_zero_cols, 148 UWORD32 *pu4_zero_rows, 149 UWORD32 *pu4_coeff_type, 150 WORD16 *pi2_coeff_value) 151 { 152 /* Generating coeffs from coeff-map */ 153 WORD32 i; 154 WORD16 *pi2_sblk_ptr; 155 WORD32 subblk_pos_x, subblk_pos_y; 156 WORD32 sblk_scan_idx, coeff_raster_idx; 157 WORD32 sblk_non_zero_coeff_idx; 158 tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data; 159 UWORD8 u1_num_coded_sblks, u1_scan_type; 160 UWORD8 *pu1_new_tu_coeff_data; 161 WORD32 trans_size; 162 WORD32 xs, ys; 163 WORD32 trans_skip; 164 WORD16 iquant_out; 165 WORD32 shift_iq; 166 { 167 WORD32 bit_depth; 168 169 bit_depth = 8 + 0; 170 shift_iq = bit_depth + log2_trans_size - 5; 171 } 172 trans_size = (1 << log2_trans_size); 173 174 /* First byte points to number of coded blocks */ 175 u1_num_coded_sblks = *pu1_tu_coeff_data++; 176 177 /* Next byte points to scan type */ 178 u1_scan_type = *pu1_tu_coeff_data++; 179 /* 0th bit has trans_skip */ 180 trans_skip = u1_scan_type & 1; 181 u1_scan_type >>= 1; 182 183 pi2_sblk_ptr = pi2_tu_coeff; 184 185 /* Initially all columns are assumed to be zero */ 186 *pu4_zero_cols = 0xFFFFFFFF; 187 /* Initially all rows are assumed to be zero */ 188 *pu4_zero_rows = 0xFFFFFFFF; 189 190 ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data); 191 192 if(trans_skip) 193 memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16)); 194 195 STATS_INIT_SBLK_AND_COEFF_POS(); 196 197 /* DC only case */ 198 if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks) 199 && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos) 200 && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map)) 201 { 202 *pu4_coeff_type = 1; 203 204 if(!trans_quant_bypass) 205 { 206 if(4 == trans_size) 207 { 208 IQUANT_4x4(iquant_out, 209 ps_tu_sblk_coeff_data->ai2_level[0], 210 pi2_dequant_matrix[0] 211 * g_ihevc_iquant_scales[qp_rem], 212 shift_iq, qp_div); 213 } 214 else 215 { 216 IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0], 217 pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem], 218 shift_iq, qp_div); 219 } 220 if(trans_skip) 221 iquant_out = (iquant_out + 16) >> 5; 222 } 223 else 224 { 225 /* setting the column to zero */ 226 for(i = 0; i < trans_size; i++) 227 *(pi2_tu_coeff + i * trans_size) = 0; 228 229 iquant_out = ps_tu_sblk_coeff_data->ai2_level[0]; 230 } 231 *pi2_coeff_value = iquant_out; 232 *pi2_tu_coeff = iquant_out; 233 *pu4_zero_cols &= ~0x1; 234 *pu4_zero_rows &= ~0x1; 235 ps_tu_sblk_coeff_data = 236 (void *)&ps_tu_sblk_coeff_data->ai2_level[1]; 237 238 STATS_UPDATE_COEFF_COUNT(); 239 STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), 0, 0); 240 STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip)); 241 return ((UWORD8 *)ps_tu_sblk_coeff_data); 242 } 243 else 244 { 245 *pu4_coeff_type = 0; 246 /* In case of trans skip, memset has already happened */ 247 if(!trans_skip) 248 memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16)); 249 } 250 251 for(i = 0; i < u1_num_coded_sblks; i++) 252 { 253 UWORD32 u4_sig_coeff_map; 254 subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF; 255 subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8; 256 257 STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y); 258 259 subblk_pos_x = subblk_pos_x * MIN_TU_SIZE; 260 subblk_pos_y = subblk_pos_y * MIN_TU_SIZE; 261 262 pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size 263 + subblk_pos_x; 264 265 //*pu4_zero_cols &= ~(0xF << subblk_pos_x); 266 267 sblk_non_zero_coeff_idx = 0; 268 u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map; 269 //for(sblk_scan_idx = (31 - CLZ(u4_sig_coeff_map)); sblk_scan_idx >= 0; sblk_scan_idx--) 270 sblk_scan_idx = 31; 271 do 272 { 273 WORD32 clz = CLZ(u4_sig_coeff_map); 274 275 sblk_scan_idx -= clz; 276 /* when clz is 31, u4_sig_coeff_map << (clz+1) might result in unknown behaviour in some cases */ 277 /* Hence either use SHL which takes care of handling these issues based on platform or shift in two stages */ 278 u4_sig_coeff_map = u4_sig_coeff_map << clz; 279 /* Copying coeffs and storing in reverse order */ 280 { 281 STATS_UPDATE_COEFF_COUNT(); 282 coeff_raster_idx = 283 gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx]; 284 285 xs = coeff_raster_idx & 0x3; 286 ys = coeff_raster_idx >> 2; 287 288 if(!trans_quant_bypass) 289 { 290 if(4 == trans_size) 291 { 292 IQUANT_4x4(iquant_out, 293 ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx], 294 pi2_dequant_matrix[(subblk_pos_x + xs) 295 + (subblk_pos_y + ys) 296 * trans_size] 297 * g_ihevc_iquant_scales[qp_rem], 298 shift_iq, qp_div); 299 sblk_non_zero_coeff_idx++; 300 } 301 else 302 { 303 IQUANT(iquant_out, 304 ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx], 305 pi2_dequant_matrix[(subblk_pos_x + xs) 306 + (subblk_pos_y + ys) 307 * trans_size] 308 * g_ihevc_iquant_scales[qp_rem], 309 shift_iq, qp_div); 310 sblk_non_zero_coeff_idx++; 311 } 312 313 if(trans_skip) 314 iquant_out = (iquant_out + 16) >> 5; 315 } 316 else 317 { 318 iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++]; 319 } 320 *pu4_zero_cols &= ~(0x1 << (subblk_pos_x + xs)); 321 *pu4_zero_rows &= ~(0x1 << (subblk_pos_y + ys)); 322 *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out; 323 } 324 sblk_scan_idx--; 325 u4_sig_coeff_map <<= 1; 326 327 }while(u4_sig_coeff_map); 328 /* Updating the sblk pointer */ 329 ps_tu_sblk_coeff_data = 330 (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx]; 331 } 332 333 STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip)); 334 335 pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data; 336 337 return pu1_new_tu_coeff_data; 338 } 339 340 WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc, 341 tu_t *ps_tu, 342 UWORD32 *pu4_intra_nbr_avail, 343 WORD16 i2_pic_width_in_luma_samples, 344 UWORD8 i1_constrained_intra_pred_flag, 345 WORD32 trans_size, 346 WORD32 ctb_size) 347 { 348 sps_t *ps_sps; 349 UWORD8 u1_bot_lt_avail, u1_left_avail, u1_top_avail, u1_top_rt_avail, 350 u1_top_lt_avail; 351 WORD32 x_cur, y_cur, x_nbr, y_nbr; 352 UWORD8 *pu1_nbr_intra_flag; 353 UWORD8 *pu1_pic_intra_flag; 354 UWORD8 top_right, top, top_left, left, bot_left; 355 WORD32 intra_pos; 356 WORD32 num_8_blks, num_8_blks_in_bits; 357 WORD32 numbytes_row = (i2_pic_width_in_luma_samples + 63) / 64; 358 WORD32 cur_x, cur_y; 359 WORD32 i; 360 WORD32 nbr_flags; 361 362 ps_sps = ps_proc->ps_sps; 363 cur_x = ps_tu->b4_pos_x; 364 cur_y = ps_tu->b4_pos_y; 365 366 u1_bot_lt_avail = (pu4_intra_nbr_avail[1 + cur_y + trans_size / MIN_TU_SIZE] 367 >> (31 - (1 + cur_x - 1))) & 1; 368 u1_left_avail = (pu4_intra_nbr_avail[1 + cur_y] >> (31 - (1 + cur_x - 1))) 369 & 1; 370 u1_top_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] >> (31 - (1 + cur_x))) 371 & 1; 372 u1_top_rt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] 373 >> (31 - (1 + cur_x + trans_size / MIN_TU_SIZE))) & 1; 374 u1_top_lt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] 375 >> (31 - (1 + cur_x - 1))) & 1; 376 377 x_cur = ps_proc->i4_ctb_x * ctb_size + cur_x * MIN_TU_SIZE; 378 y_cur = ps_proc->i4_ctb_y * ctb_size + cur_y * MIN_TU_SIZE; 379 380 pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag; 381 382 /* WORD32 nbr_flags as below MSB --> LSB */ 383 /* Top-Left | Top-Right | Top | Left | Bottom-Left 384 * 1 4 4 4 4 385 */ 386 bot_left = 0; 387 left = 0; 388 top_right = 0; 389 top = 0; 390 top_left = 0; 391 392 num_8_blks = trans_size > 4 ? trans_size / 8 : 1; 393 num_8_blks_in_bits = ((1 << num_8_blks) - 1); 394 395 if(i1_constrained_intra_pred_flag) 396 { 397 /* TODO: constrained intra pred not tested */ 398 if(u1_bot_lt_avail) 399 { 400 x_nbr = x_cur - 1; 401 y_nbr = y_cur + trans_size; 402 403 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 404 + x_nbr / 64; 405 intra_pos = ((x_nbr / 8) % 8); 406 for(i = 0; i < num_8_blks; i++) 407 { 408 bot_left |= ((*(pu1_nbr_intra_flag + i * numbytes_row) 409 >> intra_pos) & 1) << i; 410 } 411 bot_left &= num_8_blks_in_bits; 412 } 413 if(u1_left_avail) 414 { 415 x_nbr = x_cur - 1; 416 y_nbr = y_cur; 417 418 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 419 + x_nbr / 64; 420 intra_pos = ((x_nbr / 8) % 8); 421 422 for(i = 0; i < num_8_blks; i++) 423 { 424 left |= ((*(pu1_nbr_intra_flag + i * numbytes_row) >> intra_pos) 425 & 1) << i; 426 } 427 left &= num_8_blks_in_bits; 428 } 429 if(u1_top_avail) 430 { 431 x_nbr = x_cur; 432 y_nbr = y_cur - 1; 433 434 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 435 + x_nbr / 64; 436 intra_pos = ((x_nbr / 8) % 8); 437 438 top = (*pu1_nbr_intra_flag >> intra_pos); 439 top &= num_8_blks_in_bits; 440 /* 441 for(i=0;i<num_8_blks;i++) 442 { 443 top |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i; 444 } 445 */ 446 } 447 if(u1_top_rt_avail) 448 { 449 x_nbr = x_cur + trans_size; 450 y_nbr = y_cur - 1; 451 452 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 453 + x_nbr / 64; 454 intra_pos = ((x_nbr / 8) % 8); 455 456 top_right = (*pu1_nbr_intra_flag >> intra_pos); 457 top_right &= num_8_blks_in_bits; 458 /* 459 for(i=0;i<num_8_blks;i++) 460 { 461 top_right |= ( (*pu1_nbr_intra_flag >> (intra_pos+i)) & 1) << i; 462 } 463 */ 464 } 465 if(u1_top_lt_avail) 466 { 467 x_nbr = x_cur - 1; 468 y_nbr = y_cur - 1; 469 470 pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row 471 + x_nbr / 64; 472 intra_pos = ((x_nbr / 8) % 8); 473 474 top_left = (*pu1_nbr_intra_flag >> intra_pos) & 1; 475 } 476 } 477 else 478 { 479 if(u1_top_avail) 480 top = 0xF; 481 if(u1_top_rt_avail) 482 top_right = 0xF; 483 if(u1_bot_lt_avail) 484 bot_left = 0xF; 485 if(u1_left_avail) 486 left = 0xF; 487 if(u1_top_lt_avail) 488 top_left = 0x1; 489 } 490 491 /* Handling incomplete CTBs */ 492 { 493 WORD32 pu_size_limit = MIN(trans_size, 8); 494 WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples 495 - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) 496 - (ps_tu->b4_pos_x * MIN_TU_SIZE) 497 - (1 << (ps_tu->b3_size + 2)); 498 /* ctb_size_top gives number of valid pixels remaining in the current row */ 499 WORD32 ctb_size_top = MIN(ctb_size, cols_remaining); 500 WORD32 ctb_size_top_bits = (1 << (ctb_size_top / pu_size_limit)) - 1; 501 502 WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples 503 - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) 504 - (ps_tu->b4_pos_y * MIN_TU_SIZE) 505 - (1 << (ps_tu->b3_size + 2)); 506 /* ctb_size_bot gives number of valid pixels remaining in the current column */ 507 WORD32 ctb_size_bot = MIN(ctb_size, rows_remaining); 508 WORD32 ctb_size_bot_bits = (1 << (ctb_size_bot / pu_size_limit)) - 1; 509 510 top_right &= ctb_size_top_bits; 511 bot_left &= ctb_size_bot_bits; 512 } 513 514 /* Top-Left | Top-Right | Top | Left | Bottom-Left 515 * 1 4 4 4 4 516 */ 517 518 /* 519 nbr_flags = (top_left << 16) | (gau4_ihevcd_4_bit_reverse[top_right] << 12) | (gau4_ihevcd_4_bit_reverse[top] << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4) 520 | gau4_ihevcd_4_bit_reverse[bot_left]; 521 */ 522 nbr_flags = (top_left << 16) | (top_right << 12) | (top << 8) | (gau4_ihevcd_4_bit_reverse[left] << 4) 523 | gau4_ihevcd_4_bit_reverse[bot_left]; 524 525 526 return nbr_flags; 527 528 } 529 530 WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) 531 { 532 WORD16 *pi2_scaling_mat; 533 UWORD8 *pu1_y_dst_ctb; 534 UWORD8 *pu1_uv_dst_ctb; 535 WORD32 ctb_size; 536 codec_t *ps_codec; 537 slice_header_t *ps_slice_hdr; 538 tu_t *ps_tu; 539 WORD16 *pi2_ctb_coeff; 540 WORD32 tu_cnt; 541 WORD16 *pi2_tu_coeff; 542 WORD16 *pi2_tmp; 543 WORD32 pic_strd; 544 WORD32 luma_nbr_flags; 545 WORD32 chroma_nbr_flags = 0; 546 UWORD8 u1_luma_pred_mode_first_tu = 0; 547 /* Pointers for generating 2d coeffs from coeff-map */ 548 UWORD8 *pu1_tu_coeff_data; 549 /* nbr avail map for CTB */ 550 /* 1st bit points to neighbor (left/top_left/bot_left) */ 551 /* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */ 552 UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE 553 + 2 /* Top nbr + bot nbr */]; UWORD32 554 top_avail_bits; 555 sps_t *ps_sps; 556 pps_t *ps_pps; 557 WORD32 intra_flag; 558 UWORD8 *pu1_pic_intra_flag; 559 /*************************************************************************/ 560 /* Contanis scaling matrix offset in the following order in a 1D buffer */ 561 /* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V */ 562 /* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V */ 563 /* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V */ 564 /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */ 565 /* Intra 16x16 Y, 16x16 U, 16x16 V */ 566 /* Inter 16x16 Y, 16x16 U, 16x16 V */ 567 /* Intra 32x32 Y */ 568 /* Inter 32x32 Y */ 569 /*************************************************************************/ 570 WORD32 scaling_mat_offset[] = 571 { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992, 572 1248, 1504, 1760, 2016, 3040 }; 573 574 PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED(); 575 576 ps_sps = ps_proc->ps_sps; 577 ps_pps = ps_proc->ps_pps; 578 ps_slice_hdr = ps_proc->ps_slice_hdr; 579 ps_codec = ps_proc->ps_codec; 580 581 pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma; 582 pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma; 583 584 pi2_ctb_coeff = ps_proc->pi2_invscan_out; 585 586 ctb_size = (1 << ps_sps->i1_log2_ctb_size); 587 pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data; 588 589 pic_strd = ps_codec->i4_strd; 590 591 pi2_tmp = ps_proc->pi2_itrans_intrmd_buf; 592 593 pi2_tu_coeff = pi2_ctb_coeff; 594 595 ps_tu = ps_proc->ps_tu; 596 597 if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag)) 598 { 599 pi2_scaling_mat = ps_pps->pi2_scaling_mat; 600 } 601 else 602 { 603 pi2_scaling_mat = ps_sps->pi2_scaling_mat; 604 } 605 606 { 607 /* Updating the initial availability map */ 608 WORD32 i; 609 UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail, 610 u1_top_ctb_avail; 611 612 u1_left_ctb_avail = ps_proc->u1_left_ctb_avail; 613 u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail; 614 u1_top_ctb_avail = ps_proc->u1_top_ctb_avail; 615 u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail; 616 617 /* Initializing the availability array */ 618 memset(au4_intra_nbr_avail, 0, 619 (MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32)); 620 /* Initializing the availability array with CTB level availability flags */ 621 { 622 WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size); 623 WORD32 ctb_size_left = MIN(ctb_size, rows_remaining); 624 for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++) 625 { 626 au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31); 627 } 628 } 629 au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31) 630 >> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */ 631 632 au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31); 633 634 { 635 WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size); 636 WORD32 ctb_size_top = MIN(ctb_size, cols_remaining); 637 WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE)); 638 639 /* ctb_size_top gives number of valid pixels remaining in the current row */ 640 /* Since we need pattern of 1's starting from the MSB, an additional shift */ 641 /* is needed */ 642 shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE); 643 644 top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1) 645 << shift; 646 } 647 au4_intra_nbr_avail[0] |= ( 648 (u1_top_ctb_avail == 1) ? top_avail_bits : 0x0); 649 /* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */ 650 651 } 652 653 /* Applying Inverse transform on all the TU's in CTB */ 654 for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++) 655 { 656 WORD32 transform_skip_flag = 0; 657 WORD32 transform_skip_flag_v = 0; 658 WORD32 num_comp, c_idx, func_idx; 659 WORD32 src_strd, pred_strd, dst_strd; 660 WORD32 qp_div = 0, qp_rem = 0; 661 WORD32 qp_div_v = 0, qp_rem_v = 0; 662 UWORD32 zero_cols = 0, zero_cols_v = 0; 663 UWORD32 zero_rows = 0, zero_rows_v = 0; 664 UWORD32 coeff_type = 0, coeff_type_v = 0; 665 WORD16 i2_coeff_value, i2_coeff_value_v; 666 WORD32 trans_size = 0; 667 TRANSFORM_TYPE e_trans_type; 668 WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2; 669 WORD32 log2_trans_size; 670 WORD32 chroma_qp_idx; 671 WORD16 *pi2_src = NULL, *pi2_src_v = NULL; 672 UWORD8 *pu1_pred = NULL, *pu1_pred_v = NULL; 673 UWORD8 *pu1_dst = NULL, *pu1_dst_v = NULL; 674 WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL; 675 WORD32 tu_x, tu_y; 676 WORD32 tu_y_offset, tu_uv_offset; 677 WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset; 678 UWORD8 u1_cbf = 0, u1_cbf_v = 0, u1_luma_pred_mode, u1_chroma_pred_mode; 679 WORD32 luma_nbr_flags_4x4[4]; 680 WORD32 offset; 681 WORD32 pcm_flag; 682 WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); 683 /* If 420SP_VU is chroma format, pred and dst pointer */ 684 /* will be added +1 to point to U */ 685 WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu; 686 /* If 420SP_VU is chroma format, pred and dst pointer */ 687 /* will be added U offset of +1 and subtracted 2 */ 688 /* to point to V */ 689 WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu; 690 691 tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */ 692 tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */ 693 { 694 WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x); 695 WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y); 696 697 WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64; 698 699 pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag; 700 pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row; 701 pu1_pic_intra_flag += (tu_abs_x >> 6); 702 703 intra_flag = *pu1_pic_intra_flag; 704 intra_flag &= (1 << ((tu_abs_x >> 3) % 8)); 705 } 706 707 u1_luma_pred_mode = ps_tu->b6_luma_intra_mode; 708 u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx; 709 710 if(u1_chroma_pred_mode != 7) 711 num_comp = 2; /* Y and UV */ 712 else 713 num_comp = 1; /* Y */ 714 715 716 pcm_flag = 0; 717 718 if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE)) 719 { 720 UWORD8 *pu1_buf; 721 UWORD8 *pu1_y_dst = pu1_y_dst_ctb; 722 UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb; 723 WORD32 i, j; 724 tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data; 725 WORD32 cb_size = 1 << (ps_tu->b3_size + 2); 726 727 /* trans_size is used to update availability after reconstruction */ 728 trans_size = cb_size; 729 730 pcm_flag = 1; 731 732 tu_y_offset = tu_x + tu_y * pic_strd; 733 pu1_y_dst += tu_x + tu_y * pic_strd; 734 pu1_uv_dst += tu_x + (tu_y >> 1) * pic_strd; 735 736 /* First byte points to number of coded blocks */ 737 pu1_tu_coeff_data++; 738 739 /* Next byte points to scan type */ 740 pu1_tu_coeff_data++; 741 742 ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data; 743 744 pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0]; 745 { 746 747 for(i = 0; i < cb_size; i++) 748 { 749 //pu1_y_dst[i * pic_strd + j] = *pu1_buf++; 750 memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size); 751 pu1_buf += cb_size; 752 } 753 754 pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset; 755 756 /* U */ 757 for(i = 0; i < cb_size / 2; i++) 758 { 759 for(j = 0; j < cb_size / 2; j++) 760 { 761 pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++; 762 } 763 } 764 765 pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset; 766 767 /* V */ 768 for(i = 0; i < cb_size / 2; i++) 769 { 770 for(j = 0; j < cb_size / 2; j++) 771 { 772 pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++; 773 } 774 } 775 } 776 777 pu1_tu_coeff_data = pu1_buf; 778 779 } 780 781 782 783 784 785 for(c_idx = 0; c_idx < num_comp; c_idx++) 786 { 787 if(0 == pcm_flag) 788 { 789 /* Initializing variables */ 790 pred_strd = pic_strd; 791 dst_strd = pic_strd; 792 793 if(c_idx == 0) /* Y */ 794 { 795 log2_y_trans_size_minus_2 = ps_tu->b3_size; 796 trans_size = 1 << (log2_y_trans_size_minus_2 + 2); 797 log2_trans_size = log2_y_trans_size_minus_2 + 2; 798 799 tu_y_offset = tu_x + tu_y * pic_strd; 800 801 pi2_src = pi2_tu_coeff; 802 pu1_pred = pu1_y_dst_ctb + tu_y_offset; 803 pu1_dst = pu1_y_dst_ctb + tu_y_offset; 804 805 /* Calculating scaling matrix offset */ 806 offset = log2_y_trans_size_minus_2 * 6 807 + (!intra_flag) 808 * ((log2_y_trans_size_minus_2 809 == 3) ? 1 : 3) 810 + c_idx; 811 pi2_dequant_matrix = pi2_scaling_mat 812 + scaling_mat_offset[offset]; 813 814 src_strd = trans_size; 815 816 /* 4x4 transform Luma in INTRA mode is DST */ 817 if(log2_y_trans_size_minus_2 == 0 && intra_flag) 818 { 819 func_idx = log2_y_trans_size_minus_2; 820 e_trans_type = DST_4x4; 821 } 822 else 823 { 824 func_idx = log2_y_trans_size_minus_2 + 1; 825 e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1); 826 } 827 828 qp_div = ps_tu->b7_qp / 6; 829 qp_rem = ps_tu->b7_qp % 6; 830 831 u1_cbf = ps_tu->b1_y_cbf; 832 833 transform_skip_flag = pu1_tu_coeff_data[1] & 1; 834 /* Unpacking coeffs */ 835 if(1 == u1_cbf) 836 { 837 pu1_tu_coeff_data = ihevcd_unpack_coeffs( 838 pi2_src, log2_y_trans_size_minus_2 + 2, 839 pu1_tu_coeff_data, pi2_dequant_matrix, 840 qp_rem, qp_div, e_trans_type, 841 ps_tu->b1_transquant_bypass, &zero_cols, 842 &zero_rows, &coeff_type, 843 &i2_coeff_value); 844 } 845 } 846 else /* UV interleaved */ 847 { 848 /* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */ 849 if(ps_tu->b3_size == 0) 850 { 851 /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma postion has to be (luma pos x- 4,luma pos y- 4) */ 852 log2_uv_trans_size_minus_2 = ps_tu->b3_size; 853 tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / 2) * pic_strd; 854 } 855 else 856 { 857 log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1; 858 tu_uv_offset = tu_x + (tu_y >> 1) * pic_strd; 859 } 860 trans_size = 1 << (log2_uv_trans_size_minus_2 + 2); 861 log2_trans_size = log2_uv_trans_size_minus_2 + 2; 862 863 pi2_src = pi2_tu_coeff; 864 pi2_src_v = pi2_tu_coeff + trans_size * trans_size; 865 pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ 866 pu1_pred_v = pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/ 867 pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ 868 pu1_dst_v = pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/ 869 870 /*TODO: Add support for choosing different tables for U and V, 871 * change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes 872 */ 873 /* Calculating scaling matrix offset */ 874 /* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since 875 * max uv trans size is 16x16 876 */ 877 offset = log2_uv_trans_size_minus_2 * 6 878 + (!intra_flag) * 3 + c_idx; 879 pi2_dequant_matrix = pi2_scaling_mat 880 + scaling_mat_offset[offset]; 881 pi2_dequant_matrix_v = pi2_scaling_mat 882 + scaling_mat_offset[offset + 1]; 883 884 src_strd = trans_size; 885 886 func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/ 887 e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1); 888 /* QP for U */ 889 i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset; 890 i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset; 891 u1_cbf = ps_tu->b1_cb_cbf; 892 893 chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset 894 + i1_chroma_slice_qp_offset; 895 chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57); 896 qp_div = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6; 897 qp_rem = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6; 898 899 /* QP for V */ 900 i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset; 901 i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset; 902 u1_cbf_v = ps_tu->b1_cr_cbf; 903 904 chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset 905 + i1_chroma_slice_qp_offset; 906 chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57); 907 qp_div_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6; 908 qp_rem_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6; 909 910 /* Unpacking coeffs */ 911 transform_skip_flag = pu1_tu_coeff_data[1] & 1; 912 if(1 == u1_cbf) 913 { 914 pu1_tu_coeff_data = ihevcd_unpack_coeffs( 915 pi2_src, log2_uv_trans_size_minus_2 + 2, 916 pu1_tu_coeff_data, pi2_dequant_matrix, 917 qp_rem, qp_div, e_trans_type, 918 ps_tu->b1_transquant_bypass, &zero_cols, 919 &zero_rows, &coeff_type, 920 &i2_coeff_value); 921 } 922 923 transform_skip_flag_v = pu1_tu_coeff_data[1] & 1; 924 if(1 == u1_cbf_v) 925 { 926 pu1_tu_coeff_data = ihevcd_unpack_coeffs( 927 pi2_src_v, log2_uv_trans_size_minus_2 + 2, 928 pu1_tu_coeff_data, pi2_dequant_matrix_v, 929 qp_rem_v, qp_div_v, e_trans_type, 930 ps_tu->b1_transquant_bypass, &zero_cols_v, 931 &zero_rows_v, &coeff_type_v, &i2_coeff_value_v); 932 } 933 } 934 /***************************************************************/ 935 /****************** Intra Prediction **************************/ 936 /***************************************************************/ 937 if(intra_flag) /* Intra */ 938 { 939 UWORD8 au1_ref_sub_out[(MAX_TU_SIZE * 2 * 2) + 4]; 940 UWORD8 *pu1_top_left, *pu1_top, *pu1_left; 941 WORD32 luma_pred_func_idx, chroma_pred_func_idx; 942 943 /* Get the neighbour availability flags */ 944 /* Done for only Y */ 945 if(c_idx == 0) 946 { 947 /* Get neighbor availability for Y only */ 948 luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc, 949 ps_tu, 950 au4_intra_nbr_avail, 951 ps_sps->i2_pic_width_in_luma_samples, 952 ps_pps->i1_constrained_intra_pred_flag, 953 trans_size, 954 ctb_size); 955 956 if(trans_size == 4) 957 luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags; 958 959 if((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0)) 960 { 961 chroma_nbr_flags = luma_nbr_flags; 962 } 963 964 /* Initializing nbr pointers */ 965 pu1_top = pu1_pred - pic_strd; 966 pu1_left = pu1_pred - 1; 967 pu1_top_left = pu1_pred - pic_strd - 1; 968 969 /* call reference array substitution */ 970 if(luma_nbr_flags == 0x1ffff) 971 ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr( 972 pu1_top_left, 973 pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1); 974 else 975 ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr( 976 pu1_top_left, 977 pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1); 978 979 /* call reference filtering */ 980 ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr( 981 au1_ref_sub_out, trans_size, 982 au1_ref_sub_out, 983 u1_luma_pred_mode, ps_sps->i1_strong_intra_smoothing_enable_flag); 984 985 /* use the look up to get the function idx */ 986 luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode]; 987 988 /* call the intra prediction function */ 989 ps_codec->apf_intra_pred_luma[luma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred, pred_strd, trans_size, u1_luma_pred_mode); 990 } 991 else 992 { 993 /* In case of yuv420sp_vu, prediction happens as usual. */ 994 /* So point the pu1_pred pointer to original prediction pointer */ 995 UWORD8 *pu1_pred_orig = pu1_pred - chroma_yuv420sp_vu_u_offset; 996 997 /* Top-Left | Top-Right | Top | Left | Bottom-Left 998 * 1 4 4 4 4 999 * 1000 * Generating chroma_nbr_flags depending upon the transform size */ 1001 if(ps_tu->b3_size == 0) 1002 { 1003 /* Take TL,T,L flags of First luma 4x4 block */ 1004 chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0); 1005 /* Take TR flags of Second luma 4x4 block */ 1006 chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000); 1007 /* Take BL flags of Third luma 4x4 block */ 1008 chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F); 1009 } 1010 1011 /* Initializing nbr pointers */ 1012 pu1_top = pu1_pred_orig - pic_strd; 1013 pu1_left = pu1_pred_orig - 2; 1014 pu1_top_left = pu1_pred_orig - pic_strd - 2; 1015 1016 /* Chroma pred mode derivation from luma pred mode */ 1017 { 1018 tu_t *ps_tu_tmp = ps_tu; 1019 while(!ps_tu_tmp->b1_first_tu_in_cu) 1020 { 1021 ps_tu_tmp--; 1022 } 1023 u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode; 1024 } 1025 if(4 == u1_chroma_pred_mode) 1026 u1_chroma_pred_mode = u1_luma_pred_mode_first_tu; 1027 else 1028 { 1029 u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode]; 1030 1031 if(u1_chroma_pred_mode == 1032 u1_luma_pred_mode_first_tu) 1033 { 1034 u1_chroma_pred_mode = INTRA_ANGULAR(34); 1035 } 1036 } 1037 1038 /* call the chroma reference array substitution */ 1039 ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr( 1040 pu1_top_left, 1041 pu1_top, pu1_left, pic_strd, trans_size, chroma_nbr_flags, au1_ref_sub_out, 1); 1042 1043 /* use the look up to get the function idx */ 1044 chroma_pred_func_idx = 1045 g_i4_ip_funcs[u1_chroma_pred_mode]; 1046 1047 /* call the intra prediction function */ 1048 ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred_orig, pred_strd, trans_size, u1_chroma_pred_mode); 1049 } 1050 } 1051 1052 /* Updating number of transform types */ 1053 STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx); 1054 1055 /* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */ 1056 if(1 == u1_cbf) 1057 { 1058 if(ps_tu->b1_transquant_bypass || transform_skip_flag) 1059 { 1060 /* Recon */ 1061 ps_codec->apf_recon[func_idx](pi2_src, pu1_pred, pu1_dst, 1062 src_strd, pred_strd, dst_strd, 1063 zero_cols); 1064 } 1065 else 1066 { 1067 1068 /* Updating coded number of transform types(excluding trans skip and trans quant skip) */ 1069 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0); 1070 1071 /* iQuant , iTrans and Recon */ 1072 if((0 == coeff_type)) 1073 { 1074 ps_codec->apf_itrans_recon[func_idx](pi2_src, pi2_tmp, 1075 pu1_pred, pu1_dst, 1076 src_strd, pred_strd, 1077 dst_strd, zero_cols, 1078 zero_rows); 1079 } 1080 else /* DC only */ 1081 { 1082 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1); 1083 ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred, pu1_dst, 1084 pred_strd, dst_strd, 1085 log2_trans_size, 1086 i2_coeff_value); 1087 } 1088 } 1089 } 1090 /* IQ, IT and Recon for V */ 1091 if(c_idx != 0) 1092 { 1093 if(1 == u1_cbf_v) 1094 { 1095 if(ps_tu->b1_transquant_bypass || transform_skip_flag_v) 1096 { 1097 /* Recon */ 1098 ps_codec->apf_recon[func_idx](pi2_src_v, pu1_pred_v, 1099 pu1_dst_v, src_strd, 1100 pred_strd, dst_strd, 1101 zero_cols_v); 1102 } 1103 else 1104 { 1105 /* Updating number of transform types */ 1106 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0); 1107 1108 /* iQuant , iTrans and Recon */ 1109 if((0 == coeff_type_v)) 1110 { 1111 ps_codec->apf_itrans_recon[func_idx](pi2_src_v, 1112 pi2_tmp, 1113 pu1_pred_v, 1114 pu1_dst_v, 1115 src_strd, 1116 pred_strd, 1117 dst_strd, 1118 zero_cols_v, 1119 zero_rows_v); 1120 } 1121 else /* DC only */ 1122 { 1123 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1); 1124 ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred_v, pu1_dst_v, 1125 pred_strd, dst_strd, 1126 log2_trans_size, 1127 i2_coeff_value_v); 1128 } 1129 } 1130 } 1131 } 1132 } 1133 1134 /* Neighbor availability inside CTB */ 1135 /* 1bit per 4x4. Indicates whether that 4x4 block has been reconstructed(avialable) */ 1136 /* Used for neighbor availability in intra pred */ 1137 if(c_idx == 0) 1138 { 1139 WORD32 i; 1140 WORD32 trans_in_min_tu; 1141 UWORD32 cur_tu_in_bits; 1142 UWORD32 cur_tu_avail_flag; 1143 1144 trans_in_min_tu = trans_size / MIN_TU_SIZE; 1145 cur_tu_in_bits = (1 << trans_in_min_tu) - 1; 1146 cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu); 1147 1148 cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1); 1149 1150 for(i = 0; i < trans_in_min_tu; i++) 1151 au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |= 1152 cur_tu_avail_flag; 1153 } 1154 } 1155 } 1156 ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data; 1157 1158 return ps_proc->i4_ctb_tu_cnt; 1159 } 1160 1161