1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ****************************************************************************** 23 * \file ihevce_tu_tree_selector.c 24 * 25 * \brief 26 * Functions that facilitate selection of optimal TU tree 27 * 28 * \date 29 * 20/04/2016 30 * 31 * \author 32 * Ittiam 33 * 34 ****************************************************************************** 35 */ 36 37 /*****************************************************************************/ 38 /* File Includes */ 39 /*****************************************************************************/ 40 /* System include files */ 41 #include <stdio.h> 42 #include <string.h> 43 #include <stdlib.h> 44 #include <assert.h> 45 #include <stdarg.h> 46 #include <math.h> 47 #include <limits.h> 48 49 /* User include files */ 50 #include "ihevc_typedefs.h" 51 #include "itt_video_api.h" 52 #include "ihevce_api.h" 53 54 #include "rc_cntrl_param.h" 55 #include "rc_frame_info_collector.h" 56 #include "rc_look_ahead_params.h" 57 58 #include "ihevc_defs.h" 59 #include "ihevc_structs.h" 60 #include "ihevc_platform_macros.h" 61 #include "ihevc_deblk.h" 62 #include 
"ihevc_itrans_recon.h" 63 #include "ihevc_chroma_itrans_recon.h" 64 #include "ihevc_chroma_intra_pred.h" 65 #include "ihevc_intra_pred.h" 66 #include "ihevc_inter_pred.h" 67 #include "ihevc_mem_fns.h" 68 #include "ihevc_padding.h" 69 #include "ihevc_weighted_pred.h" 70 #include "ihevc_sao.h" 71 #include "ihevc_resi_trans.h" 72 #include "ihevc_quant_iquant_ssd.h" 73 #include "ihevc_cabac_tables.h" 74 75 #include "ihevce_defs.h" 76 #include "ihevce_lap_enc_structs.h" 77 #include "ihevce_multi_thrd_structs.h" 78 #include "ihevce_me_common_defs.h" 79 #include "ihevce_had_satd.h" 80 #include "ihevce_error_codes.h" 81 #include "ihevce_bitstream.h" 82 #include "ihevce_cabac.h" 83 #include "ihevce_rdoq_macros.h" 84 #include "ihevce_function_selector.h" 85 #include "ihevce_enc_structs.h" 86 #include "ihevce_entropy_structs.h" 87 #include "ihevce_cmn_utils_instr_set_router.h" 88 #include "ihevce_enc_loop_structs.h" 89 #include "ihevce_enc_loop_utils.h" 90 #include "ihevce_tu_tree_selector.h" 91 92 /*****************************************************************************/ 93 /* Function Definitions */ 94 /*****************************************************************************/ 95 96 /*! 97 ****************************************************************************** 98 * \if Function name : ihevce_tu_tree_coverage_in_cu \endif 99 * 100 * \brief 101 * Determination of the area within the CU that is swept by the TU tree. 
102 * Input : Pointer to a node of the TU tree 103 * Output : Area covered by the current TU or its children 104 * 105 ***************************************************************************** 106 */ 107 WORD32 ihevce_tu_tree_coverage_in_cu(tu_tree_node_t *ps_node) 108 { 109 WORD32 i4_tu_tree_area = 0; 110 111 if(ps_node->u1_is_valid_node) 112 { 113 i4_tu_tree_area += ps_node->s_luma_data.u1_size * ps_node->s_luma_data.u1_size; 114 } 115 else 116 { 117 if(NULL != ps_node->ps_child_node_tl) 118 { 119 i4_tu_tree_area += ihevce_tu_tree_coverage_in_cu(ps_node->ps_child_node_tl); 120 } 121 122 if(NULL != ps_node->ps_child_node_tr) 123 { 124 i4_tu_tree_area += ihevce_tu_tree_coverage_in_cu(ps_node->ps_child_node_tr); 125 } 126 127 if(NULL != ps_node->ps_child_node_bl) 128 { 129 i4_tu_tree_area += ihevce_tu_tree_coverage_in_cu(ps_node->ps_child_node_bl); 130 } 131 132 if(NULL != ps_node->ps_child_node_br) 133 { 134 i4_tu_tree_area += ihevce_tu_tree_coverage_in_cu(ps_node->ps_child_node_br); 135 } 136 } 137 138 return i4_tu_tree_area; 139 } 140 141 static void ihevce_tu_node_data_init( 142 tu_node_data_t *ps_tu_data, UWORD8 u1_size, UWORD8 u1_posx, UWORD8 u1_posy) 143 { 144 ps_tu_data->u1_size = u1_size; 145 ps_tu_data->i8_ssd = 0; 146 ps_tu_data->i8_cost = 0; 147 #if ENABLE_INTER_ZCU_COST 148 ps_tu_data->i8_not_coded_cost = 0; 149 #endif 150 ps_tu_data->u4_sad = 0; 151 ps_tu_data->i4_bits = 0; 152 ps_tu_data->i4_num_bytes_used_for_ecd = 0; 153 ps_tu_data->u1_cbf = 0; 154 ps_tu_data->u1_reconBufId = UCHAR_MAX; 155 ps_tu_data->u1_posx = u1_posx; 156 ps_tu_data->u1_posy = u1_posy; 157 } 158 159 /*! 160 ****************************************************************************** 161 * \if Function name : ihevce_tu_node_init \endif 162 * 163 * \brief 164 * This function initialises all nodes of the TU tree from the root upto and 165 * including the nodes at the max tree depth. Only those nodes that lie 166 * within the (max + 1) and (min - 1) depths are set as valid. 
Everything 167 * else is invalid. The pointers to the children nodes of the leaf-most 168 * nodes in the tree are assigned NULL. 169 * Input : Pointer to root of the tree containing TU info. 170 * Output : The memory of this node and all its progeny shall be modified 171 * returns Number of nodes of the TU tree that have been modified 172 * 173 ***************************************************************************** 174 */ 175 static UWORD16 ihevce_tu_node_init( 176 tu_tree_node_t *ps_root, 177 UWORD8 u1_size, 178 UWORD8 u1_parent_posx, 179 UWORD8 u1_parent_posy, 180 UWORD8 u1_cur_depth, 181 UWORD8 u1_min_tree_depth, 182 UWORD8 u1_max_tree_depth, 183 UWORD8 u1_chroma_processing_enabled, 184 UWORD8 u1_is_422, 185 TU_POS_T e_tu_pos) 186 { 187 tu_tree_node_t *ps_node; 188 tu_tree_node_t *ps_childNodeTL; 189 tu_tree_node_t *ps_childNodeTR; 190 tu_tree_node_t *ps_childNodeBL; 191 tu_tree_node_t *ps_childNodeBR; 192 193 UWORD8 u1_start_index_for_parent = 0; 194 UWORD8 u1_start_index_for_child = 0; 195 UWORD16 u2_parent_offset = 0; 196 UWORD16 u2_child_offset = 0; 197 UWORD8 u1_posx = 0; 198 UWORD8 u1_posy = 0; 199 200 const UWORD8 u1_nxn_tu_node_start_index = 0; 201 const UWORD8 u1_nBye2xnBye2_tu_node_start_index = 1; 202 const UWORD8 u1_nBye4xnBye4_tu_node_start_index = 1 + 4; 203 const UWORD8 u1_nBye8xnBye8_tu_node_start_index = 1 + 4 + 16; 204 const UWORD8 u1_nBye16xnBye16_tu_node_start_index = 1 + 4 + 16 + 64; 205 UWORD16 u2_num_nodes_initialised = 0; 206 207 ASSERT(u1_cur_depth <= u1_max_tree_depth); 208 ASSERT(u1_max_tree_depth >= u1_min_tree_depth); 209 210 switch(e_tu_pos) 211 { 212 case POS_TL: 213 { 214 u1_posx = u1_parent_posx; 215 u1_posy = u1_parent_posy; 216 217 break; 218 } 219 case POS_TR: 220 { 221 u1_posx = u1_parent_posx + u1_size; 222 u1_posy = u1_parent_posy; 223 224 break; 225 } 226 case POS_BL: 227 { 228 u1_posx = u1_parent_posx; 229 u1_posy = u1_parent_posy + u1_size; 230 231 break; 232 } 233 case POS_BR: 234 { 235 u1_posx = u1_parent_posx + 
u1_size; 236 u1_posy = u1_parent_posy + u1_size; 237 238 break; 239 } 240 default: 241 { 242 /* Here be dragons */ 243 ASSERT(0); 244 } 245 } 246 247 switch(u1_cur_depth) 248 { 249 case 0: 250 { 251 u1_start_index_for_parent = u1_nxn_tu_node_start_index; 252 u1_start_index_for_child = u1_nBye2xnBye2_tu_node_start_index; 253 254 u2_parent_offset = 0; 255 u2_child_offset = 0; 256 257 break; 258 } 259 case 1: 260 { 261 u1_start_index_for_parent = u1_nBye2xnBye2_tu_node_start_index; 262 u1_start_index_for_child = u1_nBye4xnBye4_tu_node_start_index; 263 264 u2_parent_offset = e_tu_pos; 265 u2_child_offset = 4 * u1_posx / u1_size + 8 * u1_posy / u1_size; 266 267 break; 268 } 269 case 2: 270 { 271 u1_start_index_for_parent = u1_nBye4xnBye4_tu_node_start_index; 272 u1_start_index_for_child = u1_nBye8xnBye8_tu_node_start_index; 273 274 u2_parent_offset = 2 * u1_parent_posx / u1_size + 4 * u1_parent_posy / u1_size + e_tu_pos; 275 u2_child_offset = 4 * u1_posx / u1_size + 16 * u1_posy / u1_size; 276 277 break; 278 } 279 case 3: 280 { 281 u1_start_index_for_parent = u1_nBye8xnBye8_tu_node_start_index; 282 u1_start_index_for_child = u1_nBye16xnBye16_tu_node_start_index; 283 284 u2_parent_offset = 2 * u1_parent_posx / u1_size + 8 * u1_parent_posy / u1_size + e_tu_pos; 285 u2_child_offset = 4 * u1_posx / u1_size + 32 * u1_posy / u1_size; 286 287 break; 288 } 289 case 4: 290 { 291 u1_start_index_for_parent = u1_nBye16xnBye16_tu_node_start_index; 292 u1_start_index_for_child = 0; 293 294 u2_parent_offset = 2 * u1_parent_posx / u1_size + 16 * u1_parent_posy / u1_size + e_tu_pos; 295 u2_child_offset = 0; 296 297 break; 298 } 299 default: 300 { 301 /* Here be dragons */ 302 ASSERT(0); 303 } 304 } 305 306 ASSERT((u1_start_index_for_parent + u2_parent_offset) < (256 + 64 + 16 + 4 + 1)); 307 ASSERT((u1_start_index_for_child + u2_child_offset + POS_BR) < (256 + 64 + 16 + 4 + 1)); 308 309 ps_node = ps_root + u1_start_index_for_parent + u2_parent_offset; 310 ps_childNodeTL = ps_root + 
u1_start_index_for_child + u2_child_offset + POS_TL; 311 ps_childNodeTR = ps_root + u1_start_index_for_child + u2_child_offset + POS_TR; 312 ps_childNodeBL = ps_root + u1_start_index_for_child + u2_child_offset + POS_BL; 313 ps_childNodeBR = ps_root + u1_start_index_for_child + u2_child_offset + POS_BR; 314 315 ihevce_tu_node_data_init(&ps_node->s_luma_data, u1_size, u1_posx, u1_posy); 316 317 if(u1_chroma_processing_enabled) 318 { 319 UWORD8 i; 320 321 if(u1_size > 4) 322 { 323 for(i = 0; i < (u1_is_422 + 1); i++) 324 { 325 ihevce_tu_node_data_init( 326 &ps_node->as_cb_data[i], 327 u1_size / 2, 328 u1_posx / 2, 329 !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size / 2); 330 331 ihevce_tu_node_data_init( 332 &ps_node->as_cr_data[i], 333 u1_size / 2, 334 u1_posx / 2, 335 !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size / 2); 336 } 337 } 338 else if(POS_TL == e_tu_pos) 339 { 340 for(i = 0; i < (u1_is_422 + 1); i++) 341 { 342 ihevce_tu_node_data_init( 343 &ps_node->as_cb_data[i], 344 u1_size, 345 u1_posx / 2, 346 !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size); 347 348 ihevce_tu_node_data_init( 349 &ps_node->as_cr_data[i], 350 u1_size, 351 u1_posx / 2, 352 !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size); 353 } 354 } 355 else 356 { 357 for(i = 0; i < (u1_is_422 + 1); i++) 358 { 359 ihevce_tu_node_data_init( 360 &ps_node->as_cb_data[i], 361 u1_size / 2, 362 u1_posx / 2, 363 !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size); 364 365 ihevce_tu_node_data_init( 366 &ps_node->as_cr_data[i], 367 u1_size / 2, 368 u1_posx / 2, 369 !u1_is_422 ? 
u1_posy / 2 : u1_posy + i * u1_size); 370 } 371 } 372 } 373 374 if((u1_cur_depth >= u1_min_tree_depth) && (u1_cur_depth <= u1_max_tree_depth)) 375 { 376 ps_node->u1_is_valid_node = 1; 377 } 378 else 379 { 380 ps_node->u1_is_valid_node = 0; 381 } 382 383 u2_num_nodes_initialised++; 384 385 if((u1_cur_depth < u1_max_tree_depth) && (u1_size > MIN_TU_SIZE)) 386 { 387 ps_node->ps_child_node_tl = ps_childNodeTL; 388 ps_node->ps_child_node_tr = ps_childNodeTR; 389 ps_node->ps_child_node_bl = ps_childNodeBL; 390 ps_node->ps_child_node_br = ps_childNodeBR; 391 392 u2_num_nodes_initialised += ihevce_tu_node_init( 393 ps_root, 394 u1_size / 2, 395 ps_node->s_luma_data.u1_posx, 396 ps_node->s_luma_data.u1_posy, 397 u1_cur_depth + 1, 398 u1_min_tree_depth, 399 u1_max_tree_depth, 400 u1_chroma_processing_enabled, 401 u1_is_422, 402 POS_TL); 403 404 u2_num_nodes_initialised += ihevce_tu_node_init( 405 ps_root, 406 u1_size / 2, 407 ps_node->s_luma_data.u1_posx, 408 ps_node->s_luma_data.u1_posy, 409 u1_cur_depth + 1, 410 u1_min_tree_depth, 411 u1_max_tree_depth, 412 u1_chroma_processing_enabled, 413 u1_is_422, 414 POS_TR); 415 416 u2_num_nodes_initialised += ihevce_tu_node_init( 417 ps_root, 418 u1_size / 2, 419 ps_node->s_luma_data.u1_posx, 420 ps_node->s_luma_data.u1_posy, 421 u1_cur_depth + 1, 422 u1_min_tree_depth, 423 u1_max_tree_depth, 424 u1_chroma_processing_enabled, 425 u1_is_422, 426 POS_BL); 427 428 u2_num_nodes_initialised += ihevce_tu_node_init( 429 ps_root, 430 u1_size / 2, 431 ps_node->s_luma_data.u1_posx, 432 ps_node->s_luma_data.u1_posy, 433 u1_cur_depth + 1, 434 u1_min_tree_depth, 435 u1_max_tree_depth, 436 u1_chroma_processing_enabled, 437 u1_is_422, 438 POS_BR); 439 } 440 else 441 { 442 ps_node->ps_child_node_tl = NULL; 443 ps_node->ps_child_node_tr = NULL; 444 ps_node->ps_child_node_bl = NULL; 445 ps_node->ps_child_node_br = NULL; 446 } 447 448 return u2_num_nodes_initialised; 449 } 450 451 /*! 
452 ****************************************************************************** 453 * \if Function name : ihevce_tu_tree_init \endif 454 * 455 * \brief 456 * Initialises all relevant data within all nodes for a specified TU tree 457 * Input : Pointer to root of the tree containing TU info. 458 * Output : Returns the number of nodes initialised 459 * 460 ***************************************************************************** 461 */ 462 UWORD16 ihevce_tu_tree_init( 463 tu_tree_node_t *ps_root, 464 UWORD8 u1_cu_size, 465 UWORD8 u1_min_tree_depth, 466 UWORD8 u1_max_tree_depth, 467 UWORD8 u1_chroma_processing_enabled, 468 UWORD8 u1_is_422) 469 { 470 UWORD16 u2_num_nodes = 0; 471 472 ASSERT(u1_max_tree_depth >= u1_min_tree_depth); 473 474 u2_num_nodes += ihevce_tu_node_init( 475 ps_root, 476 u1_cu_size, 477 0, 478 0, 479 0, 480 u1_min_tree_depth, 481 u1_max_tree_depth, 482 u1_chroma_processing_enabled, 483 u1_is_422, 484 POS_TL); 485 486 return u2_num_nodes; 487 } 488 489 /*! 490 ****************************************************************************** 491 * \if Function name : ihevce_cabac_bins2Bits_converter_and_state_updater \endif 492 * 493 * \brief 494 * cabac bin to bits converter 495 * Input : 1. Pointer to buffer which stores the current CABAC state. This 496 * buffer shall be modified by this function. 2. Index to the cabac state 497 * that corresponds to the bin. 3. 
bin value 498 * Output : Number of bits required to encode the bin 499 * 500 ***************************************************************************** 501 */ 502 static INLINE UWORD32 ihevce_cabac_bins2Bits_converter_and_state_updater( 503 UWORD8 *pu1_cabac_ctxt, UWORD8 u1_cabac_state_idx, UWORD8 u1_bin_value) 504 { 505 UWORD32 u4_bits = 0; 506 507 u4_bits += gau2_ihevce_cabac_bin_to_bits[pu1_cabac_ctxt[u1_cabac_state_idx] ^ u1_bin_value]; 508 pu1_cabac_ctxt[u1_cabac_state_idx] = 509 gau1_ihevc_next_state[(pu1_cabac_ctxt[u1_cabac_state_idx] << 1) | u1_bin_value]; 510 511 return u4_bits; 512 } 513 514 static tu_tree_node_t * 515 ihevce_tu_node_parent_finder(tu_tree_node_t *ps_root, tu_tree_node_t *ps_leaf) 516 { 517 UWORD8 u1_depth_of_leaf; 518 519 GETRANGE(u1_depth_of_leaf, ps_root->s_luma_data.u1_size / ps_leaf->s_luma_data.u1_size); 520 u1_depth_of_leaf--; 521 522 if(0 == u1_depth_of_leaf) 523 { 524 return NULL; 525 } 526 else if(1 == u1_depth_of_leaf) 527 { 528 return ps_root; 529 } 530 else 531 { 532 UWORD8 u1_switch_conditional = 533 (ps_leaf->s_luma_data.u1_posx >= ps_root->ps_child_node_tl->s_luma_data.u1_size) + 534 (ps_leaf->s_luma_data.u1_posy >= ps_root->ps_child_node_tl->s_luma_data.u1_size) * 2; 535 536 ASSERT(NULL != ps_root->ps_child_node_tl); 537 ASSERT(NULL != ps_root->ps_child_node_tr); 538 ASSERT(NULL != ps_root->ps_child_node_bl); 539 ASSERT(NULL != ps_root->ps_child_node_br); 540 541 switch(u1_switch_conditional) 542 { 543 case 0: 544 { 545 return ihevce_tu_node_parent_finder(ps_root->ps_child_node_tl, ps_leaf); 546 } 547 case 1: 548 { 549 return ihevce_tu_node_parent_finder(ps_root->ps_child_node_tr, ps_leaf); 550 } 551 case 2: 552 { 553 return ihevce_tu_node_parent_finder(ps_root->ps_child_node_bl, ps_leaf); 554 } 555 case 3: 556 { 557 return ihevce_tu_node_parent_finder(ps_root->ps_child_node_br, ps_leaf); 558 } 559 } 560 } 561 562 return NULL; 563 } 564 565 /*! 
566 ****************************************************************************** 567 * \if Function name : ihevce_compute_bits_for_TUSplit_and_cbf \endif 568 * 569 * \notes 570 * 1. This function ought to be called before the call to 'ihevce_tu_tree_selector' 571 * of children TU's in order to determine bits to encode splitFlag as 1. 572 * This should also be called at the end of 'ihevce_tu_processor' in order 573 * to determine bits required to encode cbf and splitFlag. 574 * 2. When 'ENABLE_TOP_DOWN_TU_RECURSION' = 0 and 'INCLUDE_CHROMA_DURING_TU_RECURSION' = 1, 575 * it shall be assumed that parent chroma cbf is 1. 576 * 3. When 'INCLUDE_CHROMA_DURING_TU_RECURSION' = 0, this function works as 577 * though no chroma related syntax was included in the HEVC syntax for coding 578 * the transform tree 579 * Input : 1. ps_root: Pointer to root of the tree containing TU info 580 * 2. ps_leaf: Pointer to current node of the TU tree 581 * 3. pu1_cabac_ctxt: Pointer to buffer which stores the current CABAC 582 * state. 
This buffer shall be modified by this function 583 * Output : Number of bits required to encode cbf and splitFlags 584 * 585 ***************************************************************************** 586 */ 587 static WORD32 ihevce_compute_bits_for_TUSplit_and_cbf( 588 tu_tree_node_t *ps_root, 589 tu_tree_node_t *ps_leaf, 590 UWORD8 *pu1_cabac_ctxt, 591 UWORD8 u1_max_tu_size, 592 UWORD8 u1_min_tu_size, 593 UWORD8 u1_cur_depth, 594 UWORD8 u1_max_depth, 595 UWORD8 u1_is_intra, 596 UWORD8 u1_is_intra_nxn_pu, 597 UWORD8 u1_chroma_processing_enabled, 598 UWORD8 u1_is_422) 599 { 600 UWORD8 u1_cabac_state_idx; 601 UWORD8 u1_log2_tu_size; 602 603 UWORD32 u4_num_bits = 0; 604 UWORD8 u1_tu_size = ps_leaf->s_luma_data.u1_size; 605 606 ASSERT(u1_min_tu_size >= MIN_TU_SIZE); 607 ASSERT(u1_min_tu_size <= u1_max_tu_size); 608 ASSERT(u1_max_tu_size <= MAX_TU_SIZE); 609 ASSERT(u1_tu_size >= MIN_TU_SIZE); 610 ASSERT(u1_tu_size <= MAX_TU_SIZE); 611 ASSERT(u1_cur_depth <= u1_max_depth); 612 613 GETRANGE(u1_log2_tu_size, u1_tu_size); 614 615 if((ps_root->s_luma_data.u1_size >> u1_cur_depth) == u1_tu_size) 616 { 617 if((u1_tu_size <= u1_max_tu_size) && (u1_tu_size > u1_min_tu_size) && 618 (u1_cur_depth < u1_max_depth) && !(u1_is_intra_nxn_pu && !u1_cur_depth)) 619 { 620 u1_cabac_state_idx = IHEVC_CAB_SPLIT_TFM + (5 - u1_log2_tu_size); 621 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 622 pu1_cabac_ctxt, u1_cabac_state_idx, 0); 623 } 624 625 if(u1_chroma_processing_enabled && (u1_tu_size > 4)) 626 { 627 tu_tree_node_t *ps_parent = ihevce_tu_node_parent_finder(ps_root, ps_leaf); 628 629 u1_cabac_state_idx = IHEVC_CAB_CBCR_IDX + u1_cur_depth; 630 631 if(!u1_cur_depth || ps_parent->as_cb_data[0].u1_cbf || ps_parent->as_cb_data[1].u1_cbf) 632 { 633 if(u1_is_422) 634 { 635 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 636 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cb_data[0].u1_cbf); 637 638 u4_num_bits += 
ihevce_cabac_bins2Bits_converter_and_state_updater( 639 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cb_data[1].u1_cbf); 640 } 641 else 642 { 643 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 644 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cb_data[0].u1_cbf); 645 } 646 } 647 648 if(!u1_cur_depth || ps_parent->as_cr_data[0].u1_cbf || ps_parent->as_cr_data[1].u1_cbf) 649 { 650 if(u1_is_422) 651 { 652 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 653 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[0].u1_cbf); 654 655 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 656 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[1].u1_cbf); 657 } 658 else 659 { 660 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 661 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[0].u1_cbf); 662 } 663 } 664 } 665 666 if(u1_is_intra || u1_cur_depth) 667 { 668 u1_cabac_state_idx = IHEVC_CAB_CBF_LUMA_IDX + !u1_cur_depth; 669 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 670 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->s_luma_data.u1_cbf); 671 } 672 } 673 else 674 { 675 if((u1_tu_size <= u1_max_tu_size) && (u1_tu_size > u1_min_tu_size) && 676 (u1_cur_depth < u1_max_depth) && !(u1_is_intra_nxn_pu && !u1_cur_depth)) 677 { 678 u1_cabac_state_idx = IHEVC_CAB_SPLIT_TFM + (5 - u1_log2_tu_size); 679 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 680 pu1_cabac_ctxt, u1_cabac_state_idx, 1); 681 } 682 683 if(u1_chroma_processing_enabled && (u1_tu_size > 4)) 684 { 685 tu_tree_node_t *ps_parent = ihevce_tu_node_parent_finder(ps_root, ps_leaf); 686 687 u1_cabac_state_idx = IHEVC_CAB_CBCR_IDX + u1_cur_depth; 688 689 if(!u1_cur_depth || ps_parent->as_cb_data[0].u1_cbf || ps_parent->as_cb_data[1].u1_cbf) 690 { 691 if(u1_is_422 && (8 == u1_tu_size)) 692 { 693 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 694 pu1_cabac_ctxt, u1_cabac_state_idx, 
ps_leaf->as_cb_data[0].u1_cbf); 695 696 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 697 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cb_data[1].u1_cbf); 698 } 699 else 700 { 701 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 702 pu1_cabac_ctxt, 703 u1_cabac_state_idx, 704 ps_leaf->as_cb_data[0].u1_cbf || ps_leaf->as_cb_data[1].u1_cbf); 705 } 706 } 707 708 if(!u1_cur_depth || ps_parent->as_cr_data[0].u1_cbf || ps_parent->as_cr_data[1].u1_cbf) 709 { 710 if(u1_is_422 && (8 == u1_tu_size)) 711 { 712 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 713 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[0].u1_cbf); 714 715 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 716 pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[1].u1_cbf); 717 } 718 else 719 { 720 u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater( 721 pu1_cabac_ctxt, 722 u1_cabac_state_idx, 723 ps_leaf->as_cr_data[0].u1_cbf || ps_leaf->as_cr_data[1].u1_cbf); 724 } 725 } 726 } 727 } 728 729 return u4_num_bits; 730 } 731 732 /*! 733 ****************************************************************************** 734 * \if Function name : ihevce_tu_processor \endif 735 * 736 * \notes 737 * Input : 1. ps_ctxt: Pointer to enc-loop's context. Parts of this structure 738 * shall be modified by this function. They include, au1_cu_csbf, 739 * i8_cu_not_coded_cost, ai2_scratch and s_rdoq_sbh_ctxt 740 * 2. ps_node: Pointer to current node of the TU tree. This struct 741 * shall be modified by this function 742 * 3. pv_src: Pointer to buffer which stores the source 743 * 4. pv_pred: Pointer to buffer which stores the pred 744 * 5. pv_recon: Pointer to buffer which stores the recon 745 * This buffer shall be modified by this function 746 * 6. pi2_deq_data: Pointer to buffer which stores the output of IQ. 747 * This buffer shall be modified by this function 748 * 7. 
pu1_ecd: Pointer to buffer which stores the data output by 749 * entropy coding. This buffer shall be modified by this function 750 * 8. pu1_cabac_ctxt: Pointer to buffer which stores the current CABAC 751 * state. This buffer shall be modified by this function 752 * Output : NA 753 * 754 ***************************************************************************** 755 */ 756 static void ihevce_tu_processor( 757 ihevce_enc_loop_ctxt_t *ps_ctxt, 758 tu_tree_node_t *ps_node, 759 buffer_data_for_tu_t *ps_buffer_data, 760 UWORD8 *pu1_cabac_ctxt, 761 WORD32 i4_pred_mode, 762 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 763 WORD32 i4_alpha_stim_multiplier, 764 UWORD8 u1_is_cu_noisy, 765 #endif 766 UWORD8 u1_chroma_processing_enabled, 767 UWORD8 u1_compute_spatial_ssd) 768 { 769 UWORD8 u1_is_recon_available; 770 771 void *pv_src = ps_buffer_data->s_src_pred_rec_buf_luma.pv_src; 772 void *pv_pred = ps_buffer_data->s_src_pred_rec_buf_luma.pv_pred; 773 void *pv_recon = ps_buffer_data->s_src_pred_rec_buf_luma.pv_recon; 774 WORD16 *pi2_deq_data = ps_buffer_data->pi2_deq_data; 775 UWORD8 *pu1_ecd = ps_buffer_data->ppu1_ecd[0]; 776 WORD32 i4_src_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_src_stride; 777 WORD32 i4_pred_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_pred_stride; 778 WORD32 i4_recon_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_recon_stride; 779 WORD32 i4_deq_data_stride = ps_buffer_data->i4_deq_data_stride; 780 UWORD8 u1_size = ps_node->s_luma_data.u1_size; 781 UWORD8 u1_posx = ps_node->s_luma_data.u1_posx; 782 UWORD8 u1_posy = ps_node->s_luma_data.u1_posy; 783 WORD32 trans_size = (64 == u1_size) ? 
32 : u1_size; 784 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); 785 786 (void)pu1_cabac_ctxt; 787 { 788 pv_src = ((UWORD8 *)pv_src) + u1_posx + u1_posy * i4_src_stride; 789 pv_pred = ((UWORD8 *)pv_pred) + u1_posx + u1_posy * i4_pred_stride; 790 pv_recon = ((UWORD8 *)pv_recon) + u1_posx + u1_posy * i4_recon_stride; 791 } 792 793 pi2_deq_data += u1_posx + u1_posy * i4_deq_data_stride; 794 795 /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */ 796 /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */ 797 /* Currently the complete array will contain only single value*/ 798 /*The rounding factor is calculated with the formula 799 Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2 800 rounding factor = (1 - DeadZone Val) 801 802 Assumption: Cabac states of All the sub-blocks in the TU are considered independent 803 */ 804 if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && 805 (ps_node->s_luma_data.u1_posx || ps_node->s_luma_data.u1_posy)) 806 { 807 double i4_lamda_modifier; 808 809 if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id)) 810 { 811 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier * 812 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00); 813 } 814 else 815 { 816 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier; 817 } 818 if(ps_ctxt->i4_use_const_lamda_modifier) 819 { 820 if(ISLICE == ps_ctxt->i1_slice_type) 821 { 822 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier; 823 } 824 else 825 { 826 i4_lamda_modifier = CONST_LAMDA_MOD_VAL; 827 } 828 } 829 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = &ps_ctxt->i4_quant_round_tu[0][0]; 830 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = &ps_ctxt->i4_quant_round_tu[1][0]; 831 832 memset( 833 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], 834 0, 835 trans_size * trans_size * sizeof(WORD32)); 836 memset( 837 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], 838 0, 
839 trans_size * trans_size * sizeof(WORD32)); 840 841 ihevce_quant_rounding_factor_gen( 842 trans_size, 843 1, 844 &ps_ctxt->s_rdopt_entropy_ctxt, 845 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], 846 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], 847 i4_lamda_modifier, 848 1); 849 } 850 else 851 { 852 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = 853 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3]; 854 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = 855 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3]; 856 } 857 858 #if ENABLE_INTER_ZCU_COST 859 ps_ctxt->i8_cu_not_coded_cost = 0; 860 #endif 861 862 { 863 ps_node->s_luma_data.u1_cbf = ihevce_t_q_iq_ssd_scan_fxn( 864 ps_ctxt, 865 (UWORD8 *)pv_pred, 866 i4_pred_stride, 867 (UWORD8 *)pv_src, 868 i4_src_stride, 869 pi2_deq_data, 870 i4_deq_data_stride, 871 (UWORD8 *)pv_recon, 872 i4_recon_stride, 873 pu1_ecd, 874 ps_ctxt->au1_cu_csbf, 875 ps_ctxt->i4_cu_csbf_strd, 876 u1_size, 877 i4_pred_mode, 878 &ps_node->s_luma_data.i8_ssd, 879 &ps_node->s_luma_data.i4_num_bytes_used_for_ecd, 880 &ps_node->s_luma_data.i4_bits, 881 &ps_node->s_luma_data.u4_sad, 882 &ps_node->s_luma_data.i4_zero_col, 883 &ps_node->s_luma_data.i4_zero_row, 884 &u1_is_recon_available, 885 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq, 886 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh, 887 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 888 i4_alpha_stim_multiplier, 889 u1_is_cu_noisy, 890 #endif 891 u1_compute_spatial_ssd ? 
SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, 892 1); 893 } 894 895 #if ENABLE_INTER_ZCU_COST 896 ps_node->s_luma_data.i8_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost; 897 #endif 898 899 if(u1_compute_spatial_ssd && u1_is_recon_available) 900 { 901 ps_node->s_luma_data.u1_reconBufId = 0; 902 } 903 else 904 { 905 ps_node->s_luma_data.u1_reconBufId = UCHAR_MAX; 906 } 907 908 ps_node->s_luma_data.i8_cost = 909 ps_node->s_luma_data.i8_ssd + 910 COMPUTE_RATE_COST_CLIP30( 911 ps_node->s_luma_data.i4_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); 912 913 pu1_ecd += ps_node->s_luma_data.i4_num_bytes_used_for_ecd; 914 915 if(u1_chroma_processing_enabled && 916 ((!(u1_posx % 8) && !(u1_posy % 8) && (4 == u1_size)) || (u1_size > 4))) 917 { 918 UWORD8 i; 919 void *pv_chroma_src; 920 void *pv_chroma_pred; 921 void *pv_chroma_recon; 922 WORD16 *pi2_deq_data_chroma; 923 924 WORD32 i4_chroma_src_stride = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_src_stride; 925 WORD32 i4_chroma_pred_stride = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_pred_stride; 926 WORD32 i4_chroma_recon_stride = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_recon_stride; 927 WORD32 i4_deq_data_stride_chroma = ps_buffer_data->i4_deq_data_stride_chroma; 928 929 /* SubTU loop */ 930 for(i = 0; i < u1_is_422 + 1; i++) 931 { 932 UWORD8 u1_chroma_size = ps_node->as_cb_data[i].u1_size; 933 UWORD8 u1_chroma_posx = ps_node->as_cb_data[i].u1_posx; 934 UWORD8 u1_chroma_posy = ps_node->as_cb_data[i].u1_posy; 935 936 #if ENABLE_INTER_ZCU_COST 937 ps_ctxt->i8_cu_not_coded_cost = 0; 938 #endif 939 940 pi2_deq_data_chroma = ps_buffer_data->pi2_deq_data_chroma + (u1_chroma_posx * 2) + 941 u1_chroma_posy * i4_deq_data_stride_chroma; 942 943 { 944 pv_chroma_src = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_src) + 945 (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_src_stride; 946 pv_chroma_pred = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_pred) + 947 (u1_chroma_posx * 2) + u1_chroma_posy * 
i4_chroma_pred_stride; 948 pv_chroma_recon = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_recon) + 949 (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_recon_stride; 950 951 ps_node->as_cb_data[i].u1_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( 952 ps_ctxt, 953 (UWORD8 *)pv_chroma_pred, 954 i4_chroma_pred_stride, 955 (UWORD8 *)pv_chroma_src, 956 i4_chroma_src_stride, 957 pi2_deq_data_chroma, 958 i4_deq_data_stride_chroma, 959 (UWORD8 *)pv_chroma_recon, 960 i4_chroma_recon_stride, 961 pu1_ecd, 962 ps_ctxt->au1_cu_csbf, 963 ps_ctxt->i4_cu_csbf_strd, 964 u1_chroma_size, 965 SCAN_DIAG_UPRIGHT, 966 0, 967 &ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd, 968 &ps_node->as_cb_data[i].i4_bits, 969 &ps_node->as_cb_data[i].i4_zero_col, 970 &ps_node->as_cb_data[i].i4_zero_row, 971 &u1_is_recon_available, 972 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq, 973 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh, 974 &ps_node->as_cb_data[i].i8_ssd, 975 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 976 i4_alpha_stim_multiplier, 977 u1_is_cu_noisy, 978 #endif 979 i4_pred_mode == PRED_MODE_SKIP, 980 u1_compute_spatial_ssd ? 
SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, 981 U_PLANE); 982 } 983 984 #if ENABLE_INTER_ZCU_COST 985 ps_node->as_cb_data[i].i8_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost; 986 #endif 987 988 if(u1_compute_spatial_ssd && u1_is_recon_available) 989 { 990 ps_node->as_cb_data[i].u1_reconBufId = 0; 991 } 992 else 993 { 994 ps_node->as_cb_data[i].u1_reconBufId = UCHAR_MAX; 995 } 996 997 ps_node->as_cb_data[i].i8_cost = 998 ps_node->as_cb_data[i].i8_ssd + COMPUTE_RATE_COST_CLIP30( 999 ps_node->as_cb_data[i].i4_bits, 1000 ps_ctxt->i8_cl_ssd_lambda_chroma_qf, 1001 LAMBDA_Q_SHIFT); 1002 1003 #if WEIGH_CHROMA_COST 1004 ps_node->as_cb_data[i].i8_cost = 1005 (ps_node->as_cb_data[i].i8_cost * ps_ctxt->u4_chroma_cost_weighing_factor + 1006 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> 1007 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT; 1008 #endif 1009 1010 pu1_ecd += ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd; 1011 } 1012 1013 for(i = 0; i < u1_is_422 + 1; i++) 1014 { 1015 UWORD8 u1_chroma_size = ps_node->as_cr_data[i].u1_size; 1016 UWORD8 u1_chroma_posx = ps_node->as_cr_data[i].u1_posx; 1017 UWORD8 u1_chroma_posy = ps_node->as_cr_data[i].u1_posy; 1018 1019 #if ENABLE_INTER_ZCU_COST 1020 ps_ctxt->i8_cu_not_coded_cost = 0; 1021 #endif 1022 1023 pi2_deq_data_chroma = ps_buffer_data->pi2_deq_data_chroma + u1_chroma_size + 1024 (u1_chroma_posx * 2) + u1_chroma_posy * i4_deq_data_stride_chroma; 1025 1026 { 1027 pv_chroma_src = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_src) + 1028 (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_src_stride; 1029 pv_chroma_pred = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_pred) + 1030 (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_pred_stride; 1031 pv_chroma_recon = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_recon) + 1032 (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_recon_stride; 1033 1034 ps_node->as_cr_data[i].u1_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( 1035 ps_ctxt, 1036 (UWORD8 
*)pv_chroma_pred,
                    i4_chroma_pred_stride,
                    (UWORD8 *)pv_chroma_src,
                    i4_chroma_src_stride,
                    pi2_deq_data_chroma,
                    i4_deq_data_stride_chroma,
                    (UWORD8 *)pv_chroma_recon,
                    i4_chroma_recon_stride,
                    pu1_ecd,
                    ps_ctxt->au1_cu_csbf,
                    ps_ctxt->i4_cu_csbf_strd,
                    u1_chroma_size,
                    SCAN_DIAG_UPRIGHT,
                    0,
                    &ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd,
                    &ps_node->as_cr_data[i].i4_bits,
                    &ps_node->as_cr_data[i].i4_zero_col,
                    &ps_node->as_cr_data[i].i4_zero_row,
                    &u1_is_recon_available,
                    ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq,
                    ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh,
                    &ps_node->as_cr_data[i].i8_ssd,
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
                    i4_alpha_stim_multiplier,
                    u1_is_cu_noisy,
#endif
                    i4_pred_mode == PRED_MODE_SKIP,
                    u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
                    V_PLANE);
            }

#if ENABLE_INTER_ZCU_COST
            ps_node->as_cr_data[i].i8_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
#endif

            /* UCHAR_MAX marks "no recon stored for this TU"; it is the value */
            /* ihevce_debriefer_when_parent_wins tests u1_reconBufId against */
            if(u1_compute_spatial_ssd && u1_is_recon_available)
            {
                ps_node->as_cr_data[i].u1_reconBufId = 0;
            }
            else
            {
                ps_node->as_cr_data[i].u1_reconBufId = UCHAR_MAX;
            }

            /* Cost = distortion + rate term derived from the bits with the chroma lambda */
            ps_node->as_cr_data[i].i8_cost =
                ps_node->as_cr_data[i].i8_ssd + COMPUTE_RATE_COST_CLIP30(
                                                    ps_node->as_cr_data[i].i4_bits,
                                                    ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
                                                    LAMBDA_Q_SHIFT);

#if WEIGH_CHROMA_COST
            /* Fixed-point scaling of the cost, with rounding */
            ps_node->as_cr_data[i].i8_cost =
                (ps_node->as_cr_data[i].i8_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
#endif

            /* Advance the write position past the bytes produced for this TU */
            pu1_ecd += ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd;
        }
    }
}

/*!
******************************************************************************
* \if Function name : ihevce_nbr_data_copier \endif
*
* \brief
*  Writes the CU QP and the luma CBF into every nbr_4x4_t entry covered by
*  a TU
*
* \notes
*  'u1_posx', 'u1_posy' and 'u1_size' are each divided by 4 to address
*  'ps_nbr_data_buf'; consecutive rows of that buffer are
*  'i4_nbr_data_buf_stride' entries apart. Only the 'b8_qp' and 'b1_y_cbf'
*  fields are written.
*
*****************************************************************************
*/
static INLINE void ihevce_nbr_data_copier(
    nbr_4x4_t *ps_nbr_data_buf,
    WORD32 i4_nbr_data_buf_stride,
    WORD32 i4_cu_qp,
    UWORD8 u1_cbf,
    WORD32 u1_posx,
    UWORD8 u1_posy,
    UWORD8 u1_size)
{
    WORD32 i, j;

    /* Number of 4x4 entries along one side of the TU */
    UWORD8 u1_num_4x4_in_tu = u1_size / 4;

    /* Move to the entry that corresponds to the TU's first 4x4 block */
    ps_nbr_data_buf += ((u1_posx) / 4) + (u1_posy / 4) * i4_nbr_data_buf_stride;

    for(i = 0; i < u1_num_4x4_in_tu; i++)
    {
        for(j = 0; j < u1_num_4x4_in_tu; j++)
        {
            ps_nbr_data_buf[j].b8_qp = i4_cu_qp;
            ps_nbr_data_buf[j].b1_y_cbf = u1_cbf;
        }

        /* Next row of 4x4 entries */
        ps_nbr_data_buf += i4_nbr_data_buf_stride;
    }
}

/*!
******************************************************************************
* \if Function name : ihevce_debriefer_when_parent_wins \endif
*
* \brief
*  Transfers the results held for 'ps_node' from the '*_src' buffers to the
*  '*_dst' buffers and detaches the node's children
*
* \notes
*  For luma, and for every Cb and Cr block when
*  'u1_chroma_processing_enabled' is set:
*    - if u1_reconBufId != UCHAR_MAX, the recon block is copied
*    - else if u1_cbf is set, the dequantised data block is copied
*    - otherwise nothing is copied for that block
*  Afterwards:
*    - the sum of i4_num_bytes_used_for_ecd over the blocks visited is moved
*      from 'pu1_ecd_src' to 'pu1_ecd_dst' (skipped when both are equal,
*      which includes both being NULL)
*    - 'i4_cabac_state_table_size' bytes are copied from
*      'pu1_cabac_ctxt_src' to 'pu1_cabac_ctxt_dst'
*    - ihevce_nbr_data_copier is run for the luma TU
*    - all four child pointers of 'ps_node' are set to NULL
*
*  'u1_is_422' selects 2 chroma blocks per plane instead of 1.
*  'u1_is_hbd' only scales the strides and width passed to the recon copies.
*
*****************************************************************************
*/
static INLINE void ihevce_debriefer_when_parent_wins(
    tu_tree_node_t *ps_node,
    FT_COPY_2D *pf_copy_2d,
    FT_CHROMA_INTERLEAVE_2D_COPY *pf_chroma_interleave_2d_copy,
    nbr_4x4_t *ps_nbr_data_buf,
    WORD16 *pi2_deq_data_src,
    WORD16 *pi2_deq_data_dst,
    WORD16 *pi2_deq_data_src_chroma,
    WORD16 *pi2_deq_data_dst_chroma,
    void *pv_recon_src,
    void *pv_recon_dst,
    void *pv_recon_src_chroma,
    void *pv_recon_dst_chroma,
    UWORD8 *pu1_cabac_ctxt_src,
    UWORD8 *pu1_cabac_ctxt_dst,
    UWORD8 *pu1_ecd_src,
    UWORD8 *pu1_ecd_dst,
    WORD32 i4_nbr_data_buf_stride,
    WORD32 i4_deq_data_src_stride,
    WORD32 i4_deq_data_dst_stride,
    WORD32 i4_deq_data_src_stride_chroma,
    WORD32 i4_deq_data_dst_stride_chroma,
    WORD32 i4_recon_src_stride,
    WORD32 i4_recon_dst_stride,
    WORD32 i4_recon_src_stride_chroma,
    WORD32 i4_recon_dst_stride_chroma,
    WORD32 i4_cabac_state_table_size,
    WORD32 i4_cu_qp,
    UWORD8 u1_chroma_processing_enabled,
    UWORD8 u1_is_422,
    UWORD8 u1_is_hbd)
{
    UWORD8 i;

    /* Running total of entropy-coding-data bytes owned by this node */
    UWORD32 u4_num_ecd_bytes = 0;

    /* Y */
    {
        UWORD8 u1_posx = ps_node->s_luma_data.u1_posx;
        UWORD8 u1_posy = ps_node->s_luma_data.u1_posy;
        /* Offsets are applied in WORD16 elements before the cast to a byte pointer */
        UWORD8 *pu1_deq_data_dst =
            (UWORD8 *)(pi2_deq_data_dst + u1_posx + u1_posy * i4_deq_data_dst_stride);
        UWORD8 *pu1_deq_data_src =
            (UWORD8 *)(pi2_deq_data_src + u1_posx + u1_posy * i4_deq_data_src_stride);
        UWORD8 *pu1_recon_dst;
        UWORD8 *pu1_recon_src;

        /* NOTE(review): these offsets are in bytes and are not scaled by */
        /* (u1_is_hbd + 1), unlike the strides handed to pf_copy_2d below - */
        /* confirm how the u1_is_hbd case is meant to be handled */
        {
            pu1_recon_dst = (((UWORD8 *)pv_recon_dst) + u1_posx + u1_posy * i4_recon_dst_stride);
            pu1_recon_src = (((UWORD8 *)pv_recon_src) + u1_posx + u1_posy * i4_recon_src_stride);
        }
        u4_num_ecd_bytes += ps_node->s_luma_data.i4_num_bytes_used_for_ecd;

        if(ps_node->s_luma_data.u1_reconBufId != UCHAR_MAX)
        {
            /* A recon is available for this TU: copy it. Presumably the */
            /* arguments are (dst, dst stride, src, src stride, width, height) */
            /* - confirm against FT_COPY_2D */
            pf_copy_2d(
                pu1_recon_dst,
                i4_recon_dst_stride * (u1_is_hbd + 1),
                pu1_recon_src,
                i4_recon_src_stride * (u1_is_hbd + 1),
                ps_node->s_luma_data.u1_size * (u1_is_hbd + 1),
                ps_node->s_luma_data.u1_size);
        }
        else if(ps_node->s_luma_data.u1_cbf)
        {
            /* No recon but the TU is coded: copy the dequantised data. */
            /* The '* 2' converts WORD16 element counts to bytes */
            pf_copy_2d(
                pu1_deq_data_dst,
                i4_deq_data_dst_stride * 2,
                pu1_deq_data_src,
                i4_deq_data_src_stride * 2,
                ps_node->s_luma_data.u1_size * 2,
                ps_node->s_luma_data.u1_size);
        }
    }

    /* Cb */
    if(u1_chroma_processing_enabled)
    {
        for(i = 0; i < u1_is_422 + 1; i++)
        {
            UWORD8 u1_posx = ps_node->as_cb_data[i].u1_posx;
            UWORD8 u1_posy = ps_node->as_cb_data[i].u1_posy;
            UWORD8 *pu1_deq_data_dst =
                (UWORD8
                     *)(pi2_deq_data_dst_chroma + (u1_posx * 2) + (u1_posy * i4_deq_data_dst_stride_chroma));
            UWORD8 *pu1_deq_data_src =
                (UWORD8
                     *)(pi2_deq_data_src_chroma + (u1_posx * 2) + (u1_posy * i4_deq_data_src_stride_chroma));
            UWORD8 *pu1_recon_dst;
            UWORD8 *pu1_recon_src;

            {
                pu1_recon_dst =
                    (((UWORD8 *)pv_recon_dst_chroma) + (u1_posx * 2) +
                     u1_posy * i4_recon_dst_stride_chroma);
                pu1_recon_src =
                    (((UWORD8 *)pv_recon_src_chroma) + (u1_posx * 2) +
                     u1_posy * i4_recon_src_stride_chroma);
            }
            u4_num_ecd_bytes += ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd;

            if(ps_node->as_cb_data[i].u1_reconBufId != UCHAR_MAX)
            {
                /* NOTE: unlike pf_copy_2d, the source pointer is passed */
                /* before the destination pointer here - confirm against */
                /* FT_CHROMA_INTERLEAVE_2D_COPY */
                {
                    pf_chroma_interleave_2d_copy(
                        pu1_recon_src,
                        i4_recon_src_stride_chroma * (u1_is_hbd + 1),
                        pu1_recon_dst,
                        i4_recon_dst_stride_chroma * (u1_is_hbd + 1),
                        ps_node->as_cb_data[i].u1_size * (u1_is_hbd + 1),
                        ps_node->as_cb_data[i].u1_size,
                        U_PLANE);
                }
            }
            else if(ps_node->as_cb_data[i].u1_cbf)
            {
                pf_copy_2d(
                    pu1_deq_data_dst,
                    i4_deq_data_dst_stride_chroma * 2,
                    pu1_deq_data_src,
                    i4_deq_data_src_stride_chroma * 2,
                    ps_node->as_cb_data[i].u1_size * 2,
                    ps_node->as_cb_data[i].u1_size);
            }
        }

        /* Cr */
        for(i = 0; i < u1_is_422 + 1; i++)
        {
            UWORD8 u1_posx = ps_node->as_cr_data[i].u1_posx;
            UWORD8 u1_posy = ps_node->as_cr_data[i].u1_posy;
            /* Cr dequantised data sits 'u1_size' WORD16 elements after the */
            /* position used for Cb */
            UWORD8 *pu1_deq_data_dst =
                (UWORD8
                     *)(pi2_deq_data_dst_chroma + ps_node->as_cr_data[i].u1_size + (u1_posx * 2) + (u1_posy * i4_deq_data_dst_stride_chroma));
            UWORD8 *pu1_deq_data_src =
                (UWORD8
                     *)(pi2_deq_data_src_chroma + ps_node->as_cr_data[i].u1_size + (u1_posx * 2) + (u1_posy * i4_deq_data_src_stride_chroma));
            UWORD8 *pu1_recon_dst;
            UWORD8 *pu1_recon_src;

            /* Same recon address as Cb; the plane is selected via V_PLANE below */
            {
                pu1_recon_dst =
                    (((UWORD8 *)pv_recon_dst_chroma) + (u1_posx * 2) +
                     u1_posy * i4_recon_dst_stride_chroma);
                pu1_recon_src =
                    (((UWORD8 *)pv_recon_src_chroma) + (u1_posx * 2) +
                     u1_posy * i4_recon_src_stride_chroma);
            }
            u4_num_ecd_bytes += ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd;

            if(ps_node->as_cr_data[i].u1_reconBufId != UCHAR_MAX)
            {
                {
                    pf_chroma_interleave_2d_copy(
                        pu1_recon_src,
                        i4_recon_src_stride_chroma * (u1_is_hbd + 1),
                        pu1_recon_dst,
                        i4_recon_dst_stride_chroma * (u1_is_hbd + 1),
                        ps_node->as_cr_data[i].u1_size * (u1_is_hbd + 1),
                        ps_node->as_cr_data[i].u1_size,
                        V_PLANE);
                }
            }
            else if(ps_node->as_cr_data[i].u1_cbf)
            {
                pf_copy_2d(
                    pu1_deq_data_dst,
                    i4_deq_data_dst_stride_chroma * 2,
                    pu1_deq_data_src,
                    i4_deq_data_src_stride_chroma * 2,
                    ps_node->as_cr_data[i].u1_size * 2,
                    ps_node->as_cr_data[i].u1_size);
            }
        }
    }

    /* Skipped when source and destination coincide (e.g. both NULL). */
    /* memmove is used, so overlapping regions are handled */
    if(pu1_ecd_dst != pu1_ecd_src)
    {
        memmove(pu1_ecd_dst, pu1_ecd_src, u4_num_ecd_bytes);
    }

    memcpy(pu1_cabac_ctxt_dst, pu1_cabac_ctxt_src, i4_cabac_state_table_size);

    ihevce_nbr_data_copier(
        ps_nbr_data_buf,
        i4_nbr_data_buf_stride,
        i4_cu_qp,
        ps_node->s_luma_data.u1_cbf,
        ps_node->s_luma_data.u1_posx,
        ps_node->s_luma_data.u1_posy,
        ps_node->s_luma_data.u1_size);

    /* The node becomes a leaf */
    ps_node->ps_child_node_tl = NULL;
    ps_node->ps_child_node_tr = NULL;
    ps_node->ps_child_node_bl = NULL;
    ps_node->ps_child_node_br = NULL;
}

/*!
******************************************************************************
* \if Function name : ihevce_ecd_buffer_pointer_updater \endif
*
* \brief
*  Updates ppu1_ecd with current pointer
*  Output : Number of byte positions 'pu1_ecd_buf_ptr_at_t0' is incremented by
*
* \notes
*  When 'u1_parent_has_won' is set, the byte count is taken from 'ps_node'
*  itself; otherwise it is the sum over its four children. The children are
*  dereferenced without a NULL check in that case. Chroma byte counts are
*  included only when 'u1_chroma_processing_enabled' is set.
*
*****************************************************************************
*/
static INLINE UWORD32 ihevce_ecd_buffer_pointer_updater(
    tu_tree_node_t *ps_node,
    UWORD8 **ppu1_ecd,
    UWORD8 *pu1_ecd_buf_ptr_at_t0,
    UWORD8 u1_parent_has_won,
    UWORD8 u1_chroma_processing_enabled,
    UWORD8 u1_is_422)
{
    UWORD8 i;

    UWORD32 u4_num_bytes = 0;

    if(u1_parent_has_won)
    {
        u4_num_bytes += ps_node->s_luma_data.i4_num_bytes_used_for_ecd;

        if(u1_chroma_processing_enabled)
        {
            for(i = 0; i < u1_is_422 + 1; i++)
            {
                u4_num_bytes += ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd;
                u4_num_bytes += ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd;
            }
        }
    }
    else
    {
        u4_num_bytes += ps_node->ps_child_node_tl->s_luma_data.i4_num_bytes_used_for_ecd;
        u4_num_bytes += ps_node->ps_child_node_tr->s_luma_data.i4_num_bytes_used_for_ecd;
        u4_num_bytes += ps_node->ps_child_node_bl->s_luma_data.i4_num_bytes_used_for_ecd;
        u4_num_bytes += ps_node->ps_child_node_br->s_luma_data.i4_num_bytes_used_for_ecd;

        if(u1_chroma_processing_enabled)
        {
            for(i = 0; i < u1_is_422 + 1; i++)
            {
                u4_num_bytes += ps_node->ps_child_node_tl->as_cb_data[i].i4_num_bytes_used_for_ecd;
                u4_num_bytes += ps_node->ps_child_node_tl->as_cr_data[i].i4_num_bytes_used_for_ecd;
                u4_num_bytes += ps_node->ps_child_node_tr->as_cb_data[i].i4_num_bytes_used_for_ecd;
                u4_num_bytes += ps_node->ps_child_node_tr->as_cr_data[i].i4_num_bytes_used_for_ecd;
                u4_num_bytes += ps_node->ps_child_node_bl->as_cb_data[i].i4_num_bytes_used_for_ecd;
                u4_num_bytes += ps_node->ps_child_node_bl->as_cr_data[i].i4_num_bytes_used_for_ecd;
                u4_num_bytes += ps_node->ps_child_node_br->as_cb_data[i].i4_num_bytes_used_for_ecd;
                u4_num_bytes += ps_node->ps_child_node_br->as_cr_data[i].i4_num_bytes_used_for_ecd;
            }
        }
    }

    /* Only the first slot of 'ppu1_ecd' is written */
    ppu1_ecd[0] = pu1_ecd_buf_ptr_at_t0 + u4_num_bytes;

    return u4_num_bytes;
}

/*!
******************************************************************************
* \if Function name : ihevce_tu_node_cost_collator \endif
*
* \brief
*  Returns the luma i8_cost of 'ps_node', plus the i8_cost of each of its
*  Cb and Cr blocks when 'u1_chroma_processing_enabled' is set
*  ('u1_is_422 + 1' blocks per chroma plane)
*
*****************************************************************************
*/
static INLINE LWORD64 ihevce_tu_node_cost_collator(
    tu_tree_node_t *ps_node, UWORD8 u1_chroma_processing_enabled, UWORD8 u1_is_422)
{
    UWORD8 i;

    LWORD64 i8_cost = 0;

    i8_cost += ps_node->s_luma_data.i8_cost;

    if(u1_chroma_processing_enabled)
    {
        for(i = 0; i < u1_is_422 + 1; i++)
        {
            i8_cost += ps_node->as_cb_data[i].i8_cost;
            i8_cost += ps_node->as_cr_data[i].i8_cost;
        }
    }

    return i8_cost;
}

#if !ENABLE_TOP_DOWN_TU_RECURSION
/*!
******************************************************************************
* \if Function name : ihevce_tu_tree_selector \endif
*
* \notes
*  Determines RDO TU Tree using DFS. If the parent is the winner, then all
*  pointers to the children nodes are set to NULL
*  Input : 1. ps_ctxt: Pointer to enc-loop's context.
Parts of this structure 1416 * shall be modified by this function. They include, au1_cu_csbf, 1417 * i8_cu_not_coded_cost, ai2_scratch, s_rdoq_sbh_ctxt, 1418 * pi4_quant_round_factor_tu_0_1, pi4_quant_round_factor_tu_1_2, 1419 * i4_quant_round_tu 1420 * 2. ps_node: Pointer to current node of the TU tree. This struct 1421 * shall be modified by this function 1422 * 3. pv_recon: Pointer to buffer which stores the recon 1423 * This buffer shall be modified by this function 1424 * 4. ps_nbr_data_buf: Pointer to struct used by succeeding CU's 1425 * during RDOPT. This buffer shall be modifie by this function 1426 * 6. pi2_deq_data: Pointer to buffer which stores the output of IQ. 1427 * This buffer shall be modified by this function 1428 * 7. pu1_ecd: Pointer to buffer which stores the data output by 1429 * entropy coding. This buffer shall be modified by this function 1430 * 8. pu1_cabac_ctxt: Pointer to buffer which stores the current CABAC 1431 * state. This buffer shall be modified by this function 1432 * Output : Cost of coding the current branch of the TU tree 1433 * 1434 ***************************************************************************** 1435 */ 1436 LWORD64 ihevce_tu_tree_selector( 1437 ihevce_enc_loop_ctxt_t *ps_ctxt, 1438 tu_tree_node_t *ps_node, 1439 buffer_data_for_tu_t *ps_buffer_data, 1440 UWORD8 *pu1_cabac_ctxt, 1441 WORD32 i4_pred_mode, 1442 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1443 WORD32 i4_alpha_stim_multiplier, 1444 UWORD8 u1_is_cu_noisy, 1445 #endif 1446 UWORD8 u1_cur_depth, 1447 UWORD8 u1_max_depth, 1448 UWORD8 u1_part_type, 1449 UWORD8 u1_compute_spatial_ssd) 1450 { 1451 UWORD8 au1_cabac_ctxt_backup[IHEVC_CAB_CTXT_END]; 1452 UWORD8 u1_are_children_available; 1453 UWORD32 u4_tuSplitFlag_and_cbf_coding_bits; 1454 1455 nbr_4x4_t *ps_nbr_data_buf = ps_buffer_data->ps_nbr_data_buf; 1456 void *pv_recon_chroma = ps_buffer_data->s_src_pred_rec_buf_chroma.pv_recon; 1457 WORD16 *pi2_deq_data = ps_buffer_data->pi2_deq_data; 1458 
WORD16 *pi2_deq_data_chroma = ps_buffer_data->pi2_deq_data_chroma; 1459 UWORD8 **ppu1_ecd = ps_buffer_data->ppu1_ecd; 1460 WORD32 i4_nbr_data_buf_stride = ps_buffer_data->i4_nbr_data_buf_stride; 1461 WORD32 i4_recon_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_recon_stride; 1462 WORD32 i4_recon_stride_chroma = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_recon_stride; 1463 WORD32 i4_deq_data_stride = ps_buffer_data->i4_deq_data_stride; 1464 WORD32 i4_deq_data_stride_chroma = ps_buffer_data->i4_deq_data_stride_chroma; 1465 UWORD8 *pu1_ecd_bPtr_backup_t1 = ppu1_ecd[0]; 1466 UWORD8 *pu1_ecd_bPtr_backup_t2 = ppu1_ecd[0]; 1467 LWORD64 i8_winning_cost = 0; 1468 1469 ASSERT(ps_node != NULL); 1470 ASSERT( 1471 !(!ps_node->u1_is_valid_node && 1472 ((NULL == ps_node->ps_child_node_tl) || (NULL == ps_node->ps_child_node_tr) || 1473 (NULL == ps_node->ps_child_node_bl) || (NULL == ps_node->ps_child_node_br)))); 1474 1475 u1_are_children_available = 1476 !((NULL == ps_node->ps_child_node_tl) && (NULL == ps_node->ps_child_node_tr) && 1477 (NULL == ps_node->ps_child_node_bl) && (NULL == ps_node->ps_child_node_br)) && 1478 (ps_node->s_luma_data.u1_size > MIN_TU_SIZE); 1479 1480 if(u1_are_children_available) 1481 { 1482 if(ps_node->u1_is_valid_node) 1483 { 1484 memcpy(au1_cabac_ctxt_backup, pu1_cabac_ctxt, sizeof(au1_cabac_ctxt_backup)); 1485 } 1486 1487 if(i4_pred_mode != PRED_MODE_SKIP) 1488 { 1489 u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf( 1490 ps_node, 1491 ps_node->ps_child_node_tl, 1492 pu1_cabac_ctxt, 1493 MAX_TU_SIZE, 1494 MIN_TU_SIZE, 1495 0, 1496 1, 1497 i4_pred_mode == PRED_MODE_INTRA, 1498 (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA), 1499 0, 1500 0); 1501 1502 i8_winning_cost += COMPUTE_RATE_COST_CLIP30( 1503 u4_tuSplitFlag_and_cbf_coding_bits, 1504 ps_ctxt->i8_cl_ssd_lambda_qf, 1505 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); 1506 } 1507 1508 i8_winning_cost += ihevce_tu_tree_selector( 1509 ps_ctxt, 1510 
ps_node->ps_child_node_tl, 1511 ps_buffer_data, 1512 pu1_cabac_ctxt, 1513 i4_pred_mode, 1514 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1515 i4_alpha_stim_multiplier, 1516 u1_is_cu_noisy, 1517 #endif 1518 u1_cur_depth, 1519 u1_max_depth, 1520 u1_part_type, 1521 u1_compute_spatial_ssd); 1522 1523 i8_winning_cost += ihevce_tu_tree_selector( 1524 ps_ctxt, 1525 ps_node->ps_child_node_tr, 1526 ps_buffer_data, 1527 pu1_cabac_ctxt, 1528 i4_pred_mode, 1529 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1530 i4_alpha_stim_multiplier, 1531 u1_is_cu_noisy, 1532 #endif 1533 u1_cur_depth, 1534 u1_max_depth, 1535 u1_part_type, 1536 u1_compute_spatial_ssd); 1537 1538 i8_winning_cost += ihevce_tu_tree_selector( 1539 ps_ctxt, 1540 ps_node->ps_child_node_bl, 1541 ps_buffer_data, 1542 pu1_cabac_ctxt, 1543 i4_pred_mode, 1544 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1545 i4_alpha_stim_multiplier, 1546 u1_is_cu_noisy, 1547 #endif 1548 u1_cur_depth, 1549 u1_max_depth, 1550 u1_part_type, 1551 u1_compute_spatial_ssd); 1552 1553 i8_winning_cost += ihevce_tu_tree_selector( 1554 ps_ctxt, 1555 ps_node->ps_child_node_br, 1556 ps_buffer_data, 1557 pu1_cabac_ctxt, 1558 i4_pred_mode, 1559 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1560 i4_alpha_stim_multiplier, 1561 u1_is_cu_noisy, 1562 #endif 1563 u1_cur_depth, 1564 u1_max_depth, 1565 u1_part_type, 1566 u1_compute_spatial_ssd); 1567 1568 if(ps_node->u1_is_valid_node) 1569 { 1570 WORD16 ai2_deq_data_backup[MAX_CU_SIZE * MAX_CU_SIZE]; 1571 UWORD16 au2_recon_backup[MAX_CU_SIZE * MAX_CU_SIZE]; 1572 1573 buffer_data_for_tu_t s_buffer_data = ps_buffer_data[0]; 1574 1575 pu1_ecd_bPtr_backup_t2 = ppu1_ecd[0]; 1576 s_buffer_data.pi2_deq_data = ai2_deq_data_backup; 1577 s_buffer_data.i4_deq_data_stride = MAX_CU_SIZE; 1578 s_buffer_data.s_src_pred_rec_buf_luma.pv_recon = au2_recon_backup; 1579 s_buffer_data.s_src_pred_rec_buf_luma.i4_recon_stride = MAX_CU_SIZE; 1580 1581 ihevce_tu_processor( 1582 ps_ctxt, 1583 
ps_node, 1584 &s_buffer_data, 1585 au1_cabac_ctxt_backup, 1586 i4_pred_mode, 1587 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1588 i4_alpha_stim_multiplier, 1589 u1_is_cu_noisy, 1590 #endif 1591 0, 1592 u1_compute_spatial_ssd); 1593 1594 if(i4_pred_mode != PRED_MODE_SKIP) 1595 { 1596 u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf( 1597 ps_node, 1598 ps_node, 1599 au1_cabac_ctxt_backup, 1600 MAX_TU_SIZE, 1601 MIN_TU_SIZE, 1602 0, 1603 (u1_cur_depth == u1_max_depth) ? 0 : 1, 1604 i4_pred_mode == PRED_MODE_INTRA, 1605 (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA), 1606 0, 1607 0); 1608 1609 ps_node->s_luma_data.i8_cost += COMPUTE_RATE_COST_CLIP30( 1610 u4_tuSplitFlag_and_cbf_coding_bits, 1611 ps_ctxt->i8_cl_ssd_lambda_qf, 1612 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); 1613 } 1614 1615 if(ps_node->s_luma_data.i8_cost <= i8_winning_cost) 1616 { 1617 ihevce_debriefer_when_parent_wins( 1618 ps_node, 1619 ps_ctxt->s_cmn_opt_func.pf_copy_2d, 1620 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy, 1621 ps_nbr_data_buf, 1622 ai2_deq_data_backup, 1623 pi2_deq_data, 1624 ai2_deq_data_backup + MAX_CU_SIZE * MAX_CU_SIZE, 1625 pi2_deq_data_chroma, 1626 au2_recon_backup, 1627 pv_recon_chroma, 1628 au2_recon_backup + MAX_CU_SIZE * MAX_CU_SIZE, 1629 pv_recon_chroma, 1630 au1_cabac_ctxt_backup, 1631 pu1_cabac_ctxt, 1632 pu1_ecd_bPtr_backup_t2, 1633 pu1_ecd_bPtr_backup_t1, 1634 i4_nbr_data_buf_stride, 1635 MAX_CU_SIZE, 1636 i4_deq_data_stride, 1637 MAX_CU_SIZE, 1638 i4_deq_data_stride_chroma, 1639 MAX_CU_SIZE, 1640 i4_recon_stride, 1641 MAX_CU_SIZE, 1642 i4_recon_stride_chroma, 1643 sizeof(au1_cabac_ctxt_backup), 1644 ps_ctxt->i4_cu_qp, 1645 0, 1646 ps_ctxt->u1_chroma_array_type == 2, 1647 ps_ctxt->u1_bit_depth > 8); 1648 1649 ppu1_ecd[0] = 1650 pu1_ecd_bPtr_backup_t1 + ps_node->s_luma_data.i4_num_bytes_used_for_ecd; 1651 i8_winning_cost = ps_node->s_luma_data.i8_cost; 1652 } 1653 else 1654 { 1655 ps_node->u1_is_valid_node = 0; 
1656 } 1657 } 1658 } 1659 else 1660 { 1661 ASSERT(ps_node->u1_is_valid_node); 1662 1663 ihevce_tu_processor( 1664 ps_ctxt, 1665 ps_node, 1666 ps_buffer_data, 1667 pu1_cabac_ctxt, 1668 i4_pred_mode, 1669 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1670 i4_alpha_stim_multiplier, 1671 u1_is_cu_noisy, 1672 #endif 1673 0, 1674 u1_compute_spatial_ssd); 1675 1676 if(i4_pred_mode != PRED_MODE_SKIP) 1677 { 1678 u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf( 1679 ps_node, 1680 ps_node, 1681 pu1_cabac_ctxt, 1682 MAX_TU_SIZE, 1683 MIN_TU_SIZE, 1684 0, 1685 (u1_cur_depth == u1_max_depth) ? 0 : 1, 1686 i4_pred_mode == PRED_MODE_INTRA, 1687 (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA), 1688 0, 1689 0); 1690 1691 ps_node->s_luma_data.i8_cost += COMPUTE_RATE_COST_CLIP30( 1692 u4_tuSplitFlag_and_cbf_coding_bits, 1693 ps_ctxt->i8_cl_ssd_lambda_qf, 1694 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); 1695 } 1696 1697 ppu1_ecd[0] = pu1_ecd_bPtr_backup_t1 + ps_node->s_luma_data.i4_num_bytes_used_for_ecd; 1698 1699 ihevce_nbr_data_copier( 1700 ps_nbr_data_buf, 1701 i4_nbr_data_buf_stride, 1702 ps_ctxt->i4_cu_qp, 1703 ps_node->s_luma_data.u1_cbf, 1704 ps_node->s_luma_data.u1_posx, 1705 ps_node->s_luma_data.u1_posy, 1706 ps_node->s_luma_data.u1_size); 1707 1708 i8_winning_cost = ps_node->s_luma_data.i8_cost; 1709 } 1710 1711 return i8_winning_cost; 1712 } 1713 #endif 1714 1715 /*! 1716 ****************************************************************************** 1717 * \if Function name : ihevce_topDown_tu_tree_selector \endif 1718 * 1719 * \notes 1720 * Determines RDO TU Tree using DFS. If the parent is the winner, then all 1721 * pointers to the children nodes are set to NULL 1722 * Input : 1. ps_ctxt: Pointer to enc-loop's context. Parts of this structure 1723 * shall be modified by this function. 
They include, au1_cu_csbf, 1724 * i8_cu_not_coded_cost, ai2_scratch, s_rdoq_sbh_ctxt, 1725 * pi4_quant_round_factor_tu_0_1, pi4_quant_round_factor_tu_1_2, 1726 * i4_quant_round_tu 1727 * 2. ps_node: Pointer to current node of the TU tree. This struct 1728 * shall be modified by this function 1729 * 3. pv_recon: Pointer to buffer which stores the recon 1730 * This buffer shall be modified by this function 1731 * 4. ps_nbr_data_buf: Pointer to struct used by succeeding CU's 1732 * during RDOPT. This buffer shall be modifie by this function 1733 * 6. pi2_deq_data: Pointer to buffer which stores the output of IQ. 1734 * This buffer shall be modified by this function 1735 * 7. pu1_ecd: Pointer to buffer which stores the data output by 1736 * entropy coding. This buffer shall be modified by this function 1737 * 8. pu1_cabac_ctxt: Pointer to buffer which stores the current CABAC 1738 * state. This buffer shall be modified by this function 1739 * Output : Cost of coding the current branch of the TU tree 1740 * 1741 ***************************************************************************** 1742 */ 1743 LWORD64 ihevce_topDown_tu_tree_selector( 1744 ihevce_enc_loop_ctxt_t *ps_ctxt, 1745 tu_tree_node_t *ps_node, 1746 buffer_data_for_tu_t *ps_buffer_data, 1747 UWORD8 *pu1_cabac_ctxt, 1748 WORD32 i4_pred_mode, 1749 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1750 WORD32 i4_alpha_stim_multiplier, 1751 UWORD8 u1_is_cu_noisy, 1752 #endif 1753 UWORD8 u1_cur_depth, 1754 UWORD8 u1_max_depth, 1755 UWORD8 u1_part_type, 1756 UWORD8 u1_chroma_processing_enabled, 1757 UWORD8 u1_compute_spatial_ssd) 1758 { 1759 UWORD8 au1_cabac_ctxt_backup[IHEVC_CAB_CTXT_END]; 1760 UWORD8 u1_are_children_available; 1761 UWORD32 u4_tuSplitFlag_and_cbf_coding_bits; 1762 1763 nbr_4x4_t *ps_nbr_data_buf = ps_buffer_data->ps_nbr_data_buf; 1764 1765 void *pv_recon = ps_buffer_data->s_src_pred_rec_buf_luma.pv_recon; 1766 void *pv_recon_chroma = ps_buffer_data->s_src_pred_rec_buf_chroma.pv_recon; 1767 
WORD16 *pi2_deq_data = ps_buffer_data->pi2_deq_data; 1768 WORD16 *pi2_deq_data_chroma = ps_buffer_data->pi2_deq_data_chroma; 1769 UWORD8 **ppu1_ecd = ps_buffer_data->ppu1_ecd; 1770 WORD32 i4_nbr_data_buf_stride = ps_buffer_data->i4_nbr_data_buf_stride; 1771 WORD32 i4_recon_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_recon_stride; 1772 WORD32 i4_recon_stride_chroma = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_recon_stride; 1773 WORD32 i4_deq_data_stride = ps_buffer_data->i4_deq_data_stride; 1774 WORD32 i4_deq_data_stride_chroma = ps_buffer_data->i4_deq_data_stride_chroma; 1775 UWORD8 *pu1_ecd_bPtr_backup_t1 = ppu1_ecd[0]; 1776 UWORD8 *pu1_ecd_bPtr_backup_t2 = ppu1_ecd[0]; 1777 LWORD64 i8_parent_cost = 0; 1778 LWORD64 i8_child_cost = 0; 1779 LWORD64 i8_winning_cost = 0; 1780 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); 1781 1782 ASSERT(ps_node != NULL); 1783 ASSERT( 1784 !(!ps_node->u1_is_valid_node && 1785 ((NULL == ps_node->ps_child_node_tl) || (NULL == ps_node->ps_child_node_tr) || 1786 (NULL == ps_node->ps_child_node_bl) || (NULL == ps_node->ps_child_node_br)))); 1787 1788 u1_are_children_available = 1789 !((NULL == ps_node->ps_child_node_tl) && (NULL == ps_node->ps_child_node_tr) && 1790 (NULL == ps_node->ps_child_node_bl) && (NULL == ps_node->ps_child_node_br)) && 1791 (ps_node->s_luma_data.u1_size > MIN_TU_SIZE); 1792 1793 if(u1_are_children_available) 1794 { 1795 WORD16 ai2_deq_data_backup[MAX_CU_SIZE * MAX_CU_SIZE * 2]; 1796 UWORD16 au2_recon_backup[MAX_CU_SIZE * MAX_CU_SIZE * 2]; 1797 1798 UWORD8 u1_is_tu_coded = 0; 1799 1800 if(ps_node->u1_is_valid_node) 1801 { 1802 buffer_data_for_tu_t s_buffer_data = ps_buffer_data[0]; 1803 1804 memcpy(au1_cabac_ctxt_backup, pu1_cabac_ctxt, sizeof(au1_cabac_ctxt_backup)); 1805 1806 s_buffer_data.pi2_deq_data = ai2_deq_data_backup; 1807 s_buffer_data.i4_deq_data_stride = MAX_CU_SIZE; 1808 s_buffer_data.pi2_deq_data_chroma = ai2_deq_data_backup + MAX_CU_SIZE * MAX_CU_SIZE; 1809 
s_buffer_data.i4_deq_data_stride_chroma = MAX_CU_SIZE; 1810 s_buffer_data.s_src_pred_rec_buf_luma.pv_recon = au2_recon_backup; 1811 s_buffer_data.s_src_pred_rec_buf_luma.i4_recon_stride = MAX_CU_SIZE; 1812 s_buffer_data.s_src_pred_rec_buf_chroma.pv_recon = 1813 au2_recon_backup + MAX_CU_SIZE * MAX_CU_SIZE; 1814 s_buffer_data.s_src_pred_rec_buf_chroma.i4_recon_stride = MAX_CU_SIZE; 1815 1816 ihevce_tu_processor( 1817 ps_ctxt, 1818 ps_node, 1819 &s_buffer_data, 1820 au1_cabac_ctxt_backup, 1821 i4_pred_mode, 1822 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1823 i4_alpha_stim_multiplier, 1824 u1_is_cu_noisy, 1825 #endif 1826 u1_chroma_processing_enabled, 1827 u1_compute_spatial_ssd); 1828 1829 if(i4_pred_mode != PRED_MODE_SKIP) 1830 { 1831 u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf( 1832 ps_node, 1833 ps_node, 1834 au1_cabac_ctxt_backup, 1835 MAX_TU_SIZE, 1836 MIN_TU_SIZE, 1837 0, 1838 (u1_cur_depth == u1_max_depth) ? 0 : 1, 1839 i4_pred_mode == PRED_MODE_INTRA, 1840 (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA), 1841 u1_chroma_processing_enabled, 1842 u1_is_422); 1843 1844 ps_node->s_luma_data.i8_cost += COMPUTE_RATE_COST_CLIP30( 1845 u4_tuSplitFlag_and_cbf_coding_bits, 1846 ps_ctxt->i8_cl_ssd_lambda_qf, 1847 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); 1848 } 1849 1850 i8_parent_cost += 1851 ihevce_tu_node_cost_collator(ps_node, u1_chroma_processing_enabled, u1_is_422); 1852 1853 ihevce_ecd_buffer_pointer_updater( 1854 ps_node, 1855 ppu1_ecd, 1856 pu1_ecd_bPtr_backup_t1, 1857 1, 1858 u1_chroma_processing_enabled, 1859 u1_is_422); 1860 } 1861 else 1862 { 1863 ps_node->s_luma_data.i8_cost = i8_parent_cost = LLONG_MAX; 1864 ps_node->s_luma_data.i4_num_bytes_used_for_ecd = 0; 1865 } 1866 1867 u1_is_tu_coded |= ps_node->s_luma_data.u1_cbf; 1868 1869 if(u1_chroma_processing_enabled) 1870 { 1871 UWORD8 i; 1872 1873 for(i = 0; i < u1_is_422 + 1; i++) 1874 { 1875 u1_is_tu_coded |= ps_node->as_cb_data[i].u1_cbf; 1876 
u1_is_tu_coded |= ps_node->as_cr_data[i].u1_cbf; 1877 } 1878 } 1879 1880 if(!ps_node->u1_is_valid_node || u1_is_tu_coded) 1881 { 1882 pu1_ecd_bPtr_backup_t2 = ppu1_ecd[0]; 1883 1884 if(i4_pred_mode != PRED_MODE_SKIP) 1885 { 1886 u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf( 1887 ps_node, 1888 ps_node->ps_child_node_tl, 1889 pu1_cabac_ctxt, 1890 MAX_TU_SIZE, 1891 MIN_TU_SIZE, 1892 0, 1893 1, 1894 i4_pred_mode == PRED_MODE_INTRA, 1895 (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA), 1896 u1_chroma_processing_enabled, 1897 u1_is_422); 1898 1899 i8_child_cost += COMPUTE_RATE_COST_CLIP30( 1900 u4_tuSplitFlag_and_cbf_coding_bits, 1901 ps_ctxt->i8_cl_ssd_lambda_qf, 1902 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); 1903 } 1904 1905 if(i8_child_cost < i8_parent_cost) 1906 { 1907 i8_child_cost += ihevce_topDown_tu_tree_selector( 1908 ps_ctxt, 1909 ps_node->ps_child_node_tl, 1910 ps_buffer_data, 1911 pu1_cabac_ctxt, 1912 i4_pred_mode, 1913 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1914 i4_alpha_stim_multiplier, 1915 u1_is_cu_noisy, 1916 #endif 1917 u1_cur_depth, 1918 u1_max_depth, 1919 u1_part_type, 1920 u1_chroma_processing_enabled, 1921 u1_compute_spatial_ssd); 1922 1923 ps_node->ps_child_node_tl->s_luma_data.i8_cost += 1924 i8_child_cost - ps_node->ps_child_node_tl->s_luma_data.i8_cost; 1925 } 1926 1927 if(i8_child_cost < i8_parent_cost) 1928 { 1929 i8_child_cost += ihevce_topDown_tu_tree_selector( 1930 ps_ctxt, 1931 ps_node->ps_child_node_tr, 1932 ps_buffer_data, 1933 pu1_cabac_ctxt, 1934 i4_pred_mode, 1935 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1936 i4_alpha_stim_multiplier, 1937 u1_is_cu_noisy, 1938 #endif 1939 u1_cur_depth, 1940 u1_max_depth, 1941 u1_part_type, 1942 u1_chroma_processing_enabled, 1943 u1_compute_spatial_ssd); 1944 } 1945 1946 if(i8_child_cost < i8_parent_cost) 1947 { 1948 i8_child_cost += ihevce_topDown_tu_tree_selector( 1949 ps_ctxt, 1950 ps_node->ps_child_node_bl, 1951 
ps_buffer_data, 1952 pu1_cabac_ctxt, 1953 i4_pred_mode, 1954 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1955 i4_alpha_stim_multiplier, 1956 u1_is_cu_noisy, 1957 #endif 1958 u1_cur_depth, 1959 u1_max_depth, 1960 u1_part_type, 1961 u1_chroma_processing_enabled, 1962 u1_compute_spatial_ssd); 1963 } 1964 1965 if(i8_child_cost < i8_parent_cost) 1966 { 1967 i8_child_cost += ihevce_topDown_tu_tree_selector( 1968 ps_ctxt, 1969 ps_node->ps_child_node_br, 1970 ps_buffer_data, 1971 pu1_cabac_ctxt, 1972 i4_pred_mode, 1973 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1974 i4_alpha_stim_multiplier, 1975 u1_is_cu_noisy, 1976 #endif 1977 u1_cur_depth, 1978 u1_max_depth, 1979 u1_part_type, 1980 u1_chroma_processing_enabled, 1981 u1_compute_spatial_ssd); 1982 } 1983 1984 if(i8_parent_cost > i8_child_cost) 1985 { 1986 UWORD32 u4_num_bytes = ihevce_ecd_buffer_pointer_updater( 1987 ps_node, 1988 ppu1_ecd, 1989 pu1_ecd_bPtr_backup_t1, 1990 0, 1991 u1_chroma_processing_enabled, 1992 u1_is_422); 1993 1994 if(pu1_ecd_bPtr_backup_t2 != pu1_ecd_bPtr_backup_t1) 1995 { 1996 memmove(pu1_ecd_bPtr_backup_t1, pu1_ecd_bPtr_backup_t2, u4_num_bytes); 1997 } 1998 1999 ps_node->s_luma_data.i4_num_bytes_used_for_ecd = u4_num_bytes; 2000 ps_node->as_cb_data[0].i4_num_bytes_used_for_ecd = 0; 2001 ps_node->as_cb_data[1].i4_num_bytes_used_for_ecd = 0; 2002 ps_node->as_cr_data[0].i4_num_bytes_used_for_ecd = 0; 2003 ps_node->as_cr_data[1].i4_num_bytes_used_for_ecd = 0; 2004 2005 ps_node->u1_is_valid_node = 0; 2006 2007 i8_winning_cost = i8_child_cost; 2008 } 2009 else 2010 { 2011 ihevce_debriefer_when_parent_wins( 2012 ps_node, 2013 ps_ctxt->s_cmn_opt_func.pf_copy_2d, 2014 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy, 2015 ps_nbr_data_buf, 2016 ai2_deq_data_backup, 2017 pi2_deq_data, 2018 ai2_deq_data_backup + MAX_CU_SIZE * MAX_CU_SIZE, 2019 pi2_deq_data_chroma, 2020 au2_recon_backup, 2021 pv_recon, 2022 au2_recon_backup + MAX_CU_SIZE * MAX_CU_SIZE, 2023 pv_recon_chroma, 2024 
au1_cabac_ctxt_backup, 2025 pu1_cabac_ctxt, 2026 NULL, 2027 NULL, 2028 i4_nbr_data_buf_stride, 2029 MAX_CU_SIZE, 2030 i4_deq_data_stride, 2031 MAX_CU_SIZE, 2032 i4_deq_data_stride_chroma, 2033 MAX_CU_SIZE, 2034 i4_recon_stride, 2035 MAX_CU_SIZE, 2036 i4_recon_stride_chroma, 2037 sizeof(au1_cabac_ctxt_backup), 2038 ps_ctxt->i4_cu_qp, 2039 u1_chroma_processing_enabled, 2040 u1_is_422, 2041 ps_ctxt->u1_bit_depth > 8); 2042 2043 ihevce_ecd_buffer_pointer_updater( 2044 ps_node, 2045 ppu1_ecd, 2046 pu1_ecd_bPtr_backup_t1, 2047 1, 2048 u1_chroma_processing_enabled, 2049 u1_is_422); 2050 2051 i8_winning_cost = i8_parent_cost; 2052 } 2053 } 2054 else 2055 { 2056 ihevce_debriefer_when_parent_wins( 2057 ps_node, 2058 ps_ctxt->s_cmn_opt_func.pf_copy_2d, 2059 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy, 2060 ps_nbr_data_buf, 2061 ai2_deq_data_backup, 2062 pi2_deq_data, 2063 ai2_deq_data_backup + MAX_CU_SIZE * MAX_CU_SIZE, 2064 pi2_deq_data_chroma, 2065 au2_recon_backup, 2066 pv_recon, 2067 au2_recon_backup + MAX_CU_SIZE * MAX_CU_SIZE, 2068 pv_recon_chroma, 2069 au1_cabac_ctxt_backup, 2070 pu1_cabac_ctxt, 2071 NULL, 2072 NULL, 2073 i4_nbr_data_buf_stride, 2074 MAX_CU_SIZE, 2075 i4_deq_data_stride, 2076 MAX_CU_SIZE, 2077 i4_deq_data_stride_chroma, 2078 MAX_CU_SIZE, 2079 i4_recon_stride, 2080 MAX_CU_SIZE, 2081 i4_recon_stride_chroma, 2082 sizeof(au1_cabac_ctxt_backup), 2083 ps_ctxt->i4_cu_qp, 2084 u1_chroma_processing_enabled, 2085 u1_is_422, 2086 ps_ctxt->u1_bit_depth > 8); 2087 2088 ihevce_ecd_buffer_pointer_updater( 2089 ps_node, 2090 ppu1_ecd, 2091 pu1_ecd_bPtr_backup_t1, 2092 1, 2093 u1_chroma_processing_enabled, 2094 u1_is_422); 2095 2096 i8_winning_cost = i8_parent_cost; 2097 } 2098 } 2099 else 2100 { 2101 ASSERT(ps_node->u1_is_valid_node); 2102 2103 ihevce_tu_processor( 2104 ps_ctxt, 2105 ps_node, 2106 ps_buffer_data, 2107 pu1_cabac_ctxt, 2108 i4_pred_mode, 2109 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 2110 i4_alpha_stim_multiplier, 2111 
u1_is_cu_noisy, 2112 #endif 2113 u1_chroma_processing_enabled, 2114 u1_compute_spatial_ssd); 2115 2116 if(i4_pred_mode != PRED_MODE_SKIP) 2117 { 2118 u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf( 2119 ps_node, 2120 ps_node, 2121 pu1_cabac_ctxt, 2122 MAX_TU_SIZE, 2123 MIN_TU_SIZE, 2124 0, 2125 (u1_cur_depth == u1_max_depth) ? 0 : 1, 2126 i4_pred_mode == PRED_MODE_INTRA, 2127 (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA), 2128 u1_chroma_processing_enabled, 2129 u1_is_422); 2130 2131 ps_node->s_luma_data.i8_cost += COMPUTE_RATE_COST_CLIP30( 2132 u4_tuSplitFlag_and_cbf_coding_bits, 2133 ps_ctxt->i8_cl_ssd_lambda_qf, 2134 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); 2135 } 2136 2137 i8_winning_cost += 2138 ihevce_tu_node_cost_collator(ps_node, u1_chroma_processing_enabled, u1_is_422); 2139 2140 ihevce_ecd_buffer_pointer_updater( 2141 ps_node, ppu1_ecd, pu1_ecd_bPtr_backup_t1, 1, u1_chroma_processing_enabled, u1_is_422); 2142 2143 ihevce_nbr_data_copier( 2144 ps_nbr_data_buf, 2145 i4_nbr_data_buf_stride, 2146 ps_ctxt->i4_cu_qp, 2147 ps_node->s_luma_data.u1_cbf, 2148 ps_node->s_luma_data.u1_posx, 2149 ps_node->s_luma_data.u1_posy, 2150 ps_node->s_luma_data.u1_size); 2151 } 2152 2153 return i8_winning_cost; 2154 } 2155 2156 /*! 2157 ****************************************************************************** 2158 * \if Function name : ihevce_tu_selector_debriefer \endif 2159 * 2160 * \notes 2161 * Conversion of TU Tree struct into TU info array. Collection of myriad CU 2162 * level data 2163 * Input : 1. ps_node: Pointer to current node of the TU tree. This struct 2164 * shall be modified by this function 2165 * 2. ps_final_prms: Pointer to struct that stores RDOPT output data. 2166 * This buffer shall be modified by this function 2167 * Output : 1. pi8_total_cost: Total CU-level cost 2168 * 2. pi8_total_non_coded_cost: Total CU level cost when no residue 2169 * is coded 2170 * 3. 
pi4_num_bytes_used_for_ecd: Number of bytes used for storing 2171 * entropy coding data 2172 * 4. pi4_num_bits_used_for_encoding: Number of bits used for encoding 2173 * 5. pu2_tu_ctr: Number of TU's in the CU 2174 * 2175 ***************************************************************************** 2176 */ 2177 void ihevce_tu_selector_debriefer( 2178 tu_tree_node_t *ps_node, 2179 enc_loop_cu_final_prms_t *ps_final_prms, 2180 LWORD64 *pi8_total_cost, 2181 LWORD64 *pi8_total_non_coded_cost, 2182 WORD32 *pi4_num_bytes_used_for_ecd, 2183 WORD32 *pi4_num_bits_used_for_encoding, 2184 UWORD16 *pu2_tu_ctr, 2185 WORD32 i4_cu_qp, 2186 UWORD8 u1_cu_posx, 2187 UWORD8 u1_cu_posy, 2188 UWORD8 u1_chroma_processing_enabled, 2189 UWORD8 u1_is_422, 2190 TU_POS_T e_tu_pos) 2191 { 2192 UWORD8 u1_is_chroma_tu_valid = 1; 2193 WORD32 i4_log2_size; 2194 2195 ASSERT(ps_node != NULL); 2196 2197 if(ps_node->u1_is_valid_node) 2198 { 2199 ASSERT( 2200 (NULL == ps_node->ps_child_node_tl) && (NULL == ps_node->ps_child_node_tr) && 2201 (NULL == ps_node->ps_child_node_bl) && (NULL == ps_node->ps_child_node_br)); 2202 } 2203 else 2204 { 2205 ASSERT( 2206 !((NULL == ps_node->ps_child_node_tl) || (NULL == ps_node->ps_child_node_tr) || 2207 (NULL == ps_node->ps_child_node_bl) || (NULL == ps_node->ps_child_node_br))); 2208 } 2209 2210 if(ps_node->u1_is_valid_node) 2211 { 2212 if((4 == ps_node->s_luma_data.u1_size) && (POS_TL != e_tu_pos)) 2213 { 2214 u1_is_chroma_tu_valid = INTRA_PRED_CHROMA_IDX_NONE; 2215 } 2216 2217 GETRANGE(i4_log2_size, ps_node->s_luma_data.u1_size); 2218 2219 ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[pu2_tu_ctr[0]] = 2220 ps_node->s_luma_data.u1_reconBufId; 2221 ps_final_prms->u4_cu_sad += ps_node->s_luma_data.u4_sad; 2222 ps_final_prms->u1_is_cu_coded |= ps_node->s_luma_data.u1_cbf; 2223 ps_final_prms->u4_cu_luma_res_bits += ps_node->s_luma_data.i4_bits; 2224 2225 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].i4_luma_coeff_offset = 2226 
pi4_num_bytes_used_for_ecd[0]; 2227 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_y_cbf = ps_node->s_luma_data.u1_cbf; 2228 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cb_cbf = 0; 2229 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cr_cbf = 0; 2230 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cb_cbf_subtu1 = 0; 2231 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cr_cbf_subtu1 = 0; 2232 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b3_chroma_intra_mode_idx = 2233 u1_is_chroma_tu_valid; 2234 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b7_qp = i4_cu_qp; 2235 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_first_tu_in_cu = 2236 (!ps_node->s_luma_data.u1_posx && !ps_node->s_luma_data.u1_posx); 2237 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_transquant_bypass = 0; 2238 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b3_size = i4_log2_size - 3; 2239 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b4_pos_x = 2240 (u1_cu_posx + ps_node->s_luma_data.u1_posx) / 4; 2241 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b4_pos_y = 2242 (u1_cu_posy + ps_node->s_luma_data.u1_posy) / 4; 2243 2244 ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].i2_luma_bytes_consumed = 2245 ps_node->s_luma_data.i4_num_bytes_used_for_ecd; 2246 ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].u4_luma_zero_col = 2247 ps_node->s_luma_data.i4_zero_col; 2248 ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].u4_luma_zero_row = 2249 ps_node->s_luma_data.i4_zero_row; 2250 2251 pi8_total_cost[0] += ps_node->s_luma_data.i8_cost; 2252 pi8_total_non_coded_cost[0] += ps_node->s_luma_data.i8_not_coded_cost; 2253 pi4_num_bytes_used_for_ecd[0] += ps_node->s_luma_data.i4_num_bytes_used_for_ecd; 2254 pi4_num_bits_used_for_encoding[0] += ps_node->s_luma_data.i4_bits; 2255 2256 if(u1_chroma_processing_enabled) 2257 { 2258 UWORD8 i; 2259 2260 for(i = 0; i < u1_is_422 + 1; i++) 2261 { 2262 ps_final_prms->s_recon_datastore 2263 
.au1_bufId_with_winning_ChromaRecon[U_PLANE][pu2_tu_ctr[0]][i] = 2264 ps_node->as_cb_data[i].u1_reconBufId; 2265 ps_final_prms->u1_is_cu_coded |= ps_node->as_cb_data[i].u1_cbf; 2266 ps_final_prms->u4_cu_chroma_res_bits += ps_node->as_cb_data[i].i4_bits; 2267 2268 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].ai4_cb_coeff_offset[i] = 2269 pi4_num_bytes_used_for_ecd[0]; 2270 2271 if(!i) 2272 { 2273 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cb_cbf = 2274 ps_node->as_cb_data[i].u1_cbf; 2275 } 2276 else 2277 { 2278 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cb_cbf_subtu1 = 2279 ps_node->as_cb_data[i].u1_cbf; 2280 } 2281 2282 ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].ai2_cb_bytes_consumed[i] = 2283 ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd; 2284 ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].au4_cb_zero_col[i] = 2285 ps_node->as_cb_data[i].i4_zero_col; 2286 ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].au4_cb_zero_row[i] = 2287 ps_node->as_cb_data[i].i4_zero_row; 2288 2289 pi8_total_cost[0] += ps_node->as_cb_data[i].i8_cost; 2290 pi8_total_non_coded_cost[0] += ps_node->as_cb_data[i].i8_not_coded_cost; 2291 pi4_num_bytes_used_for_ecd[0] += ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd; 2292 pi4_num_bits_used_for_encoding[0] += ps_node->as_cb_data[i].i4_bits; 2293 } 2294 2295 for(i = 0; i < u1_is_422 + 1; i++) 2296 { 2297 ps_final_prms->s_recon_datastore 2298 .au1_bufId_with_winning_ChromaRecon[V_PLANE][pu2_tu_ctr[0]][i] = 2299 ps_node->as_cr_data[i].u1_reconBufId; 2300 ps_final_prms->u1_is_cu_coded |= ps_node->as_cr_data[i].u1_cbf; 2301 ps_final_prms->u4_cu_chroma_res_bits += ps_node->as_cr_data[i].i4_bits; 2302 2303 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].ai4_cr_coeff_offset[i] = 2304 pi4_num_bytes_used_for_ecd[0]; 2305 2306 if(!i) 2307 { 2308 ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cr_cbf = 2309 ps_node->as_cr_data[i].u1_cbf; 2310 } 2311 else 2312 { 2313 
ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cr_cbf_subtu1 = 2314 ps_node->as_cr_data[i].u1_cbf; 2315 } 2316 2317 ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].ai2_cr_bytes_consumed[i] = 2318 ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd; 2319 ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].au4_cr_zero_col[i] = 2320 ps_node->as_cr_data[i].i4_zero_col; 2321 ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].au4_cr_zero_row[i] = 2322 ps_node->as_cr_data[i].i4_zero_row; 2323 2324 pi8_total_cost[0] += ps_node->as_cr_data[i].i8_cost; 2325 pi8_total_non_coded_cost[0] += ps_node->as_cr_data[i].i8_not_coded_cost; 2326 pi4_num_bytes_used_for_ecd[0] += ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd; 2327 pi4_num_bits_used_for_encoding[0] += ps_node->as_cr_data[i].i4_bits; 2328 } 2329 } 2330 2331 pu2_tu_ctr[0]++; 2332 } 2333 else 2334 { 2335 ihevce_tu_selector_debriefer( 2336 ps_node->ps_child_node_tl, 2337 ps_final_prms, 2338 pi8_total_cost, 2339 pi8_total_non_coded_cost, 2340 pi4_num_bytes_used_for_ecd, 2341 pi4_num_bits_used_for_encoding, 2342 pu2_tu_ctr, 2343 i4_cu_qp, 2344 u1_cu_posx, 2345 u1_cu_posy, 2346 u1_chroma_processing_enabled, 2347 u1_is_422, 2348 POS_TL); 2349 2350 ihevce_tu_selector_debriefer( 2351 ps_node->ps_child_node_tr, 2352 ps_final_prms, 2353 pi8_total_cost, 2354 pi8_total_non_coded_cost, 2355 pi4_num_bytes_used_for_ecd, 2356 pi4_num_bits_used_for_encoding, 2357 pu2_tu_ctr, 2358 i4_cu_qp, 2359 u1_cu_posx, 2360 u1_cu_posy, 2361 u1_chroma_processing_enabled, 2362 u1_is_422, 2363 POS_TR); 2364 2365 ihevce_tu_selector_debriefer( 2366 ps_node->ps_child_node_bl, 2367 ps_final_prms, 2368 pi8_total_cost, 2369 pi8_total_non_coded_cost, 2370 pi4_num_bytes_used_for_ecd, 2371 pi4_num_bits_used_for_encoding, 2372 pu2_tu_ctr, 2373 i4_cu_qp, 2374 u1_cu_posx, 2375 u1_cu_posy, 2376 u1_chroma_processing_enabled, 2377 u1_is_422, 2378 POS_BL); 2379 2380 ihevce_tu_selector_debriefer( 2381 ps_node->ps_child_node_br, 2382 ps_final_prms, 2383 
pi8_total_cost, 2384 pi8_total_non_coded_cost, 2385 pi4_num_bytes_used_for_ecd, 2386 pi4_num_bits_used_for_encoding, 2387 pu2_tu_ctr, 2388 i4_cu_qp, 2389 u1_cu_posx, 2390 u1_cu_posy, 2391 u1_chroma_processing_enabled, 2392 u1_is_422, 2393 POS_BR); 2394 } 2395 } 2396 2397 static UWORD8 ihevce_get_curTUSplit_from_TUSplitArray( 2398 WORD32 ai4_tuSplitArray[4], UWORD8 u1_cu_size, UWORD8 u1_tu_size, UWORD8 u1_posx, UWORD8 u1_posy) 2399 { 2400 UWORD8 u1_is_split = 0; 2401 2402 UWORD8 u1_tuSplitArrayIndex = 0; 2403 UWORD8 u1_bit_index = 0; 2404 2405 switch(u1_cu_size) 2406 { 2407 case 8: 2408 { 2409 switch(u1_tu_size) 2410 { 2411 case 8: 2412 { 2413 u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index)); 2414 2415 break; 2416 } 2417 case 4: 2418 { 2419 u1_is_split = 0; 2420 2421 break; 2422 } 2423 } 2424 2425 break; 2426 } 2427 case 16: 2428 { 2429 switch(u1_tu_size) 2430 { 2431 case 16: 2432 { 2433 u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index)); 2434 2435 break; 2436 } 2437 case 8: 2438 { 2439 u1_bit_index += ((u1_posx / 8) % 2) + 2 * ((u1_posy / 8) % 2) + 1; 2440 u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index)); 2441 2442 break; 2443 } 2444 case 4: 2445 { 2446 u1_is_split = 0; 2447 2448 break; 2449 } 2450 } 2451 2452 break; 2453 } 2454 case 32: 2455 { 2456 switch(u1_tu_size) 2457 { 2458 case 32: 2459 { 2460 u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index)); 2461 2462 break; 2463 } 2464 case 16: 2465 { 2466 u1_bit_index += 5 * ((u1_posx / 16) % 2) + 10 * ((u1_posy / 16) % 2) + 1; 2467 u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index)); 2468 2469 break; 2470 } 2471 case 8: 2472 { 2473 u1_bit_index = 5 * ((u1_posx / 16) % 2) + 10 * ((u1_posy / 16) % 2) + 1; 2474 u1_bit_index += ((u1_posx / 8) % 2) + 2 * ((u1_posy / 8) % 2) + 1; 2475 u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index)); 2476 2477 
break; 2478 } 2479 case 4: 2480 { 2481 u1_is_split = 0; 2482 2483 break; 2484 } 2485 } 2486 2487 break; 2488 } 2489 case 64: 2490 { 2491 switch(u1_tu_size) 2492 { 2493 case 64: 2494 { 2495 u1_is_split = 1; 2496 2497 break; 2498 } 2499 case 32: 2500 { 2501 u1_tuSplitArrayIndex = ((u1_posx / 32) % 2) + 2 * ((u1_posy / 32) % 2); 2502 u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index)); 2503 2504 break; 2505 } 2506 case 16: 2507 { 2508 u1_tuSplitArrayIndex = ((u1_posx / 32) % 2) + 2 * ((u1_posy / 32) % 2); 2509 u1_bit_index += 5 * ((u1_posx / 16) % 2) + 10 * ((u1_posy / 16) % 2) + 1; 2510 u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index)); 2511 2512 break; 2513 } 2514 case 8: 2515 { 2516 u1_tuSplitArrayIndex = ((u1_posx / 32) % 2) + 2 * ((u1_posy / 32) % 2); 2517 u1_bit_index += 5 * ((u1_posx / 16) % 2) + 10 * ((u1_posy / 16) % 2) + 1; 2518 u1_bit_index += ((u1_posx / 8) % 2) + 2 * ((u1_posy / 8) % 2) + 1; 2519 u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index)); 2520 2521 break; 2522 } 2523 case 4: 2524 { 2525 u1_is_split = 0; 2526 2527 break; 2528 } 2529 } 2530 2531 break; 2532 } 2533 } 2534 2535 return u1_is_split; 2536 } 2537 2538 /*! 2539 ****************************************************************************** 2540 * \if Function name : ihevce_tuSplitArray_to_tuTree_mapper \endif 2541 * 2542 * \notes 2543 * This function assumes that ihevce_tu_tree_init' has been called already. 2544 * The pointers to the children nodes of the leaf-most nodes in the tree 2545 * are assigned NULL 2546 * Input : 1. ps_root: Pointer to root of the tree containing TU info. 2547 * This struct shall be modified by this function 2548 * 2. ai4_tuSplitArray: Array containing information about TU splits 2549 * Output : 1. 
TU tree is modified such that it reflects the information 2550 * coded in ai4_tuSplitArray 2551 * 2552 ***************************************************************************** 2553 */ 2554 void ihevce_tuSplitArray_to_tuTree_mapper( 2555 tu_tree_node_t *ps_root, 2556 WORD32 ai4_tuSplitArray[4], 2557 UWORD8 u1_cu_size, 2558 UWORD8 u1_tu_size, 2559 UWORD8 u1_min_tu_size, 2560 UWORD8 u1_max_tu_size, 2561 UWORD8 u1_is_skip) 2562 { 2563 UWORD8 u1_is_split; 2564 2565 ASSERT(u1_min_tu_size >= MIN_TU_SIZE); 2566 ASSERT(u1_max_tu_size <= MAX_TU_SIZE); 2567 ASSERT(u1_min_tu_size <= u1_max_tu_size); 2568 2569 ASSERT(!u1_is_skip); 2570 2571 ASSERT(ps_root != NULL); 2572 ASSERT(ps_root->s_luma_data.u1_size == u1_tu_size); 2573 2574 if(u1_tu_size <= u1_max_tu_size) 2575 { 2576 ASSERT(ps_root->u1_is_valid_node); 2577 } 2578 else 2579 { 2580 ASSERT(!ps_root->u1_is_valid_node); 2581 } 2582 2583 if(u1_tu_size > u1_min_tu_size) 2584 { 2585 ASSERT(ps_root->ps_child_node_tl != NULL); 2586 ASSERT(ps_root->ps_child_node_tr != NULL); 2587 ASSERT(ps_root->ps_child_node_bl != NULL); 2588 ASSERT(ps_root->ps_child_node_br != NULL); 2589 ASSERT(ps_root->ps_child_node_tl->s_luma_data.u1_size == (u1_tu_size / 2)); 2590 ASSERT(ps_root->ps_child_node_tr->s_luma_data.u1_size == (u1_tu_size / 2)); 2591 ASSERT(ps_root->ps_child_node_bl->s_luma_data.u1_size == (u1_tu_size / 2)); 2592 ASSERT(ps_root->ps_child_node_br->s_luma_data.u1_size == (u1_tu_size / 2)); 2593 ASSERT(ps_root->ps_child_node_tl->u1_is_valid_node); 2594 ASSERT(ps_root->ps_child_node_tr->u1_is_valid_node); 2595 ASSERT(ps_root->ps_child_node_bl->u1_is_valid_node); 2596 ASSERT(ps_root->ps_child_node_br->u1_is_valid_node); 2597 } 2598 else 2599 { 2600 ASSERT(ps_root->ps_child_node_tl == NULL); 2601 ASSERT(ps_root->ps_child_node_tr == NULL); 2602 ASSERT(ps_root->ps_child_node_bl == NULL); 2603 ASSERT(ps_root->ps_child_node_br == NULL); 2604 } 2605 2606 u1_is_split = ihevce_get_curTUSplit_from_TUSplitArray( 2607 ai4_tuSplitArray, 2608 
u1_cu_size, 2609 u1_tu_size, 2610 ps_root->s_luma_data.u1_posx, 2611 ps_root->s_luma_data.u1_posy); 2612 2613 if(u1_tu_size == u1_min_tu_size) 2614 { 2615 ASSERT(!u1_is_split); 2616 } 2617 2618 if(u1_is_split) 2619 { 2620 ps_root->u1_is_valid_node = 0; 2621 2622 ihevce_tuSplitArray_to_tuTree_mapper( 2623 ps_root->ps_child_node_tl, 2624 ai4_tuSplitArray, 2625 u1_cu_size, 2626 ps_root->ps_child_node_tl->s_luma_data.u1_size, 2627 u1_min_tu_size, 2628 u1_max_tu_size, 2629 u1_is_skip); 2630 2631 ihevce_tuSplitArray_to_tuTree_mapper( 2632 ps_root->ps_child_node_tr, 2633 ai4_tuSplitArray, 2634 u1_cu_size, 2635 ps_root->ps_child_node_tr->s_luma_data.u1_size, 2636 u1_min_tu_size, 2637 u1_max_tu_size, 2638 u1_is_skip); 2639 2640 ihevce_tuSplitArray_to_tuTree_mapper( 2641 ps_root->ps_child_node_bl, 2642 ai4_tuSplitArray, 2643 u1_cu_size, 2644 ps_root->ps_child_node_bl->s_luma_data.u1_size, 2645 u1_min_tu_size, 2646 u1_max_tu_size, 2647 u1_is_skip); 2648 2649 ihevce_tuSplitArray_to_tuTree_mapper( 2650 ps_root->ps_child_node_br, 2651 ai4_tuSplitArray, 2652 u1_cu_size, 2653 ps_root->ps_child_node_br->s_luma_data.u1_size, 2654 u1_min_tu_size, 2655 u1_max_tu_size, 2656 u1_is_skip); 2657 } 2658 else 2659 { 2660 ps_root->ps_child_node_tl = NULL; 2661 ps_root->ps_child_node_tr = NULL; 2662 ps_root->ps_child_node_bl = NULL; 2663 ps_root->ps_child_node_br = NULL; 2664 } 2665 } 2666