1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /** 21 ******************************************************************************* 22 * @file 23 * ihevce_stasino_helpers.c 24 * 25 * @brief 26 * 27 * @author 28 * Ittiam 29 * 30 * @par List of Functions: 31 * 32 * @remarks 33 * None 34 * 35 ******************************************************************************* 36 */ 37 38 /*****************************************************************************/ 39 /* File Includes */ 40 /*****************************************************************************/ 41 /* System include files */ 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <assert.h> 45 #include <string.h> 46 47 /* User include files */ 48 #include "ihevc_typedefs.h" 49 #include "itt_video_api.h" 50 #include "ihevce_api.h" 51 52 #include "rc_cntrl_param.h" 53 #include "rc_frame_info_collector.h" 54 #include "rc_look_ahead_params.h" 55 56 #include "ihevc_defs.h" 57 #include "ihevc_structs.h" 58 #include "ihevc_platform_macros.h" 59 #include "ihevc_deblk.h" 60 #include "ihevc_itrans_recon.h" 61 #include "ihevc_chroma_itrans_recon.h" 62 #include 
"ihevc_chroma_intra_pred.h" 63 #include "ihevc_intra_pred.h" 64 #include "ihevc_inter_pred.h" 65 #include "ihevc_mem_fns.h" 66 #include "ihevc_padding.h" 67 #include "ihevc_weighted_pred.h" 68 #include "ihevc_sao.h" 69 #include "ihevc_resi_trans.h" 70 #include "ihevc_quant_iquant_ssd.h" 71 #include "ihevc_cabac_tables.h" 72 73 #include "ihevce_defs.h" 74 #include "ihevce_lap_enc_structs.h" 75 #include "ihevce_multi_thrd_structs.h" 76 #include "ihevce_me_common_defs.h" 77 #include "ihevce_had_satd.h" 78 #include "ihevce_error_codes.h" 79 #include "ihevce_bitstream.h" 80 #include "ihevce_cabac.h" 81 #include "ihevce_rdoq_macros.h" 82 #include "ihevce_function_selector.h" 83 #include "ihevce_enc_structs.h" 84 #include "ihevce_entropy_structs.h" 85 #include "ihevce_cmn_utils_instr_set_router.h" 86 #include "ihevce_enc_loop_structs.h" 87 #include "ihevce_stasino_helpers.h" 88 89 /*****************************************************************************/ 90 /* Function Definitions */ 91 /*****************************************************************************/ 92 93 /** 94 ******************************************************************************* 95 * 96 * @brief 97 * This function calculates the variance of given data set. 98 * 99 * @par Description: 100 * This function is mainly used to find the variance of the block of pixel values. 101 * The block can be rectangular also. Single pass variance calculation 102 * implementation. 103 * 104 * @param[in] p_input 105 * The input buffer to calculate the variance. 
/**
*******************************************************************************
*
* @brief
*  Computes the mean and the variance of a rectangular block of 8-bit samples.
*
* @par Description:
*  Single pass implementation. The sum and the sum of squares of all samples
*  are accumulated and combined as
*      mean     = sum / N
*      variance = (N * sum_sq - sum * sum) / (N * N)
*  where N = u1_block_height * u1_block_width. All divisions are integer
*  divisions. When u1_disable_normalization is non-zero the divisions are
*  skipped, i.e. the raw sum and the N^2-scaled variance are reported.
*
*  High bit depth input is not processed: if u1_is_hbd is non-zero the input
*  buffer is not read and both outputs are set to 0. The same holds for an
*  empty block (height or width equal to 0).
*
* @param[in] pv_input
*  Input buffer; read as UWORD8 samples
*
* @param[in] i4_stride
*  Distance, in samples, between the starts of two consecutive rows
*
* @param[out] pi4_mean
*  Pointer to the mean (or to the plain sum when normalization is disabled)
*
* @param[out] pu4_variance
*  Pointer to the variance (or to N * sum_sq - sum * sum when normalization
*  is disabled). The 64-bit intermediate is truncated to 32 bits on store.
*
* @param[in] u1_block_height
*  Height of the sample block
*
* @param[in] u1_block_width
*  Width of the sample block
*
* @param[in] u1_is_hbd
*  1 if the data is in high bit depth (not supported, outputs are zeroed)
*
* @param[in] u1_disable_normalization
*  1 to skip the division by N (mean) and by N * N (variance)
*
* @remarks
*  None
*
*******************************************************************************
*/
void ihevce_calc_variance(
    void *pv_input,
    WORD32 i4_stride,
    WORD32 *pi4_mean,
    UWORD32 *pu4_variance,
    UWORD8 u1_block_height,
    UWORD8 u1_block_width,
    UWORD8 u1_is_hbd,
    UWORD8 u1_disable_normalization)
{
    /* 64-bit sample count: N may reach 255 * 255, so N * N does not fit in */
    /* a signed 32-bit integer                                              */
    const ULWORD64 u8_num_samples = (ULWORD64)u1_block_height * u1_block_width;

    ULWORD64 u8_sum = 0;
    ULWORD64 u8_sq_sum = 0;
    ULWORD64 u8_mean = 0;
    ULWORD64 u8_variance = 0;

    /* only 8-bit data is handled; an empty block would divide by zero */
    if(!u1_is_hbd && (0 != u8_num_samples))
    {
        const UWORD8 *pu1_src = (const UWORD8 *)pv_input;
        WORD32 i4_row, i4_col;

        for(i4_row = 0; i4_row < u1_block_height; i4_row++)
        {
            for(i4_col = 0; i4_col < u1_block_width; i4_col++)
            {
                const UWORD32 u4_sample = pu1_src[i4_row * i4_stride + i4_col];

                u8_sum += u4_sample;
                u8_sq_sum += u4_sample * u4_sample;
            }
        }

        /* N * sum_sq >= sum^2 for any data set, so the difference cannot wrap */
        u8_mean = u8_sum;
        u8_variance = (u8_num_samples * u8_sq_sum) - (u8_sum * u8_sum);

        if(!u1_disable_normalization)
        {
            u8_mean /= u8_num_samples;
            u8_variance /= (u8_num_samples * u8_num_samples);
        }
    }

    /* copy back the values to the output variables */
    *pi4_mean = (WORD32)u8_mean;
    *pu4_variance = (UWORD32)u8_variance;
}
/**
*******************************************************************************
*
* @brief
*  Accumulates un-normalised first and second order statistics of a block of
*  signed 16-bit samples.
*
* @par Description:
*  Single pass implementation. With N = u1_block_height * u1_block_width the
*  function reports
*      *pi4_mean     = sum of all samples            (NOT divided by N)
*      *pu4_variance = N * sum_sq - sum * sum        (NOT divided by N * N)
*  Both values are computed in 64 bits and truncated to 32 bits on store.
*
* @param[in] pv_input
*  Input buffer of WORD16 samples
*
* @param[in] i4_stride
*  Distance, in samples, between the starts of two consecutive rows
*
* @param[out] pi4_mean
*  Pointer to the sum of the samples of the block
*
* @param[out] pu4_variance
*  Pointer to the N^2-scaled variance of the block
*
* @param[in] u1_block_height
*  Height of the sample block
*
* @param[in] u1_block_width
*  Width of the sample block
*
* @remarks
*  None
*
*******************************************************************************
*/
void ihevce_calc_variance_signed(
    WORD16 *pv_input,
    WORD32 i4_stride,
    WORD32 *pi4_mean,
    UWORD32 *pu4_variance,
    UWORD8 u1_block_height,
    UWORD8 u1_block_width)
{
    const WORD32 i4_num_samples = u1_block_height * u1_block_width;
    LWORD64 i8_sum = 0;
    LWORD64 i8_sq_sum = 0;
    WORD32 i4_row;

    for(i4_row = 0; i4_row < u1_block_height; i4_row++)
    {
        /* walk one row through a running sample index */
        WORD32 i4_idx = i4_row * i4_stride;
        const WORD32 i4_row_end = i4_idx + u1_block_width;

        while(i4_idx < i4_row_end)
        {
            const WORD32 i4_sample = pv_input[i4_idx];

            i8_sum += i4_sample;
            i8_sq_sum += i4_sample * i4_sample;
            i4_idx++;
        }
    }

    /* no normalisation: callers receive the sum and the N^2-scaled variance */
    *pi4_mean = (WORD32)i8_sum;
    *pu4_variance = (UWORD32)((i4_num_samples * i8_sq_sum) - (i8_sum * i8_sum));
}
******************************************************************************* 270 * 271 * @brief 272 * This function calculates the variance of a chrominance plane for 420SP data 273 * 274 * @par Description: 275 * This function is mainly used to find the variance of the block of pixel values. 276 * The block can be rectangular also. Single pass variance calculation 277 * implementation. 278 * 279 * @param[in] p_input 280 * The input buffer to calculate the variance. 281 * 282 * @param[in] stride 283 * Stride for the input buffer 284 * 285 * @param[out] pi4_mean 286 * Pointer ot the mean of the datset 287 * 288 * @param[out] pi4_variance 289 * Pointer tot he variabce of the data set 290 * 291 * @param[in] block_height 292 * height of the pixel block 293 * 294 * @param[in] block_width 295 * width of the pixel block 296 * 297 * @param[in] u1_is_hbd 298 * 1 if the data is in high bit depth 299 * 300 * @param[in] e_chroma_plane 301 * is U or V 302 * 303 * @remarks 304 * None 305 * 306 ******************************************************************************* 307 */ 308 void ihevce_calc_chroma_variance( 309 void *pv_input, 310 WORD32 i4_stride, 311 WORD32 *pi4_mean, 312 UWORD32 *pu4_variance, 313 UWORD8 u1_block_height, 314 UWORD8 u1_block_width, 315 UWORD8 u1_is_hbd, 316 CHROMA_PLANE_ID_T e_chroma_plane) 317 { 318 UWORD8 *pui1_buffer; // pointer for 8 bit usecase 319 WORD32 i, j; 320 WORD32 total_elements; 321 322 LWORD64 mean; 323 ULWORD64 variance; 324 LWORD64 sum; 325 LWORD64 sq_sum; 326 327 /* intialisation */ 328 total_elements = u1_block_height * u1_block_width; 329 mean = 0; 330 variance = 0; 331 sum = 0; 332 sq_sum = 0; 333 334 /* handle the case of 8/10 bit depth separately */ 335 if(!u1_is_hbd) 336 { 337 pui1_buffer = (UWORD8 *)pv_input; 338 339 pui1_buffer += e_chroma_plane; 340 341 /* loop over all the values in the block */ 342 for(i = 0; i < u1_block_height; i++) 343 { 344 /* loop over a row in the block */ 345 for(j = 0; j < u1_block_width; j++) 
346 { 347 sum += pui1_buffer[i * i4_stride + j * 2]; 348 sq_sum += (pui1_buffer[i * i4_stride + j * 2] * pui1_buffer[i * i4_stride + j * 2]); 349 } 350 } 351 352 mean = sum / total_elements; 353 variance = ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements)); 354 } 355 356 /* copy back the values to the output variables */ 357 *pi4_mean = mean; 358 *pu4_variance = variance; 359 } 360 361 LWORD64 ihevce_inject_stim_into_distortion( 362 void *pv_src, 363 WORD32 i4_src_stride, 364 void *pv_pred, 365 WORD32 i4_pred_stride, 366 LWORD64 i8_distortion, 367 WORD32 i4_alpha_stim_multiplier, 368 UWORD8 u1_blk_size, 369 UWORD8 u1_is_hbd, 370 UWORD8 u1_enable_psyRDOPT, 371 CHROMA_PLANE_ID_T e_chroma_plane) 372 { 373 if(!u1_enable_psyRDOPT) 374 { 375 UWORD32 u4_src_variance; 376 UWORD32 u4_pred_variance; 377 WORD32 i4_mean; 378 WORD32 i4_noise_term; 379 380 if(NULL_PLANE == e_chroma_plane) 381 { 382 ihevce_calc_variance( 383 pv_src, 384 i4_src_stride, 385 &i4_mean, 386 &u4_src_variance, 387 u1_blk_size, 388 u1_blk_size, 389 u1_is_hbd, 390 0); 391 392 ihevce_calc_variance( 393 pv_pred, 394 i4_pred_stride, 395 &i4_mean, 396 &u4_pred_variance, 397 u1_blk_size, 398 u1_blk_size, 399 u1_is_hbd, 400 0); 401 } 402 else 403 { 404 ihevce_calc_chroma_variance( 405 pv_src, 406 i4_src_stride, 407 &i4_mean, 408 &u4_src_variance, 409 u1_blk_size, 410 u1_blk_size, 411 u1_is_hbd, 412 e_chroma_plane); 413 414 ihevce_calc_chroma_variance( 415 pv_pred, 416 i4_pred_stride, 417 &i4_mean, 418 &u4_pred_variance, 419 u1_blk_size, 420 u1_blk_size, 421 u1_is_hbd, 422 e_chroma_plane); 423 } 424 425 i4_noise_term = 426 ihevce_compute_noise_term(i4_alpha_stim_multiplier, u4_src_variance, u4_pred_variance); 427 428 MULTIPLY_STIM_WITH_DISTORTION(i8_distortion, i4_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT); 429 430 return i8_distortion; 431 } 432 else 433 { 434 return i8_distortion; 435 } 436 } 437 438 UWORD8 ihevce_determine_cu_noise_based_on_8x8Blk_data( 439 UWORD8 
*pu1_is_8x8Blk_noisy, UWORD8 u1_cu_x_pos, UWORD8 u1_cu_y_pos, UWORD8 u1_cu_size) 440 { 441 UWORD8 u1_num_noisy_children = 0; 442 UWORD8 u1_start_index = (u1_cu_x_pos / 8) + u1_cu_y_pos; 443 444 if(8 == u1_cu_size) 445 { 446 return pu1_is_8x8Blk_noisy[u1_start_index]; 447 } 448 449 u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( 450 pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos, u1_cu_size / 2); 451 452 u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( 453 pu1_is_8x8Blk_noisy, u1_cu_x_pos + (u1_cu_size / 2), u1_cu_y_pos, u1_cu_size / 2); 454 455 u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( 456 pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos + (u1_cu_size / 2), u1_cu_size / 2); 457 458 u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( 459 pu1_is_8x8Blk_noisy, 460 u1_cu_x_pos + (u1_cu_size / 2), 461 u1_cu_y_pos + (u1_cu_size / 2), 462 u1_cu_size / 2); 463 464 return (u1_num_noisy_children >= 2); 465 } 466 467 /*! 468 ****************************************************************************** 469 * \if Function name : ihevce_psy_rd_cost_croma \endif 470 * 471 * \brief 472 * Calculates the psyco visual cost for RD opt. This is 473 * 474 * \param[in] pui4_source_satd 475 * This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated. 476 * \param[in] *pui1_recon 477 * This si the pointer to the pred data. 478 * \param[in] recon_stride 479 * This si the pred stride 480 * \param[in] pic_type 481 * Picture type. 482 * \param[in] layer_id 483 * Indicates the temporal layer. 484 * \param[in] lambda 485 * This is the weighting factor for the cost. 486 * \param[in] is_hbd 487 * This is the high bit depth flag which indicates if the bit depth of the pixels is 10 bit or 8 bit. 488 * \param[in] sub_sampling_type 489 * This is the chroma subsampling type. 
11 - for 420 and 13 for 422 490 * \return 491 * the cost for the psyRDopt 492 * 493 * \author 494 * Ittiam 495 * 496 ***************************************************************************** 497 */ 498 LWORD64 ihevce_psy_rd_cost_croma( 499 LWORD64 *pui4_source_satd, 500 void *p_recon, 501 WORD32 recon_stride_vert, 502 WORD32 recond_stride_horz, 503 WORD32 cu_size_luma, 504 WORD32 pic_type, 505 WORD32 layer_id, 506 WORD32 lambda, 507 WORD32 start_index, 508 WORD32 is_hbd, 509 WORD32 sub_sampling_type, 510 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) 511 { 512 /* declare local variables to store the SATD values for the pred for the current block. */ 513 LWORD64 psy_rd_cost; 514 UWORD32 lambda_mod; 515 WORD32 psy_factor; 516 517 /* declare local variables */ 518 WORD32 i; 519 WORD32 cu_total_size; 520 WORD32 num_comp_had_blocks; 521 522 UWORD8 *pu1_l0_block; 523 UWORD8 *pu1_l0_block_prev; 524 UWORD8 *pu1_recon; 525 WORD32 ht_offset; 526 WORD32 wd_offset; 527 WORD32 cu_ht; 528 WORD32 cu_wd; 529 530 WORD32 num_horz_blocks; 531 532 WORD16 pi2_residue_had[64]; 533 /* this is used as a buffer with all values equal to 0. 
This is emulate the case with 534 pred being zero in HAD fucntion */ 535 UWORD8 ai1_zeros_buffer[64]; 536 537 WORD32 had_block_size; 538 LWORD64 source_satd; // to hold source for current 8x8 block 539 LWORD64 recon_satd; // holds the current recon 8x8 satd 540 541 WORD32 index_for_src_satd; 542 543 (void)recond_stride_horz; 544 (void)pic_type; 545 (void)layer_id; 546 if(!is_hbd) 547 { 548 pu1_recon = (UWORD8 *)p_recon; 549 } 550 551 /**** initialize the variables ****/ 552 had_block_size = 4; 553 554 if(sub_sampling_type == 1) // 420 555 { 556 cu_ht = cu_size_luma / 2; 557 cu_wd = cu_size_luma / 2; 558 } 559 else 560 { 561 cu_ht = cu_size_luma; 562 cu_wd = cu_size_luma / 2; 563 } 564 565 num_horz_blocks = 2 * cu_wd / had_block_size; //ctb_width / had_block_size; 566 ht_offset = -had_block_size; 567 wd_offset = 0; //-had_block_size; 568 569 cu_total_size = cu_ht * cu_wd; 570 num_comp_had_blocks = 2 * cu_total_size / (had_block_size * had_block_size); 571 572 index_for_src_satd = start_index; 573 574 for(i = 0; i < 64; i++) 575 { 576 ai1_zeros_buffer[i] = 0; 577 } 578 579 psy_factor = PSY_STRENGTH_CHROMA; 580 psy_rd_cost = 0; 581 lambda_mod = lambda * psy_factor; 582 583 /************************************************************/ 584 /* loop over for every 4x4 blocks in the CU for Cb */ 585 for(i = 0; i < num_comp_had_blocks; i++) 586 { 587 if(i % num_horz_blocks == 0) 588 { 589 wd_offset = -had_block_size; 590 ht_offset += had_block_size; 591 } 592 wd_offset += had_block_size; 593 594 /* source satd for the current 8x8 block */ 595 source_satd = pui4_source_satd[index_for_src_satd]; 596 597 if(i % 2 != 0) 598 { 599 if(!is_hbd) 600 { 601 pu1_l0_block = pu1_l0_block_prev + 1; 602 } 603 } 604 else 605 { 606 if(!is_hbd) 607 { 608 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ 609 pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset; 610 pu1_l0_block_prev = pu1_l0_block; 611 } 612 } 613 614 if(had_block_size 
== 4) 615 { 616 if(!is_hbd) 617 { 618 recon_satd = ps_cmn_utils_optimised_function_list->pf_chroma_AC_HAD_4x4_8bit( 619 pu1_l0_block, 620 recon_stride_vert, 621 ai1_zeros_buffer, 622 had_block_size, 623 pi2_residue_had, 624 had_block_size); 625 } 626 627 /* get the additional cost function based on the absolute SATD diff of source and recon. */ 628 psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd)); 629 630 index_for_src_satd++; 631 632 if((i % num_horz_blocks) == (num_horz_blocks - 1)) 633 { 634 index_for_src_satd -= num_horz_blocks; 635 index_for_src_satd += 636 (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */ 637 } 638 639 } // if had block size ==4 640 } // for loop for all 4x4 block in the cu 641 642 psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH_CHROMA + LAMBDA_Q_SHIFT); 643 /* reutrn the additional cost for the psy RD opt */ 644 return (psy_rd_cost); 645 } 646 647 /*! 648 ****************************************************************************** 649 * \if Function name : ihevce_psy_rd_cost \endif 650 * 651 * \brief 652 * Calculates the psyco visual cost for RD opt. This is 653 * 654 * \param[in] pui4_source_satd 655 * This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated. 656 * \param[in] *pui1_recon 657 * This si the pointer to the pred data. 658 * \param[in] recon_stride 659 * This si the pred stride 660 * \param[in] pic_type 661 * Picture type. 662 * \param[in] layer_id 663 * Indicates the temporal layer. 664 * \param[in] lambda 665 * This is the weighting factor for the cost. 
666 * 667 * \return 668 * the cost for the psyRDopt 669 * 670 * \author 671 * Ittiam 672 * 673 ***************************************************************************** 674 */ 675 LWORD64 ihevce_psy_rd_cost( 676 LWORD64 *pui4_source_satd, 677 void *pv_recon, 678 WORD32 recon_stride_vert, 679 WORD32 recond_stride_horz, 680 WORD32 cu_size, 681 WORD32 pic_type, 682 WORD32 layer_id, 683 WORD32 lambda, 684 WORD32 start_index, 685 WORD32 is_hbd, 686 UWORD32 u4_psy_strength, 687 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) 688 { 689 /* declare local variables to store the SATD values for the pred for the current block. */ 690 LWORD64 psy_rd_cost; // TODO : check if overflow is there. 691 UWORD32 lambda_mod; 692 WORD32 psy_factor; 693 694 /* declare local variables */ 695 WORD32 i; 696 WORD32 cu_total_size; 697 WORD32 num_comp_had_blocks; 698 699 UWORD8 *pu1_l0_block; 700 UWORD8 *pu1_recon; 701 702 WORD32 ht_offset; 703 WORD32 wd_offset; 704 WORD32 cu_ht; 705 WORD32 cu_wd; 706 707 WORD32 num_horz_blocks; 708 709 //WORD16 pi2_residue_had[64]; 710 WORD16 pi2_residue_had_zscan[64]; 711 //WORD16 pi2_residue[64]; 712 /* this is used as a buffer with all values equal to 0. 
This is emulate the case with 713 pred being zero in HAD fucntion */ 714 UWORD8 ai1_zeros_buffer[64]; 715 716 WORD32 had_block_size; 717 LWORD64 source_satd; // to hold source for current 8x8 block 718 LWORD64 recon_satd; // holds the current recon 8x8 satd 719 720 WORD32 index_for_src_satd; 721 722 (void)recond_stride_horz; 723 (void)pic_type; 724 (void)layer_id; 725 /***** initialize the variables ****/ 726 had_block_size = 8; 727 cu_ht = cu_size; 728 cu_wd = cu_size; 729 730 num_horz_blocks = cu_wd / had_block_size; //ctb_width / had_block_size; 731 732 ht_offset = -had_block_size; 733 wd_offset = 0 - had_block_size; 734 735 cu_total_size = cu_ht * cu_wd; 736 num_comp_had_blocks = cu_total_size / (had_block_size * had_block_size); 737 738 index_for_src_satd = start_index; 739 740 for(i = 0; i < 64; i++) 741 { 742 ai1_zeros_buffer[i] = 0; 743 } 744 psy_factor = u4_psy_strength; //PSY_STRENGTH; 745 psy_rd_cost = 0; 746 lambda_mod = lambda * psy_factor; 747 748 if(!is_hbd) 749 { 750 pu1_recon = (UWORD8 *)pv_recon; 751 } 752 753 /**************************************************************/ 754 /* loop over for every 8x8 blocks in the CU */ 755 for(i = 0; i < num_comp_had_blocks; i++) 756 { 757 if(i % num_horz_blocks == 0) 758 { 759 wd_offset = -had_block_size; 760 ht_offset += had_block_size; 761 } 762 wd_offset += had_block_size; 763 764 /* source satd for the current 8x8 block */ 765 source_satd = pui4_source_satd[index_for_src_satd]; 766 767 if(had_block_size == 8) 768 { 769 //WORD32 index; 770 //WORD32 u4_satd; 771 //WORD32 dst_strd = 8; 772 //WORD32 i4_frm_qstep = 0; 773 //WORD32 early_cbf; 774 if(!is_hbd) 775 { 776 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ 777 pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset; 778 779 recon_satd = ps_cmn_utils_optimised_function_list->pf_AC_HAD_8x8_8bit( 780 pu1_l0_block, 781 recon_stride_vert, 782 ai1_zeros_buffer, 783 had_block_size, 784 
pi2_residue_had_zscan, 785 had_block_size); 786 } 787 788 /* get the additional cost function based on the absolute SATD diff of source and recon. */ 789 psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd)); 790 791 index_for_src_satd++; 792 if((i % num_horz_blocks) == (num_horz_blocks - 1)) 793 { 794 index_for_src_satd -= num_horz_blocks; 795 index_for_src_satd += 796 (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */ 797 } 798 } // if 799 } // for loop 800 psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH + LAMBDA_Q_SHIFT); 801 802 /* reutrn the additional cost for the psy RD opt */ 803 return (psy_rd_cost); 804 } 805 806 unsigned long ihevce_calc_stim_injected_variance( 807 ULWORD64 *pu8_sigmaX, 808 ULWORD64 *pu8_sigmaXSquared, 809 ULWORD64 *u8_var, 810 WORD32 i4_inv_wpred_wt, 811 WORD32 i4_inv_wt_shift_val, 812 WORD32 i4_wpred_log_wdc, 813 WORD32 i4_part_id) 814 { 815 ULWORD64 u8_X_Square, u8_temp_var; 816 WORD32 i4_bits_req; 817 818 const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; 819 820 u8_X_Square = (pu8_sigmaX[i4_part_id] * pu8_sigmaX[i4_part_id]); 821 u8_temp_var = pu8_sigmaXSquared[i4_part_id] - u8_X_Square; 822 823 if(i4_inv_wpred_wt != i4_default_src_wt) 824 { 825 i4_inv_wpred_wt = i4_inv_wpred_wt >> i4_inv_wt_shift_val; 826 827 u8_temp_var = SHR_NEG( 828 (u8_temp_var * i4_inv_wpred_wt * i4_inv_wpred_wt), 829 (30 - (2 * i4_inv_wt_shift_val) - i4_wpred_log_wdc * 2)); 830 } 831 832 GETRANGE64(i4_bits_req, u8_temp_var); 833 834 if(i4_bits_req > 27) 835 { 836 *u8_var = u8_temp_var >> (i4_bits_req - 27); 837 return (i4_bits_req - 27); 838 } 839 else 840 { 841 *u8_var = u8_temp_var; 842 return 0; 843 } 844 } 845 846 unsigned long ihevce_calc_variance_for_diff_weights( 847 ULWORD64 *pu8_sigmaX, 848 ULWORD64 *pu8_sigmaXSquared, 849 ULWORD64 *u8_var, 850 WORD32 *pi4_inv_wt, 851 WORD32 *pi4_inv_wt_shift_val, 852 pu_result_t *ps_result, 853 WORD32 i4_wpred_log_wdc, 854 PART_ID_T *pe_part_id, 855 UWORD8 
u1_cu_size, 856 UWORD8 u1_num_parts, 857 UWORD8 u1_is_for_src) 858 { 859 WORD32 i4_k; 860 UWORD32 u4_wd, u4_ht; 861 UWORD8 u1_num_base_blks; 862 UWORD32 u4_num_pixels_in_part; 863 UWORD8 u1_index; 864 WORD32 i4_bits_req; 865 866 UWORD8 u1_base_blk_size = 4; 867 UWORD32 u4_tot_num_pixels = u1_cu_size * u1_cu_size; 868 ULWORD64 u8_temp_sigmaX[MAX_NUM_INTER_PARTS] = { 0, 0 }; 869 ULWORD64 u8_temp_sigmaXsquared[MAX_NUM_INTER_PARTS] = { 0, 0 }; 870 ULWORD64 u8_z; 871 872 const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; 873 874 for(i4_k = 0; i4_k < u1_num_parts; i4_k++) 875 { 876 u4_wd = ps_result[i4_k].pu.b4_wd + 1; 877 u4_ht = ps_result[i4_k].pu.b4_ht + 1; 878 u1_num_base_blks = u4_wd * u4_ht; 879 u4_num_pixels_in_part = u1_num_base_blks * u1_base_blk_size * u1_base_blk_size; 880 881 if(u1_is_for_src) 882 { 883 u1_index = pe_part_id[i4_k]; 884 } 885 else 886 { 887 u1_index = i4_k; 888 } 889 890 u8_temp_sigmaXsquared[i4_k] = pu8_sigmaXSquared[u1_index] / u4_num_pixels_in_part; 891 u8_temp_sigmaX[i4_k] = pu8_sigmaX[u1_index]; 892 893 if(u1_is_for_src) 894 { 895 if(pi4_inv_wt[i4_k] != i4_default_src_wt) 896 { 897 pi4_inv_wt[i4_k] = pi4_inv_wt[i4_k] >> pi4_inv_wt_shift_val[i4_k]; 898 u8_temp_sigmaX[i4_k] = SHR_NEG( 899 (u8_temp_sigmaX[i4_k] * pi4_inv_wt[i4_k]), 900 (15 - pi4_inv_wt_shift_val[i4_k] - i4_wpred_log_wdc)); 901 u8_temp_sigmaXsquared[i4_k] = SHR_NEG( 902 (u8_temp_sigmaXsquared[i4_k] * pi4_inv_wt[i4_k] * pi4_inv_wt[i4_k]), 903 (30 - (2 * pi4_inv_wt_shift_val[i4_k]) - i4_wpred_log_wdc * 2)); 904 } 905 } 906 } 907 908 u8_z = (u4_tot_num_pixels * (u8_temp_sigmaXsquared[0] + u8_temp_sigmaXsquared[1])) - 909 ((u8_temp_sigmaX[0] + u8_temp_sigmaX[1]) * (u8_temp_sigmaX[0] + u8_temp_sigmaX[1])); 910 911 GETRANGE64(i4_bits_req, u8_z); 912 913 if(i4_bits_req > 27) 914 { 915 *u8_var = u8_z >> (i4_bits_req - 27); 916 return (i4_bits_req - 27); 917 } 918 else 919 { 920 *u8_var = u8_z; 921 return 0; 922 } 923 } 924