1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /*! 22 ****************************************************************************** 23 * \file ihevce_coarse_me_pass.c 24 * 25 * \brief 26 * Converts the language of the encoder to language of me. This is an i/f 27 * between the encoder style APIs and ME style APIs. This is basically 28 * a memoryless glue layer. 29 * 30 * \date 31 * 22/10/2012 32 * 33 * \author 34 * Ittiam 35 * 36 * 37 * List of Functions 38 * 39 * 40 ****************************************************************************** 41 */ 42 43 /*****************************************************************************/ 44 /* File Includes */ 45 /*****************************************************************************/ 46 /* System include files */ 47 #include <stdio.h> 48 #include <string.h> 49 #include <stdlib.h> 50 #include <assert.h> 51 #include <stdarg.h> 52 #include <math.h> 53 54 /* User include files */ 55 #include "ihevc_typedefs.h" 56 #include "itt_video_api.h" 57 #include "ihevce_api.h" 58 59 #include "rc_cntrl_param.h" 60 #include "rc_frame_info_collector.h" 61 #include "rc_look_ahead_params.h" 62 63 #include "ihevc_defs.h" 64 #include "ihevc_structs.h" 65 #include "ihevc_platform_macros.h" 66 #include "ihevc_deblk.h" 67 #include "ihevc_itrans_recon.h" 68 #include "ihevc_chroma_itrans_recon.h" 69 #include "ihevc_chroma_intra_pred.h" 70 #include "ihevc_intra_pred.h" 71 #include "ihevc_inter_pred.h" 72 #include "ihevc_mem_fns.h" 73 #include "ihevc_padding.h" 74 #include "ihevc_weighted_pred.h" 75 #include "ihevc_sao.h" 76 #include "ihevc_resi_trans.h" 77 #include "ihevc_quant_iquant_ssd.h" 78 #include "ihevc_cabac_tables.h" 79 80 #include "ihevce_defs.h" 81 #include "ihevce_lap_enc_structs.h" 82 #include "ihevce_multi_thrd_structs.h" 83 #include "ihevce_me_common_defs.h" 84 #include "ihevce_had_satd.h" 85 #include "ihevce_error_codes.h" 86 #include "ihevce_bitstream.h" 87 #include "ihevce_cabac.h" 88 #include "ihevce_rdoq_macros.h" 89 #include "ihevce_function_selector.h" 90 #include "ihevce_enc_structs.h" 91 #include "ihevce_entropy_structs.h" 92 #include "ihevce_cmn_utils_instr_set_router.h" 93 #include "ihevce_enc_loop_structs.h" 94 #include "ihevce_bs_compute_ctb.h" 95 #include "ihevce_global_tables.h" 96 #include "ihevce_dep_mngr_interface.h" 97 #include "hme_datatype.h" 98 #include "hme_interface.h" 99 #include "hme_common_defs.h" 100 #include "hme_defs.h" 101 #include "ihevce_me_instr_set_router.h" 102 #include "ihevce_ipe_instr_set_router.h" 103 #include "ihevce_ipe_structs.h" 104 #include "hme_globals.h" 105 #include "hme_utils.h" 106 #include "hme_coarse.h" 107 #include "hme_refine.h" 108 #include "ihevce_me_pass.h" 109 #include "ihevce_coarse_me_pass.h" 110 111 /*****************************************************************************/ 112 /* Function Definitions */ 113 /*****************************************************************************/ 114 115 /*! 116 ****************************************************************************** 117 * \if Function name : ihevce_coarse_me_get_num_mem_recs \endif 118 * 119 * \brief 120 * Number of memory records are returned for ME module 121 * Note : Include total mem. req. for HME + Total mem. req. for Dep Mngr for HME 122 * 123 * \return 124 * Number of memory records 125 * 126 * \author 127 * Ittiam 128 * 129 ***************************************************************************** 130 */ 131 WORD32 ihevce_coarse_me_get_num_mem_recs() 132 { 133 WORD32 hme_mem_recs = hme_coarse_num_alloc(); 134 WORD32 hme_dep_mngr_mem_recs = hme_coarse_dep_mngr_num_alloc(); 135 136 return ((hme_mem_recs + hme_dep_mngr_mem_recs)); 137 } 138 139 /*! 140 ****************************************************************************** 141 * \if Function name : ihevce_coarse_me_get_mem_recs \endif 142 * 143 * \brief 144 * Memory requirements are returned for coarse ME. 145 * 146 * \param[in,out] ps_mem_tab : pointer to memory descriptors table 147 * \param[in] ps_init_prms : Create time static parameters 148 * \param[in] i4_num_proc_thrds : Number of processing threads for this module 149 * \param[in] i4_mem_space : memspace in whihc memory request should be done 150 * 151 * \return 152 * Number of records 153 * 154 * \author 155 * Ittiam 156 * 157 ***************************************************************************** 158 */ 159 WORD32 ihevce_coarse_me_get_mem_recs( 160 iv_mem_rec_t *ps_mem_tab, 161 ihevce_static_cfg_params_t *ps_init_prms, 162 WORD32 i4_num_proc_thrds, 163 WORD32 i4_mem_space, 164 WORD32 i4_resolution_id) 165 { 166 hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS]; 167 WORD32 n_tabs, i; 168 169 /* Init prms structure specific to HME */ 170 hme_init_prms_t s_hme_init_prms; 171 172 //return (ihevce_coarse_me_get_num_mem_recs()); 173 /*************************************************************************/ 174 /* code flow: we call hme alloc function and then remap those memtabs */ 175 /* to a different type of memtab structure. */ 176 /*************************************************************************/ 177 ASSERT(HME_COARSE_TOT_MEMTABS >= hme_coarse_num_alloc()); 178 179 /*************************************************************************/ 180 /* POPULATE THE HME INIT PRMS */ 181 /*************************************************************************/ 182 ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id); 183 184 /*************************************************************************/ 185 /* CALL THE ME FUNCTION TO GET MEMTABS */ 186 /*************************************************************************/ 187 n_tabs = hme_coarse_alloc(&as_memtabs[0], &s_hme_init_prms); 188 ASSERT(n_tabs == hme_coarse_num_alloc()); 189 190 /*************************************************************************/ 191 /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE */ 192 /*************************************************************************/ 193 for(i = 0; i < n_tabs; i++) 194 { 195 ps_mem_tab[i].i4_mem_size = as_memtabs[i].size; 196 ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align; 197 ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 198 ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t); 199 } 200 201 /*************************************************************************/ 202 /* --- HME Coarse sync Dep Mngr Mem requests -- */ 203 /*************************************************************************/ 204 { 205 WORD32 n_dep_tabs; 206 207 ps_mem_tab += n_tabs; 208 209 n_dep_tabs = hme_coarse_dep_mngr_alloc( 210 ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id); 211 212 ASSERT(n_dep_tabs == hme_coarse_dep_mngr_num_alloc()); 213 214 /* Update the total no. of mem tabs */ 215 n_tabs += n_dep_tabs; 216 } 217 218 return (n_tabs); 219 } 220 221 /*! 222 ****************************************************************************** 223 * \if Function name : ihevce_coarse_me_init \endif 224 * 225 * \brief 226 * Intialization for ME context state structure . 227 * 228 * \param[in] ps_mem_tab : pointer to memory descriptors table 229 * \param[in] ps_init_prms : Create time static parameters 230 * \param[in] pv_osal_handle : Osal handle 231 * 232 * \return 233 * Handle to the ME context 234 * 235 * \author 236 * Ittiam 237 * 238 ***************************************************************************** 239 */ 240 void *ihevce_coarse_me_init( 241 iv_mem_rec_t *ps_mem_tab, 242 ihevce_static_cfg_params_t *ps_init_prms, 243 WORD32 i4_num_proc_thrds, 244 void *pv_osal_handle, 245 WORD32 i4_resolution_id, 246 UWORD8 u1_is_popcnt_available) 247 { 248 /* ME handle to be returned */ 249 void *pv_me_ctxt; 250 WORD32 status; 251 coarse_me_master_ctxt_t *ps_ctxt; 252 253 /* Init prms structure specific to HME */ 254 hme_init_prms_t s_hme_init_prms; 255 256 /* memtabs to be passed to hme */ 257 hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS]; 258 WORD32 n_tabs, n_dep_tabs, i; 259 260 /*************************************************************************/ 261 /* POPULATE THE HME INIT PRMS */ 262 /*************************************************************************/ 263 ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id); 264 265 /*************************************************************************/ 266 /* Ensure local declaration is sufficient */ 267 /*************************************************************************/ 268 n_tabs = hme_coarse_num_alloc(); 269 ASSERT(HME_COARSE_TOT_MEMTABS >= n_tabs); 270 271 /*************************************************************************/ 272 /* MAP RESULTS TO HME MEMTAB STRUCTURE */ 273 /*************************************************************************/ 274 for(i = 0; i < n_tabs; i++) 275 { 276 as_memtabs[i].size = ps_mem_tab[i].i4_mem_size; 277 as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment; 278 as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base; 279 } 280 /*************************************************************************/ 281 /* CALL THE ME FUNCTION TO GET MEMTABS */ 282 /*************************************************************************/ 283 pv_me_ctxt = (void *)as_memtabs[0].pu1_mem; 284 status = hme_coarse_init(pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms); 285 ps_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 286 if(status == -1) 287 return NULL; 288 289 /*************************************************************************/ 290 /* --- HME sync Dep Mngr Mem init -- */ 291 /*************************************************************************/ 292 293 ps_mem_tab += n_tabs; 294 295 n_dep_tabs = hme_coarse_dep_mngr_init( 296 ps_mem_tab, ps_init_prms, pv_me_ctxt, pv_osal_handle, i4_num_proc_thrds, i4_resolution_id); 297 ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc()); 298 299 n_tabs += n_dep_tabs; 300 301 ihevce_me_instr_set_router( 302 (ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list, 303 ps_init_prms->e_arch_type); 304 305 ihevce_cmn_utils_instr_set_router( 306 &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type); 307 308 return (pv_me_ctxt); 309 } 310 311 /*! 312 ****************************************************************************** 313 * \if Function name : ihevce_coarse_me_reg_thrds_sem \endif 314 * 315 * \brief 316 * Intialization for ME context state structure with semaphores . 317 * 318 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt 319 * \param[in] ppv_sem_hdls : Array of semaphore handles 320 * \param[in] i4_num_proc_thrds : Number of processing threads 321 * 322 * \return 323 * none 324 * 325 * \author 326 * Ittiam 327 * 328 ***************************************************************************** 329 */ 330 void ihevce_coarse_me_reg_thrds_sem(void *pv_me_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds) 331 { 332 hme_coarse_dep_mngr_reg_sem(pv_me_ctxt, ppv_sem_hdls, i4_num_proc_thrds); 333 334 return; 335 } 336 337 /*! 338 ****************************************************************************** 339 * \if Function name : ihevce_coarse_me_delete \endif 340 * 341 * \brief 342 * Destroy Coarse ME module 343 * Note : Only Destroys the resources allocated in the module like 344 * semaphore,etc. Memory free is done Separately using memtabs 345 * 346 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt 347 * \param[in] ps_init_prms : Create time static parameters 348 * \param[in] pv_osal_handle : Osal handle 349 * 350 * \return 351 * None 352 * 353 * \author 354 * Ittiam 355 * 356 ***************************************************************************** 357 */ 358 void ihevce_coarse_me_delete( 359 void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id) 360 { 361 /* --- HME sync Dep Mngr Delete --*/ 362 hme_coarse_dep_mngr_delete(pv_me_ctxt, ps_init_prms, i4_resolution_id); 363 } 364 365 /** 366 ******************************************************************************* 367 * \if Function name : ihevce_coarse_me_set_resolution \endif 368 * 369 * \brief 370 * Sets the resolution for ME state 371 * 372 * \par Description: 373 * ME requires information of resolution to prime up its layer descriptors 374 * and contexts. This API is called whenever a control call from application 375 * causes a change of resolution. Has to be called once initially before 376 * processing any frame. Again this is just a glue function and calls the 377 * actual ME API for the same. 378 * 379 * \param[in,out] pv_me_ctxt: Handle to the ME context 380 * \param[in] n_enc_layers: Number of layers getting encoded 381 * \param[in] p_wd : Pointer containing widths of each layer getting encoded. 382 * \param[in] p_ht : Pointer containing heights of each layer getting encoded. 383 * 384 * \returns 385 * none 386 * 387 * \author 388 * Ittiam 389 * 390 ******************************************************************************* 391 */ 392 void ihevce_coarse_me_set_resolution( 393 void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht) 394 { 395 /* local variables */ 396 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 397 WORD32 thrds; 398 399 for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++) 400 { 401 coarse_me_ctxt_t *ps_me_thrd_ctxt; 402 403 ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds]; 404 405 hme_coarse_set_resolution((void *)ps_me_thrd_ctxt, n_enc_layers, p_wd, p_ht); 406 } 407 } 408 void ihevce_coarse_me_get_rc_param( 409 void *pv_me_ctxt, 410 LWORD64 *i8_acc_frame_hme_cost, 411 LWORD64 *i8_acc_frame_hme_sad, 412 LWORD64 *i8_acc_num_blks_higher_sad, 413 LWORD64 *i8_total_blks, 414 WORD32 i4_is_prev_pic_same_scene) 415 { 416 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 417 WORD32 thrds; 418 coarse_me_ctxt_t *ps_me_thrd_ctxt; 419 420 *i8_acc_frame_hme_cost = 0; 421 *i8_acc_frame_hme_sad = 0; 422 423 for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++) 424 { 425 ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds]; 426 *i8_acc_frame_hme_cost += ps_me_thrd_ctxt->i4_L1_hme_best_cost; 427 428 /*Calculate me cost wrt. to ref only for P frame */ 429 if(ps_me_thrd_ctxt->s_frm_prms.is_i_pic == ps_me_thrd_ctxt->s_frm_prms.bidir_enabled) 430 { 431 *i8_acc_num_blks_higher_sad += ps_me_thrd_ctxt->i4_num_blks_high_sad; 432 *i8_total_blks += ps_me_thrd_ctxt->i4_num_blks; 433 } 434 435 *i8_acc_frame_hme_sad += ps_me_thrd_ctxt->i4_L1_hme_sad; 436 } 437 } 438 439 /*! 440 ****************************************************************************** 441 * \if Function name : ihevce_coarse_me_process \endif 442 * 443 * \brief 444 * Frame level ME function 445 * 446 * \par Description: 447 * Processing of all layers starting from coarse and going 448 * to the refinement layers, except enocde layer 449 * 450 * \param[in] pv_ctxt : pointer to ME module 451 * \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer) 452 * \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer) 453 * \param[out] ps_cu_out : pointer to CU analyse output structure (frame buffer) 454 * \param[in] pd_intra_costs : pointerto intra cost buffer 455 * \param[in] ps_multi_thrd_ctxt : pointer to multi thread ctxt 456 * \param[in] thrd_id : Thread id of the current thrd in which function is executed 457 * 458 * \return 459 * None 460 * 461 * \author 462 * Ittiam 463 * 464 ***************************************************************************** 465 */ 466 void ihevce_coarse_me_process( 467 void *pv_me_ctxt, 468 ihevce_lap_enc_buf_t *ps_enc_lap_inp, 469 multi_thrd_ctxt_t *ps_multi_thrd_ctxt, 470 WORD32 thrd_id, 471 WORD32 i4_ping_pong) 472 473 { 474 /* local variables */ 475 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 476 coarse_me_ctxt_t *ps_thrd_ctxt; 477 478 /* get the current thread ctxt pointer */ 479 ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id]; 480 ps_thrd_ctxt->thrd_id = thrd_id; 481 482 /* frame level processing function */ 483 hme_coarse_process_frm( 484 (void *)ps_thrd_ctxt, 485 &ps_master_ctxt->s_ref_map, 486 &ps_master_ctxt->s_frm_prms, 487 ps_multi_thrd_ctxt, 488 i4_ping_pong, 489 &ps_master_ctxt->apv_dep_mngr_hme_sync[0]); 490 491 return; 492 } 493 494 /*! 495 ****************************************************************************** 496 * \if Function name : ihevce_coarse_me_frame_end \endif 497 * 498 * \brief 499 * End of frame update function performs 500 * - GMV collation 501 * - Dynamic Search Range collation 502 * 503 * \param[in] pv_ctxt : pointer to ME module 504 * 505 * \return 506 * None 507 * 508 * \author 509 * Ittiam 510 * 511 ***************************************************************************** 512 */ 513 void ihevce_coarse_me_frame_end(void *pv_me_ctxt) 514 { 515 /* local variables */ 516 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 517 coarse_me_ctxt_t *ps_thrd0_ctxt; 518 layer_ctxt_t *ps_curr_layer; 519 WORD32 num_ref, num_thrds, cur_poc; 520 WORD32 coarse_layer_id; 521 WORD32 i4_num_ref; 522 ME_QUALITY_PRESETS_T e_me_quality_preset; 523 524 /* GMV collation is done for coarse Layer only */ 525 ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 526 coarse_layer_id = ps_thrd0_ctxt->num_layers - 1; 527 ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[coarse_layer_id]; 528 i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref; 529 e_me_quality_preset = ps_thrd0_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; 530 531 /* No processing is required if current pic is I pic */ 532 if(1 == ps_master_ctxt->s_frm_prms.is_i_pic) 533 { 534 return; 535 } 536 537 /* use thrd 0 ctxt to collate the GMVs histogram and Dynamic Search Range */ 538 /* across all threads */ 539 for(num_ref = 0; num_ref < i4_num_ref; num_ref++) 540 { 541 WORD32 i4_offset, i4_lobe_size, i4_layer_id; 542 mv_hist_t *ps_hist_thrd0; 543 dyn_range_prms_t *aps_dyn_range_prms_thrd0[MAX_NUM_LAYERS]; 544 545 ps_hist_thrd0 = ps_thrd0_ctxt->aps_mv_hist[num_ref]; 546 547 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ 548 if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled) 549 { 550 for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) 551 { 552 aps_dyn_range_prms_thrd0[i4_layer_id] = 553 &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref]; 554 } 555 } 556 557 i4_lobe_size = ps_hist_thrd0->i4_lobe1_size; 558 i4_offset = i4_lobe_size >> 1; 559 560 /* run a loop over all the other threads to add up the histogram */ 561 /* and to update the dynamical search range */ 562 for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) 563 { 564 dyn_range_prms_t *ps_dyn_range_prms; 565 566 if(ME_XTREME_SPEED_25 != e_me_quality_preset) 567 { 568 mv_hist_t *ps_hist; 569 WORD32 i4_y, i4_x; 570 /* get current thrd histogram pointer */ 571 ps_hist = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_mv_hist[num_ref]; 572 573 /* Accumalate the Bin count for all the thread */ 574 for(i4_y = 0; i4_y < ps_hist_thrd0->i4_num_rows; i4_y++) 575 { 576 for(i4_x = 0; i4_x < ps_hist_thrd0->i4_num_cols; i4_x++) 577 { 578 S32 i4_bin_id; 579 580 i4_bin_id = i4_x + (i4_y * ps_hist_thrd0->i4_num_cols); 581 582 ps_hist_thrd0->ai4_bin_count[i4_bin_id] += 583 ps_hist->ai4_bin_count[i4_bin_id]; 584 } 585 } 586 } 587 588 /* Update the dynamical search range for each Layer */ 589 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ 590 if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled) 591 { 592 for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) 593 { 594 /* get current thrd, layer dynamical search range param. pointer */ 595 ps_dyn_range_prms = 596 &ps_master_ctxt->aps_me_ctxt[num_thrds] 597 ->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref]; 598 /* TODO : This calls can be optimized further. No need for min in 1st call and max in 2nd call */ 599 hme_update_dynamic_search_params( 600 aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_max_y); 601 602 hme_update_dynamic_search_params( 603 aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_min_y); 604 } 605 } 606 } 607 } 608 609 /*************************************************************************/ 610 /* Get the MAX/MIN per POC distance based on the all the ref. pics */ 611 /*************************************************************************/ 612 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ 613 if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled) 614 { 615 WORD32 i4_layer_id; 616 cur_poc = ps_thrd0_ctxt->i4_curr_poc; 617 618 for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) 619 { 620 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = 0; 621 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = 0; 622 } 623 624 for(num_ref = 0; num_ref < i4_num_ref; num_ref++) 625 { 626 for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) 627 { 628 WORD16 i2_mv_per_poc; 629 WORD32 ref_poc, poc_diff; 630 dyn_range_prms_t *ps_dyn_range_prms_thrd0; 631 632 ps_dyn_range_prms_thrd0 = 633 &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref]; 634 635 ref_poc = ps_dyn_range_prms_thrd0->i4_poc; 636 ASSERT(ref_poc < cur_poc); 637 poc_diff = (cur_poc - ref_poc); 638 639 /* cur. ref. pic. max y per POC */ 640 i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff; 641 /* update the max y per POC */ 642 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = 643 MAX(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id], 644 i2_mv_per_poc); 645 646 /* cur. ref. pic. min y per POC */ 647 i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff; 648 /* update the min y per POC */ 649 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = 650 MIN(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id], 651 i2_mv_per_poc); 652 } 653 } 654 655 /*************************************************************************/ 656 /* Populate the results to all thread ctxt */ 657 /*************************************************************************/ 658 for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) 659 { 660 for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) 661 { 662 ps_master_ctxt->aps_me_ctxt[num_thrds] 663 ->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = 664 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id]; 665 666 ps_master_ctxt->aps_me_ctxt[num_thrds] 667 ->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = 668 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id]; 669 } 670 } 671 } 672 673 if(ME_XTREME_SPEED_25 != e_me_quality_preset) 674 { 675 /* call the function which calcualtes the GMV */ 676 /* layer pointer is shared across all threads */ 677 /* hence all threads will have access to updated */ 678 /* GMVs populated using thread 0 ctxt */ 679 for(num_ref = 0; num_ref < i4_num_ref; num_ref++) 680 { 681 hme_calculate_global_mv( 682 ps_thrd0_ctxt->aps_mv_hist[num_ref], 683 &ps_curr_layer->s_global_mv[num_ref][GMV_THICK_LOBE], 684 GMV_THICK_LOBE); 685 } 686 } 687 return; 688 } 689 690 /*! 691 ****************************************************************************** 692 * \if Function name : ihevce_coarse_me_frame_dpb_update \endif 693 * 694 * \brief 695 * Frame level ME initialisation function 696 * 697 * \par Description: 698 * Updation of ME's internal DPB 699 * based on available ref list information 700 * 701 * \param[in] pv_ctxt : pointer to ME module 702 * \param[in] num_ref_l0 : Number of reference pics in L0 list 703 * \param[in] num_ref_l1 : Number of reference pics in L1 list 704 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list 705 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list 706 * 707 * \return 708 * None 709 * 710 * \author 711 * Ittiam 712 * 713 ***************************************************************************** 714 */ 715 void ihevce_coarse_me_frame_dpb_update( 716 void *pv_me_ctxt, 717 WORD32 num_ref_l0, 718 WORD32 num_ref_l1, 719 recon_pic_buf_t **pps_rec_list_l0, 720 recon_pic_buf_t **pps_rec_list_l1) 721 { 722 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 723 coarse_me_ctxt_t *ps_thrd0_ctxt; 724 WORD32 a_pocs_buffered_in_me[MAX_NUM_REF + 1]; 725 WORD32 a_pocs_to_remove[MAX_NUM_REF + 2]; 726 WORD32 poc_remove_id = 0; 727 WORD32 i, count; 728 729 /* All processing done using shared / common memory across */ 730 /* threads is done using thrd ctxt */ 731 ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 732 733 /*************************************************************************/ 734 /* Updation of ME's DPB list. This involves the following steps: */ 735 /* 1. Obtain list of active POCs maintained within ME. */ 736 /* 2. Search each of them in the ref list. Whatever is not found goes to */ 737 /* the list to be removed. Note: a_pocs_buffered_in_me holds the */ 738 /* currently active POC list within ME. a_pocs_to_remove holds the */ 739 /* list of POCs to be removed, terminated by -1. */ 740 /*************************************************************************/ 741 hme_coarse_get_active_pocs_list((void *)ps_thrd0_ctxt, a_pocs_buffered_in_me); 742 743 count = 0; 744 while(a_pocs_buffered_in_me[count] != -1) 745 { 746 WORD32 poc_to_search = a_pocs_buffered_in_me[count]; 747 WORD32 match_found_flag = 0; 748 749 /*********************************************************************/ 750 /* Search in any one list (L0/L1) since both lists contain all the */ 751 /* active ref pics. */ 752 /*********************************************************************/ 753 for(i = 0; i < num_ref_l0; i++) 754 { 755 if(poc_to_search == pps_rec_list_l0[i]->i4_poc) 756 { 757 match_found_flag = 1; 758 break; 759 } 760 } 761 for(i = 0; i < num_ref_l1; i++) 762 { 763 if(poc_to_search == pps_rec_list_l1[i]->i4_poc) 764 { 765 match_found_flag = 1; 766 break; 767 } 768 } 769 770 if(0 == match_found_flag) 771 { 772 /*****************************************************************/ 773 /* POC buffered inside ME but not part of ref list given by DPB */ 774 /* Hence this needs to be flagged to ME for removal. */ 775 /*****************************************************************/ 776 a_pocs_to_remove[poc_remove_id] = poc_to_search; 777 poc_remove_id++; 778 } 779 count++; 780 } 781 782 /* List termination */ 783 a_pocs_to_remove[poc_remove_id] = -1; 784 785 /* Call the ME API to remove "outdated" POCs */ 786 hme_coarse_discard_frm(ps_thrd0_ctxt, a_pocs_to_remove); 787 } 788 789 /*! 790 ****************************************************************************** 791 * \if Function name : ihevce_coarse_me_frame_init \endif 792 * 793 * \brief 794 * Coarse Frame level ME initialisation function 795 * 796 * \par Description: 797 * The following pre-conditions exist for this function: a. We have the input 798 * pic ready for encode, b. We have the reference list with POC, L0/L1 IDs 799 * and ref ptrs ready for this picture and c. ihevce_me_set_resolution has 800 * been called atleast once. Once these are supplied, the following are 801 * done here: a. Input pyramid creation, b. Updation of ME's internal DPB 802 * based on available ref list information 803 * 804 * \param[in] pv_ctxt : pointer to ME module 805 * \param[in] ps_frm_ctb_prms : CTB characteristics parameters 806 * \param[in] ps_frm_lamda : Frame level Lambda params 807 * \param[in] num_ref_l0 : Number of reference pics in L0 list 808 * \param[in] num_ref_l1 : Number of reference pics in L1 list 809 * \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0) 810 * \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1) 811 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list 812 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list 813 * \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer) 814 * \param[in] i4_frm_qp : current picture QP 815 * 816 * \return 817 * None 818 * 819 * \author 820 * Ittiam 821 * 822 ***************************************************************************** 823 */ 824 void ihevce_coarse_me_frame_init( 825 void *pv_me_ctxt, 826 ihevce_static_cfg_params_t *ps_stat_prms, 827 frm_ctb_ctxt_t *ps_frm_ctb_prms, 828 frm_lambda_ctxt_t *ps_frm_lamda, 829 WORD32 num_ref_l0, 830 WORD32 num_ref_l1, 831 WORD32 num_ref_l0_active, 832 WORD32 num_ref_l1_active, 833 recon_pic_buf_t **pps_rec_list_l0, 834 recon_pic_buf_t **pps_rec_list_l1, 835 ihevce_lap_enc_buf_t *ps_enc_lap_inp, 836 WORD32 i4_frm_qp, 837 ihevce_ed_blk_t *ps_layer1_buf, //EIID 838 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, 839 UWORD8 *pu1_me_reverse_map_info, 840 WORD32 i4_temporal_layer_id) 841 { 842 /* local variables */ 843 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 844 coarse_me_ctxt_t *ps_ctxt; 845 coarse_me_ctxt_t *ps_thrd0_ctxt; 846 WORD32 inp_poc, num_ref; 847 WORD32 i; 848 849 /* Input POC is derived from input buffer */ 850 inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc; 851 num_ref = num_ref_l0 + num_ref_l1; 852 853 /* All processing done using shared / common memory across */ 854 /* threads is done using thrd 0 ctxt */ 855 ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 856 857 ps_master_ctxt->s_frm_prms.u1_num_active_ref_l0 = num_ref_l0_active; 858 ps_master_ctxt->s_frm_prms.u1_num_active_ref_l1 = num_ref_l1_active; 859 860 /* store the frm ctb ctxt to all the thrd ctxt */ 861 { 862 WORD32 num_thrds; 863 864 /* initialise the parameters for all the threads */ 865 for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) 866 { 867 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 868 ps_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms; 869 /*EIID: early decision buffer pointer */ 870 ps_ctxt->ps_ed_blk = ps_layer1_buf; 871 ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1; 872 873 /* weighted pred enable flag */ 874 ps_ctxt->i4_wt_pred_enable_flag = ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag | 875 ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag; 876 877 if(1 == ps_ctxt->i4_wt_pred_enable_flag) 878 { 879 /* log2 weight denom */ 880 ps_ctxt->s_wt_pred.wpred_log_wdc = 881 ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom; 882 } 883 else 884 { 885 /* default value */ 886 ps_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT; 887 } 888 ps_ctxt->i4_L1_hme_best_cost = 0; 889 ps_ctxt->i4_L1_hme_sad = 0; 890 ps_ctxt->i4_num_blks_high_sad = 0; 891 ps_ctxt->i4_num_blks = 0; 892 893 ps_ctxt->pv_me_optimised_function_list = ps_master_ctxt->pv_me_optimised_function_list; 894 ps_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func; 895 } 896 } 897 /* Create the reference map for ME */ 898 ihevce_me_create_ref_map( 899 pps_rec_list_l0, 900 pps_rec_list_l1, 901 num_ref_l0_active, 902 num_ref_l1_active, 903 num_ref, 904 &ps_master_ctxt->s_ref_map); 905 /*************************************************************************/ 906 /* Call the ME frame level processing for further actiion. */ 907 /* ToDo: Support Row Level API. */ 908 /*************************************************************************/ 909 ps_master_ctxt->s_frm_prms.i2_mv_range_x = ps_thrd0_ctxt->s_init_prms.max_horz_search_range; 910 ps_master_ctxt->s_frm_prms.i2_mv_range_y = ps_thrd0_ctxt->s_init_prms.max_vert_search_range; 911 912 ps_master_ctxt->s_frm_prms.is_i_pic = 0; 913 ps_master_ctxt->s_frm_prms.i4_temporal_layer_id = i4_temporal_layer_id; 914 915 ps_master_ctxt->s_frm_prms.is_pic_second_field = 916 (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^ 917 ps_enc_lap_inp->s_input_buf.i4_topfield_first)); 918 { 919 S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type; 920 921 /*********************************************************************/ 922 /* For I Pic, we do not call update fn at ctb level, instead we do */ 923 /* one shot update for entire picture. */ 924 /*********************************************************************/ 925 if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME)) 926 { 927 ps_master_ctxt->s_frm_prms.is_i_pic = 1; 928 ps_master_ctxt->s_frm_prms.bidir_enabled = 0; 929 } 930 else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME)) 931 { 932 ps_master_ctxt->s_frm_prms.bidir_enabled = 0; 933 } 934 else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME)) 935 { 936 ps_master_ctxt->s_frm_prms.bidir_enabled = 1; 937 } 938 else 939 { 940 /* not sure whether we need to handle mixed frames like IP, */ 941 /* they should ideally come as single field. */ 942 /* TODO : resolve thsi ambiguity */ 943 ASSERT(0); 944 } 945 } 946 /************************************************************************/ 947 /* Lambda calculations moved outside ME and to one place, so as to have */ 948 /* consistent lambda across ME, IPE, CL RDOPT etc */ 949 /************************************************************************/ 950 951 { 952 #define CLIP3_F(min, max, val) (((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val))) 953 double q_steps[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 }; 954 double d_b_pic_factor; 955 double d_q_factor; 956 //double d_lambda; 957 UWORD8 u1_temp_hier = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id; 958 959 if(u1_temp_hier) 960 { 961 d_b_pic_factor = CLIP3_F(2.0, 4.0, (i4_frm_qp - 12.0) / 6.0); 962 } 963 else 964 d_b_pic_factor = 1.0; 965 966 d_q_factor = (1 << (i4_frm_qp / 6)) * q_steps[i4_frm_qp % 6]; 967 ps_master_ctxt->s_frm_prms.qstep = (WORD32)d_q_factor; 968 ps_master_ctxt->s_frm_prms.i4_frame_qp = i4_frm_qp; 969 } 970 971 /* HME Dependency Manager : Reset the num ctb processed in every row */ 972 /* for ME sync in every layer */ 973 { 974 WORD32 ctr; 975 for(ctr = 1; ctr < ps_thrd0_ctxt->num_layers; ctr++) 976 { 977 void *pv_dep_mngr_state; 978 pv_dep_mngr_state = ps_master_ctxt->apv_dep_mngr_hme_sync[ctr - 1]; 979 980 ihevce_dmgr_rst_row_row_sync(pv_dep_mngr_state); 981 } 982 } 983 984 /* Frame level init of all threads of ME */ 985 { 986 WORD32 num_thrds; 987 988 /* initialise the parameters for all the threads */ 989 for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) 990 { 991 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 992 993 hme_coarse_process_frm_init( 994 (void *)ps_ctxt, ps_ctxt->ps_hme_ref_map, ps_ctxt->ps_hme_frm_prms); 995 } 996 } 997 998 ps_master_ctxt->s_frm_prms.i4_cl_sad_lambda_qf = ps_frm_lamda->i4_cl_sad_lambda_qf; 999 ps_master_ctxt->s_frm_prms.i4_cl_satd_lambda_qf = ps_frm_lamda->i4_cl_satd_lambda_qf; 1000 ps_master_ctxt->s_frm_prms.i4_ol_sad_lambda_qf = ps_frm_lamda->i4_ol_sad_lambda_qf; 1001 ps_master_ctxt->s_frm_prms.i4_ol_satd_lambda_qf = ps_frm_lamda->i4_ol_satd_lambda_qf; 1002 ps_master_ctxt->s_frm_prms.lambda_q_shift = LAMBDA_Q_SHIFT; 1003 1004 ps_master_ctxt->s_frm_prms.pf_interp_fxn = NULL; 1005 1006 /*************************************************************************/ 1007 /* If num ref is 0, that means that it has to be coded as I. Do nothing */ 1008 /* However mv bank update needs to happen with "intra" mv. */ 1009 /*************************************************************************/ 1010 if(ps_master_ctxt->s_ref_map.i4_num_ref == 0 || ps_master_ctxt->s_frm_prms.is_i_pic) 1011 { 1012 for(i = 1; i < ps_thrd0_ctxt->num_layers; i++) 1013 { 1014 layer_ctxt_t *ps_layer_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[i]; 1015 BLK_SIZE_T e_blk_size; 1016 S32 use_4x4; 1017 1018 /* The mv bank is filled with "intra" mv */ 1019 use_4x4 = hme_get_mv_blk_size( 1020 ps_thrd0_ctxt->s_init_prms.use_4x4, 1021 i, 1022 ps_thrd0_ctxt->num_layers, 1023 ps_thrd0_ctxt->u1_encode[i]); 1024 e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8; 1025 hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]); 1026 hme_fill_mvbank_intra(ps_layer_ctxt); 1027 1028 /* Clear out the global mvs */ 1029 memset( 1030 ps_layer_ctxt->s_global_mv, 1031 0, 1032 sizeof(hme_mv_t) * ps_thrd0_ctxt->max_num_ref * NUM_GMV_LOBES); 1033 } 1034 1035 return; 1036 } 1037 1038 /*************************************************************************/ 1039 /* Coarse & refine Layer frm init (layer mem is common across thrds) */ 1040 /*************************************************************************/ 1041 { 1042 coarse_prms_t s_coarse_prms; 1043 refine_prms_t s_refine_prms; 1044 S16 i2_max; 1045 S32 layer_id; 1046 1047 layer_id = ps_thrd0_ctxt->num_layers - 1; 1048 i2_max = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x; 1049 i2_max = MAX(i2_max, ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y); 1050 s_coarse_prms.i4_layer_id = layer_id; 1051 1052 { 1053 S32 log_start_step; 1054 /* Based on Preset, set the starting step size for Refinement */ 1055 if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets) 1056 { 1057 log_start_step = 0; 1058 } 1059 else 1060 { 1061 log_start_step = 1; 1062 } 1063 s_coarse_prms.i4_max_iters = i2_max >> log_start_step; 1064 s_coarse_prms.i4_start_step = 1 << log_start_step; 1065 } 1066 s_coarse_prms.i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref; 1067 s_coarse_prms.do_full_search = 1; 1068 s_coarse_prms.num_results = ps_thrd0_ctxt->max_num_results_coarse; 1069 1070 hme_coarse_frm_init(ps_thrd0_ctxt, &s_coarse_prms); 1071 1072 layer_id--; 1073 1074 /*************************************************************************/ 1075 /* This loop will run for all refine layers (non- encode layers) */ 1076 /*************************************************************************/ 1077 while(layer_id > 0) 1078 { 1079 layer_ctxt_t *ps_curr_layer; 1080 layer_ctxt_t *ps_coarse_layer; 1081 1082 ps_coarse_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id + 1]; 1083 1084 ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]; 1085 1086 hme_set_refine_prms( 1087 &s_refine_prms, 1088 ps_thrd0_ctxt->u1_encode[layer_id], 1089 ps_master_ctxt->s_ref_map.i4_num_ref, 1090 layer_id, 1091 ps_thrd0_ctxt->num_layers, 1092 ps_thrd0_ctxt->num_layers_explicit_search, 1093 ps_thrd0_ctxt->s_init_prms.use_4x4, 1094 &ps_master_ctxt->s_frm_prms, 1095 NULL, 1096 &ps_thrd0_ctxt->s_init_prms.s_me_coding_tools); 1097 1098 hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer); 1099 1100 layer_id--; 1101 } 1102 } 1103 1104 return; 1105 } 1106 1107 /*! 1108 ****************************************************************************** 1109 * \if Function name : ihevce_decomp_pre_intra_frame_init \endif 1110 * 1111 * \brief 1112 * Frame Intialization for Decomp intra pre analysis. 1113 * 1114 * \param[in] pv_ctxt : pointer to module ctxt 1115 * \param[in] ppu1_decomp_lyr_bufs : pointer to array of layer buffer pointers 1116 * \param[in] pi4_lyr_buf_stride : pointer to array of layer buffer strides 1117 * 1118 * \return 1119 * None 1120 * 1121 * \author 1122 * Ittiam 1123 * 1124 ***************************************************************************** 1125 */ 1126 WORD32 ihevce_coarse_me_get_lyr_buf_desc( 1127 void *pv_me_ctxt, UWORD8 **ppu1_decomp_lyr_bufs, WORD32 *pi4_lyr_buf_stride) 1128 { 1129 /* local variables */ 1130 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 1131 coarse_me_ctxt_t *ps_thrd0_ctxt; 1132 WORD32 lyr_no; 1133 layers_descr_t *ps_curr_descr; 1134 WORD32 i4_free_idx; 1135 1136 /* All processing done using shared / common memory across */ 1137 /* threads is done using thrd0 ctxt */ 1138 ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 1139 1140 /* Obtain an empty layer descriptor */ 1141 i4_free_idx = hme_coarse_find_free_descr_idx((void *)ps_thrd0_ctxt); 1142 1143 ps_curr_descr = &ps_thrd0_ctxt->as_ref_descr[i4_free_idx]; 1144 1145 /* export all the layer buffers except Layer 0 (encode layer) */ 1146 for(lyr_no = 1; lyr_no < ps_thrd0_ctxt->num_layers; lyr_no++) 1147 { 1148 pi4_lyr_buf_stride[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->i4_inp_stride; 1149 ppu1_decomp_lyr_bufs[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->pu1_inp; 1150 } 1151 1152 return (i4_free_idx); 1153 } 1154 1155 /*! 1156 ****************************************************************************** 1157 * \if Function name : ihevce_coarse_me_get_lyr_prms_job_que \endif 1158 * 1159 * \brief Returns to the caller key attributes related to dependency between layers 1160 * for multi-thread execution 1161 * 1162 * 1163 * \par Description: 1164 * This function requires the precondition that the width and ht of encode 1165 * layer is known, and ME API ihevce_me_set_resolution() API called with 1166 * this info. Based on this, ME populates useful information for the encoder 1167 * to execute the multi-thread (concurrent across layers) in this API. 1168 * The number of layers, number of vertical units in each layer, and for 1169 * each vertial unit in each layer, its dependency on previous layer's units 1170 * From ME's perspective, a vertical unit is one which is smallest min size 1171 * vertically (and spans the entire row horizontally). This is CTB for encode 1172 * layer, and 8x8 / 4x4 for non encode layers. 1173 * 1174 * \param[in] pv_ctxt : ME handle 1175 * \param[in] ps_curr_inp : Input buffer descriptor 1176 * \param[out] pi4_num_hme_lyrs : Num of HME layers (ME updates) 1177 * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each 1178 * entry has num vertical units in that particular layer 1179 * \param[in] ps_me_job_q_prms : Array of job queue prms, one for each unit in a 1180 * layer. Note that this is contiguous in order of processing 1181 * All k units of layer N-1 from top to bottom, followed by 1182 * all m units of layer N-2 .... ends with X units of layer 0 1183 * 1184 * \return 1185 * None 1186 * 1187 * \author 1188 * Ittiam 1189 * 1190 ***************************************************************************** 1191 */ 1192 void ihevce_coarse_me_get_lyr_prms_job_que( 1193 void *pv_me_ctxt, 1194 ihevce_lap_enc_buf_t *ps_curr_inp, 1195 WORD32 *pi4_num_hme_lyrs, 1196 WORD32 *pi4_num_vert_units_in_lyr, 1197 multi_thrd_me_job_q_prms_t *ps_me_job_q_prms) 1198 { 1199 coarse_me_ctxt_t *ps_ctxt; 1200 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 1201 1202 /* These arrays and ptrs track input dependencies for units of a layer */ 1203 /* This is a ping poing design, while using one part, we update other part */ 1204 U08 au1_inp_dep[2][MAX_NUM_VERT_UNITS_FRM]; 1205 U08 *pu1_inp_dep_c, *pu1_inp_dep_n; 1206 1207 /* Height of current and next layers */ 1208 S32 ht_c, ht_n; 1209 1210 /* Blk ht at a given layer and next layer*/ 1211 S32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n; 1212 1213 /* Number of vertical units in current and next layer */ 1214 S32 num_vert_c, num_vert_n; 1215 1216 S32 ctb_size = 64, num_layers, i, j, k; 1217 1218 /* since same layer desc pointer is stored in all thread ctxt */ 1219 /* a free idx is obtained using 0th thread ctxt pointer */ 1220 ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 1221 1222 /* Set the number of layers */ 1223 num_layers = ps_ctxt->num_layers; 1224 *pi4_num_hme_lyrs = num_layers; 1225 1226 pu1_inp_dep_c = &au1_inp_dep[0][0]; 1227 pu1_inp_dep_n = &au1_inp_dep[1][0]; 1228 1229 ASSERT(num_layers >= 2); 1230 1231 ht_n = ps_ctxt->a_ht[num_layers - 2]; 1232 ht_c = ps_ctxt->a_ht[num_layers - 1]; 1233 1234 /* compute blk ht and unit ht for c and n */ 1235 if(ps_ctxt->u1_encode[num_layers - 1]) 1236 { 1237 blk_ht_c = 16; 1238 unit_ht_c = ctb_size; 1239 } 1240 else 1241 { 1242 blk_ht_c = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, num_layers - 1, num_layers, 0); 1243 unit_ht_c = blk_ht_c; 1244 } 1245 1246 num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c; 1247 1248 /* For new design in Coarsest HME layer we need */ 1249 /* one additional row extra at the end of frame */ 1250 /* hence num_vert_c is incremented by 1 */ 1251 num_vert_c++; 1252 1253 /* Dummy initialization outside loop, not used first time */ 1254 memset(pu1_inp_dep_c, 0, num_vert_c); 1255 1256 /*************************************************************************/ 1257 /* Run through each layer, set the number of vertical units and job queue*/ 1258 /* attrs for each vert unit in the layer */ 1259 /*************************************************************************/ 1260 for(i = num_layers - 1; i > 0; i--) 1261 { 1262 /* 0th entry is actually layer id num_layers - 1 */ 1263 /* and entry num_layers-1 equals the biggest layer (id = 0) */ 1264 pi4_num_vert_units_in_lyr[num_layers - 1 - i] = num_vert_c; 1265 /* "n" is computed for first time */ 1266 ht_n = ps_ctxt->a_ht[i - 1]; 1267 blk_ht_n = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, i - 1, num_layers, 0); 1268 unit_ht_n = blk_ht_n; 1269 if(ps_ctxt->u1_encode[i - 1]) 1270 unit_ht_n = ctb_size; 1271 1272 num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n; 1273 /* Initialize all units' inp dep in next layer to 0 */ 1274 memset(pu1_inp_dep_n, 0, num_vert_n * sizeof(U08)); 1275 1276 /* Evaluate dependencies for this layer */ 1277 for(j = 0; j < num_vert_c; j++) 1278 { 1279 S32 v1, v2; 1280 1281 /* Output dependencies. When one unit in current layer finishes, */ 1282 /* how many in the next layer it affects?. Assuming that the top */ 1283 /* of this vertical unit and bottom of this vertical unit project*/ 1284 /* somewhere in the next layer. The top of this vertical unit */ 1285 /* becomes the bottom right point for somebody, and the bottom of*/ 1286 /* this vertical unit becomes the colocated pt for somebody, this*/ 1287 /* is the extremum. */ 1288 1289 /* for the initial unit affected by j in "c" layer, take j-1th */ 1290 /* unit top and project it. */ 1291 v1 = (j - 1) * unit_ht_c * ht_n; 1292 v1 /= (ht_c * unit_ht_n); 1293 v1 -= 1; 1294 1295 /* for the final unit affected by j in "c" layer, take jth unit */ 1296 /* bottom and project it. */ 1297 1298 v2 = (j + 1) * unit_ht_c * ht_n; 1299 v2 /= (ht_c * unit_ht_n); 1300 v2 += 1; 1301 1302 /* Clip to be within valid limits */ 1303 v1 = HME_CLIP(v1, 0, (num_vert_n - 1)); 1304 v2 = HME_CLIP(v2, 0, (num_vert_n - 1)); 1305 1306 /* In the layer "n", units starting at offset v1, and upto v2 are*/ 1307 /* dependent on unit j of layer "c". So for each of these units */ 1308 /* increment the dependency by 1 corresponding to "jth" unit in */ 1309 /* layer "c" */ 1310 ps_me_job_q_prms->i4_num_output_dep = v2 - v1 + 1; 1311 ASSERT(ps_me_job_q_prms->i4_num_output_dep <= MAX_OUT_DEP); 1312 for(k = v1; k <= v2; k++) 1313 pu1_inp_dep_n[k]++; 1314 1315 /* Input dependency would have been calculated in prev run */ 1316 ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j]; 1317 ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP); 1318 1319 /* Offsets */ 1320 for(k = v1; k <= v2; k++) 1321 ps_me_job_q_prms->ai4_out_dep_unit_off[k - v1] = k; 1322 1323 ps_me_job_q_prms++; 1324 } 1325 1326 /* Compute the blk size and vert unit size in each layer */ 1327 /* "c" denotes curr layer, and "n" denotes the layer to which result */ 1328 /* is projected to */ 1329 ht_c = ht_n; 1330 blk_ht_c = blk_ht_n; 1331 unit_ht_c = unit_ht_n; 1332 num_vert_c = num_vert_n; 1333 1334 /* Input dep count for next layer was computed this iteration. */ 1335 /* Swap so that p_inp_dep_n becomes current for next iteration, */ 1336 /* and p_inp_dep_c will become update area during next iteration */ 1337 /* for next to next. */ 1338 { 1339 U08 *pu1_tmp = pu1_inp_dep_n; 1340 pu1_inp_dep_n = pu1_inp_dep_c; 1341 pu1_inp_dep_c = pu1_tmp; 1342 } 1343 } 1344 1345 /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */ 1346 1347 /* set the numebr of vertical units */ 1348 pi4_num_vert_units_in_lyr[num_layers - 1] = num_vert_c; 1349 for(j = 0; j < num_vert_c; j++) 1350 { 1351 /* Here there is no output dependency for ME. However this data is used for encode, */ 1352 /* and there is a 1-1 correspondence between this and the encode */ 1353 /* Hence we set output dependency of 1 */ 1354 ps_me_job_q_prms->i4_num_output_dep = 1; 1355 ps_me_job_q_prms->ai4_out_dep_unit_off[0] = j; 1356 ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j]; 1357 ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP); 1358 ps_me_job_q_prms++; 1359 } 1360 1361 return; 1362 } 1363 1364 /*! 1365 ****************************************************************************** 1366 * \if Function name : ihevce_coarse_me_set_lyr1_mv_bank \endif 1367 * 1368 * \brief 1369 * Frame level ME initialisation of MV bank of penultimate layer 1370 * 1371 * \par Description: 1372 * Updates the Layer1 context with the given buffers 1373 * 1374 * \param[in] pv_me_ctxt : pointer to ME module 1375 * \param[in] pu1_mv_bank : MV bank buffer pointer 1376 * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer 1377 * 1378 * \return 1379 * None 1380 * 1381 * \author 1382 * Ittiam 1383 * 1384 ***************************************************************************** 1385 */ 1386 void ihevce_coarse_me_set_lyr1_mv_bank( 1387 void *pv_me_ctxt, 1388 ihevce_lap_enc_buf_t *ps_enc_lap_inp, 1389 void *pv_mv_bank, 1390 void *pv_ref_idx_bank, 1391 WORD32 i4_curr_idx) 1392 { 1393 coarse_me_ctxt_t *ps_thrd0_ctxt; 1394 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 1395 layer_ctxt_t *ps_lyr1_ctxt; 1396 1397 /* Input descriptor that is updated and passed to ME */ 1398 hme_inp_desc_t s_inp_desc; 1399 1400 /*************************************************************************/ 1401 /* Add the current input to ME's DPB. This will also create the pyramids */ 1402 /* for the HME layers tha are not "encoded". */ 1403 /*************************************************************************/ 1404 s_inp_desc.i4_poc = ps_enc_lap_inp->s_lap_out.i4_poc; 1405 s_inp_desc.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf; 1406 s_inp_desc.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf; 1407 s_inp_desc.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf; 1408 1409 s_inp_desc.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd; 1410 s_inp_desc.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd; 1411 1412 hme_coarse_add_inp(pv_me_ctxt, &s_inp_desc, i4_curr_idx); 1413 1414 /* All processing done using shared / common memory across */ 1415 /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */ 1416 ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 1417 1418 ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1]; 1419 1420 /* register the mv bank & ref idx bank pointer */ 1421 ps_lyr1_ctxt->ps_layer_mvbank->pi1_ref_idx_base = (S08 *)pv_ref_idx_bank; 1422 ps_lyr1_ctxt->ps_layer_mvbank->ps_mv_base = (hme_mv_t *)pv_mv_bank; 1423 1424 return; 1425 } 1426 1427 /*! 1428 ****************************************************************************** 1429 * \if Function name : ihevce_coarse_me_get_lyr1_ctxt \endif 1430 * 1431 * \brief 1432 * function to get teh Layer 1 properties to be passed on the encode layer 1433 * 1434 * \par Description: 1435 * Ucopies the enitre layer ctxt emory to the destination 1436 * 1437 * \param[in] pv_me_ctxt : pointer to ME module 1438 * \param[in] pu1_mv_bank : MV bank buffer pointer 1439 * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer 1440 * 1441 * \return 1442 * None 1443 * 1444 * \author 1445 * Ittiam 1446 * 1447 ***************************************************************************** 1448 */ 1449 void ihevce_coarse_me_get_lyr1_ctxt( 1450 void *pv_me_ctxt, void *pv_layer_ctxt, void *pv_layer_mv_bank_ctxt) 1451 { 1452 coarse_me_ctxt_t *ps_thrd0_ctxt; 1453 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 1454 layer_ctxt_t *ps_lyr1_ctxt; 1455 1456 /* All processing done using shared / common memory across */ 1457 /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */ 1458 ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 1459 1460 /* get the context of layer 1 */ 1461 ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1]; 1462 1463 /* copy the layer ctxt eve registerd mv bank & ref idx bank also goes in */ 1464 memcpy(pv_layer_ctxt, ps_lyr1_ctxt, sizeof(layer_ctxt_t)); 1465 1466 /* copy the layer mv bank contents */ 1467 memcpy(pv_layer_mv_bank_ctxt, ps_lyr1_ctxt->ps_layer_mvbank, sizeof(layer_mv_t)); 1468 1469 /* register the MV bank pointer in the layer ctxt*/ 1470 ((layer_ctxt_t *)pv_layer_ctxt)->ps_layer_mvbank = (layer_mv_t *)pv_layer_mv_bank_ctxt; 1471 1472 return; 1473 } 1474