Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /*!
     22 ******************************************************************************
     23 * \file ihevce_coarse_me_pass.c
     24 *
     25 * \brief
     26 *    Converts the language of the encoder to language of me. This is an i/f
     27 *    between the encoder style APIs and ME style APIs. This is basically
     28 *    a memoryless glue layer.
     29 *
     30 * \date
     31 *    22/10/2012
     32 *
     33 * \author
     34 *    Ittiam
     35 *
     36 *
     37 * List of Functions
     38 *
     39 *
     40 ******************************************************************************
     41 */
     42 
     43 /*****************************************************************************/
     44 /* File Includes                                                             */
     45 /*****************************************************************************/
     46 /* System include files */
     47 #include <stdio.h>
     48 #include <string.h>
     49 #include <stdlib.h>
     50 #include <assert.h>
     51 #include <stdarg.h>
     52 #include <math.h>
     53 
     54 /* User include files */
     55 #include "ihevc_typedefs.h"
     56 #include "itt_video_api.h"
     57 #include "ihevce_api.h"
     58 
     59 #include "rc_cntrl_param.h"
     60 #include "rc_frame_info_collector.h"
     61 #include "rc_look_ahead_params.h"
     62 
     63 #include "ihevc_defs.h"
     64 #include "ihevc_structs.h"
     65 #include "ihevc_platform_macros.h"
     66 #include "ihevc_deblk.h"
     67 #include "ihevc_itrans_recon.h"
     68 #include "ihevc_chroma_itrans_recon.h"
     69 #include "ihevc_chroma_intra_pred.h"
     70 #include "ihevc_intra_pred.h"
     71 #include "ihevc_inter_pred.h"
     72 #include "ihevc_mem_fns.h"
     73 #include "ihevc_padding.h"
     74 #include "ihevc_weighted_pred.h"
     75 #include "ihevc_sao.h"
     76 #include "ihevc_resi_trans.h"
     77 #include "ihevc_quant_iquant_ssd.h"
     78 #include "ihevc_cabac_tables.h"
     79 
     80 #include "ihevce_defs.h"
     81 #include "ihevce_lap_enc_structs.h"
     82 #include "ihevce_multi_thrd_structs.h"
     83 #include "ihevce_me_common_defs.h"
     84 #include "ihevce_had_satd.h"
     85 #include "ihevce_error_codes.h"
     86 #include "ihevce_bitstream.h"
     87 #include "ihevce_cabac.h"
     88 #include "ihevce_rdoq_macros.h"
     89 #include "ihevce_function_selector.h"
     90 #include "ihevce_enc_structs.h"
     91 #include "ihevce_entropy_structs.h"
     92 #include "ihevce_cmn_utils_instr_set_router.h"
     93 #include "ihevce_enc_loop_structs.h"
     94 #include "ihevce_bs_compute_ctb.h"
     95 #include "ihevce_global_tables.h"
     96 #include "ihevce_dep_mngr_interface.h"
     97 #include "hme_datatype.h"
     98 #include "hme_interface.h"
     99 #include "hme_common_defs.h"
    100 #include "hme_defs.h"
    101 #include "ihevce_me_instr_set_router.h"
    102 #include "ihevce_ipe_instr_set_router.h"
    103 #include "ihevce_ipe_structs.h"
    104 #include "hme_globals.h"
    105 #include "hme_utils.h"
    106 #include "hme_coarse.h"
    107 #include "hme_refine.h"
    108 #include "ihevce_me_pass.h"
    109 #include "ihevce_coarse_me_pass.h"
    110 
    111 /*****************************************************************************/
    112 /* Function Definitions                                                      */
    113 /*****************************************************************************/
    114 
    115 /*!
    116 ******************************************************************************
    117 * \if Function name : ihevce_coarse_me_get_num_mem_recs \endif
    118 *
    119 * \brief
    120 *    Number of memory records are returned for ME module
    121 *    Note : Include total mem. req. for HME + Total mem. req. for Dep Mngr for HME
    122 *
    123 * \return
    124 *    Number of memory records
    125 *
    126 * \author
    127 *  Ittiam
    128 *
    129 *****************************************************************************
    130 */
    131 WORD32 ihevce_coarse_me_get_num_mem_recs()
    132 {
    133     WORD32 hme_mem_recs = hme_coarse_num_alloc();
    134     WORD32 hme_dep_mngr_mem_recs = hme_coarse_dep_mngr_num_alloc();
    135 
    136     return ((hme_mem_recs + hme_dep_mngr_mem_recs));
    137 }
    138 
    139 /*!
    140 ******************************************************************************
    141 * \if Function name : ihevce_coarse_me_get_mem_recs \endif
    142 *
    143 * \brief
    144 *    Memory requirements are returned for coarse ME.
    145 *
    146 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
    147 * \param[in] ps_init_prms : Create time static parameters
    148 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
    149 * \param[in] i4_mem_space : memspace in whihc memory request should be done
    150 *
    151 * \return
    152 *    Number of records
    153 *
    154 * \author
    155 *  Ittiam
    156 *
    157 *****************************************************************************
    158 */
    159 WORD32 ihevce_coarse_me_get_mem_recs(
    160     iv_mem_rec_t *ps_mem_tab,
    161     ihevce_static_cfg_params_t *ps_init_prms,
    162     WORD32 i4_num_proc_thrds,
    163     WORD32 i4_mem_space,
    164     WORD32 i4_resolution_id)
    165 {
    166     hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
    167     WORD32 n_tabs, i;
    168 
    169     /* Init prms structure specific to HME */
    170     hme_init_prms_t s_hme_init_prms;
    171 
    172     //return (ihevce_coarse_me_get_num_mem_recs());
    173     /*************************************************************************/
    174     /* code flow: we call hme alloc function and then remap those memtabs    */
    175     /* to a different type of memtab structure.                              */
    176     /*************************************************************************/
    177     ASSERT(HME_COARSE_TOT_MEMTABS >= hme_coarse_num_alloc());
    178 
    179     /*************************************************************************/
    180     /* POPULATE THE HME INIT PRMS                                            */
    181     /*************************************************************************/
    182     ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
    183 
    184     /*************************************************************************/
    185     /* CALL THE ME FUNCTION TO GET MEMTABS                                   */
    186     /*************************************************************************/
    187     n_tabs = hme_coarse_alloc(&as_memtabs[0], &s_hme_init_prms);
    188     ASSERT(n_tabs == hme_coarse_num_alloc());
    189 
    190     /*************************************************************************/
    191     /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE                             */
    192     /*************************************************************************/
    193     for(i = 0; i < n_tabs; i++)
    194     {
    195         ps_mem_tab[i].i4_mem_size = as_memtabs[i].size;
    196         ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align;
    197         ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
    198         ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t);
    199     }
    200 
    201     /*************************************************************************/
    202     /* --- HME Coarse sync Dep Mngr Mem requests --                          */
    203     /*************************************************************************/
    204     {
    205         WORD32 n_dep_tabs;
    206 
    207         ps_mem_tab += n_tabs;
    208 
    209         n_dep_tabs = hme_coarse_dep_mngr_alloc(
    210             ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
    211 
    212         ASSERT(n_dep_tabs == hme_coarse_dep_mngr_num_alloc());
    213 
    214         /* Update the total no. of mem tabs */
    215         n_tabs += n_dep_tabs;
    216     }
    217 
    218     return (n_tabs);
    219 }
    220 
    221 /*!
    222 ******************************************************************************
    223 * \if Function name : ihevce_coarse_me_init \endif
    224 *
    225 * \brief
    226 *    Intialization for ME context state structure .
    227 *
    228 * \param[in] ps_mem_tab : pointer to memory descriptors table
    229 * \param[in] ps_init_prms : Create time static parameters
    230 * \param[in] pv_osal_handle : Osal handle
    231 *
    232 * \return
    233 *    Handle to the ME context
    234 *
    235 * \author
    236 *  Ittiam
    237 *
    238 *****************************************************************************
    239 */
    240 void *ihevce_coarse_me_init(
    241     iv_mem_rec_t *ps_mem_tab,
    242     ihevce_static_cfg_params_t *ps_init_prms,
    243     WORD32 i4_num_proc_thrds,
    244     void *pv_osal_handle,
    245     WORD32 i4_resolution_id,
    246     UWORD8 u1_is_popcnt_available)
    247 {
    248     /* ME handle to be returned */
    249     void *pv_me_ctxt;
    250     WORD32 status;
    251     coarse_me_master_ctxt_t *ps_ctxt;
    252 
    253     /* Init prms structure specific to HME */
    254     hme_init_prms_t s_hme_init_prms;
    255 
    256     /* memtabs to be passed to hme */
    257     hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
    258     WORD32 n_tabs, n_dep_tabs, i;
    259 
    260     /*************************************************************************/
    261     /* POPULATE THE HME INIT PRMS                                            */
    262     /*************************************************************************/
    263     ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
    264 
    265     /*************************************************************************/
    266     /* Ensure local declaration is sufficient                                */
    267     /*************************************************************************/
    268     n_tabs = hme_coarse_num_alloc();
    269     ASSERT(HME_COARSE_TOT_MEMTABS >= n_tabs);
    270 
    271     /*************************************************************************/
    272     /* MAP RESULTS TO HME MEMTAB STRUCTURE                                   */
    273     /*************************************************************************/
    274     for(i = 0; i < n_tabs; i++)
    275     {
    276         as_memtabs[i].size = ps_mem_tab[i].i4_mem_size;
    277         as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment;
    278         as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base;
    279     }
    280     /*************************************************************************/
    281     /* CALL THE ME FUNCTION TO GET MEMTABS                                   */
    282     /*************************************************************************/
    283     pv_me_ctxt = (void *)as_memtabs[0].pu1_mem;
    284     status = hme_coarse_init(pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms);
    285     ps_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    286     if(status == -1)
    287         return NULL;
    288 
    289     /*************************************************************************/
    290     /* --- HME sync Dep Mngr Mem init --                                     */
    291     /*************************************************************************/
    292 
    293     ps_mem_tab += n_tabs;
    294 
    295     n_dep_tabs = hme_coarse_dep_mngr_init(
    296         ps_mem_tab, ps_init_prms, pv_me_ctxt, pv_osal_handle, i4_num_proc_thrds, i4_resolution_id);
    297     ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc());
    298 
    299     n_tabs += n_dep_tabs;
    300 
    301     ihevce_me_instr_set_router(
    302         (ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list,
    303         ps_init_prms->e_arch_type);
    304 
    305     ihevce_cmn_utils_instr_set_router(
    306         &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
    307 
    308     return (pv_me_ctxt);
    309 }
    310 
    311 /*!
    312 ******************************************************************************
    313 * \if Function name : ihevce_coarse_me_reg_thrds_sem \endif
    314 *
    315 * \brief
    316 *    Intialization for ME context state structure with semaphores .
    317 *
    318 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
    319 * \param[in] ppv_sem_hdls : Array of semaphore handles
    320 * \param[in] i4_num_proc_thrds : Number of processing threads
    321 *
    322 * \return
    323 *   none
    324 *
    325 * \author
    326 *  Ittiam
    327 *
    328 *****************************************************************************
    329 */
    330 void ihevce_coarse_me_reg_thrds_sem(void *pv_me_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
    331 {
    332     hme_coarse_dep_mngr_reg_sem(pv_me_ctxt, ppv_sem_hdls, i4_num_proc_thrds);
    333 
    334     return;
    335 }
    336 
    337 /*!
    338 ******************************************************************************
    339 * \if Function name : ihevce_coarse_me_delete \endif
    340 *
    341 * \brief
    342 *    Destroy Coarse ME module
    343 * Note : Only Destroys the resources allocated in the module like
    344 *   semaphore,etc. Memory free is done Separately using memtabs
    345 *
    346 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
    347 * \param[in] ps_init_prms : Create time static parameters
    348 * \param[in] pv_osal_handle : Osal handle
    349 *
    350 * \return
    351 *    None
    352 *
    353 * \author
    354 *  Ittiam
    355 *
    356 *****************************************************************************
    357 */
    358 void ihevce_coarse_me_delete(
    359     void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
    360 {
    361     /* --- HME sync Dep Mngr Delete --*/
    362     hme_coarse_dep_mngr_delete(pv_me_ctxt, ps_init_prms, i4_resolution_id);
    363 }
    364 
    365 /**
    366 *******************************************************************************
    367 * \if Function name : ihevce_coarse_me_set_resolution \endif
    368 *
    369 * \brief
    370 *    Sets the resolution for ME state
    371 *
    372 * \par Description:
    373 *    ME requires information of resolution to prime up its layer descriptors
    374 *    and contexts. This API is called whenever a control call from application
    375 *    causes a change of resolution. Has to be called once initially before
    376 *    processing any frame. Again this is just a glue function and calls the
    377 *    actual ME API for the same.
    378 *
    379 * \param[in,out] pv_me_ctxt: Handle to the ME context
    380 * \param[in] n_enc_layers: Number of layers getting encoded
    381 * \param[in] p_wd : Pointer containing widths of each layer getting encoded.
    382 * \param[in] p_ht : Pointer containing heights of each layer getting encoded.
    383 *
    384 * \returns
    385 *  none
    386 *
    387 * \author
    388 *  Ittiam
    389 *
    390 *******************************************************************************
    391 */
    392 void ihevce_coarse_me_set_resolution(
    393     void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht)
    394 {
    395     /* local variables */
    396     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    397     WORD32 thrds;
    398 
    399     for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++)
    400     {
    401         coarse_me_ctxt_t *ps_me_thrd_ctxt;
    402 
    403         ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds];
    404 
    405         hme_coarse_set_resolution((void *)ps_me_thrd_ctxt, n_enc_layers, p_wd, p_ht);
    406     }
    407 }
    408 void ihevce_coarse_me_get_rc_param(
    409     void *pv_me_ctxt,
    410     LWORD64 *i8_acc_frame_hme_cost,
    411     LWORD64 *i8_acc_frame_hme_sad,
    412     LWORD64 *i8_acc_num_blks_higher_sad,
    413     LWORD64 *i8_total_blks,
    414     WORD32 i4_is_prev_pic_same_scene)
    415 {
    416     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    417     WORD32 thrds;
    418     coarse_me_ctxt_t *ps_me_thrd_ctxt;
    419 
    420     *i8_acc_frame_hme_cost = 0;
    421     *i8_acc_frame_hme_sad = 0;
    422 
    423     for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++)
    424     {
    425         ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds];
    426         *i8_acc_frame_hme_cost += ps_me_thrd_ctxt->i4_L1_hme_best_cost;
    427 
    428         /*Calculate me cost wrt. to ref only for P frame */
    429         if(ps_me_thrd_ctxt->s_frm_prms.is_i_pic == ps_me_thrd_ctxt->s_frm_prms.bidir_enabled)
    430         {
    431             *i8_acc_num_blks_higher_sad += ps_me_thrd_ctxt->i4_num_blks_high_sad;
    432             *i8_total_blks += ps_me_thrd_ctxt->i4_num_blks;
    433         }
    434 
    435         *i8_acc_frame_hme_sad += ps_me_thrd_ctxt->i4_L1_hme_sad;
    436     }
    437 }
    438 
    439 /*!
    440 ******************************************************************************
    441 * \if Function name : ihevce_coarse_me_process \endif
    442 *
    443 * \brief
    444 *    Frame level ME function
    445 *
    446 * \par Description:
    447 *    Processing of all layers starting from coarse and going
    448 *    to the refinement layers, except enocde layer
    449 *
    450 * \param[in] pv_ctxt : pointer to ME module
    451 * \param[in] ps_enc_lap_inp  : pointer to input yuv buffer (frame buffer)
    452 * \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer)
    453 * \param[out] ps_cu_out : pointer to CU analyse output structure (frame buffer)
    454 * \param[in]  pd_intra_costs : pointerto intra cost buffer
    455 * \param[in]  ps_multi_thrd_ctxt : pointer to multi thread ctxt
    456 * \param[in]  thrd_id : Thread id of the current thrd in which function is executed
    457 *
    458 * \return
    459 *    None
    460 *
    461 * \author
    462 *  Ittiam
    463 *
    464 *****************************************************************************
    465 */
    466 void ihevce_coarse_me_process(
    467     void *pv_me_ctxt,
    468     ihevce_lap_enc_buf_t *ps_enc_lap_inp,
    469     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
    470     WORD32 thrd_id,
    471     WORD32 i4_ping_pong)
    472 
    473 {
    474     /* local variables */
    475     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    476     coarse_me_ctxt_t *ps_thrd_ctxt;
    477 
    478     /* get the current thread ctxt pointer */
    479     ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];
    480     ps_thrd_ctxt->thrd_id = thrd_id;
    481 
    482     /* frame level processing function */
    483     hme_coarse_process_frm(
    484         (void *)ps_thrd_ctxt,
    485         &ps_master_ctxt->s_ref_map,
    486         &ps_master_ctxt->s_frm_prms,
    487         ps_multi_thrd_ctxt,
    488         i4_ping_pong,
    489         &ps_master_ctxt->apv_dep_mngr_hme_sync[0]);
    490 
    491     return;
    492 }
    493 
    494 /*!
    495 ******************************************************************************
    496 * \if Function name : ihevce_coarse_me_frame_end \endif
    497 *
    498 * \brief
    499 *    End of frame update function performs
    500 *       - GMV collation
    501 *       - Dynamic Search Range collation
    502 *
    503 * \param[in] pv_ctxt : pointer to ME module
    504 *
    505 * \return
    506 *    None
    507 *
    508 * \author
    509 *  Ittiam
    510 *
    511 *****************************************************************************
    512 */
    513 void ihevce_coarse_me_frame_end(void *pv_me_ctxt)
    514 {
    515     /* local variables */
    516     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    517     coarse_me_ctxt_t *ps_thrd0_ctxt;
    518     layer_ctxt_t *ps_curr_layer;
    519     WORD32 num_ref, num_thrds, cur_poc;
    520     WORD32 coarse_layer_id;
    521     WORD32 i4_num_ref;
    522     ME_QUALITY_PRESETS_T e_me_quality_preset;
    523 
    524     /* GMV collation is done for coarse Layer only */
    525     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
    526     coarse_layer_id = ps_thrd0_ctxt->num_layers - 1;
    527     ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[coarse_layer_id];
    528     i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref;
    529     e_me_quality_preset = ps_thrd0_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
    530 
    531     /* No processing is required if current pic is I pic */
    532     if(1 == ps_master_ctxt->s_frm_prms.is_i_pic)
    533     {
    534         return;
    535     }
    536 
    537     /* use thrd 0 ctxt to collate the GMVs histogram and Dynamic Search Range */
    538     /* across all threads */
    539     for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
    540     {
    541         WORD32 i4_offset, i4_lobe_size, i4_layer_id;
    542         mv_hist_t *ps_hist_thrd0;
    543         dyn_range_prms_t *aps_dyn_range_prms_thrd0[MAX_NUM_LAYERS];
    544 
    545         ps_hist_thrd0 = ps_thrd0_ctxt->aps_mv_hist[num_ref];
    546 
    547         /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
    548         if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled)
    549         {
    550             for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
    551             {
    552                 aps_dyn_range_prms_thrd0[i4_layer_id] =
    553                     &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref];
    554             }
    555         }
    556 
    557         i4_lobe_size = ps_hist_thrd0->i4_lobe1_size;
    558         i4_offset = i4_lobe_size >> 1;
    559 
    560         /* run a loop over all the other threads to add up the histogram */
    561         /* and to update the dynamical search range                      */
    562         for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    563         {
    564             dyn_range_prms_t *ps_dyn_range_prms;
    565 
    566             if(ME_XTREME_SPEED_25 != e_me_quality_preset)
    567             {
    568                 mv_hist_t *ps_hist;
    569                 WORD32 i4_y, i4_x;
    570                 /* get current thrd histogram pointer */
    571                 ps_hist = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_mv_hist[num_ref];
    572 
    573                 /* Accumalate the Bin count for all the thread */
    574                 for(i4_y = 0; i4_y < ps_hist_thrd0->i4_num_rows; i4_y++)
    575                 {
    576                     for(i4_x = 0; i4_x < ps_hist_thrd0->i4_num_cols; i4_x++)
    577                     {
    578                         S32 i4_bin_id;
    579 
    580                         i4_bin_id = i4_x + (i4_y * ps_hist_thrd0->i4_num_cols);
    581 
    582                         ps_hist_thrd0->ai4_bin_count[i4_bin_id] +=
    583                             ps_hist->ai4_bin_count[i4_bin_id];
    584                     }
    585                 }
    586             }
    587 
    588             /* Update the dynamical search range for each Layer              */
    589             /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
    590             if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled)
    591             {
    592                 for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
    593                 {
    594                     /* get current thrd, layer dynamical search range param. pointer */
    595                     ps_dyn_range_prms =
    596                         &ps_master_ctxt->aps_me_ctxt[num_thrds]
    597                              ->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref];
    598                     /* TODO : This calls can be optimized further. No need for min in 1st call and max in 2nd call */
    599                     hme_update_dynamic_search_params(
    600                         aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_max_y);
    601 
    602                     hme_update_dynamic_search_params(
    603                         aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_min_y);
    604                 }
    605             }
    606         }
    607     }
    608 
    609     /*************************************************************************/
    610     /* Get the MAX/MIN per POC distance based on the all the ref. pics       */
    611     /*************************************************************************/
    612     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
    613     if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled)
    614     {
    615         WORD32 i4_layer_id;
    616         cur_poc = ps_thrd0_ctxt->i4_curr_poc;
    617 
    618         for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
    619         {
    620             ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = 0;
    621             ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = 0;
    622         }
    623 
    624         for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
    625         {
    626             for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
    627             {
    628                 WORD16 i2_mv_per_poc;
    629                 WORD32 ref_poc, poc_diff;
    630                 dyn_range_prms_t *ps_dyn_range_prms_thrd0;
    631 
    632                 ps_dyn_range_prms_thrd0 =
    633                     &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref];
    634 
    635                 ref_poc = ps_dyn_range_prms_thrd0->i4_poc;
    636                 ASSERT(ref_poc < cur_poc);
    637                 poc_diff = (cur_poc - ref_poc);
    638 
    639                 /* cur. ref. pic. max y per POC */
    640                 i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff;
    641                 /* update the max y per POC */
    642                 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] =
    643                     MAX(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id],
    644                         i2_mv_per_poc);
    645 
    646                 /* cur. ref. pic. min y per POC */
    647                 i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff;
    648                 /* update the min y per POC */
    649                 ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] =
    650                     MIN(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id],
    651                         i2_mv_per_poc);
    652             }
    653         }
    654 
    655         /*************************************************************************/
    656         /* Populate the results to all thread ctxt                               */
    657         /*************************************************************************/
    658         for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    659         {
    660             for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
    661             {
    662                 ps_master_ctxt->aps_me_ctxt[num_thrds]
    663                     ->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] =
    664                     ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id];
    665 
    666                 ps_master_ctxt->aps_me_ctxt[num_thrds]
    667                     ->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] =
    668                     ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id];
    669             }
    670         }
    671     }
    672 
    673     if(ME_XTREME_SPEED_25 != e_me_quality_preset)
    674     {
    675         /* call the function which calcualtes the GMV    */
    676         /* layer pointer is shared across all threads    */
    677         /* hence all threads will have access to updated */
    678         /* GMVs populated using thread 0 ctxt            */
    679         for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
    680         {
    681             hme_calculate_global_mv(
    682                 ps_thrd0_ctxt->aps_mv_hist[num_ref],
    683                 &ps_curr_layer->s_global_mv[num_ref][GMV_THICK_LOBE],
    684                 GMV_THICK_LOBE);
    685         }
    686     }
    687     return;
    688 }
    689 
    690 /*!
    691 ******************************************************************************
    692 * \if Function name : ihevce_coarse_me_frame_dpb_update \endif
    693 *
    694 * \brief
    695 *    Frame level ME initialisation function
    696 *
    697 * \par Description:
    698 *   Updation of ME's internal DPB
    699 *    based on available ref list information
    700 *
    701 * \param[in] pv_ctxt : pointer to ME module
    702 * \param[in] num_ref_l0 : Number of reference pics in L0 list
    703 * \param[in] num_ref_l1 : Number of reference pics in L1 list
    704 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list
    705 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list
    706 *
    707 * \return
    708 *    None
    709 *
    710 * \author
    711 *  Ittiam
    712 *
    713 *****************************************************************************
    714 */
    715 void ihevce_coarse_me_frame_dpb_update(
    716     void *pv_me_ctxt,
    717     WORD32 num_ref_l0,
    718     WORD32 num_ref_l1,
    719     recon_pic_buf_t **pps_rec_list_l0,
    720     recon_pic_buf_t **pps_rec_list_l1)
    721 {
    722     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    723     coarse_me_ctxt_t *ps_thrd0_ctxt;
    724     WORD32 a_pocs_buffered_in_me[MAX_NUM_REF + 1];
    725     WORD32 a_pocs_to_remove[MAX_NUM_REF + 2];
    726     WORD32 poc_remove_id = 0;
    727     WORD32 i, count;
    728 
    729     /* All processing done using shared / common memory across */
    730     /* threads is done using thrd ctxt */
    731     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
    732 
    733     /*************************************************************************/
    734     /* Updation of ME's DPB list. This involves the following steps:         */
    735     /* 1. Obtain list of active POCs maintained within ME.                   */
    736     /* 2. Search each of them in the ref list. Whatever is not found goes to */
    737     /*     the list to be removed. Note: a_pocs_buffered_in_me holds the     */
    738     /*    currently active POC list within ME. a_pocs_to_remove holds the    */
    739     /*    list of POCs to be removed, terminated by -1.                      */
    740     /*************************************************************************/
    741     hme_coarse_get_active_pocs_list((void *)ps_thrd0_ctxt, a_pocs_buffered_in_me);
    742 
    743     count = 0;
    744     while(a_pocs_buffered_in_me[count] != -1)
    745     {
    746         WORD32 poc_to_search = a_pocs_buffered_in_me[count];
    747         WORD32 match_found_flag = 0;
    748 
    749         /*********************************************************************/
    750         /* Search in any one list (L0/L1) since both lists contain all the   */
    751         /* active ref pics.                                                  */
    752         /*********************************************************************/
    753         for(i = 0; i < num_ref_l0; i++)
    754         {
    755             if(poc_to_search == pps_rec_list_l0[i]->i4_poc)
    756             {
    757                 match_found_flag = 1;
    758                 break;
    759             }
    760         }
    761         for(i = 0; i < num_ref_l1; i++)
    762         {
    763             if(poc_to_search == pps_rec_list_l1[i]->i4_poc)
    764             {
    765                 match_found_flag = 1;
    766                 break;
    767             }
    768         }
    769 
    770         if(0 == match_found_flag)
    771         {
    772             /*****************************************************************/
    773             /* POC buffered inside ME but not part of ref list given by DPB  */
    774             /* Hence this needs to be flagged to ME for removal.             */
    775             /*****************************************************************/
    776             a_pocs_to_remove[poc_remove_id] = poc_to_search;
    777             poc_remove_id++;
    778         }
    779         count++;
    780     }
    781 
    782     /* List termination */
    783     a_pocs_to_remove[poc_remove_id] = -1;
    784 
    785     /* Call the ME API to remove "outdated" POCs */
    786     hme_coarse_discard_frm(ps_thrd0_ctxt, a_pocs_to_remove);
    787 }
    788 
    789 /*!
    790 ******************************************************************************
    791 * \if Function name : ihevce_coarse_me_frame_init \endif
    792 *
    793 * \brief
    794 *    Coarse Frame level ME initialisation function
    795 *
    796 * \par Description:
    797 *    The following pre-conditions exist for this function: a. We have the input
    798 *    pic ready for encode, b. We have the reference list with POC, L0/L1 IDs
    799 *    and ref ptrs ready for this picture and c. ihevce_me_set_resolution has
    800 *    been called atleast once. Once these are supplied, the following are
    801 *    done here: a. Input pyramid creation, b. Updation of ME's internal DPB
    802 *    based on available ref list information
    803 *
    804 * \param[in] pv_ctxt : pointer to ME module
    805 * \param[in] ps_frm_ctb_prms : CTB characteristics parameters
    806 * \param[in] ps_frm_lamda : Frame level Lambda params
    807 * \param[in] num_ref_l0 : Number of reference pics in L0 list
    808 * \param[in] num_ref_l1 : Number of reference pics in L1 list
    809 * \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0)
    810 * \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1)
    811 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list
    812 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list
    813 * \param[in] ps_enc_lap_inp  : pointer to input yuv buffer (frame buffer)
    814 * \param[in] i4_frm_qp       : current picture QP
    815 *
    816 * \return
    817 *    None
    818 *
    819 * \author
    820 *  Ittiam
    821 *
    822 *****************************************************************************
    823 */
    824 void ihevce_coarse_me_frame_init(
    825     void *pv_me_ctxt,
    826     ihevce_static_cfg_params_t *ps_stat_prms,
    827     frm_ctb_ctxt_t *ps_frm_ctb_prms,
    828     frm_lambda_ctxt_t *ps_frm_lamda,
    829     WORD32 num_ref_l0,
    830     WORD32 num_ref_l1,
    831     WORD32 num_ref_l0_active,
    832     WORD32 num_ref_l1_active,
    833     recon_pic_buf_t **pps_rec_list_l0,
    834     recon_pic_buf_t **pps_rec_list_l1,
    835     ihevce_lap_enc_buf_t *ps_enc_lap_inp,
    836     WORD32 i4_frm_qp,
    837     ihevce_ed_blk_t *ps_layer1_buf,  //EIID
    838     ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
    839     UWORD8 *pu1_me_reverse_map_info,
    840     WORD32 i4_temporal_layer_id)
    841 {
    842     /* local variables */
    843     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    844     coarse_me_ctxt_t *ps_ctxt;
    845     coarse_me_ctxt_t *ps_thrd0_ctxt;
    846     WORD32 inp_poc, num_ref;
    847     WORD32 i;
    848 
    849     /* Input POC is derived from input buffer */
    850     inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
    851     num_ref = num_ref_l0 + num_ref_l1;
    852 
    853     /* All processing done using shared / common memory across */
    854     /* threads is done using thrd 0 ctxt */
    855     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
    856 
    857     ps_master_ctxt->s_frm_prms.u1_num_active_ref_l0 = num_ref_l0_active;
    858     ps_master_ctxt->s_frm_prms.u1_num_active_ref_l1 = num_ref_l1_active;
    859 
    860     /* store the frm ctb ctxt to all the thrd ctxt */
    861     {
    862         WORD32 num_thrds;
    863 
    864         /* initialise the parameters for all the threads */
    865         for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    866         {
    867             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
    868             ps_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms;
    869             /*EIID: early decision buffer pointer */
    870             ps_ctxt->ps_ed_blk = ps_layer1_buf;
    871             ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1;
    872 
    873             /* weighted pred enable flag */
    874             ps_ctxt->i4_wt_pred_enable_flag = ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag |
    875                                               ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
    876 
    877             if(1 == ps_ctxt->i4_wt_pred_enable_flag)
    878             {
    879                 /* log2 weight denom  */
    880                 ps_ctxt->s_wt_pred.wpred_log_wdc =
    881                     ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
    882             }
    883             else
    884             {
    885                 /* default value */
    886                 ps_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT;
    887             }
    888             ps_ctxt->i4_L1_hme_best_cost = 0;
    889             ps_ctxt->i4_L1_hme_sad = 0;
    890             ps_ctxt->i4_num_blks_high_sad = 0;
    891             ps_ctxt->i4_num_blks = 0;
    892 
    893             ps_ctxt->pv_me_optimised_function_list = ps_master_ctxt->pv_me_optimised_function_list;
    894             ps_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func;
    895         }
    896     }
    897     /* Create the reference map for ME */
    898     ihevce_me_create_ref_map(
    899         pps_rec_list_l0,
    900         pps_rec_list_l1,
    901         num_ref_l0_active,
    902         num_ref_l1_active,
    903         num_ref,
    904         &ps_master_ctxt->s_ref_map);
    905     /*************************************************************************/
    906     /* Call the ME frame level processing for further actiion.               */
    907     /* ToDo: Support Row Level API.                                          */
    908     /*************************************************************************/
    909     ps_master_ctxt->s_frm_prms.i2_mv_range_x = ps_thrd0_ctxt->s_init_prms.max_horz_search_range;
    910     ps_master_ctxt->s_frm_prms.i2_mv_range_y = ps_thrd0_ctxt->s_init_prms.max_vert_search_range;
    911 
    912     ps_master_ctxt->s_frm_prms.is_i_pic = 0;
    913     ps_master_ctxt->s_frm_prms.i4_temporal_layer_id = i4_temporal_layer_id;
    914 
    915     ps_master_ctxt->s_frm_prms.is_pic_second_field =
    916         (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^
    917            ps_enc_lap_inp->s_input_buf.i4_topfield_first));
    918     {
    919         S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
    920 
    921         /*********************************************************************/
    922         /* For I Pic, we do not call update fn at ctb level, instead we do   */
    923         /* one shot update for entire picture.                               */
    924         /*********************************************************************/
    925         if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME))
    926         {
    927             ps_master_ctxt->s_frm_prms.is_i_pic = 1;
    928             ps_master_ctxt->s_frm_prms.bidir_enabled = 0;
    929         }
    930         else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME))
    931         {
    932             ps_master_ctxt->s_frm_prms.bidir_enabled = 0;
    933         }
    934         else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME))
    935         {
    936             ps_master_ctxt->s_frm_prms.bidir_enabled = 1;
    937         }
    938         else
    939         {
    940             /* not sure whether we need to handle mixed frames like IP, */
    941             /* they should ideally come as single field. */
    942             /* TODO : resolve thsi ambiguity */
    943             ASSERT(0);
    944         }
    945     }
    946     /************************************************************************/
    947     /* Lambda calculations moved outside ME and to one place, so as to have */
    948     /* consistent lambda across ME, IPE, CL RDOPT etc                       */
    949     /************************************************************************/
    950 
    951     {
    952 #define CLIP3_F(min, max, val) (((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)))
    953         double q_steps[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 };
    954         double d_b_pic_factor;
    955         double d_q_factor;
    956         //double d_lambda;
    957         UWORD8 u1_temp_hier = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id;
    958 
    959         if(u1_temp_hier)
    960         {
    961             d_b_pic_factor = CLIP3_F(2.0, 4.0, (i4_frm_qp - 12.0) / 6.0);
    962         }
    963         else
    964             d_b_pic_factor = 1.0;
    965 
    966         d_q_factor = (1 << (i4_frm_qp / 6)) * q_steps[i4_frm_qp % 6];
    967         ps_master_ctxt->s_frm_prms.qstep = (WORD32)d_q_factor;
    968         ps_master_ctxt->s_frm_prms.i4_frame_qp = i4_frm_qp;
    969     }
    970 
    971     /* HME Dependency Manager : Reset the num ctb processed in every row */
    972     /* for ME sync in every layer                                        */
    973     {
    974         WORD32 ctr;
    975         for(ctr = 1; ctr < ps_thrd0_ctxt->num_layers; ctr++)
    976         {
    977             void *pv_dep_mngr_state;
    978             pv_dep_mngr_state = ps_master_ctxt->apv_dep_mngr_hme_sync[ctr - 1];
    979 
    980             ihevce_dmgr_rst_row_row_sync(pv_dep_mngr_state);
    981         }
    982     }
    983 
    984     /* Frame level init of all threads of ME */
    985     {
    986         WORD32 num_thrds;
    987 
    988         /* initialise the parameters for all the threads */
    989         for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    990         {
    991             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
    992 
    993             hme_coarse_process_frm_init(
    994                 (void *)ps_ctxt, ps_ctxt->ps_hme_ref_map, ps_ctxt->ps_hme_frm_prms);
    995         }
    996     }
    997 
    998     ps_master_ctxt->s_frm_prms.i4_cl_sad_lambda_qf = ps_frm_lamda->i4_cl_sad_lambda_qf;
    999     ps_master_ctxt->s_frm_prms.i4_cl_satd_lambda_qf = ps_frm_lamda->i4_cl_satd_lambda_qf;
   1000     ps_master_ctxt->s_frm_prms.i4_ol_sad_lambda_qf = ps_frm_lamda->i4_ol_sad_lambda_qf;
   1001     ps_master_ctxt->s_frm_prms.i4_ol_satd_lambda_qf = ps_frm_lamda->i4_ol_satd_lambda_qf;
   1002     ps_master_ctxt->s_frm_prms.lambda_q_shift = LAMBDA_Q_SHIFT;
   1003 
   1004     ps_master_ctxt->s_frm_prms.pf_interp_fxn = NULL;
   1005 
   1006     /*************************************************************************/
   1007     /* If num ref is 0, that means that it has to be coded as I. Do nothing  */
   1008     /* However mv bank update needs to happen with "intra" mv.               */
   1009     /*************************************************************************/
   1010     if(ps_master_ctxt->s_ref_map.i4_num_ref == 0 || ps_master_ctxt->s_frm_prms.is_i_pic)
   1011     {
   1012         for(i = 1; i < ps_thrd0_ctxt->num_layers; i++)
   1013         {
   1014             layer_ctxt_t *ps_layer_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[i];
   1015             BLK_SIZE_T e_blk_size;
   1016             S32 use_4x4;
   1017 
   1018             /* The mv bank is filled with "intra" mv */
   1019             use_4x4 = hme_get_mv_blk_size(
   1020                 ps_thrd0_ctxt->s_init_prms.use_4x4,
   1021                 i,
   1022                 ps_thrd0_ctxt->num_layers,
   1023                 ps_thrd0_ctxt->u1_encode[i]);
   1024             e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8;
   1025             hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]);
   1026             hme_fill_mvbank_intra(ps_layer_ctxt);
   1027 
   1028             /* Clear out the global mvs */
   1029             memset(
   1030                 ps_layer_ctxt->s_global_mv,
   1031                 0,
   1032                 sizeof(hme_mv_t) * ps_thrd0_ctxt->max_num_ref * NUM_GMV_LOBES);
   1033         }
   1034 
   1035         return;
   1036     }
   1037 
   1038     /*************************************************************************/
   1039     /* Coarse & refine Layer frm init (layer mem is common across thrds)     */
   1040     /*************************************************************************/
   1041     {
   1042         coarse_prms_t s_coarse_prms;
   1043         refine_prms_t s_refine_prms;
   1044         S16 i2_max;
   1045         S32 layer_id;
   1046 
   1047         layer_id = ps_thrd0_ctxt->num_layers - 1;
   1048         i2_max = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
   1049         i2_max = MAX(i2_max, ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
   1050         s_coarse_prms.i4_layer_id = layer_id;
   1051 
   1052         {
   1053             S32 log_start_step;
   1054             /* Based on Preset, set the starting step size for Refinement */
   1055             if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets)
   1056             {
   1057                 log_start_step = 0;
   1058             }
   1059             else
   1060             {
   1061                 log_start_step = 1;
   1062             }
   1063             s_coarse_prms.i4_max_iters = i2_max >> log_start_step;
   1064             s_coarse_prms.i4_start_step = 1 << log_start_step;
   1065         }
   1066         s_coarse_prms.i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref;
   1067         s_coarse_prms.do_full_search = 1;
   1068         s_coarse_prms.num_results = ps_thrd0_ctxt->max_num_results_coarse;
   1069 
   1070         hme_coarse_frm_init(ps_thrd0_ctxt, &s_coarse_prms);
   1071 
   1072         layer_id--;
   1073 
   1074         /*************************************************************************/
   1075         /* This loop will run for all refine layers (non- encode layers)          */
   1076         /*************************************************************************/
   1077         while(layer_id > 0)
   1078         {
   1079             layer_ctxt_t *ps_curr_layer;
   1080             layer_ctxt_t *ps_coarse_layer;
   1081 
   1082             ps_coarse_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id + 1];
   1083 
   1084             ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id];
   1085 
   1086             hme_set_refine_prms(
   1087                 &s_refine_prms,
   1088                 ps_thrd0_ctxt->u1_encode[layer_id],
   1089                 ps_master_ctxt->s_ref_map.i4_num_ref,
   1090                 layer_id,
   1091                 ps_thrd0_ctxt->num_layers,
   1092                 ps_thrd0_ctxt->num_layers_explicit_search,
   1093                 ps_thrd0_ctxt->s_init_prms.use_4x4,
   1094                 &ps_master_ctxt->s_frm_prms,
   1095                 NULL,
   1096                 &ps_thrd0_ctxt->s_init_prms.s_me_coding_tools);
   1097 
   1098             hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer);
   1099 
   1100             layer_id--;
   1101         }
   1102     }
   1103 
   1104     return;
   1105 }
   1106 
   1107 /*!
   1108 ******************************************************************************
   1109 * \if Function name : ihevce_decomp_pre_intra_frame_init \endif
   1110 *
   1111 * \brief
   1112 *    Frame Intialization for Decomp intra pre analysis.
   1113 *
   1114 * \param[in] pv_ctxt : pointer to module ctxt
   1115 * \param[in] ppu1_decomp_lyr_bufs : pointer to array of layer buffer pointers
   1116 * \param[in] pi4_lyr_buf_stride : pointer to array of layer buffer strides
   1117 *
   1118 * \return
   1119 *    None
   1120 *
   1121 * \author
   1122 *  Ittiam
   1123 *
   1124 *****************************************************************************
   1125 */
   1126 WORD32 ihevce_coarse_me_get_lyr_buf_desc(
   1127     void *pv_me_ctxt, UWORD8 **ppu1_decomp_lyr_bufs, WORD32 *pi4_lyr_buf_stride)
   1128 {
   1129     /* local variables */
   1130     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
   1131     coarse_me_ctxt_t *ps_thrd0_ctxt;
   1132     WORD32 lyr_no;
   1133     layers_descr_t *ps_curr_descr;
   1134     WORD32 i4_free_idx;
   1135 
   1136     /* All processing done using shared / common memory across */
   1137     /* threads is done using thrd0  ctxt */
   1138     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
   1139 
   1140     /* Obtain an empty layer descriptor */
   1141     i4_free_idx = hme_coarse_find_free_descr_idx((void *)ps_thrd0_ctxt);
   1142 
   1143     ps_curr_descr = &ps_thrd0_ctxt->as_ref_descr[i4_free_idx];
   1144 
   1145     /* export all the layer buffers except Layer 0 (encode layer) */
   1146     for(lyr_no = 1; lyr_no < ps_thrd0_ctxt->num_layers; lyr_no++)
   1147     {
   1148         pi4_lyr_buf_stride[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->i4_inp_stride;
   1149         ppu1_decomp_lyr_bufs[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->pu1_inp;
   1150     }
   1151 
   1152     return (i4_free_idx);
   1153 }
   1154 
   1155 /*!
   1156 ******************************************************************************
   1157 * \if Function name : ihevce_coarse_me_get_lyr_prms_job_que \endif
   1158 *
   1159 * \brief Returns to the caller key attributes related to dependency between layers
   1160 *          for multi-thread execution
   1161 *
   1162 *
   1163 * \par Description:
   1164 *    This function requires the precondition that the width and ht of encode
   1165 *    layer is known, and ME API ihevce_me_set_resolution() API called with
   1166 *    this info. Based on this, ME populates useful information for the encoder
   1167 *    to execute the multi-thread (concurrent across layers) in this API.
   1168 *    The number of layers, number of vertical units in each layer, and for
   1169 *    each vertial unit in each layer, its dependency on previous layer's units
   1170 *    From ME's perspective, a vertical unit is one which is smallest min size
   1171 *    vertically (and spans the entire row horizontally). This is CTB for encode
   1172 *    layer, and 8x8 / 4x4 for non encode layers.
   1173 *
   1174 * \param[in] pv_ctxt : ME handle
   1175 * \param[in] ps_curr_inp : Input buffer descriptor
   1176 * \param[out] pi4_num_hme_lyrs : Num of HME layers (ME updates)
   1177 * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each
   1178 *                     entry has num vertical units in that particular layer
   1179 * \param[in] ps_me_job_q_prms : Array of job queue prms, one for each unit in a
   1180 *                 layer. Note that this is contiguous in order of processing
   1181 *                 All k units of layer N-1 from top to bottom, followed by
   1182 *                 all m units of layer N-2 .... ends with X units of layer 0
   1183 *
   1184 * \return
   1185 *    None
   1186 *
   1187 * \author
   1188 *  Ittiam
   1189 *
   1190 *****************************************************************************
   1191 */
   1192 void ihevce_coarse_me_get_lyr_prms_job_que(
   1193     void *pv_me_ctxt,
   1194     ihevce_lap_enc_buf_t *ps_curr_inp,
   1195     WORD32 *pi4_num_hme_lyrs,
   1196     WORD32 *pi4_num_vert_units_in_lyr,
   1197     multi_thrd_me_job_q_prms_t *ps_me_job_q_prms)
   1198 {
   1199     coarse_me_ctxt_t *ps_ctxt;
   1200     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
   1201 
   1202     /* These arrays and ptrs track input dependencies for units of a layer */
   1203     /* This is a ping poing design, while using one part, we update other part */
   1204     U08 au1_inp_dep[2][MAX_NUM_VERT_UNITS_FRM];
   1205     U08 *pu1_inp_dep_c, *pu1_inp_dep_n;
   1206 
   1207     /* Height of current and next layers */
   1208     S32 ht_c, ht_n;
   1209 
   1210     /* Blk ht at a given layer and next layer*/
   1211     S32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n;
   1212 
   1213     /* Number of vertical units in current and next layer */
   1214     S32 num_vert_c, num_vert_n;
   1215 
   1216     S32 ctb_size = 64, num_layers, i, j, k;
   1217 
   1218     /* since same layer desc pointer is stored in all thread ctxt */
   1219     /* a free idx is obtained using 0th thread ctxt pointer */
   1220     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
   1221 
   1222     /* Set the number of layers */
   1223     num_layers = ps_ctxt->num_layers;
   1224     *pi4_num_hme_lyrs = num_layers;
   1225 
   1226     pu1_inp_dep_c = &au1_inp_dep[0][0];
   1227     pu1_inp_dep_n = &au1_inp_dep[1][0];
   1228 
   1229     ASSERT(num_layers >= 2);
   1230 
   1231     ht_n = ps_ctxt->a_ht[num_layers - 2];
   1232     ht_c = ps_ctxt->a_ht[num_layers - 1];
   1233 
   1234     /* compute blk ht and unit ht for c and n */
   1235     if(ps_ctxt->u1_encode[num_layers - 1])
   1236     {
   1237         blk_ht_c = 16;
   1238         unit_ht_c = ctb_size;
   1239     }
   1240     else
   1241     {
   1242         blk_ht_c = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, num_layers - 1, num_layers, 0);
   1243         unit_ht_c = blk_ht_c;
   1244     }
   1245 
   1246     num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c;
   1247 
   1248     /* For new design in Coarsest HME layer we need */
   1249     /* one additional row extra at the end of frame */
   1250     /* hence num_vert_c is incremented by 1         */
   1251     num_vert_c++;
   1252 
   1253     /* Dummy initialization outside loop, not used first time */
   1254     memset(pu1_inp_dep_c, 0, num_vert_c);
   1255 
   1256     /*************************************************************************/
   1257     /* Run through each layer, set the number of vertical units and job queue*/
   1258     /* attrs for each vert unit in the layer                                 */
   1259     /*************************************************************************/
   1260     for(i = num_layers - 1; i > 0; i--)
   1261     {
   1262         /* 0th entry is actually layer id num_layers - 1 */
   1263         /* and entry num_layers-1 equals the biggest layer (id = 0) */
   1264         pi4_num_vert_units_in_lyr[num_layers - 1 - i] = num_vert_c;
   1265         /* "n" is computed for first time */
   1266         ht_n = ps_ctxt->a_ht[i - 1];
   1267         blk_ht_n = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, i - 1, num_layers, 0);
   1268         unit_ht_n = blk_ht_n;
   1269         if(ps_ctxt->u1_encode[i - 1])
   1270             unit_ht_n = ctb_size;
   1271 
   1272         num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n;
   1273         /* Initialize all units' inp dep in next layer to 0 */
   1274         memset(pu1_inp_dep_n, 0, num_vert_n * sizeof(U08));
   1275 
   1276         /* Evaluate dependencies for this layer */
   1277         for(j = 0; j < num_vert_c; j++)
   1278         {
   1279             S32 v1, v2;
   1280 
   1281             /* Output dependencies. When one unit in current layer finishes, */
   1282             /* how many in the next layer it affects?. Assuming that the top */
   1283             /* of this vertical unit and bottom of this vertical unit project*/
   1284             /* somewhere in the next layer. The top of this vertical unit    */
   1285             /* becomes the bottom right point for somebody, and the bottom of*/
   1286             /* this vertical unit becomes the colocated pt for somebody, this*/
   1287             /* is the extremum.                                              */
   1288 
   1289             /* for the initial unit affected by j in "c" layer, take j-1th   */
   1290             /* unit top and project it.                                      */
   1291             v1 = (j - 1) * unit_ht_c * ht_n;
   1292             v1 /= (ht_c * unit_ht_n);
   1293             v1 -= 1;
   1294 
   1295             /* for the final unit affected by j in "c" layer, take jth unit  */
   1296             /* bottom and project it.                                        */
   1297 
   1298             v2 = (j + 1) * unit_ht_c * ht_n;
   1299             v2 /= (ht_c * unit_ht_n);
   1300             v2 += 1;
   1301 
   1302             /* Clip to be within valid limits */
   1303             v1 = HME_CLIP(v1, 0, (num_vert_n - 1));
   1304             v2 = HME_CLIP(v2, 0, (num_vert_n - 1));
   1305 
   1306             /* In the layer "n", units starting at offset v1, and upto v2 are*/
   1307             /* dependent on unit j of layer "c". So for each of these units  */
   1308             /* increment the dependency by 1 corresponding to "jth" unit in  */
   1309             /* layer "c"                                                     */
   1310             ps_me_job_q_prms->i4_num_output_dep = v2 - v1 + 1;
   1311             ASSERT(ps_me_job_q_prms->i4_num_output_dep <= MAX_OUT_DEP);
   1312             for(k = v1; k <= v2; k++)
   1313                 pu1_inp_dep_n[k]++;
   1314 
   1315             /* Input dependency would have been calculated in prev run */
   1316             ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j];
   1317             ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP);
   1318 
   1319             /* Offsets */
   1320             for(k = v1; k <= v2; k++)
   1321                 ps_me_job_q_prms->ai4_out_dep_unit_off[k - v1] = k;
   1322 
   1323             ps_me_job_q_prms++;
   1324         }
   1325 
   1326         /* Compute the blk size and vert unit size in each layer             */
   1327         /* "c" denotes curr layer, and "n" denotes the layer to which result */
   1328         /* is projected to                                                   */
   1329         ht_c = ht_n;
   1330         blk_ht_c = blk_ht_n;
   1331         unit_ht_c = unit_ht_n;
   1332         num_vert_c = num_vert_n;
   1333 
   1334         /* Input dep count for next layer was computed this iteration. */
   1335         /* Swap so that p_inp_dep_n becomes current for next iteration, */
   1336         /* and p_inp_dep_c will become update area during next iteration */
   1337         /* for next to next.                                             */
   1338         {
   1339             U08 *pu1_tmp = pu1_inp_dep_n;
   1340             pu1_inp_dep_n = pu1_inp_dep_c;
   1341             pu1_inp_dep_c = pu1_tmp;
   1342         }
   1343     }
   1344 
   1345     /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
   1346 
   1347     /* set the numebr of vertical units */
   1348     pi4_num_vert_units_in_lyr[num_layers - 1] = num_vert_c;
   1349     for(j = 0; j < num_vert_c; j++)
   1350     {
   1351         /* Here there is no output dependency for ME. However this data is used for encode, */
   1352         /* and there is a 1-1 correspondence between this and the encode     */
   1353         /* Hence we set output dependency of 1 */
   1354         ps_me_job_q_prms->i4_num_output_dep = 1;
   1355         ps_me_job_q_prms->ai4_out_dep_unit_off[0] = j;
   1356         ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j];
   1357         ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP);
   1358         ps_me_job_q_prms++;
   1359     }
   1360 
   1361     return;
   1362 }
   1363 
   1364 /*!
   1365 ******************************************************************************
   1366 * \if Function name : ihevce_coarse_me_set_lyr1_mv_bank \endif
   1367 *
   1368 * \brief
   1369 *    Frame level ME initialisation of MV bank of penultimate layer
   1370 *
   1371 * \par Description:
   1372 *    Updates the Layer1 context with the given buffers
   1373 *
   1374 * \param[in] pv_me_ctxt : pointer to ME module
   1375 * \param[in] pu1_mv_bank : MV bank buffer pointer
   1376 * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer
   1377 *
   1378 * \return
   1379 *    None
   1380 *
   1381 * \author
   1382 *  Ittiam
   1383 *
   1384 *****************************************************************************
   1385 */
   1386 void ihevce_coarse_me_set_lyr1_mv_bank(
   1387     void *pv_me_ctxt,
   1388     ihevce_lap_enc_buf_t *ps_enc_lap_inp,
   1389     void *pv_mv_bank,
   1390     void *pv_ref_idx_bank,
   1391     WORD32 i4_curr_idx)
   1392 {
   1393     coarse_me_ctxt_t *ps_thrd0_ctxt;
   1394     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
   1395     layer_ctxt_t *ps_lyr1_ctxt;
   1396 
   1397     /* Input descriptor that is updated and passed to ME */
   1398     hme_inp_desc_t s_inp_desc;
   1399 
   1400     /*************************************************************************/
   1401     /* Add the current input to ME's DPB. This will also create the pyramids */
   1402     /* for the HME layers tha are not "encoded".                             */
   1403     /*************************************************************************/
   1404     s_inp_desc.i4_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
   1405     s_inp_desc.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf;
   1406     s_inp_desc.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf;
   1407     s_inp_desc.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf;
   1408 
   1409     s_inp_desc.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd;
   1410     s_inp_desc.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd;
   1411 
   1412     hme_coarse_add_inp(pv_me_ctxt, &s_inp_desc, i4_curr_idx);
   1413 
   1414     /* All processing done using shared / common memory across */
   1415     /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */
   1416     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
   1417 
   1418     ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1];
   1419 
   1420     /* register the mv bank & ref idx bank pointer */
   1421     ps_lyr1_ctxt->ps_layer_mvbank->pi1_ref_idx_base = (S08 *)pv_ref_idx_bank;
   1422     ps_lyr1_ctxt->ps_layer_mvbank->ps_mv_base = (hme_mv_t *)pv_mv_bank;
   1423 
   1424     return;
   1425 }
   1426 
   1427 /*!
   1428 ******************************************************************************
   1429 * \if Function name : ihevce_coarse_me_get_lyr1_ctxt \endif
   1430 *
   1431 * \brief
   1432 *    function to get teh Layer 1 properties to be passed on the encode layer
   1433 *
   1434 * \par Description:
   1435 *    Ucopies the enitre layer ctxt emory to the destination
   1436 *
   1437 * \param[in] pv_me_ctxt : pointer to ME module
   1438 * \param[in] pu1_mv_bank : MV bank buffer pointer
   1439 * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer
   1440 *
   1441 * \return
   1442 *    None
   1443 *
   1444 * \author
   1445 *  Ittiam
   1446 *
   1447 *****************************************************************************
   1448 */
   1449 void ihevce_coarse_me_get_lyr1_ctxt(
   1450     void *pv_me_ctxt, void *pv_layer_ctxt, void *pv_layer_mv_bank_ctxt)
   1451 {
   1452     coarse_me_ctxt_t *ps_thrd0_ctxt;
   1453     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
   1454     layer_ctxt_t *ps_lyr1_ctxt;
   1455 
   1456     /* All processing done using shared / common memory across */
   1457     /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */
   1458     ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
   1459 
   1460     /* get the context of layer 1 */
   1461     ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1];
   1462 
   1463     /* copy the layer ctxt eve registerd mv bank & ref idx bank also goes in */
   1464     memcpy(pv_layer_ctxt, ps_lyr1_ctxt, sizeof(layer_ctxt_t));
   1465 
   1466     /* copy the layer mv bank contents */
   1467     memcpy(pv_layer_mv_bank_ctxt, ps_lyr1_ctxt->ps_layer_mvbank, sizeof(layer_mv_t));
   1468 
   1469     /* register the MV bank pointer in the layer ctxt*/
   1470     ((layer_ctxt_t *)pv_layer_ctxt)->ps_layer_mvbank = (layer_mv_t *)pv_layer_mv_bank_ctxt;
   1471 
   1472     return;
   1473 }
   1474