Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 ******************************************************************************
     22 * @file hme_refine.c
     23 *
     24 * @brief
     25 *    Contains the implementation of the refinement layer searches and related
     26 *    functionality like CU merge.
     27 *
     28 * @author
     29 *    Ittiam
     30 *
     31 *
     32 * List of Functions
     33 *
     34 *
     35 ******************************************************************************
     36 */
     37 
     38 /*****************************************************************************/
     39 /* File Includes                                                             */
     40 /*****************************************************************************/
     41 /* System include files */
     42 #include <stdio.h>
     43 #include <string.h>
     44 #include <stdlib.h>
     45 #include <assert.h>
     46 #include <stdarg.h>
     47 #include <math.h>
     48 #include <limits.h>
     49 
     50 /* User include files */
     51 #include "ihevc_typedefs.h"
     52 #include "itt_video_api.h"
     53 #include "ihevce_api.h"
     54 
     55 #include "rc_cntrl_param.h"
     56 #include "rc_frame_info_collector.h"
     57 #include "rc_look_ahead_params.h"
     58 
     59 #include "ihevc_defs.h"
     60 #include "ihevc_structs.h"
     61 #include "ihevc_platform_macros.h"
     62 #include "ihevc_deblk.h"
     63 #include "ihevc_itrans_recon.h"
     64 #include "ihevc_chroma_itrans_recon.h"
     65 #include "ihevc_chroma_intra_pred.h"
     66 #include "ihevc_intra_pred.h"
     67 #include "ihevc_inter_pred.h"
     68 #include "ihevc_mem_fns.h"
     69 #include "ihevc_padding.h"
     70 #include "ihevc_weighted_pred.h"
     71 #include "ihevc_sao.h"
     72 #include "ihevc_resi_trans.h"
     73 #include "ihevc_quant_iquant_ssd.h"
     74 #include "ihevc_cabac_tables.h"
     75 
     76 #include "ihevce_defs.h"
     77 #include "ihevce_lap_enc_structs.h"
     78 #include "ihevce_multi_thrd_structs.h"
     79 #include "ihevce_multi_thrd_funcs.h"
     80 #include "ihevce_me_common_defs.h"
     81 #include "ihevce_had_satd.h"
     82 #include "ihevce_error_codes.h"
     83 #include "ihevce_bitstream.h"
     84 #include "ihevce_cabac.h"
     85 #include "ihevce_rdoq_macros.h"
     86 #include "ihevce_function_selector.h"
     87 #include "ihevce_enc_structs.h"
     88 #include "ihevce_entropy_structs.h"
     89 #include "ihevce_cmn_utils_instr_set_router.h"
     90 #include "ihevce_enc_loop_structs.h"
     91 #include "ihevce_bs_compute_ctb.h"
     92 #include "ihevce_global_tables.h"
     93 #include "ihevce_dep_mngr_interface.h"
     94 #include "hme_datatype.h"
     95 #include "hme_interface.h"
     96 #include "hme_common_defs.h"
     97 #include "hme_defs.h"
     98 #include "ihevce_me_instr_set_router.h"
     99 #include "hme_globals.h"
    100 #include "hme_utils.h"
    101 #include "hme_coarse.h"
    102 #include "hme_fullpel.h"
    103 #include "hme_subpel.h"
    104 #include "hme_refine.h"
    105 #include "hme_err_compute.h"
    106 #include "hme_common_utils.h"
    107 #include "hme_search_algo.h"
    108 #include "ihevce_stasino_helpers.h"
    109 #include "ihevce_common_utils.h"
    110 
    111 /*****************************************************************************/
    112 /* Globals                                                                   */
    113 /*****************************************************************************/
    114 
    115 /* brief: mapping buffer to convert raster scan indices into z-scan oder in a ctb */
    116 UWORD8 gau1_raster_scan_to_ctb[4][4] = {
    117     { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
    118 };
    119 
    120 /*****************************************************************************/
    121 /* Extern Function declaration                                               */
    122 /*****************************************************************************/
    123 extern ctb_boundary_attrs_t *
    124     get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
    125 
    126 typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
    127     search_node_t *ps_search_node,
    128     layer_ctxt_t *ps_curr_layer,
    129     layer_ctxt_t *ps_coarse_layer,
    130     S32 i4_pos_x,
    131     S32 i4_pos_y,
    132     S08 i1_ref_id,
    133     S32 i4_result_id);
    134 
    135 typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
    136     search_node_t *ps_search_node,
    137     layer_ctxt_t *ps_curr_layer,
    138     layer_ctxt_t *ps_coarse_layer,
    139     S32 i4_pos_x,
    140     S32 i4_pos_y,
    141     S32 i4_num_act_ref_l0,
    142     U08 u1_pred_dir,
    143     U08 u1_default_ref_id,
    144     S32 i4_result_id);
    145 
    146 /*****************************************************************************/
    147 /* Function Definitions                                                      */
    148 /*****************************************************************************/
    149 
    150 void ihevce_no_wt_copy(
    151     coarse_me_ctxt_t *ps_ctxt,
    152     layer_ctxt_t *ps_curr_layer,
    153     pu_t *ps_pu,
    154     UWORD8 *pu1_temp_pred,
    155     WORD32 temp_stride,
    156     WORD32 blk_x,
    157     WORD32 blk_y)
    158 {
    159     UWORD8 *pu1_ref;
    160     WORD32 ref_stride, ref_offset;
    161     WORD32 row, col, i4_tmp;
    162 
    163     ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));
    164 
    165     if(ps_pu->b2_pred_mode == PRED_L0)
    166     {
    167         WORD8 i1_ref_idx;
    168 
    169         i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
    170         pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
    171 
    172         ref_stride = ps_curr_layer->i4_inp_stride;
    173 
    174         ref_offset = ((blk_y << 3) + ps_pu->mv.s_l0_mv.i2_mvy) * ref_stride;
    175         ref_offset += (blk_x << 3) + ps_pu->mv.s_l0_mv.i2_mvx;
    176 
    177         pu1_ref += ref_offset;
    178 
    179         for(row = 0; row < temp_stride; row++)
    180         {
    181             for(col = 0; col < temp_stride; col++)
    182             {
    183                 i4_tmp = pu1_ref[col];
    184                 pu1_temp_pred[col] = CLIP_U8(i4_tmp);
    185             }
    186 
    187             pu1_ref += ref_stride;
    188             pu1_temp_pred += temp_stride;
    189         }
    190     }
    191     else
    192     {
    193         WORD8 i1_ref_idx;
    194 
    195         i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
    196         pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
    197 
    198         ref_stride = ps_curr_layer->i4_inp_stride;
    199 
    200         ref_offset = ((blk_y << 3) + ps_pu->mv.s_l1_mv.i2_mvy) * ref_stride;
    201         ref_offset += (blk_x << 3) + ps_pu->mv.s_l1_mv.i2_mvx;
    202 
    203         pu1_ref += ref_offset;
    204 
    205         for(row = 0; row < temp_stride; row++)
    206         {
    207             for(col = 0; col < temp_stride; col++)
    208             {
    209                 i4_tmp = pu1_ref[col];
    210                 pu1_temp_pred[col] = CLIP_U8(i4_tmp);
    211             }
    212 
    213             pu1_ref += ref_stride;
    214             pu1_temp_pred += temp_stride;
    215         }
    216     }
    217 }
    218 
    219 static WORD32 hme_add_clustered_mvs_as_merge_cands(
    220     cluster_data_t *ps_cluster_base,
    221     search_node_t *ps_merge_cand,
    222     range_prms_t **pps_range_prms,
    223     U08 *pu1_refid_to_pred_dir_list,
    224     WORD32 i4_num_clusters,
    225     U08 u1_pred_dir)
    226 {
    227     WORD32 i, j, k;
    228     WORD32 i4_num_cands_added = 0;
    229     WORD32 i4_num_mvs_in_cluster;
    230 
    231     for(i = 0; i < i4_num_clusters; i++)
    232     {
    233         cluster_data_t *ps_data = &ps_cluster_base[i];
    234 
    235         if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
    236         {
    237             i4_num_mvs_in_cluster = ps_data->num_mvs;
    238 
    239             for(j = 0; j < i4_num_mvs_in_cluster; j++)
    240             {
    241                 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
    242                 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
    243                 ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;
    244 
    245                 CLIP_MV_WITHIN_RANGE(
    246                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
    247                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
    248                     pps_range_prms[ps_data->ref_id],
    249                     0,
    250                     0,
    251                     0);
    252 
    253                 for(k = 0; k < i4_num_cands_added; k++)
    254                 {
    255                     if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) &&
    256                        (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) &&
    257                        (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
    258                     {
    259                         break;
    260                     }
    261                 }
    262 
    263                 if(k == i4_num_cands_added)
    264                 {
    265                     i4_num_cands_added++;
    266                 }
    267             }
    268         }
    269     }
    270 
    271     return i4_num_cands_added;
    272 }
    273 
    274 static WORD32 hme_add_me_best_as_merge_cands(
    275     search_results_t **pps_child_data_array,
    276     inter_cu_results_t *ps_8x8cu_results,
    277     search_node_t *ps_merge_cand,
    278     range_prms_t **pps_range_prms,
    279     U08 *pu1_refid_to_pred_dir_list,
    280     S08 *pi1_past_list,
    281     S08 *pi1_future_list,
    282     BLK_SIZE_T e_blk_size,
    283     ME_QUALITY_PRESETS_T e_quality_preset,
    284     S32 i4_num_cands_added,
    285     U08 u1_pred_dir)
    286 {
    287     WORD32 i, j, k;
    288     WORD32 i4_max_cands_to_add;
    289 
    290     WORD32 i4_result_id = 0;
    291 
    292     ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
    293     ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
    294     ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
    295     ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
    296 
    297     switch(e_quality_preset)
    298     {
    299     case ME_PRISTINE_QUALITY:
    300     {
    301         i4_max_cands_to_add = MAX_MERGE_CANDTS;
    302 
    303         break;
    304     }
    305     case ME_HIGH_QUALITY:
    306     {
    307         /* All 4 children are split and each grandchild contributes an MV */
    308         /* and 2 best results per grandchild */
    309         i4_max_cands_to_add = 4 * 4 * 2;
    310 
    311         break;
    312     }
    313     case ME_MEDIUM_SPEED:
    314     {
    315         i4_max_cands_to_add = 4 * 2 * 2;
    316 
    317         break;
    318     }
    319     case ME_HIGH_SPEED:
    320     case ME_XTREME_SPEED:
    321     case ME_XTREME_SPEED_25:
    322     {
    323         i4_max_cands_to_add = 4 * 2 * 1;
    324 
    325         break;
    326     }
    327     }
    328 
    329     while(i4_result_id < 4)
    330     {
    331         for(i = 0; i < 4; i++)
    332         {
    333             inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
    334             inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
    335 
    336             if(!pps_child_data_array[i]->u1_split_flag)
    337             {
    338                 part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
    339 
    340                 if(ps_child_data->u1_num_best_results <= i4_result_id)
    341                 {
    342                     continue;
    343                 }
    344 
    345                 if(ps_data->as_pu_results->pu.b1_intra_flag)
    346                 {
    347                     continue;
    348                 }
    349 
    350                 for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
    351                 {
    352                     mv_t *ps_mv;
    353 
    354                     S08 i1_ref_idx;
    355 
    356                     pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
    357 
    358                     if(u1_pred_dir !=
    359                        ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
    360                     {
    361                         continue;
    362                     }
    363 
    364                     if(u1_pred_dir)
    365                     {
    366                         ps_mv = &ps_pu->mv.s_l1_mv;
    367                         i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
    368                     }
    369                     else
    370                     {
    371                         ps_mv = &ps_pu->mv.s_l0_mv;
    372                         i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
    373                     }
    374 
    375                     if(-1 == i1_ref_idx)
    376                     {
    377                         continue;
    378                     }
    379 
    380                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
    381                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
    382                     ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
    383 
    384                     CLIP_MV_WITHIN_RANGE(
    385                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
    386                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
    387                         pps_range_prms[i1_ref_idx],
    388                         0,
    389                         0,
    390                         0);
    391 
    392                     for(k = 0; k < i4_num_cands_added; k++)
    393                     {
    394                         if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
    395                            (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
    396                            (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
    397                         {
    398                             break;
    399                         }
    400                     }
    401 
    402                     if(k == i4_num_cands_added)
    403                     {
    404                         i4_num_cands_added++;
    405 
    406                         if(i4_max_cands_to_add <= i4_num_cands_added)
    407                         {
    408                             return i4_num_cands_added;
    409                         }
    410                     }
    411                 }
    412             }
    413             else
    414             {
    415                 for(j = 0; j < 4; j++)
    416                 {
    417                     mv_t *ps_mv;
    418 
    419                     S08 i1_ref_idx;
    420 
    421                     part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
    422                     pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
    423 
    424                     ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
    425 
    426                     if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
    427                     {
    428                         continue;
    429                     }
    430 
    431                     if(ps_data->as_pu_results->pu.b1_intra_flag)
    432                     {
    433                         continue;
    434                     }
    435 
    436                     if(u1_pred_dir !=
    437                        ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
    438                     {
    439                         continue;
    440                     }
    441 
    442                     if(u1_pred_dir)
    443                     {
    444                         ps_mv = &ps_pu->mv.s_l1_mv;
    445                         i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
    446                     }
    447                     else
    448                     {
    449                         ps_mv = &ps_pu->mv.s_l0_mv;
    450                         i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
    451                     }
    452 
    453                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
    454                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
    455                     ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
    456 
    457                     CLIP_MV_WITHIN_RANGE(
    458                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
    459                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
    460                         pps_range_prms[i1_ref_idx],
    461                         0,
    462                         0,
    463                         0);
    464 
    465                     for(k = 0; k < i4_num_cands_added; k++)
    466                     {
    467                         if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
    468                            (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
    469                            (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
    470                         {
    471                             break;
    472                         }
    473                     }
    474 
    475                     if(k == i4_num_cands_added)
    476                     {
    477                         i4_num_cands_added++;
    478 
    479                         if(i4_max_cands_to_add <= i4_num_cands_added)
    480                         {
    481                             return i4_num_cands_added;
    482                         }
    483                     }
    484                 }
    485             }
    486         }
    487 
    488         i4_result_id++;
    489     }
    490 
    491     return i4_num_cands_added;
    492 }
    493 
    494 WORD32 hme_add_cands_for_merge_eval(
    495     ctb_cluster_info_t *ps_cluster_info,
    496     search_results_t **pps_child_data_array,
    497     inter_cu_results_t *ps_8x8cu_results,
    498     range_prms_t **pps_range_prms,
    499     search_node_t *ps_merge_cand,
    500     U08 *pu1_refid_to_pred_dir_list,
    501     S08 *pi1_past_list,
    502     S08 *pi1_future_list,
    503     ME_QUALITY_PRESETS_T e_quality_preset,
    504     BLK_SIZE_T e_blk_size,
    505     U08 u1_pred_dir,
    506     U08 u1_blk_id)
    507 {
    508     WORD32 i4_num_cands_added = 0;
    509 
    510     if(ME_PRISTINE_QUALITY == e_quality_preset)
    511     {
    512         cluster_data_t *ps_cluster_primo;
    513 
    514         WORD32 i4_num_clusters;
    515 
    516         if(BLK_32x32 == e_blk_size)
    517         {
    518             ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
    519             i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
    520         }
    521         else
    522         {
    523             ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
    524             i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
    525         }
    526 
    527         i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
    528             ps_cluster_primo,
    529             ps_merge_cand,
    530             pps_range_prms,
    531             pu1_refid_to_pred_dir_list,
    532             i4_num_clusters,
    533             u1_pred_dir);
    534     }
    535 
    536     i4_num_cands_added = hme_add_me_best_as_merge_cands(
    537         pps_child_data_array,
    538         ps_8x8cu_results,
    539         ps_merge_cand,
    540         pps_range_prms,
    541         pu1_refid_to_pred_dir_list,
    542         pi1_past_list,
    543         pi1_future_list,
    544         e_blk_size,
    545         e_quality_preset,
    546         i4_num_cands_added,
    547         u1_pred_dir);
    548 
    549     return i4_num_cands_added;
    550 }
    551 
    552 /**
    553 ********************************************************************************
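    554 *  @fn   WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
    555 *           hme_subpel_prms_t *ps_subpel_prms, S32 i4_search_idx, S32 i4_best_part_type,
    556 *           S32 i4_is_vert, wgt_pred_ctxt_t *ps_wt_inp_prms, S32 i4_frm_qstep,
    557 *           ihevce_cmn_opt_func_t *, ihevce_me_optimised_function_list_t *)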
    558 *
    559 *  @brief  Given a target partition orientation in the merged CU, and the
    560 *          partition type of most likely partition this fxn picks up
    561 *          candidates from the 4 constituent CUs and does refinement search
    562 *          to identify best results for the merge CU across active partitions
    563 *
    564 *  @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
    565 *                  these params, the search result structure is also derived and
    566 *                 updated during the search
    567 *
    568 *  @param[in] i4_search_idx : ID of the buffer within the search results to update.
    569 *               Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
    570 *
    571 *  @param[in] i4_best_part_type : partition type of potential partition in the
    572 *              merged CU, -1 if the merge process has not yet been able to
    573 *              determine this.
    574 *
    575 *  @param[in] i4_is_vert : Whether target partition of merged CU is vertical
    576 *             orientation or horizontal orientation.
    577 *
    578 *  @return Number of merge candidates
    579 ********************************************************************************
    580 */
    581 WORD32 hme_pick_eval_merge_candts(
    582     hme_merge_prms_t *ps_merge_prms,
    583     hme_subpel_prms_t *ps_subpel_prms,
    584     S32 i4_search_idx,
    585     S32 i4_best_part_type,
    586     S32 i4_is_vert,
    587     wgt_pred_ctxt_t *ps_wt_inp_prms,
    588     S32 i4_frm_qstep,
    589     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
    590     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
    591 {
    592     S32 x_off, y_off;
    593     search_node_t *ps_search_node;
    594     S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
    595     S32 i4_num_valid_parts;
    596     pred_ctxt_t *ps_pred_ctxt;
    597 
    598     search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
    599     S32 num_unique_nodes_cu_merge = 0;
    600 
    601     search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
    602     CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
    603     S32 i4_part_mask = ps_search_results->i4_part_mask;
    604 
    605     search_results_t *aps_child_results[4];
    606     layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
    607 
    608     S32 i4_ref_stride, i, j;
    609     result_upd_prms_t s_result_prms;
    610 
    611     BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
    612     S32 i4_offset;
    613 
    614     /*************************************************************************/
    615     /* Function pointer for SAD/SATD, array and prms structure to pass to    */
    616     /* This function                                                         */
    617     /*************************************************************************/
    618     PF_SAD_FXN_T pf_err_compute;
    619     S32 ai4_sad_grid[9][17];
    620     err_prms_t s_err_prms;
    621 
    622     /*************************************************************************/
    623     /* Allowed MV RANGE                                                      */
    624     /*************************************************************************/
    625     range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
    626     PF_INTERP_FXN_T pf_qpel_interp;
    627     PF_MV_COST_FXN pf_mv_cost_compute;
    628     WORD32 pred_lx;
    629     U08 *apu1_hpel_ref[4];
    630 
    631     interp_prms_t s_interp_prms;
    632     S32 i4_interp_buf_id;
    633 
    634     S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
    635     S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
    636 
    637     /* Sanity checks */
    638     ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
    639 
    640     s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
    641 
    642     /* Initialize all the ptrs to child CUs for merge decision */
    643     aps_child_results[0] = ps_merge_prms->ps_results_tl;
    644     aps_child_results[1] = ps_merge_prms->ps_results_tr;
    645     aps_child_results[2] = ps_merge_prms->ps_results_bl;
    646     aps_child_results[3] = ps_merge_prms->ps_results_br;
    647 
    648     num_unique_nodes_cu_merge = 0;
    649 
    650     pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
    651 
    652     if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
    653     {
    654         num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
    655             ps_merge_prms->ps_cluster_info,
    656             aps_child_results,
    657             ps_merge_prms->ps_8x8_cu_results,
    658             pps_range_prms,
    659             as_merge_unique_node,
    660             ps_search_results->pu1_is_past,
    661             ps_merge_prms->pi1_past_list,
    662             ps_merge_prms->pi1_future_list,
    663             ps_merge_prms->e_quality_preset,
    664             e_blk_size,
    665             i4_search_idx,
    666             (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
    667                 (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
    668     }
    669     else
    670     {
    671         /*************************************************************************/
    672         /* Populate the list of unique search nodes in the child CUs for merge   */
    673         /* evaluation                                                            */
    674         /*************************************************************************/
    675         for(i = 0; i < 4; i++)
    676         {
    677             search_node_t s_search_node;
    678 
    679             PART_TYPE_T e_part_type;
    680             PART_ID_T e_part_id;
    681 
    682             WORD32 part_num;
    683 
    684             search_results_t *ps_child = aps_child_results[i];
    685 
    686             if(ps_child->ps_cu_results->u1_num_best_results)
    687             {
    688                 if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
    689                      (1 == ps_child->ps_cu_results->u1_num_best_results)))
    690                 {
    691                     e_part_type =
    692                         (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
    693 
    694                     ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
    695 
    696                     /* Insert mvs of NxN partitions. */
    697                     for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
    698                         part_num++)
    699                     {
    700                         e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
    701 
    702                         if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
    703                         {
    704                             s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
    705                             if(s_search_node.s_mv.i2_mvx != INTRA_MV)
    706                             {
    707                                 CLIP_MV_WITHIN_RANGE(
    708                                     s_search_node.s_mv.i2_mvx,
    709                                     s_search_node.s_mv.i2_mvy,
    710                                     pps_range_prms[s_search_node.i1_ref_idx],
    711                                     0,
    712                                     0,
    713                                     0);
    714 
    715                                 INSERT_NEW_NODE_NOMAP(
    716                                     as_merge_unique_node,
    717                                     num_unique_nodes_cu_merge,
    718                                     s_search_node,
    719                                     1);
    720                             }
    721                         }
    722                     }
    723                 }
    724             }
    725             else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
    726                            .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
    727                       (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
    728                                 .ps_cu_results->u1_num_best_results)))
    729             {
    730                 search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
    731 
    732                 for(j = 0; j < 4; j++)
    733                 {
    734                     e_part_type = (PART_TYPE_T)ps_results_root[j]
    735                                       .ps_cu_results->ps_best_results[0]
    736                                       .u1_part_type;
    737 
    738                     ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
    739 
    740                     /* Insert mvs of NxN partitions. */
    741                     for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
    742                         part_num++)
    743                     {
    744                         e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
    745 
    746                         if((ps_results_root[j]
    747                                 .aps_part_results[i4_search_idx][e_part_id]
    748                                 ->i1_ref_idx != -1) &&
    749                            (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
    750                                  .b1_intra_flag))
    751                         {
    752                             s_search_node =
    753                                 *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
    754                             if(s_search_node.s_mv.i2_mvx != INTRA_MV)
    755                             {
    756                                 CLIP_MV_WITHIN_RANGE(
    757                                     s_search_node.s_mv.i2_mvx,
    758                                     s_search_node.s_mv.i2_mvy,
    759                                     pps_range_prms[s_search_node.i1_ref_idx],
    760                                     0,
    761                                     0,
    762                                     0);
    763 
    764                                 INSERT_NEW_NODE_NOMAP(
    765                                     as_merge_unique_node,
    766                                     num_unique_nodes_cu_merge,
    767                                     s_search_node,
    768                                     1);
    769                             }
    770                         }
    771                     }
    772                 }
    773             }
    774         }
    775     }
    776 
    777     if(0 == num_unique_nodes_cu_merge)
    778     {
    779         return 0;
    780     }
    781 
    782     /*************************************************************************/
    783     /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
    784     /* fixed through this subpel refinement for this partition.              */
    785     /* Note, we do not enable grid sads since one pt is evaluated per node   */
    786     /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled.   */
    787     /*************************************************************************/
    788     i4_part_mask = ps_search_results->i4_part_mask;
    789 
    790     /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
    791     if(ps_subpel_prms->i4_use_satd)
    792     {
    793         if(BLK_32x32 == e_blk_size)
    794         {
    795             pf_err_compute = hme_evalsatd_pt_pu_32x32;
    796         }
    797         else
    798         {
    799             pf_err_compute = hme_evalsatd_pt_pu_64x64;
    800         }
    801     }
    802     else
    803     {
    804         pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
    805     }
    806 
    807     i4_ref_stride = ps_curr_layer->i4_rec_stride;
    808 
    809     x_off = ps_merge_prms->ps_results_tl->u1_x_off;
    810     y_off = ps_merge_prms->ps_results_tl->u1_y_off;
    811     i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
    812 
    813     /*************************************************************************/
    814     /* This array stores the ids of the partitions whose                     */
    815     /* SADs are updated. Since the partitions whose SADs are updated may not */
    816     /* be in contiguous order, we supply another level of indirection.       */
    817     /*************************************************************************/
    818     i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
    819 
    820     /* Initialize result params used for partition update */
    821     s_result_prms.pf_mv_cost_compute = NULL;
    822     s_result_prms.ps_search_results = ps_search_results;
    823     s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
    824     s_result_prms.i1_ref_idx = i4_search_idx;
    825     s_result_prms.i4_part_mask = i4_part_mask;
    826     s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
    827     s_result_prms.i4_grid_mask = 1;
    828 
    829     /* One time Initialization of error params used for SAD/SATD compute */
    830     s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
    831     s_err_prms.i4_ref_stride = i4_ref_stride;
    832     s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
    833     s_err_prms.i4_grid_mask = 1;
    834     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
    835     s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    836     s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
    837     s_err_prms.i4_step = 1;
    838 
    839     /*************************************************************************/
    840     /* One time preparation of non changing interpolation params.            */
    841     /*************************************************************************/
    842     s_interp_prms.i4_ref_stride = i4_ref_stride;
    843     s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    844     s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
    845     s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
    846     s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
    847     i4_interp_buf_id = 0;
    848 
    849     pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
    850 
    851     /***************************************************************************/
    852     /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
    853     /* results                                                                 */
    854     /***************************************************************************/
    855     for(i = 0; i < num_unique_nodes_cu_merge; i++)
    856     {
    857         WORD8 i1_ref_idx;
    858         ps_search_node = &as_merge_unique_node[i];
    859 
    860         /*********************************************************************/
    861         /* Compute the base pointer for input, interpolated buffers          */
    862         /* The base pointers point as follows:                               */
    863         /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, fy: 0.5, 0.5 */
    864         /* To these, we need to add the offset of the current node           */
    865         /*********************************************************************/
    866         i1_ref_idx = ps_search_node->i1_ref_idx;
    867         apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
    868         apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
    869         apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
    870         apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
    871 
    872         s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
    873 
    874         pf_qpel_interp(
    875             &s_interp_prms,
    876             ps_search_node->s_mv.i2_mvx,
    877             ps_search_node->s_mv.i2_mvy,
    878             i4_interp_buf_id);
    879 
    880         pred_lx = i4_search_idx;
    881         ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
    882 
    883         s_result_prms.u1_pred_lx = pred_lx;
    884         s_result_prms.ps_search_node_base = ps_search_node;
    885         s_err_prms.pu1_inp =
    886             ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
    887         s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
    888         s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
    889 
    890         /* Carry out the SAD/SATD. This call also does the TU RECURSION.
    891         Here the tu recursion logic is restricted with the size of the PU*/
    892         pf_err_compute(&s_err_prms);
    893 
    894         if(ps_subpel_prms->u1_is_cu_noisy &&
    895            ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
    896         {
    897             ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
    898                 s_err_prms.pu1_ref,
    899                 s_err_prms.i4_ref_stride,
    900                 ai4_valid_part_ids,
    901                 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
    902                 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
    903                 s_err_prms.pi4_sad_grid,
    904                 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
    905                 ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
    906                 ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
    907                 i4_num_valid_parts,
    908                 ps_wt_inp_prms->wpred_log_wdc,
    909                 (BLK_32x32 == e_blk_size) ? 32 : 64);
    910         }
    911 
    912         /* Update the mv's */
    913         s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
    914         s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
    915 
    916         /* Update best results */
    917         hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
    918     }
    919 
    920     /************************************************************************/
    921     /* Update mv cost and total cost for each valid partition in the CU     */
    922     /************************************************************************/
    923     for(i = 0; i < TOT_NUM_PARTS; i++)
    924     {
    925         if(i4_part_mask & (1 << i))
    926         {
    927             WORD32 j;
    928             WORD32 i4_mv_cost;
    929 
    930             ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
    931 
    932             for(j = 0;
    933                 j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
    934                 j++)
    935             {
    936                 if(ps_search_node->i1_ref_idx != -1)
    937                 {
    938                     pred_lx = i4_search_idx;
    939                     ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
    940 
    941                     /* Prediction context should now deal with qpel units */
    942                     HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
    943 
    944                     ps_search_node->u1_subpel_done = 1;
    945                     ps_search_node->u1_is_avail = 1;
    946 
    947                     i4_mv_cost =
    948                         pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
    949 
    950                     ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
    951                     ps_search_node->i4_mv_cost = i4_mv_cost;
    952 
    953                     ps_search_node++;
    954                 }
    955             }
    956         }
    957     }
    958 
    959     return num_unique_nodes_cu_merge;
    960 }
    961 
    962 #define CU_MERGE_MAX_INTRA_PARTS 4 /* Intra-part threshold: hme_try_merge_high_speed compares its i4_intra_parts tally against this (strictly greater) as one condition for its early intra merge/split exit */
    963 
    964 /**
    965 ********************************************************************************
    966 *  @fn     hme_try_merge_high_speed
    967 *
    968 *  @brief  Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
    969 entity or with partitions for high speed preset
    970 *
    971 *  @param[in,out]  hme_merge_prms_t: Params for CU merge
    972 *
    973 *  @return CU_MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
    974 ********************************************************************************
    975 */
    976 CU_MERGE_RESULT_T hme_try_merge_high_speed(
    977     me_ctxt_t *ps_thrd_ctxt,
    978     me_frm_ctxt_t *ps_ctxt,
    979     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    980     hme_subpel_prms_t *ps_subpel_prms,
    981     hme_merge_prms_t *ps_merge_prms,
    982     inter_pu_results_t *ps_pu_results,
    983     pu_result_t *ps_pu_result)
    984 {
    985     search_results_t *ps_results_tl, *ps_results_tr;
    986     search_results_t *ps_results_bl, *ps_results_br;
    987 
    988     S32 i;
    989     S32 i4_search_idx;
    990     S32 i4_cost_parent;
    991     S32 intra_cu_size;
    992     ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
    993 
    994     search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
    995     wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
    996 
    997     S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
    998     S32 is_vert = 0, i4_best_part_type = -1;
    999     S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
   1000     S32 i4_cost_children = 0;
   1001     S32 i4_frm_qstep = ps_ctxt->frm_qstep;
   1002     S32 i4_num_merge_cands_evaluated = 0;
   1003     U08 u1_x_off = ps_results_merge->u1_x_off;
   1004     U08 u1_y_off = ps_results_merge->u1_y_off;
   1005     S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
   1006 
   1007     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
   1008         ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
   1009     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
   1010         ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
   1011     ps_results_tl = ps_merge_prms->ps_results_tl;
   1012     ps_results_tr = ps_merge_prms->ps_results_tr;
   1013     ps_results_bl = ps_merge_prms->ps_results_bl;
   1014     ps_results_br = ps_merge_prms->ps_results_br;
   1015 
   1016     if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
   1017     {
   1018         i4_part_mask &= ~ENABLE_AMP;
   1019     }
   1020 
   1021     if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
   1022     {
   1023         i4_part_mask &= ~ENABLE_AMP;
   1024 
   1025         i4_part_mask &= ~ENABLE_SMP;
   1026     }
   1027 
   1028     ps_merge_prms->i4_num_pred_dir_actual = 0;
   1029 
   1030     /*************************************************************************/
   1031     /* The logic for High speed CU merge goes as follows:                    */
   1032     /*                                                                       */
   1033     /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
   1034     /*    exceed 7                                                           */
   1035     /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
   1036     /*    are identical                                                      */
   1037     /* 3. Find the all unique mvs of best partitions of children CUs and     */
   1038     /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
   1039     /*    best parent cost is lower than sum of the best children costs      */
   1040     /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
   1041     /*                                                                       */
   1042     /*************************************************************************/
   1043 
   1044     /* Count the number of best partitions in child CUs, early exit if > 7 */
   1045     if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
   1046        (CU_32x32 == ps_results_merge->e_cu_size))
   1047     {
   1048         S32 num_parts_in_32x32 = 0;
   1049         WORD32 i4_part_type;
   1050 
   1051         if(ps_results_tl->u1_split_flag)
   1052         {
   1053             num_parts_in_32x32 += 4;
   1054 
   1055 #define COST_INTERCHANGE 0
   1056             i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
   1057                                ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
   1058                                ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
   1059                                ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
   1060         }
   1061         else
   1062         {
   1063             i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
   1064             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
   1065             i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
   1066         }
   1067 
   1068         if(ps_results_tr->u1_split_flag)
   1069         {
   1070             num_parts_in_32x32 += 4;
   1071 
   1072             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
   1073                                 ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
   1074                                 ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
   1075                                 ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
   1076         }
   1077         else
   1078         {
   1079             i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
   1080             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
   1081             i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
   1082         }
   1083 
   1084         if(ps_results_bl->u1_split_flag)
   1085         {
   1086             num_parts_in_32x32 += 4;
   1087 
   1088             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
   1089                                 ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
   1090                                 ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
   1091                                 ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
   1092         }
   1093         else
   1094         {
   1095             i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
   1096             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
   1097             i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
   1098         }
   1099 
   1100         if(ps_results_br->u1_split_flag)
   1101         {
   1102             num_parts_in_32x32 += 4;
   1103 
   1104             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
   1105                                 ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
   1106                                 ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
   1107                                 ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
   1108         }
   1109         else
   1110         {
   1111             i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
   1112             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
   1113             i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
   1114         }
   1115 
   1116         if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
   1117         {
   1118             return CU_SPLIT;
   1119         }
   1120 
   1121         if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
   1122            (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
   1123         {
   1124             return CU_SPLIT;
   1125         }
   1126     }
   1127 
   1128     /* Accumulate intra percentage before merge for early CU_SPLIT decision     */
   1129     /* Note : Each intra part represent a NxN unit of the children CUs          */
   1130     /* This is essentially 1/16th of the CUsize under consideration for merge   */
   1131     if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
   1132     {
   1133         if(CU_64x64 == ps_results_merge->e_cu_size)
   1134         {
   1135             i4_intra_parts =
   1136                 (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
   1137                     ? 16
   1138                     : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
   1139         }
   1140         else
   1141         {
   1142             switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
   1143             {
   1144             case 0:
   1145             {
   1146                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
   1147                                        ->u1_inter_eval_enable)
   1148                                      ? 16
   1149                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
   1150                                             ->ps_child_node_tl->u1_intra_eval_enable);
   1151 
   1152                 break;
   1153             }
   1154             case 1:
   1155             {
   1156                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
   1157                                        ->u1_inter_eval_enable)
   1158                                      ? 16
   1159                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
   1160                                             ->ps_child_node_tr->u1_intra_eval_enable);
   1161 
   1162                 break;
   1163             }
   1164             case 2:
   1165             {
   1166                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
   1167                                        ->u1_inter_eval_enable)
   1168                                      ? 16
   1169                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
   1170                                             ->ps_child_node_bl->u1_intra_eval_enable);
   1171 
   1172                 break;
   1173             }
   1174             case 3:
   1175             {
   1176                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
   1177                                        ->u1_inter_eval_enable)
   1178                                      ? 16
   1179                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
   1180                                             ->ps_child_node_br->u1_intra_eval_enable);
   1181 
   1182                 break;
   1183             }
   1184             }
   1185         }
   1186     }
   1187     else
   1188     {
   1189         for(i = 0; i < 4; i++)
   1190         {
   1191             search_results_t *ps_results =
   1192                 (i == 0) ? ps_results_tl
   1193                          : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
   1194 
   1195             part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
   1196 
   1197             if(ps_results->u1_split_flag)
   1198             {
   1199                 U08 u1_x_off = ps_results->u1_x_off;
   1200                 U08 u1_y_off = ps_results->u1_y_off;
   1201                 U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
   1202                                       2;
   1203 
   1204                 /* Special case to handle 8x8 CUs when 16x16 is split */
   1205                 ASSERT(ps_results->e_cu_size == CU_16x16);
   1206 
   1207                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
   1208 
   1209                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
   1210                     i4_intra_parts += 1;
   1211 
   1212                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
   1213 
   1214                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
   1215                     i4_intra_parts += 1;
   1216 
   1217                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
   1218 
   1219                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
   1220                     i4_intra_parts += 1;
   1221 
   1222                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
   1223 
   1224                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
   1225                     i4_intra_parts += 1;
   1226             }
   1227             else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
   1228             {
   1229                 i4_intra_parts += 4;
   1230             }
   1231         }
   1232     }
   1233 
   1234     /* Determine the max intra CU size indicated by IPE */
   1235     intra_cu_size = CU_64x64;
   1236     if(ps_cur_ipe_ctb->u1_split_flag)
   1237     {
   1238         intra_cu_size = CU_32x32;
   1239         if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
   1240         {
   1241             intra_cu_size = CU_16x16;
   1242         }
   1243     }
   1244 
   1245     if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
   1246         (intra_cu_size < ps_results_merge->e_cu_size) &&
   1247         (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
   1248        (i4_intra_parts == 16))
   1249     {
   1250         S32 i4_merge_outcome;
   1251 
   1252         i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
   1253                                ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
   1254                                   ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
   1255                                : (!ps_cur_ipe_ctb->u1_split_flag);
   1256 
   1257         i4_merge_outcome = i4_merge_outcome ||
   1258                            (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
   1259 
   1260         i4_merge_outcome = i4_merge_outcome &&
   1261                            !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
   1262 
   1263         if(i4_merge_outcome)
   1264         {
   1265             inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
   1266             part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
   1267             pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
   1268 
   1269             ps_cu_results->u1_num_best_results = 1;
   1270             ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
   1271             ps_cu_results->u1_x_off = u1_x_off;
   1272             ps_cu_results->u1_y_off = u1_y_off;
   1273 
   1274             ps_best_result->u1_part_type = PRT_2Nx2N;
   1275             ps_best_result->ai4_tu_split_flag[0] = 0;
   1276             ps_best_result->ai4_tu_split_flag[1] = 0;
   1277             ps_best_result->ai4_tu_split_flag[2] = 0;
   1278             ps_best_result->ai4_tu_split_flag[3] = 0;
   1279             ps_best_result->i4_tot_cost =
   1280                 (CU_64x64 == ps_results_merge->e_cu_size)
   1281                     ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
   1282                     : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
   1283 
   1284             ps_pu->b1_intra_flag = 1;
   1285             ps_pu->b4_pos_x = u1_x_off >> 2;
   1286             ps_pu->b4_pos_y = u1_y_off >> 2;
   1287             ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
   1288             ps_pu->b4_ht = ps_pu->b4_wd;
   1289             ps_pu->mv.i1_l0_ref_idx = -1;
   1290             ps_pu->mv.i1_l1_ref_idx = -1;
   1291             ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
   1292             ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
   1293             ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
   1294             ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
   1295 
   1296             return CU_MERGED;
   1297         }
   1298         else
   1299         {
   1300             return CU_SPLIT;
   1301         }
   1302     }
   1303 
   1304     if(i4_intra_parts)
   1305     {
   1306         i4_part_mask = ENABLE_2Nx2N;
   1307     }
   1308 
   1309     ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
   1310 
   1311     hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
   1312 
   1313     ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
   1314     ps_merge_prms->i4_num_pred_dir_actual = 0;
   1315 
   1316     if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
   1317     {
   1318         S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
   1319         S32 i4_num_valid_parts;
   1320         S32 i4_sigma_array_offset;
   1321 
   1322         i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
   1323 
   1324         /*********************************************************************************************************************************************/
   1325         /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
   1326         /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
   1327         /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
   1328         /*********************************************************************************************************************************************/
   1329         i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
   1330                                 (ps_merge_prms->ps_results_merge->u1_y_off * 4);
   1331 
   1332         for(i = 0; i < i4_num_valid_parts; i++)
   1333         {
   1334             S32 i4_part_id = ai4_valid_part_ids[i];
   1335 
   1336             hme_compute_final_sigma_of_pu_from_base_blocks(
   1337                 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
   1338                 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
   1339                 au8_final_src_sigmaX,
   1340                 au8_final_src_sigmaXSquared,
   1341                 (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
   1342                 4,
   1343                 i4_part_id,
   1344                 16);
   1345         }
   1346 
   1347         ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
   1348         ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
   1349     }
   1350 
   1351     /*************************************************************************/
   1352     /* Loop through all ref idx and pick the merge candts and refine based   */
   1353     /* on the active partitions. At this stage num ref will be 1 or 2        */
   1354     /*************************************************************************/
   1355     for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
   1356     {
   1357         S32 i4_cands;
   1358         U08 u1_pred_dir = 0;
   1359 
   1360         if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
   1361         {
   1362             u1_pred_dir = i4_search_idx;
   1363         }
   1364         else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
   1365         {
   1366             u1_pred_dir = 1;
   1367         }
   1368         else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
   1369         {
   1370             u1_pred_dir = 0;
   1371         }
   1372         else
   1373         {
   1374             ASSERT(0);
   1375         }
   1376 
   1377         /* call the function to pick and evaluate the merge candts, given */
   1378         /* a ref id and a part mask.                                      */
   1379         i4_cands = hme_pick_eval_merge_candts(
   1380             ps_merge_prms,
   1381             ps_subpel_prms,
   1382             u1_pred_dir,
   1383             i4_best_part_type,
   1384             is_vert,
   1385             ps_wt_inp_prms,
   1386             i4_frm_qstep,
   1387             ps_cmn_utils_optimised_function_list,
   1388             ps_me_optimised_function_list);
   1389 
   1390         if(i4_cands)
   1391         {
   1392             ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
   1393                 u1_pred_dir;
   1394             ps_merge_prms->i4_num_pred_dir_actual++;
   1395         }
   1396 
   1397         i4_num_merge_cands_evaluated += i4_cands;
   1398     }
   1399 
   1400     /* Call the decide_part_types function here */
   1401     /* Populate the new PU struct with the results post subpel refinement*/
   1402     if(i4_num_merge_cands_evaluated)
   1403     {
   1404         inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
   1405 
   1406         hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
   1407 
   1408         ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
   1409         ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
   1410 
   1411         hme_populate_pus(
   1412             ps_thrd_ctxt,
   1413             ps_ctxt,
   1414             ps_subpel_prms,
   1415             ps_results_merge,
   1416             ps_cu_results,
   1417             ps_pu_results,
   1418             ps_pu_result,
   1419             ps_merge_prms->ps_inter_ctb_prms,
   1420             &ps_ctxt->s_wt_pred,
   1421             ps_merge_prms->ps_layer_ctxt,
   1422             ps_merge_prms->au1_pred_dir_searched,
   1423             ps_merge_prms->i4_num_pred_dir_actual);
   1424 
   1425         ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
   1426 
   1427         hme_decide_part_types(
   1428             ps_cu_results,
   1429             ps_pu_results,
   1430             ps_merge_prms->ps_inter_ctb_prms,
   1431             ps_ctxt,
   1432             ps_cmn_utils_optimised_function_list,
   1433             ps_me_optimised_function_list
   1434 
   1435         );
   1436 
   1437         /*****************************************************************/
   1438         /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
   1439         /*****************************************************************/
   1440 #if DISABLE_INTRA_IN_BPICS
   1441         if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
   1442                  (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
   1443 #endif
   1444         {
   1445             if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
   1446             {
   1447                 hme_insert_intra_nodes_post_bipred(
   1448                     ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
   1449             }
   1450         }
   1451     }
   1452     else
   1453     {
   1454         return CU_SPLIT;
   1455     }
   1456 
   1457     /* We check the best result of ref idx 0 and compare for parent vs child */
   1458     if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
   1459        (CU_32x32 == ps_results_merge->e_cu_size))
   1460     {
   1461         i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
   1462         /*********************************************************************/
   1463         /* Add the cost of signaling the CU tree bits.                       */
   1464         /* Assuming parent is not split, then we signal 1 bit for this parent*/
   1465         /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
   1466         /* So, 4*lambda is extra for children cost. :Lokesh                  */
   1467         /*********************************************************************/
   1468         {
   1469             pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
   1470 
   1471             i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
   1472         }
   1473 
   1474         if(i4_cost_parent < i4_cost_children)
   1475         {
   1476             return CU_MERGED;
   1477         }
   1478 
   1479         return CU_SPLIT;
   1480     }
   1481     else
   1482     {
   1483         return CU_MERGED;
   1484     }
   1485 }
   1486 
   1487 #define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
   1488     {                                                                                              \
   1489         (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
   1490         (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
   1491         *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
   1492     }
   1493 
   1494 /**
   1495 ********************************************************************************
   1496 *  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
   1497 *                               layer_mv_t *ps_layer_mv,
   1498 *                               S32 i4_search_blk_x,
   1499 *                               S32 i4_search_blk_y,
   1500 *                               mvbank_update_prms_t *ps_prms)
   1501 *
   1502 *  @brief  Updates the mv bank in case there is no further encodign to be done
   1503 *
   1504 *  @param[in]  ps_search_results: contains results for the block just searched
   1505 *
   1506 *  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
   1507 *
   1508 *  @param[in] i4_search_blk_x  : col num of blk being searched
   1509 *
   1510 *  @param[in] i4_search_blk_y : row num of blk being searched
   1511 *
   1512 *  @param[in] ps_prms : contains certain parameters which govern how updatedone
   1513 *
   1514 *  @return None
   1515 ********************************************************************************
   1516 */
   1517 
   1518 void hme_update_mv_bank_noencode(
   1519     search_results_t *ps_search_results,
   1520     layer_mv_t *ps_layer_mv,
   1521     S32 i4_search_blk_x,
   1522     S32 i4_search_blk_y,
   1523     mvbank_update_prms_t *ps_prms)
   1524 {
   1525     hme_mv_t *ps_mv;
   1526     hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
   1527     S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
   1528     S32 i4_blk_x, i4_blk_y, i4_offset;
   1529     S32 i4_j, i4_ref_id;
   1530     search_node_t *ps_search_node;
   1531     search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
   1532     search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
   1533     search_node_t *ps_search_node_4x4_4;
   1534 
   1535     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
   1536     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
   1537     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
   1538 
   1539     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
   1540 
   1541     /* Identify the correct offset in the mvbank and the reference id buf */
   1542     ps_mv = ps_layer_mv->ps_mv + i4_offset;
   1543     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
   1544 
   1545     /*************************************************************************/
   1546     /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
   1547     /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
   1548     /* do a straightforward single update of results. This will have a 1-1   */
   1549     /* correspondence.                                                       */
   1550     /*************************************************************************/
   1551     if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
   1552     {
   1553         for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
   1554         {
   1555             ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
   1556             for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
   1557             {
   1558                 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
   1559                 ps_mv++;
   1560                 pi1_ref_idx++;
   1561                 ps_search_node++;
   1562             }
   1563         }
   1564         return;
   1565     }
   1566 
   1567     /*************************************************************************/
   1568     /* Case where search blk size is 8x8, but we update 4x4 results. In this */
   1569     /* case, we need to have NxN partitions enabled in search.               */
   1570     /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
   1571     /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
   1572     /*************************************************************************/
   1573     ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
   1574     ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
   1575     ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
   1576 
   1577     /*************************************************************************/
   1578     /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
   1579     /* hence the below check.                                                */
   1580     /*************************************************************************/
   1581     ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
   1582 
   1583     ps_mv1 = ps_mv;
   1584     ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
   1585     ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
   1586     ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
   1587     pi1_ref_idx1 = pi1_ref_idx;
   1588     pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
   1589     pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
   1590     pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
   1591 
   1592     for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
   1593     {
   1594         ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
   1595 
   1596         ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
   1597 
   1598         ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
   1599 
   1600         ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
   1601 
   1602         ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
   1603 
   1604         COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
   1605         ps_mv1++;
   1606         pi1_ref_idx1++;
   1607         ps_search_node_4x4_1++;
   1608         COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
   1609         ps_mv2++;
   1610         pi1_ref_idx2++;
   1611         ps_search_node_4x4_2++;
   1612         COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
   1613         ps_mv3++;
   1614         pi1_ref_idx3++;
   1615         ps_search_node_4x4_3++;
   1616         COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
   1617         ps_mv4++;
   1618         pi1_ref_idx4++;
   1619         ps_search_node_4x4_4++;
   1620 
   1621         if(ps_layer_mv->i4_num_mvs_per_ref > 1)
   1622         {
   1623             COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
   1624             ps_mv1++;
   1625             pi1_ref_idx1++;
   1626             COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
   1627             ps_mv2++;
   1628             pi1_ref_idx2++;
   1629             COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
   1630             ps_mv3++;
   1631             pi1_ref_idx3++;
   1632             COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
   1633             ps_mv4++;
   1634             pi1_ref_idx4++;
   1635         }
   1636 
   1637         for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
   1638         {
   1639             COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
   1640             ps_mv1++;
   1641             pi1_ref_idx1++;
   1642             ps_search_node_4x4_1++;
   1643             COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
   1644             ps_mv2++;
   1645             pi1_ref_idx2++;
   1646             ps_search_node_4x4_2++;
   1647             COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
   1648             ps_mv3++;
   1649             pi1_ref_idx3++;
   1650             ps_search_node_4x4_3++;
   1651             COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
   1652             ps_mv4++;
   1653             pi1_ref_idx4++;
   1654             ps_search_node_4x4_4++;
   1655         }
   1656     }
   1657 }
   1658 
   1659 void hme_update_mv_bank_encode(
   1660     search_results_t *ps_search_results,
   1661     layer_mv_t *ps_layer_mv,
   1662     S32 i4_search_blk_x,
   1663     S32 i4_search_blk_y,
   1664     mvbank_update_prms_t *ps_prms,
   1665     U08 *pu1_pred_dir_searched,
   1666     S32 i4_num_act_ref_l0)
   1667 {
   1668     hme_mv_t *ps_mv;
   1669     hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
   1670     S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
   1671     S32 i4_blk_x, i4_blk_y, i4_offset;
   1672     S32 j, i, num_parts;
   1673     search_node_t *ps_search_node_tl, *ps_search_node_tr;
   1674     search_node_t *ps_search_node_bl, *ps_search_node_br;
   1675     search_node_t s_zero_mv;
   1676     WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
   1677 
   1678     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
   1679     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
   1680     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
   1681 
   1682     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
   1683 
   1684     /* Identify the correct offset in the mvbank and the reference id buf */
   1685     ps_mv = ps_layer_mv->ps_mv + i4_offset;
   1686     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
   1687 
   1688     ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
   1689     ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
   1690 
   1691     /*************************************************************************/
   1692     /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
   1693     /* hence the below check.                                                */
   1694     /*************************************************************************/
   1695     ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);
   1696 
   1697     ps_mv1 = ps_mv;
   1698     ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
   1699     ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
   1700     ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
   1701     pi1_ref_idx1 = pi1_ref_idx;
   1702     pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
   1703     pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
   1704     pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
   1705 
   1706     /* Initialize zero mv: default mv used for intra mvs */
   1707     s_zero_mv.s_mv.i2_mvx = 0;
   1708     s_zero_mv.s_mv.i2_mvy = 0;
   1709     s_zero_mv.i1_ref_idx = 0;
   1710 
   1711     if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
   1712        (ps_search_results->i4_part_mask & ENABLE_NxN))
   1713     {
   1714         i4_part_type = PRT_NxN;
   1715     }
   1716 
   1717     for(i = 0; i < ps_prms->i4_num_ref; i++)
   1718     {
   1719         for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
   1720         {
   1721             WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
   1722 
   1723             num_parts = gau1_num_parts_in_part_type[i4_part_type];
   1724 
   1725             ps_search_node_tl =
   1726                 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
   1727 
   1728             if(num_parts == 1)
   1729             {
   1730                 ps_search_node_tr = ps_search_node_tl;
   1731                 ps_search_node_bl = ps_search_node_tl;
   1732                 ps_search_node_br = ps_search_node_tl;
   1733             }
   1734             else if(num_parts == 2)
   1735             {
   1736                 /* For vertically oriented partitions, tl, bl pt to same result */
   1737                 /* For horizontally oriented partition, tl, tr pt to same result */
   1738                 /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
   1739                 /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
   1740                 /* and right 2 8x8 have 12x16R partition */
   1741                 if(gau1_is_vert_part[i4_part_type])
   1742                 {
   1743                     ps_search_node_tr =
   1744                         ps_search_results
   1745                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
   1746                     ps_search_node_bl = ps_search_node_tl;
   1747                 }
   1748                 else
   1749                 {
   1750                     ps_search_node_tr = ps_search_node_tl;
   1751                     ps_search_node_bl =
   1752                         ps_search_results
   1753                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
   1754                 }
   1755                 ps_search_node_br =
   1756                     ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
   1757             }
   1758             else
   1759             {
   1760                 /* 4 unique results */
   1761                 ps_search_node_tr =
   1762                     ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
   1763                 ps_search_node_bl =
   1764                     ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
   1765                 ps_search_node_br =
   1766                     ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
   1767             }
   1768 
   1769             if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
   1770                 ps_search_node_tl++;
   1771             if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
   1772                 ps_search_node_tr++;
   1773             if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
   1774                 ps_search_node_bl++;
   1775             if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
   1776                 ps_search_node_br++;
   1777 
   1778             COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
   1779             ps_mv1++;
   1780             pi1_ref_idx1++;
   1781             COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
   1782             ps_mv2++;
   1783             pi1_ref_idx2++;
   1784             COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
   1785             ps_mv3++;
   1786             pi1_ref_idx3++;
   1787             COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
   1788             ps_mv4++;
   1789             pi1_ref_idx4++;
   1790 
   1791             if(ps_prms->i4_num_results_to_store > 1)
   1792             {
   1793                 ps_search_node_tl =
   1794                     &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];
   1795 
   1796                 if(num_parts == 1)
   1797                 {
   1798                     ps_search_node_tr = ps_search_node_tl;
   1799                     ps_search_node_bl = ps_search_node_tl;
   1800                     ps_search_node_br = ps_search_node_tl;
   1801                 }
   1802                 else if(num_parts == 2)
   1803                 {
   1804                     /* For vertically oriented partitions, tl, bl pt to same result */
   1805                     /* For horizontally oriented partition, tl, tr pt to same result */
   1806                     /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
   1807                     /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
   1808                     /* and right 2 8x8 have 12x16R partition */
   1809                     if(gau1_is_vert_part[i4_part_type])
   1810                     {
   1811                         ps_search_node_tr =
   1812                             &ps_search_results
   1813                                  ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
   1814                         ps_search_node_bl = ps_search_node_tl;
   1815                     }
   1816                     else
   1817                     {
   1818                         ps_search_node_tr = ps_search_node_tl;
   1819                         ps_search_node_bl =
   1820                             &ps_search_results
   1821                                  ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
   1822                     }
   1823                     ps_search_node_br =
   1824                         &ps_search_results
   1825                              ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
   1826                 }
   1827                 else
   1828                 {
   1829                     /* 4 unique results */
   1830                     ps_search_node_tr =
   1831                         &ps_search_results
   1832                              ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
   1833                     ps_search_node_bl =
   1834                         &ps_search_results
   1835                              ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
   1836                     ps_search_node_br =
   1837                         &ps_search_results
   1838                              ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
   1839                 }
   1840 
   1841                 if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
   1842                     ps_search_node_tl++;
   1843                 if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
   1844                     ps_search_node_tr++;
   1845                 if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
   1846                     ps_search_node_bl++;
   1847                 if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
   1848                     ps_search_node_br++;
   1849 
   1850                 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
   1851                 ps_mv1++;
   1852                 pi1_ref_idx1++;
   1853                 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
   1854                 ps_mv2++;
   1855                 pi1_ref_idx2++;
   1856                 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
   1857                 ps_mv3++;
   1858                 pi1_ref_idx3++;
   1859                 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
   1860                 ps_mv4++;
   1861                 pi1_ref_idx4++;
   1862             }
   1863         }
   1864     }
   1865 }
   1866 
   1867 /**
   1868 ********************************************************************************
   1869 *  @fn     hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
   1870 *                               layer_mv_t *ps_layer_mv,
   1871 *                               S32 i4_search_blk_x,
   1872 *                               S32 i4_search_blk_y,
   1873 *                               mvbank_update_prms_t *ps_prms)
   1874 *
   1875 *  @brief  Updates the mv bank in case there is no further encoding to be done
   1876 *
   1877 *  @param[in]  ps_search_results: contains results for the block just searched
   1878 *
   1879 *  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
   1880 *
   1881 *  @param[in] i4_search_blk_x  : col num of blk being searched
   1882 *
   1883 *  @param[in] i4_search_blk_y : row num of blk being searched
   1884 *
   1885 *  @param[in] ps_prms : contains certain parameters which govern how the update is done
   1886 *
   1887 *  @return None
   1888 ********************************************************************************
   1889 */
   1890 
   1891 void hme_update_mv_bank_in_l1_me(
   1892     search_results_t *ps_search_results,
   1893     layer_mv_t *ps_layer_mv,
   1894     S32 i4_search_blk_x,
   1895     S32 i4_search_blk_y,
   1896     mvbank_update_prms_t *ps_prms)
   1897 {
   1898     hme_mv_t *ps_mv;
   1899     hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
   1900     S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
   1901     S32 i4_blk_x, i4_blk_y, i4_offset;
   1902     S32 i4_j, i4_ref_id;
   1903     search_node_t *ps_search_node;
   1904     search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
   1905 
   1906     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
   1907     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
   1908     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
   1909 
   1910     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
   1911 
   1912     /* Identify the correct offset in the mvbank and the reference id buf */
   1913     ps_mv = ps_layer_mv->ps_mv + i4_offset;
   1914     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
   1915 
   1916     /*************************************************************************/
   1917     /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
   1918     /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
   1919     /* do a straightforward single update of results. This will have a 1-1   */
   1920     /* correspondence.                                                       */
   1921     /*************************************************************************/
   1922     if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
   1923     {
   1924         search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
   1925 
   1926         hme_mv_t *ps_mv_l0_root = ps_mv;
   1927         hme_mv_t *ps_mv_l1_root =
   1928             ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
   1929 
   1930         U32 u4_num_l0_results_updated = 0;
   1931         U32 u4_num_l1_results_updated = 0;
   1932 
   1933         S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
   1934         S08 *pi1_ref_idx_l1_root =
   1935             pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
   1936 
   1937         for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
   1938         {
   1939             U32 *pu4_num_results_updated;
   1940             search_node_t **pps_result_nodes;
   1941 
   1942             U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
   1943 
   1944             if(u1_pred_dir_of_cur_ref)
   1945             {
   1946                 pu4_num_results_updated = &u4_num_l1_results_updated;
   1947                 pps_result_nodes = &aps_result_nodes_sorted[1][0];
   1948             }
   1949             else
   1950             {
   1951                 pu4_num_results_updated = &u4_num_l0_results_updated;
   1952                 pps_result_nodes = &aps_result_nodes_sorted[0][0];
   1953             }
   1954 
   1955             ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
   1956 
   1957             for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
   1958             {
   1959                 hme_add_new_node_to_a_sorted_array(
   1960                     &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
   1961 
   1962                 ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
   1963                 (*pu4_num_results_updated)++;
   1964             }
   1965         }
   1966 
   1967         for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
   1968         {
   1969             COPY_SEARCH_RESULT(
   1970                 &ps_mv_l0_root[i4_j],
   1971                 &pi1_ref_idx_l0_root[i4_j],
   1972                 aps_result_nodes_sorted[0][i4_j],
   1973                 0);
   1974         }
   1975 
   1976         for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
   1977         {
   1978             COPY_SEARCH_RESULT(
   1979                 &ps_mv_l1_root[i4_j],
   1980                 &pi1_ref_idx_l1_root[i4_j],
   1981                 aps_result_nodes_sorted[1][i4_j],
   1982                 0);
   1983         }
   1984 
   1985         return;
   1986     }
   1987 
   1988     /*************************************************************************/
   1989     /* Case where search blk size is 8x8, but we update 4x4 results. In this */
   1990     /* case, we need to have NxN partitions enabled in search.               */
   1991     /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
   1992     /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
   1993     /*************************************************************************/
   1994     ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
   1995     ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
   1996     ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
   1997 
   1998     /*************************************************************************/
   1999     /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
   2000     /* hence the below check.                                                */
   2001     /*************************************************************************/
   2002     ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
   2003 
   2004     ps_mv1 = ps_mv;
   2005     ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
   2006     ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
   2007     ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
   2008     pi1_ref_idx1 = pi1_ref_idx;
   2009     pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
   2010     pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
   2011     pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
   2012 
   2013     {
   2014         search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 4];
   2015         U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * 4];
   2016 
   2017         S32 i;
   2018 
   2019         hme_mv_t *ps_mv1_l0_root = ps_mv1;
   2020         hme_mv_t *ps_mv1_l1_root =
   2021             ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
   2022         hme_mv_t *ps_mv2_l0_root = ps_mv2;
   2023         hme_mv_t *ps_mv2_l1_root =
   2024             ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
   2025         hme_mv_t *ps_mv3_l0_root = ps_mv3;
   2026         hme_mv_t *ps_mv3_l1_root =
   2027             ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
   2028         hme_mv_t *ps_mv4_l0_root = ps_mv4;
   2029         hme_mv_t *ps_mv4_l1_root =
   2030             ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
   2031 
   2032         U32 u4_num_l0_results_updated = 0;
   2033         U32 u4_num_l1_results_updated = 0;
   2034 
   2035         S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
   2036         S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
   2037                                                             ps_layer_mv->i4_num_mvs_per_ref);
   2038         S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
   2039         S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
   2040                                                             ps_layer_mv->i4_num_mvs_per_ref);
   2041         S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
   2042         S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
   2043                                                             ps_layer_mv->i4_num_mvs_per_ref);
   2044         S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
   2045         S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
   2046                                                             ps_layer_mv->i4_num_mvs_per_ref);
   2047 
   2048         for(i = 0; i < 4; i++)
   2049         {
   2050             hme_mv_t *ps_mv_l0_root;
   2051             hme_mv_t *ps_mv_l1_root;
   2052 
   2053             S08 *pi1_ref_idx_l0_root;
   2054             S08 *pi1_ref_idx_l1_root;
   2055 
   2056             for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
   2057             {
   2058                 U32 *pu4_num_results_updated;
   2059                 search_node_t **pps_result_nodes;
   2060                 U08 *pu1_cost_shifts_for_sorted_node;
   2061 
   2062                 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
   2063 
   2064                 if(u1_pred_dir_of_cur_ref)
   2065                 {
   2066                     pu4_num_results_updated = &u4_num_l1_results_updated;
   2067                     pps_result_nodes = &aps_result_nodes_sorted[1][0];
   2068                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
   2069                 }
   2070                 else
   2071                 {
   2072                     pu4_num_results_updated = &u4_num_l0_results_updated;
   2073                     pps_result_nodes = &aps_result_nodes_sorted[0][0];
   2074                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
   2075                 }
   2076 
   2077                 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
   2078 
   2079                 ps_search_node_4x4 =
   2080                     ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
   2081 
   2082                 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
   2083                 {
   2084                     hme_add_new_node_to_a_sorted_array(
   2085                         &ps_search_node_4x4[i4_j],
   2086                         pps_result_nodes,
   2087                         pu1_cost_shifts_for_sorted_node,
   2088                         *pu4_num_results_updated,
   2089                         0);
   2090 
   2091                     (*pu4_num_results_updated)++;
   2092 
   2093                     hme_add_new_node_to_a_sorted_array(
   2094                         &ps_search_node_8x8[i4_j],
   2095                         pps_result_nodes,
   2096                         pu1_cost_shifts_for_sorted_node,
   2097                         *pu4_num_results_updated,
   2098                         2);
   2099 
   2100                     (*pu4_num_results_updated)++;
   2101                 }
   2102             }
   2103 
   2104             switch(i)
   2105             {
   2106             case 0:
   2107             {
   2108                 ps_mv_l0_root = ps_mv1_l0_root;
   2109                 ps_mv_l1_root = ps_mv1_l1_root;
   2110 
   2111                 pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
   2112                 pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
   2113 
   2114                 break;
   2115             }
   2116             case 1:
   2117             {
   2118                 ps_mv_l0_root = ps_mv2_l0_root;
   2119                 ps_mv_l1_root = ps_mv2_l1_root;
   2120 
   2121                 pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
   2122                 pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
   2123 
   2124                 break;
   2125             }
   2126             case 2:
   2127             {
   2128                 ps_mv_l0_root = ps_mv3_l0_root;
   2129                 ps_mv_l1_root = ps_mv3_l1_root;
   2130 
   2131                 pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
   2132                 pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
   2133 
   2134                 break;
   2135             }
   2136             case 3:
   2137             {
   2138                 ps_mv_l0_root = ps_mv4_l0_root;
   2139                 ps_mv_l1_root = ps_mv4_l1_root;
   2140 
   2141                 pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
   2142                 pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
   2143 
   2144                 break;
   2145             }
   2146             }
   2147 
   2148             u4_num_l0_results_updated =
   2149                 MIN((S32)u4_num_l0_results_updated,
   2150                     ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
   2151 
   2152             u4_num_l1_results_updated =
   2153                 MIN((S32)u4_num_l1_results_updated,
   2154                     ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
   2155 
   2156             for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
   2157             {
   2158                 COPY_SEARCH_RESULT(
   2159                     &ps_mv_l0_root[i4_j],
   2160                     &pi1_ref_idx_l0_root[i4_j],
   2161                     aps_result_nodes_sorted[0][i4_j],
   2162                     0);
   2163             }
   2164 
   2165             for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
   2166             {
   2167                 COPY_SEARCH_RESULT(
   2168                     &ps_mv_l1_root[i4_j],
   2169                     &pi1_ref_idx_l1_root[i4_j],
   2170                     aps_result_nodes_sorted[1][i4_j],
   2171                     0);
   2172             }
   2173         }
   2174     }
   2175 }
   2176 
/**
******************************************************************************
*  @brief Scales a motion vector component projected from a different layer
*         of the same picture (so no ref id related delta poc scaling is
*         required).
*
*         Expands to ((mvcomp_p * dim_c) + bias) / dim_p, where the bias is
*         (SIGN(mvcomp_p) * dim_p) >> 1, i.e. half of the divisor carrying
*         the sign reported by SIGN().
*         NOTE(review): presumably SIGN() yields +1/-1 so that the bias
*         rounds the quotient to the nearest integer for both signs - confirm
*         against the definition of SIGN().
*
*         mvcomp_p and dim_p are expanded more than once, so the arguments
*         must be free of side effects.
******************************************************************************
*/

#define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p)                                                  \
    ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
   2186 /**
   2187 ********************************************************************************
   2188 *  @fn     hme_project_coloc_candt(search_node_t *ps_search_node,
   2189 *                                   layer_ctxt_t *ps_curr_layer,
   2190 *                                   layer_ctxt_t *ps_coarse_layer,
   2191 *                                   S32 i4_pos_x,
   2192 *                                   S32 i4_pos_y,
   2193 *                                   S08 i1_ref_id,
   2194 *                                   S08 i1_result_id)
   2195 *
   2196 *  @brief  From a coarser layer, projects a candidated situated at "colocated"
   2197 *          position in the picture (e.g. given x, y it will be x/2, y/2 dyadic
   2198 *
   2199 *  @param[out]  ps_search_node : contains the projected result
   2200 *
   2201 *  @param[in]   ps_curr_layer : current layer context
   2202 *
   2203 *  @param[in]   ps_coarse_layer  : coarser layer context
   2204 *
   2205 *  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
   2206 *
   2207 *  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
   2208 *
   2209 *  @param[in]   i1_ref_id : reference id for which the candidate required
   2210 *
   2211 *  @param[in]   i4_result_id : result id for which the candidate required
   2212 *                              (0 : best result, 1 : next best)
   2213 *
   2214 *  @return None
   2215 ********************************************************************************
   2216 */
   2217 
   2218 void hme_project_coloc_candt(
   2219     search_node_t *ps_search_node,
   2220     layer_ctxt_t *ps_curr_layer,
   2221     layer_ctxt_t *ps_coarse_layer,
   2222     S32 i4_pos_x,
   2223     S32 i4_pos_y,
   2224     S08 i1_ref_id,
   2225     S32 i4_result_id)
   2226 {
   2227     S32 wd_c, ht_c, wd_p, ht_p;
   2228     S32 blksize_p, blk_x, blk_y, i4_offset;
   2229     layer_mv_t *ps_layer_mvbank;
   2230     hme_mv_t *ps_mv;
   2231     S08 *pi1_ref_idx;
   2232 
   2233     /* Width and ht of current and prev layers */
   2234     wd_c = ps_curr_layer->i4_wd;
   2235     ht_c = ps_curr_layer->i4_ht;
   2236     wd_p = ps_coarse_layer->i4_wd;
   2237     ht_p = ps_coarse_layer->i4_ht;
   2238 
   2239     ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
   2240     blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
   2241 
   2242     /* Safety check to avoid uninitialized access across temporal layers */
   2243     i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
   2244     i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
   2245 
   2246     /* Project the positions to prev layer */
   2247     /* TODO: convert these to scale factors at pic level */
   2248     blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
   2249     blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
   2250 
   2251     /* Pick up the mvs from the location */
   2252     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
   2253     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
   2254 
   2255     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
   2256     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
   2257 
   2258     ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
   2259     pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
   2260 
   2261     ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
   2262     ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
   2263     ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
   2264     ps_search_node->u1_subpel_done = 0;
   2265     if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
   2266     {
   2267         ps_search_node->i1_ref_idx = i1_ref_id;
   2268         ps_search_node->s_mv.i2_mvx = 0;
   2269         ps_search_node->s_mv.i2_mvy = 0;
   2270     }
   2271 }
   2272 
   2273 /**
   2274 ********************************************************************************
   2275 *  @fn     hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
   2276 *                                   layer_ctxt_t *ps_curr_layer,
   2277 *                                   layer_ctxt_t *ps_coarse_layer,
   2278 *                                   S32 i4_pos_x,
   2279 *                                   S32 i4_pos_y,
   2280 *                                   S08 i1_ref_id,
   2281 *                                   S08 i1_result_id)
   2282 *
   2283 *  @brief  From a coarser layer, projects a candidated situated at "colocated"
   2284 *          position in the picture when the ratios are dyadic
   2285 *
   2286 *  @param[out]  ps_search_node : contains the projected result
   2287 *
   2288 *  @param[in]   ps_curr_layer : current layer context
   2289 *
   2290 *  @param[in]   ps_coarse_layer  : coarser layer context
   2291 *
   2292 *  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
   2293 *
   2294 *  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
   2295 *
   2296 *  @param[in]   i1_ref_id : reference id for which the candidate required
   2297 *
   2298 *  @param[in]   i4_result_id : result id for which the candidate required
   2299 *                              (0 : best result, 1 : next best)
   2300 *
   2301 *  @return None
   2302 ********************************************************************************
   2303 */
   2304 
   2305 void hme_project_coloc_candt_dyadic(
   2306     search_node_t *ps_search_node,
   2307     layer_ctxt_t *ps_curr_layer,
   2308     layer_ctxt_t *ps_coarse_layer,
   2309     S32 i4_pos_x,
   2310     S32 i4_pos_y,
   2311     S08 i1_ref_id,
   2312     S32 i4_result_id)
   2313 {
   2314     S32 wd_c, ht_c, wd_p, ht_p;
   2315     S32 blksize_p, blk_x, blk_y, i4_offset;
   2316     layer_mv_t *ps_layer_mvbank;
   2317     hme_mv_t *ps_mv;
   2318     S08 *pi1_ref_idx;
   2319 
   2320     /* Width and ht of current and prev layers */
   2321     wd_c = ps_curr_layer->i4_wd;
   2322     ht_c = ps_curr_layer->i4_ht;
   2323     wd_p = ps_coarse_layer->i4_wd;
   2324     ht_p = ps_coarse_layer->i4_ht;
   2325 
   2326     ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
   2327     /* blksize_p = log2(wd) + 1 */
   2328     blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
   2329 
   2330     /* ASSERT for valid sizes */
   2331     ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
   2332 
   2333     /* Safety check to avoid uninitialized access across temporal layers */
   2334     i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
   2335     i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
   2336 
   2337     /* Project the positions to prev layer */
   2338     /* TODO: convert these to scale factors at pic level */
   2339     blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
   2340     blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
   2341 
   2342     /* Pick up the mvs from the location */
   2343     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
   2344     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
   2345 
   2346     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
   2347     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
   2348 
   2349     ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
   2350     pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
   2351 
   2352     ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
   2353     ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
   2354     ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
   2355     if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
   2356     {
   2357         ps_search_node->i1_ref_idx = i1_ref_id;
   2358         ps_search_node->s_mv.i2_mvx = 0;
   2359         ps_search_node->s_mv.i2_mvy = 0;
   2360     }
   2361 }
   2362 
   2363 void hme_project_coloc_candt_dyadic_implicit(
   2364     search_node_t *ps_search_node,
   2365     layer_ctxt_t *ps_curr_layer,
   2366     layer_ctxt_t *ps_coarse_layer,
   2367     S32 i4_pos_x,
   2368     S32 i4_pos_y,
   2369     S32 i4_num_act_ref_l0,
   2370     U08 u1_pred_dir,
   2371     U08 u1_default_ref_id,
   2372     S32 i4_result_id)
   2373 {
   2374     S32 wd_c, ht_c, wd_p, ht_p;
   2375     S32 blksize_p, blk_x, blk_y, i4_offset;
   2376     layer_mv_t *ps_layer_mvbank;
   2377     hme_mv_t *ps_mv;
   2378     S08 *pi1_ref_idx;
   2379 
   2380     /* Width and ht of current and prev layers */
   2381     wd_c = ps_curr_layer->i4_wd;
   2382     ht_c = ps_curr_layer->i4_ht;
   2383     wd_p = ps_coarse_layer->i4_wd;
   2384     ht_p = ps_coarse_layer->i4_ht;
   2385 
   2386     ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
   2387     blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
   2388 
   2389     /* ASSERT for valid sizes */
   2390     ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
   2391 
   2392     /* Safety check to avoid uninitialized access across temporal layers */
   2393     i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
   2394     i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
   2395     /* Project the positions to prev layer */
   2396     /* TODO: convert these to scale factors at pic level */
   2397     blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
   2398     blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
   2399 
   2400     /* Pick up the mvs from the location */
   2401     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
   2402     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
   2403 
   2404     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
   2405     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
   2406 
   2407     if(u1_pred_dir == 1)
   2408     {
   2409         ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
   2410         pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
   2411     }
   2412 
   2413     ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
   2414     ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
   2415     ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
   2416     if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
   2417     {
   2418         ps_search_node->i1_ref_idx = u1_default_ref_id;
   2419         ps_search_node->s_mv.i2_mvx = 0;
   2420         ps_search_node->s_mv.i2_mvy = 0;
   2421     }
   2422 }
   2423 
   2424 #define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
   2425     {                                                                                              \
   2426         prm1.i2_min_x = prm2.i2_min_x << shift;                                                    \
   2427         prm1.i2_max_x = prm2.i2_max_x << shift;                                                    \
   2428         prm1.i2_min_y = prm2.i2_min_y << shift;                                                    \
   2429         prm1.i2_max_y = prm2.i2_max_y << shift;                                                    \
   2430     }
   2431 
   2432 #define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
   2433     {                                                                                              \
   2434         prm1->i2_min_x = prm2->i2_min_x << shift;                                                  \
   2435         prm1->i2_max_x = prm2->i2_max_x << shift;                                                  \
   2436         prm1->i2_min_y = prm2->i2_min_y << shift;                                                  \
   2437         prm1->i2_max_y = prm2->i2_max_y << shift;                                                  \
   2438     }
   2439 
   2440 /**
   2441 ********************************************************************************
   2442 *  @fn   void hme_refine_frm_init(me_ctxt_t *ps_ctxt,
   2443 *                       refine_layer_prms_t *ps_refine_prms)
   2444 *
   2445 *  @brief  Frame init of refinemnet layers in ME
   2446 *
   2447 *  @param[in,out]  ps_ctxt: ME Handle
   2448 *
   2449 *  @param[in]  ps_refine_prms : refinement layer prms
   2450 *
   2451 *  @return None
   2452 ********************************************************************************
   2453 */
   2454 void hme_refine_frm_init(
   2455     layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
   2456 {
   2457     /* local variables */
   2458     BLK_SIZE_T e_result_blk_size = BLK_8x8;
   2459     S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
   2460 
   2461     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
   2462 
   2463     if(ps_refine_prms->explicit_ref)
   2464     {
   2465         i4_num_ref_fpel = i4_num_ref_prev_layer;
   2466     }
   2467     else
   2468     {
   2469         i4_num_ref_fpel = 2;
   2470     }
   2471 
   2472     if(ps_refine_prms->i4_enable_4x4_part)
   2473     {
   2474         e_result_blk_size = BLK_4x4;
   2475     }
   2476 
   2477     i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
   2478 
   2479     hme_init_mv_bank(
   2480         ps_curr_layer,
   2481         e_result_blk_size,
   2482         i4_num_ref_fpel,
   2483         ps_refine_prms->i4_num_mvbank_results,
   2484         ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
   2485 }
   2486 
   2487 #if 1  //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
   2488 /**
   2489 ********************************************************************************
   2490 *  @fn   void hme_init_clusters_16x16
   2491 *               (
   2492 *                   cluster_16x16_blk_t *ps_cluster_blk_16x16
   2493 *               )
   2494 *
   2495 *  @brief  Intialisations for the structs used in clustering algorithm
   2496 *
   2497 *  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
   2498 *                                        of 16x16 block
   2499 *
   2500 *  @return None
   2501 ********************************************************************************
   2502 */
   2503 static __inline void
   2504     hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
   2505 {
   2506     S32 i;
   2507 
   2508     ps_cluster_blk_16x16->num_clusters = 0;
   2509     ps_cluster_blk_16x16->intra_mv_area = 0;
   2510     ps_cluster_blk_16x16->best_inter_cost = 0;
   2511 
   2512     for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
   2513     {
   2514         ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
   2515             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
   2516 
   2517         ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
   2518 
   2519         ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
   2520         ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
   2521     }
   2522     for(i = 0; i < MAX_NUM_REF; i++)
   2523     {
   2524         ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
   2525     }
   2526 }
   2527 
   2528 /**
   2529 ********************************************************************************
   2530 *  @fn   void hme_init_clusters_32x32
   2531 *               (
   2532 *                   cluster_32x32_blk_t *ps_cluster_blk_32x32
   2533 *               )
   2534 *
   2535 *  @brief  Intialisations for the structs used in clustering algorithm
   2536 *
   2537 *  @param[in/out]  ps_cluster_blk_32x32: pointer to structure containing clusters
   2538 *                                        of 32x32 block
   2539 *
   2540 *  @return None
   2541 ********************************************************************************
   2542 */
   2543 static __inline void
   2544     hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
   2545 {
   2546     S32 i;
   2547 
   2548     ps_cluster_blk_32x32->num_clusters = 0;
   2549     ps_cluster_blk_32x32->intra_mv_area = 0;
   2550     ps_cluster_blk_32x32->best_alt_ref = -1;
   2551     ps_cluster_blk_32x32->best_uni_ref = -1;
   2552     ps_cluster_blk_32x32->best_inter_cost = 0;
   2553     ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
   2554 
   2555     for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
   2556     {
   2557         ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
   2558             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
   2559         ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
   2560 
   2561         ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
   2562         ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
   2563     }
   2564     for(i = 0; i < MAX_NUM_REF; i++)
   2565     {
   2566         ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
   2567     }
   2568 }
   2569 
   2570 /**
   2571 ********************************************************************************
   2572 *  @fn   void hme_init_clusters_64x64
   2573 *               (
   2574 *                   cluster_64x64_blk_t *ps_cluster_blk_64x64
   2575 *               )
   2576 *
   2577 *  @brief  Intialisations for the structs used in clustering algorithm
   2578 *
   2579 *  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
   2580 *                                        of 64x64 block
   2581 *
   2582 *  @return None
   2583 ********************************************************************************
   2584 */
   2585 static __inline void
   2586     hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
   2587 {
   2588     S32 i;
   2589 
   2590     ps_cluster_blk_64x64->num_clusters = 0;
   2591     ps_cluster_blk_64x64->intra_mv_area = 0;
   2592     ps_cluster_blk_64x64->best_alt_ref = -1;
   2593     ps_cluster_blk_64x64->best_uni_ref = -1;
   2594     ps_cluster_blk_64x64->best_inter_cost = 0;
   2595 
   2596     for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
   2597     {
   2598         ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
   2599             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
   2600         ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
   2601 
   2602         ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
   2603         ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
   2604     }
   2605     for(i = 0; i < MAX_NUM_REF; i++)
   2606     {
   2607         ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
   2608     }
   2609 }
   2610 
   2611 /**
   2612 ********************************************************************************
   2613 *  @fn   void hme_sort_and_assign_top_ref_ids_areawise
   2614 *               (
   2615 *                   ctb_cluster_info_t *ps_ctb_cluster_info
   2616 *               )
   2617 *
   2618 *  @brief  Finds best_uni_ref and best_alt_ref
   2619 *
   2620 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
   2621 *
   2622 *  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
   2623 *                             enabled
   2624 *
   2625 *  @param[in]  block_width: width of the block in pels
   2626 *
   2627 *  @param[in]  e_cu_pos: position of the block within the CTB
   2628 *
   2629 *  @return None
   2630 ********************************************************************************
   2631 */
   2632 void hme_sort_and_assign_top_ref_ids_areawise(
   2633     ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
   2634 {
   2635     cluster_32x32_blk_t *ps_32x32 = NULL;
   2636     cluster_64x64_blk_t *ps_64x64 = NULL;
   2637     cluster_data_t *ps_data;
   2638 
   2639     S32 j, k;
   2640 
   2641     S32 ai4_uni_area[MAX_NUM_REF];
   2642     S32 ai4_bi_area[MAX_NUM_REF];
   2643     S32 ai4_ref_id_found[MAX_NUM_REF];
   2644     S32 ai4_ref_id[MAX_NUM_REF];
   2645 
   2646     S32 best_uni_ref = -1, best_alt_ref = -1;
   2647     S32 num_clusters;
   2648     S32 num_ref = 0;
   2649     S32 num_clusters_evaluated = 0;
   2650     S32 is_cur_blk_valid;
   2651 
   2652     if(32 == block_width)
   2653     {
   2654         is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
   2655         ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
   2656         num_clusters = ps_32x32->num_clusters;
   2657         ps_data = &ps_32x32->as_cluster_data[0];
   2658     }
   2659     else
   2660     {
   2661         is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
   2662         ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
   2663         num_clusters = ps_64x64->num_clusters;
   2664         ps_data = &ps_64x64->as_cluster_data[0];
   2665     }
   2666 
   2667 #if !ENABLE_4CTB_EVALUATION
   2668     if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
   2669     {
   2670         return;
   2671     }
   2672 #endif
   2673     if(num_clusters == 0)
   2674     {
   2675         return;
   2676     }
   2677     else if(!is_cur_blk_valid)
   2678     {
   2679         return;
   2680     }
   2681 
   2682     memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
   2683     memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
   2684     memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
   2685     memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
   2686 
   2687     for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
   2688     {
   2689         S32 ref_id;
   2690 
   2691         if(!ps_data->is_valid_cluster)
   2692         {
   2693             continue;
   2694         }
   2695 
   2696         ref_id = ps_data->ref_id;
   2697 
   2698         num_clusters_evaluated++;
   2699 
   2700         ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
   2701         ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
   2702 
   2703         if(!ai4_ref_id_found[ref_id])
   2704         {
   2705             ai4_ref_id[ref_id] = ref_id;
   2706             ai4_ref_id_found[ref_id] = 1;
   2707             num_ref++;
   2708         }
   2709     }
   2710 
   2711     {
   2712         S32 ai4_ref_id_temp[MAX_NUM_REF];
   2713 
   2714         memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
   2715 
   2716         for(k = 1; k < MAX_NUM_REF; k++)
   2717         {
   2718             if(ai4_uni_area[k] > ai4_uni_area[0])
   2719             {
   2720                 SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
   2721                 SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
   2722             }
   2723         }
   2724 
   2725         best_uni_ref = ai4_ref_id_temp[0];
   2726     }
   2727 
   2728     if(bidir_enabled)
   2729     {
   2730         for(k = 1; k < MAX_NUM_REF; k++)
   2731         {
   2732             if(ai4_bi_area[k] > ai4_bi_area[0])
   2733             {
   2734                 SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
   2735                 SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
   2736             }
   2737         }
   2738 
   2739         if(!ai4_bi_area[0])
   2740         {
   2741             best_alt_ref = -1;
   2742 
   2743             if(32 == block_width)
   2744             {
   2745                 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
   2746             }
   2747             else
   2748             {
   2749                 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
   2750             }
   2751 
   2752             return;
   2753         }
   2754 
   2755         if(best_uni_ref == ai4_ref_id[0])
   2756         {
   2757             for(k = 2; k < MAX_NUM_REF; k++)
   2758             {
   2759                 if(ai4_bi_area[k] > ai4_bi_area[1])
   2760                 {
   2761                     SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
   2762                     SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
   2763                 }
   2764             }
   2765 
   2766             best_alt_ref = ai4_ref_id[1];
   2767         }
   2768         else
   2769         {
   2770             best_alt_ref = ai4_ref_id[0];
   2771         }
   2772     }
   2773 
   2774     if(32 == block_width)
   2775     {
   2776         SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
   2777     }
   2778     else
   2779     {
   2780         SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
   2781     }
   2782 }
   2783 
   2784 /**
   2785 ********************************************************************************
   2786 *  @fn   void hme_find_top_ref_ids
   2787 *               (
   2788 *                   ctb_cluster_info_t *ps_ctb_cluster_info
   2789 *               )
   2790 *
   2791 *  @brief  Finds best_uni_ref and best_alt_ref
   2792 *
   2793 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
   2794 *
   2795 *  @return None
   2796 ********************************************************************************
   2797 */
   2798 void hme_find_top_ref_ids(
   2799     ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
   2800 {
   2801     S32 i;
   2802 
   2803     if(32 == block_width)
   2804     {
   2805         for(i = 0; i < 4; i++)
   2806         {
   2807             hme_sort_and_assign_top_ref_ids_areawise(
   2808                 ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
   2809         }
   2810     }
   2811     else if(64 == block_width)
   2812     {
   2813         hme_sort_and_assign_top_ref_ids_areawise(
   2814             ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
   2815     }
   2816 }
   2817 
   2818 /**
   2819 ********************************************************************************
   2820 *  @fn   void hme_boot_out_outlier
   2821 *               (
   2822 *                   ctb_cluster_info_t *ps_ctb_cluster_info
   2823 *               )
   2824 *
   2825 *  @brief  Removes outlier clusters before CU tree population
   2826 *
   2827 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
   2828 *
   2829 *  @return None
   2830 ********************************************************************************
   2831 */
   2832 void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
   2833 {
   2834     cluster_32x32_blk_t *ps_32x32;
   2835 
   2836     S32 i;
   2837 
   2838     cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
   2839 
   2840     S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
   2841 
   2842     if(32 == blk_width)
   2843     {
   2844         /* 32x32 clusters */
   2845         for(i = 0; i < 4; i++)
   2846         {
   2847             ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
   2848 
   2849             if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
   2850             {
   2851                 BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
   2852             }
   2853         }
   2854     }
   2855     else if(64 == blk_width)
   2856     {
   2857         /* 64x64 clusters */
   2858         if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
   2859         {
   2860             BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
   2861         }
   2862     }
   2863 }
   2864 
   2865 /**
   2866 ********************************************************************************
   2867 *  @fn   void hme_update_cluster_attributes
   2868 *               (
   2869 *                   cluster_data_t *ps_cluster_data,
   2870 *                   S32 mvx,
   2871 *                   S32 mvy,
   2872 *                   PART_ID_T e_part_id
   2873 *               )
   2874 *
   2875 *  @brief  Implementation fo the clustering algorithm
   2876 *
   2877 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
   2878 *
   2879 *  @param[in]  mvx : x co-ordinate of the motion vector
   2880 *
   2881 *  @param[in]  mvy : y co-ordinate of the motion vector
   2882 *
   2883 *  @param[in]  ref_idx : ref_id of the motion vector
   2884 *
   2885 *  @param[in]  e_part_id : partition id of the motion vector
   2886 *
   2887 *  @return None
   2888 ********************************************************************************
   2889 */
   2890 static __inline void hme_update_cluster_attributes(
   2891     cluster_data_t *ps_cluster_data,
   2892     S32 mvx,
   2893     S32 mvy,
   2894     S32 mvdx,
   2895     S32 mvdy,
   2896     S32 ref_id,
   2897     S32 sdi,
   2898     U08 is_part_of_bi,
   2899     PART_ID_T e_part_id)
   2900 {
   2901     LWORD64 i8_mvx_sum_q8;
   2902     LWORD64 i8_mvy_sum_q8;
   2903 
   2904     S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
   2905     S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
   2906 
   2907     if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
   2908     {
   2909         ps_cluster_data->min_x = mvx;
   2910     }
   2911     else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
   2912     {
   2913         ps_cluster_data->max_x = mvx;
   2914     }
   2915 
   2916     if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
   2917     {
   2918         ps_cluster_data->min_y = mvy;
   2919     }
   2920     else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
   2921     {
   2922         ps_cluster_data->max_y = mvy;
   2923     }
   2924 
   2925     {
   2926         S32 num_mvs = ps_cluster_data->num_mvs;
   2927 
   2928         ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
   2929         ps_cluster_data->as_mv[num_mvs].mvx = mvx;
   2930         ps_cluster_data->as_mv[num_mvs].mvy = mvy;
   2931 
   2932         /***************************/
   2933         ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
   2934         ps_cluster_data->as_mv[num_mvs].sdi = sdi;
   2935         /**************************/
   2936     }
   2937 
   2938     /* Updation of centroid */
   2939     {
   2940         i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
   2941         i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);
   2942 
   2943         ps_cluster_data->num_mvs++;
   2944 
   2945         ps_cluster_data->s_centroid.i4_pos_x_q8 =
   2946             (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
   2947         ps_cluster_data->s_centroid.i4_pos_y_q8 =
   2948             (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
   2949     }
   2950 
   2951     ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];
   2952 
   2953     if(is_part_of_bi)
   2954     {
   2955         ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
   2956     }
   2957     else
   2958     {
   2959         ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
   2960     }
   2961 }
   2962 
   2963 /**
   2964 ********************************************************************************
   2965 *  @fn   void hme_try_cluster_merge
   2966 *               (
   2967 *                   cluster_data_t *ps_cluster_data,
   2968 *                   S32 *pi4_num_clusters,
   2969 *                   S32 idx_of_updated_cluster
   2970 *               )
   2971 *
   2972 *  @brief  Implementation fo the clustering algorithm
   2973 *
   2974 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
   2975 *
   2976 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
   2977 *
   2978 *  @param[in]  idx_of_updated_cluster : index of the cluster most recently
   2979 *                                       updated
   2980 *
   2981 *  @return Nothing
   2982 ********************************************************************************
   2983 */
   2984 void hme_try_cluster_merge(
   2985     cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
   2986 {
   2987     centroid_t *ps_centroid;
   2988 
   2989     S32 cur_pos_x_q8;
   2990     S32 cur_pos_y_q8;
   2991     S32 i;
   2992     S32 max_dist_from_centroid;
   2993     S32 mvd;
   2994     S32 mvdx_q8;
   2995     S32 mvdx;
   2996     S32 mvdy_q8;
   2997     S32 mvdy;
   2998     S32 num_clusters, num_clusters_evaluated;
   2999     S32 other_pos_x_q8;
   3000     S32 other_pos_y_q8;
   3001 
   3002     cluster_data_t *ps_root = ps_cluster_data;
   3003     cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
   3004     centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
   3005 
   3006     /* Merge is superfluous if num_clusters is 1 */
   3007     if(*pu1_num_clusters == 1)
   3008     {
   3009         return;
   3010     }
   3011 
   3012     cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
   3013     cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
   3014 
   3015     max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
   3016 
   3017     num_clusters = *pu1_num_clusters;
   3018     num_clusters_evaluated = 0;
   3019 
   3020     for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
   3021     {
   3022         if(!ps_cluster_data->is_valid_cluster)
   3023         {
   3024             continue;
   3025         }
   3026         if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
   3027         {
   3028             num_clusters_evaluated++;
   3029             continue;
   3030         }
   3031 
   3032         ps_centroid = &ps_cluster_data->s_centroid;
   3033 
   3034         other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
   3035         other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
   3036 
   3037         mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
   3038         mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
   3039         mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3040         mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3041 
   3042         mvd = ABS(mvdx) + ABS(mvdy);
   3043 
   3044         if(mvd <= (max_dist_from_centroid >> 1))
   3045         {
   3046             /* 0 => no updates */
   3047             /* 1 => min updated */
   3048             /* 2 => max updated */
   3049             S32 minmax_x_update_id;
   3050             S32 minmax_y_update_id;
   3051 
   3052             LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
   3053             LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
   3054             LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
   3055             LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
   3056 
   3057             (*pu1_num_clusters)--;
   3058 
   3059             ps_cluster_data->is_valid_cluster = 0;
   3060 
   3061             memcpy(
   3062                 &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
   3063                 ps_cluster_data->as_mv,
   3064                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
   3065 
   3066             ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
   3067             ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
   3068             ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
   3069             ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
   3070             i8_mv_x_sum_self += i8_mv_x_sum_cousin;
   3071             i8_mv_y_sum_self += i8_mv_y_sum_cousin;
   3072 
   3073             ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
   3074             ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
   3075 
   3076             minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
   3077                                      ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
   3078                                      : 1;
   3079             minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
   3080                                      ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
   3081                                      : 1;
   3082 
   3083             /* Updation of centroid spread */
   3084             switch(minmax_x_update_id + (minmax_y_update_id << 2))
   3085             {
   3086             case 1:
   3087             {
   3088                 S32 mvd, mvd_q8;
   3089 
   3090                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
   3091 
   3092                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
   3093                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3094 
   3095                 if(mvd > (max_dist_from_centroid))
   3096                 {
   3097                     ps_cluster_data->max_dist_from_centroid = mvd;
   3098                 }
   3099                 break;
   3100             }
   3101             case 2:
   3102             {
   3103                 S32 mvd, mvd_q8;
   3104 
   3105                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
   3106 
   3107                 mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
   3108                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3109 
   3110                 if(mvd > (max_dist_from_centroid))
   3111                 {
   3112                     ps_cluster_data->max_dist_from_centroid = mvd;
   3113                 }
   3114                 break;
   3115             }
   3116             case 4:
   3117             {
   3118                 S32 mvd, mvd_q8;
   3119 
   3120                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
   3121 
   3122                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
   3123                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3124 
   3125                 if(mvd > (max_dist_from_centroid))
   3126                 {
   3127                     ps_cluster_data->max_dist_from_centroid = mvd;
   3128                 }
   3129                 break;
   3130             }
   3131             case 5:
   3132             {
   3133                 S32 mvd;
   3134                 S32 mvdx, mvdx_q8;
   3135                 S32 mvdy, mvdy_q8;
   3136 
   3137                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
   3138                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3139 
   3140                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
   3141                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3142 
   3143                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   3144 
   3145                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
   3146                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
   3147 
   3148                 if(mvd > max_dist_from_centroid)
   3149                 {
   3150                     ps_cluster_data->max_dist_from_centroid = mvd;
   3151                 }
   3152                 break;
   3153             }
   3154             case 6:
   3155             {
   3156                 S32 mvd;
   3157                 S32 mvdx, mvdx_q8;
   3158                 S32 mvdy, mvdy_q8;
   3159 
   3160                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
   3161                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3162 
   3163                 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
   3164                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3165 
   3166                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   3167 
   3168                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
   3169                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
   3170 
   3171                 if(mvd > max_dist_from_centroid)
   3172                 {
   3173                     ps_cluster_data->max_dist_from_centroid = mvd;
   3174                 }
   3175                 break;
   3176             }
   3177             case 8:
   3178             {
   3179                 S32 mvd, mvd_q8;
   3180 
   3181                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
   3182 
   3183                 mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
   3184                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3185 
   3186                 if(mvd > (max_dist_from_centroid))
   3187                 {
   3188                     ps_cluster_data->max_dist_from_centroid = mvd;
   3189                 }
   3190                 break;
   3191             }
   3192             case 9:
   3193             {
   3194                 S32 mvd;
   3195                 S32 mvdx, mvdx_q8;
   3196                 S32 mvdy, mvdy_q8;
   3197 
   3198                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
   3199                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3200 
   3201                 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
   3202                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3203 
   3204                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   3205 
   3206                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
   3207                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
   3208 
   3209                 if(mvd > max_dist_from_centroid)
   3210                 {
   3211                     ps_cluster_data->max_dist_from_centroid = mvd;
   3212                 }
   3213                 break;
   3214             }
   3215             case 10:
   3216             {
   3217                 S32 mvd;
   3218                 S32 mvdx, mvdx_q8;
   3219                 S32 mvdy, mvdy_q8;
   3220 
   3221                 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
   3222                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3223 
   3224                 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
   3225                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3226 
   3227                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   3228 
   3229                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
   3230                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
   3231 
   3232                 if(mvd > ps_cluster_data->max_dist_from_centroid)
   3233                 {
   3234                     ps_cluster_data->max_dist_from_centroid = mvd;
   3235                 }
   3236                 break;
   3237             }
   3238             default:
   3239             {
   3240                 break;
   3241             }
   3242             }
   3243 
   3244             hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
   3245 
   3246             return;
   3247         }
   3248 
   3249         num_clusters_evaluated++;
   3250     }
   3251 }
   3252 
   3253 /**
   3254 ********************************************************************************
   3255 *  @fn   void hme_find_and_update_clusters
   3256 *               (
   3257 *                   cluster_data_t *ps_cluster_data,
   3258 *                   S32 *pi4_num_clusters,
   3259 *                   S32 mvx,
   3260 *                   S32 mvy,
   3261 *                   S32 ref_idx,
   3262 *                   PART_ID_T e_part_id
   3263 *               )
   3264 *
   3265 *  @brief  Implementation fo the clustering algorithm
   3266 *
   3267 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
   3268 *
   3269 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
   3270 *
   3271 *  @param[in]  mvx : x co-ordinate of the motion vector
   3272 *
   3273 *  @param[in]  mvy : y co-ordinate of the motion vector
   3274 *
   3275 *  @param[in]  ref_idx : ref_id of the motion vector
   3276 *
   3277 *  @param[in]  e_part_id : partition id of the motion vector
   3278 *
   3279 *  @return None
   3280 ********************************************************************************
   3281 */
   3282 void hme_find_and_update_clusters(
   3283     cluster_data_t *ps_cluster_data,
   3284     U08 *pu1_num_clusters,
   3285     S16 i2_mv_x,
   3286     S16 i2_mv_y,
   3287     U08 i1_ref_idx,
   3288     S32 i4_sdi,
   3289     PART_ID_T e_part_id,
   3290     U08 is_part_of_bi)
   3291 {
   3292     S32 i;
   3293     S32 min_mvd_cluster_id = -1;
   3294     S32 mvd, mvd_limit, mvdx, mvdy;
   3295     S32 min_mvdx, min_mvdy;
   3296 
   3297     S32 min_mvd = MAX_32BIT_VAL;
   3298     S32 num_clusters = *pu1_num_clusters;
   3299 
   3300     S32 mvx = i2_mv_x;
   3301     S32 mvy = i2_mv_y;
   3302     S32 ref_idx = i1_ref_idx;
   3303     S32 sdi = i4_sdi;
   3304     S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
   3305 
   3306     if(num_clusters == 0)
   3307     {
   3308         cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
   3309 
   3310         ps_data->num_mvs = 1;
   3311         ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
   3312         ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
   3313         ps_data->ref_id = ref_idx;
   3314         ps_data->area_in_pixels = gai4_partition_area[e_part_id];
   3315         ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
   3316         ps_data->as_mv[0].mvx = mvx;
   3317         ps_data->as_mv[0].mvy = mvy;
   3318 
   3319         /***************************/
   3320         ps_data->as_mv[0].is_uni = !is_part_of_bi;
   3321         ps_data->as_mv[0].sdi = sdi;
   3322         if(is_part_of_bi)
   3323         {
   3324             ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
   3325         }
   3326         else
   3327         {
   3328             ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
   3329         }
   3330         /**************************/
   3331         ps_data->max_x = mvx;
   3332         ps_data->min_x = mvx;
   3333         ps_data->max_y = mvy;
   3334         ps_data->min_y = mvy;
   3335 
   3336         ps_data->is_valid_cluster = 1;
   3337 
   3338         *pu1_num_clusters = 1;
   3339     }
   3340     else
   3341     {
   3342         S32 num_clusters_evaluated = 0;
   3343 
   3344         for(i = 0; num_clusters_evaluated < num_clusters; i++)
   3345         {
   3346             cluster_data_t *ps_data = &ps_cluster_data[i];
   3347 
   3348             centroid_t *ps_centroid;
   3349 
   3350             S32 mvx_q8;
   3351             S32 mvy_q8;
   3352             S32 posx_q8;
   3353             S32 posy_q8;
   3354             S32 mvdx_q8;
   3355             S32 mvdy_q8;
   3356 
   3357             /* In anticipation of a possible merging of clusters */
   3358             if(ps_data->is_valid_cluster == 0)
   3359             {
   3360                 new_cluster_idx = i;
   3361                 continue;
   3362             }
   3363 
   3364             if(ref_idx != ps_data->ref_id)
   3365             {
   3366                 num_clusters_evaluated++;
   3367                 continue;
   3368             }
   3369 
   3370             ps_centroid = &ps_data->s_centroid;
   3371             posx_q8 = ps_centroid->i4_pos_x_q8;
   3372             posy_q8 = ps_centroid->i4_pos_y_q8;
   3373 
   3374             mvx_q8 = mvx << 8;
   3375             mvy_q8 = mvy << 8;
   3376 
   3377             mvdx_q8 = posx_q8 - mvx_q8;
   3378             mvdy_q8 = posy_q8 - mvy_q8;
   3379 
   3380             mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
   3381             mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
   3382 
   3383             mvd = ABS(mvdx) + ABS(mvdy);
   3384 
   3385             if(mvd < min_mvd)
   3386             {
   3387                 min_mvd = mvd;
   3388                 min_mvdx = mvdx;
   3389                 min_mvdy = mvdy;
   3390                 min_mvd_cluster_id = i;
   3391             }
   3392 
   3393             num_clusters_evaluated++;
   3394         }
   3395 
   3396         mvd_limit = (min_mvd_cluster_id == -1)
   3397                         ? ps_cluster_data[0].max_dist_from_centroid
   3398                         : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
   3399 
   3400         /* This condition implies that min_mvd has been updated */
   3401         if(min_mvd <= mvd_limit)
   3402         {
   3403             hme_update_cluster_attributes(
   3404                 &ps_cluster_data[min_mvd_cluster_id],
   3405                 mvx,
   3406                 mvy,
   3407                 min_mvdx,
   3408                 min_mvdy,
   3409                 ref_idx,
   3410                 sdi,
   3411                 is_part_of_bi,
   3412                 e_part_id);
   3413 
   3414             if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
   3415             {
   3416                 hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
   3417             }
   3418         }
   3419         else
   3420         {
   3421             cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
   3422                                           ? &ps_cluster_data[num_clusters]
   3423                                           : &ps_cluster_data[new_cluster_idx];
   3424 
   3425             ps_data->num_mvs = 1;
   3426             ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
   3427             ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
   3428             ps_data->ref_id = ref_idx;
   3429             ps_data->area_in_pixels = gai4_partition_area[e_part_id];
   3430             ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
   3431             ps_data->as_mv[0].mvx = mvx;
   3432             ps_data->as_mv[0].mvy = mvy;
   3433 
   3434             /***************************/
   3435             ps_data->as_mv[0].is_uni = !is_part_of_bi;
   3436             ps_data->as_mv[0].sdi = sdi;
   3437             if(is_part_of_bi)
   3438             {
   3439                 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
   3440             }
   3441             else
   3442             {
   3443                 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
   3444             }
   3445             /**************************/
   3446             ps_data->max_x = mvx;
   3447             ps_data->min_x = mvx;
   3448             ps_data->max_y = mvy;
   3449             ps_data->min_y = mvy;
   3450 
   3451             ps_data->is_valid_cluster = 1;
   3452 
   3453             num_clusters++;
   3454             *pu1_num_clusters = num_clusters;
   3455         }
   3456     }
   3457 }
   3458 
   3459 /**
   3460 ********************************************************************************
   3461 *  @fn   void hme_update_32x32_cluster_attributes
   3462 *               (
   3463 *                   cluster_32x32_blk_t *ps_blk_32x32,
   3464 *                   cluster_data_t *ps_cluster_data
   3465 *               )
   3466 *
   3467 *  @brief  Updates attributes for 32x32 clusters based on the attributes of
   3468 *          the constituent 16x16 clusters
   3469 *
   3470 *  @param[out]  ps_blk_32x32: structure containing 32x32 block results
   3471 *
   3472 *  @param[in]  ps_cluster_data : structure containing 16x16 block results
   3473 *
   3474 *  @return None
   3475 ********************************************************************************
   3476 */
   3477 void hme_update_32x32_cluster_attributes(
   3478     cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
   3479 {
   3480     cluster_data_t *ps_cur_cluster_32;
   3481 
   3482     S32 i;
   3483     S32 mvd_limit;
   3484 
   3485     S32 num_clusters = ps_blk_32x32->num_clusters;
   3486 
   3487     if(0 == num_clusters)
   3488     {
   3489         ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
   3490 
   3491         ps_blk_32x32->num_clusters++;
   3492         ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
   3493 
   3494         ps_cur_cluster_32->is_valid_cluster = 1;
   3495 
   3496         ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
   3497         ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
   3498         ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
   3499 
   3500         memcpy(
   3501             ps_cur_cluster_32->as_mv,
   3502             ps_cluster_data->as_mv,
   3503             sizeof(mv_data_t) * ps_cluster_data->num_mvs);
   3504 
   3505         ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
   3506 
   3507         ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
   3508 
   3509         ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
   3510         ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
   3511         ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
   3512         ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
   3513 
   3514         ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
   3515     }
   3516     else
   3517     {
   3518         centroid_t *ps_centroid;
   3519 
   3520         S32 cur_posx_q8, cur_posy_q8;
   3521         S32 min_mvd_cluster_id = -1;
   3522         S32 mvd;
   3523         S32 mvdx;
   3524         S32 mvdy;
   3525         S32 mvdx_min;
   3526         S32 mvdy_min;
   3527         S32 mvdx_q8;
   3528         S32 mvdy_q8;
   3529 
   3530         S32 num_clusters_evaluated = 0;
   3531 
   3532         S32 mvd_min = MAX_32BIT_VAL;
   3533 
   3534         S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
   3535         S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
   3536 
   3537         for(i = 0; num_clusters_evaluated < num_clusters; i++)
   3538         {
   3539             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
   3540 
   3541             if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
   3542             {
   3543                 num_clusters_evaluated++;
   3544                 continue;
   3545             }
   3546             if(!ps_cluster_data->is_valid_cluster)
   3547             {
   3548                 continue;
   3549             }
   3550 
   3551             num_clusters_evaluated++;
   3552 
   3553             ps_centroid = &ps_cur_cluster_32->s_centroid;
   3554 
   3555             cur_posx_q8 = ps_centroid->i4_pos_x_q8;
   3556             cur_posy_q8 = ps_centroid->i4_pos_y_q8;
   3557 
   3558             mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
   3559             mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
   3560 
   3561             mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3562             mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3563 
   3564             mvd = ABS(mvdx) + ABS(mvdy);
   3565 
   3566             if(mvd < mvd_min)
   3567             {
   3568                 mvd_min = mvd;
   3569                 mvdx_min = mvdx;
   3570                 mvdy_min = mvdy;
   3571                 min_mvd_cluster_id = i;
   3572             }
   3573         }
   3574 
   3575         ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
   3576 
   3577         mvd_limit = (min_mvd_cluster_id == -1)
   3578                         ? ps_cur_cluster_32[0].max_dist_from_centroid
   3579                         : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
   3580 
   3581         if(mvd_min <= mvd_limit)
   3582         {
   3583             LWORD64 i8_updated_posx;
   3584             LWORD64 i8_updated_posy;
   3585             WORD32 minmax_updated_x = 0;
   3586             WORD32 minmax_updated_y = 0;
   3587 
   3588             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
   3589 
   3590             ps_centroid = &ps_cur_cluster_32->s_centroid;
   3591 
   3592             ps_cur_cluster_32->is_valid_cluster = 1;
   3593 
   3594             ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
   3595             ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
   3596             ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
   3597 
   3598             memcpy(
   3599                 &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
   3600                 ps_cluster_data->as_mv,
   3601                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
   3602 
   3603             if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
   3604             {
   3605                 ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
   3606                 minmax_updated_x = 1;
   3607             }
   3608             else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
   3609             {
   3610                 ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
   3611                 minmax_updated_x = 2;
   3612             }
   3613 
   3614             if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
   3615             {
   3616                 ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
   3617                 minmax_updated_y = 1;
   3618             }
   3619             else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
   3620             {
   3621                 ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
   3622                 minmax_updated_y = 2;
   3623             }
   3624 
   3625             switch((minmax_updated_y << 2) + minmax_updated_x)
   3626             {
   3627             case 1:
   3628             {
   3629                 S32 mvd, mvd_q8;
   3630 
   3631                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
   3632                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3633 
   3634                 if(mvd > (mvd_limit))
   3635                 {
   3636                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
   3637                 }
   3638                 break;
   3639             }
   3640             case 2:
   3641             {
   3642                 S32 mvd, mvd_q8;
   3643 
   3644                 mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
   3645                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3646 
   3647                 if(mvd > (mvd_limit))
   3648                 {
   3649                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
   3650                 }
   3651                 break;
   3652             }
   3653             case 4:
   3654             {
   3655                 S32 mvd, mvd_q8;
   3656 
   3657                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
   3658                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3659 
   3660                 if(mvd > (mvd_limit))
   3661                 {
   3662                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
   3663                 }
   3664                 break;
   3665             }
   3666             case 5:
   3667             {
   3668                 S32 mvd;
   3669                 S32 mvdx, mvdx_q8;
   3670                 S32 mvdy, mvdy_q8;
   3671 
   3672                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
   3673                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3674 
   3675                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
   3676                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3677 
   3678                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   3679 
   3680                 if(mvd > mvd_limit)
   3681                 {
   3682                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
   3683                 }
   3684                 break;
   3685             }
   3686             case 6:
   3687             {
   3688                 S32 mvd;
   3689                 S32 mvdx, mvdx_q8;
   3690                 S32 mvdy, mvdy_q8;
   3691 
   3692                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
   3693                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3694 
   3695                 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
   3696                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3697 
   3698                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   3699 
   3700                 if(mvd > mvd_limit)
   3701                 {
   3702                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
   3703                 }
   3704                 break;
   3705             }
   3706             case 8:
   3707             {
   3708                 S32 mvd, mvd_q8;
   3709 
   3710                 mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
   3711                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3712 
   3713                 if(mvd > (mvd_limit))
   3714                 {
   3715                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
   3716                 }
   3717                 break;
   3718             }
   3719             case 9:
   3720             {
   3721                 S32 mvd;
   3722                 S32 mvdx, mvdx_q8;
   3723                 S32 mvdy, mvdy_q8;
   3724 
   3725                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
   3726                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3727 
   3728                 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
   3729                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3730 
   3731                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   3732 
   3733                 if(mvd > mvd_limit)
   3734                 {
   3735                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
   3736                 }
   3737                 break;
   3738             }
   3739             case 10:
   3740             {
   3741                 S32 mvd;
   3742                 S32 mvdx, mvdx_q8;
   3743                 S32 mvdy, mvdy_q8;
   3744 
   3745                 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
   3746                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3747 
   3748                 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
   3749                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3750 
   3751                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   3752 
   3753                 if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
   3754                 {
   3755                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
   3756                 }
   3757                 break;
   3758             }
   3759             default:
   3760             {
   3761                 break;
   3762             }
   3763             }
   3764 
   3765             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
   3766                               ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
   3767             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
   3768                               ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
   3769 
   3770             ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
   3771 
   3772             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
   3773             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
   3774         }
   3775         else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
   3776         {
   3777             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
   3778 
   3779             ps_blk_32x32->num_clusters++;
   3780             ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
   3781 
   3782             ps_cur_cluster_32->is_valid_cluster = 1;
   3783 
   3784             ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
   3785             ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
   3786             ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
   3787 
   3788             memcpy(
   3789                 ps_cur_cluster_32->as_mv,
   3790                 ps_cluster_data->as_mv,
   3791                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
   3792 
   3793             ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
   3794 
   3795             ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
   3796 
   3797             ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
   3798             ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
   3799             ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
   3800             ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
   3801 
   3802             ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
   3803         }
   3804     }
   3805 }
   3806 
   3807 /**
   3808 ********************************************************************************
   3809 *  @fn   void hme_update_64x64_cluster_attributes
   3810 *               (
   3811 *                   cluster_64x64_blk_t *ps_blk_32x32,
   3812 *                   cluster_data_t *ps_cluster_data
   3813 *               )
   3814 *
   3815 *  @brief  Updates attributes for 64x64 clusters based on the attributes of
   3816 *          the constituent 16x16 clusters
   3817 *
   3818 *  @param[out]  ps_blk_64x64: structure containing 64x64 block results
   3819 *
   3820 *  @param[in]  ps_cluster_data : structure containing 32x32 block results
   3821 *
   3822 *  @return None
   3823 ********************************************************************************
   3824 */
   3825 void hme_update_64x64_cluster_attributes(
   3826     cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
   3827 {
   3828     cluster_data_t *ps_cur_cluster_64;
   3829 
   3830     S32 i;
   3831     S32 mvd_limit;
   3832 
   3833     S32 num_clusters = ps_blk_64x64->num_clusters;
   3834 
   3835     if(0 == num_clusters)
   3836     {
   3837         ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
   3838 
   3839         ps_blk_64x64->num_clusters++;
   3840         ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
   3841 
   3842         ps_cur_cluster_64->is_valid_cluster = 1;
   3843 
   3844         ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
   3845         ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
   3846         ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
   3847 
   3848         memcpy(
   3849             ps_cur_cluster_64->as_mv,
   3850             ps_cluster_data->as_mv,
   3851             sizeof(mv_data_t) * ps_cluster_data->num_mvs);
   3852 
   3853         ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
   3854 
   3855         ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
   3856 
   3857         ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
   3858         ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
   3859         ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
   3860         ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
   3861 
   3862         ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
   3863     }
   3864     else
   3865     {
   3866         centroid_t *ps_centroid;
   3867 
   3868         S32 cur_posx_q8, cur_posy_q8;
   3869         S32 min_mvd_cluster_id = -1;
   3870         S32 mvd;
   3871         S32 mvdx;
   3872         S32 mvdy;
   3873         S32 mvdx_min;
   3874         S32 mvdy_min;
   3875         S32 mvdx_q8;
   3876         S32 mvdy_q8;
   3877 
   3878         S32 num_clusters_evaluated = 0;
   3879 
   3880         S32 mvd_min = MAX_32BIT_VAL;
   3881 
   3882         S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
   3883         S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
   3884 
   3885         for(i = 0; num_clusters_evaluated < num_clusters; i++)
   3886         {
   3887             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
   3888 
   3889             if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
   3890             {
   3891                 num_clusters_evaluated++;
   3892                 continue;
   3893             }
   3894 
   3895             if(!ps_cur_cluster_64->is_valid_cluster)
   3896             {
   3897                 continue;
   3898             }
   3899 
   3900             num_clusters_evaluated++;
   3901 
   3902             ps_centroid = &ps_cur_cluster_64->s_centroid;
   3903 
   3904             cur_posx_q8 = ps_centroid->i4_pos_x_q8;
   3905             cur_posy_q8 = ps_centroid->i4_pos_y_q8;
   3906 
   3907             mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
   3908             mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
   3909 
   3910             mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   3911             mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   3912 
   3913             mvd = ABS(mvdx) + ABS(mvdy);
   3914 
   3915             if(mvd < mvd_min)
   3916             {
   3917                 mvd_min = mvd;
   3918                 mvdx_min = mvdx;
   3919                 mvdy_min = mvdy;
   3920                 min_mvd_cluster_id = i;
   3921             }
   3922         }
   3923 
   3924         ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
   3925 
   3926         mvd_limit = (min_mvd_cluster_id == -1)
   3927                         ? ps_cur_cluster_64[0].max_dist_from_centroid
   3928                         : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
   3929 
   3930         if(mvd_min <= mvd_limit)
   3931         {
   3932             LWORD64 i8_updated_posx;
   3933             LWORD64 i8_updated_posy;
   3934             WORD32 minmax_updated_x = 0;
   3935             WORD32 minmax_updated_y = 0;
   3936 
   3937             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
   3938 
   3939             ps_centroid = &ps_cur_cluster_64->s_centroid;
   3940 
   3941             ps_cur_cluster_64->is_valid_cluster = 1;
   3942 
   3943             ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
   3944             ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
   3945             ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
   3946 
   3947             memcpy(
   3948                 &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
   3949                 ps_cluster_data->as_mv,
   3950                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
   3951 
   3952             if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
   3953             {
   3954                 ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
   3955                 minmax_updated_x = 1;
   3956             }
   3957             else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
   3958             {
   3959                 ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
   3960                 minmax_updated_x = 2;
   3961             }
   3962 
   3963             if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
   3964             {
   3965                 ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
   3966                 minmax_updated_y = 1;
   3967             }
   3968             else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
   3969             {
   3970                 ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
   3971                 minmax_updated_y = 2;
   3972             }
   3973 
   3974             switch((minmax_updated_y << 2) + minmax_updated_x)
   3975             {
   3976             case 1:
   3977             {
   3978                 S32 mvd, mvd_q8;
   3979 
   3980                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
   3981                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3982 
   3983                 if(mvd > (mvd_limit))
   3984                 {
   3985                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
   3986                 }
   3987                 break;
   3988             }
   3989             case 2:
   3990             {
   3991                 S32 mvd, mvd_q8;
   3992 
   3993                 mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
   3994                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   3995 
   3996                 if(mvd > (mvd_limit))
   3997                 {
   3998                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
   3999                 }
   4000                 break;
   4001             }
   4002             case 4:
   4003             {
   4004                 S32 mvd, mvd_q8;
   4005 
   4006                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
   4007                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   4008 
   4009                 if(mvd > (mvd_limit))
   4010                 {
   4011                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
   4012                 }
   4013                 break;
   4014             }
   4015             case 5:
   4016             {
   4017                 S32 mvd;
   4018                 S32 mvdx, mvdx_q8;
   4019                 S32 mvdy, mvdy_q8;
   4020 
   4021                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
   4022                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   4023 
   4024                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
   4025                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   4026 
   4027                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   4028 
   4029                 if(mvd > mvd_limit)
   4030                 {
   4031                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
   4032                 }
   4033                 break;
   4034             }
   4035             case 6:
   4036             {
   4037                 S32 mvd;
   4038                 S32 mvdx, mvdx_q8;
   4039                 S32 mvdy, mvdy_q8;
   4040 
   4041                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
   4042                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   4043 
   4044                 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
   4045                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   4046 
   4047                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   4048 
   4049                 if(mvd > mvd_limit)
   4050                 {
   4051                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
   4052                 }
   4053                 break;
   4054             }
   4055             case 8:
   4056             {
   4057                 S32 mvd, mvd_q8;
   4058 
   4059                 mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
   4060                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   4061 
   4062                 if(mvd > (mvd_limit))
   4063                 {
   4064                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
   4065                 }
   4066                 break;
   4067             }
   4068             case 9:
   4069             {
   4070                 S32 mvd;
   4071                 S32 mvdx, mvdx_q8;
   4072                 S32 mvdy, mvdy_q8;
   4073 
   4074                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
   4075                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   4076 
   4077                 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
   4078                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   4079 
   4080                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   4081 
   4082                 if(mvd > mvd_limit)
   4083                 {
   4084                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
   4085                 }
   4086                 break;
   4087             }
   4088             case 10:
   4089             {
   4090                 S32 mvd;
   4091                 S32 mvdx, mvdx_q8;
   4092                 S32 mvdy, mvdy_q8;
   4093 
   4094                 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
   4095                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   4096 
   4097                 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
   4098                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   4099 
   4100                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   4101 
   4102                 if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
   4103                 {
   4104                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
   4105                 }
   4106                 break;
   4107             }
   4108             default:
   4109             {
   4110                 break;
   4111             }
   4112             }
   4113 
   4114             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
   4115                               ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
   4116             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
   4117                               ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
   4118 
   4119             ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
   4120 
   4121             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
   4122             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
   4123         }
   4124         else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
   4125         {
   4126             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
   4127 
   4128             ps_blk_64x64->num_clusters++;
   4129             ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
   4130 
   4131             ps_cur_cluster_64->is_valid_cluster = 1;
   4132 
   4133             ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
   4134             ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
   4135             ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
   4136 
   4137             memcpy(
   4138                 &ps_cur_cluster_64->as_mv[0],
   4139                 ps_cluster_data->as_mv,
   4140                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
   4141 
   4142             ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
   4143 
   4144             ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
   4145 
   4146             ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
   4147             ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
   4148             ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
   4149             ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
   4150 
   4151             ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
   4152         }
   4153     }
   4154 }
   4155 
   4156 /**
   4157 ********************************************************************************
   4158 *  @fn   void hme_update_32x32_clusters
   4159 *               (
   4160 *                   cluster_32x32_blk_t *ps_blk_32x32,
   4161 *                   cluster_16x16_blk_t *ps_blk_16x16
   4162 *               )
   4163 *
   4164 *  @brief  Updates attributes for 32x32 clusters based on the attributes of
   4165 *          the constituent 16x16 clusters
   4166 *
   4167 *  @param[out]  ps_blk_32x32: structure containing 32x32 block results
   4168 *
   4169 *  @param[in]  ps_blk_16x16 : structure containing 16x16 block results
   4170 *
   4171 *  @return None
   4172 ********************************************************************************
   4173 */
   4174 static __inline void
   4175     hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
   4176 {
   4177     cluster_16x16_blk_t *ps_blk_16x16_cur;
   4178     cluster_data_t *ps_cur_cluster;
   4179 
   4180     S32 i, j;
   4181     S32 num_clusters_cur_16x16_blk;
   4182 
   4183     for(i = 0; i < 4; i++)
   4184     {
   4185         S32 num_clusters_evaluated = 0;
   4186 
   4187         ps_blk_16x16_cur = &ps_blk_16x16[i];
   4188 
   4189         num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
   4190 
   4191         ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
   4192 
   4193         ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
   4194 
   4195         for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
   4196         {
   4197             ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
   4198 
   4199             if(!ps_cur_cluster->is_valid_cluster)
   4200             {
   4201                 continue;
   4202             }
   4203 
   4204             hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
   4205 
   4206             num_clusters_evaluated++;
   4207         }
   4208     }
   4209 }
   4210 
   4211 /**
   4212 ********************************************************************************
   4213 *  @fn   void hme_update_64x64_clusters
   4214 *               (
   4215 *                   cluster_64x64_blk_t *ps_blk_64x64,
   4216 *                   cluster_32x32_blk_t *ps_blk_32x32
   4217 *               )
   4218 *
   4219 *  @brief  Updates attributes for 64x64 clusters based on the attributes of
   4220 *          the constituent 16x16 clusters
   4221 *
   4222 *  @param[out]  ps_blk_64x64: structure containing 32x32 block results
   4223 *
   4224 *  @param[in]  ps_blk_32x32 : structure containing 16x16 block results
   4225 *
   4226 *  @return None
   4227 ********************************************************************************
   4228 */
   4229 static __inline void
   4230     hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
   4231 {
   4232     cluster_32x32_blk_t *ps_blk_32x32_cur;
   4233     cluster_data_t *ps_cur_cluster;
   4234 
   4235     S32 i, j;
   4236     S32 num_clusters_cur_32x32_blk;
   4237 
   4238     for(i = 0; i < 4; i++)
   4239     {
   4240         S32 num_clusters_evaluated = 0;
   4241 
   4242         ps_blk_32x32_cur = &ps_blk_32x32[i];
   4243 
   4244         num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
   4245 
   4246         ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
   4247         ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
   4248 
   4249         for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
   4250         {
   4251             ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
   4252 
   4253             if(!ps_cur_cluster->is_valid_cluster)
   4254             {
   4255                 continue;
   4256             }
   4257 
   4258             hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
   4259 
   4260             num_clusters_evaluated++;
   4261         }
   4262     }
   4263 }
   4264 
   4265 /**
   4266 ********************************************************************************
   4267 *  @fn   void hme_try_merge_clusters_blksize_gt_16
   4268 *               (
   4269 *                   cluster_data_t *ps_cluster_data,
   4270 *                   S32 num_clusters
   4271 *               )
   4272 *
   4273 *  @brief  Merging clusters from blocks of size 32x32 and greater
   4274 *
   4275 *  @param[in/out]  ps_cluster_data: structure containing cluster data
   4276 *
   4277 *  @param[in]  num_clusters : number of clusters
   4278 *
   4279 *  @return Success or failure
   4280 ********************************************************************************
   4281 */
   4282 S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
   4283 {
   4284     centroid_t *ps_cur_centroid;
   4285     cluster_data_t *ps_cur_cluster;
   4286 
   4287     S32 i, mvd;
   4288     S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
   4289 
   4290     centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
   4291 
   4292     S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
   4293     S32 ref_id = ps_cluster_data->ref_id;
   4294 
   4295     S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
   4296     S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
   4297     S32 num_clusters_evaluated = 1;
   4298     S32 ret_value = 0;
   4299 
   4300     if(1 >= num_clusters)
   4301     {
   4302         return ret_value;
   4303     }
   4304 
   4305     for(i = 1; num_clusters_evaluated < num_clusters; i++)
   4306     {
   4307         S32 cur_posx_q8;
   4308         S32 cur_posy_q8;
   4309 
   4310         ps_cur_cluster = &ps_cluster_data[i];
   4311 
   4312         if((ref_id != ps_cur_cluster->ref_id))
   4313         {
   4314             num_clusters_evaluated++;
   4315             continue;
   4316         }
   4317 
   4318         if((!ps_cur_cluster->is_valid_cluster))
   4319         {
   4320             continue;
   4321         }
   4322 
   4323         num_clusters_evaluated++;
   4324 
   4325         ps_cur_centroid = &ps_cur_cluster->s_centroid;
   4326 
   4327         cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
   4328         cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
   4329 
   4330         mvdx_q8 = cur_posx_q8 - node0_posx_q8;
   4331         mvdy_q8 = cur_posy_q8 - node0_posy_q8;
   4332 
   4333         mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   4334         mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   4335 
   4336         mvd = ABS(mvdx) + ABS(mvdy);
   4337 
   4338         if(mvd <= (mvd_limit >> 1))
   4339         {
   4340             LWORD64 i8_updated_posx;
   4341             LWORD64 i8_updated_posy;
   4342             WORD32 minmax_updated_x = 0;
   4343             WORD32 minmax_updated_y = 0;
   4344 
   4345             ps_cur_cluster->is_valid_cluster = 0;
   4346 
   4347             ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
   4348             ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
   4349             ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
   4350 
   4351             memcpy(
   4352                 &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
   4353                 ps_cur_cluster->as_mv,
   4354                 sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
   4355 
   4356             if(mvdx > 0)
   4357             {
   4358                 ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
   4359                 minmax_updated_x = 1;
   4360             }
   4361             else
   4362             {
   4363                 ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
   4364                 minmax_updated_x = 2;
   4365             }
   4366 
   4367             if(mvdy > 0)
   4368             {
   4369                 ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
   4370                 minmax_updated_y = 1;
   4371             }
   4372             else
   4373             {
   4374                 ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
   4375                 minmax_updated_y = 2;
   4376             }
   4377 
   4378             switch((minmax_updated_y << 2) + minmax_updated_x)
   4379             {
   4380             case 1:
   4381             {
   4382                 S32 mvd, mvd_q8;
   4383 
   4384                 mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
   4385                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   4386 
   4387                 if(mvd > (mvd_limit))
   4388                 {
   4389                     ps_cluster_data->max_dist_from_centroid = mvd;
   4390                 }
   4391                 break;
   4392             }
   4393             case 2:
   4394             {
   4395                 S32 mvd, mvd_q8;
   4396 
   4397                 mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
   4398                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   4399 
   4400                 if(mvd > (mvd_limit))
   4401                 {
   4402                     ps_cluster_data->max_dist_from_centroid = mvd;
   4403                 }
   4404                 break;
   4405             }
   4406             case 4:
   4407             {
   4408                 S32 mvd, mvd_q8;
   4409 
   4410                 mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
   4411                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   4412 
   4413                 if(mvd > (mvd_limit))
   4414                 {
   4415                     ps_cluster_data->max_dist_from_centroid = mvd;
   4416                 }
   4417                 break;
   4418             }
   4419             case 5:
   4420             {
   4421                 S32 mvd;
   4422                 S32 mvdx, mvdx_q8;
   4423                 S32 mvdy, mvdy_q8;
   4424 
   4425                 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
   4426                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   4427 
   4428                 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
   4429                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   4430 
   4431                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   4432 
   4433                 if(mvd > mvd_limit)
   4434                 {
   4435                     ps_cluster_data->max_dist_from_centroid = mvd;
   4436                 }
   4437                 break;
   4438             }
   4439             case 6:
   4440             {
   4441                 S32 mvd;
   4442                 S32 mvdx, mvdx_q8;
   4443                 S32 mvdy, mvdy_q8;
   4444 
   4445                 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
   4446                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   4447 
   4448                 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
   4449                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   4450 
   4451                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   4452 
   4453                 if(mvd > mvd_limit)
   4454                 {
   4455                     ps_cluster_data->max_dist_from_centroid = mvd;
   4456                 }
   4457                 break;
   4458             }
   4459             case 8:
   4460             {
   4461                 S32 mvd, mvd_q8;
   4462 
   4463                 mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
   4464                 mvd = (mvd_q8 + (1 << 7)) >> 8;
   4465 
   4466                 if(mvd > (mvd_limit))
   4467                 {
   4468                     ps_cluster_data->max_dist_from_centroid = mvd;
   4469                 }
   4470                 break;
   4471             }
   4472             case 9:
   4473             {
   4474                 S32 mvd;
   4475                 S32 mvdx, mvdx_q8;
   4476                 S32 mvdy, mvdy_q8;
   4477 
   4478                 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
   4479                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   4480 
   4481                 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
   4482                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   4483 
   4484                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   4485 
   4486                 if(mvd > mvd_limit)
   4487                 {
   4488                     ps_cluster_data->max_dist_from_centroid = mvd;
   4489                 }
   4490                 break;
   4491             }
   4492             case 10:
   4493             {
   4494                 S32 mvd;
   4495                 S32 mvdx, mvdx_q8;
   4496                 S32 mvdy, mvdy_q8;
   4497 
   4498                 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
   4499                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
   4500 
   4501                 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
   4502                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
   4503 
   4504                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
   4505 
   4506                 if(mvd > ps_cluster_data->max_dist_from_centroid)
   4507                 {
   4508                     ps_cluster_data->max_dist_from_centroid = mvd;
   4509                 }
   4510                 break;
   4511             }
   4512             default:
   4513             {
   4514                 break;
   4515             }
   4516             }
   4517 
   4518             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
   4519                               ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
   4520             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
   4521                               ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
   4522 
   4523             ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
   4524 
   4525             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
   4526             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
   4527 
   4528             if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
   4529             {
   4530                 num_clusters--;
   4531                 num_clusters_evaluated = 1;
   4532                 i = 0;
   4533                 ret_value++;
   4534             }
   4535             else
   4536             {
   4537                 ret_value++;
   4538 
   4539                 return ret_value;
   4540             }
   4541         }
   4542     }
   4543 
   4544     if(ret_value)
   4545     {
   4546         for(i = 1; i < (num_clusters + ret_value); i++)
   4547         {
   4548             if(ps_cluster_data[i].is_valid_cluster)
   4549             {
   4550                 break;
   4551             }
   4552         }
   4553         if(i == (num_clusters + ret_value))
   4554         {
   4555             return ret_value;
   4556         }
   4557     }
   4558     else
   4559     {
   4560         i = 1;
   4561     }
   4562 
   4563     return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
   4564            ret_value;
   4565 }
   4566 
   4567 /**
   4568 ********************************************************************************
   4569 *  @fn   S32 hme_determine_validity_32x32
   4570 *               (
   4571 *                   ctb_cluster_info_t *ps_ctb_cluster_info
   4572 *               )
   4573 *
   4574 *  @brief  Determines whther current 32x32 block needs to be evaluated in enc_loop
   4575 *           while recursing through the CU tree or not
   4576 *
   4577 *  @param[in]  ps_cluster_data: structure containing cluster data
   4578 *
   4579 *  @return Success or failure
   4580 ********************************************************************************
   4581 */
   4582 __inline S32 hme_determine_validity_32x32(
   4583     ctb_cluster_info_t *ps_ctb_cluster_info,
   4584     S32 *pi4_children_nodes_required,
   4585     S32 blk_validity_wrt_pic_bndry,
   4586     S32 parent_blk_validity_wrt_pic_bndry)
   4587 {
   4588     cluster_data_t *ps_data;
   4589 
   4590     cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
   4591     cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
   4592 
   4593     S32 num_clusters = ps_32x32_blk->num_clusters;
   4594     S32 num_clusters_parent = ps_64x64_blk->num_clusters;
   4595 
   4596     if(!blk_validity_wrt_pic_bndry)
   4597     {
   4598         *pi4_children_nodes_required = 1;
   4599         return 0;
   4600     }
   4601 
   4602     if(!parent_blk_validity_wrt_pic_bndry)
   4603     {
   4604         *pi4_children_nodes_required = 1;
   4605         return 1;
   4606     }
   4607 
   4608     if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
   4609     {
   4610         *pi4_children_nodes_required = 1;
   4611         return 0;
   4612     }
   4613 
   4614     if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
   4615     {
   4616         *pi4_children_nodes_required = 1;
   4617 
   4618         return 1;
   4619     }
   4620     else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
   4621     {
   4622         *pi4_children_nodes_required = 0;
   4623 
   4624         return 1;
   4625     }
   4626     else
   4627     {
   4628         if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
   4629         {
   4630             *pi4_children_nodes_required = 0;
   4631             return 1;
   4632         }
   4633         else
   4634         {
   4635             S32 i;
   4636 
   4637             S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
   4638             S32 min_area = MAX_32BIT_VAL;
   4639             S32 num_clusters_evaluated = 0;
   4640 
   4641             for(i = 0; num_clusters_evaluated < num_clusters; i++)
   4642             {
   4643                 ps_data = &ps_32x32_blk->as_cluster_data[i];
   4644 
   4645                 if(!ps_data->is_valid_cluster)
   4646                 {
   4647                     continue;
   4648                 }
   4649 
   4650                 num_clusters_evaluated++;
   4651 
   4652                 if(ps_data->area_in_pixels < min_area)
   4653                 {
   4654                     min_area = ps_data->area_in_pixels;
   4655                 }
   4656             }
   4657 
   4658             if((min_area << 4) < area_of_parent)
   4659             {
   4660                 *pi4_children_nodes_required = 1;
   4661                 return 0;
   4662             }
   4663             else
   4664             {
   4665                 *pi4_children_nodes_required = 0;
   4666                 return 1;
   4667             }
   4668         }
   4669     }
   4670 }
   4671 
   4672 /**
   4673 ********************************************************************************
   4674 *  @fn   S32 hme_determine_validity_16x16
   4675 *               (
   4676 *                   ctb_cluster_info_t *ps_ctb_cluster_info
   4677 *               )
   4678 *
   4679 *  @brief  Determines whther current 16x16 block needs to be evaluated in enc_loop
   4680 *           while recursing through the CU tree or not
   4681 *
   4682 *  @param[in]  ps_cluster_data: structure containing cluster data
   4683 *
   4684 *  @return Success or failure
   4685 ********************************************************************************
   4686 */
   4687 __inline S32 hme_determine_validity_16x16(
   4688     ctb_cluster_info_t *ps_ctb_cluster_info,
   4689     S32 *pi4_children_nodes_required,
   4690     S32 blk_validity_wrt_pic_bndry,
   4691     S32 parent_blk_validity_wrt_pic_bndry)
   4692 {
   4693     cluster_data_t *ps_data;
   4694 
   4695     cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
   4696     cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
   4697     cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
   4698 
   4699     S32 num_clusters = ps_16x16_blk->num_clusters;
   4700     S32 num_clusters_parent = ps_32x32_blk->num_clusters;
   4701     S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
   4702 
   4703     if(!blk_validity_wrt_pic_bndry)
   4704     {
   4705         *pi4_children_nodes_required = 1;
   4706         return 0;
   4707     }
   4708 
   4709     if(!parent_blk_validity_wrt_pic_bndry)
   4710     {
   4711         *pi4_children_nodes_required = 1;
   4712         return 1;
   4713     }
   4714 
   4715     if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
   4716        (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
   4717     {
   4718         *pi4_children_nodes_required = 1;
   4719         return 1;
   4720     }
   4721 
   4722     /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
   4723     /* implies nc_64 > 3 when num_clusters_parent < 3 & */
   4724     if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
   4725     {
   4726         if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
   4727         {
   4728             *pi4_children_nodes_required = 0;
   4729 
   4730             return 1;
   4731         }
   4732         else
   4733         {
   4734             *pi4_children_nodes_required = 1;
   4735 
   4736             return 0;
   4737         }
   4738     }
   4739     /* Implies nc_64 >= 3 */
   4740     else
   4741     {
   4742         if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
   4743         {
   4744             *pi4_children_nodes_required = 0;
   4745             return 1;
   4746         }
   4747         else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
   4748         {
   4749             *pi4_children_nodes_required = 1;
   4750             return 0;
   4751         }
   4752         else
   4753         {
   4754             S32 i;
   4755 
   4756             S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
   4757             S32 min_area = MAX_32BIT_VAL;
   4758             S32 num_clusters_evaluated = 0;
   4759 
   4760             for(i = 0; num_clusters_evaluated < num_clusters; i++)
   4761             {
   4762                 ps_data = &ps_16x16_blk->as_cluster_data[i];
   4763 
   4764                 if(!ps_data->is_valid_cluster)
   4765                 {
   4766                     continue;
   4767                 }
   4768 
   4769                 num_clusters_evaluated++;
   4770 
   4771                 if(ps_data->area_in_pixels < min_area)
   4772                 {
   4773                     min_area = ps_data->area_in_pixels;
   4774                 }
   4775             }
   4776 
   4777             if((min_area << 4) < area_of_parent)
   4778             {
   4779                 *pi4_children_nodes_required = 1;
   4780                 return 0;
   4781             }
   4782             else
   4783             {
   4784                 *pi4_children_nodes_required = 0;
   4785                 return 1;
   4786             }
   4787         }
   4788     }
   4789 }
   4790 
   4791 /**
   4792 ********************************************************************************
   4793 *  @fn   void hme_build_cu_tree
   4794 *               (
   4795 *                   ctb_cluster_info_t *ps_ctb_cluster_info,
   4796 *                   cur_ctb_cu_tree_t *ps_cu_tree,
   4797 *                   S32 tree_depth,
   4798 *                   CU_POS_T e_grand_parent_blk_pos,
   4799 *                   CU_POS_T e_parent_blk_pos,
   4800 *                   CU_POS_T e_cur_blk_pos
   4801 *               )
   4802 *
   4803 *  @brief  Recursive function for CU tree initialisation
   4804 *
   4805 *  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
   4806 *                                   corresponding to all block sizes from 64x64
   4807 *                                   to 16x16
   4808 *
   4809 *  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
   4810 *                                applicable
   4811 *
   4812 *  @param[in]  e_cur_blk_pos: position of current block wrt parent
   4813 *
   4814 *  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
   4815 *
   4816 *  @param[in]  tree_depth : specifies depth of the CU tree
   4817 *
   4818 *  @return Nothing
   4819 ********************************************************************************
   4820 */
   4821 void hme_build_cu_tree(
   4822     ctb_cluster_info_t *ps_ctb_cluster_info,
   4823     cur_ctb_cu_tree_t *ps_cu_tree,
   4824     S32 tree_depth,
   4825     CU_POS_T e_grandparent_blk_pos,
   4826     CU_POS_T e_parent_blk_pos,
   4827     CU_POS_T e_cur_blk_pos)
   4828 {
   4829     ihevce_cu_tree_init(
   4830         ps_cu_tree,
   4831         ps_ctb_cluster_info->ps_cu_tree_root,
   4832         &ps_ctb_cluster_info->nodes_created_in_cu_tree,
   4833         tree_depth,
   4834         e_grandparent_blk_pos,
   4835         e_parent_blk_pos,
   4836         e_cur_blk_pos);
   4837 }
   4838 
   4839 /**
   4840 ********************************************************************************
   4841 *  @fn   S32 hme_sdi_based_cluster_spread_eligibility
   4842 *               (
   4843 *                   cluster_32x32_blk_t *ps_blk_32x32
   4844 *               )
   4845 *
   4846 *  @brief  Determines whether the spread of high SDI MV's around each cluster
   4847 *          center is below a pre-determined threshold
   4848 *
   4849 *  @param[in]  ps_blk_32x32: structure containing pointers to clusters
   4850 *                                   corresponding to all block sizes from 64x64
   4851 *                                   to 16x16
   4852 *
   4853 *  @return 1 if the spread is constrained, else 0
   4854 ********************************************************************************
   4855 */
   4856 __inline S32
   4857     hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
   4858 {
   4859     S32 cumulative_mv_distance;
   4860     S32 i, j;
   4861     S32 num_high_sdi_mvs;
   4862 
   4863     S32 num_clusters = ps_blk_32x32->num_clusters;
   4864 
   4865     for(i = 0; i < num_clusters; i++)
   4866     {
   4867         cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
   4868 
   4869         num_high_sdi_mvs = 0;
   4870         cumulative_mv_distance = 0;
   4871 
   4872         for(j = 0; j < ps_data->num_mvs; j++)
   4873         {
   4874             mv_data_t *ps_mv = &ps_data->as_mv[j];
   4875 
   4876             if(ps_mv->sdi >= sdi_threshold)
   4877             {
   4878                 num_high_sdi_mvs++;
   4879 
   4880                 COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
   4881             }
   4882         }
   4883 
   4884         if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
   4885         {
   4886             return 0;
   4887         }
   4888     }
   4889 
   4890     return 1;
   4891 }
   4892 
/**
********************************************************************************
*  @fn   void hme_populate_cu_tree
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   cur_ctb_cu_tree_t *ps_cu_tree,
*                   S32 tree_depth,
*                   ME_QUALITY_PRESETS_T e_quality_preset,
*                   CU_POS_T e_grandparent_blk_pos,
*                   CU_POS_T e_parent_blk_pos,
*                   CU_POS_T e_cur_blk_pos
*               )
*
*  @brief  Recursive function for CU tree population based on output of
*          clustering algorithm
*
*  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
*                                   corresponding to all block sizes from 64x64
*                                   to 16x16. It also supplies ps_cur_ipe_ctb,
*                                   the output container for ipe analyses
*
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
*
*  @param[in]  tree_depth : specifies depth of the CU tree
*
*  @param[in]  e_quality_preset : ME quality preset in use
*
*  @param[in]  e_grandparent_blk_pos: position of grandparent block wrt its
*                                     parent, if applicable
*
*  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
*                                applicable
*
*  @param[in]  e_cur_blk_pos: position of current block wrt parent
*
*  @note  ipe_decision_precedence is not a parameter; it is computed inside the
*         function and specifies whether precedence should be given to
*         decisions made either by IPE(1) or clustering algos.
*
*  @return Nothing (the function is declared void)
********************************************************************************
*/
   4930 void hme_populate_cu_tree(
   4931     ctb_cluster_info_t *ps_ctb_cluster_info,
   4932     cur_ctb_cu_tree_t *ps_cu_tree,
   4933     S32 tree_depth,
   4934     ME_QUALITY_PRESETS_T e_quality_preset,
   4935     CU_POS_T e_grandparent_blk_pos,
   4936     CU_POS_T e_parent_blk_pos,
   4937     CU_POS_T e_cur_blk_pos)
   4938 {
   4939     S32 area_of_cur_blk;
   4940     S32 area_limit_for_me_decision_precedence;
   4941     S32 children_nodes_required;
   4942     S32 intra_mv_area;
   4943     S32 intra_eval_enable;
   4944     S32 inter_eval_enable;
   4945     S32 ipe_decision_precedence;
   4946     S32 node_validity;
   4947     S32 num_clusters;
   4948 
   4949     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
   4950 
   4951     if(NULL == ps_cu_tree)
   4952     {
   4953         return;
   4954     }
   4955 
   4956     switch(tree_depth)
   4957     {
   4958     case 0:
   4959     {
   4960         /* 64x64 block */
   4961         S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
   4962 
   4963         cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
   4964 
   4965         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
   4966         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
   4967         children_nodes_required = 0;
   4968         intra_mv_area = ps_blk_64x64->intra_mv_area;
   4969 
   4970         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
   4971 
   4972         intra_eval_enable = ipe_decision_precedence;
   4973         inter_eval_enable = !!ps_blk_64x64->num_clusters;
   4974 
   4975 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   4976         if(e_quality_preset >= ME_HIGH_QUALITY)
   4977         {
   4978             inter_eval_enable = 1;
   4979             node_validity = (blk_32x32_mask == 0xf);
   4980 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   4981             ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
   4982 #endif
   4983             break;
   4984         }
   4985 #endif
   4986 
   4987 #if ENABLE_4CTB_EVALUATION
   4988         node_validity = (blk_32x32_mask == 0xf);
   4989 
   4990         break;
   4991 #else
   4992         {
   4993             S32 i;
   4994 
   4995             num_clusters = ps_blk_64x64->num_clusters;
   4996 
   4997             node_validity = (ipe_decision_precedence)
   4998                                 ? (!ps_cur_ipe_ctb->u1_split_flag)
   4999                                 : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
   5000 
   5001             for(i = 0; i < MAX_NUM_REF; i++)
   5002             {
   5003                 node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
   5004                                                   MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
   5005             }
   5006 
   5007             node_validity = node_validity && (blk_32x32_mask == 0xf);
   5008         }
   5009         break;
   5010 #endif
   5011     }
   5012     case 1:
   5013     {
   5014         /* 32x32 block */
   5015         S32 is_percent_intra_area_gt_threshold;
   5016 
   5017         cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
   5018 
   5019         S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
   5020 
   5021 #if !ENABLE_4CTB_EVALUATION
   5022         S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
   5023         S32 best_intra_cost =
   5024             ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
   5025               ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
   5026                   4) < 0)
   5027                 ? MAX_32BIT_VAL
   5028                 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
   5029                    ps_ctb_cluster_info->i4_frame_qstep *
   5030                        ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
   5031         S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
   5032         S32 cost_differential = (best_inter_cost - best_cost);
   5033 #endif
   5034 
   5035         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
   5036         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
   5037         intra_mv_area = ps_blk_32x32->intra_mv_area;
   5038         is_percent_intra_area_gt_threshold =
   5039             (intra_mv_area > area_limit_for_me_decision_precedence);
   5040         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
   5041 
   5042         intra_eval_enable = ipe_decision_precedence;
   5043         inter_eval_enable = !!ps_blk_32x32->num_clusters;
   5044         children_nodes_required = 1;
   5045 
   5046 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   5047         if(e_quality_preset >= ME_HIGH_QUALITY)
   5048         {
   5049             inter_eval_enable = 1;
   5050             node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
   5051 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   5052             ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
   5053 #endif
   5054             break;
   5055         }
   5056 #endif
   5057 
   5058 #if ENABLE_4CTB_EVALUATION
   5059         node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
   5060 
   5061         break;
   5062 #else
   5063         {
   5064             S32 i;
   5065             num_clusters = ps_blk_32x32->num_clusters;
   5066 
   5067             if(ipe_decision_precedence)
   5068             {
   5069                 node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
   5070                 node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
   5071             }
   5072             else
   5073             {
   5074                 node_validity =
   5075                     ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
   5076                     (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
   5077                     (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
   5078 
   5079                 for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
   5080                 {
   5081                     node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
   5082                                                       MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
   5083                 }
   5084 
   5085                 if(node_validity)
   5086                 {
   5087                     node_validity = node_validity &&
   5088                                     hme_sdi_based_cluster_spread_eligibility(
   5089                                         ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
   5090                 }
   5091             }
   5092         }
   5093 
   5094         break;
   5095 #endif
   5096     }
   5097     case 2:
   5098     {
   5099         cluster_16x16_blk_t *ps_blk_16x16 =
   5100             &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
   5101 
   5102         S32 blk_8x8_mask =
   5103             ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
   5104 
   5105         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
   5106         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
   5107         children_nodes_required = 1;
   5108         intra_mv_area = ps_blk_16x16->intra_mv_area;
   5109         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
   5110         num_clusters = ps_blk_16x16->num_clusters;
   5111 
   5112         intra_eval_enable = ipe_decision_precedence;
   5113         inter_eval_enable = 1;
   5114 
   5115 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   5116         if(e_quality_preset >= ME_HIGH_QUALITY)
   5117         {
   5118             node_validity =
   5119                 !ps_ctb_cluster_info
   5120                      ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
   5121             children_nodes_required = !node_validity;
   5122             break;
   5123         }
   5124 #endif
   5125 
   5126 #if ENABLE_4CTB_EVALUATION
   5127         node_validity = (blk_8x8_mask == 0xf);
   5128 
   5129 #if ENABLE_CU_TREE_CULLING
   5130         {
   5131             cur_ctb_cu_tree_t *ps_32x32_root;
   5132 
   5133             switch(e_parent_blk_pos)
   5134             {
   5135             case POS_TL:
   5136             {
   5137                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
   5138 
   5139                 break;
   5140             }
   5141             case POS_TR:
   5142             {
   5143                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
   5144 
   5145                 break;
   5146             }
   5147             case POS_BL:
   5148             {
   5149                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
   5150 
   5151                 break;
   5152             }
   5153             case POS_BR:
   5154             {
   5155                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
   5156 
   5157                 break;
   5158             }
   5159             }
   5160 
   5161             if(ps_32x32_root->is_node_valid)
   5162             {
   5163                 node_validity =
   5164                     node_validity &&
   5165                     !ps_ctb_cluster_info
   5166                          ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
   5167                 children_nodes_required = !node_validity;
   5168             }
   5169         }
   5170 #endif
   5171 
   5172         break;
   5173 #else
   5174 
   5175         if(ipe_decision_precedence)
   5176         {
   5177             S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
   5178                                      .as_intra16_analyse[e_cur_blk_pos]
   5179                                      .b1_merge_flag);
   5180             S32 valid_flag = (blk_8x8_mask == 0xf);
   5181 
   5182             node_validity = merge_flag_16 && valid_flag;
   5183         }
   5184         else
   5185         {
   5186             node_validity = (blk_8x8_mask == 0xf);
   5187         }
   5188 
   5189         break;
   5190 #endif
   5191     }
   5192     case 3:
   5193     {
   5194         S32 blk_8x8_mask =
   5195             ps_ctb_cluster_info
   5196                 ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
   5197         S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
   5198                                  .as_intra16_analyse[e_parent_blk_pos]
   5199                                  .b1_merge_flag);
   5200         S32 merge_flag_32 =
   5201             (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
   5202 
   5203         intra_eval_enable = !merge_flag_16 || !merge_flag_32;
   5204         inter_eval_enable = 1;
   5205         children_nodes_required = 0;
   5206 
   5207 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   5208         if(e_quality_preset >= ME_HIGH_QUALITY)
   5209         {
   5210             node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
   5211             break;
   5212         }
   5213 #endif
   5214 
   5215 #if ENABLE_4CTB_EVALUATION
   5216         node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
   5217 
   5218         break;
   5219 #else
   5220         {
   5221             cur_ctb_cu_tree_t *ps_32x32_root;
   5222             cur_ctb_cu_tree_t *ps_16x16_root;
   5223             cluster_32x32_blk_t *ps_32x32_blk;
   5224 
   5225             switch(e_grandparent_blk_pos)
   5226             {
   5227             case POS_TL:
   5228             {
   5229                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
   5230 
   5231                 break;
   5232             }
   5233             case POS_TR:
   5234             {
   5235                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
   5236 
   5237                 break;
   5238             }
   5239             case POS_BL:
   5240             {
   5241                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
   5242 
   5243                 break;
   5244             }
   5245             case POS_BR:
   5246             {
   5247                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
   5248 
   5249                 break;
   5250             }
   5251             }
   5252 
   5253             switch(e_parent_blk_pos)
   5254             {
   5255             case POS_TL:
   5256             {
   5257                 ps_16x16_root = ps_32x32_root->ps_child_node_tl;
   5258 
   5259                 break;
   5260             }
   5261             case POS_TR:
   5262             {
   5263                 ps_16x16_root = ps_32x32_root->ps_child_node_tr;
   5264 
   5265                 break;
   5266             }
   5267             case POS_BL:
   5268             {
   5269                 ps_16x16_root = ps_32x32_root->ps_child_node_bl;
   5270 
   5271                 break;
   5272             }
   5273             case POS_BR:
   5274             {
   5275                 ps_16x16_root = ps_32x32_root->ps_child_node_br;
   5276 
   5277                 break;
   5278             }
   5279             }
   5280 
   5281             ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
   5282 
   5283             node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
   5284                             ((!ps_32x32_root->is_node_valid) ||
   5285                              (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
   5286                              (!ps_16x16_root->is_node_valid));
   5287 
   5288             break;
   5289         }
   5290 #endif
   5291     }
   5292     }
   5293 
   5294     /* Fill the current cu_tree node */
   5295     ps_cu_tree->is_node_valid = node_validity;
   5296     ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
   5297     ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
   5298 
   5299     if(children_nodes_required)
   5300     {
   5301         tree_depth++;
   5302 
   5303         hme_populate_cu_tree(
   5304             ps_ctb_cluster_info,
   5305             ps_cu_tree->ps_child_node_tl,
   5306             tree_depth,
   5307             e_quality_preset,
   5308             e_parent_blk_pos,
   5309             e_cur_blk_pos,
   5310             POS_TL);
   5311 
   5312         hme_populate_cu_tree(
   5313             ps_ctb_cluster_info,
   5314             ps_cu_tree->ps_child_node_tr,
   5315             tree_depth,
   5316             e_quality_preset,
   5317             e_parent_blk_pos,
   5318             e_cur_blk_pos,
   5319             POS_TR);
   5320 
   5321         hme_populate_cu_tree(
   5322             ps_ctb_cluster_info,
   5323             ps_cu_tree->ps_child_node_bl,
   5324             tree_depth,
   5325             e_quality_preset,
   5326             e_parent_blk_pos,
   5327             e_cur_blk_pos,
   5328             POS_BL);
   5329 
   5330         hme_populate_cu_tree(
   5331             ps_ctb_cluster_info,
   5332             ps_cu_tree->ps_child_node_br,
   5333             tree_depth,
   5334             e_quality_preset,
   5335             e_parent_blk_pos,
   5336             e_cur_blk_pos,
   5337             POS_BR);
   5338     }
   5339 }
   5340 
   5341 /**
   5342 ********************************************************************************
   5343 *  @fn   void hme_analyse_mv_clustering
   5344 *               (
   5345 *                   ps_search_results, ps_16x16_cu_results, ps_8x8_cu_results,
   5346 *                   ps_ctb_cluster_info, pi1_future_list, pi1_past_list,
   5347 *                   bidir_enabled, e_quality_preset
   5348 *               )
   5349 *
   5350 *  @brief  Implementation for the clustering algorithm: accumulates per-16x16
   5351 *          data, rolls it up to 32x32 and 64x64, and populates the CU tree
   5352 *  @param[in]  ps_search_results : per-16x16 results; u1_split_flag is read
   5353 *  @param[in]  ps_16x16_cu_results, ps_8x8_cu_results : CU results that are read
   5354 *  @param[in,out]  ps_ctb_cluster_info : cluster blocks, masks, CU tree root
   5355 *  @param[in]  pi1_future_list, pi1_past_list : indexed by L1 / L0 ref idx
   5356 *  @param[in]  bidir_enabled : passed to cluster init and top-ref-id search
   5357 *  @param[in]  e_quality_preset : compared with ME_HIGH_QUALITY to pick the path
   5358 *  @return None
   5359 ********************************************************************************
   5360 */
   5361 void hme_analyse_mv_clustering(
   5362     search_results_t *ps_search_results,
   5363     inter_cu_results_t *ps_16x16_cu_results,
   5364     inter_cu_results_t *ps_8x8_cu_results,
   5365     ctb_cluster_info_t *ps_ctb_cluster_info,
   5366     S08 *pi1_future_list,
   5367     S08 *pi1_past_list,
   5368     S32 bidir_enabled,
   5369     ME_QUALITY_PRESETS_T e_quality_preset)
   5370 {
   5371     cluster_16x16_blk_t *ps_blk_16x16;
   5372     cluster_32x32_blk_t *ps_blk_32x32;
   5373     cluster_64x64_blk_t *ps_blk_64x64;
   5374 
   5375     part_type_results_t *ps_best_result;
   5376     pu_result_t *aps_part_result[MAX_NUM_PARTS]; /* PU results taken from ps_best_results[0] */
   5377     pu_result_t *aps_inferior_parts[MAX_NUM_PARTS]; /* PU results taken from ps_best_results[1] */
   5378 
   5379     PART_ID_T e_part_id;
   5380     PART_TYPE_T e_part_type;
   5381 
   5382     S32 enable_64x64_merge;
   5383     S32 i, j, k;
   5384     S32 mvx, mvy;
   5385     S32 num_parts;
   5386     S32 ref_idx;
   5387     S32 ai4_pred_mode[MAX_NUM_PARTS];
   5388 
   5389     S32 num_32x32_merges = 0; /* depth-1 CU tree nodes found valid further below */
   5390 
   5391     /*****************************************/
   5392     /*****************************************/
   5393     /********* Enter ye who is HQ ************/
   5394     /*****************************************/
   5395     /*****************************************/
   5396 
   5397     ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
   5398 
   5399     /* Initialise data in each of the clusters */
   5400     for(i = 0; i < 16; i++)
   5401     {
   5402         ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
   5403 
   5404 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   5405         if(e_quality_preset < ME_HIGH_QUALITY)
   5406         {
   5407             hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
   5408         }
   5409         else
   5410         { /* this path only resets the two accumulators updated later in this function */
   5411             ps_blk_16x16->best_inter_cost = 0;
   5412             ps_blk_16x16->intra_mv_area = 0;
   5413         }
   5414 #else
   5415         hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
   5416 #endif
   5417     }
   5418 
   5419     for(i = 0; i < 4; i++)
   5420     {
   5421         ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
   5422 
   5423 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   5424         if(e_quality_preset < ME_HIGH_QUALITY)
   5425         {
   5426             hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
   5427         }
   5428         else
   5429         {
   5430             ps_blk_32x32->best_inter_cost = 0;
   5431             ps_blk_32x32->intra_mv_area = 0;
   5432         }
   5433 #else
   5434         hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
   5435 #endif
   5436     }
   5437 
   5438 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   5439     if(e_quality_preset < ME_HIGH_QUALITY)
   5440     {
   5441         hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
   5442     }
   5443     else
   5444     {
   5445         ps_blk_64x64->best_inter_cost = 0;
   5446         ps_blk_64x64->intra_mv_area = 0;
   5447     }
   5448 #else
   5449     hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
   5450 #endif
   5451 
   5452     /* Initialise data for all nodes in the CU tree */
   5453     hme_build_cu_tree(
   5454         ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
   5455     /* HQ and above: mark every 16x16 as split; cleared below for valid, unsplit blocks */
   5456     if(e_quality_preset >= ME_HIGH_QUALITY)
   5457     {
   5458         memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
   5459     }
   5460     /* Uniform CU size builds return once the initialisation above is done */
   5461 #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
   5462     return;
   5463 #endif
   5464     /* Accumulate cost and intra area, and cluster MVs, for every valid 16x16 */
   5465     for(i = 0; i < 16; i++)
   5466     {
   5467         S32 blk_8x8_mask;
   5468         S32 is_16x16_blk_valid;
   5469         S32 num_clusters_updated;
   5470         S32 num_clusters;
   5471 
   5472         blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
   5473 
   5474         ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
   5475 
   5476         is_16x16_blk_valid = (blk_8x8_mask == 0xf); /* all four mask bits set */
   5477 
   5478         if(is_16x16_blk_valid)
   5479         {
   5480             /* Use 8x8 data when 16x16 CU is split */
   5481             if(ps_search_results[i].u1_split_flag)
   5482             {
   5483                 S32 blk_8x8_idx = i << 2;
   5484 
   5485                 num_parts = 4;
   5486                 e_part_type = PRT_NxN;
   5487 
   5488                 for(j = 0; j < num_parts; j++, blk_8x8_idx++)
   5489                 {
   5490                     /* Only 2Nx2N partition supported for 8x8 block */
   5491                     ASSERT(
   5492                         ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
   5493                         ((PART_TYPE_T)PRT_2Nx2N));
   5494 
   5495                     aps_part_result[j] =
   5496                         &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
   5497                     aps_inferior_parts[j] =
   5498                         &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
   5499                     ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
   5500                 }
   5501             }
   5502             else
   5503             {
   5504                 ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
   5505 
   5506                 e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
   5507                 num_parts = gau1_num_parts_in_part_type[e_part_type];
   5508 
   5509                 for(j = 0; j < num_parts; j++)
   5510                 {
   5511                     aps_part_result[j] = &ps_best_result->as_pu_results[j];
   5512                     aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
   5513                     ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
   5514                 }
   5515 
   5516                 ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
   5517             }
   5518             /* Walk the partitions gathered above */
   5519             for(j = 0; j < num_parts; j++)
   5520             {
   5521                 pu_result_t *ps_part_result = aps_part_result[j];
   5522 
   5523                 S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1); /* two MVs when b2_pred_mode > 1 */
   5524 
   5525                 e_part_id = ge_part_type_to_part_id[e_part_type][j];
   5526 
   5527                 /* Skip clustering if best mode is intra */
   5528                 if((ps_part_result->pu.b1_intra_flag))
   5529                 {
   5530                     ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
   5531                     ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost; /* cost read from the ps_best_results[1] entry */
   5532                     continue;
   5533                 }
   5534                 else
   5535                 {
   5536                     ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
   5537                 }
   5538                 /* When the macro below is 0, HQ and above stop here (no MV clustering) */
   5539 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   5540                 if(e_quality_preset >= ME_HIGH_QUALITY)
   5541                 {
   5542                     continue;
   5543                 }
   5544 #endif
   5545 
   5546                 for(k = 0; k < num_mvs; k++)
   5547                 {
   5548                     mv_t *ps_mv;
   5549 
   5550                     pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
   5551                     /* L0 MV for pred mode 0 and for the first pass (k == 0) of pred mode 2; L1 MV otherwise */
   5552                     S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
   5553 
   5554                     ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
   5555 
   5556                     mvx = ps_mv->i2_mvx;
   5557                     mvy = ps_mv->i2_mvy;
   5558 
   5559                     ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
   5560                                          : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
   5561 
   5562                     num_clusters = ps_blk_16x16->num_clusters;
   5563 
   5564                     hme_find_and_update_clusters(
   5565                         ps_blk_16x16->as_cluster_data,
   5566                         &(ps_blk_16x16->num_clusters),
   5567                         mvx,
   5568                         mvy,
   5569                         ref_idx,
   5570                         ps_part_result->i4_sdi,
   5571                         e_part_id,
   5572                         (ai4_pred_mode[j] == 2));
   5573                     /* Add the change in cluster count to this reference's tally */
   5574                     num_clusters_updated = (ps_blk_16x16->num_clusters);
   5575 
   5576                     ps_blk_16x16->au1_num_clusters[ref_idx] +=
   5577                         (num_clusters_updated - num_clusters);
   5578                 }
   5579             }
   5580         }
   5581     }
   5582 
   5583     /* Search for 32x32 clusters */
   5584     for(i = 0; i < 4; i++)
   5585     {
   5586         S32 num_clusters_merged;
   5587 
   5588         S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
   5589 
   5590         if(is_32x32_blk_valid)
   5591         {
   5592             ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
   5593             ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2]; /* first of four consecutive 16x16 blocks */
   5594 
   5595 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   5596             if(e_quality_preset >= ME_HIGH_QUALITY)
   5597             {
   5598                 for(j = 0; j < 4; j++, ps_blk_16x16++)
   5599                 {
   5600                     ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
   5601 
   5602                     ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
   5603                 }
   5604                 continue;
   5605             }
   5606 #endif
   5607 
   5608             hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
   5609             /* Try merging clusters once their count reaches the limit */
   5610             if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
   5611             {
   5612                 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
   5613                     ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
   5614 
   5615                 if(num_clusters_merged)
   5616                 {
   5617                     ps_blk_32x32->num_clusters -= num_clusters_merged;
   5618 
   5619                     UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
   5620                 }
   5621             }
   5622         }
   5623     }
   5624 
   5625 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   5626     /* Eliminate outlier 32x32 clusters */
   5627     if(e_quality_preset < ME_HIGH_QUALITY)
   5628 #endif
   5629     {
   5630         hme_boot_out_outlier(ps_ctb_cluster_info, 32);
   5631 
   5632         /* Find best_uni_ref and best_alt_ref */
   5633         hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
   5634     }
   5635 
   5636     /* Populate the CU tree for depths 1 and higher */
   5637     {
   5638         cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
   5639         cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
   5640         cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
   5641         cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
   5642         cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
   5643 
   5644         hme_populate_cu_tree(
   5645             ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
   5646 
   5647         num_32x32_merges += (ps_tl->is_node_valid == 1);
   5648 
   5649         hme_populate_cu_tree(
   5650             ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
   5651 
   5652         num_32x32_merges += (ps_tr->is_node_valid == 1);
   5653 
   5654         hme_populate_cu_tree(
   5655             ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
   5656 
   5657         num_32x32_merges += (ps_bl->is_node_valid == 1);
   5658 
   5659         hme_populate_cu_tree(
   5660             ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
   5661 
   5662         num_32x32_merges += (ps_br->is_node_valid == 1);
   5663     }
   5664     /* Decide whether the 64x64 merge is attempted */
   5665 #if !ENABLE_4CTB_EVALUATION
   5666     if(e_quality_preset < ME_HIGH_QUALITY)
   5667     {
   5668         enable_64x64_merge = (num_32x32_merges >= 3); /* needs at least 3 valid 32x32 nodes */
   5669     }
   5670 #else
   5671     if(e_quality_preset < ME_HIGH_QUALITY)
   5672     {
   5673         enable_64x64_merge = 1;
   5674     }
   5675 #endif
   5676 
   5677 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   5678     if(e_quality_preset >= ME_HIGH_QUALITY)
   5679     {
   5680         enable_64x64_merge = 1;
   5681     }
   5682 #else
   5683     if(e_quality_preset >= ME_HIGH_QUALITY)
   5684     {
   5685         enable_64x64_merge = (num_32x32_merges >= 3);
   5686     }
   5687 #endif
   5688 
   5689     if(enable_64x64_merge)
   5690     {
   5691         S32 num_clusters_merged;
   5692 
   5693         ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
   5694 
   5695 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   5696         if(e_quality_preset >= ME_HIGH_QUALITY)
   5697         {
   5698             for(j = 0; j < 4; j++, ps_blk_32x32++)
   5699             {
   5700                 ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
   5701 
   5702                 ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
   5703             }
   5704         }
   5705         else
   5706 #endif
   5707         {
   5708             hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
   5709 
   5710             if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
   5711             {
   5712                 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
   5713                     ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
   5714 
   5715                 if(num_clusters_merged)
   5716                 {
   5717                     ps_blk_64x64->num_clusters -= num_clusters_merged;
   5718 
   5719                     UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
   5720                 }
   5721             }
   5722         }
   5723 
   5724 #if !ENABLE_4CTB_EVALUATION
   5725         if(e_quality_preset < ME_HIGH_QUALITY)
   5726         { /* keep the merge only if inter cost exceeds min(inter, intra) by at most ALL_INTER_COST_DIFF_THR percent */
   5727             S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
   5728             S32 best_intra_cost =
   5729                 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
   5730                   ps_ctb_cluster_info->i4_frame_qstep *
   5731                       ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
   5732                     ? MAX_32BIT_VAL /* NOTE(review): the `< 0` test looks like an overflow guard relying on signed wraparound - confirm */
   5733                     : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
   5734                        ps_ctb_cluster_info->i4_frame_qstep *
   5735                            ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
   5736             S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
   5737             S32 cost_differential = (best_inter_cost - best_cost);
   5738 
   5739             enable_64x64_merge =
   5740                 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
   5741         }
   5742 #endif
   5743     }
   5744     /* Merge still enabled: fill the depth-0 (root) node of the CU tree */
   5745     if(enable_64x64_merge)
   5746     {
   5747 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
   5748         if(e_quality_preset < ME_HIGH_QUALITY)
   5749 #endif
   5750         {
   5751             hme_boot_out_outlier(ps_ctb_cluster_info, 64);
   5752 
   5753             hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
   5754         }
   5755 
   5756         hme_populate_cu_tree(
   5757             ps_ctb_cluster_info,
   5758             ps_ctb_cluster_info->ps_cu_tree_root,
   5759             0,
   5760             e_quality_preset,
   5761             POS_NA,
   5762             POS_NA,
   5763             POS_NA);
   5764     }
   5765 }
   5766 #endif
   5767 
   5768 /**
   5769 ********************************************************************************
   5770 *  @fn   static __inline void hme_merge_prms_init(ps_prms, ...)
   5771 *
   5772 *  @brief  Fills the merge parameter structure for one 32x32 merge or for the
   5773 *          64x64 merge of a CTB
   5774 *
   5775 *  @param[out]  ps_prms : merge params to be filled
   5776 *  @param[in]  ps_curr_layer : stored as the layer ctxt of the merge
   5777 *  @param[in]  ps_refine_prms : supplies use-rec flag and number of input results
   5778 *  @param[in]  ps_me_ctxt : source of the search results, CU results, ref lists
   5779 *                           and cluster info that ps_prms will point to
   5780 *  @param[in]  ps_range_prms_rec, ps_range_prms_inp : per-ref MV ranges; the rec
   5781 *                           set is used when i4_use_rec_in_fpel is non-zero
   5782 *  @param[in]  pps_mv_grid, ps_inter_ctb_prms : stored in ps_prms as they are
   5783 *  @param[in]  i4_num_pred_dir : stored as the number of refs to merge in
   5784 *  @param[in]  i4_32x32_id : index of the 32x32 block; read only for BLK_32x32
   5785 *  @param[in]  e_blk_size : BLK_32x32 merges four 16x16s; any other value sets
   5786 *                           up the merge of the four 32x32s into the 64x64
   5787 *  @param[in]  e_me_quality_presets : stored in ps_prms as it is
   5788 *
   5789 *  @return None
   5790 ********************************************************************************
   5791 */
   5792 static __inline void hme_merge_prms_init(
   5793     hme_merge_prms_t *ps_prms,
   5794     layer_ctxt_t *ps_curr_layer,
   5795     refine_prms_t *ps_refine_prms,
   5796     me_frm_ctxt_t *ps_me_ctxt,
   5797     range_prms_t *ps_range_prms_rec,
   5798     range_prms_t *ps_range_prms_inp,
   5799     mv_grid_t **pps_mv_grid,
   5800     inter_ctb_prms_t *ps_inter_ctb_prms,
   5801     S32 i4_num_pred_dir,
   5802     S32 i4_32x32_id,
   5803     BLK_SIZE_T e_blk_size,
   5804     ME_QUALITY_PRESETS_T e_me_quality_presets)
   5805 {
   5806     /* MV ranges: rec set when i4_use_rec_in_fpel is set, input set otherwise */
   5807     S32 use_rec_flag = ps_refine_prms->i4_use_rec_in_fpel;
   5808     range_prms_t *ps_range_src = use_rec_flag ? ps_range_prms_rec : ps_range_prms_inp;
   5809     S32 ref_id;
   5810 
   5811     if(e_blk_size != BLK_32x32)
   5812     {
   5813         /* 64x64 merge: the children are the four 32x32 results */
   5814         ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
   5815         ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
   5816         ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
   5817         ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];
   5818         ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;
   5819         ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
   5820         ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
   5821         ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
   5822     }
   5823     else
   5824     {
   5825         /* 32x32 merge: the children are four consecutive 16x16 results */
   5826         S32 first_16x16 = i4_32x32_id << 2;
   5827 
   5828         ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[first_16x16];
   5829         ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[first_16x16 + 1];
   5830         ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[first_16x16 + 2];
   5831         ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[first_16x16 + 3];
   5832         ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];
   5833         ps_prms->ps_results_grandchild = NULL;
   5834         ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
   5835         ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
   5836     }
   5837 
   5838     /* All MAX_NUM_REF entries point into the range array selected above */
   5839     for(ref_id = 0; ref_id < MAX_NUM_REF; ref_id++)
   5840     {
   5841         ps_prms->aps_mv_range[ref_id] = &ps_range_src[ref_id];
   5842     }
   5843 
   5844     /* Segmentation info from previous layers is currently not enabled */
   5845     ps_prms->i4_seg_info_avail = 0;
   5846     ps_prms->i4_part_mask = 0;
   5847 
   5848     ps_prms->i4_use_rec = use_rec_flag;
   5849     ps_prms->i4_num_ref = i4_num_pred_dir;
   5850     ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
   5851     ps_prms->e_quality_preset = e_me_quality_presets;
   5852     ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
   5853     ps_prms->ps_layer_ctxt = ps_curr_layer;
   5854     ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
   5855     ps_prms->pps_mv_grid = pps_mv_grid;
   5856     ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
   5857     ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
   5858     ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
   5859 }
   5860 
   5861 /**
   5862 ********************************************************************************
   5863 *  @fn   void hme_refine(me_ctxt_t *ps_thrd_ctxt,
   5864 *                       refine_prms_t *ps_refine_prms, ...)
   5865 *
   5866 *  @brief  Top level entry point for refinement ME
   5867 *
   5868 *  @param[in,out]  ps_thrd_ctxt: ME Handle
   5869 *
   5870 *  @param[in]  ps_refine_prms : refinement layer prms
   5871 *
   5872 *  @return None
   5873 ********************************************************************************
   5874 */
   5875 void hme_refine(
   5876     me_ctxt_t *ps_thrd_ctxt,
   5877     refine_prms_t *ps_refine_prms,
   5878     PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
   5879     layer_ctxt_t *ps_coarse_layer,
   5880     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
   5881     S32 lyr_job_type,
   5882     S32 thrd_id,
   5883     S32 me_frm_id,
   5884     pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
   5885 {
   5886     inter_ctb_prms_t s_common_frm_prms;
   5887 
   5888     BLK_SIZE_T e_search_blk_size, e_result_blk_size;
   5889     WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
   5890     me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
   5891     ME_QUALITY_PRESETS_T e_me_quality_presets =
   5892         ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
   5893 
   5894     WORD32 num_rows_proc = 0;
   5895     WORD32 num_act_ref_pics;
   5896     WORD16 i2_prev_enc_frm_max_mv_y;
   5897     WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
   5898 
   5899     /*************************************************************************/
   5900     /* Complexity of search: Low to High                                     */
   5901     /*************************************************************************/
   5902     SEARCH_COMPLEXITY_T e_search_complexity;
   5903 
   5904     /*************************************************************************/
   5905     /* to store the PU results which are passed to the decide_part_types     */
   5906     /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
   5907     /*************************************************************************/
   5908 
   5909     pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
   5910     inter_pu_results_t as_inter_pu_results[4];
   5911     inter_pu_results_t *ps_pu_results = as_inter_pu_results;
   5912 
   5913     /*************************************************************************/
   5914     /* Config parameter structures for various ME submodules                 */
   5915     /*************************************************************************/
   5916     hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
   5917     hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
   5918     hme_merge_prms_t s_merge_prms_64x64;
   5919     hme_search_prms_t s_search_prms_blk;
   5920     mvbank_update_prms_t s_mv_update_prms;
   5921     hme_ctb_prms_t s_ctb_prms;
   5922     hme_subpel_prms_t s_subpel_prms;
   5923     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
   5924     ctb_cluster_info_t *ps_ctb_cluster_info;
   5925     fpel_srch_cand_init_data_t s_srch_cand_init_data;
   5926 
   5927     /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
   5928     S32 en_merge_32x32;
   5929     /* 5 lsb's specify whether or not merge algorithm is required */
   5930     /* to be executed or not. Relevant only in PQ. Ought to be */
   5931     /* used in conjunction with en_merge_32x32 and */
   5932     /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
   5933     /* required when all children are deemed to be intras */
   5934     S32 en_merge_execution;
   5935 
   5936     /*************************************************************************/
   5937     /* All types of search candidates for predictor based search.            */
   5938     /*************************************************************************/
   5939     S32 num_init_candts = 0;
   5940     S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
   5941     S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
   5942     search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
   5943     search_node_t as_top_neighbours[4], as_left_neighbours[3];
   5944 
   5945     pf_get_wt_inp fp_get_wt_inp;
   5946 
   5947     search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
   5948     U32 au4_unique_node_map[MAP_X_MAX * 2];
   5949 
   5950     /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
   5951     ctb_boundary_attrs_t *ps_ctb_bound_attrs;
   5952 
   5953     /*************************************************************************/
   5954     /* points to the search results for the blk level search (8x8/16x16)     */
   5955     /*************************************************************************/
   5956     search_results_t *ps_search_results;
   5957 
   5958     /*************************************************************************/
   5959     /* Coordinates                                                           */
   5960     /*************************************************************************/
   5961     S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
   5962     S32 pos_x, pos_y;
   5963     S32 blk_id_in_full_ctb;
   5964 
   5965     /*************************************************************************/
   5966     /* Related to dimensions of block being searched and pic dimensions      */
   5967     /*************************************************************************/
   5968     S32 blk_4x4_to_16x16;
   5969     S32 blk_wd, blk_ht, blk_size_shift;
   5970     S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
   5971     S32 num_results_prev_layer;
   5972 
   5973     /*************************************************************************/
   5974     /* Size of a basic unit for this layer. For non encode layers, we search */
   5975     /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
   5976     /* basic unit size is the ctb size.                                      */
   5977     /*************************************************************************/
   5978     S32 unit_size;
   5979 
   5980     /*************************************************************************/
   5981     /* Local variable storing results of any 4 CU merge to bigger CU         */
   5982     /*************************************************************************/
   5983     CU_MERGE_RESULT_T e_merge_result;
   5984 
   5985     /*************************************************************************/
   5986     /* This mv grid stores results during and after fpel search, during      */
   5987     /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
   5988     /* meant for the 2 directions of search (l0 and l1).                     */
   5989     /*************************************************************************/
   5990     mv_grid_t *aps_mv_grid[2];
   5991 
   5992     /*************************************************************************/
   5993     /* Pointers to context in current and coarser layers                     */
   5994     /*************************************************************************/
   5995     layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
   5996 
   5997     /*************************************************************************/
   5998     /* to store mv range per blk, and picture limit, allowed search range    */
   5999     /* range prms in hpel and qpel units as well                             */
   6000     /*************************************************************************/
   6001     range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
   6002     range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
   6003     range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
   6004 
   6005     /*************************************************************************/
   6006     /* These variables are used to track number of references at different   */
   6007     /* stages of ME.                                                         */
   6008     /*************************************************************************/
   6009     S32 i4_num_pred_dir;
   6010     S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
   6011     S32 lambda_recon = ps_refine_prms->lambda_recon;
   6012 
   6013     /* Counts successful merge to 32x32 every CTB (0-4) */
   6014     S32 merge_count_32x32;
   6015 
   6016     S32 ai4_id_coloc[14], ai4_id_Z[2];
   6017     U08 au1_search_candidate_list_index[2];
   6018     S32 ai4_num_coloc_cands[2];
   6019     U08 u1_pred_dir, u1_pred_dir_ctr;
   6020 
   6021     /*************************************************************************/
   6022     /* Input pointer and stride                                              */
   6023     /*************************************************************************/
   6024     U08 *pu1_inp;
   6025     S32 i4_inp_stride;
   6026     S32 end_of_frame;
   6027     S32 num_sync_units_in_row, num_sync_units_in_tile;
   6028 
   6029     /*************************************************************************/
   6030     /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/
   6031     /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
   6032     /* we need to stop merges and force 8x8 CUs for that 16x16 blk           */
   6033     /*************************************************************************/
   6034     S32 blk_8x8_mask;
   6035     S32 ai4_blk_8x8_mask[16];
   6036     U08 au1_is_64x64Blk_noisy[1];
   6037     U08 au1_is_32x32Blk_noisy[4];
   6038     U08 au1_is_16x16Blk_noisy[16];
   6039 
   6040     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
   6041         ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
   6042     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
   6043         ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
   6044 
   6045     ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
   6046 
   6047     /*************************************************************************/
   6048     /* Pointers to current and coarse layer are needed for projection */
   6049     /* Pointer to prev layer are needed for other candts like coloc   */
   6050     /*************************************************************************/
   6051     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
   6052 
   6053     ps_prev_layer = hme_get_past_layer_ctxt(
   6054         ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
   6055 
   6056     num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
   6057 
   6058     /* Function pointer is selected based on the C vs X86 macro */
   6059 
   6060     fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
   6061 
   6062     i4_inp_stride = ps_curr_layer->i4_inp_stride;
   6063     i4_pic_wd = ps_curr_layer->i4_wd;
   6064     i4_pic_ht = ps_curr_layer->i4_ht;
   6065     e_search_complexity = ps_refine_prms->e_search_complexity;
   6066     end_of_frame = 0;
   6067 
   6068     /* This points to all the initial candts */
   6069     ps_search_candts = &as_search_candts[0];
   6070 
   6071     /* mv grid being huge structure is part of context */
   6072     aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
   6073     aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
   6074 
   6075     /*************************************************************************/
   6076     /* If the current layer is encoded (since it may be multicast or final   */
   6077     /* layer (finest)), then we use 16x16 blk size with some selected parts  */
   6078     /* If the current layer is not encoded, then we use 8x8 blk size, with   */
   6079     /* enable or disable of 4x4 partitions depending on the input prms       */
   6080     /*************************************************************************/
   6081     e_search_blk_size = BLK_16x16;
   6082     blk_wd = blk_ht = 16;
   6083     blk_size_shift = 4;
   6084     e_result_blk_size = BLK_8x8;
   6085     s_mv_update_prms.i4_shift = 1;
   6086 
   6087     if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
   6088     {
   6089         blk_4x4_to_16x16 = 1;
   6090     }
   6091     else
   6092     {
   6093         blk_4x4_to_16x16 = 0;
   6094     }
   6095 
   6096     unit_size = 1 << ps_ctxt->log_ctb_size;
   6097     s_search_prms_blk.i4_inp_stride = unit_size;
   6098 
   6099     /* This is required to properly update the layer mv bank */
   6100     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
   6101     s_search_prms_blk.e_blk_size = e_search_blk_size;
   6102 
   6103     /*************************************************************************/
   6104     /* If current layer is explicit, then the number of ref frames are to    */
   6105     /* be same as previous layer. Else it will be 2                          */
   6106     /*************************************************************************/
   6107     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
   6108     i4_num_pred_dir =
   6109         (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
   6110         1;
   6111 
   6112 #if USE_MODIFIED == 1
   6113     s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
   6114 #else
   6115     s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
   6116 #endif
   6117 
   6118     i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
   6119     if(i4_num_ref_prev_layer <= 2)
   6120     {
   6121         i4_num_ref_each_dir = 1;
   6122     }
   6123     else
   6124     {
   6125         i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
   6126     }
   6127 
   6128     s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
   6129     s_mv_update_prms.i4_num_results_to_store =
   6130         MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
   6131                                                 : (i4_num_act_ref_l0 > 1) + 1,
   6132             ps_refine_prms->i4_num_results_per_part);
   6133 
   6134     /*************************************************************************/
   6135     /* Initialization of merge params for 16x16 to 32x32 merge.              */
   6136     /* There are 4 32x32 units in a CTB, so 4 param structures initialized   */
   6137     /*************************************************************************/
   6138     {
   6139         hme_merge_prms_t *aps_merge_prms[4];
   6140         aps_merge_prms[0] = &s_merge_prms_32x32_tl;
   6141         aps_merge_prms[1] = &s_merge_prms_32x32_tr;
   6142         aps_merge_prms[2] = &s_merge_prms_32x32_bl;
   6143         aps_merge_prms[3] = &s_merge_prms_32x32_br;
   6144         for(i = 0; i < 4; i++)
   6145         {
   6146             hme_merge_prms_init(
   6147                 aps_merge_prms[i],
   6148                 ps_curr_layer,
   6149                 ps_refine_prms,
   6150                 ps_ctxt,
   6151                 as_range_prms_rec,
   6152                 as_range_prms_inp,
   6153                 &aps_mv_grid[0],
   6154                 &s_common_frm_prms,
   6155                 i4_num_pred_dir,
   6156                 i,
   6157                 BLK_32x32,
   6158                 e_me_quality_presets);
   6159         }
   6160     }
   6161 
   6162     /*************************************************************************/
   6163     /* Initialization of merge params for 32x32 to 64x64 merge.              */
   6164     /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB    */
   6165     /*************************************************************************/
   6166     {
   6167         hme_merge_prms_init(
   6168             &s_merge_prms_64x64,
   6169             ps_curr_layer,
   6170             ps_refine_prms,
   6171             ps_ctxt,
   6172             as_range_prms_rec,
   6173             as_range_prms_inp,
   6174             &aps_mv_grid[0],
   6175             &s_common_frm_prms,
   6176             i4_num_pred_dir,
   6177             0,
   6178             BLK_64x64,
   6179             e_me_quality_presets);
   6180     }
   6181 
   6182     /* Pointers to cu_results are initialised here */
   6183     {
   6184         WORD32 i;
   6185 
   6186         ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
   6187 
   6188         for(i = 0; i < 4; i++)
   6189         {
   6190             ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
   6191         }
   6192 
   6193         for(i = 0; i < 16; i++)
   6194         {
   6195             ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
   6196         }
   6197     }
   6198 
   6199     /*************************************************************************/
   6200     /* SUBPEL Params initialized here                                        */
   6201     /*************************************************************************/
   6202     {
   6203         s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
   6204         s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
   6205         s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
   6206 
   6207         s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
   6208         s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
   6209         s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
   6210 
   6211         s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
   6212         s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
   6213 
   6214         s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
   6215 
   6216         s_subpel_prms.i4_inp_stride = unit_size;
   6217 
   6218         s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
   6219         s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
   6220         s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
   6221 
   6222         s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
   6223 
   6224         {
   6225             WORD32 ref_ctr;
   6226             for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
   6227             {
   6228                 s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
   6229                 s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
   6230             }
   6231         }
   6232         s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
   6233 
   6234 #if USE_MODIFIED == 0
   6235         s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
   6236 #else
   6237         s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
   6238 #endif
   6239         s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
   6240 
   6241         /* BI Refinement done only if this field is 1 */
   6242         s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
   6243 
   6244         s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
   6245 
   6246         s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
   6247         s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
   6248         s_subpel_prms.u1_max_num_subpel_refine_centers =
   6249             ps_refine_prms->u1_max_num_subpel_refine_centers;
   6250     }
   6251 
   6252     /* inter_ctb_prms_t struct initialisation */
   6253     {
   6254         inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
   6255         hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
   6256 
   6257         ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
   6258         ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
   6259         ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
   6260         ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
   6261         ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
   6262         ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
   6263         ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
   6264         ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
   6265         ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
   6266         ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
   6267         ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
   6268         ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
   6269         ps_inter_ctb_prms->i4_lamda = lambda_recon;
   6270         ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
   6271         ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
   6272         ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
   6273         ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
   6274         ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
   6275         ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
   6276         ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
   6277             ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
   6278     }
   6279 
   6280     for(i = 0; i < MAX_INIT_CANDTS; i++)
   6281     {
   6282         ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
   6283         ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
   6284 
   6285         INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
   6286     }
   6287     num_act_ref_pics =
   6288         ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
   6289 
   6290     if(num_act_ref_pics)
   6291     {
   6292         hme_search_cand_data_init(
   6293             ai4_id_Z,
   6294             ai4_id_coloc,
   6295             ai4_num_coloc_cands,
   6296             au1_search_candidate_list_index,
   6297             i4_num_act_ref_l0,
   6298             i4_num_act_ref_l1,
   6299             ps_ctxt->s_frm_prms.bidir_enabled,
   6300             blk_4x4_to_16x16);
   6301     }
   6302 
   6303     if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
   6304     {
   6305         ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
   6306         ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
   6307     }
   6308     else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
   6309     {
   6310         ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
   6311     }
   6312 
   6313     for(i = 0; i < 3; i++)
   6314     {
   6315         search_node_t *ps_search_node;
   6316         ps_search_node = &as_left_neighbours[i];
   6317         INIT_SEARCH_NODE(ps_search_node, 0);
   6318         ps_search_node = &as_top_neighbours[i];
   6319         INIT_SEARCH_NODE(ps_search_node, 0);
   6320     }
   6321 
   6322     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
   6323     as_left_neighbours[2].u1_is_avail = 0;
   6324 
   6325     /*************************************************************************/
   6326     /* Initialize all the search results structure here. We update all the   */
   6327     /* search results to default values, and configure things like blk sizes */
   6328     /*************************************************************************/
   6329     if(num_act_ref_pics)
   6330     {
   6331         S32 i4_x, i4_y;
   6332         /* 16x16 results */
   6333         for(i = 0; i < 16; i++)
   6334         {
   6335             search_results_t *ps_search_results;
   6336             S32 pred_lx;
   6337             ps_search_results = &ps_ctxt->as_search_results_16x16[i];
   6338             i4_x = (S32)gau1_encode_to_raster_x[i];
   6339             i4_y = (S32)gau1_encode_to_raster_y[i];
   6340             i4_x <<= 4;
   6341             i4_y <<= 4;
   6342 
   6343             hme_init_search_results(
   6344                 ps_search_results,
   6345                 i4_num_pred_dir,
   6346                 ps_refine_prms->i4_num_fpel_results,
   6347                 ps_refine_prms->i4_num_results_per_part,
   6348                 e_search_blk_size,
   6349                 i4_x,
   6350                 i4_y,
   6351                 &ps_ctxt->au1_is_past[0]);
   6352 
   6353             for(pred_lx = 0; pred_lx < 2; pred_lx++)
   6354             {
   6355                 pred_ctxt_t *ps_pred_ctxt;
   6356 
   6357                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
   6358 
   6359                 hme_init_pred_ctxt_encode(
   6360                     ps_pred_ctxt,
   6361                     ps_search_results,
   6362                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
   6363                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
   6364                     aps_mv_grid[pred_lx],
   6365                     pred_lx,
   6366                     lambda_recon,
   6367                     ps_refine_prms->lambda_q_shift,
   6368                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
   6369                     &ps_ctxt->ai2_ref_scf[0]);
   6370             }
   6371         }
   6372 
   6373         for(i = 0; i < 4; i++)
   6374         {
   6375             search_results_t *ps_search_results;
   6376             S32 pred_lx;
   6377             ps_search_results = &ps_ctxt->as_search_results_32x32[i];
   6378 
   6379             i4_x = (S32)gau1_encode_to_raster_x[i];
   6380             i4_y = (S32)gau1_encode_to_raster_y[i];
   6381             i4_x <<= 5;
   6382             i4_y <<= 5;
   6383 
   6384             hme_init_search_results(
   6385                 ps_search_results,
   6386                 i4_num_pred_dir,
   6387                 ps_refine_prms->i4_num_32x32_merge_results,
   6388                 ps_refine_prms->i4_num_results_per_part,
   6389                 BLK_32x32,
   6390                 i4_x,
   6391                 i4_y,
   6392                 &ps_ctxt->au1_is_past[0]);
   6393 
   6394             for(pred_lx = 0; pred_lx < 2; pred_lx++)
   6395             {
   6396                 pred_ctxt_t *ps_pred_ctxt;
   6397 
   6398                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
   6399 
   6400                 hme_init_pred_ctxt_encode(
   6401                     ps_pred_ctxt,
   6402                     ps_search_results,
   6403                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
   6404                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
   6405                     aps_mv_grid[pred_lx],
   6406                     pred_lx,
   6407                     lambda_recon,
   6408                     ps_refine_prms->lambda_q_shift,
   6409                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
   6410                     &ps_ctxt->ai2_ref_scf[0]);
   6411             }
   6412         }
   6413 
   6414         {
   6415             search_results_t *ps_search_results;
   6416             S32 pred_lx;
   6417             ps_search_results = &ps_ctxt->s_search_results_64x64;
   6418 
   6419             hme_init_search_results(
   6420                 ps_search_results,
   6421                 i4_num_pred_dir,
   6422                 ps_refine_prms->i4_num_64x64_merge_results,
   6423                 ps_refine_prms->i4_num_results_per_part,
   6424                 BLK_64x64,
   6425                 0,
   6426                 0,
   6427                 &ps_ctxt->au1_is_past[0]);
   6428 
   6429             for(pred_lx = 0; pred_lx < 2; pred_lx++)
   6430             {
   6431                 pred_ctxt_t *ps_pred_ctxt;
   6432 
   6433                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
   6434 
   6435                 hme_init_pred_ctxt_encode(
   6436                     ps_pred_ctxt,
   6437                     ps_search_results,
   6438                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
   6439                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
   6440                     aps_mv_grid[pred_lx],
   6441                     pred_lx,
   6442                     lambda_recon,
   6443                     ps_refine_prms->lambda_q_shift,
   6444                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
   6445                     &ps_ctxt->ai2_ref_scf[0]);
   6446             }
   6447         }
   6448     }
   6449 
   6450     /* Initialise the structure used in clustering  */
   6451     if(ME_PRISTINE_QUALITY == e_me_quality_presets)
   6452     {
   6453         ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
   6454 
   6455         ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
   6456         ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
   6457         ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
   6458         ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
   6459         ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
   6460         ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
   6461         ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
   6462     }
   6463 
   6464     /*********************************************************************/
   6465     /* Initialize the dyn. search range params. for each reference index */
   6466     /* in current layer ctxt                                             */
   6467     /*********************************************************************/
   6468 
   6469     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
   6470     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
   6471     {
   6472         WORD32 ref_ctr;
   6473         /* set no. of act ref in L0 for further use at frame level */
   6474         ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
   6475             ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
   6476 
   6477         for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
   6478         {
   6479             INIT_DYN_SEARCH_PRMS(
   6480                 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
   6481                 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
   6482         }
   6483     }
   6484     /*************************************************************************/
   6485     /* Now that the candidates have been ordered, to choose the right number */
   6486     /* of initial candidates.                                                */
   6487     /*************************************************************************/
   6488     if(blk_4x4_to_16x16)
   6489     {
   6490         if(i4_num_ref_prev_layer > 2)
   6491         {
   6492             if(e_search_complexity == SEARCH_CX_LOW)
   6493                 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6494             else if(e_search_complexity == SEARCH_CX_MED)
   6495                 num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6496             else if(e_search_complexity == SEARCH_CX_HIGH)
   6497                 num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6498             else
   6499                 ASSERT(0);
   6500         }
   6501         else if(i4_num_ref_prev_layer == 2)
   6502         {
   6503             if(e_search_complexity == SEARCH_CX_LOW)
   6504                 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6505             else if(e_search_complexity == SEARCH_CX_MED)
   6506                 num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6507             else if(e_search_complexity == SEARCH_CX_HIGH)
   6508                 num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6509             else
   6510                 ASSERT(0);
   6511         }
   6512         else
   6513         {
   6514             if(e_search_complexity == SEARCH_CX_LOW)
   6515                 num_init_candts = 5;
   6516             else if(e_search_complexity == SEARCH_CX_MED)
   6517                 num_init_candts = 12;
   6518             else if(e_search_complexity == SEARCH_CX_HIGH)
   6519                 num_init_candts = 19;
   6520             else
   6521                 ASSERT(0);
   6522         }
   6523     }
   6524     else
   6525     {
   6526         if(i4_num_ref_prev_layer > 2)
   6527         {
   6528             if(e_search_complexity == SEARCH_CX_LOW)
   6529                 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6530             else if(e_search_complexity == SEARCH_CX_MED)
   6531                 num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6532             else if(e_search_complexity == SEARCH_CX_HIGH)
   6533                 num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6534             else
   6535                 ASSERT(0);
   6536         }
   6537         else if(i4_num_ref_prev_layer == 2)
   6538         {
   6539             if(e_search_complexity == SEARCH_CX_LOW)
   6540                 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6541             else if(e_search_complexity == SEARCH_CX_MED)
   6542                 num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6543             else if(e_search_complexity == SEARCH_CX_HIGH)
   6544                 num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
   6545             else
   6546                 ASSERT(0);
   6547         }
   6548         else
   6549         {
   6550             if(e_search_complexity == SEARCH_CX_LOW)
   6551                 num_init_candts = 5;
   6552             else if(e_search_complexity == SEARCH_CX_MED)
   6553                 num_init_candts = 11;
   6554             else if(e_search_complexity == SEARCH_CX_HIGH)
   6555                 num_init_candts = 16;
   6556             else
   6557                 ASSERT(0);
   6558         }
   6559     }
   6560 
   6561     /*************************************************************************/
   6562     /* The following search parameters are fixed throughout the search across*/
   6563     /* all blks. So these are configured outside processing loop             */
   6564     /*************************************************************************/
   6565     s_search_prms_blk.i4_num_init_candts = num_init_candts;
   6566     s_search_prms_blk.i4_start_step = 1;
   6567     s_search_prms_blk.i4_use_satd = 0;
   6568     s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
   6569     /* we use recon only for encoded layers, otherwise it is not available */
   6570     s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
   6571 
   6572     s_search_prms_blk.ps_search_candts = ps_search_candts;
   6573     if(s_search_prms_blk.i4_use_rec)
   6574     {
   6575         WORD32 ref_ctr;
   6576         for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
   6577             s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
   6578     }
   6579     else
   6580     {
   6581         WORD32 ref_ctr;
   6582         for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
   6583             s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
   6584     }
   6585 
   6586     /*************************************************************************/
   6587     /* Initialize coordinates. Meaning as follows                            */
   6588     /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
   6589     /* blk_y : same as above, y coord.                                       */
   6590     /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
   6591     /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
   6592     /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
   6593     /* corner of the picture. Always multiple of 64.                         */
   6594     /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
   6595     /*************************************************************************/
   6596     blk_y = 0;
   6597     blk_id_in_ctb = 0;
   6598     i4_ctb_y = 0;
   6599 
   6600     /*************************************************************************/
   6601     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
   6602     /* every block given its coordinate. Note this assumes that the min amt  */
   6603     /* of padding to right of pic is equal to the blk size. If we go all the */
   6604     /* way up to 64x64, then the min padding on right side of picture should */
   6605     /* be 64, and also on bottom side of picture.                            */
   6606     /*************************************************************************/
   6607     SET_PIC_LIMIT(
   6608         s_pic_limit_inp,
   6609         ps_curr_layer->i4_pad_x_rec,
   6610         ps_curr_layer->i4_pad_y_rec,
   6611         ps_curr_layer->i4_wd,
   6612         ps_curr_layer->i4_ht,
   6613         s_search_prms_blk.i4_num_steps_post_refine);
   6614 
   6615     SET_PIC_LIMIT(
   6616         s_pic_limit_rec,
   6617         ps_curr_layer->i4_pad_x_rec,
   6618         ps_curr_layer->i4_pad_y_rec,
   6619         ps_curr_layer->i4_wd,
   6620         ps_curr_layer->i4_ht,
   6621         s_search_prms_blk.i4_num_steps_post_refine);
   6622 
   6623     /*************************************************************************/
   6624     /* set the MV limit per ref. pic.                                        */
   6625     /*    - P pic. : Based on the config params.                             */
   6626     /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
   6627     /*************************************************************************/
   6628     hme_set_mv_limit_using_dvsr_data(
   6629         ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
   6630     s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
   6631     s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
   6632     s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
   6633     s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
   6634     s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
   6635     s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
   6636     s_srch_cand_init_data.ps_search_cands = ps_search_candts;
   6637     s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
   6638     s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
   6639     s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
   6640     s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
   6641 
   6642     while(0 == end_of_frame)
   6643     {
   6644         job_queue_t *ps_job;
   6645         frm_ctb_ctxt_t *ps_frm_ctb_prms;
   6646         ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
   6647 
   6648         WORD32 i4_max_mv_x_in_ctb;
   6649         WORD32 i4_max_mv_y_in_ctb;
   6650         void *pv_dep_mngr_encloop_dep_me;
   6651         WORD32 offset_val, check_dep_pos, set_dep_pos;
   6652         WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
   6653 
   6654         pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
   6655 
   6656         ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
   6657 
   6658         /* Get the current row from the job queue */
   6659         ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
   6660             ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
   6661 
   6662         /* If all rows are done, set the end of process flag to 1, */
   6663         /* and the current row to -1 */
   6664         if(NULL == ps_job)
   6665         {
   6666             blk_y = -1;
   6667             i4_ctb_y = -1;
   6668             tile_col_idx = -1;
   6669             end_of_frame = 1;
   6670 
   6671             continue;
   6672         }
   6673 
   6674         /* set the output dependency after picking up the row */
   6675         ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
   6676 
   6677         /* Obtain the current row's details from the job */
   6678         {
   6679             ihevce_tile_params_t *ps_col_tile_params;
   6680 
   6681             i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
   6682             /* Obtain the current column tile index from the job */
   6683             tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
   6684 
   6685             /* in encode layer block are 16x16 and CTB is 64 x 64 */
   6686             /* note if ctb is 32x32 then this calc needs to be changed */
   6687             num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
   6688                                     ps_ctxt->log_ctb_size;
   6689 
   6690             /* The tile parameter for the col. idx. Use only the properties
   6691             which are the same for all the bottom tiles like width, start_x, etc.
   6692             Don't use height, start_y, etc.                                  */
   6693             ps_col_tile_params =
   6694                 ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
   6695             /* in encode layer block are 16x16 and CTB is 64 x 64 */
   6696             /* note if ctb is 32x32 then this calc needs to be changed */
   6697             num_sync_units_in_tile =
   6698                 (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
   6699                 ps_ctxt->log_ctb_size;
   6700 
   6701             i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
   6702             i4_ctb_x = i4_first_ctb_x;
   6703 
   6704             if(!num_act_ref_pics)
   6705             {
   6706                 for(i4_ctb_x = i4_first_ctb_x;
   6707                     i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
   6708                     i4_ctb_x++)
   6709                 {
   6710                     S32 blk_i = 0, blk_j = 0;
   6711                     /* set the dependency for the corresponding row in enc loop */
   6712                     ihevce_dmgr_set_row_row_sync(
   6713                         pv_dep_mngr_encloop_dep_me,
   6714                         (i4_ctb_x + 1),
   6715                         i4_ctb_y,
   6716                         tile_col_idx /* Col Tile No. */);
   6717                 }
   6718 
   6719                 continue;
   6720             }
   6721 
   6722             /* increment the number of rows proc */
   6723             num_rows_proc++;
   6724 
   6725             /* Set Variables for Dep. Checking and Setting */
   6726             set_dep_pos = i4_ctb_y + 1;
   6727             if(i4_ctb_y > 0)
   6728             {
   6729                 offset_val = 2;
   6730                 check_dep_pos = i4_ctb_y - 1;
   6731             }
   6732             else
   6733             {
   6734                 /* First row should run without waiting */
   6735                 offset_val = -1;
   6736                 check_dep_pos = 0;
   6737             }
   6738 
   6739             /* row ctb out pointer  */
   6740             ps_ctxt->ps_ctb_analyse_curr_row =
   6741                 ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
   6742 
   6743             /* Row level CU Tree buffer */
   6744             ps_ctxt->ps_cu_tree_curr_row =
   6745                 ps_ctxt->ps_cu_tree_base +
   6746                 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
   6747 
   6748             ps_ctxt->ps_me_ctb_data_curr_row =
   6749                 ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
   6750         }
   6751 
   6752         /* This flag says the CTB under processing is at the start of tile in horz dir.*/
   6753         left_ctb_in_diff_tile = 1;
   6754 
   6755         /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var,                                 */
   6756         /* the shift value is passed on to the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied  */
   6757         {
   6758             S32 i4_ref_id, i4_bits_req;
   6759 
   6760             for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
   6761                                             ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
   6762                 i4_ref_id++)
   6763             {
   6764                 GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
   6765 
   6766                 if(i4_bits_req > 12)
   6767                 {
   6768                     ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
   6769                 }
   6770                 else
   6771                 {
   6772                     ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
   6773                 }
   6774             }
   6775 
   6776             s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
   6777         }
   6778 
   6779         /* if non-encode layer then i4_ctb_x will be same as blk_x */
   6780         /* loop over all the units in a row                        */
   6781         for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
   6782             i4_ctb_x++)
   6783         {
   6784             ihevce_ctb_noise_params *ps_ctb_noise_params =
   6785                 &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
   6786 
   6787             s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
   6788             s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
   6789 
   6790             ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
   6791             ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
   6792             /* Initialize ptr to current IPE CTB */
   6793             ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
   6794                              i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
   6795             {
   6796                 ps_ctb_bound_attrs =
   6797                     get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
   6798 
   6799                 en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
   6800                 num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
   6801             }
   6802 
   6803             /* Block to initialise pointers to part_type_results_t */
   6804             /* in each size-specific inter_cu_results_t  */
   6805             {
   6806                 WORD32 i;
   6807 
   6808                 for(i = 0; i < 64; i++)
   6809                 {
   6810                     ps_ctxt->as_cu8x8_results[i].ps_best_results =
   6811                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
   6812                             .as_8x8_block_data[i]
   6813                             .as_best_results;
   6814                     ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
   6815                 }
   6816 
   6817                 for(i = 0; i < 16; i++)
   6818                 {
   6819                     ps_ctxt->as_cu16x16_results[i].ps_best_results =
   6820                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
   6821                     ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
   6822                 }
   6823 
   6824                 for(i = 0; i < 4; i++)
   6825                 {
   6826                     ps_ctxt->as_cu32x32_results[i].ps_best_results =
   6827                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
   6828                             .as_32x32_block_data[i]
   6829                             .as_best_results;
   6830                     ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
   6831                 }
   6832 
   6833                 ps_ctxt->s_cu64x64_results.ps_best_results =
   6834                     ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
   6835                 ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
   6836             }
   6837 
   6838             if(ME_PRISTINE_QUALITY == e_me_quality_presets)
   6839             {
   6840                 ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
   6841                 ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
   6842                 ps_ctb_cluster_info->ps_cu_tree_root =
   6843                     ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
   6844                 ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
   6845             }
   6846 
   6847             if(ME_PRISTINE_QUALITY != e_me_quality_presets)
   6848             {
   6849                 S32 i4_nodes_created_in_cu_tree = 1;
   6850 
   6851                 ihevce_cu_tree_init(
   6852                     (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
   6853                     (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
   6854                     &i4_nodes_created_in_cu_tree,
   6855                     0,
   6856                     POS_NA,
   6857                     POS_NA,
   6858                     POS_NA);
   6859             }
   6860 
   6861             memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
   6862 
   6863             if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
   6864             {
   6865                 S32 j;
   6866 
   6867                 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
   6868 
   6869                 ps_cur_ipe_ctb =
   6870                     ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
   6871                 lambda_recon =
   6872                     hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
   6873 
   6874                 lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
   6875 
   6876                 for(i = 0; i < 4; i++)
   6877                 {
   6878                     ps_search_results = &ps_ctxt->as_search_results_32x32[i];
   6879 
   6880                     for(j = 0; j < 2; j++)
   6881                     {
   6882                         ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
   6883                     }
   6884                 }
   6885                 ps_search_results = &ps_ctxt->s_search_results_64x64;
   6886 
   6887                 for(j = 0; j < 2; j++)
   6888                 {
   6889                     ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
   6890                 }
   6891 
   6892                 s_common_frm_prms.i4_lamda = lambda_recon;
   6893             }
   6894             else
   6895             {
   6896                 lambda_recon = ps_refine_prms->lambda_recon;
   6897             }
   6898 
   6899             /*********************************************************************/
   6900             /* replicate the inp buffer at blk or ctb level for each ref id,     */
   6901             /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
   6902             /* thereby avoiding a bloat up of memory. If we did all references   */
   6903             /* weighted pred, we will end up with a duplicate copy of each ref   */
   6904             /* at each layer, since we need to preserve the original reference.  */
   6905             /* ToDo: Need to observe performance with this mechanism and compare */
   6906             /* with case where ref is weighted.                                  */
   6907             /*********************************************************************/
   6908             fp_get_wt_inp(
   6909                 ps_curr_layer,
   6910                 &ps_ctxt->s_wt_pred,
   6911                 unit_size,
   6912                 s_common_frm_prms.i4_ctb_x_off,
   6913                 s_common_frm_prms.i4_ctb_y_off,
   6914                 unit_size,
   6915                 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
   6916                 ps_ctxt->i4_wt_pred_enable_flag);
   6917 
   6918             if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
   6919             {
   6920 #if TEMPORAL_NOISE_DETECT
   6921                 {
   6922                     WORD32 had_block_size = 16;
   6923                     WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
   6924                                            ? 64
   6925                                            : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
   6926                     WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
   6927                                             ? 64
   6928                                             : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
   6929                     WORD32 num_pred_dir = i4_num_pred_dir;
   6930                     WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
   6931                     WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
   6932 
   6933                     WORD32 i;
   6934                     WORD32 noise_detected;
   6935                     WORD32 ctb_size;
   6936                     WORD32 num_comp_had_blocks;
   6937                     WORD32 noisy_block_cnt;
   6938                     WORD32 index_8x8_block;
   6939                     WORD32 num_8x8_in_ctb_row;
   6940 
   6941                     WORD32 ht_offset;
   6942                     WORD32 wd_offset;
   6943                     WORD32 block_ht;
   6944                     WORD32 block_wd;
   6945 
   6946                     WORD32 num_horz_blocks;
   6947                     WORD32 num_vert_blocks;
   6948 
   6949                     WORD32 mean;
   6950                     UWORD32 variance_8x8;
   6951 
   6952                     WORD32 hh_energy_percent;
   6953 
   6954                     /* variables to hold the constant values. The variable values held are decided by the HAD block size */
   6955                     WORD32 min_noisy_block_cnt;
   6956                     WORD32 min_coeffs_above_avg;
   6957                     WORD32 min_coeff_avg_energy;
   6958 
   6959                     /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
   6960                     WORD32 i4_cu_x_off, i4_cu_y_off;
   6961                     WORD32 is_noisy;
   6962 
   6963                     /* initialise the variables holding the constants */
   6964                     if(had_block_size == 8)
   6965                     {
   6966                         min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8;  //6;//
   6967                         min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
   6968                         min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
   6969                     }
   6970                     else
   6971                     {
   6972                         min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16;  //7;//
   6973                         min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
   6974                         min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
   6975                     }
   6976 
   6977                     /* initialize the variables */
   6978                     noise_detected = 0;
   6979                     noisy_block_cnt = 0;
   6980                     hh_energy_percent = 0;
   6981                     variance_8x8 = 0;
   6982                     block_ht = ctb_height;
   6983                     block_wd = ctb_width;
   6984 
   6985                     mean = 0;
   6986 
   6987                     ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
   6988                     num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
   6989 
   6990                     num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
   6991                     num_vert_blocks = block_ht / had_block_size;  //ctb_height / had_block_size;
   6992 
   6993                     ht_offset = -had_block_size;
   6994                     wd_offset = -had_block_size;
   6995 
   6996                     num_8x8_in_ctb_row = block_wd / 8;  // number of 8x8 in this ctb
   6997                     for(i = 0; i < num_comp_had_blocks; i++)
   6998                     {
   6999                         if(i % num_horz_blocks == 0)
   7000                         {
   7001                             wd_offset = -had_block_size;
   7002                             ht_offset += had_block_size;
   7003                         }
   7004                         wd_offset += had_block_size;
   7005 
   7006                         /* CU level offsets */
   7007                         i4_cu_x_off = i4_x_off + (i % 4) * 16;  //+ (i % 4) * 16
   7008                         i4_cu_y_off = i4_y_off + (i / 4) * 16;
   7009 
   7010                         /* if 50 % or more of the CU is noisy then the return value is 1 */
   7011                         is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
   7012                             ps_ctb_noise_params->au1_is_8x8Blk_noisy,
   7013                             (i % 4) * 16,
   7014                             (i / 4) * 16,
   7015                             16);
   7016 
   7017                         /* the temporal noise detect call is made on the CU only if the CU is noisy */
   7018                         if(is_noisy)
   7019                         {
   7020                             index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
   7021                                               (i % num_horz_blocks) * 2;
   7022                             noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
   7023                                 16,
   7024                                 ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
   7025                                     ? 64
   7026                                     : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
   7027                                 ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
   7028                                     ? 64
   7029                                     : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
   7030                                 ps_ctb_noise_params,
   7031                                 &s_srch_cand_init_data,
   7032                                 &s_search_prms_blk,
   7033                                 ps_ctxt,
   7034                                 num_pred_dir,
   7035                                 i4_num_act_ref_l0,
   7036                                 i4_num_act_ref_l1,
   7037                                 i4_cu_x_off,
   7038                                 i4_cu_y_off,
   7039                                 &ps_ctxt->s_wt_pred,
   7040                                 unit_size,
   7041                                 index_8x8_block,
   7042                                 num_horz_blocks,
   7043                                 /*num_8x8_in_ctb_row*/ 8,  // this should be a variable extra
   7044                                 i);
   7045                         } /* if 16x16 is noisy */
   7046                     } /* loop over for all 16x16*/
   7047 
   7048                     if(noisy_block_cnt >= min_noisy_block_cnt)
   7049                     {
   7050                         noise_detected = 1;
   7051                     }
   7052 
   7053                     /* write back the noise presence detected for the current CTB to the structure */
   7054                     ps_ctb_noise_params->i4_noise_present = noise_detected;
   7055                 }
   7056 #endif
   7057 
   7058 #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
   7059                 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
   7060                    ps_ctb_noise_params->i4_noise_present)
   7061                 {
   7062                     memset(
   7063                         ps_ctb_noise_params->au1_is_8x8Blk_noisy,
   7064                         1,
   7065                         sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
   7066                 }
   7067 #endif
   7068 
   7069                 for(i = 0; i < 16; i++)
   7070                 {
   7071                     au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
   7072                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
   7073                 }
   7074 
   7075                 for(i = 0; i < 4; i++)
   7076                 {
   7077                     au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
   7078                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
   7079                 }
   7080 
   7081                 for(i = 0; i < 1; i++)
   7082                 {
   7083                     au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
   7084                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
   7085                 }
   7086 
   7087                 if(ps_ctxt->s_frm_prms.bidir_enabled &&
   7088                    (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
   7089                     MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
   7090                 {
   7091                     ps_ctb_noise_params->i4_noise_present = 0;
   7092                     memset(
   7093                         ps_ctb_noise_params->au1_is_8x8Blk_noisy,
   7094                         0,
   7095                         sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
   7096                 }
   7097 
   7098 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
   7099                 for(i = 0; i < 4; i++)
   7100                 {
   7101                     S32 j;
   7102                     S32 lambda;
   7103 
   7104                     if(au1_is_32x32Blk_noisy[i])
   7105                     {
   7106                         lambda = lambda_recon;
   7107                         lambda =
   7108                             ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
   7109 
   7110                         ps_search_results = &ps_ctxt->as_search_results_32x32[i];
   7111 
   7112                         for(j = 0; j < 2; j++)
   7113                         {
   7114                             ps_search_results->as_pred_ctxt[j].lambda = lambda;
   7115                         }
   7116                     }
   7117                 }
   7118 
   7119                 {
   7120                     S32 j;
   7121                     S32 lambda;
   7122 
   7123                     if(au1_is_64x64Blk_noisy[0])
   7124                     {
   7125                         lambda = lambda_recon;
   7126                         lambda =
   7127                             ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
   7128 
   7129                         ps_search_results = &ps_ctxt->s_search_results_64x64;
   7130 
   7131                         for(j = 0; j < 2; j++)
   7132                         {
   7133                             ps_search_results->as_pred_ctxt[j].lambda = lambda;
   7134                         }
   7135                     }
   7136                 }
   7137 #endif
   7138                 if(au1_is_64x64Blk_noisy[0])
   7139                 {
   7140                     U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
   7141                                                              (s_common_frm_prms.i4_ctb_y_off *
   7142                                                               ps_curr_layer->i4_inp_stride));
   7143 
   7144                     hme_compute_sigmaX_and_sigmaXSquared(
   7145                         pu1_inp,
   7146                         ps_curr_layer->i4_inp_stride,
   7147                         ps_ctxt->au4_4x4_src_sigmaX,
   7148                         ps_ctxt->au4_4x4_src_sigmaXSquared,
   7149                         4,
   7150                         4,
   7151                         64,
   7152                         64,
   7153                         1,
   7154                         16);
   7155                 }
   7156                 else
   7157                 {
   7158                     for(i = 0; i < 4; i++)
   7159                     {
   7160                         if(au1_is_32x32Blk_noisy[i])
   7161                         {
   7162                             U08 *pu1_inp =
   7163                                 ps_curr_layer->pu1_inp +
   7164                                 (s_common_frm_prms.i4_ctb_x_off +
   7165                                  (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
   7166 
   7167                             U08 u1_cu_size = 32;
   7168                             WORD32 i4_inp_buf_offset =
   7169                                 (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
   7170                                  ((i % 2) * u1_cu_size));
   7171 
   7172                             U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
   7173                             U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
   7174                             S32 i4_sigma_arr_offset =
   7175                                 (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
   7176                                  ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
   7177 
   7178                             hme_compute_sigmaX_and_sigmaXSquared(
   7179                                 pu1_inp + i4_inp_buf_offset,
   7180                                 ps_curr_layer->i4_inp_stride,
   7181                                 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
   7182                                 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
   7183                                 4,
   7184                                 4,
   7185                                 32,
   7186                                 32,
   7187                                 1,
   7188                                 16);
   7189                         }
   7190                         else
   7191                         {
   7192                             S32 j;
   7193 
   7194                             U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
   7195                             U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
   7196                             S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
   7197                                 (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
   7198                                  ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
   7199 
   7200                             for(j = 0; j < 4; j++)
   7201                             {
   7202                                 U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
   7203                                 U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
   7204                                 S32 i4_16x16_blk_index_in_ctb =
   7205                                     i4_16x16_blk_start_index_in_i_th_32x32_blk +
   7206                                     ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
   7207                                     ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
   7208 
   7209                                 //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
   7210 
   7211                                 if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
   7212                                 {
   7213                                     U08 *pu1_inp =
   7214                                         ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
   7215                                                                   (s_common_frm_prms.i4_ctb_y_off *
   7216                                                                    ps_curr_layer->i4_inp_stride));
   7217 
   7218                                     U08 u1_cu_size = 16;
   7219                                     WORD32 i4_inp_buf_offset =
   7220                                         (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
   7221                                          ((i4_16x16_blk_index_in_ctb / 4) *
   7222                                           (u1_cu_size * ps_curr_layer->i4_inp_stride)));
   7223 
   7224                                     U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
   7225                                     U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
   7226                                     S32 i4_sigma_arr_offset =
   7227                                         (((i4_16x16_blk_index_in_ctb % 4) *
   7228                                           u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
   7229                                          ((i4_16x16_blk_index_in_ctb / 4) *
   7230                                           u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
   7231 
   7232                                     hme_compute_sigmaX_and_sigmaXSquared(
   7233                                         pu1_inp + i4_inp_buf_offset,
   7234                                         ps_curr_layer->i4_inp_stride,
   7235                                         (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
   7236                                         (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
   7237                                         4,
   7238                                         4,
   7239                                         16,
   7240                                         16,
   7241                                         1,
   7242                                         16);
   7243                                 }
   7244                             }
   7245                         }
   7246                     }
   7247                 }
   7248             }
   7249             else
   7250             {
   7251                 memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
   7252 
   7253                 memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
   7254 
   7255                 memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
   7256             }
   7257 
   7258             for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
   7259             {
   7260                 S32 ref_ctr;
   7261                 U08 au1_pred_dir_searched[2];
   7262                 U08 u1_is_cu_noisy;
   7263                 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
   7264 
   7265                 {
   7266                     blk_x = (i4_ctb_x << 2) +
   7267                             (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
   7268                     blk_y = (i4_ctb_y << 2) +
   7269                             (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
   7270 
   7271                     blk_id_in_full_ctb =
   7272                         ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
   7273                     blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
   7274                     ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
   7275                     s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
   7276                     s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
   7277                 }
   7278 
   7279                 /* get the current input blk point */
   7280                 pos_x = blk_x << blk_size_shift;
   7281                 pos_y = blk_y << blk_size_shift;
   7282                 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
   7283 
   7284                 /*********************************************************************/
   7285                 /* For every blk in the picture, the search range needs to be derived*/
   7286                 /* Any blk can have any mv, but practical search constraints are     */
   7287                 /* imposed by the picture boundary and amt of padding.               */
   7288                 /*********************************************************************/
   7289                 /* MV limit is different based on ref. PIC */
   7290                 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
   7291                 {
   7292                     if(!s_search_prms_blk.i4_use_rec)
   7293                     {
   7294                         hme_derive_search_range(
   7295                             &as_range_prms_inp[ref_ctr],
   7296                             &s_pic_limit_inp,
   7297                             &as_mv_limit[ref_ctr],
   7298                             pos_x,
   7299                             pos_y,
   7300                             blk_wd,
   7301                             blk_ht);
   7302                     }
   7303                     else
   7304                     {
   7305                         hme_derive_search_range(
   7306                             &as_range_prms_rec[ref_ctr],
   7307                             &s_pic_limit_rec,
   7308                             &as_mv_limit[ref_ctr],
   7309                             pos_x,
   7310                             pos_y,
   7311                             blk_wd,
   7312                             blk_ht);
   7313                     }
   7314                 }
   7315                 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
   7316                 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
   7317                 /* Select search results from a suitable search result in the context */
   7318                 {
   7319                     ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
   7320 
   7321                     if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
   7322                     {
   7323                         S32 i;
   7324 
   7325                         for(i = 0; i < 2; i++)
   7326                         {
   7327                             ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
   7328                         }
   7329                     }
   7330                 }
   7331 
   7332                 u1_is_cu_noisy = au1_is_16x16Blk_noisy
   7333                     [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
   7334 
   7335                 s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
   7336 
   7337 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
   7338                 if(u1_is_cu_noisy)
   7339                 {
   7340                     S32 j;
   7341                     S32 lambda;
   7342 
   7343                     lambda = lambda_recon;
   7344                     lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
   7345 
   7346                     for(j = 0; j < 2; j++)
   7347                     {
   7348                         ps_search_results->as_pred_ctxt[j].lambda = lambda;
   7349                     }
   7350                 }
   7351                 else
   7352                 {
   7353                     S32 j;
   7354                     S32 lambda;
   7355 
   7356                     lambda = lambda_recon;
   7357 
   7358                     for(j = 0; j < 2; j++)
   7359                     {
   7360                         ps_search_results->as_pred_ctxt[j].lambda = lambda;
   7361                     }
   7362                 }
   7363 #endif
   7364 
   7365                 s_search_prms_blk.ps_search_results = ps_search_results;
   7366 
   7367                 s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
   7368                     pu1_inp,
   7369                     i4_inp_stride,
   7370                     ps_refine_prms->limit_active_partitions,
   7371                     ps_ctxt->ps_hme_frm_prms->bidir_enabled,
   7372                     ps_ctxt->u1_is_curFrame_a_refFrame,
   7373                     blk_8x8_mask,
   7374                     e_me_quality_presets);
   7375 
   7376                 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
   7377                 {
   7378                     ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
   7379                         s_search_prms_blk.i4_part_mask;
   7380                 }
   7381 
   7382                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
   7383                 {
   7384                     /* Setting u1_num_active_refs to 2 */
   7385                     /* for the sole purpose of the */
   7386                     /* function called below */
   7387                     ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
   7388 
   7389                     hme_reset_search_results(
   7390                         ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
   7391 
   7392                     ps_search_results->u1_num_active_ref = i4_num_pred_dir;
   7393                 }
   7394 
   7395                 if(0 == blk_id_in_ctb)
   7396                 {
   7397                     UWORD8 u1_ctr;
   7398                     for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
   7399                                               ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
   7400                         u1_ctr++)
   7401                     {
   7402                         WORD32 i4_max_dep_ctb_y;
   7403                         WORD32 i4_max_dep_ctb_x;
   7404 
   7405                         /* Set max mv in ctb units */
   7406                         i4_max_mv_x_in_ctb =
   7407                             (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
   7408                             ps_ctxt->log_ctb_size;
   7409 
   7410                         i4_max_mv_y_in_ctb =
   7411                             (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
   7412                             ps_ctxt->log_ctb_size;
   7413                         /********************************************************************/
   7414                         /* Set max ctb_x and ctb_y dependency on reference picture          */
   7415                         /* Note +1 is due to delayed deblock, SAO, subpel plan dependency   */
   7416                         /********************************************************************/
   7417                         i4_max_dep_ctb_x = CLIP3(
   7418                             (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
   7419                             0,
   7420                             ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
   7421                         i4_max_dep_ctb_y = CLIP3(
   7422                             (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
   7423                             0,
   7424                             ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
   7425 
   7426                         ihevce_dmgr_map_chk_sync(
   7427                             ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
   7428                             ps_ctxt->thrd_id,
   7429                             i4_ctb_x,
   7430                             i4_ctb_y,
   7431                             i4_max_mv_x_in_ctb,
   7432                             i4_max_mv_y_in_ctb);
   7433                     }
   7434                 }
   7435 
   7436                 /* Loop across different Ref IDx */
   7437                 for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
   7438                 {
   7439                     S32 resultid;
   7440                     S08 u1_default_ref_id;
   7441                     S32 i4_num_srch_cands = 0;
   7442                     S32 i4_num_refinement_iterations;
   7443                     S32 i4_refine_iter_ctr;
   7444 
   7445                     if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
   7446                        (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
   7447                     {
   7448                         u1_pred_dir = u1_pred_dir_ctr;
   7449                     }
   7450                     else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
   7451                     {
   7452                         u1_pred_dir = 1;
   7453                     }
   7454 
   7455                     u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
   7456                                                            : ps_ctxt->ai1_future_list[0];
   7457                     au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
   7458 
   7459                     i4_num_srch_cands = 0;
   7460                     resultid = 0;
   7461 
   7462                     /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
   7463                     if(0 == blk_id_in_ctb)
   7464                     {
   7465                         /*****************************************************************/
   7466                         /* Initialize the mv grid with results of neighbours for the next*/
   7467                         /* ctb.                                                          */
   7468                         /*****************************************************************/
   7469                         hme_fill_ctb_neighbour_mvs(
   7470                             ps_curr_layer,
   7471                             blk_x,
   7472                             blk_y,
   7473                             aps_mv_grid[u1_pred_dir],
   7474                             u1_pred_dir_ctr,
   7475                             u1_default_ref_id,
   7476                             ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
   7477                     }
   7478 
   7479                     s_search_prms_blk.i1_ref_idx = u1_pred_dir;
   7480 
   7481                     {
   7482                         if((blk_id_in_full_ctb % 4) == 0)
   7483                         {
   7484                             ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
   7485                                 .as_pred_ctxt[u1_pred_dir]
   7486                                 .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
   7487                         }
   7488 
   7489                         if(blk_id_in_full_ctb == 0)
   7490                         {
   7491                             ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
   7492                         }
   7493 
   7494                         ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
   7495                             !gau1_encode_to_raster_y[blk_id_in_full_ctb];
   7496                     }
   7497 
   7498                     {
   7499                         S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
   7500                         S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
   7501                         U08 u1_is_blk_at_ctb_boundary = !y;
   7502 
   7503                         s_srch_cand_init_data.u1_is_left_available =
   7504                             !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
   7505 
   7506                         if(u1_is_blk_at_ctb_boundary)
   7507                         {
   7508                             s_srch_cand_init_data.u1_is_topRight_available = 0;
   7509                             s_srch_cand_init_data.u1_is_topLeft_available = 0;
   7510                             s_srch_cand_init_data.u1_is_top_available = 0;
   7511                         }
   7512                         else
   7513                         {
   7514                             s_srch_cand_init_data.u1_is_topRight_available =
   7515                                 gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
   7516                             s_srch_cand_init_data.u1_is_top_available = 1;
   7517                             s_srch_cand_init_data.u1_is_topLeft_available =
   7518                                 s_srch_cand_init_data.u1_is_left_available;
   7519                         }
   7520                     }
   7521 
   7522                     s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
   7523                     s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
   7524                     s_srch_cand_init_data.i4_pos_x = pos_x;
   7525                     s_srch_cand_init_data.i4_pos_y = pos_y;
   7526                     s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
   7527                     s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
   7528                     s_srch_cand_init_data.u1_search_candidate_list_index =
   7529                         au1_search_candidate_list_index[u1_pred_dir];
   7530 
   7531                     i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
   7532 
   7533                     /* Note this block also clips the MV range for all candidates */
   7534                     {
   7535                         S08 i1_check_for_mult_refs;
   7536 
   7537                         i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
   7538                                                              : (ps_ctxt->num_ref_past > 1);
   7539 
   7540                         ps_me_optimised_function_list->pf_mv_clipper(
   7541                             &s_search_prms_blk,
   7542                             i4_num_srch_cands,
   7543                             i1_check_for_mult_refs,
   7544                             ps_refine_prms->i4_num_steps_fpel_refine,
   7545                             ps_refine_prms->i4_num_steps_hpel_refine,
   7546                             ps_refine_prms->i4_num_steps_qpel_refine);
   7547                     }
   7548 
   7549 #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
   7550                     i4_num_refinement_iterations =
   7551                         ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
   7552                             ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
   7553                             : 1;
   7554 #else
   7555                     i4_num_refinement_iterations =
   7556                         ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
   7557 #endif
   7558 
   7559 #if ENABLE_EXPLICIT_SEARCH_IN_PQ
   7560                     if(e_me_quality_presets == ME_PRISTINE_QUALITY)
   7561                     {
   7562                         i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
   7563                                                                           : i4_num_act_ref_l1;
   7564                     }
   7565 #endif
   7566 
   7567                     for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
   7568                         i4_refine_iter_ctr++)
   7569                     {
   7570                         S32 center_x;
   7571                         S32 center_y;
   7572                         S32 center_ref_idx;
   7573 
   7574                         S08 *pi1_pred_dir_to_ref_idx =
   7575                             (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
   7576 
   7577                         {
   7578                             WORD32 i4_i;
   7579 
   7580                             for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
   7581                             {
   7582                                 ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
   7583                                 ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
   7584                                 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
   7585                                     MAX_SIGNED_16BIT_VAL;
   7586                                 ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
   7587                                 ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
   7588                                 ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
   7589 
   7590                                 if(ps_refine_prms->i4_num_results_per_part == 2)
   7591                                 {
   7592                                     ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
   7593                                         MAX_SIGNED_16BIT_VAL;
   7594                                     ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
   7595                                         MAX_SIGNED_16BIT_VAL;
   7596                                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
   7597                                         MAX_SIGNED_16BIT_VAL;
   7598                                     ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
   7599                                     ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
   7600                                     ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
   7601                                 }
   7602                             }
   7603 
   7604                             s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
   7605                             s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
   7606                         }
   7607 
   7608                         {
   7609                             search_node_t *ps_coloc_node;
   7610 
   7611                             S32 i = 0;
   7612 
   7613                             if(i4_num_refinement_iterations > 1)
   7614                             {
   7615                                 for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
   7616                                 {
   7617                                     ps_coloc_node =
   7618                                         s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
   7619                                             .ps_search_node;
   7620 
   7621                                     if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
   7622                                        ps_coloc_node->i1_ref_idx)
   7623                                     {
   7624                                         break;
   7625                                     }
   7626                                 }
   7627 
   7628                                 if(i == ai4_num_coloc_cands[u1_pred_dir])
   7629                                 {
   7630                                     i = 0;
   7631                                 }
   7632                             }
   7633                             else
   7634                             {
   7635                                 ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
   7636                                                     .ps_search_node;
   7637                             }
   7638 
   7639                             hme_set_mvp_node(
   7640                                 ps_search_results,
   7641                                 ps_coloc_node,
   7642                                 u1_pred_dir,
   7643                                 (i4_num_refinement_iterations > 1)
   7644                                     ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
   7645                                     : u1_default_ref_id);
   7646 
   7647                             center_x = ps_coloc_node->ps_mv->i2_mvx;
   7648                             center_y = ps_coloc_node->ps_mv->i2_mvy;
   7649                             center_ref_idx = ps_coloc_node->i1_ref_idx;
   7650                         }
   7651 
   7652                         /* Full-Pel search */
   7653                         {
   7654                             S32 num_unique_nodes;
   7655 
   7656                             memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
   7657 
   7658                             num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
   7659                                 as_unique_search_nodes,
   7660                                 s_search_prms_blk.ps_search_candts,
   7661                                 au4_unique_node_map,
   7662                                 pi1_pred_dir_to_ref_idx,
   7663                                 i4_num_srch_cands,
   7664                                 s_search_prms_blk.i4_num_init_candts,
   7665                                 i4_refine_iter_ctr,
   7666                                 i4_num_refinement_iterations,
   7667                                 i4_num_act_ref_l0,
   7668                                 center_ref_idx,
   7669                                 center_x,
   7670                                 center_y,
   7671                                 ps_ctxt->s_frm_prms.bidir_enabled,
   7672                                 e_me_quality_presets);
   7673 
   7674                             /*************************************************************************/
   7675                             /* This array stores the ids of the partitions whose                     */
   7676                             /* SADs are updated. Since the partitions whose SADs are updated may not */
   7677                             /* be in contiguous order, we supply another level of indirection.       */
   7678                             /*************************************************************************/
   7679                             ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
   7680                                 s_search_prms_blk.i4_part_mask,
   7681                                 &ps_fullpel_refine_ctxt->ai4_part_id[0]);
   7682 
   7683                             if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
   7684                             {
   7685                                 S32 i;
   7686                                 /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
   7687                                 S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
   7688                                                             (s_search_prms_blk.i4_cu_y_off * 4);
   7689 
   7690                                 for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
   7691                                 {
   7692                                     S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
   7693 
   7694                                     hme_compute_final_sigma_of_pu_from_base_blocks(
   7695                                         ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
   7696                                         ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
   7697                                         au8_final_src_sigmaX,
   7698                                         au8_final_src_sigmaXSquared,
   7699                                         16,
   7700                                         4,
   7701                                         i4_part_id,
   7702                                         16);
   7703                                 }
   7704 
   7705                                 s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
   7706                                 s_common_frm_prms.pu8_part_src_sigmaXSquared =
   7707                                     au8_final_src_sigmaXSquared;
   7708 
   7709                                 s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
   7710                                 s_search_prms_blk.pu8_part_src_sigmaXSquared =
   7711                                     au8_final_src_sigmaXSquared;
   7712                             }
   7713 
   7714                             if(0 == num_unique_nodes)
   7715                             {
   7716                                 continue;
   7717                             }
   7718 
   7719                             if(num_unique_nodes >= 2)
   7720                             {
   7721                                 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
   7722                                 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
   7723                                 if(ps_ctxt->i4_pic_type != IV_P_FRAME)
   7724                                 {
   7725                                     if(ps_ctxt->i4_temporal_layer == 1)
   7726                                     {
   7727                                         hme_fullpel_cand_sifter(
   7728                                             &s_search_prms_blk,
   7729                                             ps_curr_layer,
   7730                                             &ps_ctxt->s_wt_pred,
   7731                                             ALPHA_FOR_NOISE_TERM_IN_ME,
   7732                                             u1_is_cu_noisy,
   7733                                             ps_me_optimised_function_list);
   7734                                     }
   7735                                     else
   7736                                     {
   7737                                         hme_fullpel_cand_sifter(
   7738                                             &s_search_prms_blk,
   7739                                             ps_curr_layer,
   7740                                             &ps_ctxt->s_wt_pred,
   7741                                             ALPHA_FOR_NOISE_TERM_IN_ME,
   7742                                             u1_is_cu_noisy,
   7743                                             ps_me_optimised_function_list);
   7744                                     }
   7745                                 }
   7746                                 else
   7747                                 {
   7748                                     hme_fullpel_cand_sifter(
   7749                                         &s_search_prms_blk,
   7750                                         ps_curr_layer,
   7751                                         &ps_ctxt->s_wt_pred,
   7752                                         ALPHA_FOR_NOISE_TERM_IN_ME_P,
   7753                                         u1_is_cu_noisy,
   7754                                         ps_me_optimised_function_list);
   7755                                 }
   7756                             }
   7757 
   7758                             s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
   7759 
   7760                             hme_fullpel_refine(
   7761                                 ps_refine_prms,
   7762                                 &s_search_prms_blk,
   7763                                 ps_curr_layer,
   7764                                 &ps_ctxt->s_wt_pred,
   7765                                 au4_unique_node_map,
   7766                                 num_unique_nodes,
   7767                                 blk_8x8_mask,
   7768                                 center_x,
   7769                                 center_y,
   7770                                 center_ref_idx,
   7771                                 e_me_quality_presets,
   7772                                 ps_me_optimised_function_list);
   7773                         }
   7774 
   7775                         /* Sub-Pel search */
   7776                         {
   7777                             hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
   7778 
   7779                             s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
   7780                                 &ps_ctxt->s_buf_mgr,
   7781                                 INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
   7782                             /* MV limit is different based on ref. PIC */
   7783                             for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
   7784                             {
   7785                                 SCALE_RANGE_PRMS(
   7786                                     as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
   7787                                 SCALE_RANGE_PRMS(
   7788                                     as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
   7789                             }
   7790                             s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
   7791                             s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
   7792 
   7793                             hme_subpel_refine_cu_hs(
   7794                                 &s_subpel_prms,
   7795                                 ps_curr_layer,
   7796                                 ps_search_results,
   7797                                 u1_pred_dir,
   7798                                 &ps_ctxt->s_wt_pred,
   7799                                 blk_8x8_mask,
   7800                                 ps_ctxt->ps_func_selector,
   7801                                 ps_cmn_utils_optimised_function_list,
   7802                                 ps_me_optimised_function_list);
   7803                         }
   7804                     }
   7805                 }
   7806                 /* Populate the new PU struct with the results post subpel refinement*/
   7807                 {
   7808                     inter_cu_results_t *ps_cu_results;
   7809                     WORD32 best_inter_cost, intra_cost, posx, posy;
   7810 
   7811                     UWORD8 intra_8x8_enabled = 0;
   7812 
   7813                     /*  cost of 16x16 cu parent  */
   7814                     WORD32 parent_cost = MAX_32BIT_VAL;
   7815 
   7816                     /*  cost of 8x8 cu children  */
   7817                     /*********************************************************************/
   7818                     /* Assuming parent is not split, then we signal 1 bit for this parent*/
   7819                     /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
   7820                     /* So, 4*lambda is extra for children cost.                          */
   7821                     /*********************************************************************/
   7822                     WORD32 child_cost = 0;
   7823 
   7824                     ps_cu_results = ps_search_results->ps_cu_results;
   7825 
   7826                     /* Initialize the pu_results pointers to the first struct in the stack array */
   7827                     ps_pu_results = as_inter_pu_results;
   7828 
   7829                     hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
   7830 
   7831                     hme_populate_pus(
   7832                         ps_thrd_ctxt,
   7833                         ps_ctxt,
   7834                         &s_subpel_prms,
   7835                         ps_search_results,
   7836                         ps_cu_results,
   7837                         ps_pu_results,
   7838                         &(as_pu_results[0][0][0]),
   7839                         &s_common_frm_prms,
   7840                         &ps_ctxt->s_wt_pred,
   7841                         ps_curr_layer,
   7842                         au1_pred_dir_searched,
   7843                         i4_num_pred_dir);
   7844 
   7845                     ps_cu_results->i4_inp_offset =
   7846                         (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
   7847 
   7848                     hme_decide_part_types(
   7849                         ps_cu_results,
   7850                         ps_pu_results,
   7851                         &s_common_frm_prms,
   7852                         ps_ctxt,
   7853                         ps_cmn_utils_optimised_function_list,
   7854                         ps_me_optimised_function_list
   7855 
   7856                     );
   7857 
   7858                     /* Update the MIN and MAX MVs of the dynamic search range for each ref. pic. */
   7859                     /* Only for P pic. For P, both flags are 0; I and B pics have them mutually exclusive */
   7860                     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
   7861                     {
   7862                         WORD32 res_ctr;
   7863 
   7864                         for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
   7865                         {
   7866                             WORD32 num_part = 2, part_ctr;
   7867                             part_type_results_t *ps_best_results =
   7868                                 &ps_cu_results->ps_best_results[res_ctr];
   7869 
   7870                             if(PRT_2Nx2N == ps_best_results->u1_part_type)
   7871                                 num_part = 1;
   7872 
   7873                             for(part_ctr = 0; part_ctr < num_part; part_ctr++)
   7874                             {
   7875                                 pu_result_t *ps_pu_results =
   7876                                     &ps_best_results->as_pu_results[part_ctr];
   7877 
   7878                                 ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
   7879 
   7880                                 hme_update_dynamic_search_params(
   7881                                     &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
   7882                                          .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
   7883                                     ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
   7884 
   7885                                 /* Sanity Check */
   7886                                 ASSERT(
   7887                                     ps_pu_results->pu.mv.i1_l0_ref_idx <
   7888                                     ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
   7889 
   7890                                 /* No L1 for P Pic. */
   7891                                 ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
   7892                                 /* No BI for P Pic. */
   7893                                 ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
   7894                             }
   7895                         }
   7896                     }
   7897 
   7898                     /*****************************************************************/
   7899                     /* INSERT INTRA RESULTS AT 16x16 LEVEL.                          */
   7900                     /*****************************************************************/
   7901 
   7902 #if DISABLE_INTRA_IN_BPICS
   7903                     if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
   7904                              (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
   7905 #endif
   7906                     {
   7907                         if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
   7908                         {
   7909                             hme_insert_intra_nodes_post_bipred(
   7910                                 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
   7911                         }
   7912                     }
   7913 
   7914 #if DISABLE_INTRA_IN_BPICS
   7915                     if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
   7916                        (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
   7917                     {
   7918                         intra_8x8_enabled = 0;
   7919                     }
   7920                     else
   7921 #endif
   7922                     {
   7923                         /*TRAQO intra flag updation*/
   7924                         if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
   7925                         {
   7926                             best_inter_cost =
   7927                                 ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
   7928                             intra_cost =
   7929                                 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
   7930                             /*@16x16 level*/
   7931                             posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
   7932                                     << 2) >>
   7933                                    4;
   7934                             posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
   7935                                     << 2) >>
   7936                                    4;
   7937                         }
   7938                         else
   7939                         {
   7940                             best_inter_cost =
   7941                                 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
   7942                             posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
   7943                                     << 2) >>
   7944                                    3;
   7945                             posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
   7946                                     << 2) >>
   7947                                    3;
   7948                         }
   7949 
   7950                         /* Disable intra16/32/64 flags based on split flags recommended by IPE */
   7951                         if(ps_cur_ipe_ctb->u1_split_flag)
   7952                         {
   7953                             /* Id of the 32x32 block, 16x16 block in a CTB */
   7954                             WORD32 i4_32x32_id =
   7955                                 (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
   7956                             WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
   7957                                                  ((ps_cu_results->u1_x_off >> 4) & 0x1);
   7958 
   7959                             if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
   7960                             {
   7961                                 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
   7962                                        .as_intra16_analyse[i4_16x16_id]
   7963                                        .b1_split_flag)
   7964                                 {
   7965                                     intra_8x8_enabled =
   7966                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
   7967                                             .as_intra16_analyse[i4_16x16_id]
   7968                                             .as_intra8_analyse[0]
   7969                                             .b1_valid_cu;
   7970                                     intra_8x8_enabled &=
   7971                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
   7972                                             .as_intra16_analyse[i4_16x16_id]
   7973                                             .as_intra8_analyse[1]
   7974                                             .b1_valid_cu;
   7975                                     intra_8x8_enabled &=
   7976                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
   7977                                             .as_intra16_analyse[i4_16x16_id]
   7978                                             .as_intra8_analyse[2]
   7979                                             .b1_valid_cu;
   7980                                     intra_8x8_enabled &=
   7981                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
   7982                                             .as_intra16_analyse[i4_16x16_id]
   7983                                             .as_intra8_analyse[3]
   7984                                             .b1_valid_cu;
   7985                                 }
   7986                             }
   7987                         }
   7988                     }
   7989 
   7990                     if(blk_8x8_mask == 0xf)
   7991                     {
   7992                         parent_cost =
   7993                             ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
   7994                         ps_search_results->u1_split_flag = 0;
   7995                     }
   7996                     else
   7997                     {
   7998                         ps_search_results->u1_split_flag = 1;
   7999                     }
   8000 
   8001                     ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
   8002 
   8003                     if(s_common_frm_prms.u1_is_cu_noisy)
   8004                     {
   8005                         intra_8x8_enabled = 0;
   8006                     }
   8007 
   8008                     /* Evaluate 8x8 if the NxN part id is enabled or intra 8x8 is enabled */
   8009                     if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
   8010                     {
   8011                         /* Populates the PU's for the 4 8x8's in one call */
   8012                         hme_populate_pus_8x8_cu(
   8013                             ps_thrd_ctxt,
   8014                             ps_ctxt,
   8015                             &s_subpel_prms,
   8016                             ps_search_results,
   8017                             ps_cu_results,
   8018                             ps_pu_results,
   8019                             &(as_pu_results[0][0][0]),
   8020                             &s_common_frm_prms,
   8021                             au1_pred_dir_searched,
   8022                             i4_num_pred_dir,
   8023                             blk_8x8_mask);
   8024 
   8025                         /* Re-initialize the pu_results pointers to the first struct in the stack array */
   8026                         ps_pu_results = as_inter_pu_results;
   8027 
   8028                         for(i = 0; i < 4; i++)
   8029                         {
   8030                             if((blk_8x8_mask & (1 << i)))
   8031                             {
   8032                                 if(ps_cu_results->i4_part_mask)
   8033                                 {
   8034                                     hme_decide_part_types(
   8035                                         ps_cu_results,
   8036                                         ps_pu_results,
   8037                                         &s_common_frm_prms,
   8038                                         ps_ctxt,
   8039                                         ps_cmn_utils_optimised_function_list,
   8040                                         ps_me_optimised_function_list
   8041 
   8042                                     );
   8043                                 }
   8044                                 /*****************************************************************/
   8045                                 /* INSERT INTRA RESULTS AT 8x8 LEVEL.                          */
   8046                                 /*****************************************************************/
   8047 #if DISABLE_INTRA_IN_BPICS
   8048                                 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
   8049                                          (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
   8050                                           TEMPORAL_LAYER_DISABLE)))
   8051 #endif
   8052                                 {
   8053                                     if(!(DISABLE_INTRA_WHEN_NOISY &&
   8054                                          s_common_frm_prms.u1_is_cu_noisy))
   8055                                     {
   8056                                         hme_insert_intra_nodes_post_bipred(
   8057                                             ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
   8058                                     }
   8059                                 }
   8060 
   8061                                 child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
   8062                             }
   8063 
   8064                             ps_cu_results++;
   8065                             ps_pu_results++;
   8066                         }
   8067 
   8068                         /* Compare 16x16 vs 8x8 cost */
   8069                         if(child_cost < parent_cost)
   8070                         {
   8071                             ps_search_results->best_cu_cost = child_cost;
   8072                             ps_search_results->u1_split_flag = 1;
   8073                         }
   8074                     }
   8075                 }
   8076 
   8077                 hme_update_mv_bank_encode(
   8078                     ps_search_results,
   8079                     ps_curr_layer->ps_layer_mvbank,
   8080                     blk_x,
   8081                     blk_y,
   8082                     &s_mv_update_prms,
   8083                     au1_pred_dir_searched,
   8084                     i4_num_act_ref_l0);
   8085 
   8086                 /*********************************************************************/
   8087                 /* Map the best results to an MV Grid. This is an 18x18 grid that is */
   8088                 /* useful for doing things like predictor for cost calculation or    */
   8089                 /* also for merge calculations if need be.                           */
   8090                 /*********************************************************************/
   8091                 hme_map_mvs_to_grid(
   8092                     &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
   8093             }
   8094 
   8095             /* Set the CU tree nodes appropriately */
   8096             if(e_me_quality_presets != ME_PRISTINE_QUALITY)
   8097             {
   8098                 WORD32 i, j;
   8099 
   8100                 for(i = 0; i < 16; i++)
   8101                 {
   8102                     cur_ctb_cu_tree_t *ps_tree_node =
   8103                         ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
   8104                     search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
   8105 
   8106                     switch(i >> 2)
   8107                     {
   8108                     case 0:
   8109                     {
   8110                         ps_tree_node = ps_tree_node->ps_child_node_tl;
   8111 
   8112                         break;
   8113                     }
   8114                     case 1:
   8115                     {
   8116                         ps_tree_node = ps_tree_node->ps_child_node_tr;
   8117 
   8118                         break;
   8119                     }
   8120                     case 2:
   8121                     {
   8122                         ps_tree_node = ps_tree_node->ps_child_node_bl;
   8123 
   8124                         break;
   8125                     }
   8126                     case 3:
   8127                     {
   8128                         ps_tree_node = ps_tree_node->ps_child_node_br;
   8129 
   8130                         break;
   8131                     }
   8132                     }
   8133 
   8134                     switch(i % 4)
   8135                     {
   8136                     case 0:
   8137                     {
   8138                         ps_tree_node = ps_tree_node->ps_child_node_tl;
   8139 
   8140                         break;
   8141                     }
   8142                     case 1:
   8143                     {
   8144                         ps_tree_node = ps_tree_node->ps_child_node_tr;
   8145 
   8146                         break;
   8147                     }
   8148                     case 2:
   8149                     {
   8150                         ps_tree_node = ps_tree_node->ps_child_node_bl;
   8151 
   8152                         break;
   8153                     }
   8154                     case 3:
   8155                     {
   8156                         ps_tree_node = ps_tree_node->ps_child_node_br;
   8157 
   8158                         break;
   8159                     }
   8160                     }
   8161 
   8162                     if(ai4_blk_8x8_mask[i] == 15)
   8163                     {
   8164                         if(!ps_results->u1_split_flag)
   8165                         {
   8166                             ps_tree_node->is_node_valid = 1;
   8167                             NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
   8168                         }
   8169                         else
   8170                         {
   8171                             ps_tree_node->is_node_valid = 0;
   8172                             ENABLE_THE_CHILDREN_NODES(ps_tree_node);
   8173                         }
   8174                     }
   8175                     else
   8176                     {
   8177                         cur_ctb_cu_tree_t *ps_tree_child;
   8178 
   8179                         ps_tree_node->is_node_valid = 0;
   8180 
   8181                         for(j = 0; j < 4; j++)
   8182                         {
   8183                             switch(j)
   8184                             {
   8185                             case 0:
   8186                             {
   8187                                 ps_tree_child = ps_tree_node->ps_child_node_tl;
   8188 
   8189                                 break;
   8190                             }
   8191                             case 1:
   8192                             {
   8193                                 ps_tree_child = ps_tree_node->ps_child_node_tr;
   8194 
   8195                                 break;
   8196                             }
   8197                             case 2:
   8198                             {
   8199                                 ps_tree_child = ps_tree_node->ps_child_node_bl;
   8200 
   8201                                 break;
   8202                             }
   8203                             case 3:
   8204                             {
   8205                                 ps_tree_child = ps_tree_node->ps_child_node_br;
   8206 
   8207                                 break;
   8208                             }
   8209                             }
   8210 
   8211                             ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
   8212                         }
   8213                     }
   8214                 }
   8215             }
   8216 
   8217             if(ME_PRISTINE_QUALITY == e_me_quality_presets)
   8218             {
   8219                 cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
   8220 
   8221                 hme_analyse_mv_clustering(
   8222                     ps_ctxt->as_search_results_16x16,
   8223                     ps_ctxt->as_cu16x16_results,
   8224                     ps_ctxt->as_cu8x8_results,
   8225                     ps_ctxt->ps_ctb_cluster_info,
   8226                     ps_ctxt->ai1_future_list,
   8227                     ps_ctxt->ai1_past_list,
   8228                     ps_ctxt->s_frm_prms.bidir_enabled,
   8229                     e_me_quality_presets);
   8230 
   8231 #if DISABLE_BLK_MERGE_WHEN_NOISY
   8232                 ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
   8233                 ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
   8234                 ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
   8235                 ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
   8236                 ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
   8237                 ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
   8238                 ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
   8239                 ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
   8240                 ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
   8241                 ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
   8242 #endif
   8243 
   8244                 en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
   8245                                  (ps_tree->ps_child_node_tr->is_node_valid << 1) |
   8246                                  (ps_tree->ps_child_node_bl->is_node_valid << 2) |
   8247                                  (ps_tree->ps_child_node_br->is_node_valid << 3);
   8248 
   8249                 en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
   8250                                      (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
   8251                                      (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
   8252                                      (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
   8253                                      (ps_tree->u1_inter_eval_enable << 4);
   8254             }
   8255             else
   8256             {
   8257                 en_merge_execution = 0x1f;
   8258 
   8259 #if DISABLE_BLK_MERGE_WHEN_NOISY
   8260                 en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
   8261                                  ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
   8262                                  ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
   8263                                  ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
   8264 #endif
   8265             }
   8266 
   8267             /* Re-initialize the pu_results pointers to the first struct in the stack array */
   8268             ps_pu_results = as_inter_pu_results;
   8269 
   8270             {
   8271                 WORD32 ref_ctr;
   8272 
   8273                 s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
   8274                 s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
   8275 
   8276                 /* MV limit is different based on ref. PIC */
   8277                 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
   8278                 {
   8279                     SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
   8280                     SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
   8281                 }
   8282 
   8283                 e_merge_result = CU_SPLIT;
   8284                 merge_count_32x32 = 0;
   8285 
   8286                 if((en_merge_32x32 & 1) && (en_merge_execution & 1))
   8287                 {
   8288                     range_prms_t *ps_pic_limit;
   8289                     if(s_merge_prms_32x32_tl.i4_use_rec == 1)
   8290                     {
   8291                         ps_pic_limit = &s_pic_limit_rec;
   8292                     }
   8293                     else
   8294                     {
   8295                         ps_pic_limit = &s_pic_limit_inp;
   8296                     }
   8297                     /* MV limit is different based on ref. PIC */
   8298                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
   8299                     {
   8300                         hme_derive_search_range(
   8301                             s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
   8302                             ps_pic_limit,
   8303                             &as_mv_limit[ref_ctr],
   8304                             i4_ctb_x << 6,
   8305                             i4_ctb_y << 6,
   8306                             32,
   8307                             32);
   8308 
   8309                         SCALE_RANGE_PRMS_POINTERS(
   8310                             s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
   8311                             s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
   8312                             2);
   8313                     }
   8314                     s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
   8315                     s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
   8316                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
   8317 
   8318                     e_merge_result = hme_try_merge_high_speed(
   8319                         ps_thrd_ctxt,
   8320                         ps_ctxt,
   8321                         ps_cur_ipe_ctb,
   8322                         &s_subpel_prms,
   8323                         &s_merge_prms_32x32_tl,
   8324                         ps_pu_results,
   8325                         &as_pu_results[0][0][0]);
   8326 
   8327                     if(e_merge_result == CU_MERGED)
   8328                     {
   8329                         inter_cu_results_t *ps_cu_results =
   8330                             s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
   8331 
   8332                         if(!((ps_cu_results->u1_num_best_results == 1) &&
   8333                              (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
   8334                         {
   8335                             hme_map_mvs_to_grid(
   8336                                 &aps_mv_grid[0],
   8337                                 s_merge_prms_32x32_tl.ps_results_merge,
   8338                                 s_merge_prms_32x32_tl.au1_pred_dir_searched,
   8339                                 s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
   8340                         }
   8341 
   8342                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
   8343                         {
   8344                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8345                                 .ps_child_node_tl->is_node_valid = 1;
   8346                             NULLIFY_THE_CHILDREN_NODES(
   8347                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8348                                     .ps_child_node_tl);
   8349                         }
   8350 
   8351                         merge_count_32x32++;
   8352                         e_merge_result = CU_SPLIT;
   8353                     }
   8354                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
   8355                     {
   8356 #if ENABLE_CU_TREE_CULLING
   8357                         cur_ctb_cu_tree_t *ps_tree =
   8358                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
   8359 
   8360                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
   8361                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
   8362                         ENABLE_THE_CHILDREN_NODES(ps_tree);
   8363                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
   8364                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
   8365                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
   8366                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
   8367 #endif
   8368                     }
   8369                 }
   8370                 else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
   8371                 {
   8372 #if ENABLE_CU_TREE_CULLING
   8373                     cur_ctb_cu_tree_t *ps_tree =
   8374                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
   8375 
   8376                     ENABLE_THE_CHILDREN_NODES(ps_tree);
   8377                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
   8378                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
   8379                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
   8380                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
   8381 #endif
   8382 
   8383                     if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
   8384                     {
   8385                         ps_tree->is_node_valid = 0;
   8386                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
   8387                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
   8388                     }
   8389                 }
   8390 
   8391                 if((en_merge_32x32 & 2) && (en_merge_execution & 2))
   8392                 {
   8393                     range_prms_t *ps_pic_limit;
   8394                     if(s_merge_prms_32x32_tr.i4_use_rec == 1)
   8395                     {
   8396                         ps_pic_limit = &s_pic_limit_rec;
   8397                     }
   8398                     else
   8399                     {
   8400                         ps_pic_limit = &s_pic_limit_inp;
   8401                     }
   8402                     /* MV limit is different based on ref. PIC */
   8403                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
   8404                     {
   8405                         hme_derive_search_range(
   8406                             s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
   8407                             ps_pic_limit,
   8408                             &as_mv_limit[ref_ctr],
   8409                             (i4_ctb_x << 6) + 32,
   8410                             i4_ctb_y << 6,
   8411                             32,
   8412                             32);
   8413                         SCALE_RANGE_PRMS_POINTERS(
   8414                             s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
   8415                             s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
   8416                             2);
   8417                     }
   8418                     s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
   8419                     s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
   8420                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
   8421 
   8422                     e_merge_result = hme_try_merge_high_speed(
   8423                         ps_thrd_ctxt,
   8424                         ps_ctxt,
   8425                         ps_cur_ipe_ctb,
   8426                         &s_subpel_prms,
   8427                         &s_merge_prms_32x32_tr,
   8428                         ps_pu_results,
   8429                         &as_pu_results[0][0][0]);
   8430 
   8431                     if(e_merge_result == CU_MERGED)
   8432                     {
   8433                         inter_cu_results_t *ps_cu_results =
   8434                             s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
   8435 
   8436                         if(!((ps_cu_results->u1_num_best_results == 1) &&
   8437                              (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
   8438                         {
   8439                             hme_map_mvs_to_grid(
   8440                                 &aps_mv_grid[0],
   8441                                 s_merge_prms_32x32_tr.ps_results_merge,
   8442                                 s_merge_prms_32x32_tr.au1_pred_dir_searched,
   8443                                 s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
   8444                         }
   8445 
   8446                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
   8447                         {
   8448                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8449                                 .ps_child_node_tr->is_node_valid = 1;
   8450                             NULLIFY_THE_CHILDREN_NODES(
   8451                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8452                                     .ps_child_node_tr);
   8453                         }
   8454 
   8455                         merge_count_32x32++;
   8456                         e_merge_result = CU_SPLIT;
   8457                     }
   8458                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
   8459                     {
   8460 #if ENABLE_CU_TREE_CULLING
   8461                         cur_ctb_cu_tree_t *ps_tree =
   8462                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
   8463 
   8464                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
   8465                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
   8466                         ENABLE_THE_CHILDREN_NODES(ps_tree);
   8467                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
   8468                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
   8469                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
   8470                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
   8471 #endif
   8472                     }
   8473                 }
   8474                 else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
   8475                 {
   8476 #if ENABLE_CU_TREE_CULLING
   8477                     cur_ctb_cu_tree_t *ps_tree =
   8478                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
   8479 
   8480                     ENABLE_THE_CHILDREN_NODES(ps_tree);
   8481                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
   8482                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
   8483                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
   8484                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
   8485 #endif
   8486 
   8487                     if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
   8488                     {
   8489                         ps_tree->is_node_valid = 0;
   8490                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
   8491                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
   8492                     }
   8493                 }
   8494 
   8495                 if((en_merge_32x32 & 4) && (en_merge_execution & 4))
   8496                 {
   8497                     range_prms_t *ps_pic_limit;
   8498                     if(s_merge_prms_32x32_bl.i4_use_rec == 1)
   8499                     {
   8500                         ps_pic_limit = &s_pic_limit_rec;
   8501                     }
   8502                     else
   8503                     {
   8504                         ps_pic_limit = &s_pic_limit_inp;
   8505                     }
   8506                     /* MV limit is different based on ref. PIC */
   8507                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
   8508                     {
   8509                         hme_derive_search_range(
   8510                             s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
   8511                             ps_pic_limit,
   8512                             &as_mv_limit[ref_ctr],
   8513                             i4_ctb_x << 6,
   8514                             (i4_ctb_y << 6) + 32,
   8515                             32,
   8516                             32);
   8517                         SCALE_RANGE_PRMS_POINTERS(
   8518                             s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
   8519                             s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
   8520                             2);
   8521                     }
   8522                     s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
   8523                     s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
   8524                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
   8525 
   8526                     e_merge_result = hme_try_merge_high_speed(
   8527                         ps_thrd_ctxt,
   8528                         ps_ctxt,
   8529                         ps_cur_ipe_ctb,
   8530                         &s_subpel_prms,
   8531                         &s_merge_prms_32x32_bl,
   8532                         ps_pu_results,
   8533                         &as_pu_results[0][0][0]);
   8534 
   8535                     if(e_merge_result == CU_MERGED)
   8536                     {
   8537                         inter_cu_results_t *ps_cu_results =
   8538                             s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
   8539 
   8540                         if(!((ps_cu_results->u1_num_best_results == 1) &&
   8541                              (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
   8542                         {
   8543                             hme_map_mvs_to_grid(
   8544                                 &aps_mv_grid[0],
   8545                                 s_merge_prms_32x32_bl.ps_results_merge,
   8546                                 s_merge_prms_32x32_bl.au1_pred_dir_searched,
   8547                                 s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
   8548                         }
   8549 
   8550                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
   8551                         {
   8552                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8553                                 .ps_child_node_bl->is_node_valid = 1;
   8554                             NULLIFY_THE_CHILDREN_NODES(
   8555                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8556                                     .ps_child_node_bl);
   8557                         }
   8558 
   8559                         merge_count_32x32++;
   8560                         e_merge_result = CU_SPLIT;
   8561                     }
   8562                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
   8563                     {
   8564 #if ENABLE_CU_TREE_CULLING
   8565                         cur_ctb_cu_tree_t *ps_tree =
   8566                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
   8567 
   8568                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
   8569                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
   8570                         ENABLE_THE_CHILDREN_NODES(ps_tree);
   8571                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
   8572                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
   8573                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
   8574                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
   8575 #endif
   8576                     }
   8577                 }
   8578                 else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
   8579                 {
   8580 #if ENABLE_CU_TREE_CULLING
   8581                     cur_ctb_cu_tree_t *ps_tree =
   8582                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
   8583 
   8584                     ENABLE_THE_CHILDREN_NODES(ps_tree);
   8585                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
   8586                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
   8587                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
   8588                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
   8589 #endif
   8590 
   8591                     if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
   8592                     {
   8593                         ps_tree->is_node_valid = 0;
   8594                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
   8595                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
   8596                     }
   8597                 }
   8598 
   8599                 if((en_merge_32x32 & 8) && (en_merge_execution & 8))
   8600                 {
   8601                     range_prms_t *ps_pic_limit;
   8602                     if(s_merge_prms_32x32_br.i4_use_rec == 1)
   8603                     {
   8604                         ps_pic_limit = &s_pic_limit_rec;
   8605                     }
   8606                     else
   8607                     {
   8608                         ps_pic_limit = &s_pic_limit_inp;
   8609                     }
   8610                     /* MV limit is different based on ref. PIC */
   8611                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
   8612                     {
   8613                         hme_derive_search_range(
   8614                             s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
   8615                             ps_pic_limit,
   8616                             &as_mv_limit[ref_ctr],
   8617                             (i4_ctb_x << 6) + 32,
   8618                             (i4_ctb_y << 6) + 32,
   8619                             32,
   8620                             32);
   8621 
   8622                         SCALE_RANGE_PRMS_POINTERS(
   8623                             s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
   8624                             s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
   8625                             2);
   8626                     }
   8627                     s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
   8628                     s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
   8629                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
   8630 
   8631                     e_merge_result = hme_try_merge_high_speed(
   8632                         ps_thrd_ctxt,
   8633                         ps_ctxt,
   8634                         ps_cur_ipe_ctb,
   8635                         &s_subpel_prms,
   8636                         &s_merge_prms_32x32_br,
   8637                         ps_pu_results,
   8638                         &as_pu_results[0][0][0]);
   8639 
   8640                     if(e_merge_result == CU_MERGED)
   8641                     {
   8642                         /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
   8643 
   8644                         if(!((ps_cu_results->u1_num_best_results == 1) &&
   8645                         (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
   8646                         {
   8647                         hme_map_mvs_to_grid
   8648                         (
   8649                         &aps_mv_grid[0],
   8650                         s_merge_prms_32x32_br.ps_results_merge,
   8651                         s_merge_prms_32x32_br.au1_pred_dir_searched,
   8652                         s_merge_prms_32x32_br.i4_num_pred_dir_actual
   8653                         );
   8654                         }*/
   8655 
   8656                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
   8657                         {
   8658                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8659                                 .ps_child_node_br->is_node_valid = 1;
   8660                             NULLIFY_THE_CHILDREN_NODES(
   8661                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8662                                     .ps_child_node_br);
   8663                         }
   8664 
   8665                         merge_count_32x32++;
   8666                         e_merge_result = CU_SPLIT;
   8667                     }
   8668                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
   8669                     {
   8670 #if ENABLE_CU_TREE_CULLING
   8671                         cur_ctb_cu_tree_t *ps_tree =
   8672                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
   8673 
   8674                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
   8675                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
   8676                         ENABLE_THE_CHILDREN_NODES(ps_tree);
   8677                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
   8678                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
   8679                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
   8680                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
   8681 #endif
   8682                     }
   8683                 }
   8684                 else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
   8685                 {
   8686 #if ENABLE_CU_TREE_CULLING
   8687                     cur_ctb_cu_tree_t *ps_tree =
   8688                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
   8689 
   8690                     ENABLE_THE_CHILDREN_NODES(ps_tree);
   8691                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
   8692                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
   8693                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
   8694                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
   8695 #endif
   8696 
   8697                     if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
   8698                     {
   8699                         ps_tree->is_node_valid = 0;
   8700                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
   8701                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
   8702                     }
   8703                 }
   8704 
   8705                 /* Try merging all 32x32 to 64x64 candts */
   8706                 if(((en_merge_32x32 & 0xf) == 0xf) &&
   8707                    (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
   8708                     ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
   8709                     if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
   8710                          !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
   8711                         (e_me_quality_presets != ME_XTREME_SPEED_25)))
   8712                     {
   8713                         range_prms_t *ps_pic_limit;
   8714                         if(s_merge_prms_64x64.i4_use_rec == 1)
   8715                         {
   8716                             ps_pic_limit = &s_pic_limit_rec;
   8717                         }
   8718                         else
   8719                         {
   8720                             ps_pic_limit = &s_pic_limit_inp;
   8721                         }
   8722                         /* MV limit is different based on ref. PIC */
   8723                         for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
   8724                         {
   8725                             hme_derive_search_range(
   8726                                 s_merge_prms_64x64.aps_mv_range[ref_ctr],
   8727                                 ps_pic_limit,
   8728                                 &as_mv_limit[ref_ctr],
   8729                                 i4_ctb_x << 6,
   8730                                 i4_ctb_y << 6,
   8731                                 64,
   8732                                 64);
   8733 
   8734                             SCALE_RANGE_PRMS_POINTERS(
   8735                                 s_merge_prms_64x64.aps_mv_range[ref_ctr],
   8736                                 s_merge_prms_64x64.aps_mv_range[ref_ctr],
   8737                                 2);
   8738                         }
   8739                         s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
   8740                         s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
   8741                         s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
   8742 
   8743                         e_merge_result = hme_try_merge_high_speed(
   8744                             ps_thrd_ctxt,
   8745                             ps_ctxt,
   8746                             ps_cur_ipe_ctb,
   8747                             &s_subpel_prms,
   8748                             &s_merge_prms_64x64,
   8749                             ps_pu_results,
   8750                             &as_pu_results[0][0][0]);
   8751 
   8752                         if((e_merge_result == CU_MERGED) &&
   8753                            (ME_PRISTINE_QUALITY != e_me_quality_presets))
   8754                         {
   8755                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8756                                 .is_node_valid = 1;
   8757                             NULLIFY_THE_CHILDREN_NODES(
   8758                                 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
   8759                         }
   8760                         else if(
   8761                             (e_merge_result == CU_SPLIT) &&
   8762                             (ME_PRISTINE_QUALITY == e_me_quality_presets))
   8763                         {
   8764                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
   8765                                 .is_node_valid = 0;
   8766                         }
   8767                     }
   8768 
   8769                 /*****************************************************************/
   8770                 /* UPDATION OF RESULT TO EXTERNAL STRUCTURES                     */
   8771                 /*****************************************************************/
   8772                 pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
   8773 
   8774                 {
   8775 #ifdef _DEBUG
   8776                     S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
   8777                                  ? 64
   8778                                  : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
   8779                     S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
   8780                                  ? 64
   8781                                  : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
   8782                     ASSERT(
   8783                         (wd * ht) ==
   8784                         ihevce_compute_area_of_valid_cus_in_ctb(
   8785                             &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
   8786 #endif
   8787                 }
   8788             }
   8789 
   8790             /* set the dependency for the corresponding row in enc loop */
   8791             ihevce_dmgr_set_row_row_sync(
   8792                 pv_dep_mngr_encloop_dep_me,
   8793                 (i4_ctb_x + 1),
   8794                 i4_ctb_y,
   8795                 tile_col_idx /* Col Tile No. */);
   8796 
   8797             left_ctb_in_diff_tile = 0;
   8798         }
   8799     }
   8800 }
   8801 
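   8802 /**
   8803 ********************************************************************************
   8804 *  @fn   void hme_refine_no_encode(ps_ctxt, ps_refine_prms, ps_multi_thrd_ctxt,
   8805 *                       lyr_job_type, i4_ping_pong, ppv_dep_mngr_hme_sync)
   8806 *  @brief  Top level entry point for refinement ME
   8807 *  @param[in,out]  ps_ctxt: ME Handle
   8808 *  @param[in]  ps_refine_prms : refinement layer prms
   8809 *  @param[in]  ps_multi_thrd_ctxt : multi thread ctxt; aps_curr_inp_pre_enc is read from it
   8810 *  @param[in]  lyr_job_type : job type of this layer (TODO: confirm usage)
   8811 *  @param[in]  i4_ping_pong : index into aps_curr_inp_pre_enc of the multi thread ctxt
   8812 *  @param[in]  ppv_dep_mngr_hme_sync : presumably dep. manager handles for HME sync (TODO: confirm)
   8813 *  @return None
   8814 ********************************************************************************
   8815 */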
   8816 void hme_refine_no_encode(
   8817     coarse_me_ctxt_t *ps_ctxt,
   8818     refine_prms_t *ps_refine_prms,
   8819     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
   8820     S32 lyr_job_type,
   8821     WORD32 i4_ping_pong,
   8822     void **ppv_dep_mngr_hme_sync)
   8823 {
   8824     BLK_SIZE_T e_search_blk_size, e_result_blk_size;
   8825     ME_QUALITY_PRESETS_T e_me_quality_presets =
   8826         ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
   8827 
   8828     /*************************************************************************/
   8829     /* Complexity of search: Low to High                                     */
   8830     /*************************************************************************/
   8831     SEARCH_COMPLEXITY_T e_search_complexity;
   8832 
   8833     /*************************************************************************/
   8834     /* Config parameter structures for various ME submodules                 */
   8835     /*************************************************************************/
   8836     hme_search_prms_t s_search_prms_blk;
   8837     mvbank_update_prms_t s_mv_update_prms;
   8838 
   8839     /*************************************************************************/
   8840     /* All types of search candidates for predictor based search.            */
   8841     /*************************************************************************/
   8842     S32 num_init_candts = 0;
   8843     search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
   8844     search_node_t as_top_neighbours[4], as_left_neighbours[3];
   8845     search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
   8846     search_node_t *ps_candt_l, *ps_candt_t;
   8847     search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
   8848     search_node_t *ps_candt_prj_bl[2];
   8849     search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
   8850     search_node_t *ps_candt_prj_coloc[2];
   8851 
   8852     pf_get_wt_inp fp_get_wt_inp;
   8853 
   8854     search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
   8855     U32 au4_unique_node_map[MAP_X_MAX * 2];
   8856 
   8857     /*EIID */
   8858     WORD32 i4_num_inter_wins = 0;  //debug code to find stat of
   8859     WORD32 i4_num_comparisions = 0;  //debug code
   8860     WORD32 i4_threshold_multiplier;
   8861     WORD32 i4_threshold_divider;
   8862     WORD32 i4_temporal_layer =
   8863         ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
   8864 
   8865     /*************************************************************************/
   8866     /* points to the search results for the blk level search (8x8/16x16)     */
   8867     /*************************************************************************/
   8868     search_results_t *ps_search_results;
   8869 
   8870     /*************************************************************************/
   8871     /* Coordinates                                                           */
   8872     /*************************************************************************/
   8873     S32 blk_x, i4_ctb_x, blk_id_in_ctb;
   8874     //S32 i4_ctb_y;
   8875     S32 pos_x, pos_y;
   8876     S32 blk_id_in_full_ctb;
   8877     S32 i4_num_srch_cands;
   8878 
   8879     S32 blk_y;
   8880 
   8881     /*************************************************************************/
   8882     /* Related to dimensions of block being searched and pic dimensions      */
   8883     /*************************************************************************/
   8884     S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
   8885     S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
   8886     S32 num_results_prev_layer;
   8887 
   8888     /*************************************************************************/
   8889     /* Size of a basic unit for this layer. For non encode layers, we search */
   8890     /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
   8891     /* basic unit size is the ctb size.                                      */
   8892     /*************************************************************************/
   8893     S32 unit_size;
   8894 
   8895     /*************************************************************************/
   8896     /* Pointers to context in current and coarser layers                     */
   8897     /*************************************************************************/
   8898     layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
   8899 
   8900     /*************************************************************************/
   8901     /* to store mv range per blk, and picture limit, allowed search range    */
   8902     /* range prms in hpel and qpel units as well                             */
   8903     /*************************************************************************/
   8904     range_prms_t s_range_prms_inp, s_range_prms_rec;
   8905     range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
   8906     /*************************************************************************/
   8907     /* These variables are used to track number of references at different   */
   8908     /* stages of ME.                                                         */
   8909     /*************************************************************************/
   8910     S32 i4_num_ref_fpel, i4_num_ref_before_merge;
   8911     S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
   8912     S32 lambda_inp = ps_refine_prms->lambda_inp;
   8913 
   8914     /*************************************************************************/
   8915     /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
   8916     /* Explicit means it searches on all active ref idx.                     */
   8917     /*************************************************************************/
   8918     S32 curr_layer_implicit, prev_layer_implicit;
   8919 
   8920     /*************************************************************************/
   8921     /* Variables for loop counts                                             */
   8922     /*************************************************************************/
   8923     S32 id;
   8924     S08 i1_ref_idx;
   8925 
   8926     /*************************************************************************/
   8927     /* Input pointer and stride                                              */
   8928     /*************************************************************************/
   8929     U08 *pu1_inp;
   8930     S32 i4_inp_stride;
   8931 
   8932     S32 end_of_frame;
   8933 
   8934     S32 num_sync_units_in_row;
   8935 
   8936     PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
   8937     ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
   8938 
   8939     /*************************************************************************/
   8940     /* Pointers to current and coarse layer are needed for projection */
   8941     /* Pointer to prev layer are needed for other candts like coloc   */
   8942     /*************************************************************************/
   8943     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
   8944 
   8945     ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
   8946 
   8947     num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
   8948 
   8949     /* Function pointer is selected based on the C vs X86 macro */
   8950 
   8951     fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
   8952                         ->pf_get_wt_inp_8x8;
   8953 
   8954     i4_inp_stride = ps_curr_layer->i4_inp_stride;
   8955     i4_pic_wd = ps_curr_layer->i4_wd;
   8956     i4_pic_ht = ps_curr_layer->i4_ht;
   8957     e_search_complexity = ps_refine_prms->e_search_complexity;
   8958 
   8959     end_of_frame = 0;
   8960 
   8961     /* If the previous layer is non-encode layer, then use dyadic projection */
   8962     if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
   8963         pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
   8964     else
   8965         pf_hme_project_coloc_candt = hme_project_coloc_candt;
   8966 
   8967     /* This points to all the initial candts */
   8968     ps_search_candts = &as_search_candts[0];
   8969 
   8970     {
   8971         e_search_blk_size = BLK_8x8;
   8972         blk_wd = blk_ht = 8;
   8973         blk_size_shift = 3;
   8974         s_mv_update_prms.i4_shift = 0;
   8975         /*********************************************************************/
   8976         /* In case we do not encode this layer, we search 8x8 with or without*/
   8977         /* enable 4x4 SAD.                                                   */
   8978         /*********************************************************************/
   8979         {
   8980             S32 i4_mask = (ENABLE_2Nx2N);
   8981 
   8982             e_result_blk_size = BLK_8x8;
   8983             if(ps_refine_prms->i4_enable_4x4_part)
   8984             {
   8985                 i4_mask |= (ENABLE_NxN);
   8986                 e_result_blk_size = BLK_4x4;
   8987                 s_mv_update_prms.i4_shift = 1;
   8988             }
   8989 
   8990             s_search_prms_blk.i4_part_mask = i4_mask;
   8991         }
   8992 
   8993         unit_size = blk_wd;
   8994         s_search_prms_blk.i4_inp_stride = unit_size;
   8995     }
   8996 
   8997     /* This is required to properly update the layer mv bank */
   8998     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
   8999     s_search_prms_blk.e_blk_size = e_search_blk_size;
   9000 
   9001     /*************************************************************************/
   9002     /* If current layer is explicit, then the number of ref frames are to    */
   9003     /* be same as previous layer. Else it will be 2                          */
   9004     /*************************************************************************/
   9005     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
   9006     if(ps_refine_prms->explicit_ref)
   9007     {
   9008         curr_layer_implicit = 0;
   9009         i4_num_ref_fpel = i4_num_ref_prev_layer;
   9010         /* 100578 : Using same mv cost fun. for all presets. */
   9011         s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
   9012     }
   9013     else
   9014     {
   9015         i4_num_ref_fpel = 2;
   9016         curr_layer_implicit = 1;
   9017         {
   9018             if(ME_MEDIUM_SPEED > e_me_quality_presets)
   9019             {
   9020                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
   9021             }
   9022             else
   9023             {
   9024 #if USE_MODIFIED == 1
   9025                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
   9026 #else
   9027                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
   9028 #endif
   9029             }
   9030         }
   9031     }
   9032 
   9033     i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
   9034     if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
   9035            IV_IDR_FRAME ||
   9036        ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
   9037     {
   9038         i4_num_ref_fpel = 1;
   9039     }
   9040     if(i4_num_ref_prev_layer <= 2)
   9041     {
   9042         prev_layer_implicit = 1;
   9043         curr_layer_implicit = 1;
   9044         i4_num_ref_each_dir = 1;
   9045     }
   9046     else
   9047     {
   9048         /* It is assumed that we have equal number of references in each dir */
   9049         //ASSERT(!(i4_num_ref_prev_layer & 1));
   9050         prev_layer_implicit = 0;
   9051         i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
   9052     }
   9053     s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
   9054     s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
   9055     s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
   9056 
   9057     /* this can be kept to 1 or 2 */
   9058     i4_num_ref_before_merge = 2;
   9059     i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
   9060 
   9061     /* Set up place holders to hold the search nodes of each initial candt */
   9062     for(i = 0; i < MAX_INIT_CANDTS; i++)
   9063     {
   9064         ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
   9065         INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
   9066     }
   9067 
   9068     /* redundant, but doing it here since it is used in pred ctxt init */
   9069     ps_candt_zeromv = ps_search_candts[0].ps_search_node;
   9070     for(i = 0; i < 3; i++)
   9071     {
   9072         search_node_t *ps_search_node;
   9073         ps_search_node = &as_left_neighbours[i];
   9074         INIT_SEARCH_NODE(ps_search_node, 0);
   9075         ps_search_node = &as_top_neighbours[i];
   9076         INIT_SEARCH_NODE(ps_search_node, 0);
   9077     }
   9078 
   9079     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
   9080     /* bottom left node always not available for the blk being searched */
   9081     as_left_neighbours[2].u1_is_avail = 0;
   9082     /*************************************************************************/
   9083     /* Initialize all the search results structure here. We update all the   */
   9084     /* search results to default values, and configure things like blk sizes */
   9085     /*************************************************************************/
   9086     if(ps_refine_prms->i4_encode == 0)
   9087     {
   9088         S32 pred_lx;
   9089         search_results_t *ps_search_results;
   9090 
   9091         ps_search_results = &ps_ctxt->s_search_results_8x8;
   9092         hme_init_search_results(
   9093             ps_search_results,
   9094             i4_num_ref_fpel,
   9095             ps_refine_prms->i4_num_fpel_results,
   9096             ps_refine_prms->i4_num_results_per_part,
   9097             e_search_blk_size,
   9098             0,
   9099             0,
   9100             &ps_ctxt->au1_is_past[0]);
   9101         for(pred_lx = 0; pred_lx < 2; pred_lx++)
   9102         {
   9103             hme_init_pred_ctxt_no_encode(
   9104                 &ps_search_results->as_pred_ctxt[pred_lx],
   9105                 ps_search_results,
   9106                 &as_top_neighbours[0],
   9107                 &as_left_neighbours[0],
   9108                 &ps_candt_prj_coloc[0],
   9109                 ps_candt_zeromv,
   9110                 ps_candt_zeromv,
   9111                 pred_lx,
   9112                 lambda_inp,
   9113                 ps_refine_prms->lambda_q_shift,
   9114                 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
   9115                 &ps_ctxt->ai2_ref_scf[0]);
   9116         }
   9117     }
   9118 
   9119     /*********************************************************************/
   9120     /* Initialize the dyn. search range params. for each reference index */
   9121     /* in current layer ctxt                                             */
   9122     /*********************************************************************/
   9123     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
   9124     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
   9125     {
   9126         WORD32 ref_ctr;
   9127 
   9128         for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
   9129         {
   9130             INIT_DYN_SEARCH_PRMS(
   9131                 &ps_ctxt->s_coarse_dyn_range_prms
   9132                      .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
   9133                 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
   9134         }
   9135     }
   9136 
   9137     /* Next set up initial candidates according to a given set of rules.   */
   9138     /* The number of initial candidates affects the quality of ME in the   */
   9139     /* case of motion with multiple degrees of freedom. In case of simple  */
   9140     /* translational motion, a current and a few causal and non causal     */
   9141     /* candts would suffice. More candidates help to cover more complex    */
   9142     /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
   9143     /* where multiple ref helps etc.                                       */
   9144     /* The candidate choice also depends on the following parameters.      */
   9145     /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH         */
   9146     /* Whether we encode or not, and the type of search across reference   */
   9147     /* i.e. the previous layer may have been explicit/implicit and curr    */
   9148     /* layer may be explicit/implicit                                      */
   9149 
   9150     /* 0, 0, L, T, projected coloc best always present by default */
   9151     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
   9152     ps_candt_zeromv = ps_search_candts[id].ps_search_node;
   9153     ps_search_candts[id].u1_num_steps_refine = 0;
   9154     ps_candt_zeromv->s_mv.i2_mvx = 0;
   9155     ps_candt_zeromv->s_mv.i2_mvy = 0;
   9156 
   9157     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
   9158     ps_candt_l = ps_search_candts[id].ps_search_node;
   9159     ps_search_candts[id].u1_num_steps_refine = 0;
   9160 
   9161     /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
   9162     /* not at the CTB boundary use the causal T and */
   9163     /* not the projected T, although the candidate is */
   9164     /* still pointed to by ps_candt_prj_t[0] */
   9165     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
   9166     {
   9167         /* Using Projected top to eliminate sync */
   9168         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9169             PROJECTED_TOP0, e_me_quality_presets);
   9170         ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
   9171         ps_search_candts[id].u1_num_steps_refine = 1;
   9172     }
   9173     else
   9174     {
   9175         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9176             SPATIAL_TOP0, e_me_quality_presets);
   9177         ps_candt_t = ps_search_candts[id].ps_search_node;
   9178         ps_search_candts[id].u1_num_steps_refine = 0;
   9179     }
   9180 
   9181     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9182         PROJECTED_COLOC0, e_me_quality_presets);
   9183     ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
   9184     ps_search_candts[id].u1_num_steps_refine = 1;
   9185 
   9186     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9187         PROJECTED_COLOC1, e_me_quality_presets);
   9188     ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
   9189     ps_search_candts[id].u1_num_steps_refine = 1;
   9190 
   9191     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
   9192     {
   9193         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9194             PROJECTED_TOP_RIGHT0, e_me_quality_presets);
   9195         ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
   9196         ps_search_candts[id].u1_num_steps_refine = 1;
   9197 
   9198         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9199             PROJECTED_TOP_LEFT0, e_me_quality_presets);
   9200         ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
   9201         ps_search_candts[id].u1_num_steps_refine = 1;
   9202     }
   9203     else
   9204     {
   9205         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9206             SPATIAL_TOP_RIGHT0, e_me_quality_presets);
   9207         ps_candt_tr = ps_search_candts[id].ps_search_node;
   9208         ps_search_candts[id].u1_num_steps_refine = 0;
   9209 
   9210         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9211             SPATIAL_TOP_LEFT0, e_me_quality_presets);
   9212         ps_candt_tl = ps_search_candts[id].ps_search_node;
   9213         ps_search_candts[id].u1_num_steps_refine = 0;
   9214     }
   9215 
   9216     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9217         PROJECTED_RIGHT0, e_me_quality_presets);
   9218     ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
   9219     ps_search_candts[id].u1_num_steps_refine = 1;
   9220 
   9221     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9222         PROJECTED_BOTTOM0, e_me_quality_presets);
   9223     ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
   9224     ps_search_candts[id].u1_num_steps_refine = 1;
   9225 
   9226     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9227         PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
   9228     ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
   9229     ps_search_candts[id].u1_num_steps_refine = 1;
   9230 
   9231     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9232         PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
   9233     ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
   9234     ps_search_candts[id].u1_num_steps_refine = 1;
   9235 
   9236     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9237         PROJECTED_RIGHT1, e_me_quality_presets);
   9238     ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
   9239     ps_search_candts[id].u1_num_steps_refine = 1;
   9240 
   9241     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9242         PROJECTED_BOTTOM1, e_me_quality_presets);
   9243     ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
   9244     ps_search_candts[id].u1_num_steps_refine = 1;
   9245 
   9246     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9247         PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
   9248     ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
   9249     ps_search_candts[id].u1_num_steps_refine = 1;
   9250 
   9251     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9252         PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
   9253     ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
   9254     ps_search_candts[id].u1_num_steps_refine = 1;
   9255 
   9256     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
   9257     ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
   9258     ps_search_candts[id].u1_num_steps_refine = 1;
   9259 
   9260     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9261         PROJECTED_TOP_RIGHT1, e_me_quality_presets);
   9262     ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
   9263     ps_search_candts[id].u1_num_steps_refine = 1;
   9264 
   9265     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
   9266         PROJECTED_TOP_LEFT1, e_me_quality_presets);
   9267     ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
   9268     ps_search_candts[id].u1_num_steps_refine = 1;
   9269 
   9270     /*************************************************************************/
   9271     /* Now that the candidates have been ordered, to choose the right number */
   9272     /* of initial candidates.                                                */
   9273     /*************************************************************************/
   9274     if(curr_layer_implicit && !prev_layer_implicit)
   9275     {
   9276         if(e_search_complexity == SEARCH_CX_LOW)
   9277             num_init_candts = 7;
   9278         else if(e_search_complexity == SEARCH_CX_MED)
   9279             num_init_candts = 13;
   9280         else if(e_search_complexity == SEARCH_CX_HIGH)
   9281             num_init_candts = 18;
   9282         else
   9283             ASSERT(0);
   9284     }
   9285     else
   9286     {
   9287         if(e_search_complexity == SEARCH_CX_LOW)
   9288             num_init_candts = 5;
   9289         else if(e_search_complexity == SEARCH_CX_MED)
   9290             num_init_candts = 11;
   9291         else if(e_search_complexity == SEARCH_CX_HIGH)
   9292             num_init_candts = 16;
   9293         else
   9294             ASSERT(0);
   9295     }
   9296 
   9297     if(ME_XTREME_SPEED_25 == e_me_quality_presets)
   9298     {
   9299         num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
   9300     }
   9301 
   9302     /*************************************************************************/
   9303     /* The following search parameters are fixed throughout the search across*/
   9304     /* all blks. So these are configured outside processing loop             */
   9305     /*************************************************************************/
   9306     s_search_prms_blk.i4_num_init_candts = num_init_candts;
   9307     s_search_prms_blk.i4_start_step = 1;
   9308     s_search_prms_blk.i4_use_satd = 0;
   9309     s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
   9310     /* we use recon only for encoded layers, otherwise it is not available */
   9311     s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
   9312 
   9313     s_search_prms_blk.ps_search_candts = ps_search_candts;
   9314     /* We use the same mv_range for all ref. pic. So assign to member 0 */
   9315     if(s_search_prms_blk.i4_use_rec)
   9316         s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
   9317     else
   9318         s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
   9319     /*************************************************************************/
   9320     /* Initialize coordinates. Meaning as follows                            */
   9321     /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
   9322     /* blk_y : same as above, y coord.                                       */
   9323     /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
   9324     /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
   9325     /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
   9326     /* corner of the picture. Always multiple of 64.                         */
   9327     /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
   9328     /*************************************************************************/
   9329     blk_y = 0;
   9330     blk_id_in_ctb = 0;
   9331 
   9332     GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
   9333 
   9334     /* Get the number of sync units in a row based on encode/non-encode layer */
   9335     num_sync_units_in_row = num_blks_in_row;
   9336 
   9337     /*************************************************************************/
   9338     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
   9339     /* every block given its coordinate. Note this assumes that the min amt  */
   9340     /* of padding to right of pic is equal to the blk size. If we go all the */
   9341     /* way up to 64x64, then the min padding on right side of picture should */
   9342     /* be 64, and also on bottom side of picture.                            */
   9343     /*************************************************************************/
   9344     SET_PIC_LIMIT(
   9345         s_pic_limit_inp,
   9346         ps_curr_layer->i4_pad_x_inp,
   9347         ps_curr_layer->i4_pad_y_inp,
   9348         ps_curr_layer->i4_wd,
   9349         ps_curr_layer->i4_ht,
   9350         s_search_prms_blk.i4_num_steps_post_refine);
   9351 
   9352     SET_PIC_LIMIT(
   9353         s_pic_limit_rec,
   9354         ps_curr_layer->i4_pad_x_rec,
   9355         ps_curr_layer->i4_pad_y_rec,
   9356         ps_curr_layer->i4_wd,
   9357         ps_curr_layer->i4_ht,
   9358         s_search_prms_blk.i4_num_steps_post_refine);
   9359 
   9360     /*************************************************************************/
   9361     /* set the MV limit per ref. pic.                                        */
   9362     /*    - P pic. : Based on the config params.                             */
   9363     /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
   9364     /*************************************************************************/
   9365     {
   9366         WORD32 ref_ctr;
   9367         /* Only for B/b pic. */
   9368         if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
   9369         {
   9370             WORD16 i2_mv_y_per_poc, i2_max_mv_y;
   9371             WORD32 cur_poc, ref_poc, abs_poc_diff;
   9372 
   9373             cur_poc = ps_ctxt->i4_curr_poc;
   9374 
   9375             /* Get abs MAX for symmetric search */
   9376             i2_mv_y_per_poc = MAX(
   9377                 ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
   9378                 (ABS(ps_ctxt->s_coarse_dyn_range_prms
   9379                          .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
   9380 
   9381             for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
   9382             {
   9383                 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
   9384                 abs_poc_diff = ABS((cur_poc - ref_poc));
   9385                 /* Get the cur. max MV based on POC distance */
   9386                 i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
   9387                 i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
   9388 
   9389                 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
   9390                 as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
   9391                 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
   9392                 as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
   9393             }
   9394         }
   9395         else
   9396         {
   9397             /* Set the Config. File Params for P pic. */
   9398             for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
   9399             {
   9400                 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
   9401                 as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
   9402                 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
   9403                 as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
   9404             }
   9405         }
   9406     }
   9407 
   9408     /* EIID: Calculate threshold based on quality preset and/or temporal layers */
   9409     if(e_me_quality_presets == ME_MEDIUM_SPEED)
   9410     {
   9411         i4_threshold_multiplier = 1;
   9412         i4_threshold_divider = 4;
   9413     }
   9414     else if(e_me_quality_presets == ME_HIGH_SPEED)
   9415     {
   9416         i4_threshold_multiplier = 1;
   9417         i4_threshold_divider = 2;
   9418     }
   9419     else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
   9420     {
   9421 #if OLD_XTREME_SPEED
   9422         /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
   9423         i4_temporal_layer = 1;
   9424 #endif
   9425         if(i4_temporal_layer == 0)
   9426         {
   9427             i4_threshold_multiplier = 3;
   9428             i4_threshold_divider = 4;
   9429         }
   9430         else if(i4_temporal_layer == 1)
   9431         {
   9432             i4_threshold_multiplier = 3;
   9433             i4_threshold_divider = 4;
   9434         }
   9435         else if(i4_temporal_layer == 2)
   9436         {
   9437             i4_threshold_multiplier = 1;
   9438             i4_threshold_divider = 1;
   9439         }
   9440         else
   9441         {
   9442             i4_threshold_multiplier = 5;
   9443             i4_threshold_divider = 4;
   9444         }
   9445     }
   9446     else if(e_me_quality_presets == ME_HIGH_QUALITY)
   9447     {
   9448         i4_threshold_multiplier = 1;
   9449         i4_threshold_divider = 1;
   9450     }
   9451 
   9452     /*************************************************************************/
   9453     /*************************************************************************/
   9454     /*************************************************************************/
   9455     /* START OF THE CORE LOOP                                                */
   9456     /* If Encode is 0, then we just loop over each blk                       */
   9457     /*************************************************************************/
   9458     /*************************************************************************/
   9459     /*************************************************************************/
   9460     while(0 == end_of_frame)
   9461     {
   9462         job_queue_t *ps_job;
   9463         ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row;  //EIID
   9464         WORD32 i4_ctb_row_ctr;  //counter to calculate CTB row counter. It's (row_ctr /4)
   9465         WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4;  //calculations verified for L1 only
   9466         //+3 to get ceil values when divided by 4
   9467         WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
   9468             8 * 8;  //considering CTB size 32x32 at L1. hardcoded for now
   9469         //if there is variable for ctb size use that and this variable can be derived
   9470         WORD32 offset_val, check_dep_pos, set_dep_pos;
   9471         void *pv_hme_dep_mngr;
   9472         ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
   9473 
   9474         /* Get the current layer HME Dep Mngr       */
   9475         /* Note : Use layer_id - 1 in HME layers    */
   9476 
   9477         pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
   9478 
   9479         /* Get the current row from the job queue */
   9480         ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
   9481             ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
   9482 
   9483         /* If all rows are done, set the end of process flag to 1, */
   9484         /* and the current row to -1 */
   9485         if(NULL == ps_job)
   9486         {
   9487             blk_y = -1;
   9488             end_of_frame = 1;
   9489 
   9490             continue;
   9491         }
   9492 
   9493         if(1 == ps_ctxt->s_frm_prms.is_i_pic)
   9494         {
   9495             /* set the output dependency of current row */
   9496             ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
   9497             continue;
   9498         }
   9499 
   9500         blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
   9501         blk_x = 0;
   9502         i4_ctb_x = 0;
   9503 
   9504         /* wait for Corresponding Pre intra Job to be completed */
   9505         if(1 == ps_refine_prms->i4_layer_id)
   9506         {
   9507             volatile UWORD32 i4_l1_done;
   9508             volatile UWORD32 *pi4_l1_done;
   9509             pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
   9510                               ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
   9511             i4_l1_done = *pi4_l1_done;
   9512             while(!i4_l1_done)
   9513             {
   9514                 i4_l1_done = *pi4_l1_done;
   9515             }
   9516         }
   9517         /* Set Variables for Dep. Checking and Setting */
   9518         set_dep_pos = blk_y + 1;
   9519         if(blk_y > 0)
   9520         {
   9521             offset_val = 2;
   9522             check_dep_pos = blk_y - 1;
   9523         }
   9524         else
   9525         {
   9526             /* First row should run without waiting */
   9527             offset_val = -1;
   9528             check_dep_pos = 0;
   9529         }
   9530 
   9531         /* EIID: calculate ed_blk_ctxt pointer for current row */
   9532         /* valid for only layer-1. not verified and used for other layers */
   9533         i4_ctb_row_ctr = blk_y / 4;
   9534         ps_ed_blk_ctxt_curr_row =
   9535             ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
   9536                                   i4_num_4x4_blocks_in_ctb_at_l1);  //valid for L1 only
   9537         ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
   9538 
   9539         /* if non-encode layer then i4_ctb_x will be same as blk_x */
   9540         /* loop over all the units in a row                        */
   9541         for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
   9542         {
   9543             ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb;  //EIDD
   9544             ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
   9545             WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
   9546 
   9547             /* Wait till top row block is processed   */
   9548             /* Currently checking till top right block*/
   9549 
   9550             /* Disabled since all candidates, except for */
   9551             /* L and C, are projected from the coarser layer, */
   9552             /* only in ME_HIGH_SPEED mode */
   9553             if((ME_MEDIUM_SPEED > e_me_quality_presets))
   9554             {
   9555                 if(i4_ctb_x < (num_sync_units_in_row - 1))
   9556                 {
   9557                     ihevce_dmgr_chk_row_row_sync(
   9558                         pv_hme_dep_mngr,
   9559                         i4_ctb_x,
   9560                         offset_val,
   9561                         check_dep_pos,
   9562                         0, /* Col Tile No. : Not supported in PreEnc*/
   9563                         ps_ctxt->thrd_id);
   9564                 }
   9565             }
   9566 
   9567             {
   9568                 /* for non encoder layer only one block is processed */
   9569                 num_blks_in_this_ctb = 1;
   9570             }
   9571 
   9572             /* EIID: derive ed_ctxt ptr for current CTB */
   9573             ps_ed_blk_ctxt_curr_ctb =
   9574                 ps_ed_blk_ctxt_curr_row +
   9575                 (i4_ctb_blk_ctr *
   9576                  i4_num_4x4_blocks_in_ctb_at_l1);  //currently valid for l1 layer only
   9577             ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
   9578 
   9579             /* loop over all the blocks in CTB will always be 1 */
   9580             for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
   9581             {
   9582                 {
   9583                     /* non encode layer */
   9584                     blk_x = i4_ctb_x;
   9585                     blk_id_in_full_ctb = 0;
   9586                     s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
   9587                 }
   9588 
   9589                 /* get the current input blk point */
   9590                 pos_x = blk_x << blk_size_shift;
   9591                 pos_y = blk_y << blk_size_shift;
   9592                 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
   9593 
   9594                 /*********************************************************************/
   9595                 /* replicate the inp buffer at blk or ctb level for each ref id,     */
   9596                 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
   9597                 /* thereby avoiding a bloat up of memory. If we did all references   */
   9598                 /* weighted pred, we will end up with a duplicate copy of each ref   */
   9599                 /* at each layer, since we need to preserve the original reference.  */
   9600                 /* ToDo: Need to observe performance with this mechanism and compare */
   9601                 /* with case where ref is weighted.                                  */
   9602                 /*********************************************************************/
   9603                 if(blk_id_in_ctb == 0)
   9604                 {
   9605                     fp_get_wt_inp(
   9606                         ps_curr_layer,
   9607                         &ps_ctxt->s_wt_pred,
   9608                         unit_size,
   9609                         pos_x,
   9610                         pos_y,
   9611                         unit_size,
   9612                         ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
   9613                         ps_ctxt->i4_wt_pred_enable_flag);
   9614                 }
   9615 
   9616                 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
   9617                 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
   9618                 /* Select search results from a suitable search result in the context */
   9619                 {
   9620                     ps_search_results = &ps_ctxt->s_search_results_8x8;
   9621                 }
   9622 
   9623                 s_search_prms_blk.ps_search_results = ps_search_results;
   9624 
   9625                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
   9626                 hme_reset_search_results(
   9627                     ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
   9628 
   9629                 /* Loop across different Ref IDx */
   9630                 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
   9631                 {
   9632                     S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
   9633                     S32 prev_blk_offset = 6;
   9634                     S32 resultid;
   9635 
   9636                     /*********************************************************************/
   9637                     /* For every blk in the picture, the search range needs to be derived*/
   9638                     /* Any blk can have any mv, but practical search constraints are     */
   9639                     /* imposed by the picture boundary and amt of padding.               */
   9640                     /*********************************************************************/
   9641                     /* MV limit is different based on ref. PIC */
   9642                     hme_derive_search_range(
   9643                         &s_range_prms_inp,
   9644                         &s_pic_limit_inp,
   9645                         &as_mv_limit[i1_ref_idx],
   9646                         pos_x,
   9647                         pos_y,
   9648                         blk_wd,
   9649                         blk_ht);
   9650                     hme_derive_search_range(
   9651                         &s_range_prms_rec,
   9652                         &s_pic_limit_rec,
   9653                         &as_mv_limit[i1_ref_idx],
   9654                         pos_x,
   9655                         pos_y,
   9656                         blk_wd,
   9657                         blk_ht);
   9658 
   9659                     s_search_prms_blk.i1_ref_idx = i1_ref_idx;
   9660                     ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
   9661 
   9662                     i4_num_srch_cands = 1;
   9663 
   9664                     if(1 != ps_refine_prms->i4_layer_id)
   9665                     {
   9666                         S32 x, y;
   9667                         x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
   9668                         y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
   9669 
   9670                         if(ME_MEDIUM_SPEED > e_me_quality_presets)
   9671                         {
   9672                             hme_get_spatial_candt(
   9673                                 ps_curr_layer,
   9674                                 e_search_blk_size,
   9675                                 blk_x,
   9676                                 blk_y,
   9677                                 i1_ref_idx,
   9678                                 &as_top_neighbours[0],
   9679                                 &as_left_neighbours[0],
   9680                                 0,
   9681                                 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
   9682                                 0,
   9683                                 ps_refine_prms->i4_encode);
   9684 
   9685                             *ps_candt_tr = as_top_neighbours[3];
   9686                             *ps_candt_t = as_top_neighbours[1];
   9687                             *ps_candt_tl = as_top_neighbours[0];
   9688                             i4_num_srch_cands += 3;
   9689                         }
   9690                         else
   9691                         {
   9692                             layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
   9693                             S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
   9694                             S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
   9695                             search_node_t *ps_search_node;
   9696                             S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
   9697                             hme_mv_t *ps_mv, *ps_mv_base;
   9698                             S08 *pi1_ref_idx, *pi1_ref_idx_base;
   9699                             S32 jump = 1, mvs_in_blk, mvs_in_row;
   9700                             S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
   9701 
   9702                             if(i4_blk_size1 != i4_blk_size2)
   9703                             {
   9704                                 blk_x_temp <<= 1;
   9705                                 blk_y_temp <<= 1;
   9706                                 jump = 2;
   9707                                 if((i4_blk_size1 << 2) == i4_blk_size2)
   9708                                 {
   9709                                     blk_x_temp <<= 1;
   9710                                     blk_y_temp <<= 1;
   9711                                     jump = 4;
   9712                                 }
   9713                             }
   9714 
   9715                             mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
   9716                             mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
   9717 
   9718                             /* Adjust the blk coord to point to top left locn */
   9719                             blk_x_temp -= 1;
   9720                             blk_y_temp -= 1;
   9721 
   9722                             /* Pick up the mvs from the location */
   9723                             i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
   9724                             i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
   9725 
   9726                             ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
   9727                             pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
   9728 
   9729                             ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
   9730                             pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
   9731 
   9732                             ps_mv_base = ps_mv;
   9733                             pi1_ref_idx_base = pi1_ref_idx;
   9734 
   9735                             ps_search_node = &as_left_neighbours[0];
   9736                             ps_mv = ps_mv_base + mvs_in_row;
   9737                             pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
   9738                             COPY_MV_TO_SEARCH_NODE(
   9739                                 ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
   9740 
   9741                             i4_num_srch_cands++;
   9742                         }
   9743                     }
   9744                     else
   9745                     {
   9746                         S32 x, y;
   9747                         x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
   9748                         y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
   9749 
   9750                         if(ME_MEDIUM_SPEED > e_me_quality_presets)
   9751                         {
   9752                             hme_get_spatial_candt_in_l1_me(
   9753                                 ps_curr_layer,
   9754                                 e_search_blk_size,
   9755                                 blk_x,
   9756                                 blk_y,
   9757                                 i1_ref_idx,
   9758                                 !ps_search_results->pu1_is_past[i1_ref_idx],
   9759                                 &as_top_neighbours[0],
   9760                                 &as_left_neighbours[0],
   9761                                 0,
   9762                                 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
   9763                                 0,
   9764                                 ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
   9765                                 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
   9766 
   9767                             *ps_candt_tr = as_top_neighbours[3];
   9768                             *ps_candt_t = as_top_neighbours[1];
   9769                             *ps_candt_tl = as_top_neighbours[0];
   9770 
   9771                             i4_num_srch_cands += 3;
   9772                         }
   9773                         else
   9774                         {
   9775                             layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
   9776                             S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
   9777                             S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
   9778                             S32 i4_mv_pos_in_implicit_array;
   9779                             search_node_t *ps_search_node;
   9780                             S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
   9781                             hme_mv_t *ps_mv, *ps_mv_base;
   9782                             S08 *pi1_ref_idx, *pi1_ref_idx_base;
   9783                             S32 jump = 1, mvs_in_blk, mvs_in_row;
   9784                             S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
   9785                             U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
   9786                             S32 i4_num_results_in_given_dir =
   9787                                 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
   9788                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
   9789                                                     : (ps_layer_mvbank->i4_num_mvs_per_ref *
   9790                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
   9791 
   9792                             if(i4_blk_size1 != i4_blk_size2)
   9793                             {
   9794                                 blk_x_temp <<= 1;
   9795                                 blk_y_temp <<= 1;
   9796                                 jump = 2;
   9797                                 if((i4_blk_size1 << 2) == i4_blk_size2)
   9798                                 {
   9799                                     blk_x_temp <<= 1;
   9800                                     blk_y_temp <<= 1;
   9801                                     jump = 4;
   9802                                 }
   9803                             }
   9804 
   9805                             mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
   9806                             mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
   9807 
   9808                             /* Adjust the blk coord to point to top left locn */
   9809                             blk_x_temp -= 1;
   9810                             blk_y_temp -= 1;
   9811 
   9812                             /* Pick up the mvs from the location */
   9813                             i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
   9814                             i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
   9815 
   9816                             i4_offset +=
   9817                                 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
   9818                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
   9819                                                     : 0);
   9820 
   9821                             ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
   9822                             pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
   9823 
   9824                             ps_mv_base = ps_mv;
   9825                             pi1_ref_idx_base = pi1_ref_idx;
   9826 
   9827                             {
   9828                                 /* ps_mv and pi1_ref_idx now point to the top left locn */
   9829                                 ps_search_node = &as_left_neighbours[0];
   9830                                 ps_mv = ps_mv_base + mvs_in_row;
   9831                                 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
   9832 
   9833                                 i4_mv_pos_in_implicit_array =
   9834                                     hme_find_pos_of_implicitly_stored_ref_id(
   9835                                         pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
   9836 
   9837                                 if(-1 != i4_mv_pos_in_implicit_array)
   9838                                 {
   9839                                     COPY_MV_TO_SEARCH_NODE(
   9840                                         ps_search_node,
   9841                                         &ps_mv[i4_mv_pos_in_implicit_array],
   9842                                         &pi1_ref_idx[i4_mv_pos_in_implicit_array],
   9843                                         i1_ref_idx,
   9844                                         shift);
   9845                                 }
   9846                                 else
   9847                                 {
   9848                                     ps_search_node->u1_is_avail = 0;
   9849                                     ps_search_node->s_mv.i2_mvx = 0;
   9850                                     ps_search_node->s_mv.i2_mvy = 0;
   9851                                     ps_search_node->i1_ref_idx = i1_ref_idx;
   9852                                 }
   9853 
   9854                                 i4_num_srch_cands++;
   9855                             }
   9856                         }
   9857                     }
   9858 
   9859                     *ps_candt_l = as_left_neighbours[0];
   9860 
   9861                     /* when 16x16 is searched in an encode layer, and the prev layer */
   9862                     /* stores results for 4x4 blks, we project 5 candts corresponding */
   9863                     /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
   9864                     /* However in other cases, only 2,2 best and 2nd best reqd */
   9865                     resultid = 0;
   9866                     pf_hme_project_coloc_candt(
   9867                         ps_candt_prj_coloc[0],
   9868                         ps_curr_layer,
   9869                         ps_coarse_layer,
   9870                         pos_x + 2,
   9871                         pos_y + 2,
   9872                         i1_ref_idx,
   9873                         resultid);
   9874 
   9875                     i4_num_srch_cands++;
   9876 
   9877                     resultid = 1;
   9878                     if(num_results_prev_layer > 1)
   9879                     {
   9880                         pf_hme_project_coloc_candt(
   9881                             ps_candt_prj_coloc[1],
   9882                             ps_curr_layer,
   9883                             ps_coarse_layer,
   9884                             pos_x + 2,
   9885                             pos_y + 2,
   9886                             i1_ref_idx,
   9887                             resultid);
   9888 
   9889                         i4_num_srch_cands++;
   9890                     }
   9891 
   9892                     resultid = 0;
   9893 
   9894                     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
   9895                     {
   9896                         pf_hme_project_coloc_candt(
   9897                             ps_candt_prj_t[0],
   9898                             ps_curr_layer,
   9899                             ps_coarse_layer,
   9900                             pos_x,
   9901                             pos_y - prev_blk_offset,
   9902                             i1_ref_idx,
   9903                             resultid);
   9904 
   9905                         i4_num_srch_cands++;
   9906                     }
   9907 
   9908                     {
   9909                         pf_hme_project_coloc_candt(
   9910                             ps_candt_prj_br[0],
   9911                             ps_curr_layer,
   9912                             ps_coarse_layer,
   9913                             pos_x + next_blk_offset,
   9914                             pos_y + next_blk_offset,
   9915                             i1_ref_idx,
   9916                             resultid);
   9917                         pf_hme_project_coloc_candt(
   9918                             ps_candt_prj_bl[0],
   9919                             ps_curr_layer,
   9920                             ps_coarse_layer,
   9921                             pos_x - prev_blk_offset,
   9922                             pos_y + next_blk_offset,
   9923                             i1_ref_idx,
   9924                             resultid);
   9925                         pf_hme_project_coloc_candt(
   9926                             ps_candt_prj_r[0],
   9927                             ps_curr_layer,
   9928                             ps_coarse_layer,
   9929                             pos_x + next_blk_offset,
   9930                             pos_y,
   9931                             i1_ref_idx,
   9932                             resultid);
   9933                         pf_hme_project_coloc_candt(
   9934                             ps_candt_prj_b[0],
   9935                             ps_curr_layer,
   9936                             ps_coarse_layer,
   9937                             pos_x,
   9938                             pos_y + next_blk_offset,
   9939                             i1_ref_idx,
   9940                             resultid);
   9941 
   9942                         i4_num_srch_cands += 4;
   9943 
   9944                         if(ME_MEDIUM_SPEED <= e_me_quality_presets)
   9945                         {
   9946                             pf_hme_project_coloc_candt(
   9947                                 ps_candt_prj_tr[0],
   9948                                 ps_curr_layer,
   9949                                 ps_coarse_layer,
   9950                                 pos_x + next_blk_offset,
   9951                                 pos_y - prev_blk_offset,
   9952                                 i1_ref_idx,
   9953                                 resultid);
   9954                             pf_hme_project_coloc_candt(
   9955                                 ps_candt_prj_tl[0],
   9956                                 ps_curr_layer,
   9957                                 ps_coarse_layer,
   9958                                 pos_x - prev_blk_offset,
   9959                                 pos_y - prev_blk_offset,
   9960                                 i1_ref_idx,
   9961                                 resultid);
   9962 
   9963                             i4_num_srch_cands += 2;
   9964                         }
   9965                     }
   9966                     if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
   9967                     {
   9968                         resultid = 1;
   9969                         pf_hme_project_coloc_candt(
   9970                             ps_candt_prj_br[1],
   9971                             ps_curr_layer,
   9972                             ps_coarse_layer,
   9973                             pos_x + next_blk_offset,
   9974                             pos_y + next_blk_offset,
   9975                             i1_ref_idx,
   9976                             resultid);
   9977                         pf_hme_project_coloc_candt(
   9978                             ps_candt_prj_bl[1],
   9979                             ps_curr_layer,
   9980                             ps_coarse_layer,
   9981                             pos_x - prev_blk_offset,
   9982                             pos_y + next_blk_offset,
   9983                             i1_ref_idx,
   9984                             resultid);
   9985                         pf_hme_project_coloc_candt(
   9986                             ps_candt_prj_r[1],
   9987                             ps_curr_layer,
   9988                             ps_coarse_layer,
   9989                             pos_x + next_blk_offset,
   9990                             pos_y,
   9991                             i1_ref_idx,
   9992                             resultid);
   9993                         pf_hme_project_coloc_candt(
   9994                             ps_candt_prj_b[1],
   9995                             ps_curr_layer,
   9996                             ps_coarse_layer,
   9997                             pos_x,
   9998                             pos_y + next_blk_offset,
   9999                             i1_ref_idx,
   10000                             resultid);
   10001 
   10002                         i4_num_srch_cands += 4;
   10003 
   10004                         pf_hme_project_coloc_candt(
   10005                             ps_candt_prj_tr[1],
   10006                             ps_curr_layer,
   10007                             ps_coarse_layer,
   10008                             pos_x + next_blk_offset,
   10009                             pos_y - prev_blk_offset,
   10010                             i1_ref_idx,
   10011                             resultid);
   10012                         pf_hme_project_coloc_candt(
   10013                             ps_candt_prj_tl[1],
   10014                             ps_curr_layer,
   10015                             ps_coarse_layer,
   10016                             pos_x - prev_blk_offset,
   10017                             pos_y - prev_blk_offset,
   10018                             i1_ref_idx,
   10019                             resultid);
   10020                         pf_hme_project_coloc_candt(
   10021                             ps_candt_prj_t[1],
   10022                             ps_curr_layer,
   10023                             ps_coarse_layer,
   10024                             pos_x,
   10025                             pos_y - prev_blk_offset,
   10026                             i1_ref_idx,
   10027                             resultid);
   10028 
   10029                         i4_num_srch_cands += 3;
   10030                     }
   10031 
   10032                     /* Note this block also clips the MV range for all candidates */
   10033 #ifdef _DEBUG
   10034                     {
   10035                         S32 candt;
   10036                         range_prms_t *ps_range_prms;
   10037 
   10038                         S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
   10039                         for(candt = 0; candt < i4_num_srch_cands; candt++)
   10040                         {
   10041                             search_node_t *ps_search_node;
   10042 
   10043                             ps_search_node =
   10044                                 s_search_prms_blk.ps_search_candts[candt].ps_search_node;
   10045 
   10046                             ps_range_prms = s_search_prms_blk.aps_mv_range[0];
   10047 
   10048                             if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
   10049                                (ps_search_node->i1_ref_idx < 0))
   10050                             {
   10051                                 ASSERT(0);
   10052                             }
   10053                         }
   10054                     }
   10055 #endif
   10056 
   10057                     {
   10058                         S32 srch_cand;
   10059                         S32 num_unique_nodes = 0;
   10060                         S32 num_nodes_searched = 0;
   10061                         S32 num_best_cand = 0;
   10062                         S08 i1_grid_enable = 0;
   10063                         search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
   10064                         /* has list of valid partition to search terminated by -1 */
   10065                         S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
   10066                         S32 center_x;
   10067                         S32 center_y;
   10068 
   10069                         /* indicates if the centre point of grid needs to be explicitly added for search */
   10070                         S32 add_centre = 0;
   10071 
   10072                         memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
   10073                         center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
   10074                         center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
   10075 
   10076                         for(srch_cand = 0;
   10077                             (srch_cand < i4_num_srch_cands) &&
   10078                             (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
   10079                             srch_cand++)
   10080                         {
   10081                             search_node_t s_search_node_temp =
   10082                                 s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
   10083 
   10084                             s_search_node_temp.i1_ref_idx = i1_ref_idx;  //TEMP FIX;
   10085 
   10086                             /* Clip the motion vectors as well here since after clipping
   10087                             two candidates can become same and they will be removed during deduplication */
   10088                             CLIP_MV_WITHIN_RANGE(
   10089                                 s_search_node_temp.s_mv.i2_mvx,
   10090                                 s_search_node_temp.s_mv.i2_mvy,
   10091                                 s_search_prms_blk.aps_mv_range[0],
   10092                                 ps_refine_prms->i4_num_steps_fpel_refine,
   10093                                 ps_refine_prms->i4_num_steps_hpel_refine,
   10094                                 ps_refine_prms->i4_num_steps_qpel_refine);
   10095 
   10096                             /* PT_C */
   10097                             INSERT_NEW_NODE(
   10098                                 as_unique_search_nodes,
   10099                                 num_unique_nodes,
   10100                                 s_search_node_temp,
   10101                                 0,
   10102                                 au4_unique_node_map,
   10103                                 center_x,
   10104                                 center_y,
   10105                                 1);
   10106 
   10107                             num_nodes_searched += 1;
   10108                         }
   10109                         num_unique_nodes =
   10110                             MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
   10111 
   10112                         /* If the number of candidates projected/number of candidates to be refined is 2 or more,
   10113                         then filter out and choose the best two here */
   10114                         if(num_unique_nodes >= 2)
   10115                         {
   10116                             S32 num_results;
   10117                             S32 cnt;
   10118                             S32 *pi4_valid_part_ids;
   10119                             s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
   10120                             s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
   10121                             pi4_valid_part_ids = &ai4_valid_part_ids[0];
   10122 
   10123                             /* pi4_valid_part_ids is updated inside */
   10124                             hme_pred_search_no_encode(
   10125                                 &s_search_prms_blk,
   10126                                 ps_curr_layer,
   10127                                 &ps_ctxt->s_wt_pred,
   10128                                 pi4_valid_part_ids,
   10129                                 1,
   10130                                 e_me_quality_presets,
   10131                                 i1_grid_enable,
   10132                                 (ihevce_me_optimised_function_list_t *)
   10133                                     ps_ctxt->pv_me_optimised_function_list
   10134 
   10135                             );
   10136 
   10137                             num_best_cand = 0;
   10138                             cnt = 0;
   10139                             num_results = ps_search_results->u1_num_results_per_part;
   10140 
   10141                             while((id = pi4_valid_part_ids[cnt++]) >= 0)
   10142                             {
   10143                                 num_results =
   10144                                     MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
   10145 
   10146                                 for(i = 0; i < num_results; i++)
   10147                                 {
   10148                                     search_node_t s_search_node_temp;
   10149                                     s_search_node_temp =
   10150                                         *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
   10151                                     if(s_search_node_temp.i1_ref_idx >= 0)
   10152                                     {
   10153                                         INSERT_NEW_NODE_NOMAP(
   10154                                             as_best_two_proj_node,
   10155                                             num_best_cand,
   10156                                             s_search_node_temp,
   10157                                             0);
   10158                                     }
   10159                                 }
   10160                             }
   10161                         }
   10162                         else
   10163                         {
   10164                             add_centre = 1;
   10165                             num_best_cand = num_unique_nodes;
   10166                             as_best_two_proj_node[0] = as_unique_search_nodes[0];
   10167                         }
   10168 
   10169                         num_unique_nodes = 0;
   10170                         num_nodes_searched = 0;
   10171 
   10172                         if(1 == num_best_cand)
   10173                         {
   10174                             search_node_t s_search_node_temp = as_best_two_proj_node[0];
   10175                             S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
   10176                             S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
   10177                             S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
   10178 
   10179                             i1_grid_enable = 1;
   10180 
   10181                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
   10182                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
   10183                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
   10184 
   10185                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
   10186                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
   10187                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
   10188 
   10189                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
   10190                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
   10191                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
   10192 
   10193                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
   10194                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
   10195                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
   10196 
   10197                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
   10198                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
   10199                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
   10200 
   10201                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
   10202                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
   10203                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
   10204 
   10205                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
   10206                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
   10207                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
   10208 
   10209                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
   10210                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
   10211                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
   10212 
   10213                             if(add_centre)
   10214                             {
   10215                                 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
   10216                                 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
   10217                                 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
   10218                             }
   10219                         }
   10220                         else
   10221                         {
   10222                             /* For the candidates where refinement was required, choose the best two */
   10223                             for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
   10224                             {
   10225                                 search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
   10226                                 WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
   10227                                 WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
   10228 
   10229                                 /* Because there may not be two best unique candidates (because of clipping),
   10230                                 second best candidate can be uninitialized, ignore that */
   10231                                 if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
   10232                                    s_search_node_temp.i1_ref_idx < 0)
   10233                                 {
   10234                                     num_nodes_searched++;
   10235                                     continue;
   10236                                 }
   10237 
   10238                                 /* PT_C */
   10239                                 /* Since the center point has already been evaluated and best results are persistent,
   10240                                 it will not be evaluated again */
   10241                                 if(add_centre) /* centre point added explicitly again if search results is not updated */
   10242                                 {
   10243                                     INSERT_NEW_NODE(
   10244                                         as_unique_search_nodes,
   10245                                         num_unique_nodes,
   10246                                         s_search_node_temp,
   10247                                         0,
   10248                                         au4_unique_node_map,
   10249                                         center_x,
   10250                                         center_y,
   10251                                         1);
   10252                                 }
   10253 
   10254                                 /* PT_L */
   10255                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
   10256                                 s_search_node_temp.s_mv.i2_mvy = mv_y;
   10257                                 INSERT_NEW_NODE(
   10258                                     as_unique_search_nodes,
   10259                                     num_unique_nodes,
   10260                                     s_search_node_temp,
   10261                                     0,
   10262                                     au4_unique_node_map,
   10263                                     center_x,
   10264                                     center_y,
   10265                                     1);
   10266 
   10267                                 /* PT_T */
   10268                                 s_search_node_temp.s_mv.i2_mvx = mv_x;
   10269                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
   10270                                 INSERT_NEW_NODE(
   10271                                     as_unique_search_nodes,
   10272                                     num_unique_nodes,
   10273                                     s_search_node_temp,
   10274                                     0,
   10275                                     au4_unique_node_map,
   10276                                     center_x,
   10277                                     center_y,
   10278                                     1);
   10279 
   10280                                 /* PT_R */
   10281                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
   10282                                 s_search_node_temp.s_mv.i2_mvy = mv_y;
   10283                                 INSERT_NEW_NODE(
   10284                                     as_unique_search_nodes,
   10285                                     num_unique_nodes,
   10286                                     s_search_node_temp,
   10287                                     0,
   10288                                     au4_unique_node_map,
   10289                                     center_x,
   10290                                     center_y,
   10291                                     1);
   10292 
   10293                                 /* PT_B */
   10294                                 s_search_node_temp.s_mv.i2_mvx = mv_x;
   10295                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
   10296                                 INSERT_NEW_NODE(
   10297                                     as_unique_search_nodes,
   10298                                     num_unique_nodes,
   10299                                     s_search_node_temp,
   10300                                     0,
   10301                                     au4_unique_node_map,
   10302                                     center_x,
   10303                                     center_y,
   10304                                     1);
   10305 
   10306                                 /* PT_TL */
   10307                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
   10308                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
   10309                                 INSERT_NEW_NODE(
   10310                                     as_unique_search_nodes,
   10311                                     num_unique_nodes,
   10312                                     s_search_node_temp,
   10313                                     0,
   10314                                     au4_unique_node_map,
   10315                                     center_x,
   10316                                     center_y,
   10317                                     1);
   10318 
   10319                                 /* PT_TR */
   10320                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
   10321                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
   10322                                 INSERT_NEW_NODE(
   10323                                     as_unique_search_nodes,
   10324                                     num_unique_nodes,
   10325                                     s_search_node_temp,
   10326                                     0,
   10327                                     au4_unique_node_map,
   10328                                     center_x,
   10329                                     center_y,
   10330                                     1);
   10331 
   10332                                 /* PT_BL */
   10333                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
   10334                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
   10335                                 INSERT_NEW_NODE(
   10336                                     as_unique_search_nodes,
   10337                                     num_unique_nodes,
   10338                                     s_search_node_temp,
   10339                                     0,
   10340                                     au4_unique_node_map,
   10341                                     center_x,
   10342                                     center_y,
   10343                                     1);
   10344 
   10345                                 /* PT_BR */
   10346                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
   10347                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
   10348                                 INSERT_NEW_NODE(
   10349                                     as_unique_search_nodes,
   10350                                     num_unique_nodes,
   10351                                     s_search_node_temp,
   10352                                     0,
   10353                                     au4_unique_node_map,
   10354                                     center_x,
   10355                                     center_y,
   10356                                     1);
   10357                             }
   10358                         }
   10359 
   10360                         s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
   10361                         s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
   10362 
   10363                         /*****************************************************************/
   10364                         /* Call the search algorithm, this includes:                     */
   10365                         /* Pre-Search-Refinement (for coarse candts)                     */
   10366                         /* Search on each candidate                                      */
   10367                         /* Post Search Refinement on winners/other new candidates        */
   10368                         /*****************************************************************/
   10369 
   10370                         hme_pred_search_no_encode(
   10371                             &s_search_prms_blk,
   10372                             ps_curr_layer,
   10373                             &ps_ctxt->s_wt_pred,
   10374                             ai4_valid_part_ids,
   10375                             0,
   10376                             e_me_quality_presets,
   10377                             i1_grid_enable,
   10378                             (ihevce_me_optimised_function_list_t *)
   10379                                 ps_ctxt->pv_me_optimised_function_list);
   10380 
   10381                         i1_grid_enable = 0;
   10382                     }
   10383                 }
   10384 
   10385                 /* for non encode layer update MV and end processing for block */
   10386                 {
   10387                     WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
   10388                     search_node_t *ps_search_node;
   10389                     /* now update the reqd results back to the layer mv bank. */
   10390                     if(1 == ps_refine_prms->i4_layer_id)
   10391                     {
   10392                         hme_update_mv_bank_in_l1_me(
   10393                             ps_search_results,
   10394                             ps_curr_layer->ps_layer_mvbank,
   10395                             blk_x,
   10396                             blk_y,
   10397                             &s_mv_update_prms);
   10398                     }
   10399                     else
   10400                     {
   10401                         hme_update_mv_bank_noencode(
   10402                             ps_search_results,
   10403                             ps_curr_layer->ps_layer_mvbank,
   10404                             blk_x,
   10405                             blk_y,
   10406                             &s_mv_update_prms);
   10407                     }
   10408 
   10409                     /* UPDATE the MIN and MAX MVs for Dynamic Search Range for each ref. pic. */
   10410                     /* Only for P pic. For P, both flags are 0; for I and B pics they are mutually exclusive */
   10411                     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
   10412                     {
   10413                         WORD32 i4_j;
   10414                         layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
   10415 
   10416                         //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
   10417                         /* Not considering this for Dyn. Search Update */
   10418                         {
   10419                             for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
   10420                                 i4_ref_id++)
   10421                             {
   10422                                 ps_search_node =
   10423                                     ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
   10424 
   10425                                 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
   10426                                 {
   10427                                     hme_update_dynamic_search_params(
   10428                                         &ps_ctxt->s_coarse_dyn_range_prms
   10429                                              .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
   10430                                                                [i4_ref_id],
   10431                                         ps_search_node->s_mv.i2_mvy);
   10432 
   10433                                     ps_search_node++;
   10434                                 }
   10435                             }
   10436                         }
   10437                     }
   10438 
   10439                     if(1 == ps_refine_prms->i4_layer_id)
   10440                     {
   10441                         WORD32 wt_pred_val, log_wt_pred_val;
   10442                         WORD32 ref_id_of_nearest_poc = 0;
   10443                         WORD32 max_val = 0x7fffffff;
   10444                         WORD32 max_l0_val = 0x7fffffff;
   10445                         WORD32 max_l1_val = 0x7fffffff;
   10446                         WORD32 cur_val;
   10447                         WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
   10448 
   10449                         WORD32 bestl0_sad = 0x7fffffff;
   10450                         WORD32 bestl1_sad = 0x7fffffff;
   10451                         search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
   10452 
   10453                         for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
   10454                             i4_ref_id++)
   10455                         {
   10456                             wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
   10457                             log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
   10458 
   10459                             ps_search_node =
   10460                                 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
   10461 
   10462                             i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
   10463                                                      ((1 << log_wt_pred_val) >> 1)) >>
   10464                                                     log_wt_pred_val;
   10465 
   10466                             i4_local_cost_weighted_pred =
   10467                                 i4_local_weighted_sad +
   10468                                 (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
   10469                             //the loop is redundant as the results are already sorted based on total cost
   10470                             //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
   10471                             {
   10472                                 if(i4_local_cost_weighted_pred < min_cost)
   10473                                 {
   10474                                     min_cost = i4_local_cost_weighted_pred;
   10475                                     min_sad = i4_local_weighted_sad;
   10476                                 }
   10477                             }
   10478 
   10479                             /* For P frame, calculate the nearest poc which is either P or I frame*/
   10480                             if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
   10481                             {
   10482                                 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
   10483                                 {
   10484                                     cur_val =
   10485                                         ABS(ps_ctxt->i4_curr_poc -
   10486                                             ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
   10487                                     if(cur_val < max_val)
   10488                                     {
   10489                                         max_val = cur_val;
   10490                                         ref_id_of_nearest_poc = i4_ref_id;
   10491                                     }
   10492                                 }
   10493                             }
   10494                         }
   10495                         /* Store ME cost w.r.t. the past frame, only for P frames */
   10496                         if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
   10497                         {
   10498                             if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
   10499                             {
   10500                                 WORD16 i2_mvx, i2_mvy;
   10501 
   10502                                 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
   10503                                 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
   10504                                 WORD32 z_scan_idx =
   10505                                     gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
   10506                                 WORD32 wt, log_wt;
   10507 
   10508                                 /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
   10509                                 <= (1 + ps_ctxt->num_b_frms));*/
   10510 
   10511                                 /*obtain mvx and mvy */
   10512                                 i2_mvx =
   10513                                     ps_search_results
   10514                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
   10515                                         ->s_mv.i2_mvx;
   10516                                 i2_mvy =
   10517                                     ps_search_results
   10518                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
   10519                                         ->s_mv.i2_mvy;
   10520 
   10521                                 /* fetch the prediction weight and the rounding shift used to scale the SAD below */
   10522                                 wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
   10523                                 log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
   10524 
   10525                                 /*register the min cost for l1 me in blk context */
   10526                                 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
   10527                                     ((ps_search_results
   10528                                           ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
   10529                                           ->i4_sad *
   10530                                       wt) +
   10531                                      ((1 << log_wt) >> 1)) >>
   10532                                     log_wt;
   10533                                 ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
   10534                                     ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
   10535                                     (ps_search_results
   10536                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
   10537                                          ->i4_tot_cost -
   10538                                      ps_search_results
   10539                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
   10540                                          ->i4_sad);
   10541                                 /*for complexity change detection*/
   10542                                 ps_ctxt->i4_num_blks++;
   10543                                 if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
   10544                                    (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
   10545                                 {
   10546                                     ps_ctxt->i4_num_blks_high_sad++;
   10547                                 }
   10548                             }
   10549                         }
   10550                     }
   10551 
   10552                     /* EIID: Early inter intra decisions */
   10553                     /* tap L1 level SAD for inter intra decisions */
   10554                     if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
   10555                        (!ps_ctxt->s_frm_prms
   10556                              .is_i_pic))  //for high-quality preset->disable early decisions
   10557                     {
   10558                         if(1 == ps_refine_prms->i4_layer_id)
   10559                         {
   10560                             WORD32 i4_min_sad_cost_8x8_block = min_cost;
   10561                             ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
   10562                             WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
   10563                             WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
   10564                             WORD32 z_scan_idx =
   10565                                 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
   10566                             ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
   10567 
   10568                             /*register the min cost for l1 me in blk context */
   10569                             ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
   10570                                 i4_min_sad_cost_8x8_block;
   10571                             i4_num_comparisions++;
   10572 
   10573                             /* take early inter-intra decision here */
   10574                             ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
   10575 #if DISABLE_INTRA_IN_BPICS
   10576                             if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
   10577                                (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
   10578                             {
   10579                                 ps_curr_ed_blk_ctxt->intra_or_inter =
   10580                                     2; /*eval only inter if inter cost is less */
   10581                                 i4_num_inter_wins++;
   10582                             }
   10583                             else
   10584 #endif
   10585                             {
   10586                                 if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
   10587                                    ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
   10588                                      i4_threshold_multiplier) /
   10589                                     i4_threshold_divider))
   10590                                 {
   10591                                     ps_curr_ed_blk_ctxt->intra_or_inter =
   10592                                         2; /*eval only inter if inter cost is less */
   10593                                     i4_num_inter_wins++;
   10594                                 }
   10595                             }
   10596 
   10597                             //{
   10598                             //  DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
   10599                             //      blk_x,blk_y,
   10600                             //      i4_ctb_blk_ctr, i4_ctb_row_ctr,
   10601                             //      ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
   10602                             //      i4_min_sad_cost_8x8_block
   10603                             //      );
   10604                             //}
   10605 
   10606                         }  //end of layer-1
   10607                     }  //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
   10608                     else
   10609                     {
   10610                         if(1 == ps_refine_prms->i4_layer_id)
   10611                         {
   10612                             WORD32 i4_min_sad_cost_8x8_block = min_cost;
   10613                             WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
   10614                             WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
   10615                             WORD32 z_scan_idx =
   10616                                 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
   10617 
   10618                             /*register the min cost for l1 me in blk context */
   10619                             ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
   10620                                 i4_min_sad_cost_8x8_block;
   10621                         }
   10622                     }
   10623                     if(1 == ps_refine_prms->i4_layer_id)
   10624                     {
   10625                         WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
   10626                         WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
   10627                         WORD32 z_scan_idx =
   10628                             gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
   10629 
   10630                         ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
   10631                             min_sad;
   10632 
   10633                         if(min_cost <
   10634                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
   10635                         {
   10636                             ps_ctxt->i4_L1_hme_best_cost += min_cost;
   10637                             ps_ctxt->i4_L1_hme_sad += min_sad;
   10638                             ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
   10639                         }
   10640                         else
   10641                         {
   10642                             ps_ctxt->i4_L1_hme_best_cost +=
   10643                                 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
   10644                             ps_ctxt->i4_L1_hme_sad +=
   10645                                 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
   10646                             ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
   10647                                 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
   10648                         }
   10649                     }
   10650                 }
   10651             }
   10652 
   10653             /* Update the number of blocks processed in the current row */
   10654             if((ME_MEDIUM_SPEED > e_me_quality_presets))
   10655             {
   10656                 ihevce_dmgr_set_row_row_sync(
   10657                     pv_hme_dep_mngr,
   10658                     (i4_ctb_x + 1),
   10659                     blk_y,
   10660                     0 /* Col Tile No. : Not supported in PreEnc*/);
   10661             }
   10662         }
   10663 
   10664         /* set the output dependency after completion of row */
   10665         ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
   10666     }
   10667 }
   10668