Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /*****************************************************************************/
     22 /* File Includes                                                             */
     23 /*****************************************************************************/
     24 /* System include files */
     25 #include <stdio.h>
     26 #include <string.h>
     27 #include <stdlib.h>
     28 #include <assert.h>
     29 #include <stdarg.h>
     30 #include <math.h>
     31 #include <limits.h>
     32 
     33 /* User include files */
     34 #include "ihevc_typedefs.h"
     35 #include "itt_video_api.h"
     36 #include "ihevce_api.h"
     37 
     38 #include "rc_cntrl_param.h"
     39 #include "rc_frame_info_collector.h"
     40 #include "rc_look_ahead_params.h"
     41 
     42 #include "ihevc_defs.h"
     43 #include "ihevc_structs.h"
     44 #include "ihevc_platform_macros.h"
     45 #include "ihevc_deblk.h"
     46 #include "ihevc_itrans_recon.h"
     47 #include "ihevc_chroma_itrans_recon.h"
     48 #include "ihevc_chroma_intra_pred.h"
     49 #include "ihevc_intra_pred.h"
     50 #include "ihevc_inter_pred.h"
     51 #include "ihevc_mem_fns.h"
     52 #include "ihevc_padding.h"
     53 #include "ihevc_weighted_pred.h"
     54 #include "ihevc_sao.h"
     55 #include "ihevc_resi_trans.h"
     56 #include "ihevc_quant_iquant_ssd.h"
     57 #include "ihevc_cabac_tables.h"
     58 
     59 #include "ihevce_defs.h"
     60 #include "ihevce_lap_enc_structs.h"
     61 #include "ihevce_multi_thrd_structs.h"
     62 #include "ihevce_multi_thrd_funcs.h"
     63 #include "ihevce_me_common_defs.h"
     64 #include "ihevce_had_satd.h"
     65 #include "ihevce_error_codes.h"
     66 #include "ihevce_bitstream.h"
     67 #include "ihevce_cabac.h"
     68 #include "ihevce_rdoq_macros.h"
     69 #include "ihevce_function_selector.h"
     70 #include "ihevce_enc_structs.h"
     71 #include "ihevce_entropy_structs.h"
     72 #include "ihevce_cmn_utils_instr_set_router.h"
     73 #include "ihevce_enc_loop_structs.h"
     74 #include "ihevce_inter_pred.h"
     75 #include "ihevce_global_tables.h"
     76 #include "ihevce_dep_mngr_interface.h"
     77 #include "hme_datatype.h"
     78 #include "hme_interface.h"
     79 #include "hme_common_defs.h"
     80 #include "hme_defs.h"
     81 #include "ihevce_me_instr_set_router.h"
     82 #include "hme_globals.h"
     83 #include "hme_utils.h"
     84 #include "hme_coarse.h"
     85 #include "hme_fullpel.h"
     86 #include "hme_subpel.h"
     87 #include "hme_refine.h"
     88 #include "hme_err_compute.h"
     89 #include "hme_common_utils.h"
     90 #include "hme_search_algo.h"
     91 #include "ihevce_stasino_helpers.h"
     92 #include "ihevce_common_utils.h"
     93 
     94 /*****************************************************************************/
     95 /* Macros                                                                    */
     96 /*****************************************************************************/
     97 #define UNI_SATD_SCALE 1
     98 
     99 /*****************************************************************************/
    100 /* Function Definitions                                                      */
    101 /*****************************************************************************/
    102 void ihevce_open_loop_pred_data(
    103     me_frm_ctxt_t *ps_ctxt,
    104     inter_pu_results_t *ps_pu_results,
    105     U08 *pu1_src,
    106     U08 *pu1_temp_pred,
    107     S32 stride,
    108     S32 src_strd,
    109     UWORD8 e_part_id)
    110 {
    111     S32 best_sad_l0 = -1, best_sad_l1 = -1;
    112     S32 sad_diff, status;
    113     inter_pred_me_ctxt_t *ps_inter_pred_me_ctxt;
    114     U08 enable_bi = 0;
    115     pu_t s_pu;
    116 
    117     ps_inter_pred_me_ctxt = &ps_ctxt->s_mc_ctxt;
    118     ps_ctxt->i4_count++;
    119     /* L0*/
    120     if(ps_pu_results->u1_num_results_per_part_l0[e_part_id])
    121     {
    122         pu_result_t *ps_best_l0_pu;
    123         ps_best_l0_pu = ps_pu_results->aps_pu_results[0][PRT_2Nx2N];
    124         best_sad_l0 = ps_best_l0_pu->i4_tot_cost - ps_best_l0_pu->i4_mv_cost;
    125         s_pu.b2_pred_mode = PRED_L0;
    126         s_pu.b4_ht = ps_best_l0_pu->pu.b4_ht;
    127         s_pu.b4_wd = ps_best_l0_pu->pu.b4_wd;
    128         s_pu.b4_pos_x = ps_best_l0_pu->pu.b4_pos_x;
    129         s_pu.b4_pos_y = ps_best_l0_pu->pu.b4_pos_y;
    130         s_pu.b1_intra_flag = 0;
    131         s_pu.mv.s_l0_mv.i2_mvx = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvx;
    132         s_pu.mv.s_l0_mv.i2_mvy = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvy;
    133         s_pu.mv.i1_l0_ref_idx = ps_best_l0_pu->pu.mv.i1_l0_ref_idx;
    134     }
    135     /*L1*/
    136     if(ps_pu_results->u1_num_results_per_part_l1[e_part_id])
    137     {
    138         pu_result_t *ps_best_l1_pu;
    139         ps_best_l1_pu = ps_pu_results->aps_pu_results[1][PRT_2Nx2N];
    140         best_sad_l1 = ps_best_l1_pu->i4_tot_cost - ps_best_l1_pu->i4_mv_cost;
    141         s_pu.b2_pred_mode = PRED_L1;
    142         s_pu.b4_ht = ps_best_l1_pu->pu.b4_ht;
    143         s_pu.b4_wd = ps_best_l1_pu->pu.b4_wd;
    144         s_pu.b4_pos_x = ps_best_l1_pu->pu.b4_pos_x;
    145         s_pu.b4_pos_y = ps_best_l1_pu->pu.b4_pos_y;
    146         s_pu.b1_intra_flag = 0;
    147         s_pu.mv.s_l1_mv.i2_mvx = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvx;
    148         s_pu.mv.s_l1_mv.i2_mvy = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvy;
    149         s_pu.mv.i1_l1_ref_idx = ps_best_l1_pu->pu.mv.i1_l1_ref_idx;
    150     }
    151     ASSERT((best_sad_l0 != -1) || (best_sad_l1 != -1));
    152     /*bi selection*/
    153     if((best_sad_l0 != -1) && (best_sad_l1 != -1))
    154     {
    155         sad_diff = abs(best_sad_l0 - best_sad_l1);
    156         if((sad_diff < (best_sad_l0 * 0.15)) && (sad_diff < (best_sad_l1 * 0.15)))
    157         {
    158             enable_bi = 1;
    159             s_pu.b2_pred_mode = PRED_BI;
    160         }
    161         if(!enable_bi)
    162         {
    163             if(best_sad_l0 < best_sad_l1)
    164             {
    165                 s_pu.b2_pred_mode = PRED_L0;
    166             }
    167             else
    168             {
    169                 s_pu.b2_pred_mode = PRED_L1;
    170             }
    171         }
    172     }
    173     status = ihevce_luma_inter_pred_pu(ps_inter_pred_me_ctxt, &s_pu, pu1_temp_pred, stride, 1);
    174     if(status == -1)
    175     {
    176         ASSERT(0);
    177     }
    178 }
    179 
    180 /**
    181 ********************************************************************************
    182 *  @fn     void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
    183 *
    184 *  @brief  Allocates a block of size = i4_size from working memory and returns
    185 *
    186 *  @param[in,out] ps_buf_mgr: Buffer manager for wkg memory
    187 *
    188 *  @param[in]  i4_size : size required
    189 *
    190 *  @return void pointer to allocated memory, NULL if failure
    191 ********************************************************************************
    192 */
    193 void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
    194 {
    195     U08 *pu1_mem;
    196 
    197     if(ps_buf_mgr->i4_used + i4_size > ps_buf_mgr->i4_total)
    198         return NULL;
    199 
    200     pu1_mem = ps_buf_mgr->pu1_wkg_mem + ps_buf_mgr->i4_used;
    201     ps_buf_mgr->i4_used += i4_size;
    202 
    203     return ((void *)pu1_mem);
    204 }
    205 
    206 /**
    207 ********************************************************************************
    208 *  @fn     hme_init_histogram(
    209 *
    210 *  @brief  Top level entry point for Coarse ME. Runs across blocks and does the
    211 *          needful by calling other low level routines.
    212 *
    213 *  @param[in,out]  ps_hist : the histogram structure
    214 *
    215 *  @param[in]  i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
    216 *
    217 *  @param[in]  i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
    218 *
    219 *  @return None
    220 ********************************************************************************
    221 */
    222 
    223 void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y)
    224 {
    225     S32 i4_num_bins, i4_num_cols, i4_num_rows;
    226     S32 i4_shift_x, i4_shift_y, i, i4_range, i4_val;
    227 
    228     /*************************************************************************/
    229     /* Evaluate the shift_x and shift_y. For this, we use the following logic*/
    230     /* Assuming that we use up all MAX_NUM_BINS. Then the number of bins is  */
    231     /* given by formula ((max_mv_x * 2) >> shift_x)*((max_mv_y * 2)>>shift_y)*/
    232     /* or shift_x + shift_y is log ((max_mv_x * max_mv_y * 4) / MAX_NUM_BINS)*/
    233     /* if above quantity is negative, then we make it zero.                  */
    234     /* If result is odd, then shift_y is result >> 1, shift_x is shift_y + 1 */
    235     /*************************************************************************/
    236     i4_val = i4_max_mv_x * i4_max_mv_y * 4;
    237     i4_range = (hme_get_range(i4_val - 1)) + 1;
    238     if(i4_range > LOG_MAX_NUM_BINS)
    239     {
    240         i4_shift_y = (i4_range - LOG_MAX_NUM_BINS);
    241         i4_shift_x = (i4_shift_y + 1) >> 1;
    242         i4_shift_y >>= 1;
    243     }
    244     else
    245     {
    246         i4_shift_y = 0;
    247         i4_shift_x = 0;
    248     }
    249 
    250     /* we assume the mv range is -max_mv_x to +max_mv_x, ditto for y */
    251     /* So number of columns is 2*max_mv_x >> i4_shift_x. Ditto for rows */
    252     /* this helps us compute num bins that are active for this histo session */
    253     i4_num_cols = (i4_max_mv_x << 1) >> i4_shift_x;
    254     i4_num_rows = (i4_max_mv_y << 1) >> i4_shift_y;
    255     i4_num_bins = i4_num_rows * i4_num_cols;
    256 
    257     ASSERT(i4_num_bins <= MAX_NUM_BINS);
    258 
    259     ps_hist->i4_num_rows = i4_num_rows;
    260     ps_hist->i4_num_cols = i4_num_cols;
    261     ps_hist->i4_min_x = -i4_max_mv_x;
    262     ps_hist->i4_min_y = -i4_max_mv_y;
    263     ps_hist->i4_shift_x = i4_shift_x;
    264     ps_hist->i4_shift_y = i4_shift_y;
    265     ps_hist->i4_lobe1_size = 5;
    266     ps_hist->i4_lobe2_size = 3;
    267 
    268     ps_hist->i4_num_bins = i4_num_bins;
    269 
    270     for(i = 0; i < i4_num_bins; i++)
    271     {
    272         ps_hist->ai4_bin_count[i] = 0;
    273     }
    274 }
    275 
    276 /**
    277 ********************************************************************************
    278 *  @fn     hme_update_histogram(
    279 *
    280 *  @brief  Updates the histogram given an mv entry
    281 *
    282 *  @param[in,out]  ps_hist : the histogram structure
    283 *
    284 *  @param[in]  i4_mv_x : x component of the mv (fpel units)
    285 *
    286 *  @param[in]  i4_mv_y : y component of the mv (fpel units)
    287 *
    288 *  @return None
    289 ********************************************************************************
    290 */
    291 void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y)
    292 {
    293     S32 i4_bin_index, i4_col, i4_row;
    294 
    295     i4_col = (i4_mv_x - ps_hist->i4_min_x) >> ps_hist->i4_shift_x;
    296     i4_row = (i4_mv_y - ps_hist->i4_min_y) >> ps_hist->i4_shift_y;
    297 
    298     i4_bin_index = i4_col + (i4_row * ps_hist->i4_num_cols);
    299     /* Sanity Check */
    300     ASSERT(i4_bin_index < MAX_NUM_BINS);
    301 
    302     ps_hist->ai4_bin_count[i4_bin_index]++;
    303 }
    304 
    305 /**
    306 ********************************************************************************
    307 *  @fn     hme_get_global_mv(
    308 *
    309 *  @brief  returns the global mv of a previous picture. Accounts for the fact
    310 *          that the delta poc of the previous picture may have been different
    311 *          from delta poc of current picture. Delta poc is POC difference
    312 *          between a picture and its reference.
    313 *
    314 *  @param[out]  ps_mv: mv_t structure where the motion vector is returned
    315 *
    316 *  @param[in]  i4_delta_poc: the delta poc for the current pic w.r.t. reference
    317 *
    318 *  @return None
    319 ********************************************************************************
    320 */
    321 void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc)
    322 {
    323     S16 i2_mv_x, i2_mv_y;
    324     S32 i4_delta_poc_prev;
    325     S32 i4_poc_prev = ps_prev_layer->i4_poc;
    326     S32 i4_poc_prev_ref = ps_prev_layer->ai4_ref_id_to_poc_lc[0];
    327 
    328     i4_delta_poc_prev = i4_poc_prev - i4_poc_prev_ref;
    329     i2_mv_x = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_x;
    330     i2_mv_y = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_y;
    331 
    332     i2_mv_x = (S16)((i2_mv_x * i4_delta_poc) / i4_delta_poc_prev);
    333     i2_mv_y = (S16)((i2_mv_y * i4_delta_poc) / i4_delta_poc_prev);
    334 
    335     ps_mv->i2_mv_x = i2_mv_x;
    336     ps_mv->i2_mv_y = i2_mv_y;
    337 }
    338 
    339 /**
    340 ********************************************************************************
    341 *  @fn     hme_calculate_global_mv(
    342 *
    343 *  @brief  Calculates global mv for a given histogram
    344 *
    345 *  @param[in]  ps_hist : the histogram structure
    346 *
    347 *  @param[in]  ps_mv : used to return the global mv
    348 *
    349 *  @param[in]  e_lobe_type : refer to GMV_MVTYPE_T
    350 *
    351 *  @return None
    352 ********************************************************************************
    353 */
    354 void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type)
    355 {
    356     S32 i4_offset, i4_lobe_size, i4_y, i4_x, *pi4_bin_count;
    357     S32 i4_max_sum = -1;
    358     S32 i4_max_x = 0, i4_max_y = 0;
    359 
    360     if(e_lobe_type == GMV_THICK_LOBE)
    361         i4_lobe_size = ps_hist->i4_lobe1_size;
    362     else
    363         i4_lobe_size = ps_hist->i4_lobe2_size;
    364 
    365     i4_offset = i4_lobe_size >> 1;
    366     for(i4_y = i4_offset; i4_y < ps_hist->i4_num_rows - i4_offset; i4_y++)
    367     {
    368         for(i4_x = i4_offset; i4_x < ps_hist->i4_num_cols - i4_offset; i4_x++)
    369         {
    370             S32 i4_bin_id, i4_sum;
    371             i4_bin_id = (i4_x - 2) + ((i4_y - 2) * ps_hist->i4_num_cols);
    372 
    373             pi4_bin_count = &ps_hist->ai4_bin_count[i4_bin_id];
    374             i4_sum = hme_compute_2d_sum_unsigned(
    375                 (void *)pi4_bin_count,
    376                 i4_lobe_size,
    377                 i4_lobe_size,
    378                 ps_hist->i4_num_cols,
    379                 sizeof(U32));
    380 
    381             if(i4_sum > i4_max_sum)
    382             {
    383                 i4_max_x = i4_x;
    384                 i4_max_y = i4_y;
    385                 i4_max_sum = i4_sum;
    386             }
    387         }
    388     }
    389 
    390     ps_mv->i2_mv_y = (S16)((i4_max_y << ps_hist->i4_shift_y) + ps_hist->i4_min_y);
    391     ps_mv->i2_mv_x = (S16)((i4_max_x << ps_hist->i4_shift_x) + ps_hist->i4_min_x);
    392 }
    393 
    394 /**
    395 ********************************************************************************
    396 *  @fn    ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
    397 *
    398 *  @brief  returns a new ctb node usable for creating a new ctb candidate
    399 *
    400 *  @param[in] ps_mem_mgr : memory manager holding all ctb nodes
    401 *
    402 *  @return NULL if no free nodes, else ptr to the new ctb node
    403 ********************************************************************************
    404 */
    405 ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
    406 {
    407     U08 *pu1_ret;
    408     if((ps_mem_mgr->i4_used + ps_mem_mgr->i4_size) > ps_mem_mgr->i4_tot)
    409         return (NULL);
    410     pu1_ret = ps_mem_mgr->pu1_mem + ps_mem_mgr->i4_used;
    411     ps_mem_mgr->i4_used += ps_mem_mgr->i4_size;
    412     return ((ctb_node_t *)pu1_ret);
    413 }
    414 
    415 /**
    416 ********************************************************************************
    417 *  @fn     hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid,
    418 search_results_t *ps_search_results, S32 i4_num_ref)
    419 *
    420 *  @brief  For a given CU whose results are in ps_search_results, the 17x17
    421 *          mv grid is updated for future use within the CTB
    422 *
    423 *  @param[in] ps_search_results : Search results data structure
    424 *
    425 *  @param[out] pps_mv_grid: The mv grid (as many as num ref)
    426 *
    427 *  @param[in]  i4_num_ref: nuber of search iterations to update
    428 *
    429 *  @return None
    430 ********************************************************************************
    431 */
    432 void hme_map_mvs_to_grid(
    433     mv_grid_t **pps_mv_grid,
    434     search_results_t *ps_search_results,
    435     U08 *pu1_pred_dir_searched,
    436     S32 i4_num_pred_dir)
    437 {
    438     S32 i4_cu_start_offset;
    439     /*************************************************************************/
    440     /* Start x, y offset of CU relative to CTB. To update the mv grid which  */
    441     /* stores 1 mv per 4x4, we convert pixel offset to 4x4 blk offset        */
    442     /*************************************************************************/
    443     S32 i4_cu_offset_x = (S32)ps_search_results->u1_x_off >> 2;
    444     S32 i4_cu_offset_y = (S32)ps_search_results->u1_y_off >> 2;
    445 
    446     /* Controls the attribute of a given partition within CU   */
    447     /* , i.e. start locn, size                                 */
    448     part_attr_t *ps_part_attr;
    449 
    450     S32 i4_part, i4_part_id, num_parts, i4_stride;
    451     S16 i2_mv_x, i2_mv_y;
    452     S08 i1_ref_idx;
    453 
    454     /* Per partition, attributes w.r.t. CU start */
    455     S32 x_start, y_start, x_end, y_end, i4_x, i4_y;
    456     PART_TYPE_T e_part_type;
    457 
    458     /* Points to exact mv structures within the grid to be udpated */
    459     search_node_t *ps_grid_node, *ps_grid_node_tmp;
    460 
    461     /* points to exact mv grid (based on search iteration) to be updated */
    462     mv_grid_t *ps_mv_grid;
    463 
    464     search_node_t *ps_search_node;
    465 
    466     S32 shift, i, mv_shift = 2;
    467     /* Proportional to the size of CU, controls the number of 4x4 blks */
    468     /* to be updated                                                   */
    469     shift = ps_search_results->e_cu_size;
    470     ASSERT(i4_num_pred_dir <= 2);
    471 
    472     e_part_type = (PART_TYPE_T)ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
    473 
    474     if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
    475        (ps_search_results->i4_part_mask & ENABLE_NxN))
    476     {
    477         e_part_type = PRT_NxN;
    478     }
    479 
    480     for(i = 0; i < i4_num_pred_dir; i++)
    481     {
    482         num_parts = gau1_num_parts_in_part_type[e_part_type];
    483         ps_mv_grid = pps_mv_grid[pu1_pred_dir_searched[i]];
    484         i4_stride = ps_mv_grid->i4_stride;
    485 
    486         i4_cu_start_offset =
    487             i4_cu_offset_x + i4_cu_offset_y * i4_stride + ps_mv_grid->i4_start_offset;
    488 
    489         /* Move to the appropriate 2d locn of CU start within Grid */
    490         ps_grid_node = &ps_mv_grid->as_node[i4_cu_start_offset];
    491 
    492         for(i4_part = 0; i4_part < num_parts; i4_part++)
    493         {
    494             i4_part_id = ge_part_type_to_part_id[e_part_type][i4_part];
    495 
    496             /* Pick the mvx and y and ref id corresponding to this partition */
    497             ps_search_node =
    498                 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
    499 
    500             i2_mv_x = ps_search_node->s_mv.i2_mvx;
    501             i2_mv_y = ps_search_node->s_mv.i2_mvy;
    502             i1_ref_idx = ps_search_node->i1_ref_idx;
    503 
    504             /* Move to the appropriate location within the CU */
    505             ps_part_attr = &gas_part_attr_in_cu[i4_part_id];
    506             x_start = ps_part_attr->u1_x_start;
    507             x_end = x_start + ps_part_attr->u1_x_count;
    508             y_start = ps_part_attr->u1_y_start;
    509             y_end = y_start + ps_part_attr->u1_y_count;
    510 
    511             /* Convert attributes from 8x8 CU size to given CU size */
    512             x_start = (x_start << shift) >> mv_shift;
    513             x_end = (x_end << shift) >> mv_shift;
    514             y_start = (y_start << shift) >> mv_shift;
    515             y_end = (y_end << shift) >> mv_shift;
    516 
    517             ps_grid_node_tmp = ps_grid_node + y_start * i4_stride;
    518 
    519             /* Update all 4x4 blk mvs with the part mv */
    520             /* For e.g. we update 4 units in case of NxN for 16x16 CU */
    521             for(i4_y = y_start; i4_y < y_end; i4_y++)
    522             {
    523                 for(i4_x = x_start; i4_x < x_end; i4_x++)
    524                 {
    525                     ps_grid_node_tmp[i4_x].s_mv.i2_mvx = i2_mv_x;
    526                     ps_grid_node_tmp[i4_x].s_mv.i2_mvy = i2_mv_y;
    527                     ps_grid_node_tmp[i4_x].i1_ref_idx = i1_ref_idx;
    528                     ps_grid_node_tmp[i4_x].u1_subpel_done = 1;
    529                 }
    530                 ps_grid_node_tmp += i4_stride;
    531             }
    532         }
    533     }
    534 }
    535 
    536 void hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0, U08 *pu1_pred1, S32 i4_stride)
    537 {
    538     ps_parent->apu1_pred[0] = pu1_pred0;
    539     ps_parent->apu1_pred[1] = pu1_pred1;
    540     ps_parent->i4_pred_stride = i4_stride;
    541     if(ps_parent->ps_tl != NULL)
    542     {
    543         S32 blk_wd = (S32)ps_parent->ps_tr->u1_x_off;
    544         blk_wd -= (S32)ps_parent->u1_x_off;
    545 
    546         hme_set_ctb_pred_attr(ps_parent->ps_tl, pu1_pred0, pu1_pred1, i4_stride >> 1);
    547 
    548         hme_set_ctb_pred_attr(
    549             ps_parent->ps_tr, pu1_pred0 + blk_wd, pu1_pred1 + blk_wd, i4_stride >> 1);
    550 
    551         hme_set_ctb_pred_attr(
    552             ps_parent->ps_bl,
    553             pu1_pred0 + (blk_wd * i4_stride),
    554             pu1_pred1 + (blk_wd * i4_stride),
    555             i4_stride >> 1);
    556 
    557         hme_set_ctb_pred_attr(
    558             ps_parent->ps_tr,
    559             pu1_pred0 + (blk_wd * (1 + i4_stride)),
    560             pu1_pred1 + (blk_wd * (1 + i4_stride)),
    561             i4_stride >> 1);
    562     }
    563 }
    564 
    565 /**
    566 ********************************************************************************
    567 *  @fn     hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
    568 *
    569 *  @brief  Expands the part mask to a list of valid part ids terminated by -1
    570 *
    571 *  @param[in] i4_part_mask : bit mask of active partitino ids
    572 *
    573 *  @param[out] pi4_valid_part_ids : array, each entry has one valid part id
    574 *               Terminated by -1 to signal end.
    575 *
    576 *  @return number of partitions
    577 ********************************************************************************
    578 */
    579 S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
    580 {
    581     S32 id = 0, i;
    582     for(i = 0; i < TOT_NUM_PARTS; i++)
    583     {
    584         if(i4_part_mask & (1 << i))
    585         {
    586             pi4_valid_part_ids[id] = i;
    587             id++;
    588         }
    589     }
    590     pi4_valid_part_ids[id] = -1;
    591 
    592     return id;
    593 }
    594 
    595 ctb_boundary_attrs_t *
    596     get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt)
    597 {
    598     S32 horz_crop, vert_crop;
    599     ctb_boundary_attrs_t *ps_attrs;
    600 
    601     horz_crop = ((ctb_start_x + 64) > pic_wd) ? 2 : 0;
    602     vert_crop = ((ctb_start_y + 64) > pic_ht) ? 1 : 0;
    603     switch(horz_crop + vert_crop)
    604     {
    605     case 0:
    606         ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_CENTRE];
    607         break;
    608     case 1:
    609         ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_PIC_BOUNDARY];
    610         break;
    611     case 2:
    612         ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_RT_PIC_BOUNDARY];
    613         break;
    614     case 3:
    615         ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_RT_PIC_BOUNDARY];
    616         break;
    617     }
    618     return (ps_attrs);
    619 }
    620 
    621 /**
    622 ********************************************************************************
    623 *  @fn     hevc_avg_2d(U08 *pu1_src1,
    624 *                   U08 *pu1_src2,
    625 *                   S32 i4_src1_stride,
    626 *                   S32 i4_src2_stride,
    627 *                   S32 i4_blk_wd,
    628 *                   S32 i4_blk_ht,
    629 *                   U08 *pu1_dst,
    630 *                   S32 i4_dst_stride)
    631 *
    632 *
    633 *  @brief  point wise average of two buffers into a third buffer
    634 *
    635 *  @param[in] pu1_src1 : first source buffer
    636 *
    637 *  @param[in] pu1_src2 : 2nd source buffer
    638 *
    639 *  @param[in] i4_src1_stride : stride of source 1 buffer
    640 *
    641 *  @param[in] i4_src2_stride : stride of source 2 buffer
    642 *
    643 *  @param[in] i4_blk_wd : block width
    644 *
    645 *  @param[in] i4_blk_ht : block height
    646 *
    647 *  @param[out] pu1_dst : destination buffer
    648 *
    649 *  @param[in] i4_dst_stride : stride of the destination buffer
    650 *
    651 *  @return void
    652 ********************************************************************************
    653 */
    654 void hevc_avg_2d(
    655     U08 *pu1_src1,
    656     U08 *pu1_src2,
    657     S32 i4_src1_stride,
    658     S32 i4_src2_stride,
    659     S32 i4_blk_wd,
    660     S32 i4_blk_ht,
    661     U08 *pu1_dst,
    662     S32 i4_dst_stride)
    663 {
    664     S32 i, j;
    665 
    666     for(i = 0; i < i4_blk_ht; i++)
    667     {
    668         for(j = 0; j < i4_blk_wd; j++)
    669         {
    670             pu1_dst[j] = (pu1_src1[j] + pu1_src2[j] + 1) >> 1;
    671         }
    672         pu1_src1 += i4_src1_stride;
    673         pu1_src2 += i4_src2_stride;
    674         pu1_dst += i4_dst_stride;
    675     }
    676 }
    677 /**
    678 ********************************************************************************
    679 *  @fn     hme_pick_back_search_node(search_results_t *ps_search_results,
    680 *                                   search_node_t *ps_search_node_fwd,
    681 *                                   S32 i4_part_idx,
    682 *                                   layer_ctxt_t *ps_curr_layer)
    683 *
    684 *
    685 *  @brief  returns the search node corresponding to a ref idx in same or
    686 *          opp direction. Preference is given to opp direction, but if that
    687 *          does not yield results, same direction is attempted.
    688 *
    689 *  @param[in] ps_search_results: search results overall
    690 *
    691 *  @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
    692 *
    693 *  @param[in] i4_part_idx : partition id
    694 *
    695 *  @param[in] ps_curr_layer : layer context for current layer.
    696 *
    697 *  @return search node corresponding to hte "other direction"
    698 ********************************************************************************
    699 */
    700 //#define PICK_L1_REF_SAME_DIR
    701 search_node_t *hme_pick_back_search_node(
    702     search_results_t *ps_search_results,
    703     search_node_t *ps_search_node_fwd,
    704     S32 i4_part_idx,
    705     layer_ctxt_t *ps_curr_layer)
    706 {
    707     S32 is_past_l0, is_past_l1, id, i, i4_poc;
    708     S32 *pi4_ref_id_to_poc_lc = ps_curr_layer->ai4_ref_id_to_poc_lc;
    709     //ref_attr_t *ps_ref_attr_lc;
    710     S08 i1_ref_idx_fwd;
    711     S16 i2_mv_x, i2_mv_y;
    712     search_node_t *ps_search_node;
    713 
    714     i1_ref_idx_fwd = ps_search_node_fwd->i1_ref_idx;
    715     i2_mv_x = ps_search_node_fwd->s_mv.i2_mvx;
    716     i2_mv_y = ps_search_node_fwd->s_mv.i2_mvy;
    717     i4_poc = ps_curr_layer->i4_poc;
    718 
    719     //ps_ref_attr_lc = &ps_curr_layer->as_ref_attr_lc[0];
    720     /* If the ref id already picked up maps to a past pic, then we pick */
    721     /* a result corresponding to future pic. If such a result is not    */
    722     /* to be found, then we pick a result corresponding to a past pic   */
    723     //is_past = ps_ref_attr_lc[i1_ref_idx_fwd].u1_is_past;
    724     is_past_l0 = (i4_poc > pi4_ref_id_to_poc_lc[i1_ref_idx_fwd]) ? 1 : 0;
    725 
    726     ASSERT(ps_search_results->u1_num_active_ref <= 2);
    727 
    728     /* pick the right iteration of search nodes to pick up */
    729 #ifdef PICK_L1_REF_SAME_DIR
    730     if(ps_search_results->u1_num_active_ref == 2)
    731         id = !is_past_l0;
    732 #else
    733     if(ps_search_results->u1_num_active_ref == 2)
    734         id = is_past_l0;
    735 #endif
    736     else
    737         id = 0;
    738 
    739     ps_search_node = ps_search_results->aps_part_results[id][i4_part_idx];
    740 
    741     for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
    742     {
    743         S08 i1_ref_test = ps_search_node[i].i1_ref_idx;
    744         is_past_l1 = (pi4_ref_id_to_poc_lc[i1_ref_test] < i4_poc) ? 1 : 0;
    745         //if (ps_ref_attr_lc[ps_search_node[i].i1_ref_idx].u1_is_past != is_past)
    746 #ifdef PICK_L1_REF_SAME_DIR
    747         if(is_past_l1 == is_past_l0)
    748 #else
    749         if(is_past_l1 != is_past_l0)
    750 #endif
    751         {
    752             /* belongs to same direction as the ref idx passed, so continue */
    753             return (ps_search_node + i);
    754         }
    755     }
    756 
    757     /* Unable to find best result in opp direction, so try same direction */
    758     /* However we need to ensure that we do not pick up same result       */
    759     for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
    760     {
    761         if((ps_search_node->i1_ref_idx != i1_ref_idx_fwd) ||
    762            (ps_search_node->s_mv.i2_mvx != i2_mv_x) || (ps_search_node->s_mv.i2_mvy != i2_mv_y))
    763         {
    764             return (ps_search_node);
    765         }
    766         ps_search_node++;
    767     }
    768 
    769     //ASSERT(0);
    770     return (ps_search_results->aps_part_results[id][i4_part_idx]);
    771 
    772     //return (NULL);
    773 }
    774 
    775 /**
    776 ********************************************************************************
    777 *  @fn     hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride)
    778 *
    779 *
    780 *  @brief  Examines input 16x16 for possible edges and orientations of those,
    781 *          and returns a bit mask of partitions that should be searched for
    782 *
    783 *  @param[in] pu1_inp : input buffer
    784 *
    785 *  @param[in] i4_inp_stride: input stride
    786 *
    787 *  @return part mask (bit mask of active partitions to search)
    788 ********************************************************************************
    789 */
    790 
    791 S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions)
    792 {
    793     S32 i4_rsum[16], i4_csum[16];
    794     U08 *pu1_tmp, u1_tmp;
    795     S32 i4_max_ridx, i4_max_cidx, i4_tmp;
    796     S32 i, j, i4_ret;
    797     S32 i4_max_rp[4], i4_max_cp[4];
    798     S32 i4_seg_lutc[4] = { 0, ENABLE_nLx2N, ENABLE_Nx2N, ENABLE_nRx2N };
    799     S32 i4_seg_lutr[4] = { 0, ENABLE_2NxnU, ENABLE_2NxN, ENABLE_2NxnD };
    800 #define EDGE_THR (15 * 16)
    801 #define HI_PASS(ptr, i) (2 * (ptr[i] - ptr[i - 1]) + (ptr[i + 1] - ptr[i - 2]))
    802 
    803     if(0 == limit_active_partitions)
    804     {
    805         /*********************************************************************/
    806         /* In this case, we do not optimize on active partitions and search  */
    807         /* brute force. This way, 17 partitinos would be enabled.            */
    808         /*********************************************************************/
    809         return (ENABLE_ALL_PARTS);
    810     }
    811 
    812     /*************************************************************************/
    813     /* Control passes below in case we wish to optimize on active partitions.*/
    814     /* This is based on input characteristics, check how an edge passes along*/
    815     /* an input 16x16 area, if at all, and decide active partitinos.         */
    816     /*************************************************************************/
    817 
    818     /* Initialize row and col sums */
    819     for(i = 0; i < 16; i++)
    820     {
    821         i4_rsum[i] = 0;
    822         i4_csum[i] = 0;
    823     }
    824     pu1_tmp = pu1_inp;
    825     for(i = 0; i < 16; i++)
    826     {
    827         for(j = 0; j < 16; j++)
    828         {
    829             u1_tmp = *pu1_tmp++;
    830             i4_rsum[i] += u1_tmp;
    831             i4_csum[j] += u1_tmp;
    832         }
    833         pu1_tmp += (i4_inp_stride - 16);
    834     }
    835 
    836     /* 0 is dummy; 1 is 4; 2 is 8; 3 is 12 */
    837     i4_max_rp[0] = 0;
    838     i4_max_cp[0] = 0;
    839     i4_max_rp[1] = 0;
    840     i4_max_cp[1] = 0;
    841     i4_max_rp[2] = 0;
    842     i4_max_cp[2] = 0;
    843     i4_max_rp[3] = 0;
    844     i4_max_cp[3] = 0;
    845 
    846     /* Get Max edge strength across (2,3) (3,4) (4,5) */
    847     for(i = 3; i < 6; i++)
    848     {
    849         /* Run [-1 -2 2 1] filter through rsum/csum */
    850         i4_tmp = HI_PASS(i4_rsum, i);
    851         if(ABS(i4_tmp) > i4_max_rp[1])
    852             i4_max_rp[1] = i4_tmp;
    853 
    854         i4_tmp = HI_PASS(i4_csum, i);
    855         if(ABS(i4_tmp) > i4_max_cp[1])
    856             i4_max_cp[1] = i4_tmp;
    857     }
    858 
    859     /* Get Max edge strength across (6,7) (7,8) (8,9) */
    860     for(i = 7; i < 10; i++)
    861     {
    862         /* Run [-1 -2 2 1] filter through rsum/csum */
    863         i4_tmp = HI_PASS(i4_rsum, i);
    864         if(ABS(i4_tmp) > i4_max_rp[2])
    865             i4_max_rp[2] = i4_tmp;
    866 
    867         i4_tmp = HI_PASS(i4_csum, i);
    868         if(ABS(i4_tmp) > i4_max_cp[2])
    869             i4_max_cp[2] = i4_tmp;
    870     }
    871 
    872     /* Get Max edge strength across (10,11) (11,12) (12,13) */
    873     for(i = 11; i < 14; i++)
    874     {
    875         /* Run [-1 -2 2 1] filter through rsum/csum */
    876         i4_tmp = HI_PASS(i4_rsum, i);
    877         if(ABS(i4_tmp) > i4_max_rp[3])
    878             i4_max_rp[3] = i4_tmp;
    879 
    880         i4_tmp = HI_PASS(i4_csum, i);
    881         if(ABS(i4_tmp) > i4_max_cp[3])
    882             i4_max_cp[3] = i4_tmp;
    883     }
    884 
    885     /* Find the maximum across the 3 and see whether the strength qualifies as edge */
    886     i4_max_ridx = 1;
    887     i4_max_cidx = 1;
    888     for(i = 2; i <= 3; i++)
    889     {
    890         if(i4_max_rp[i] > i4_max_rp[i4_max_ridx])
    891             i4_max_ridx = i;
    892 
    893         if(i4_max_cp[i] > i4_max_cp[i4_max_cidx])
    894             i4_max_cidx = i;
    895     }
    896 
    897     if(EDGE_THR > i4_max_rp[i4_max_ridx])
    898     {
    899         i4_max_ridx = 0;
    900     }
    901 
    902     if(EDGE_THR > i4_max_cp[i4_max_cidx])
    903     {
    904         i4_max_cidx = 0;
    905     }
    906 
    907     i4_ret = ENABLE_2Nx2N;
    908 
    909     /* If only vertical discontinuity, go with one of 2Nx? */
    910     if(0 == (i4_max_ridx + i4_max_cidx))
    911     {
    912         //num_me_parts++;
    913         return i4_ret;
    914     }
    915 
    916     if(i4_max_ridx && (i4_max_cidx == 0))
    917     {
    918         //num_me_parts += 3;
    919         return ((i4_ret | i4_seg_lutr[i4_max_ridx]));
    920     }
    921 
    922     /* If only horizontal discontinuity, go with one of ?x2N */
    923     if(i4_max_cidx && (i4_max_ridx == 0))
    924     {
    925         //num_me_parts += 3;
    926         return ((i4_ret | i4_seg_lutc[i4_max_cidx]));
    927     }
    928 
    929     /* If middle is dominant in both directions, go with NxN */
    930     if((2 == i4_max_cidx) && (2 == i4_max_ridx))
    931     {
    932         //num_me_parts += 5;
    933         return ((i4_ret | ENABLE_NxN));
    934     }
    935 
    936     /* Otherwise, conservatively, enable NxN and the 2 AMPs */
    937     //num_me_parts += 9;
    938     return (i4_ret | ENABLE_NxN | i4_seg_lutr[i4_max_ridx] | i4_seg_lutc[i4_max_cidx]);
    939 }
    940 
    941 /**
    942 ********************************************************************************
    943 *  @fn     hme_init_search_results(search_results_t *ps_search_results,
    944 *                           S32 i4_num_ref,
    945 *                           S32 i4_num_best_results,
    946 *                           S32 i4_num_results_per_part,
    947 *                           BLK_SIZE_T e_blk_size,
    948 *                           S32 i4_x_off,
    949 *                           S32 i4_y_off)
    950 *
    951 *  @brief  Initializes the search results structure with some key attributes
    952 *
    953 *  @param[out] ps_search_results : search results structure to initialise
    954 *
    955 *  @param[in] i4_num_Ref: corresponds to the number of ref ids searched
    956 *
    957 *  @param[in] i4_num_best_results: Number of best results for the CU to
    958 *               be maintained in the result structure
    959 *
    960 *  @param[in] i4_num_results_per_part: Per active partition the number of best
    961 *               results to be maintained
    962 *
    963 *  @param[in] e_blk_size: blk size of the CU for which this structure used
    964 *
    965 *  @param[in] i4_x_off: x offset of the top left of CU from CTB top left
    966 *
    967 *  @param[in] i4_y_off: y offset of the top left of CU from CTB top left
    968 *
    969 *  @param[in] pu1_is_past : points ot an array that tells whether a given ref id
    970 *              has prominence in L0 or in L1 list (past or future )
    971 *
    972 *  @return void
    973 ********************************************************************************
    974 */
    975 void hme_init_search_results(
    976     search_results_t *ps_search_results,
    977     S32 i4_num_ref,
    978     S32 i4_num_best_results,
    979     S32 i4_num_results_per_part,
    980     BLK_SIZE_T e_blk_size,
    981     S32 i4_x_off,
    982     S32 i4_y_off,
    983     U08 *pu1_is_past)
    984 {
    985     CU_SIZE_T e_cu_size = ge_blk_size_to_cu_size[e_blk_size];
    986 
    987     ASSERT(e_cu_size != -1);
    988     ps_search_results->e_cu_size = e_cu_size;
    989     ps_search_results->u1_x_off = (U08)i4_x_off;
    990     ps_search_results->u1_y_off = (U08)i4_y_off;
    991     ps_search_results->u1_num_active_ref = (U08)i4_num_ref;
    992     ps_search_results->u1_num_best_results = (U08)i4_num_best_results;
    993     ps_search_results->u1_num_results_per_part = (U08)i4_num_results_per_part;
    994     ps_search_results->pu1_is_past = pu1_is_past;
    995     ps_search_results->u1_split_flag = 0;
    996     ps_search_results->best_cu_cost = MAX_32BIT_VAL;
    997 }
    998 
    999 /**
   1000 ********************************************************************************
   1001 *  @fn     hme_reset_search_results((search_results_t *ps_search_results,
   1002 *                               S32 i4_part_mask)
   1003 *
   1004 *
   1005 *  @brief  Resets the best results to maximum values, so as to allow search
   1006 *          for the new CU's partitions. The existing results may be from an
   1007 *          older CU using same structure.
   1008 *
   1009 *  @param[in] ps_search_results: search results structure
   1010 *
   1011 *  @param[in] i4_part_mask : bit mask of active partitions
   1012 *
   1013 *  @return part mask (bit mask of active partitions to search)
   1014 ********************************************************************************
   1015 */
   1016 void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res)
   1017 {
   1018     S32 i4_num_ref = (S32)ps_search_results->u1_num_active_ref;
   1019     S08 i1_ref_idx;
   1020     S32 i, j;
   1021     search_node_t *ps_search_node;
   1022 
   1023     /* store this for future use */
   1024     ps_search_results->i4_part_mask = i4_part_mask;
   1025 
   1026     /* Reset the spli_flag to zero */
   1027     ps_search_results->u1_split_flag = 0;
   1028 
   1029     HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[0]), mv_res);
   1030     HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[1]), mv_res);
   1031 
   1032     for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
   1033     {
   1034         /* Reset the individual partitino results */
   1035         for(i = 0; i < TOT_NUM_PARTS; i++)
   1036         {
   1037             if(!(i4_part_mask & (1 << i)))
   1038                 continue;
   1039 
   1040             ps_search_node = ps_search_results->aps_part_results[i1_ref_idx][i];
   1041 
   1042             for(j = 0; j < ps_search_results->u1_num_results_per_part; j++)
   1043             {
   1044                 ps_search_node[j].s_mv.i2_mvx = 0;
   1045                 ps_search_node[j].s_mv.i2_mvy = 0;
   1046                 ps_search_node[j].i4_tot_cost = MAX_32BIT_VAL;
   1047                 ps_search_node[j].i4_sad = MAX_32BIT_VAL;
   1048                 ps_search_node[j].i4_sdi = 0;
   1049                 ps_search_node[j].i1_ref_idx = -1;
   1050                 ps_search_node[j].u1_subpel_done = 0;
   1051                 ps_search_node[j].u1_is_avail = 1;
   1052                 ps_search_node[j].i4_mv_cost = 0;
   1053             }
   1054         }
   1055     }
   1056 }
   1057 /**
   1058 ********************************************************************************
   1059 *  @fn     hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
   1060 *                               S32 i4_step,
   1061 *                               range_prms_t *ps_mvrange)
   1062 *
   1063 *  @brief  Given a central pt within mv range, and a grid of points surrounding
   1064 *           this pt, this function returns a grid mask of pts within search rng
   1065 *
   1066 *  @param[in] ps_search_node: the centre pt of the grid
   1067 *
   1068 *  @param[in] i4_step: step size of grid
   1069 *
   1070 *  @param[in] ps_mvrange: structure containing the current mv range
   1071 *
   1072 *  @return bitmask of the  pts in grid within search range
   1073 ********************************************************************************
   1074 */
   1075 S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange)
   1076 {
   1077     S32 i4_mask = GRID_ALL_PTS_VALID;
   1078     if(ps_search_node->s_mv.i2_mvx + i4_step >= ps_mvrange->i2_max_x)
   1079     {
   1080         i4_mask &= (GRID_RT_3_INVALID);
   1081     }
   1082     if(ps_search_node->s_mv.i2_mvx - i4_step < ps_mvrange->i2_min_x)
   1083     {
   1084         i4_mask &= (GRID_LT_3_INVALID);
   1085     }
   1086     if(ps_search_node->s_mv.i2_mvy + i4_step >= ps_mvrange->i2_max_y)
   1087     {
   1088         i4_mask &= (GRID_BOT_3_INVALID);
   1089     }
   1090     if(ps_search_node->s_mv.i2_mvy - i4_step < ps_mvrange->i2_min_y)
   1091     {
   1092         i4_mask &= (GRID_TOP_3_INVALID);
   1093     }
   1094     return i4_mask;
   1095 }
   1096 
   1097 /**
   1098 ********************************************************************************
   1099 *  @fn    layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
   1100 S32 i4_layer_id)
   1101 *
   1102 *  @brief  returns the layer ctxt of the layer with given id from the temporally
   1103 *          previous frame
   1104 *
   1105 *  @param[in] ps_ctxt : ME context
   1106 *
   1107 *  @param[in] i4_layer_id : id of layer required
   1108 *
   1109 *  @return layer ctxt of given layer id in temporally previous frame
   1110 ********************************************************************************
   1111 */
   1112 layer_ctxt_t *hme_get_past_layer_ctxt(
   1113     me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel)
   1114 {
   1115     S32 i4_poc = ps_frm_ctxt->ai4_ref_idx_to_poc_lc[0];
   1116     S32 i;
   1117     layers_descr_t *ps_desc;
   1118 
   1119     for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
   1120     {
   1121         ps_desc = &ps_ctxt->as_ref_descr[i];
   1122         if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
   1123             return (ps_desc->aps_layers[i4_layer_id]);
   1124     }
   1125     return NULL;
   1126 }
   1127 
   1128 /**
   1129 ********************************************************************************
   1130 *  @fn    layer_ctxt_t *hme_coarse_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
   1131 S32 i4_layer_id)
   1132 *
   1133 *  @brief  returns the layer ctxt of the layer with given id from the temporally
   1134 *          previous frame
   1135 *
   1136 *  @param[in] ps_ctxt : ME context
   1137 *
   1138 *  @param[in] i4_layer_id : id of layer required
   1139 *
   1140 *  @return layer ctxt of given layer id in temporally previous frame
   1141 ********************************************************************************
   1142 */
   1143 layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id)
   1144 {
   1145     S32 i4_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[0];
   1146     S32 i;
   1147     layers_descr_t *ps_desc;
   1148 
   1149     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
   1150     {
   1151         ps_desc = &ps_ctxt->as_ref_descr[i];
   1152         if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
   1153             return (ps_desc->aps_layers[i4_layer_id]);
   1154     }
   1155     return NULL;
   1156 }
   1157 
   1158 /**
   1159 ********************************************************************************
   1160 *  @fn    void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt,
   1161 BLK_SIZE_T e_blk_size,
   1162 S32 i4_num_ref,
   1163 S32 i4_num_results_per_part)
   1164 *
   1165 *  @brief  Given a blk size to be used for this layer, this function initialize
   1166 *          the mv bank to make it ready to store and return results.
   1167 *
   1168 *  @param[in, out] ps_layer_ctxt: pointer to layer ctxt
   1169 *
   1170 *  @param[in] e_blk_size : resolution at which mvs are stored
   1171 *
   1172 *  @param[in] i4_num_ref: number of reference frames corresponding to which
   1173 *              results are stored.
   1174 *
   1175 *  @param[in] e_blk_size : resolution at which mvs are stored
   1176 *
   1177 *  @param[in] i4_num_results_per_part : Number of results to be stored per
   1178 *               ref idx. So these many best results stored
   1179 *
   1180 *  @return void
   1181 ********************************************************************************
   1182 */
   1183 void hme_init_mv_bank(
   1184     layer_ctxt_t *ps_layer_ctxt,
   1185     BLK_SIZE_T e_blk_size,
   1186     S32 i4_num_ref,
   1187     S32 i4_num_results_per_part,
   1188     U08 u1_enc)
   1189 {
   1190     layer_mv_t *ps_mv_bank;
   1191     hme_mv_t *ps_mv1, *ps_mv2;
   1192     S08 *pi1_ref_id1, *pi1_ref_id2;
   1193     S32 blk_wd, mvs_in_blk, blks_in_row, mvs_in_row, blks_in_col;
   1194     S32 i4_i, i4_j, blk_ht;
   1195 
   1196     ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
   1197     ps_mv_bank->i4_num_mvs_per_ref = i4_num_results_per_part;
   1198     ps_mv_bank->i4_num_ref = i4_num_ref;
   1199     mvs_in_blk = i4_num_ref * i4_num_results_per_part;
   1200     ps_mv_bank->i4_num_mvs_per_blk = mvs_in_blk;
   1201 
   1202     /*************************************************************************/
   1203     /* Store blk size, from blk size derive blk width and use this to compute*/
   1204     /* number of blocks every row. We also pad to left and top by 1, to      */
   1205     /* support the prediction mechanism.                                     */
   1206     /*************************************************************************/
   1207     ps_mv_bank->e_blk_size = e_blk_size;
   1208     blk_wd = gau1_blk_size_to_wd[e_blk_size];
   1209     blk_ht = gau1_blk_size_to_ht[e_blk_size];
   1210 
   1211     blks_in_row = (ps_layer_ctxt->i4_wd + (blk_wd - 1)) / blk_wd;
   1212     blks_in_col = (ps_layer_ctxt->i4_ht + (blk_ht - 1)) / blk_ht;
   1213 
   1214     if(u1_enc)
   1215     {
   1216         /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
   1217         WORD32 num_ctb_cols = ((ps_layer_ctxt->i4_wd + 63) >> 6);
   1218         WORD32 num_ctb_rows = ((ps_layer_ctxt->i4_ht + 63) >> 6);
   1219 
   1220         blks_in_row = (num_ctb_cols << 3);
   1221         blks_in_col = (num_ctb_rows << 3);
   1222     }
   1223 
   1224     blks_in_row += 2;
   1225     mvs_in_row = blks_in_row * mvs_in_blk;
   1226 
   1227     ps_mv_bank->i4_num_blks_per_row = blks_in_row;
   1228     ps_mv_bank->i4_num_mvs_per_row = mvs_in_row;
   1229 
   1230     /* To ensure run time requirements fall within allocation time request */
   1231     ASSERT(ps_mv_bank->i4_num_mvs_per_row <= ps_mv_bank->max_num_mvs_per_row);
   1232 
   1233     /*************************************************************************/
   1234     /* Increment by one full row at top for padding and one column in left   */
   1235     /* this gives us the actual start of mv for 0,0 blk                      */
   1236     /*************************************************************************/
   1237     ps_mv_bank->ps_mv = ps_mv_bank->ps_mv_base + mvs_in_row + mvs_in_blk;
   1238     ps_mv_bank->pi1_ref_idx = ps_mv_bank->pi1_ref_idx_base + mvs_in_row + mvs_in_blk;
   1239 
   1240     memset(ps_mv_bank->ps_mv_base, 0, mvs_in_row * sizeof(hme_mv_t));
   1241     memset(ps_mv_bank->pi1_ref_idx_base, -1, mvs_in_row * sizeof(U08));
   1242 
   1243     /*************************************************************************/
   1244     /* Initialize top row, left col and right col with zeros since these are */
   1245     /* used as candidates during searches.                                   */
   1246     /*************************************************************************/
   1247     ps_mv1 = ps_mv_bank->ps_mv_base + mvs_in_row;
   1248     ps_mv2 = ps_mv1 + mvs_in_row - mvs_in_blk;
   1249     pi1_ref_id1 = ps_mv_bank->pi1_ref_idx_base + mvs_in_row;
   1250     pi1_ref_id2 = pi1_ref_id1 + mvs_in_row - mvs_in_blk;
   1251     for(i4_i = 0; i4_i < blks_in_col; i4_i++)
   1252     {
   1253         for(i4_j = 0; i4_j < mvs_in_blk; i4_j++)
   1254         {
   1255             ps_mv1[i4_j].i2_mv_x = 0;
   1256             ps_mv1[i4_j].i2_mv_y = 0;
   1257             ps_mv2[i4_j].i2_mv_x = 0;
   1258             ps_mv2[i4_j].i2_mv_y = 0;
   1259             pi1_ref_id1[i4_j] = -1;
   1260             pi1_ref_id2[i4_j] = -1;
   1261         }
   1262         ps_mv1 += mvs_in_row;
   1263         ps_mv2 += mvs_in_row;
   1264         pi1_ref_id1 += mvs_in_row;
   1265         pi1_ref_id2 += mvs_in_row;
   1266     }
   1267 }
   1268 void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
   1269 {
   1270     layer_mv_t *ps_mv_bank;
   1271     hme_mv_t *ps_mv;
   1272     S08 *pi1_ref_id;
   1273     S32 blk_wd, blks_in_row, mvs_in_row, blks_in_col;
   1274     S32 i, j, blk_ht;
   1275     BLK_SIZE_T e_blk_size;
   1276 
   1277     ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
   1278 
   1279     /*************************************************************************/
   1280     /* Store blk size, from blk size derive blk width and use this to compute*/
   1281     /* number of blocks every row. We also pad to left and top by 1, to      */
   1282     /* support the prediction mechanism.                                     */
   1283     /*************************************************************************/
   1284     e_blk_size = ps_mv_bank->e_blk_size;
   1285     blk_wd = gau1_blk_size_to_wd[e_blk_size];
   1286     blk_ht = gau1_blk_size_to_wd[e_blk_size];
   1287     blks_in_row = ps_layer_ctxt->i4_wd / blk_wd;
   1288     blks_in_col = ps_layer_ctxt->i4_ht / blk_ht;
   1289     mvs_in_row = blks_in_row * ps_mv_bank->i4_num_mvs_per_blk;
   1290 
   1291     /*************************************************************************/
   1292     /* Increment by one full row at top for padding and one column in left   */
   1293     /* this gives us the actual start of mv for 0,0 blk                      */
   1294     /*************************************************************************/
   1295     ps_mv = ps_mv_bank->ps_mv;
   1296     pi1_ref_id = ps_mv_bank->pi1_ref_idx;
   1297 
   1298     for(i = 0; i < blks_in_col; i++)
   1299     {
   1300         for(j = 0; j < blks_in_row; j++)
   1301         {
   1302             ps_mv[j].i2_mv_x = INTRA_MV;
   1303             ps_mv[j].i2_mv_y = INTRA_MV;
   1304             pi1_ref_id[j] = -1;
   1305         }
   1306         ps_mv += ps_mv_bank->i4_num_mvs_per_row;
   1307         pi1_ref_id += ps_mv_bank->i4_num_mvs_per_row;
   1308     }
   1309 }
   1310 
   1311 /**
   1312 ********************************************************************************
   1313 *  @fn    void hme_derive_search_range(range_prms_t *ps_range,
   1314 *                                   range_prms_t *ps_pic_limit,
   1315 *                                   range_prms_t *ps_mv_limit,
   1316 *                                   S32 i4_x,
   1317 *                                   S32 i4_y,
   1318 *                                   S32 blk_wd,
   1319 *                                   S32 blk_ht)
   1320 *
   1321 *  @brief  given picture limits and blk dimensions and mv search limits, obtains
   1322 *          teh valid search range such that the blk stays within pic boundaries,
   1323 *          where picture boundaries include padded portions of picture
   1324 *
   1325 *  @param[out] ps_range: updated with actual search range
   1326 *
   1327 *  @param[in] ps_pic_limit : picture boundaries
   1328 *
   1329 *  @param[in] ps_mv_limit: Search range limits for the mvs
   1330 *
   1331 *  @param[in] i4_x : x coordinate of the blk
   1332 *
   1333 *  @param[in] i4_y : y coordinate of the blk
   1334 *
   1335 *  @param[in] blk_wd : blk width
   1336 *
   1337 *  @param[in] blk_ht : blk height
   1338 *
   1339 *  @return void
   1340 ********************************************************************************
   1341 */
   1342 void hme_derive_search_range(
   1343     range_prms_t *ps_range,
   1344     range_prms_t *ps_pic_limit,
   1345     range_prms_t *ps_mv_limit,
   1346     S32 i4_x,
   1347     S32 i4_y,
   1348     S32 blk_wd,
   1349     S32 blk_ht)
   1350 {
   1351     ps_range->i2_max_x =
   1352         MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)i4_x), ps_mv_limit->i2_max_x);
   1353     ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
   1354     ps_range->i2_max_y =
   1355         MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)i4_y), ps_mv_limit->i2_max_y);
   1356     ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
   1357 }
   1358 
   1359 /**
   1360 ********************************************************************************
   1361 *  @fn    void hme_get_spatial_candt(search_node_t *ps_search_node,
   1362 *                                   layer_ctxt_t *ps_curr_layer,
   1363 *                                   S32 i4_blk_x,
   1364 *                                   S32 i4_blk_y,
   1365 *                                   S08 i1_ref_id,
   1366 *                                   S32 i4_result_id)
   1367 *
   1368 *  @brief  obtains a candt from the same mv bank as the current one, its called
   1369 *          spatial candt as it does not require scaling for temporal distances
   1370 *
   1371 *  @param[out] ps_search_node: mv and ref id updated here of the candt
   1372 *
   1373 *  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
   1374 *
   1375 *  @param[in] i4_blk_x : x coordinate of the block in mv bank
   1376 *
   1377 *  @param[in] i4_blk_y : y coordinate of the block in mv bank
   1378 *
   1379 *  @param[in] i1_ref_id : Corresponds to ref idx from which to pick up mv
   1380 *              results, useful if multiple ref idx candts maintained separately.
   1381 *
   1382 *  @param[in] i4_result_id : If multiple results stored per ref idx, this
   1383 *              pts to the id of the result
   1384 *
   1385 *  @param[in] tr_avail : top right availability of the block
   1386 *
   1387 *  @param[in] bl_avail : bottom left availability of the block
   1388 *
   1389 *  @return void
   1390 ********************************************************************************
   1391 */
   1392 void hme_get_spatial_candt(
   1393     layer_ctxt_t *ps_curr_layer,
   1394     BLK_SIZE_T e_search_blk_size,
   1395     S32 i4_blk_x,
   1396     S32 i4_blk_y,
   1397     S08 i1_ref_idx,
   1398     search_node_t *ps_top_neighbours,
   1399     search_node_t *ps_left_neighbours,
   1400     S32 i4_result_id,
   1401     S32 tr_avail,
   1402     S32 bl_avail,
   1403     S32 encode)
   1404 
   1405 {
   1406     layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
   1407     S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
   1408     S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
   1409     search_node_t *ps_search_node;
   1410     S32 i4_offset;
   1411     hme_mv_t *ps_mv, *ps_mv_base;
   1412     S08 *pi1_ref_idx, *pi1_ref_idx_base;
   1413     S32 jump = 1, mvs_in_blk, mvs_in_row;
   1414     S32 shift = (encode ? 2 : 0);
   1415 
   1416     if(i4_blk_size1 != i4_blk_size2)
   1417     {
   1418         i4_blk_x <<= 1;
   1419         i4_blk_y <<= 1;
   1420         jump = 2;
   1421         if((i4_blk_size1 << 2) == i4_blk_size2)
   1422         {
   1423             i4_blk_x <<= 1;
   1424             i4_blk_y <<= 1;
   1425             jump = 4;
   1426         }
   1427     }
   1428 
   1429     mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
   1430     mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
   1431 
   1432     /* Adjust teh blk coord to point to top left locn */
   1433     i4_blk_x -= 1;
   1434     i4_blk_y -= 1;
   1435     /* Pick up the mvs from the location */
   1436     i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
   1437     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
   1438 
   1439     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
   1440     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
   1441 
   1442     ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
   1443     pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
   1444 
   1445     ps_mv_base = ps_mv;
   1446     pi1_ref_idx_base = pi1_ref_idx;
   1447 
   1448     /* ps_mv and pi1_ref_idx now point to the top left locn */
   1449     /* Get 4 mvs as follows:                                */
   1450     ps_search_node = ps_top_neighbours;
   1451     COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
   1452 
   1453     /* Move to top */
   1454     ps_search_node++;
   1455     ps_mv += mvs_in_blk;
   1456     pi1_ref_idx += mvs_in_blk;
   1457     COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
   1458 
   1459     /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
   1460     if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
   1461     {
   1462         ps_search_node++;
   1463         ps_mv += (mvs_in_blk * (jump >> 1));
   1464         pi1_ref_idx += (mvs_in_blk * (jump >> 1));
   1465         COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
   1466     }
   1467     else
   1468     {
   1469         ps_search_node++;
   1470         ps_search_node->s_mv.i2_mvx = 0;
   1471         ps_search_node->s_mv.i2_mvy = 0;
   1472         ps_search_node->i1_ref_idx = i1_ref_idx;
   1473         ps_search_node->u1_is_avail = 0;
   1474         ps_search_node->u1_subpel_done = 0;
   1475     }
   1476 
   1477     /* Move to tr: this will be tr w.r.t. the blk being searched */
   1478     ps_search_node++;
   1479     if(tr_avail == 0)
   1480     {
   1481         ps_search_node->s_mv.i2_mvx = 0;
   1482         ps_search_node->s_mv.i2_mvy = 0;
   1483         ps_search_node->i1_ref_idx = i1_ref_idx;
   1484         ps_search_node->u1_is_avail = 0;
   1485         ps_search_node->u1_subpel_done = 0;
   1486     }
   1487     else
   1488     {
   1489         ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
   1490         pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
   1491         COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
   1492     }
   1493 
   1494     /* Move to left */
   1495     ps_search_node = ps_left_neighbours;
   1496     ps_mv = ps_mv_base + mvs_in_row;
   1497     pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
   1498     COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
   1499 
   1500     /* Move to l1 */
   1501     if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
   1502     {
   1503         ps_search_node++;
   1504         ps_mv += (mvs_in_row * (jump >> 1));
   1505         pi1_ref_idx += (mvs_in_row * (jump >> 1));
   1506         COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
   1507     }
   1508     else
   1509     {
   1510         ps_search_node++;
   1511         ps_search_node->s_mv.i2_mvx = 0;
   1512         ps_search_node->s_mv.i2_mvy = 0;
   1513         ps_search_node->i1_ref_idx = i1_ref_idx;
   1514         ps_search_node->u1_is_avail = 0;
   1515         ps_search_node->u1_subpel_done = 0;
   1516     }
   1517 
   1518     /* Move to bl */
   1519     ps_search_node++;
   1520     if(bl_avail == 0)
   1521     {
   1522         ps_search_node->s_mv.i2_mvx = 0;
   1523         ps_search_node->s_mv.i2_mvy = 0;
   1524         ps_search_node->i1_ref_idx = i1_ref_idx;
   1525         ps_search_node->u1_is_avail = 0;
   1526     }
   1527     else
   1528     {
   1529         ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
   1530         pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
   1531         COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
   1532     }
   1533 }
   1534 
   1535 void hme_get_spatial_candt_in_l1_me(
   1536     layer_ctxt_t *ps_curr_layer,
   1537     BLK_SIZE_T e_search_blk_size,
   1538     S32 i4_blk_x,
   1539     S32 i4_blk_y,
   1540     S08 i1_ref_idx,
   1541     U08 u1_pred_dir,
   1542     search_node_t *ps_top_neighbours,
   1543     search_node_t *ps_left_neighbours,
   1544     S32 i4_result_id,
   1545     S32 tr_avail,
   1546     S32 bl_avail,
   1547     S32 i4_num_act_ref_l0,
   1548     S32 i4_num_act_ref_l1)
   1549 {
   1550     search_node_t *ps_search_node;
   1551     hme_mv_t *ps_mv, *ps_mv_base;
   1552 
   1553     S32 i4_offset;
   1554     S32 mvs_in_blk, mvs_in_row;
   1555     S08 *pi1_ref_idx, *pi1_ref_idx_base;
   1556     S32 i4_mv_pos_in_implicit_array;
   1557 
   1558     layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
   1559 
   1560     S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
   1561     S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
   1562     S32 jump = 1;
   1563     S32 shift = 0;
   1564     S32 i4_num_results_in_given_dir =
   1565         ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l1)
   1566                             : (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0));
   1567 
   1568     if(i4_blk_size1 != i4_blk_size2)
   1569     {
   1570         i4_blk_x <<= 1;
   1571         i4_blk_y <<= 1;
   1572         jump = 2;
   1573         if((i4_blk_size1 << 2) == i4_blk_size2)
   1574         {
   1575             i4_blk_x <<= 1;
   1576             i4_blk_y <<= 1;
   1577             jump = 4;
   1578         }
   1579     }
   1580 
   1581     mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
   1582     mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
   1583 
   1584     /* Adjust the blk coord to point to top left locn */
   1585     i4_blk_x -= 1;
   1586     i4_blk_y -= 1;
   1587     /* Pick up the mvs from the location */
   1588     i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
   1589     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
   1590 
   1591     i4_offset +=
   1592         ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0) : 0);
   1593 
   1594     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
   1595     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
   1596 
   1597     ps_mv_base = ps_mv;
   1598     pi1_ref_idx_base = pi1_ref_idx;
   1599 
   1600     /* TL */
   1601     {
   1602         /* ps_mv and pi1_ref_idx now point to the top left locn */
   1603         ps_search_node = ps_top_neighbours;
   1604 
   1605         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
   1606             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
   1607 
   1608         if(-1 != i4_mv_pos_in_implicit_array)
   1609         {
   1610             COPY_MV_TO_SEARCH_NODE(
   1611                 ps_search_node,
   1612                 &ps_mv[i4_mv_pos_in_implicit_array],
   1613                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
   1614                 i1_ref_idx,
   1615                 shift);
   1616         }
   1617         else
   1618         {
   1619             ps_search_node->u1_is_avail = 0;
   1620             ps_search_node->s_mv.i2_mvx = 0;
   1621             ps_search_node->s_mv.i2_mvy = 0;
   1622             ps_search_node->i1_ref_idx = i1_ref_idx;
   1623         }
   1624     }
   1625 
   1626     /* Move to top */
   1627     {
   1628         /* ps_mv and pi1_ref_idx now point to the top left locn */
   1629         ps_search_node++;
   1630         ps_mv += mvs_in_blk;
   1631         pi1_ref_idx += mvs_in_blk;
   1632 
   1633         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
   1634             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
   1635 
   1636         if(-1 != i4_mv_pos_in_implicit_array)
   1637         {
   1638             COPY_MV_TO_SEARCH_NODE(
   1639                 ps_search_node,
   1640                 &ps_mv[i4_mv_pos_in_implicit_array],
   1641                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
   1642                 i1_ref_idx,
   1643                 shift);
   1644         }
   1645         else
   1646         {
   1647             ps_search_node->u1_is_avail = 0;
   1648             ps_search_node->s_mv.i2_mvx = 0;
   1649             ps_search_node->s_mv.i2_mvy = 0;
   1650             ps_search_node->i1_ref_idx = i1_ref_idx;
   1651         }
   1652     }
   1653 
   1654     /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
   1655     if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
   1656     {
   1657         ps_search_node++;
   1658         ps_mv += (mvs_in_blk * (jump >> 1));
   1659         pi1_ref_idx += (mvs_in_blk * (jump >> 1));
   1660 
   1661         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
   1662             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
   1663 
   1664         if(-1 != i4_mv_pos_in_implicit_array)
   1665         {
   1666             COPY_MV_TO_SEARCH_NODE(
   1667                 ps_search_node,
   1668                 &ps_mv[i4_mv_pos_in_implicit_array],
   1669                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
   1670                 i1_ref_idx,
   1671                 shift);
   1672         }
   1673         else
   1674         {
   1675             ps_search_node->u1_is_avail = 0;
   1676             ps_search_node->s_mv.i2_mvx = 0;
   1677             ps_search_node->s_mv.i2_mvy = 0;
   1678             ps_search_node->i1_ref_idx = i1_ref_idx;
   1679         }
   1680     }
   1681     else
   1682     {
   1683         ps_search_node++;
   1684         ps_search_node->u1_is_avail = 0;
   1685         ps_search_node->s_mv.i2_mvx = 0;
   1686         ps_search_node->s_mv.i2_mvy = 0;
   1687         ps_search_node->i1_ref_idx = i1_ref_idx;
   1688     }
   1689 
   1690     /* Move to tr: this will be tr w.r.t. the blk being searched */
   1691     ps_search_node++;
   1692     if(tr_avail == 0)
   1693     {
   1694         ps_search_node->s_mv.i2_mvx = 0;
   1695         ps_search_node->s_mv.i2_mvy = 0;
   1696         ps_search_node->i1_ref_idx = i1_ref_idx;
   1697         ps_search_node->u1_is_avail = 0;
   1698         ps_search_node->u1_subpel_done = 0;
   1699     }
   1700     else
   1701     {
   1702         /* ps_mv and pi1_ref_idx now point to the top left locn */
   1703         ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
   1704         pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
   1705 
   1706         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
   1707             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
   1708 
   1709         if(-1 != i4_mv_pos_in_implicit_array)
   1710         {
   1711             COPY_MV_TO_SEARCH_NODE(
   1712                 ps_search_node,
   1713                 &ps_mv[i4_mv_pos_in_implicit_array],
   1714                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
   1715                 i1_ref_idx,
   1716                 shift);
   1717         }
   1718         else
   1719         {
   1720             ps_search_node->u1_is_avail = 0;
   1721             ps_search_node->s_mv.i2_mvx = 0;
   1722             ps_search_node->s_mv.i2_mvy = 0;
   1723             ps_search_node->i1_ref_idx = i1_ref_idx;
   1724         }
   1725     }
   1726 
   1727     /* Move to left */
   1728     {
   1729         /* ps_mv and pi1_ref_idx now point to the top left locn */
   1730         ps_search_node = ps_left_neighbours;
   1731         ps_mv = ps_mv_base + mvs_in_row;
   1732         pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
   1733 
   1734         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
   1735             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
   1736 
   1737         if(-1 != i4_mv_pos_in_implicit_array)
   1738         {
   1739             COPY_MV_TO_SEARCH_NODE(
   1740                 ps_search_node,
   1741                 &ps_mv[i4_mv_pos_in_implicit_array],
   1742                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
   1743                 i1_ref_idx,
   1744                 shift);
   1745         }
   1746         else
   1747         {
   1748             ps_search_node->u1_is_avail = 0;
   1749             ps_search_node->s_mv.i2_mvx = 0;
   1750             ps_search_node->s_mv.i2_mvy = 0;
   1751             ps_search_node->i1_ref_idx = i1_ref_idx;
   1752         }
   1753     }
   1754 
   1755     /* Move to l1 */
   1756     if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
   1757     {
   1758         /* ps_mv and pi1_ref_idx now point to the top left locn */
   1759         ps_search_node++;
   1760         ps_mv += (mvs_in_row * (jump >> 1));
   1761         pi1_ref_idx += (mvs_in_row * (jump >> 1));
   1762 
   1763         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
   1764             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
   1765 
   1766         if(-1 != i4_mv_pos_in_implicit_array)
   1767         {
   1768             COPY_MV_TO_SEARCH_NODE(
   1769                 ps_search_node,
   1770                 &ps_mv[i4_mv_pos_in_implicit_array],
   1771                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
   1772                 i1_ref_idx,
   1773                 shift);
   1774         }
   1775         else
   1776         {
   1777             ps_search_node->u1_is_avail = 0;
   1778             ps_search_node->s_mv.i2_mvx = 0;
   1779             ps_search_node->s_mv.i2_mvy = 0;
   1780             ps_search_node->i1_ref_idx = i1_ref_idx;
   1781         }
   1782     }
   1783     else
   1784     {
   1785         ps_search_node++;
   1786         ps_search_node->u1_is_avail = 0;
   1787         ps_search_node->s_mv.i2_mvx = 0;
   1788         ps_search_node->s_mv.i2_mvy = 0;
   1789         ps_search_node->i1_ref_idx = i1_ref_idx;
   1790     }
   1791 
   1792     /* Move to bl */
   1793     ps_search_node++;
   1794     if(bl_avail == 0)
   1795     {
   1796         ps_search_node->s_mv.i2_mvx = 0;
   1797         ps_search_node->s_mv.i2_mvy = 0;
   1798         ps_search_node->i1_ref_idx = i1_ref_idx;
   1799         ps_search_node->u1_is_avail = 0;
   1800     }
   1801     else
   1802     {
   1803         /* ps_mv and pi1_ref_idx now point to the top left locn */
   1804         ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
   1805         pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
   1806 
   1807         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
   1808             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
   1809 
   1810         if(-1 != i4_mv_pos_in_implicit_array)
   1811         {
   1812             COPY_MV_TO_SEARCH_NODE(
   1813                 ps_search_node,
   1814                 &ps_mv[i4_mv_pos_in_implicit_array],
   1815                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
   1816                 i1_ref_idx,
   1817                 shift);
   1818         }
   1819         else
   1820         {
   1821             ps_search_node->u1_is_avail = 0;
   1822             ps_search_node->s_mv.i2_mvx = 0;
   1823             ps_search_node->s_mv.i2_mvy = 0;
   1824             ps_search_node->i1_ref_idx = i1_ref_idx;
   1825         }
   1826     }
   1827 }
   1828 
   1829 /**
   1830 ********************************************************************************
   1831 *  @fn    void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
   1832 *                                   S32 i4_blk_x,
   1833 *                                   S32 i4_blk_y,
   1834 *                                   mvgrid_t *ps_mv_grid ,
   1835 *                                   S32 i1_ref_id)
   1836 *
   1837 *  @brief  The 18x18 MV grid for a ctb, is filled in first row and 1st col
   1838 *          this corresponds to neighbours (TL, T, TR, L, BL)
   1839 *
   1840 *  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
   1841 *
   1842 *  @param[in] blk_x : x coordinate of the block in mv bank
   1843 *
   1844 *  @param[in] blk_y : y coordinate of the block in mv bank
   1845 *
   1846 *  @param[in] ps_mv_grid : Grid (18x18 mvs at 4x4 level)
   1847 *
   1848 *  @param[in] i1_ref_idx : Corresponds to ref idx from which to pick up mv
   1849 *              results, useful if multiple ref idx candts maintained separately.
   1850 *
   1851 *  @return void
   1852 ********************************************************************************
   1853 */
   1854 void hme_fill_ctb_neighbour_mvs(
   1855     layer_ctxt_t *ps_curr_layer,
   1856     S32 blk_x,
   1857     S32 blk_y,
   1858     mv_grid_t *ps_mv_grid,
   1859     U08 u1_pred_dir_ctr,
   1860     U08 u1_default_ref_id,
   1861     S32 i4_num_act_ref_l0)
   1862 {
   1863     search_node_t *ps_grid_node;
   1864     layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
   1865     S32 i4_offset;
   1866     hme_mv_t *ps_mv, *ps_mv_base;
   1867     S08 *pi1_ref_idx, *pi1_ref_idx_base;
   1868     S32 jump = 0, inc, i, mvs_in_blk, mvs_in_row;
   1869 
   1870     if(ps_layer_mvbank->e_blk_size == BLK_4x4)
   1871     {
   1872         /* searching 16x16, mvs are for 4x4 */
   1873         jump = 1;
   1874         blk_x <<= 2;
   1875         blk_y <<= 2;
   1876     }
   1877     else
   1878     {
   1879         /* Searching 16x16, mvs are for 8x8 */
   1880         blk_x <<= 1;
   1881         blk_y <<= 1;
   1882     }
   1883     ASSERT(ps_layer_mvbank->e_blk_size != BLK_16x16);
   1884 
   1885     mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
   1886     mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
   1887 
   1888     /* Adjust the blk coord to point to top left locn */
   1889     blk_x -= 1;
   1890     blk_y -= 1;
   1891 
   1892     /* Pick up the mvs from the location */
   1893     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
   1894     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
   1895 
   1896     i4_offset += (u1_pred_dir_ctr == 1);
   1897 
   1898     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
   1899     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
   1900 
   1901     ps_mv_base = ps_mv;
   1902     pi1_ref_idx_base = pi1_ref_idx;
   1903 
   1904     /* the 0, 0 entry of the grid pts to top left for the ctb */
   1905     ps_grid_node = &ps_mv_grid->as_node[0];
   1906 
   1907     /* Copy 18 mvs at 4x4 level including top left, 16 top mvs for ctb, 1 tr */
   1908     for(i = 0; i < 18; i++)
   1909     {
   1910         COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
   1911         ps_grid_node++;
   1912         inc = 1;
   1913         /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
   1914         if(i & 1)
   1915             inc = jump;
   1916 
   1917         ps_mv += (mvs_in_blk * inc);
   1918         pi1_ref_idx += (mvs_in_blk * inc);
   1919     }
   1920 
   1921     ps_mv = ps_mv_base + mvs_in_row;
   1922     pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
   1923 
   1924     /* now copy left 16 left mvs */
   1925     ps_grid_node = &ps_mv_grid->as_node[0];
   1926     ps_grid_node += (ps_mv_grid->i4_stride);
   1927     for(i = 0; i < 16; i++)
   1928     {
   1929         COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
   1930         ps_grid_node += ps_mv_grid->i4_stride;
   1931         inc = 1;
   1932         /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
   1933         if(!(i & 1))
   1934             inc = jump;
   1935 
   1936         ps_mv += (mvs_in_row * inc);
   1937         pi1_ref_idx += (mvs_in_row * inc);
   1938     }
   1939     /* last one set to invalid as bottom left not yet encoded */
   1940     ps_grid_node->u1_is_avail = 0;
   1941 }
   1942 
   1943 void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
   1944 {
   1945     ps_buf_mgr->i4_used = 0;
   1946 }
   1947 void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
   1948 {
   1949     ps_buf_mgr->pu1_wkg_mem = pu1_mem;
   1950     ps_buf_mgr->i4_total = size;
   1951     hme_reset_wkg_mem(ps_buf_mgr);
   1952 }
   1953 
   1954 void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
   1955 {
   1956     S32 i, j;
   1957     search_node_t *ps_search_node;
   1958     /*************************************************************************/
   1959     /* We have a 64x64 CTB in the worst case. For this, we have 16x16 4x4 MVs*/
   1960     /* Additionally, we have 1 neighbour on each side. This makes it a 18x18 */
   1961     /* MV Grid. The boundary of this Grid on all sides are neighbours and the*/
   1962     /* left and top edges of this grid is filled run time. The center portion*/
   1963     /* represents the actual CTB MVs (16x16) and is also filled run time.    */
   1964     /* However, the availability is always set as available (init time)      */
   1965     /*************************************************************************/
   1966     ps_mv_grid->i4_stride = NUM_COLUMNS_IN_CTB_GRID;
   1967     ps_mv_grid->i4_start_offset = ps_mv_grid->i4_stride + CTB_MV_GRID_PAD;
   1968     ps_search_node = &ps_mv_grid->as_node[ps_mv_grid->i4_start_offset];
   1969     for(i = 0; i < 16; i++)
   1970     {
   1971         for(j = 0; j < 16; j++)
   1972         {
   1973             ps_search_node[j].u1_is_avail = 1;
   1974         }
   1975 
   1976         ps_search_node += ps_mv_grid->i4_stride;
   1977     }
   1978 }
   1979 /**
   1980 ********************************************************************************
   1981 *  @fn    void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
   1982 *
   1983 *  @brief  Pads horizontally to left side. Each pixel replicated across a line
   1984 *
   1985 *  @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
   1986 *
   1987 *  @param[in] stride : stride of destination buffer
   1988 *
   1989 *  @param[in] pad_wd : Amt of horizontal padding to be done
   1990 *
   1991 *  @param[in] pad_ht : Number of lines for which horizontal padding to be done
   1992 *
   1993 *  @return void
   1994 ********************************************************************************
   1995 */
   1996 void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
   1997 {
   1998     S32 i, j;
   1999     U08 u1_val;
   2000     for(i = 0; i < pad_ht; i++)
   2001     {
   2002         u1_val = pu1_dst[0];
   2003         for(j = -pad_wd; j < 0; j++)
   2004             pu1_dst[j] = u1_val;
   2005 
   2006         pu1_dst += stride;
   2007     }
   2008 }
   2009 /**
   2010 ********************************************************************************
   2011 *  @fn    void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
   2012 *
   2013 *  @brief  Pads horizontally to rt side. Each pixel replicated across a line
   2014 *
   2015 *  @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
   2016 *
   2017 *  @param[in] stride : stride of destination buffer
   2018 *
   2019 *  @param[in] pad_wd : Amt of horizontal padding to be done
   2020 *
   2021 *  @param[in] pad_ht : Number of lines for which horizontal padding to be done
   2022 *
   2023 *  @return void
   2024 ********************************************************************************
   2025 */
   2026 void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
   2027 {
   2028     S32 i, j;
   2029     U08 u1_val;
   2030     for(i = 0; i < pad_ht; i++)
   2031     {
   2032         u1_val = pu1_dst[0];
   2033         for(j = 1; j <= pad_wd; j++)
   2034             pu1_dst[j] = u1_val;
   2035 
   2036         pu1_dst += stride;
   2037     }
   2038 }
   2039 /**
   2040 ********************************************************************************
   2041 *  @fn    void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
   2042 *
   2043 *  @brief  Pads vertically on the top. Repeats the top line for top padding
   2044 *
   2045 *  @param[in] pu1_dst : destination pointer. Points to the line to be repeated
   2046 *
   2047 *  @param[in] stride : stride of destination buffer
   2048 *
   2049 *  @param[in] pad_ht : Amt of vertical padding to be done
   2050 *
   2051 *  @param[in] pad_wd : Number of columns for which vertical padding to be done
   2052 *
   2053 *  @return void
   2054 ********************************************************************************
   2055 */
   2056 void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
   2057 {
   2058     S32 i;
   2059     for(i = 1; i <= pad_ht; i++)
   2060         memcpy(pu1_dst - (i * stride), pu1_dst, pad_wd);
   2061 }
   2062 /**
   2063 ********************************************************************************
   2064 *  @fn    void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
   2065 *
   2066 *  @brief  Pads vertically on the bot. Repeats the top line for top padding
   2067 *
   2068 *  @param[in] pu1_dst : destination pointer. Points to the line to be repeated
   2069 *
   2070 *  @param[in] stride : stride of destination buffer
   2071 *
   2072 *  @param[in] pad_ht : Amt of vertical padding to be done
   2073 *
   2074 *  @param[in] pad_wd : Number of columns for which vertical padding to be done
   2075 *
   2076 *  @return void
   2077 ********************************************************************************
   2078 */
   2079 void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
   2080 {
   2081     S32 i;
   2082     for(i = 1; i <= pad_ht; i++)
   2083         memcpy(pu1_dst + (i * stride), pu1_dst, pad_wd);
   2084 }
   2085 
   2086 /**
   2087 ********************************************************************************
   2088 *  @fn    void hme_get_wt_inp(layer_ctxt_t *ps_curr_layer,  S32 pos_x,
   2089 *                           S32 pos_y, S32 size)
   2090 *
   2091 *  @brief  Does weighting of the input in case the search needs to happen
   2092 *          with reference frames weighted
   2093 *
   2094 *  @param[in] ps_curr_layer: layer ctxt
   2095 *
   2096 *  @param[in] pos_x : x coordinate of the input blk in the picture
   2097 *
   2098 *  @param[in] pos_y : y coordinate of hte input blk in the picture
   2099 *
   2100 *  @param[in] size : size of the input block
   2101 *
   2102 *  @param[in] num_ref : Number of reference frames
   2103 *
   2104 *  @return void
   2105 ********************************************************************************
   2106 */
   2107 void hme_get_wt_inp(
   2108     layer_ctxt_t *ps_curr_layer,
   2109     wgt_pred_ctxt_t *ps_wt_inp_prms,
   2110     S32 dst_stride,
   2111     S32 pos_x,
   2112     S32 pos_y,
   2113     S32 size,
   2114     S32 num_ref,
   2115     U08 u1_is_wt_pred_on)
   2116 {
   2117     S32 ref, i, j;
   2118     U08 *pu1_src, *pu1_dst, *pu1_src_tmp;
   2119     S32 log_wdc = ps_wt_inp_prms->wpred_log_wdc;
   2120     S32 x_count, y_count;
   2121 
   2122     /* Fixed source */
   2123     pu1_src = ps_curr_layer->pu1_inp;
   2124 
   2125     /* Make sure the start positions of block are inside frame limits */
   2126     pos_x = MIN(pos_x, ps_curr_layer->i4_wd - 1);
   2127     pos_y = MIN(pos_y, ps_curr_layer->i4_ht - 1);
   2128 
   2129     pu1_src += (pos_x + (pos_y * ps_curr_layer->i4_inp_stride));
   2130 
   2131     /* In case we handle imcomplete CTBs, we copy only as much as reqd */
   2132     /* from input buffers to prevent out of bound accesses. In this    */
   2133     /* case, we do padding in x or y or both dirns */
   2134     x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x));
   2135     y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y));
   2136 
   2137     for(i = 0; i < num_ref + 1; i++)
   2138     {
   2139         ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
   2140     }
   2141 
   2142     /* Run thro all ref ids */
   2143     for(ref = 0; ref < num_ref + 1; ref++)
   2144     {
   2145         S32 wt, off;
   2146         S32 inv_wt;
   2147 
   2148         pu1_src_tmp = pu1_src;
   2149 
   2150         /* Each ref id may have differnet wt/offset. */
   2151         /* So we have unique inp buf for each ref id */
   2152         pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
   2153 
   2154         if(ref == num_ref)
   2155         {
   2156             /* last ref will be non weighted input */
   2157             for(i = 0; i < y_count; i++)
   2158             {
   2159                 for(j = 0; j < x_count; j++)
   2160                 {
   2161                     pu1_dst[j] = pu1_src_tmp[j];
   2162                 }
   2163                 pu1_src_tmp += ps_curr_layer->i4_inp_stride;
   2164                 pu1_dst += dst_stride;
   2165             }
   2166         }
   2167         else
   2168         {
   2169             /* Wt and off specific to this ref id */
   2170             wt = ps_wt_inp_prms->a_wpred_wt[ref];
   2171             inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ref];
   2172             off = ps_wt_inp_prms->a_wpred_off[ref];
   2173 
   2174             /* Generate size*size worth of modified input samples */
   2175             for(i = 0; i < y_count; i++)
   2176             {
   2177                 for(j = 0; j < x_count; j++)
   2178                 {
   2179                     S32 tmp;
   2180 
   2181                     /* Since we scale input, we use inverse transform of wt pred */
   2182                     //tmp = HME_INV_WT_PRED(pu1_src_tmp[j], wt, off, log_wdc);
   2183                     tmp = HME_INV_WT_PRED1(pu1_src_tmp[j], inv_wt, off, log_wdc);
   2184                     pu1_dst[j] = (U08)(HME_CLIP(tmp, 0, 255));
   2185                 }
   2186                 pu1_src_tmp += ps_curr_layer->i4_inp_stride;
   2187                 pu1_dst += dst_stride;
   2188             }
   2189         }
   2190 
   2191         /* Check and do padding in right direction if need be */
   2192         pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
   2193         if(x_count != size)
   2194         {
   2195             hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
   2196         }
   2197 
   2198         /* Check and do padding in bottom directino if need be */
   2199         if(y_count != size)
   2200         {
   2201             hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
   2202         }
   2203     }
   2204 }
   2205 /**
   2206 ****************************************************************************************
   2207 *  @fn     hme_pick_best_pu_cand(pu_result_t *ps_pu_results_dst,
   2208 *                                pu_result_t *ps_pu_results_inp,
   2209 *                                UWORD8 u1_num_results_per_part,
   2210 *                                UWORD8 u1_num_best_cand)
   2211 *
   2212 *  @brief  Does the candidate evaluation across all the current candidates and returns
   2213 *           the best two or one candidates across given lists
   2214 *
   2215 *  @param[in]  - ps_pu_results_inp : Pointer to the input candidates
   2216 *              - u1_num_results_per_part: Number of available candidates
   2217 *
   2218 *  @param[out] - ps_pu_results_dst : Pointer to best PU results
   2219 *
   2220 ****************************************************************************************
   2221 */
   2222 void hme_pick_best_pu_cand(
   2223     pu_result_t *ps_pu_results_dst,
   2224     pu_result_t *ps_pu_results_list0,
   2225     pu_result_t *ps_pu_results_list1,
   2226     UWORD8 u1_num_results_per_part_l0,
   2227     UWORD8 u1_num_results_per_part_l1,
   2228     UWORD8 u1_candidate_rank)
   2229 {
   2230     struct cand_pos_data
   2231     {
   2232         U08 u1_cand_list_id;
   2233 
   2234         U08 u1_cand_id_in_cand_list;
   2235     } as_cand_pos_data[MAX_NUM_RESULTS_PER_PART_LIST << 1];
   2236 
   2237     S32 ai4_costs[MAX_NUM_RESULTS_PER_PART_LIST << 1];
   2238     U08 i, j;
   2239 
   2240     for(i = 0; i < u1_num_results_per_part_l0; i++)
   2241     {
   2242         ai4_costs[i] = ps_pu_results_list0[i].i4_tot_cost;
   2243         as_cand_pos_data[i].u1_cand_id_in_cand_list = i;
   2244         as_cand_pos_data[i].u1_cand_list_id = 0;
   2245     }
   2246 
   2247     for(i = 0, j = u1_num_results_per_part_l0; i < u1_num_results_per_part_l1; i++, j++)
   2248     {
   2249         ai4_costs[j] = ps_pu_results_list1[i].i4_tot_cost;
   2250         as_cand_pos_data[j].u1_cand_id_in_cand_list = i;
   2251         as_cand_pos_data[j].u1_cand_list_id = 1;
   2252     }
   2253 
   2254     SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
   2255         ai4_costs,
   2256         as_cand_pos_data,
   2257         u1_num_results_per_part_l0 + u1_num_results_per_part_l1,
   2258         struct cand_pos_data);
   2259 
   2260     if(as_cand_pos_data[u1_candidate_rank].u1_cand_list_id)
   2261     {
   2262         ps_pu_results_dst[0] =
   2263             ps_pu_results_list1[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
   2264     }
   2265     else
   2266     {
   2267         ps_pu_results_dst[0] =
   2268             ps_pu_results_list0[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
   2269     }
   2270 }
   2271 
   2272 /* Returns the number of candidates */
   2273 static S32 hme_tu_recur_cand_harvester(
   2274     part_type_results_t *ps_cand_container,
   2275     inter_pu_results_t *ps_pu_data,
   2276     inter_ctb_prms_t *ps_inter_ctb_prms,
   2277     S32 i4_part_mask)
   2278 {
   2279     part_type_results_t s_cand_data;
   2280 
   2281     U08 i, j;
   2282     PART_ID_T e_part_id;
   2283 
   2284     S32 i4_num_cands = 0;
   2285 
   2286     /* 2Nx2N part_type decision part */
   2287     if(i4_part_mask & ENABLE_2Nx2N)
   2288     {
   2289         U08 u1_num_candt_to_pick;
   2290 
   2291         e_part_id = ge_part_type_to_part_id[PRT_2Nx2N][0];
   2292 
   2293         ASSERT(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands >= 1);
   2294 
   2295         if(!ps_inter_ctb_prms->i4_bidir_enabled || (i4_part_mask == ENABLE_2Nx2N))
   2296         {
   2297             u1_num_candt_to_pick =
   2298                 MIN(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands,
   2299                     ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
   2300                         ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
   2301         }
   2302         else
   2303         {
   2304             u1_num_candt_to_pick =
   2305                 MIN(1,
   2306                     ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
   2307                         ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
   2308         }
   2309 
   2310         if(ME_XTREME_SPEED_25 == ps_inter_ctb_prms->i1_quality_preset)
   2311         {
   2312             u1_num_candt_to_pick = MIN(u1_num_candt_to_pick, MAX_NUM_TU_RECUR_CANDS_IN_XS25);
   2313         }
   2314 
   2315         for(i = 0; i < u1_num_candt_to_pick; i++)
   2316         {
   2317             /* Picks the best two candidates of all the available ones */
   2318             hme_pick_best_pu_cand(
   2319                 ps_cand_container[i4_num_cands].as_pu_results,
   2320                 ps_pu_data->aps_pu_results[0][e_part_id],
   2321                 ps_pu_data->aps_pu_results[1][e_part_id],
   2322                 ps_pu_data->u1_num_results_per_part_l0[e_part_id],
   2323                 ps_pu_data->u1_num_results_per_part_l1[e_part_id],
   2324                 i);
   2325 
   2326             /* Update the other params part_type and total_cost in part_type_results */
   2327             ps_cand_container[i4_num_cands].u1_part_type = e_part_id;
   2328             ps_cand_container[i4_num_cands].i4_tot_cost =
   2329                 ps_cand_container[i4_num_cands].as_pu_results->i4_tot_cost;
   2330 
   2331             i4_num_cands++;
   2332         }
   2333     }
   2334 
   2335     /* SMP */
   2336     {
   2337         S32 i4_total_cost;
   2338 
   2339         S32 num_part_types = PRT_Nx2N - PRT_2NxN + 1;
   2340         S32 start_part_type = PRT_2NxN;
   2341         S32 best_cost = MAX_32BIT_VAL;
   2342         S32 part_type_cnt = 0;
   2343 
   2344         for(j = 0; j < num_part_types; j++)
   2345         {
   2346             if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
   2347             {
   2348                 continue;
   2349             }
   2350 
   2351             for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
   2352             {
   2353                 e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
   2354 
   2355                 /* Pick the best candidate for the partition acroos lists */
   2356                 hme_pick_best_pu_cand(
   2357                     &s_cand_data.as_pu_results[i],
   2358                     ps_pu_data->aps_pu_results[0][e_part_id],
   2359                     ps_pu_data->aps_pu_results[1][e_part_id],
   2360                     ps_pu_data->u1_num_results_per_part_l0[e_part_id],
   2361                     ps_pu_data->u1_num_results_per_part_l1[e_part_id],
   2362                     0);
   2363             }
   2364 
   2365             i4_total_cost =
   2366                 s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
   2367 
   2368             if(i4_total_cost < best_cost)
   2369             {
   2370                 /* Stores the index of the best part_type in the sub-catoegory */
   2371                 best_cost = i4_total_cost;
   2372 
   2373                 ps_cand_container[i4_num_cands] = s_cand_data;
   2374 
   2375                 ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
   2376                 ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
   2377             }
   2378 
   2379             part_type_cnt++;
   2380         }
   2381 
   2382         i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
   2383     }
   2384 
   2385     /* AMP */
   2386     {
   2387         S32 i4_total_cost;
   2388 
   2389         S32 num_part_types = PRT_nRx2N - PRT_2NxnU + 1;
   2390         S32 start_part_type = PRT_2NxnU;
   2391         S32 best_cost = MAX_32BIT_VAL;
   2392         S32 part_type_cnt = 0;
   2393 
   2394         for(j = 0; j < num_part_types; j++)
   2395         {
   2396             if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
   2397             {
   2398                 continue;
   2399             }
   2400 
   2401             for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
   2402             {
   2403                 e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
   2404 
   2405                 /* Pick the best candidate for the partition acroos lists */
   2406                 hme_pick_best_pu_cand(
   2407                     &s_cand_data.as_pu_results[i],
   2408                     ps_pu_data->aps_pu_results[0][e_part_id],
   2409                     ps_pu_data->aps_pu_results[1][e_part_id],
   2410                     ps_pu_data->u1_num_results_per_part_l0[e_part_id],
   2411                     ps_pu_data->u1_num_results_per_part_l1[e_part_id],
   2412                     0);
   2413             }
   2414 
   2415             i4_total_cost =
   2416                 s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
   2417 
   2418             if(i4_total_cost < best_cost)
   2419             {
   2420                 /* Stores the index of the best part_type in the sub-catoegory */
   2421                 best_cost = i4_total_cost;
   2422 
   2423                 ps_cand_container[i4_num_cands] = s_cand_data;
   2424 
   2425                 ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
   2426                 ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
   2427             }
   2428 
   2429             part_type_cnt++;
   2430         }
   2431 
   2432         i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
   2433     }
   2434 
   2435     return i4_num_cands;
   2436 }
   2437 
   2438 /**
   2439 *****************************************************************************
   2440 *  @fn     hme_decide_part_types
   2441 *
   2442 *  @brief  Does uni/bi evaluation across various partition types,
   2443 *          decides best inter partition types for the CU, compares
   2444 *          intra cost and decides the best K results for the CU
        *          NOTE(review): no intra cost comparison is visible in this
        *          function body - confirm whether it is done by the caller
   2445 *
   2446 *          This is called post subpel refinement for 16x16s, 8x8s and
   2447 *          for post merge evaluation for 32x32,64x64 CUs
   2448 *
   2449 *  @param[in,out] ps_cu_results : Inter CU results data structure
   2450 *                 - In : active partition mask, CU size, input offset, CU x/y offsets
   2451 *                 - Out: u1_num_best_results and ps_best_results[] (sorted by cost)
   2452 *
   2453 *  @param[in]     ps_pu_results : PU level results handed to the candidate
        *                 harvester and to the uni/bi evaluation
        *
        *  @param[in,out] ps_inter_ctb_prms : Inter CTB params (weights, lambda, input
        *                 pointers, noise flags); its pred buffer usage indicator is
        *                 updated when scratch buffers are acquired and released
        *
        *  @param[in]     ps_ctxt : ME frame context; only ps_func_selector is read here
        *
        *  @param[in]     ps_cmn_utils_optimised_function_list : common optimised
        *                 functions; pf_copy_2d is used here and the list is forwarded
        *                 to the uni/bi evaluation
        *
        *  @param[in]     ps_me_optimised_function_list : ME optimised functions,
        *                 forwarded to the uni/bi evaluation
   2454 *
   2455 *
   2456 *  @par Description
   2457 *    --------------------------------------------------------------------------------
   2458 *     Flow:
   2459 *            for each category (SMP,AMP,2Nx2N based on part mask)
   2460 *            {
   2461 *                for each part_type
   2462 *                {
   2463 *                    for each part
   2464 *                        pick best candidate from each list
   2465 *                    combine uni part type
   2466 *                    update best results for part type
   2467 *                }
   2468 *                pick the best part type for given category (for SMP & AMP)
   2469 *            }
   2470 *                    ||
   2471 *                    ||
   2472 *                    \/
   2473 *           Bi-Pred evaluation:
   2474 *            for upto 4 best part types
   2475 *            {
   2476 *                for each part
   2477 *                {
   2478 *                    compute fixed size had for all uni and remember coeffs
   2479 *                    compute bisatd
   2480 *                    uni vs bi and gives upto two results
   2481 *                    also gives the pt level pred buffer
   2482 *                }
   2483 *             }
   2484 *                    ||
   2485 *                    ||
   2486 *                    \/
   2487 *            select X candidates for tu recursion as per the Note below
   2488 *               tu_rec_on_part_type (reuse transform coeffs)
   2489 *                    ||
   2490 *                    ||
   2491 *                    \/
   2492 *            insert intra nodes at appropriate result id
        *               (NOTE(review): not visible in this function body - confirm)
   2493 *                    ||
   2494 *                    ||
   2495 *                    \/
   2496 *            populate y best results for rdo based on preset
   2497 *
   2498 *     Note :
   2499 *     number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq
   2500 *     number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq
   2501 *     --------------------------------------------------------------------------------
   2502 *
   2503 *  @return None
   2504 ********************************************************************************
   2505 */
   2506 void hme_decide_part_types(
   2507     inter_cu_results_t *ps_cu_results,
   2508     inter_pu_results_t *ps_pu_results,
   2509     inter_ctb_prms_t *ps_inter_ctb_prms,
   2510     me_frm_ctxt_t *ps_ctxt,
   2511     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
   2512     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
   2513 
   2514 )
   2515 {
   2516     S32 i, j;
   2517     S32 i4_part_mask;
   2518     ULWORD64 au8_pred_sigmaXSquare[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
   2519     ULWORD64 au8_pred_sigmaX[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
   2520     S32 i4_noise_term;
   2521     WORD32 e_part_id;
   2522 
   2523     PF_SAD_FXN_TU_REC apf_err_compute[4];
   2524 
   2525     part_type_results_t as_part_type_results[NUM_BEST_ME_OUTPUTS];
   2526     part_type_results_t *ps_part_type_results;
   2527 
   2528     S32 num_best_cand = 0;
   2529     const S32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
   2530 
   2531     i4_part_mask = ps_cu_results->i4_part_mask;
   2532 
            /* Gather the candidates (2Nx2N, best SMP, best AMP as allowed by the */
            /* part mask) that are taken through uni/bi evaluation and TU recursion */
   2533     num_best_cand = hme_tu_recur_cand_harvester(
   2534         as_part_type_results, ps_pu_results, ps_inter_ctb_prms, i4_part_mask);
   2535 
   2536     /* Partition ID of the 2Nx2N partition; used further below for the */
            /* single-partition variance lookup when the CU size is not 8x8 */
   2537     e_part_id = (UWORD8)ge_part_type_to_part_id[PRT_2Nx2N][0];
   2538 
   2539     ps_part_type_results = as_part_type_results;
            /* Prediction and uni/bi evaluation for every harvested candidate. Row i */
            /* of the sigmaX / sigmaXSquare arrays is handed to the callee */
            /* (presumably filled there - TODO confirm) and read back in the noise */
            /* modulation below */
   2540     for(i = 0; i < num_best_cand; i++)
   2541     {
   2542         hme_compute_pred_and_evaluate_bi(
   2543             ps_cu_results,
   2544             ps_pu_results,
   2545             ps_inter_ctb_prms,
   2546             &(ps_part_type_results[i]),
   2547             au8_pred_sigmaXSquare[i],
   2548             au8_pred_sigmaX[i],
   2549             ps_cmn_utils_optimised_function_list,
   2550             ps_me_optimised_function_list
   2551 
   2552         );
   2553     }
   2554     /* Perform TU_REC on the best candidates selected */
   2555     {
   2556         WORD32 i4_sad_grid;
   2557         WORD32 ai4_tu_split_flag[4];
   2558         WORD32 ai4_tu_early_cbf[4];
   2559 
   2560         WORD32 best_cost[NUM_BEST_ME_OUTPUTS];
   2561         WORD32 ai4_final_idx[NUM_BEST_ME_OUTPUTS];
   2562         WORD16 i2_wght;
   2563         WORD32 i4_satd;
   2564 
   2565         err_prms_t s_err_prms;
   2566         err_prms_t *ps_err_prms = &s_err_prms;
   2567 
   2568         /* Default cost and final idx initialization */
   2569         for(i = 0; i < num_best_cand; i++)
   2570         {
   2571             best_cost[i] = MAX_32BIT_VAL;
   2572             ai4_final_idx[i] = -1;
   2573         }
   2574 
   2575         /* Assign the satd function to the err_compute function pointer table :
   2576         one TU recursion evaluator per CU size (64x64, 32x32, 16x16 and 8x8) */
   2577         apf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
   2578         apf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
   2579         apf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
   2580         apf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
   2581 
                /* Error params that stay the same for every candidate */
   2582         ps_err_prms->pi4_sad_grid = &i4_sad_grid;
   2583         ps_err_prms->pi4_tu_split_flags = ai4_tu_split_flag;
   2584         ps_err_prms->u1_max_tr_depth = ps_inter_ctb_prms->u1_max_tr_depth;
   2585         ps_err_prms->pi4_tu_early_cbf = ai4_tu_early_cbf;
   2586         ps_err_prms->i4_grid_mask = 1;
   2587         ps_err_prms->pu1_wkg_mem = ps_inter_ctb_prms->pu1_wkg_mem;
   2588         ps_err_prms->u1_max_tr_size = 32;
   2589 
                /* Noisy CUs use a different maximum transform size */
   2590         if(ps_inter_ctb_prms->u1_is_cu_noisy)
   2591         {
   2592             ps_err_prms->u1_max_tr_size = MAX_TU_SIZE_WHEN_NOISY;
   2593         }
   2594 
   2595         /* TU_REC for the best candidates, as mentioned in NOTE above (except candidates that
   2596         are disabled by Part_mask) */
   2597         for(i = 0; i < num_best_cand; i++)
   2598         {
   2599             part_type_results_t *ps_best_results;
   2600             pu_result_t *ps_pu_result;
   2601             WORD32 part_type_cost;
   2602             WORD32 cand_idx;
   2603 
   2604             WORD32 pred_dir;
   2605             S32 i4_inp_off;
   2606 
   2607             S32 lambda;
   2608             U08 lambda_qshift;
   2609             U08 *apu1_inp[MAX_NUM_INTER_PARTS];
   2610             S16 ai2_wt[MAX_NUM_INTER_PARTS];
   2611             S32 ai4_inv_wt[MAX_NUM_INTER_PARTS];
   2612             S32 ai4_inv_wt_shift_val[MAX_NUM_INTER_PARTS];
   2613 
   2614             WORD32 part_type = ps_part_type_results[i].u1_part_type;
   2615             WORD32 e_cu_size = ps_cu_results->u1_cu_size;
   2616             WORD32 e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
   2617             U08 u1_num_parts = gau1_num_parts_in_part_type[part_type];
                    /* UCHAR_MAX means no scratch pred buffer was acquired for this */
                    /* candidate; checked before the buffer is released further below */
   2618             U08 u1_inp_buf_idx = UCHAR_MAX;
   2619 
   2620             ps_err_prms->i4_part_mask = i4_part_mask;
   2621             ps_err_prms->i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
   2622             ps_err_prms->i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
   2623             ps_err_prms->pu1_ref = ps_part_type_results[i].pu1_pred;
   2624             ps_err_prms->i4_ref_stride = ps_part_type_results[i].i4_pred_stride;
   2625 
   2626             /* Current offset for the present part type */
   2627             i4_inp_off = ps_cu_results->i4_inp_offset;
   2628 
   2629             ps_best_results = &(ps_part_type_results[i]);
   2630 
   2631             part_type_cost = 0;
   2632             lambda = ps_inter_ctb_prms->i4_lamda;
   2633             lambda_qshift = ps_inter_ctb_prms->u1_lamda_qshift;
   2634 
                    /* Per partition: select the input pointer, the luma weight and the */
                    /* inverse weight that match the prediction direction, and add the */
                    /* partition's MV cost to the part type cost */
   2635             for(j = 0; j < u1_num_parts; j++)
   2636             {
   2637                 ps_pu_result = &(ps_best_results->as_pu_results[j]);
   2638 
   2639                 pred_dir = ps_pu_result->pu.b2_pred_mode;
   2640 
   2641                 if(PRED_L0 == pred_dir)
   2642                 {
   2643                     apu1_inp[j] =
   2644                         ps_inter_ctb_prms->apu1_wt_inp[PRED_L0][ps_pu_result->pu.mv.i1_l0_ref_idx] +
   2645                         i4_inp_off;
   2646                     ai2_wt[j] =
   2647                         ps_inter_ctb_prms->pps_rec_list_l0[ps_pu_result->pu.mv.i1_l0_ref_idx]
   2648                             ->s_weight_offset.i2_luma_weight;
   2649                     ai4_inv_wt[j] =
   2650                         ps_inter_ctb_prms->pi4_inv_wt
   2651                             [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
   2652                     ai4_inv_wt_shift_val[j] =
   2653                         ps_inter_ctb_prms->pi4_inv_wt_shift_val
   2654                             [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
   2655                 }
   2656                 else if(PRED_L1 == pred_dir)
   2657                 {
   2658                     apu1_inp[j] =
   2659                         ps_inter_ctb_prms->apu1_wt_inp[PRED_L1][ps_pu_result->pu.mv.i1_l1_ref_idx] +
   2660                         i4_inp_off;
   2661                     ai2_wt[j] =
   2662                         ps_inter_ctb_prms->pps_rec_list_l1[ps_pu_result->pu.mv.i1_l1_ref_idx]
   2663                             ->s_weight_offset.i2_luma_weight;
   2664                     ai4_inv_wt[j] =
   2665                         ps_inter_ctb_prms->pi4_inv_wt
   2666                             [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
   2667                     ai4_inv_wt_shift_val[j] =
   2668                         ps_inter_ctb_prms->pi4_inv_wt_shift_val
   2669                             [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
   2670                 }
   2671                 else if(PRED_BI == pred_dir)
   2672                 {
                            /* Bi-pred uses the non-weighted input with default weights */
   2673                     apu1_inp[j] = ps_inter_ctb_prms->pu1_non_wt_inp + i4_inp_off;
   2674                     ai2_wt[j] = 1 << ps_inter_ctb_prms->wpred_log_wdc;
   2675                     ai4_inv_wt[j] = i4_default_src_wt;
   2676                     ai4_inv_wt_shift_val[j] = 0;
   2677                 }
   2678                 else
   2679                 {
                            /* NOTE(review): if ASSERT compiles to nothing, the per-part */
                            /* arrays stay unset for this j and are still read below */
   2680                     ASSERT(0);
   2681                 }
   2682 
   2683                 part_type_cost += ps_pu_result->i4_mv_cost;
   2684             }
   2685 
                    /* With a single partition, or when both partitions carry the same */
                    /* luma weight, the input of partition 0 is used for the whole block. */
                    /* Otherwise a composite input is stitched into a scratch pred buffer */
   2686             if((u1_num_parts == 1) || (ai2_wt[0] == ai2_wt[1]))
   2687             {
   2688                 ps_err_prms->pu1_inp = apu1_inp[0];
   2689                 ps_err_prms->i4_inp_stride = ps_inter_ctb_prms->i4_inp_stride;
   2690                 i2_wght = ai2_wt[0];
   2691             }
   2692             else
   2693             {
   2694                 if(1 != ihevce_get_free_pred_buf_indices(
   2695                             &u1_inp_buf_idx,
   2696                             &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
   2697                             1))
   2698                 {
   2699                     ASSERT(0);
   2700                 }
   2701                 else
   2702                 {
                            /* Partition 0 region is copied from apu1_inp[0] and partition 1 */
                            /* region from apu1_inp[1]; the scratch buffer has a stride of */
                            /* MAX_CU_SIZE. PU sizes are (b4_wd + 1) * 4 and (b4_ht + 1) * 4 */
   2703                     U08 *pu1_dst =
   2704                         ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
   2705                     U08 *pu1_src = apu1_inp[0];
   2706                     U08 u1_pu1_wd = (ps_part_type_results[i].as_pu_results[0].pu.b4_wd + 1) << 2;
   2707                     U08 u1_pu1_ht = (ps_part_type_results[i].as_pu_results[0].pu.b4_ht + 1) << 2;
   2708                     U08 u1_pu2_wd = (ps_part_type_results[i].as_pu_results[1].pu.b4_wd + 1) << 2;
   2709                     U08 u1_pu2_ht = (ps_part_type_results[i].as_pu_results[1].pu.b4_ht + 1) << 2;
   2710 
   2711                     ps_cmn_utils_optimised_function_list->pf_copy_2d(
   2712                         pu1_dst,
   2713                         MAX_CU_SIZE,
   2714                         pu1_src,
   2715                         ps_inter_ctb_prms->i4_inp_stride,
   2716                         u1_pu1_wd,
   2717                         u1_pu1_ht);
   2718 
                            /* Move to the start of the second partition: down by the first */
                            /* PU's height when gai1_is_part_vertical is set for the first */
                            /* part id, else right by the first PU's width */
   2719                     pu1_dst +=
   2720                         (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
   2721                              ? u1_pu1_ht * MAX_CU_SIZE
   2722                              : u1_pu1_wd);
   2723                     pu1_src =
   2724                         apu1_inp[1] + (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
   2725                                            ? u1_pu1_ht * ps_inter_ctb_prms->i4_inp_stride
   2726                                            : u1_pu1_wd);
   2727 
   2728                     ps_cmn_utils_optimised_function_list->pf_copy_2d(
   2729                         pu1_dst,
   2730                         MAX_CU_SIZE,
   2731                         pu1_src,
   2732                         ps_inter_ctb_prms->i4_inp_stride,
   2733                         u1_pu2_wd,
   2734                         u1_pu2_ht);
   2735 
   2736                     ps_err_prms->pu1_inp =
   2737                         ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
   2738                     ps_err_prms->i4_inp_stride = MAX_CU_SIZE;
                            /* The second partition's weight is the one applied by the */
                            /* optional UNI_SATD_SCALE step below */
   2739                     i2_wght = ai2_wt[1];
   2740                 }
   2741             }
   2742 
   2743 #if !DISABLE_TU_RECURSION
                    /* SATD with TU recursion, dispatched on the CU size */
   2744             i4_satd = apf_err_compute[e_cu_size](
   2745                 ps_err_prms,
   2746                 lambda,
   2747                 lambda_qshift,
   2748                 ps_inter_ctb_prms->i4_qstep_ls8,
   2749                 ps_ctxt->ps_func_selector);
   2750 #else
                    /* NOTE(review): pf_err_compute is not declared in this function; */
                    /* this branch only builds if it is provided elsewhere - confirm */
   2751             ps_err_prms->pi4_sad_grid = &i4_satd;
   2752 
   2753             pf_err_compute(ps_err_prms);
   2754 
                    /* NOTE(review): both arms below are identical (all split flags set */
                    /* to 1, split cost 0) */
   2755             if((part_type == PRT_2Nx2N) || (e_cu_size != CU_64x64))
   2756             {
   2757                 ai4_tu_split_flag[0] = 1;
   2758                 ai4_tu_split_flag[1] = 1;
   2759                 ai4_tu_split_flag[2] = 1;
   2760                 ai4_tu_split_flag[3] = 1;
   2761 
   2762                 ps_err_prms->i4_tu_split_cost = 0;
   2763             }
   2764             else
   2765             {
   2766                 ai4_tu_split_flag[0] = 1;
   2767                 ai4_tu_split_flag[1] = 1;
   2768                 ai4_tu_split_flag[2] = 1;
   2769                 ai4_tu_split_flag[3] = 1;
   2770 
   2771                 ps_err_prms->i4_tu_split_cost = 0;
   2772             }
   2773 #endif
   2774 
   2775 #if UNI_SATD_SCALE
   2776             i4_satd = (i4_satd * i2_wght) >> ps_inter_ctb_prms->wpred_log_wdc;
   2777 #endif
   2778 
                    /* Noise based SATD modulation: done only when the CU is flagged */
                    /* noisy and the alpha stim multiplier is non-zero */
   2779             if(ps_inter_ctb_prms->u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
   2780             {
   2781                 ULWORD64 u8_temp_var, u8_temp_var1, u8_pred_sigmaSquaredX;
   2782                 ULWORD64 u8_src_variance, u8_pred_variance;
   2783                 unsigned long u4_shift_val;
   2784                 S32 i4_bits_req;
   2785                 S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
   2786 
   2787                 if(1 == u1_num_parts)
   2788                 {
                            /* pred variance = sigmaXSquare - sigmaX * sigmaX */
   2789                     u8_pred_sigmaSquaredX = au8_pred_sigmaX[i][0] * au8_pred_sigmaX[i][0];
   2790                     u8_pred_variance = au8_pred_sigmaXSquare[i][0] - u8_pred_sigmaSquaredX;
   2791 
   2792                     if(e_cu_size == CU_8x8)
   2793                     {
                                /* This local e_part_id shadows the function level one. It */
                                /* selects one of the NxN part ids, starting at */
                                /* PART_ID_NxN_TL, from the parity of the CU x and y offsets */
   2794                         PART_ID_T e_part_id = (PART_ID_T)(
   2795                             (PART_ID_NxN_TL) + (ps_cu_results->u1_x_off & 1) +
   2796                             ((ps_cu_results->u1_y_off & 1) << 1));
   2797 
   2798                         u4_shift_val = ihevce_calc_stim_injected_variance(
   2799                             ps_inter_ctb_prms->pu8_part_src_sigmaX,
   2800                             ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
   2801                             &u8_src_variance,
   2802                             ai4_inv_wt[0],
   2803                             ai4_inv_wt_shift_val[0],
   2804                             ps_inter_ctb_prms->wpred_log_wdc,
   2805                             e_part_id);
   2806                     }
   2807                     else
   2808                     {
                                /* Here e_part_id is the function level 2Nx2N part id */
   2809                         u4_shift_val = ihevce_calc_stim_injected_variance(
   2810                             ps_inter_ctb_prms->pu8_part_src_sigmaX,
   2811                             ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
   2812                             &u8_src_variance,
   2813                             ai4_inv_wt[0],
   2814                             ai4_inv_wt_shift_val[0],
   2815                             ps_inter_ctb_prms->wpred_log_wdc,
   2816                             e_part_id);
   2817                     }
   2818 
   2819                     u8_pred_variance = u8_pred_variance >> u4_shift_val;
   2820 
   2821                     GETRANGE64(i4_bits_req, u8_pred_variance);
   2822 
                            /* Scale both variances down by the same amount when the pred */
                            /* variance needs more than 27 bits (presumably to keep the */
                            /* 64 bit products below from overflowing - TODO confirm) */
   2823                     if(i4_bits_req > 27)
   2824                     {
   2825                         u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
   2826                         u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
   2827                     }
   2828 
                            /* u8_temp_var = rounded (2 * src * pred) << STIM_Q_FORMAT divided */
                            /* by (src * src + pred * pred); equal variances give exactly */
                            /* 1 << STIM_Q_FORMAT and also avoid a zero divisor */
   2829                     if(u8_src_variance == u8_pred_variance)
   2830                     {
   2831                         u8_temp_var = (1 << STIM_Q_FORMAT);
   2832                     }
   2833                     else
   2834                     {
   2835                         u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
   2836                         u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
   2837                         u8_temp_var1 = (u8_src_variance * u8_src_variance) +
   2838                                        (u8_pred_variance * u8_pred_variance);
   2839                         u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
   2840                         u8_temp_var = (u8_temp_var / u8_temp_var1);
   2841                     }
   2842 
   2843                     i4_noise_term = (UWORD32)u8_temp_var;
   2844 
                            /* NOTE(review): the two-partition path below also asserts an */
                            /* upper bound on i4_noise_term; this path does not */
   2845                     ASSERT(i4_noise_term >= 0);
   2846 
   2847                     i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
   2848 
                            /* i4_satd = (i4_satd * ((1 << q) - noise_term) + half) >> q, */
                            /* with q = i4_q_level */
   2849                     u8_temp_var = i4_satd;
   2850                     u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
   2851                     u8_temp_var += (1 << ((i4_q_level)-1));
   2852                     i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
   2853                 }
   2854                 else /*if(e_cu_size <= CU_16x16)*/
   2855                 {
   2856                     unsigned long temp_shift_val;
   2857                     PART_ID_T ae_part_id[MAX_NUM_INTER_PARTS] = {
   2858                         ge_part_type_to_part_id[part_type][0], ge_part_type_to_part_id[part_type][1]
   2859                     };
   2860 
                            /* Source variance over both partitions; its shift value is the */
                            /* one applied to the pred variance below */
   2861                     u4_shift_val = ihevce_calc_variance_for_diff_weights(
   2862                         ps_inter_ctb_prms->pu8_part_src_sigmaX,
   2863                         ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
   2864                         &u8_src_variance,
   2865                         ai4_inv_wt,
   2866                         ai4_inv_wt_shift_val,
   2867                         ps_best_results->as_pu_results,
   2868                         ps_inter_ctb_prms->wpred_log_wdc,
   2869                         ae_part_id,
   2870                         gau1_blk_size_to_wd[e_blk_size],
   2871                         u1_num_parts,
   2872                         1);
   2873 
   2874                     temp_shift_val = u4_shift_val;
   2875 
                            /* Pred variance over both partitions; the shift value returned */
                            /* by this call is not used afterwards */
   2876                     u4_shift_val = ihevce_calc_variance_for_diff_weights(
   2877                         au8_pred_sigmaX[i],
   2878                         au8_pred_sigmaXSquare[i],
   2879                         &u8_pred_variance,
   2880                         ai4_inv_wt,
   2881                         ai4_inv_wt_shift_val,
   2882                         ps_best_results->as_pu_results,
   2883                         0,
   2884                         ae_part_id,
   2885                         gau1_blk_size_to_wd[e_blk_size],
   2886                         u1_num_parts,
   2887                         0);
   2888 
   2889                     u8_pred_variance = u8_pred_variance >> temp_shift_val;
   2890 
   2891                     GETRANGE64(i4_bits_req, u8_pred_variance);
   2892 
   2893                     if(i4_bits_req > 27)
   2894                     {
   2895                         u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
   2896                         u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
   2897                     }
   2898 
                            /* Same similarity ratio as in the single partition path */
   2899                     if(u8_src_variance == u8_pred_variance)
   2900                     {
   2901                         u8_temp_var = (1 << STIM_Q_FORMAT);
   2902                     }
   2903                     else
   2904                     {
   2905                         u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
   2906                         u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
   2907                         u8_temp_var1 = (u8_src_variance * u8_src_variance) +
   2908                                        (u8_pred_variance * u8_pred_variance);
   2909                         u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
   2910                         u8_temp_var = (u8_temp_var / u8_temp_var1);
   2911                     }
   2912 
   2913                     i4_noise_term = (UWORD32)u8_temp_var;
   2914 
   2915                     ASSERT(i4_noise_term >= 0);
   2916                     ASSERT(i4_noise_term <= (1 << (STIM_Q_FORMAT + ALPHA_Q_FORMAT)));
   2917 
   2918                     i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
   2919 
   2920                     u8_temp_var = i4_satd;
   2921                     u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
   2922                     u8_temp_var += (1 << ((i4_q_level)-1));
   2923                     i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
   2924 
   2925                     ASSERT(i4_satd >= 0);
   2926                 }
   2927             }
   2928 
                    /* Release the scratch input buffer if one was acquired above */
   2929             if(u1_inp_buf_idx != UCHAR_MAX)
   2930             {
   2931                 ihevce_set_pred_buf_as_free(
   2932                     &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
   2933                     u1_inp_buf_idx);
   2934             }
   2935 
                    /* Total cost of the part type = accumulated MV costs + SATD */
   2936             part_type_cost += i4_satd;
   2937 
   2938             /*Update the best results with the new results */
   2939             ps_best_results->i4_tot_cost = part_type_cost;
   2940 
   2941             ps_best_results->i4_tu_split_cost = ps_err_prms->i4_tu_split_cost;
   2942 
   2943             ASSERT(ai4_tu_split_flag[0] >= 0);
                    /* 64x64 CUs carry four split / early cbf entries, all other CU */
                    /* sizes only the first one */
   2944             if(e_cu_size == CU_64x64)
   2945             {
   2946                 ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
   2947                 ps_best_results->ai4_tu_split_flag[1] = ai4_tu_split_flag[1];
   2948                 ps_best_results->ai4_tu_split_flag[2] = ai4_tu_split_flag[2];
   2949                 ps_best_results->ai4_tu_split_flag[3] = ai4_tu_split_flag[3];
   2950 
   2951                 /* Update the TU early cbf flags into the best results structure */
   2952                 ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
   2953                 ps_best_results->ai4_tu_early_cbf[1] = ai4_tu_early_cbf[1];
   2954                 ps_best_results->ai4_tu_early_cbf[2] = ai4_tu_early_cbf[2];
   2955                 ps_best_results->ai4_tu_early_cbf[3] = ai4_tu_early_cbf[3];
   2956             }
   2957             else
   2958             {
   2959                 ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
   2960                 ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
   2961             }
   2962 
                    /* Ordered insert (ascending cost) into best_cost[] and */
                    /* ai4_final_idx[]. Only slots 0..i can have been written so far, so */
                    /* slot num_best_cand - 1 still holds MAX_32BIT_VAL here and the */
                    /* guard only rejects a cost that is not below MAX_32BIT_VAL */
   2963             if(part_type_cost < best_cost[num_best_cand - 1])
   2964             {
   2965                 /* Push and sort current part type if it is one of the num_best_cand */
   2966                 for(cand_idx = 0; cand_idx < i; cand_idx++)
   2967                 {
   2968                     if(part_type_cost <= best_cost[cand_idx])
   2969                     {
                                /* Shift entries cand_idx..i-1 one slot down to open slot */
                                /* cand_idx */
   2970                         memmove(
   2971                             &ai4_final_idx[cand_idx + 1],
   2972                             &ai4_final_idx[cand_idx],
   2973                             sizeof(WORD32) * (i - cand_idx));
   2974                         memmove(
   2975                             &best_cost[cand_idx + 1],
   2976                             &best_cost[cand_idx],
   2977                             sizeof(WORD32) * (i - cand_idx));
   2978                         break;
   2979                     }
   2980                 }
   2981 
                        /* cand_idx == i here when no cheaper slot was found (append) */
   2982                 ai4_final_idx[cand_idx] = i;
   2983                 best_cost[cand_idx] = part_type_cost;
   2984             }
   2985         }
   2986 
                /* NOTE(review): the count is num_best_cand even if a slot below keeps */
                /* ai4_final_idx == -1 and is therefore not copied */
   2987         ps_cu_results->u1_num_best_results = num_best_cand;
   2988 
                /* Copy the candidates to the CU results in ascending cost order */
   2989         for(i = 0; i < num_best_cand; i++)
   2990         {
   2991             ASSERT(ai4_final_idx[i] < num_best_cand);
   2992 
   2993             if(ai4_final_idx[i] != -1)
   2994             {
   2995                 memcpy(
   2996                     &(ps_cu_results->ps_best_results[i]),
   2997                     &(ps_part_type_results[ai4_final_idx[i]]),
   2998                     sizeof(part_type_results_t));
   2999             }
   3000         }
   3001     }
   3002 
            /* Mark pred buffers 0 .. MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 3 */
            /* as free (presumably the ones taken during uni/bi evaluation - TODO confirm) */
   3003     for(i = 0; i < (MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS)-2; i++)
   3004     {
   3005         ihevce_set_pred_buf_as_free(
   3006             &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, i);
   3007     }
   3008 }
   3009 
   3010 /**
   3011 **************************************************************************************************
   3012 *  @fn     hme_populate_pus(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
   3013 *
   3014 *  @brief Does the population of the inter_cu_results structure with the results after the
   3015 *           subpel refinement
   3016 *
   3017 *          This is called post subpel refinmenent for 16x16s, 8x8s and
   3018 *          for post merge evaluation for 32x32,64x64 CUs
   3019 *
   3020 *  @param[in,out] ps_search_results : Search results data structure
   3021 *                 - ps_cu_results : cu_results data structure
   3022 *                   ps_pu_result  : Pointer to the memory for storing PU's
   3023 *
   3024 ****************************************************************************************************
   3025 */
   3026 void hme_populate_pus(
   3027     me_ctxt_t *ps_thrd_ctxt,
   3028     me_frm_ctxt_t *ps_ctxt,
   3029     hme_subpel_prms_t *ps_subpel_prms,
   3030     search_results_t *ps_search_results,
   3031     inter_cu_results_t *ps_cu_results,
   3032     inter_pu_results_t *ps_pu_results,
   3033     pu_result_t *ps_pu_result,
   3034     inter_ctb_prms_t *ps_inter_ctb_prms,
   3035     wgt_pred_ctxt_t *ps_wt_prms,
   3036     layer_ctxt_t *ps_curr_layer,
   3037     U08 *pu1_pred_dir_searched,
   3038     WORD32 i4_num_active_ref)
   3039 {
   3040     WORD32 i, j, k;
   3041     WORD32 i4_part_mask;
   3042     WORD32 i4_ref;
   3043     UWORD8 e_part_id;
   3044     pu_result_t *ps_curr_pu;
   3045     search_node_t *ps_search_node;
   3046     part_attr_t *ps_part_attr;
   3047     UWORD8 e_cu_size = ps_search_results->e_cu_size;
   3048     WORD32 num_results_per_part_l0 = 0;
   3049     WORD32 num_results_per_part_l1 = 0;
   3050     WORD32 i4_ref_id;
   3051     WORD32 i4_total_act_ref;
   3052 
   3053     i4_part_mask = ps_search_results->i4_part_mask;
   3054 
   3055     /* pred_buf_mngr init */
   3056     {
   3057         hme_get_wkg_mem(&ps_ctxt->s_buf_mgr, MAX_WKG_MEM_SIZE_PER_THREAD);
   3058 
   3059         ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator = UINT_MAX;
   3060 
   3061         for(i = 0; i < MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 2; i++)
   3062         {
   3063             ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[i] =
   3064                 ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE;
   3065             ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator &= ~(1 << i);
   3066         }
   3067 
   3068         ps_inter_ctb_prms->pu1_wkg_mem = ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE;
   3069     }
   3070 
   3071     ps_inter_ctb_prms->i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
   3072     ps_inter_ctb_prms->u1_is_cu_noisy = ps_subpel_prms->u1_is_cu_noisy;
   3073     ps_inter_ctb_prms->i4_lamda = ps_search_results->as_pred_ctxt[0].lambda;
   3074 
   3075     /* Populate the CU level parameters */
   3076     ps_cu_results->u1_cu_size = ps_search_results->e_cu_size;
   3077     ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
   3078     ps_cu_results->i4_part_mask = ps_search_results->i4_part_mask;
   3079     ps_cu_results->u1_x_off = ps_search_results->u1_x_off;
   3080     ps_cu_results->u1_y_off = ps_search_results->u1_y_off;
   3081 
   3082     i4_total_act_ref =
   3083         ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
   3084     /*Populate the partition results
   3085     Loop across all the active references that are enabled right now */
   3086     for(i = 0; i < MAX_PART_TYPES; i++)
   3087     {
   3088         if(!(i4_part_mask & gai4_part_type_to_part_mask[i]))
   3089         {
   3090             continue;
   3091         }
   3092 
   3093         for(j = 0; j < gau1_num_parts_in_part_type[i]; j++)
   3094         {
   3095             /* Partition ID for the current PU */
   3096             e_part_id = (UWORD8)ge_part_type_to_part_id[i][j];
   3097             ps_part_attr = &gas_part_attr_in_cu[e_part_id];
   3098 
   3099             num_results_per_part_l0 = 0;
   3100             num_results_per_part_l1 = 0;
   3101 
   3102             ps_pu_results->aps_pu_results[0][e_part_id] =
   3103                 ps_pu_result + (e_part_id * MAX_NUM_RESULTS_PER_PART_LIST);
   3104             ps_pu_results->aps_pu_results[1][e_part_id] =
   3105                 ps_pu_result + ((e_part_id + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);
   3106 
   3107             for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
   3108             {
   3109                 U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];
   3110 
   3111                 for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
   3112                 {
   3113                     ps_search_node =
   3114                         &ps_search_results->aps_part_results[u1_pred_dir][e_part_id][k];
   3115 
   3116                     /* If subpel is done then the node is a valid candidate else break the loop */
   3117                     if(ps_search_node->u1_subpel_done)
   3118                     {
   3119                         i4_ref_id = ps_search_node->i1_ref_idx;
   3120 
   3121                         ASSERT(i4_ref_id >= 0);
   3122 
   3123                         /* Check whether current ref_id is past or future and assign the pointers to L0 or L1 list accordingly */
   3124                         if(!u1_pred_dir)
   3125                         {
   3126                             ps_curr_pu = ps_pu_results->aps_pu_results[0][e_part_id] +
   3127                                          num_results_per_part_l0;
   3128 
   3129                             ASSERT(
   3130                                 ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
   3131                                 ps_inter_ctb_prms->u1_num_active_ref_l0);
   3132 
   3133                             /* Always populate the ref_idx value in l0_ref_idx */
   3134                             ps_curr_pu->pu.mv.i1_l0_ref_idx =
   3135                                 ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
   3136                             ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
   3137                             ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
   3138                             ps_curr_pu->pu.b2_pred_mode = PRED_L0;
   3139 
   3140                             ps_inter_ctb_prms->apu1_wt_inp[0][ps_curr_pu->pu.mv.i1_l0_ref_idx] =
   3141                                 ps_wt_prms->apu1_wt_inp[i4_ref_id];
   3142 
   3143                             num_results_per_part_l0++;
   3144                         }
   3145                         else
   3146                         {
   3147                             ps_curr_pu = ps_pu_results->aps_pu_results[1][e_part_id] +
   3148                                          num_results_per_part_l1;
   3149 
   3150                             ASSERT(
   3151                                 ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
   3152                                 ps_inter_ctb_prms->u1_num_active_ref_l1);
   3153 
   3154                             /* populate the ref_idx value in l1_ref_idx */
   3155                             ps_curr_pu->pu.mv.i1_l1_ref_idx =
   3156                                 ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
   3157                             ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
   3158                             ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
   3159                             ps_curr_pu->pu.b2_pred_mode = PRED_L1;
   3160 
   3161                             /* Copy the values from weighted params to common_frm_aprams */
   3162                             ps_inter_ctb_prms->apu1_wt_inp[1][ps_curr_pu->pu.mv.i1_l1_ref_idx] =
   3163                                 ps_wt_prms->apu1_wt_inp[i4_ref_id];
   3164 
   3165                             num_results_per_part_l1++;
   3166                         }
   3167                         ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
   3168                         ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;
   3169 
   3170 #if UNI_SATD_SCALE
   3171                         /*SATD is scaled by weight. Hence rescale the SATD */
   3172                         ps_curr_pu->i4_tot_cost =
   3173                             ((ps_search_node->i4_sad *
   3174                                   ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
   3175                               (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
   3176                              ps_inter_ctb_prms->wpred_log_wdc) +
   3177                             ps_search_node->i4_mv_cost;
   3178 #endif
   3179 
   3180                         /* Packed format of the width and height */
   3181                         ps_curr_pu->pu.b4_wd = ((ps_part_attr->u1_x_count << e_cu_size) >> 2) - 1;
   3182                         ps_curr_pu->pu.b4_ht = ((ps_part_attr->u1_y_count << e_cu_size) >> 2) - 1;
   3183 
   3184                         ps_curr_pu->pu.b4_pos_x =
   3185                             (((ps_part_attr->u1_x_start << e_cu_size) + ps_cu_results->u1_x_off) >>
   3186                              2);
   3187                         ps_curr_pu->pu.b4_pos_y =
   3188                             (((ps_part_attr->u1_y_start << e_cu_size) + ps_cu_results->u1_y_off) >>
   3189                              2);
   3190 
   3191                         ps_curr_pu->pu.b1_intra_flag = 0;
   3192 
   3193                         /* Unweighted input */
   3194                         ps_inter_ctb_prms->pu1_non_wt_inp =
   3195                             ps_wt_prms->apu1_wt_inp[i4_total_act_ref];
   3196 
   3197                         ps_search_node++;
   3198                     }
   3199                     else
   3200                     {
   3201                         break;
   3202                     }
   3203                 }
   3204             }
   3205 
   3206             ps_pu_results->u1_num_results_per_part_l0[e_part_id] = num_results_per_part_l0;
   3207             ps_pu_results->u1_num_results_per_part_l1[e_part_id] = num_results_per_part_l1;
   3208         }
   3209     }
   3210 }
   3211 
   3212 /**
   3213 *********************************************************************************************************
   3214 *  @fn     hme_populate_pus_8x8_cu(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
   3215 *
   3216 *  @brief Does the population of the inter_cu_results structure with the results after the
   3217 *           subpel refinement
   3218 *
   3219 *          This is called post subpel refinmenent for 16x16s, 8x8s and
   3220 *          for post merge evaluation for 32x32,64x64 CUs
   3221 *
   3222 *  @param[in,out] ps_search_results : Search results data structure
   3223 *                 - ps_cu_results : cu_results data structure
   3224 *                   ps_pu_results : Pointer for the PU's
   3225 *                   ps_pu_result  : Pointer to the memory for storing PU's
   3226 *
   3227 *********************************************************************************************************
   3228 */
   3229 void hme_populate_pus_8x8_cu(
   3230     me_ctxt_t *ps_thrd_ctxt,
   3231     me_frm_ctxt_t *ps_ctxt,
   3232     hme_subpel_prms_t *ps_subpel_prms,
   3233     search_results_t *ps_search_results,
   3234     inter_cu_results_t *ps_cu_results,
   3235     inter_pu_results_t *ps_pu_results,
   3236     pu_result_t *ps_pu_result,
   3237     inter_ctb_prms_t *ps_inter_ctb_prms,
   3238     U08 *pu1_pred_dir_searched,
   3239     WORD32 i4_num_active_ref,
   3240     U08 u1_blk_8x8_mask)
   3241 {
   3242     WORD32 i, k;
   3243     WORD32 i4_part_mask;
   3244     WORD32 i4_ref;
   3245     pu_result_t *ps_curr_pu;
   3246     search_node_t *ps_search_node;
   3247     WORD32 i4_ref_id;
   3248     WORD32 x_off, y_off;
   3249 
   3250     /* Make part mask available as only 2Nx2N
   3251     Later support for 4x8 and 8x4 needs to be added */
   3252     i4_part_mask = ENABLE_2Nx2N;
   3253 
   3254     x_off = ps_search_results->u1_x_off;
   3255     y_off = ps_search_results->u1_y_off;
   3256 
   3257     for(i = 0; i < 4; i++)
   3258     {
   3259         if(u1_blk_8x8_mask & (1 << i))
   3260         {
   3261             UWORD8 u1_x_pos, u1_y_pos;
   3262 
   3263             WORD32 num_results_per_part_l0 = 0;
   3264             WORD32 num_results_per_part_l1 = 0;
   3265 
   3266             ps_cu_results->u1_cu_size = CU_8x8;
   3267             ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
   3268             ps_cu_results->i4_part_mask = i4_part_mask;
   3269             ps_cu_results->u1_x_off = x_off + (i & 1) * 8;
   3270             ps_cu_results->u1_y_off = y_off + (i >> 1) * 8;
   3271             ps_cu_results->i4_inp_offset = ps_cu_results->u1_x_off + (ps_cu_results->u1_y_off * 64);
   3272 
   3273             ps_cu_results->ps_best_results[0].i4_tot_cost = MAX_32BIT_VAL;
   3274             ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
   3275 
   3276             u1_x_pos = ps_cu_results->u1_x_off >> 2;
   3277             u1_y_pos = ps_cu_results->u1_y_off >> 2;
   3278 
   3279             if(!(ps_search_results->i4_part_mask & ENABLE_NxN))
   3280             {
   3281                 ps_curr_pu = &ps_cu_results->ps_best_results[0].as_pu_results[0];
   3282 
   3283                 ps_cu_results->i4_part_mask = 0;
   3284                 ps_cu_results->u1_num_best_results = 0;
   3285 
   3286                 ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
   3287 
   3288                 ps_curr_pu->pu.b4_wd = 1;
   3289                 ps_curr_pu->pu.b4_ht = 1;
   3290                 ps_curr_pu->pu.b4_pos_x = u1_x_pos;
   3291                 ps_curr_pu->pu.b4_pos_y = u1_y_pos;
   3292                 ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
   3293 
   3294                 ps_cu_results++;
   3295                 ps_pu_results++;
   3296 
   3297                 continue;
   3298             }
   3299 
   3300             ps_pu_results->aps_pu_results[0][0] =
   3301                 ps_pu_result + (i * MAX_NUM_RESULTS_PER_PART_LIST);
   3302             ps_pu_results->aps_pu_results[1][0] =
   3303                 ps_pu_result + ((i + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);
   3304 
   3305             for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
   3306             {
   3307                 U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];
   3308 
   3309                 /* Select the NxN partition node for the current ref_idx in the search results*/
   3310                 ps_search_node =
   3311                     ps_search_results->aps_part_results[u1_pred_dir][PART_ID_NxN_TL + i];
   3312 
   3313                 for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
   3314                 {
   3315                     /* If subpel is done then the node is a valid candidate else break the loop */
   3316                     if((ps_search_node->u1_is_avail) || (ps_search_node->u1_subpel_done))
   3317                     {
   3318                         i4_ref_id = ps_search_node->i1_ref_idx;
   3319 
   3320                         ASSERT(i4_ref_id >= 0);
   3321 
   3322                         if(!u1_pred_dir)
   3323                         {
   3324                             ps_curr_pu =
   3325                                 ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
   3326 
   3327                             ASSERT(
   3328                                 ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
   3329                                 ps_inter_ctb_prms->u1_num_active_ref_l0);
   3330 
   3331                             ps_curr_pu->pu.mv.i1_l0_ref_idx =
   3332                                 ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
   3333                             ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
   3334                             ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
   3335                             ps_curr_pu->pu.b2_pred_mode = PRED_L0;
   3336 
   3337                             num_results_per_part_l0++;
   3338                         }
   3339                         else
   3340                         {
   3341                             ps_curr_pu =
   3342                                 ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
   3343 
   3344                             ASSERT(
   3345                                 ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
   3346                                 ps_inter_ctb_prms->u1_num_active_ref_l1);
   3347 
   3348                             ps_curr_pu->pu.mv.i1_l1_ref_idx =
   3349                                 ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
   3350                             ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
   3351                             ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
   3352                             ps_curr_pu->pu.b2_pred_mode = PRED_L1;
   3353 
   3354                             num_results_per_part_l1++;
   3355                         }
   3356                         ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
   3357                         ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;
   3358 
   3359 #if UNI_SATD_SCALE
   3360                         /*SATD is scaled by weight. Hence rescale the SATD */
   3361                         ps_curr_pu->i4_tot_cost =
   3362                             ((ps_search_node->i4_sad *
   3363                                   ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
   3364                               (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
   3365                              ps_inter_ctb_prms->wpred_log_wdc) +
   3366                             ps_search_node->i4_mv_cost;
   3367 #endif
   3368 
   3369                         ps_curr_pu->pu.b4_wd = 1;
   3370                         ps_curr_pu->pu.b4_ht = 1;
   3371                         ps_curr_pu->pu.b4_pos_x = u1_x_pos;
   3372                         ps_curr_pu->pu.b4_pos_y = u1_y_pos;
   3373                         ps_curr_pu->pu.b1_intra_flag = 0;
   3374 
   3375                         ps_search_node++;
   3376                     }
   3377                     else
   3378                     {
   3379                         /* if NxN was not evaluated at 16x16 level, assign max cost to 8x8 CU
   3380                         to remove 8x8's as possible candidates during evaluation */
   3381 
   3382                         ps_curr_pu = ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
   3383 
   3384                         ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
   3385 
   3386                         ps_curr_pu = ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
   3387 
   3388                         ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
   3389 
   3390                         break;
   3391                     }
   3392                 }
   3393             }
   3394 
   3395             /* Update the num_results per_part across lists L0 and L1 */
   3396             ps_pu_results->u1_num_results_per_part_l0[0] = num_results_per_part_l0;
   3397             ps_pu_results->u1_num_results_per_part_l1[0] = num_results_per_part_l1;
   3398         }
   3399         ps_cu_results++;
   3400         ps_pu_results++;
   3401     }
   3402 }
   3403 
   3404 /**
   3405 ********************************************************************************
   3406 *  @fn     hme_insert_intra_nodes_post_bipred
   3407 *
   3408 *  @brief  Compares intra costs (populated by IPE) with the best inter costs
   3409 *          (populated after evaluating bi-pred) and updates the best results
   3410 *          if intra cost is better
   3411 *
   3412 *  @param[in,out]  ps_cu_results    [inout] : Best results structure of CU
   3413 *                  ps_cur_ipe_ctb   [in]    : intra results for the current CTB
   3414 *                  i4_frm_qstep     [in]    : current frame quantizer(qscale)*
   3415 *
   3416 *  @return None
   3417 ********************************************************************************
   3418 */
   3419 void hme_insert_intra_nodes_post_bipred(
   3420     inter_cu_results_t *ps_cu_results,
   3421     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
   3422     WORD32 i4_frm_qstep)
   3423 {
   3424     WORD32 i;
   3425     WORD32 num_results;
   3426     WORD32 cu_size = ps_cu_results->u1_cu_size;
   3427     UWORD8 u1_x_off = ps_cu_results->u1_x_off;
   3428     UWORD8 u1_y_off = ps_cu_results->u1_y_off;
   3429 
   3430     /* Id of the 32x32 block, 16x16 block in a CTB */
   3431     WORD32 i4_32x32_id = (u1_y_off >> 5) * 2 + (u1_x_off >> 5);
   3432     WORD32 i4_16x16_id = ((u1_y_off >> 4) & 0x1) * 2 + ((u1_x_off >> 4) & 0x1);
   3433 
   3434     /* Flags to indicate if intra64/intra32/intra16 cusize are invalid as per IPE decision */
   3435     WORD32 disable_intra64 = 0;
   3436     WORD32 disable_intra32 = 0;
   3437     WORD32 disable_intra16 = 0;
   3438 
   3439     S32 i4_intra_2nx2n_cost;
   3440 
   3441     /* ME final results for this CU (post seeding of best uni/bi pred results) */
   3442     part_type_results_t *ps_best_result;
   3443 
   3444     i4_frm_qstep *= !L0ME_IN_OPENLOOP_MODE;
   3445 
   3446     /*If inter candidates are enabled then enter the for loop to update the intra candidate */
   3447 
   3448     if((ps_cu_results->u1_num_best_results == 0) && (CU_8x8 == ps_cu_results->u1_cu_size))
   3449     {
   3450         ps_cu_results->u1_num_best_results = 1;
   3451     }
   3452 
   3453     num_results = ps_cu_results->u1_num_best_results;
   3454 
   3455     ps_best_result = &ps_cu_results->ps_best_results[0];
   3456 
   3457     /* Disable intra16/32/64 flags based on split flags recommended by IPE */
   3458     if(ps_cur_ipe_ctb->u1_split_flag)
   3459     {
   3460         disable_intra64 = 1;
   3461         if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
   3462         {
   3463             disable_intra32 = 1;
   3464 
   3465             if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
   3466                    .as_intra16_analyse[i4_16x16_id]
   3467                    .b1_split_flag)
   3468             {
   3469                 disable_intra16 = 1;
   3470             }
   3471         }
   3472     }
   3473 
   3474     /* Derive the intra cost based on current cu size and offset */
   3475     switch(cu_size)
   3476     {
   3477     case CU_8x8:
   3478     {
   3479         i4_intra_2nx2n_cost = ps_cur_ipe_ctb->ai4_best8x8_intra_cost[u1_y_off + (u1_x_off >> 3)];
   3480 
   3481         /* Accounting for coding noise in the open loop IPE cost */
   3482         i4_intra_2nx2n_cost +=
   3483             ((i4_frm_qstep * 16) >> 2) /*+ ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
   3484 
   3485         break;
   3486     }
   3487 
   3488     case CU_16x16:
   3489     {
   3490         i4_intra_2nx2n_cost =
   3491             ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_y_off >> 4) * 4 + (u1_x_off >> 4)];
   3492 
   3493         /* Accounting for coding noise in the open loop IPE cost */
   3494         i4_intra_2nx2n_cost +=
   3495             ((i4_frm_qstep * 16)); /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */
   3496 
   3497         if(disable_intra16)
   3498         {
   3499             /* Disable intra 2Nx2N (intra 16) as IPE suggested best mode as 8x8 */
   3500             i4_intra_2nx2n_cost = MAX_32BIT_VAL;
   3501         }
   3502         break;
   3503     }
   3504 
   3505     case CU_32x32:
   3506     {
   3507         i4_intra_2nx2n_cost =
   3508             ps_cur_ipe_ctb->ai4_best32x32_intra_cost[(u1_y_off >> 5) * 2 + (u1_x_off >> 5)];
   3509 
   3510         /* Accounting for coding noise in the open loop IPE cost */
   3511         i4_intra_2nx2n_cost +=
   3512             (i4_frm_qstep * 16 * 4) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
   3513 
   3514         if(disable_intra32)
   3515         {
   3516             /* Disable intra 2Nx2N (intra 32) as IPE suggested best mode as 16x16 or 8x8 */
   3517             i4_intra_2nx2n_cost = MAX_32BIT_VAL;
   3518         }
   3519         break;
   3520     }
   3521 
   3522     case CU_64x64:
   3523     {
   3524         i4_intra_2nx2n_cost = ps_cur_ipe_ctb->i4_best64x64_intra_cost;
   3525 
   3526         /* Accounting for coding noise in the open loop IPE cost */
   3527         i4_intra_2nx2n_cost +=
   3528             (i4_frm_qstep * 16 * 16) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
   3529 
   3530         if(disable_intra64)
   3531         {
   3532             /* Disable intra 2Nx2N (intra 64) as IPE suggested best mode as 32x32 /16x16 / 8x8 */
   3533             i4_intra_2nx2n_cost = MAX_32BIT_VAL;
   3534         }
   3535         break;
   3536     }
   3537 
   3538     default:
   3539         ASSERT(0);
   3540     }
   3541 
   3542     {
   3543         /*****************************************************************/
   3544         /* Intra / Inter cost comparison for  2Nx2N : cu size 8/16/32/64 */
   3545         /* Identify where the current result isto be placed. Basically   */
   3546         /* find the node which has cost just higher than node under test */
   3547         /*****************************************************************/
   3548         for(i = 0; i < num_results; i++)
   3549         {
   3550             /* Subtrqact the tu_spli_flag_cost from total_inter_cost for fair comparision */
   3551             WORD32 inter_cost = ps_best_result[i].i4_tot_cost - ps_best_result[i].i4_tu_split_cost;
   3552 
   3553             if(i4_intra_2nx2n_cost < inter_cost)
   3554             {
   3555                 if(i < (num_results - 1))
   3556                 {
   3557                     memmove(
   3558                         ps_best_result + i + 1,
   3559                         ps_best_result + i,
   3560                         sizeof(ps_best_result[0]) * (num_results - 1 - i));
   3561                 }
   3562 
   3563                 /* Insert the intra node result */
   3564                 ps_best_result[i].u1_part_type = PRT_2Nx2N;
   3565                 ps_best_result[i].i4_tot_cost = i4_intra_2nx2n_cost;
   3566                 ps_best_result[i].ai4_tu_split_flag[0] = 0;
   3567                 ps_best_result[i].ai4_tu_split_flag[1] = 0;
   3568                 ps_best_result[i].ai4_tu_split_flag[2] = 0;
   3569                 ps_best_result[i].ai4_tu_split_flag[3] = 0;
   3570 
   3571                 /* Populate intra flag, cost and default mvs, refidx for intra pu */
   3572                 ps_best_result[i].as_pu_results[0].i4_tot_cost = i4_intra_2nx2n_cost;
   3573                 //ps_best_result[i].as_pu_results[0].i4_sad = i4_intra_2nx2n_cost;
   3574                 ps_best_result[i].as_pu_results[0].i4_mv_cost = 0;
   3575                 ps_best_result[i].as_pu_results[0].pu.b1_intra_flag = 1;
   3576                 ps_best_result[i].as_pu_results[0].pu.mv.i1_l0_ref_idx = -1;
   3577                 ps_best_result[i].as_pu_results[0].pu.mv.i1_l1_ref_idx = -1;
   3578                 ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvx = INTRA_MV;
   3579                 ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvy = INTRA_MV;
   3580                 ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvx = INTRA_MV;
   3581                 ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvy = INTRA_MV;
   3582 
   3583                 break;
   3584             }
   3585         }
   3586     }
   3587 }
   3588 
   3589 S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
   3590     me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb)
   3591 {
   3592     double lambda;
   3593     double lambda_modifier;
   3594     WORD32 i4_cu_qp;
   3595     frm_lambda_ctxt_t *ps_frm_lambda_ctxt;
   3596     //ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
   3597     WORD32 i4_frame_qp;
   3598     rc_quant_t *ps_rc_quant_ctxt;
   3599     WORD32 i4_is_bpic;
   3600 
   3601     ps_frm_lambda_ctxt = &ps_ctxt->s_frm_lambda_ctxt;
   3602     //ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base;
   3603     i4_frame_qp = ps_ctxt->s_frm_prms.i4_frame_qp;
   3604     ps_rc_quant_ctxt = ps_ctxt->ps_rc_quant_ctxt;
   3605     i4_is_bpic = ps_ctxt->s_frm_prms.bidir_enabled;
   3606 
   3607     i4_cu_qp = ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_frame_qp + ps_rc_quant_ctxt->i1_qp_offset];
   3608 
   3609     {
   3610         if(ps_ctxt->i4_l0me_qp_mod)
   3611         {
   3612 #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
   3613 #if LAMDA_BASED_ON_QUANT
   3614             WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[2][0];
   3615 #else
   3616             WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[3][0];
   3617 #endif
   3618             i4_cu_qp = (((i4_cu_qp)*i4_activity) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
   3619                        QP_LEVEL_MOD_ACT_FACTOR;
   3620 
   3621 #endif
   3622         }
   3623         if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qscale)
   3624             i4_cu_qp = ps_rc_quant_ctxt->i2_max_qscale;
   3625         else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qscale)
   3626             i4_cu_qp = ps_rc_quant_ctxt->i2_min_qscale;
   3627 
   3628         i4_cu_qp = ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_cu_qp];
   3629     }
   3630 
   3631     if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qp)
   3632         i4_cu_qp = ps_rc_quant_ctxt->i2_max_qp;
   3633     else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qp)
   3634         i4_cu_qp = ps_rc_quant_ctxt->i2_min_qp;
   3635 
   3636     lambda = pow(2.0, (((double)(i4_cu_qp - 12)) / 3));
   3637 
   3638     lambda_modifier = ps_frm_lambda_ctxt->lambda_modifier;
   3639 
   3640     if(i4_is_bpic)
   3641     {
   3642         lambda_modifier = lambda_modifier * CLIP3((((double)(i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
   3643     }
   3644     if(ps_ctxt->i4_use_const_lamda_modifier)
   3645     {
   3646         if(ps_ctxt->s_frm_prms.is_i_pic)
   3647         {
   3648             lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
   3649         }
   3650         else
   3651         {
   3652             lambda_modifier = CONST_LAMDA_MOD_VAL;
   3653         }
   3654     }
   3655     lambda *= lambda_modifier;
   3656 
   3657     return ((WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)));
   3658 }
   3659 
   3660 /**
   3661 ********************************************************************************
   3662 *  @fn     hme_update_dynamic_search_params
   3663 *
   3664 *  @brief  Update the Dynamic search params based on the current MVs
   3665 *
   3666 *  @param[in,out]  ps_dyn_range_prms    [inout] : Dyn. Range Param str.
   3667 *                  i2_mvy               [in]    : current MV y comp.
   3668 *
   3669 *  @return None
   3670 ********************************************************************************
   3671 */
   3672 void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy)
   3673 {
   3674     /* If MV is up large, update i2_dyn_max_y */
   3675     if(i2_mvy > ps_dyn_range_prms->i2_dyn_max_y)
   3676         ps_dyn_range_prms->i2_dyn_max_y = i2_mvy;
   3677     /* If MV is down large, update i2_dyn_min_y */
   3678     if(i2_mvy < ps_dyn_range_prms->i2_dyn_min_y)
   3679         ps_dyn_range_prms->i2_dyn_min_y = i2_mvy;
   3680 }
   3681 
   3682 void hme_add_new_node_to_a_sorted_array(
   3683     search_node_t *ps_result_node,
   3684     search_node_t **pps_sorted_array,
   3685     U08 *pu1_shifts,
   3686     U32 u4_num_results_updated,
   3687     U08 u1_shift)
   3688 {
   3689     U32 i;
   3690 
   3691     if(NULL == pu1_shifts)
   3692     {
   3693         S32 i4_cur_node_cost = ps_result_node->i4_tot_cost;
   3694 
   3695         for(i = 0; i < u4_num_results_updated; i++)
   3696         {
   3697             if(i4_cur_node_cost < pps_sorted_array[i]->i4_tot_cost)
   3698             {
   3699                 memmove(
   3700                     &pps_sorted_array[i + 1],
   3701                     &pps_sorted_array[i],
   3702                     (u4_num_results_updated - i) * sizeof(search_node_t *));
   3703 
   3704                 break;
   3705             }
   3706         }
   3707     }
   3708     else
   3709     {
   3710         S32 i4_cur_node_cost =
   3711             (u1_shift == 0) ? ps_result_node->i4_tot_cost
   3712                             : (ps_result_node->i4_tot_cost + (1 << (u1_shift - 1))) >> u1_shift;
   3713 
   3714         for(i = 0; i < u4_num_results_updated; i++)
   3715         {
   3716             S32 i4_prev_node_cost = (pu1_shifts[i] == 0) ? pps_sorted_array[i]->i4_tot_cost
   3717                                                          : (pps_sorted_array[i]->i4_tot_cost +
   3718                                                             (1 << (pu1_shifts[i] - 1))) >>
   3719                                                                pu1_shifts[i];
   3720 
   3721             if(i4_cur_node_cost < i4_prev_node_cost)
   3722             {
   3723                 memmove(
   3724                     &pps_sorted_array[i + 1],
   3725                     &pps_sorted_array[i],
   3726                     (u4_num_results_updated - i) * sizeof(search_node_t *));
   3727                 memmove(
   3728                     &pu1_shifts[i + 1], &pu1_shifts[i], (u4_num_results_updated - i) * sizeof(U08));
   3729 
   3730                 break;
   3731             }
   3732         }
   3733 
   3734         pu1_shifts[i] = u1_shift;
   3735     }
   3736 
   3737     pps_sorted_array[i] = ps_result_node;
   3738 }
   3739 
   3740 S32 hme_find_pos_of_implicitly_stored_ref_id(
   3741     S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results)
   3742 {
   3743     S32 i;
   3744 
   3745     for(i = 0; i < i4_num_results; i++)
   3746     {
   3747         if(i1_ref_idx == pi1_ref_idx[i])
   3748         {
   3749             if(0 == i4_result_id)
   3750             {
   3751                 return i;
   3752             }
   3753             else
   3754             {
   3755                 i4_result_id--;
   3756             }
   3757         }
   3758     }
   3759 
   3760     return -1;
   3761 }
   3762 
   3763 static __inline void hme_search_node_populator(
   3764     search_node_t *ps_search_node, hme_mv_t *ps_mv, S08 i1_ref_idx, S08 i1_mv_magnitude_shift)
   3765 {
   3766     ps_search_node->ps_mv->i2_mvx = SHL_NEG((WORD16)ps_mv->i2_mv_x, i1_mv_magnitude_shift);
   3767     ps_search_node->ps_mv->i2_mvy = SHL_NEG((WORD16)ps_mv->i2_mv_y, i1_mv_magnitude_shift);
   3768     ps_search_node->i1_ref_idx = i1_ref_idx;
   3769     ps_search_node->u1_is_avail = 1;
   3770     ps_search_node->u1_subpel_done = 0;
   3771 }
   3772 
   3773 S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt)
   3774 {
   3775     hme_mv_t *ps_mv;
   3776 
   3777     S32 wd_c, ht_c, wd_p, ht_p;
   3778     S32 blksize_p, blksize_c;
   3779     S32 i;
   3780     S08 *pi1_ref_idx;
   3781     /* Cache for storing offsets */
   3782     S32 ai4_cand_offsets[NUM_SEARCH_CAND_LOCATIONS];
   3783 
   3784     layer_ctxt_t *ps_curr_layer = ps_ctxt->ps_curr_layer;
   3785     layer_ctxt_t *ps_coarse_layer = ps_ctxt->ps_coarse_layer;
   3786     layer_mv_t *ps_coarse_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
   3787     layer_mv_t *ps_curr_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
   3788     search_candt_t *ps_search_cands = ps_ctxt->ps_search_cands;
   3789     hme_mv_t s_zero_mv = { 0 };
   3790 
   3791     S32 i4_pos_x = ps_ctxt->i4_pos_x;
   3792     S32 i4_pos_y = ps_ctxt->i4_pos_y;
   3793     S32 i4_num_act_ref_l0 = ps_ctxt->i4_num_act_ref_l0;
   3794     S32 i4_num_act_ref_l1 = ps_ctxt->i4_num_act_ref_l1;
   3795     U08 u1_pred_dir = ps_ctxt->u1_pred_dir;
   3796     U08 u1_pred_dir_ctr = ps_ctxt->u1_pred_dir_ctr;
   3797     U08 u1_num_results_in_curr_mvbank = ps_ctxt->u1_num_results_in_mvbank;
   3798     U08 u1_num_results_in_coarse_mvbank =
   3799         (u1_pred_dir == 0) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref)
   3800                            : (i4_num_act_ref_l1 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref);
   3801     S32 i4_init_offset_projected =
   3802         (u1_pred_dir == 1) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref) : 0;
   3803     S32 i4_init_offset_spatial =
   3804         (u1_pred_dir_ctr == 1)
   3805             ? (ps_curr_layer_mvbank->i4_num_mvs_per_ref * u1_num_results_in_curr_mvbank)
   3806             : 0;
   3807     U08 u1_search_candidate_list_index = ps_ctxt->u1_search_candidate_list_index;
   3808     U08 u1_max_num_search_cands =
   3809         gau1_max_num_search_cands_in_l0_me[u1_search_candidate_list_index];
   3810     S32 i4_num_srch_cands = MIN(u1_max_num_search_cands, ps_ctxt->i4_max_num_init_cands << 1);
   3811     U16 u2_is_offset_available = 0;
   3812     U08 u1_search_blk_to_spatial_mvbank_blk_size_factor = 1;
   3813 
   3814     /* Width and ht of current and prev layers */
   3815     wd_c = ps_curr_layer->i4_wd;
   3816     ht_c = ps_curr_layer->i4_ht;
   3817     wd_p = ps_coarse_layer->i4_wd;
   3818     ht_p = ps_coarse_layer->i4_ht;
   3819 
   3820     blksize_p = gau1_blk_size_to_wd_shift[ps_coarse_layer_mvbank->e_blk_size];
   3821     blksize_c = gau1_blk_size_to_wd_shift[ps_curr_layer_mvbank->e_blk_size];
   3822 
   3823     /* ASSERT for valid sizes */
   3824     ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
   3825 
   3826     {
   3827         S32 x = i4_pos_x >> 4;
   3828         S32 y = i4_pos_y >> 4;
   3829 
   3830         if(blksize_c != gau1_blk_size_to_wd_shift[ps_ctxt->e_search_blk_size])
   3831         {
   3832             x *= 2;
   3833             y *= 2;
   3834 
   3835             u1_search_blk_to_spatial_mvbank_blk_size_factor = 2;
   3836         }
   3837 
   3838         i4_init_offset_spatial += (x + y * ps_curr_layer_mvbank->i4_num_blks_per_row) *
   3839                                   ps_curr_layer_mvbank->i4_num_mvs_per_blk;
   3840     }
   3841 
   3842     for(i = 0; i < i4_num_srch_cands; i++)
   3843     {
   3844         SEARCH_CANDIDATE_TYPE_T e_search_cand_type =
   3845             gae_search_cand_priority_to_search_cand_type_map_in_l0_me[u1_search_candidate_list_index]
   3846                                                                      [i];
   3847         SEARCH_CAND_LOCATIONS_T e_search_cand_loc =
   3848             gae_search_cand_type_to_location_map[e_search_cand_type];
   3849         S08 i1_result_id = MIN(
   3850             gai1_search_cand_type_to_result_id_map[e_search_cand_type],
   3851             (e_search_cand_loc < 0 ? 0
   3852                                    : ps_ctxt->pu1_num_fpel_search_cands[e_search_cand_loc] - 1));
   3853         U08 u1_is_spatial_cand = (1 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
   3854         U08 u1_is_proj_cand = (0 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
   3855         U08 u1_is_zeroMV_cand = (ZERO_MV == e_search_cand_type) ||
   3856                                 (ZERO_MV_ALTREF == e_search_cand_type);
   3857 
   3858         /* When spatial candidates are available, use them, else use the projected candidates */
   3859         /* This is required since some blocks will never have certain spatial candidates, and in order */
   3860         /* to accomodate such instances in 'gae_search_cand_priority_to_search_cand_type_map_in_l0_me' list,  */
   3861         /* all candidates apart from the 'LEFT' have been marked as projected */
   3862         if(((e_search_cand_loc == TOPLEFT) || (e_search_cand_loc == TOP) ||
   3863             (e_search_cand_loc == TOPRIGHT)) &&
   3864            (i1_result_id < u1_num_results_in_curr_mvbank) && u1_is_proj_cand)
   3865         {
   3866             if(e_search_cand_loc == TOPLEFT)
   3867             {
   3868                 u1_is_spatial_cand = ps_ctxt->u1_is_topLeft_available ||
   3869                                      !ps_ctxt->u1_is_left_available;
   3870             }
   3871             else if(e_search_cand_loc == TOPRIGHT)
   3872             {
   3873                 u1_is_spatial_cand = ps_ctxt->u1_is_topRight_available;
   3874             }
   3875             else
   3876             {
   3877                 u1_is_spatial_cand = ps_ctxt->u1_is_top_available;
   3878             }
   3879 
   3880             u1_is_proj_cand = !u1_is_spatial_cand;
   3881         }
   3882 
   3883         switch(u1_is_zeroMV_cand + (u1_is_spatial_cand << 1) + (u1_is_proj_cand << 2))
   3884         {
   3885         case 1:
   3886         {
   3887             hme_search_node_populator(
   3888                 ps_search_cands[i].ps_search_node,
   3889                 &s_zero_mv,
   3890                 (ZERO_MV == e_search_cand_type) ? ps_ctxt->i1_default_ref_id
   3891                                                 : ps_ctxt->i1_alt_default_ref_id,
   3892                 0);
   3893 
   3894             break;
   3895         }
   3896         case 2:
   3897         {
   3898             S08 i1_mv_magnitude_shift = 0;
   3899 
   3900             S32 i4_offset = i4_init_offset_spatial;
   3901 
   3902             i1_result_id = MIN(i1_result_id, u1_num_results_in_curr_mvbank - 1);
   3903             i4_offset += i1_result_id;
   3904 
   3905             switch(e_search_cand_loc)
   3906             {
   3907             case LEFT:
   3908             {
   3909                 if(ps_ctxt->u1_is_left_available)
   3910                 {
   3911                     i1_mv_magnitude_shift = -2;
   3912 
   3913                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
   3914 
   3915                     ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
   3916                     pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
   3917                 }
   3918                 else
   3919                 {
   3920                     i1_mv_magnitude_shift = 0;
   3921 
   3922                     ps_mv = &s_zero_mv;
   3923                     pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
   3924                 }
   3925 
   3926                 break;
   3927             }
   3928             case TOPLEFT:
   3929             {
   3930                 if(ps_ctxt->u1_is_topLeft_available)
   3931                 {
   3932                     i1_mv_magnitude_shift = -2;
   3933 
   3934                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
   3935                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
   3936 
   3937                     ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
   3938                     pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
   3939                 }
   3940                 else
   3941                 {
   3942                     i1_mv_magnitude_shift = 0;
   3943 
   3944                     ps_mv = &s_zero_mv;
   3945                     pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
   3946                 }
   3947 
   3948                 break;
   3949             }
   3950             case TOP:
   3951             {
   3952                 if(ps_ctxt->u1_is_top_available)
   3953                 {
   3954                     i1_mv_magnitude_shift = -2;
   3955 
   3956                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
   3957 
   3958                     ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
   3959                     pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
   3960                 }
   3961                 else
   3962                 {
   3963                     i1_mv_magnitude_shift = 0;
   3964 
   3965                     ps_mv = &s_zero_mv;
   3966                     pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
   3967                 }
   3968 
   3969                 break;
   3970             }
   3971             case TOPRIGHT:
   3972             {
   3973                 if(ps_ctxt->u1_is_topRight_available)
   3974                 {
   3975                     i1_mv_magnitude_shift = -2;
   3976 
   3977                     i4_offset += ps_curr_layer_mvbank->i4_num_mvs_per_blk *
   3978                                  u1_search_blk_to_spatial_mvbank_blk_size_factor;
   3979                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
   3980 
   3981                     ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
   3982                     pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
   3983                 }
   3984                 else
   3985                 {
   3986                     i1_mv_magnitude_shift = 0;
   3987                     ps_mv = &s_zero_mv;
   3988                     pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
   3989                 }
   3990 
   3991                 break;
   3992             }
   3993             default:
   3994             {
   3995                 /* AiyAiyYo!! */
   3996                 ASSERT(0);
   3997             }
   3998             }
   3999 
   4000             hme_search_node_populator(
   4001                 ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], i1_mv_magnitude_shift);
   4002 
   4003             break;
   4004         }
   4005         case 4:
   4006         {
   4007             ASSERT(ILLUSORY_CANDIDATE != e_search_cand_type);
   4008             ASSERT(ILLUSORY_LOCATION != e_search_cand_loc);
   4009 
   4010             i1_result_id = MIN(i1_result_id, u1_num_results_in_coarse_mvbank - 1);
   4011 
   4012             if(!(u2_is_offset_available & (1 << e_search_cand_loc)))
   4013             {
   4014                 S32 x, y;
   4015 
   4016                 x = i4_pos_x + gai4_search_cand_location_to_x_offset_map[e_search_cand_loc];
   4017                 y = i4_pos_y + gai4_search_cand_location_to_y_offset_map[e_search_cand_loc];
   4018 
   4019                 /* Safety check to avoid uninitialized access across temporal layers */
   4020                 x = CLIP3(x, 0, (wd_c - blksize_p));
   4021                 y = CLIP3(y, 0, (ht_c - blksize_p));
   4022 
   4023                 /* Project the positions to prev layer */
   4024                 x = x >> blksize_p;
   4025                 y = y >> blksize_p;
   4026 
   4027                 ai4_cand_offsets[e_search_cand_loc] =
   4028                     (x * ps_coarse_layer_mvbank->i4_num_mvs_per_blk);
   4029                 ai4_cand_offsets[e_search_cand_loc] +=
   4030                     (y * ps_coarse_layer_mvbank->i4_num_mvs_per_row);
   4031                 ai4_cand_offsets[e_search_cand_loc] += i4_init_offset_projected;
   4032 
   4033                 u2_is_offset_available |= (1 << e_search_cand_loc);
   4034             }
   4035 
   4036             ps_mv =
   4037                 ps_coarse_layer_mvbank->ps_mv + ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
   4038             pi1_ref_idx = ps_coarse_layer_mvbank->pi1_ref_idx +
   4039                           ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
   4040 
   4041             hme_search_node_populator(ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], 1);
   4042 
   4043             break;
   4044         }
   4045         default:
   4046         {
   4047             /* NoNoNoNoNooooooooNO! */
   4048             ASSERT(0);
   4049         }
   4050         }
   4051 
   4052         ASSERT(ps_search_cands[i].ps_search_node->i1_ref_idx >= 0);
   4053         ASSERT(
   4054             !u1_pred_dir
   4055                 ? (ps_ctxt->pi4_ref_id_lc_to_l0_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
   4056                    i4_num_act_ref_l0)
   4057                 : (ps_ctxt->pi4_ref_id_lc_to_l1_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
   4058                    ps_ctxt->i4_num_act_ref_l1));
   4059     }
   4060 
   4061     return i4_num_srch_cands;
   4062 }
   4063 
   4064 void hme_mv_clipper(
   4065     hme_search_prms_t *ps_search_prms_blk,
   4066     S32 i4_num_srch_cands,
   4067     S08 i1_check_for_mult_refs,
   4068     U08 u1_fpel_refine_extent,
   4069     U08 u1_hpel_refine_extent,
   4070     U08 u1_qpel_refine_extent)
   4071 {
   4072     S32 candt;
   4073     range_prms_t *ps_range_prms;
   4074 
   4075     for(candt = 0; candt < i4_num_srch_cands; candt++)
   4076     {
   4077         search_node_t *ps_search_node;
   4078 
   4079         ps_search_node = ps_search_prms_blk->ps_search_candts[candt].ps_search_node;
   4080         ps_range_prms = ps_search_prms_blk->aps_mv_range[ps_search_node->i1_ref_idx];
   4081 
   4082         /* Clip the motion vectors as well here since after clipping
   4083         two candidates can become same and they will be removed during deduplication */
   4084         CLIP_MV_WITHIN_RANGE(
   4085             ps_search_node->ps_mv->i2_mvx,
   4086             ps_search_node->ps_mv->i2_mvy,
   4087             ps_range_prms,
   4088             u1_fpel_refine_extent,
   4089             u1_hpel_refine_extent,
   4090             u1_qpel_refine_extent);
   4091     }
   4092 }
   4093 
   4094 void hme_init_pred_buf_info(
   4095     hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS],
   4096     hme_pred_buf_mngr_t *ps_buf_mngr,
   4097     U08 u1_pu1_wd,
   4098     U08 u1_pu1_ht,
   4099     PART_TYPE_T e_part_type)
   4100 {
   4101     U08 u1_pred_buf_array_id;
   4102 
   4103     if(1 != ihevce_get_free_pred_buf_indices(
   4104                 &u1_pred_buf_array_id, &ps_buf_mngr->u4_pred_buf_usage_indicator, 1))
   4105     {
   4106         ASSERT(0);
   4107     }
   4108     else
   4109     {
   4110         ps_info[0][0].i4_pred_stride = MAX_CU_SIZE;
   4111         ps_info[0][0].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_pred_buf_array_id];
   4112         ps_info[0][0].u1_pred_buf_array_id = u1_pred_buf_array_id;
   4113 
   4114         if(PRT_2Nx2N != e_part_type)
   4115         {
   4116             ps_info[0][1].i4_pred_stride = MAX_CU_SIZE;
   4117             ps_info[0][1].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_pred_buf_array_id] +
   4118                                      (gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]]
   4119                                           ? u1_pu1_ht * ps_info[0][1].i4_pred_stride
   4120                                           : u1_pu1_wd);
   4121             ps_info[0][1].u1_pred_buf_array_id = u1_pred_buf_array_id;
   4122         }
   4123     }
   4124 }
   4125 
   4126 void hme_debrief_bipred_eval(
   4127     part_type_results_t *ps_part_type_result,
   4128     hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS],
   4129     hme_pred_buf_mngr_t *ps_pred_buf_mngr,
   4130     U08 *pu1_allocated_pred_buf_array_indixes,
   4131     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list
   4132 
   4133 )
   4134 {
   4135     PART_TYPE_T e_part_type = (PART_TYPE_T)ps_part_type_result->u1_part_type;
   4136 
   4137     U32 *pu4_pred_buf_usage_indicator = &ps_pred_buf_mngr->u4_pred_buf_usage_indicator;
   4138     U08 u1_is_part_vertical = gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]];
   4139 
   4140     if(0 == ps_part_type_result->u1_part_type)
   4141     {
   4142         if(ps_part_type_result->as_pu_results->pu.b2_pred_mode == PRED_BI)
   4143         {
   4144             ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
   4145 
   4146             ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
   4147             ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
   4148 
   4149             ihevce_set_pred_buf_as_free(
   4150                 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
   4151 
   4152             ihevce_set_pred_buf_as_free(
   4153                 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
   4154         }
   4155         else
   4156         {
   4157             ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
   4158             ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
   4159 
   4160             ihevce_set_pred_buf_as_free(
   4161                 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
   4162 
   4163             ihevce_set_pred_buf_as_free(
   4164                 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
   4165 
   4166             if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
   4167             {
   4168                 ihevce_set_pred_buf_as_free(
   4169                     pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
   4170             }
   4171         }
   4172     }
   4173     else
   4174     {
   4175         U08 *pu1_src_pred;
   4176         U08 *pu1_dst_pred;
   4177         S32 i4_src_pred_stride;
   4178         S32 i4_dst_pred_stride;
   4179 
   4180         U08 u1_pu1_wd = (ps_part_type_result->as_pu_results[0].pu.b4_wd + 1) << 2;
   4181         U08 u1_pu1_ht = (ps_part_type_result->as_pu_results[0].pu.b4_ht + 1) << 2;
   4182         U08 u1_pu2_wd = (ps_part_type_result->as_pu_results[1].pu.b4_wd + 1) << 2;
   4183         U08 u1_pu2_ht = (ps_part_type_result->as_pu_results[1].pu.b4_ht + 1) << 2;
   4184 
   4185         U08 u1_condition_for_switch =
   4186             (ps_part_type_result->as_pu_results[0].pu.b2_pred_mode == PRED_BI) |
   4187             ((ps_part_type_result->as_pu_results[1].pu.b2_pred_mode == PRED_BI) << 1);
   4188 
   4189         switch(u1_condition_for_switch)
   4190         {
   4191         case 0:
   4192         {
   4193             ps_part_type_result->pu1_pred =
   4194                 ps_pred_buf_mngr->apu1_pred_bufs[pu1_allocated_pred_buf_array_indixes[0]];
   4195             ps_part_type_result->i4_pred_stride = MAX_CU_SIZE;
   4196 
   4197             ihevce_set_pred_buf_as_free(
   4198                 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
   4199 
   4200             ihevce_set_pred_buf_as_free(
   4201                 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
   4202 
   4203             if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
   4204             {
   4205                 pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
   4206                 pu1_dst_pred = ps_part_type_result->pu1_pred;
   4207                 i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
   4208                 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
   4209 
   4210                 ps_cmn_utils_optimised_function_list->pf_copy_2d(
   4211                     pu1_dst_pred,
   4212                     i4_dst_pred_stride,
   4213                     pu1_src_pred,
   4214                     i4_src_pred_stride,
   4215                     u1_pu1_wd,
   4216                     u1_pu1_ht);
   4217             }
   4218 
   4219             if(UCHAR_MAX == ps_pred_buf_info[0][1].u1_pred_buf_array_id)
   4220             {
   4221                 pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
   4222                 pu1_dst_pred = ps_part_type_result->pu1_pred +
   4223                                (u1_is_part_vertical
   4224                                     ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
   4225                                     : u1_pu1_wd);
   4226                 i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
   4227                 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
   4228 
   4229                 ps_cmn_utils_optimised_function_list->pf_copy_2d(
   4230                     pu1_dst_pred,
   4231                     i4_dst_pred_stride,
   4232                     pu1_src_pred,
   4233                     i4_src_pred_stride,
   4234                     u1_pu2_wd,
   4235                     u1_pu2_ht);
   4236             }
   4237 
   4238             break;
   4239         }
   4240         case 1:
   4241         {
   4242             ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
   4243 
   4244             ihevce_set_pred_buf_as_free(
   4245                 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
   4246 
   4247             /* Copy PU1 pred into PU2's pred buf */
   4248             if(((u1_pu1_ht < u1_pu2_ht) || (u1_pu1_wd < u1_pu2_wd)) &&
   4249                (UCHAR_MAX != ps_pred_buf_info[0][1].u1_pred_buf_array_id))
   4250             {
   4251                 ps_part_type_result->pu1_pred =
   4252                     ps_pred_buf_info[0][1].pu1_pred -
   4253                     (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[0][1].i4_pred_stride
   4254                                          : u1_pu1_wd);
   4255                 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
   4256 
   4257                 ihevce_set_pred_buf_as_free(
   4258                     pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
   4259 
   4260                 pu1_src_pred = ps_pred_buf_info[2][0].pu1_pred;
   4261                 pu1_dst_pred = ps_part_type_result->pu1_pred;
   4262                 i4_src_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
   4263                 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
   4264 
   4265                 ps_cmn_utils_optimised_function_list->pf_copy_2d(
   4266                     pu1_dst_pred,
   4267                     i4_dst_pred_stride,
   4268                     pu1_src_pred,
   4269                     i4_src_pred_stride,
   4270                     u1_pu1_wd,
   4271                     u1_pu1_ht);
   4272             }
   4273             else
   4274             {
   4275                 ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
   4276                 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
   4277 
   4278                 ihevce_set_pred_buf_as_free(
   4279                     pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
   4280 
   4281                 pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
   4282                 pu1_dst_pred = ps_part_type_result->pu1_pred;
   4283                 i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
   4284                 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
   4285 
   4286                 ps_cmn_utils_optimised_function_list->pf_copy_2d(
   4287                     pu1_dst_pred,
   4288                     i4_dst_pred_stride,
   4289                     pu1_src_pred,
   4290                     i4_src_pred_stride,
   4291                     u1_pu2_wd,
   4292                     u1_pu2_ht);
   4293             }
   4294 
   4295             break;
   4296         }
   4297         case 2:
   4298         {
   4299             ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);
   4300 
   4301             ihevce_set_pred_buf_as_free(
   4302                 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
   4303 
   4304             /* Copy PU2 pred into PU1's pred buf */
   4305             if(((u1_pu1_ht > u1_pu2_ht) || (u1_pu1_wd > u1_pu2_wd)) &&
   4306                (UCHAR_MAX != ps_pred_buf_info[0][0].u1_pred_buf_array_id))
   4307             {
   4308                 ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
   4309                 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
   4310 
   4311                 ihevce_set_pred_buf_as_free(
   4312                     pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
   4313 
   4314                 pu1_src_pred = ps_pred_buf_info[2][1].pu1_pred;
   4315                 pu1_dst_pred = ps_part_type_result->pu1_pred +
   4316                                (u1_is_part_vertical
   4317                                     ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
   4318                                     : u1_pu1_wd);
   4319                 i4_src_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;
   4320                 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
   4321 
   4322                 ps_cmn_utils_optimised_function_list->pf_copy_2d(
   4323                     pu1_dst_pred,
   4324                     i4_dst_pred_stride,
   4325                     pu1_src_pred,
   4326                     i4_src_pred_stride,
   4327                     u1_pu2_wd,
   4328                     u1_pu2_ht);
   4329             }
   4330             else
   4331             {
   4332                 ps_part_type_result->pu1_pred =
   4333                     ps_pred_buf_info[2][1].pu1_pred -
   4334                     (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[2][1].i4_pred_stride
   4335                                          : u1_pu1_wd);
   4336                 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;
   4337 
   4338                 ihevce_set_pred_buf_as_free(
   4339                     pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
   4340 
   4341                 pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
   4342                 pu1_dst_pred = ps_part_type_result->pu1_pred;
   4343                 i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
   4344                 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
   4345 
   4346                 ps_cmn_utils_optimised_function_list->pf_copy_2d(
   4347                     pu1_dst_pred,
   4348                     i4_dst_pred_stride,
   4349                     pu1_src_pred,
   4350                     i4_src_pred_stride,
   4351                     u1_pu1_wd,
   4352                     u1_pu1_ht);
   4353             }
   4354 
   4355             break;
   4356         }
   4357         case 3:
   4358         {
   4359             ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
   4360             ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);
   4361             ASSERT(
   4362                 ps_pred_buf_info[2][1].u1_pred_buf_array_id ==
   4363                 ps_pred_buf_info[2][0].u1_pred_buf_array_id);
   4364 
   4365             ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
   4366             ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
   4367 
   4368             ihevce_set_pred_buf_as_free(
   4369                 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
   4370 
   4371             break;
   4372         }
   4373         }
   4374     }
   4375 }
   4376 
   4377 U08 hme_decide_search_candidate_priority_in_l1_and_l2_me(
   4378     SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset)
   4379 {
   4380     U08 u1_priority_val =
   4381         gau1_search_cand_priority_in_l1_and_l2_me[e_quality_preset >= ME_MEDIUM_SPEED][e_cand_type];
   4382 
   4383     if(UCHAR_MAX == u1_priority_val)
   4384     {
   4385         ASSERT(0);
   4386     }
   4387 
   4388     ASSERT(u1_priority_val <= MAX_INIT_CANDTS);
   4389 
   4390     return u1_priority_val;
   4391 }
   4392 
   4393 U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index)
   4394 {
   4395     U08 u1_priority_val = gau1_search_cand_priority_in_l0_me[u1_index][e_cand_type];
   4396 
   4397     if(UCHAR_MAX == u1_priority_val)
   4398     {
   4399         ASSERT(0);
   4400     }
   4401 
   4402     ASSERT(u1_priority_val <= MAX_INIT_CANDTS);
   4403 
   4404     return u1_priority_val;
   4405 }
   4406 
   4407 void hme_search_cand_data_init(
   4408     S32 *pi4_id_Z,
   4409     S32 *pi4_id_coloc,
   4410     S32 *pi4_num_coloc_cands,
   4411     U08 *pu1_search_candidate_list_index,
   4412     S32 i4_num_act_ref_l0,
   4413     S32 i4_num_act_ref_l1,
   4414     U08 u1_is_bidir_enabled,
   4415     U08 u1_4x4_blk_in_l1me)
   4416 {
   4417     S32 i, j;
   4418     S32 i4_num_coloc_cands;
   4419 
   4420     U08 u1_search_candidate_list_index;
   4421 
   4422     if(!u1_is_bidir_enabled && !u1_4x4_blk_in_l1me)
   4423     {
   4424         S32 i;
   4425 
   4426         u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2;
   4427         i4_num_coloc_cands = i4_num_act_ref_l0 * 2;
   4428 
   4429         switch(i4_num_act_ref_l0)
   4430         {
   4431         case 1:
   4432         {
   4433             for(i = 0; i < 2; i++)
   4434             {
   4435                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4436                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4437                     u1_search_candidate_list_index);
   4438             }
   4439 
   4440             break;
   4441         }
   4442         case 2:
   4443         {
   4444             for(i = 0; i < 4; i++)
   4445             {
   4446                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4447                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4448                     u1_search_candidate_list_index);
   4449             }
   4450 
   4451             break;
   4452         }
   4453         case 3:
   4454         {
   4455             for(i = 0; i < 6; i++)
   4456             {
   4457                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4458                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4459                     u1_search_candidate_list_index);
   4460             }
   4461 
   4462             break;
   4463         }
   4464         case 4:
   4465         {
   4466             for(i = 0; i < 8; i++)
   4467             {
   4468                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4469                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4470                     u1_search_candidate_list_index);
   4471             }
   4472 
   4473             break;
   4474         }
   4475         default:
   4476         {
   4477             ASSERT(0);
   4478         }
   4479         }
   4480 
   4481         *pi4_num_coloc_cands = i4_num_coloc_cands;
   4482         *pu1_search_candidate_list_index = u1_search_candidate_list_index;
   4483     }
   4484     else if(!u1_is_bidir_enabled && u1_4x4_blk_in_l1me)
   4485     {
   4486         S32 i;
   4487 
   4488         i4_num_coloc_cands = i4_num_act_ref_l0 * 2;
   4489         u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2 + 1;
   4490 
   4491         switch(i4_num_act_ref_l0)
   4492         {
   4493         case 1:
   4494         {
   4495             for(i = 0; i < 2; i++)
   4496             {
   4497                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4498                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4499                     u1_search_candidate_list_index);
   4500             }
   4501 
   4502             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4503                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
   4504 
   4505             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
   4506                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
   4507 
   4508             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
   4509                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
   4510 
   4511             i4_num_coloc_cands += 3;
   4512 
   4513             break;
   4514         }
   4515         case 2:
   4516         {
   4517             for(i = 0; i < 4; i++)
   4518             {
   4519                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4520                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4521                     u1_search_candidate_list_index);
   4522             }
   4523 
   4524             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4525                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
   4526 
   4527             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
   4528                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
   4529 
   4530             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
   4531                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
   4532 
   4533             pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
   4534                 PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
   4535 
   4536             pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
   4537                 PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
   4538 
   4539             pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
   4540                 PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
   4541 
   4542             i4_num_coloc_cands += 6;
   4543 
   4544             break;
   4545         }
   4546         case 3:
   4547         {
   4548             for(i = 0; i < 6; i++)
   4549             {
   4550                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4551                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4552                     u1_search_candidate_list_index);
   4553             }
   4554 
   4555             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4556                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
   4557 
   4558             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
   4559                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
   4560 
   4561             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
   4562                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
   4563 
   4564             pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
   4565                 PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
   4566 
   4567             pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
   4568                 PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
   4569 
   4570             pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
   4571                 PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
   4572 
   4573             i4_num_coloc_cands += 6;
   4574 
   4575             break;
   4576         }
   4577         case 4:
   4578         {
   4579             for(i = 0; i < 8; i++)
   4580             {
   4581                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4582                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4583                     u1_search_candidate_list_index);
   4584             }
   4585 
   4586             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4587                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
   4588 
   4589             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
   4590                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
   4591 
   4592             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
   4593                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
   4594 
   4595             pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
   4596                 PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
   4597 
   4598             pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
   4599                 PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
   4600 
   4601             pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
   4602                 PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
   4603 
   4604             i4_num_coloc_cands += 6;
   4605 
   4606             break;
   4607         }
   4608         default:
   4609         {
   4610             ASSERT(0);
   4611         }
   4612         }
   4613 
   4614         *pi4_num_coloc_cands = i4_num_coloc_cands;
   4615         *pu1_search_candidate_list_index = u1_search_candidate_list_index;
   4616     }
   4617     else
   4618     {
   4619         /* The variable 'u1_search_candidate_list_index' is hardcoded */
   4620         /* to 10 and 11 respectively. But, these values are not returned */
   4621         /* by this function since the actual values are dependent on */
   4622         /* the number of refs in L0 and L1 respectively */
   4623         /* Hence, the actual return values are being recomputed */
   4624         /* in the latter part of this block */
   4625 
   4626         if(!u1_4x4_blk_in_l1me)
   4627         {
   4628             u1_search_candidate_list_index = 10;
   4629 
   4630             i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));
   4631 
   4632             for(i = 0; i < i4_num_coloc_cands; i++)
   4633             {
   4634                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4635                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4636                     u1_search_candidate_list_index);
   4637             }
   4638         }
   4639         else
   4640         {
   4641             u1_search_candidate_list_index = 11;
   4642 
   4643             i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));
   4644 
   4645             for(i = 0; i < i4_num_coloc_cands; i++)
   4646             {
   4647                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4648                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
   4649                     u1_search_candidate_list_index);
   4650             }
   4651 
   4652             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
   4653                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
   4654 
   4655             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
   4656                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
   4657 
   4658             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
   4659                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
   4660         }
   4661 
   4662         for(j = 0; j < 2; j++)
   4663         {
   4664             if(0 == j)
   4665             {
   4666                 pu1_search_candidate_list_index[j] =
   4667                     8 + ((i4_num_act_ref_l0 > 1) * 2) + u1_4x4_blk_in_l1me;
   4668                 pi4_num_coloc_cands[j] =
   4669                     (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l0 > 1) * 2);
   4670             }
   4671             else
   4672             {
   4673                 pu1_search_candidate_list_index[j] =
   4674                     8 + ((i4_num_act_ref_l1 > 1) * 2) + u1_4x4_blk_in_l1me;
   4675                 pi4_num_coloc_cands[j] =
   4676                     (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l1 > 1) * 2);
   4677             }
   4678         }
   4679     }
   4680 
   4681     if(i4_num_act_ref_l0 || i4_num_act_ref_l1)
   4682     {
   4683         pi4_id_Z[0] = hme_decide_search_candidate_priority_in_l0_me(
   4684             (SEARCH_CANDIDATE_TYPE_T)ZERO_MV, pu1_search_candidate_list_index[0]);
   4685     }
   4686 
   4687     if((i4_num_act_ref_l0 > 1) && !u1_is_bidir_enabled)
   4688     {
   4689         pi4_id_Z[1] = hme_decide_search_candidate_priority_in_l0_me(
   4690             (SEARCH_CANDIDATE_TYPE_T)ZERO_MV_ALTREF, pu1_search_candidate_list_index[0]);
   4691     }
   4692 }
   4693 
   4694 static U08
   4695     hme_determine_base_block_size(S32 *pi4_valid_part_array, S32 i4_num_valid_parts, U08 u1_cu_size)
   4696 {
   4697     ASSERT(i4_num_valid_parts > 0);
   4698 
   4699     if(1 == i4_num_valid_parts)
   4700     {
   4701         ASSERT(pi4_valid_part_array[i4_num_valid_parts - 1] == PART_ID_2Nx2N);
   4702 
   4703         return u1_cu_size;
   4704     }
   4705     else
   4706     {
   4707         if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_NxN_BR)
   4708         {
   4709             return u1_cu_size / 2;
   4710         }
   4711         else if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_nRx2N_R)
   4712         {
   4713             return u1_cu_size / 4;
   4714         }
   4715     }
   4716 
   4717     return u1_cu_size / 4;
   4718 }
   4719 
   4720 static U32 hme_compute_variance_of_pu_from_base_blocks(
   4721     ULWORD64 *pu8_SigmaX,
   4722     ULWORD64 *pu8_SigmaXSquared,
   4723     U08 u1_cu_size,
   4724     U08 u1_base_block_size,
   4725     S32 i4_part_id)
   4726 {
   4727     U08 i, j;
   4728     ULWORD64 u8_final_variance;
   4729 
   4730     U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
   4731     S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
   4732     S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
   4733     U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
   4734     U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
   4735     U08 u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size;
   4736     U08 u1_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
   4737     U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
   4738     ULWORD64 u8_final_SigmaXSquared = 0;
   4739     ULWORD64 u8_final_SigmaX = 0;
   4740 
   4741     if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
   4742     {
   4743         U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id]
   4744                                         ? (gai1_is_part_vertical[i4_part_id]
   4745                                                ? 0
   4746                                                : (u1_cu_size - i4_part_wd) / u1_base_block_size)
   4747                                         : 0;
   4748         U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id]
   4749                                      ? (gai1_is_part_vertical[i4_part_id]
   4750                                             ? (u1_cu_size - i4_part_ht) / u1_base_block_size
   4751                                             : 0)
   4752                                      : 0;
   4753         U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
   4754         U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
   4755 
   4756         for(i = u1_row_start_index; i < u1_row_end_index; i++)
   4757         {
   4758             for(j = u1_column_start_index; j < u1_column_end_index; j++)
   4759             {
   4760                 u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
   4761                 u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
   4762             }
   4763         }
   4764 
   4765         u8_final_variance =
   4766             u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
   4767         u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
   4768         u8_final_variance +=
   4769             ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
   4770              (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
   4771         u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
   4772                              (u1_num_base_blocks * u4_num_pixels_in_base_block);
   4773 
   4774         ASSERT(u8_final_variance <= UINT_MAX);
   4775     }
   4776     else
   4777     {
   4778         U08 u1_row_start_index;
   4779         U08 u1_column_start_index;
   4780         U08 u1_row_end_index;
   4781         U08 u1_column_end_index;
   4782 
   4783         switch(gau1_part_id_to_part_num[i4_part_id])
   4784         {
   4785         case 0:
   4786         {
   4787             u1_row_start_index = 0;
   4788             u1_column_start_index = 0;
   4789 
   4790             break;
   4791         }
   4792         case 1:
   4793         {
   4794             u1_row_start_index = 0;
   4795             u1_column_start_index = u1_num_base_blocks_in_pu_row;
   4796 
   4797             break;
   4798         }
   4799         case 2:
   4800         {
   4801             u1_row_start_index = u1_num_base_blocks_in_pu_column;
   4802             u1_column_start_index = 0;
   4803 
   4804             break;
   4805         }
   4806         case 3:
   4807         {
   4808             u1_row_start_index = u1_num_base_blocks_in_pu_column;
   4809             u1_column_start_index = u1_num_base_blocks_in_pu_row;
   4810 
   4811             break;
   4812         }
   4813         }
   4814 
   4815         u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
   4816         u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
   4817 
   4818         for(i = u1_row_start_index; i < u1_row_end_index; i++)
   4819         {
   4820             for(j = u1_column_start_index; j < u1_column_end_index; j++)
   4821             {
   4822                 u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
   4823                 u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
   4824             }
   4825         }
   4826 
   4827         u8_final_variance =
   4828             u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
   4829         u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
   4830         u8_final_variance +=
   4831             ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
   4832              (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
   4833         u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
   4834                              (u1_num_base_blocks * u4_num_pixels_in_base_block);
   4835 
   4836         ASSERT(u8_final_variance <= UINT_MAX);
   4837     }
   4838 
   4839     return u8_final_variance;
   4840 }
   4841 
   4842 void hme_compute_variance_for_all_parts(
   4843     U08 *pu1_data,
   4844     S32 i4_data_stride,
   4845     S32 *pi4_valid_part_array,
   4846     U32 *pu4_variance,
   4847     S32 i4_num_valid_parts,
   4848     U08 u1_cu_size)
   4849 {
   4850     ULWORD64 au8_SigmaX[16];
   4851     ULWORD64 au8_SigmaXSquared[16];
   4852     U08 i, j, k, l;
   4853     U08 u1_base_block_size;
   4854     U08 u1_num_base_blocks_in_cu_row;
   4855     U08 u1_num_base_blocks_in_cu_column;
   4856 
   4857     u1_base_block_size =
   4858         hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);
   4859 
   4860     u1_num_base_blocks_in_cu_row = u1_num_base_blocks_in_cu_column =
   4861         u1_cu_size / u1_base_block_size;
   4862 
   4863     ASSERT(u1_num_base_blocks_in_cu_row <= 4);
   4864 
   4865     for(i = 0; i < u1_num_base_blocks_in_cu_column; i++)
   4866     {
   4867         for(j = 0; j < u1_num_base_blocks_in_cu_row; j++)
   4868         {
   4869             U08 *pu1_buf =
   4870                 pu1_data + (u1_base_block_size * j) + (u1_base_block_size * i * i4_data_stride);
   4871 
   4872             au8_SigmaX[j + i * u1_num_base_blocks_in_cu_row] = 0;
   4873             au8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row] = 0;
   4874 
   4875             for(k = 0; k < u1_base_block_size; k++)
   4876             {
   4877                 for(l = 0; l < u1_base_block_size; l++)
   4878                 {
   4879                     au8_SigmaX[j + i * u1_num_base_blocks_in_cu_row] +=
   4880                         pu1_buf[l + k * i4_data_stride];
   4881                     au8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row] +=
   4882                         pu1_buf[l + k * i4_data_stride] * pu1_buf[l + k * i4_data_stride];
   4883                 }
   4884             }
   4885         }
   4886     }
   4887 
   4888     for(i = 0; i < i4_num_valid_parts; i++)
   4889     {
   4890         pu4_variance[pi4_valid_part_array[i]] = hme_compute_variance_of_pu_from_base_blocks(
   4891             au8_SigmaX, au8_SigmaXSquared, u1_cu_size, u1_base_block_size, pi4_valid_part_array[i]);
   4892     }
   4893 }
   4894 
   4895 void hme_compute_final_sigma_of_pu_from_base_blocks(
   4896     U32 *pu4_SigmaX,
   4897     U32 *pu4_SigmaXSquared,
   4898     ULWORD64 *pu8_final_sigmaX,
   4899     ULWORD64 *pu8_final_sigmaX_Squared,
   4900     U08 u1_cu_size,
   4901     U08 u1_base_block_size,
   4902     S32 i4_part_id,
   4903     U08 u1_base_blk_array_stride)
   4904 {
   4905     U08 i, j;
   4906     //U08 u1_num_base_blocks_in_cu_row;
   4907 
   4908     U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
   4909     S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
   4910     S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
   4911     U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
   4912     U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
   4913     U16 u2_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
   4914     U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
   4915     U32 u4_N = (u2_num_base_blocks * u4_num_pixels_in_base_block);
   4916 
   4917     /*if (u1_is_for_src)
   4918     {
   4919     u1_num_base_blocks_in_cu_row = 16;
   4920     }
   4921     else
   4922     {
   4923     u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size;
   4924     }*/
   4925 
   4926     pu8_final_sigmaX[i4_part_id] = 0;
   4927     pu8_final_sigmaX_Squared[i4_part_id] = 0;
   4928 
   4929     if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
   4930     {
   4931         U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id]
   4932                                         ? (gai1_is_part_vertical[i4_part_id]
   4933                                                ? 0
   4934                                                : (u1_cu_size - i4_part_wd) / u1_base_block_size)
   4935                                         : 0;
   4936         U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id]
   4937                                      ? (gai1_is_part_vertical[i4_part_id]
   4938                                             ? (u1_cu_size - i4_part_ht) / u1_base_block_size
   4939                                             : 0)
   4940                                      : 0;
   4941         U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
   4942         U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
   4943 
   4944         for(i = u1_row_start_index; i < u1_row_end_index; i++)
   4945         {
   4946             for(j = u1_column_start_index; j < u1_column_end_index; j++)
   4947             {
   4948                 pu8_final_sigmaX_Squared[i4_part_id] +=
   4949                     pu4_SigmaXSquared[j + i * u1_base_blk_array_stride];
   4950                 pu8_final_sigmaX[i4_part_id] += pu4_SigmaX[j + i * u1_base_blk_array_stride];
   4951             }
   4952         }
   4953     }
   4954     else
   4955     {
   4956         U08 u1_row_start_index;
   4957         U08 u1_column_start_index;
   4958         U08 u1_row_end_index;
   4959         U08 u1_column_end_index;
   4960 
   4961         switch(gau1_part_id_to_part_num[i4_part_id])
   4962         {
   4963         case 0:
   4964         {
   4965             u1_row_start_index = 0;
   4966             u1_column_start_index = 0;
   4967 
   4968             break;
   4969         }
   4970         case 1:
   4971         {
   4972             u1_row_start_index = 0;
   4973             u1_column_start_index = u1_num_base_blocks_in_pu_row;
   4974 
   4975             break;
   4976         }
   4977         case 2:
   4978         {
   4979             u1_row_start_index = u1_num_base_blocks_in_pu_column;
   4980             u1_column_start_index = 0;
   4981 
   4982             break;
   4983         }
   4984         case 3:
   4985         {
   4986             u1_row_start_index = u1_num_base_blocks_in_pu_column;
   4987             u1_column_start_index = u1_num_base_blocks_in_pu_row;
   4988 
   4989             break;
   4990         }
   4991         }
   4992 
   4993         u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
   4994         u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
   4995 
   4996         for(i = u1_row_start_index; i < u1_row_end_index; i++)
   4997         {
   4998             for(j = u1_column_start_index; j < u1_column_end_index; j++)
   4999             {
   5000                 pu8_final_sigmaX_Squared[i4_part_id] +=
   5001                     pu4_SigmaXSquared[j + i * u1_base_blk_array_stride];
   5002                 pu8_final_sigmaX[i4_part_id] += pu4_SigmaX[j + i * u1_base_blk_array_stride];
   5003             }
   5004         }
   5005     }
   5006 
   5007     pu8_final_sigmaX_Squared[i4_part_id] *= u4_N;
   5008 }
   5009 
   5010 void hme_compute_stim_injected_distortion_for_all_parts(
   5011     U08 *pu1_pred,
   5012     S32 i4_pred_stride,
   5013     S32 *pi4_valid_part_array,
   5014     ULWORD64 *pu8_src_sigmaX,
   5015     ULWORD64 *pu8_src_sigmaXSquared,
   5016     S32 *pi4_sad_array,
   5017     S32 i4_alpha_stim_multiplier,
   5018     S32 i4_inv_wt,
   5019     S32 i4_inv_wt_shift_val,
   5020     S32 i4_num_valid_parts,
   5021     S32 i4_wpred_log_wdc,
   5022     U08 u1_cu_size)
   5023 {
   5024     U32 au4_sigmaX[16], au4_sigmaXSquared[16];
   5025     ULWORD64 au8_final_ref_sigmaX[17], au8_final_ref_sigmaXSquared[17];
   5026     S32 i4_noise_term;
   5027     U16 i2_count;
   5028 
   5029     ULWORD64 u8_temp_var, u8_temp_var1, u8_pure_dist;
   5030     ULWORD64 u8_ref_X_Square, u8_src_var, u8_ref_var;
   5031 
   5032     U08 u1_base_block_size;
   5033 
   5034     WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
   5035 
   5036     u1_base_block_size =
   5037         hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);
   5038 
   5039     ASSERT(u1_cu_size >= 16);
   5040 
   5041     hme_compute_sigmaX_and_sigmaXSquared(
   5042         pu1_pred,
   5043         i4_pred_stride,
   5044         au4_sigmaX,
   5045         au4_sigmaXSquared,
   5046         u1_base_block_size,
   5047         u1_base_block_size,
   5048         u1_cu_size,
   5049         u1_cu_size,
   5050         1,
   5051         u1_cu_size / u1_base_block_size);
   5052 
   5053     /* Noise Term Computation */
   5054     for(i2_count = 0; i2_count < i4_num_valid_parts; i2_count++)
   5055     {
   5056         unsigned long u4_shift_val;
   5057         S32 i4_bits_req;
   5058         S32 part_id = pi4_valid_part_array[i2_count];
   5059 
   5060         if(i4_alpha_stim_multiplier)
   5061         {
   5062             /* Final SigmaX and SigmaX-Squared Calculation */
   5063             hme_compute_final_sigma_of_pu_from_base_blocks(
   5064                 au4_sigmaX,
   5065                 au4_sigmaXSquared,
   5066                 au8_final_ref_sigmaX,
   5067                 au8_final_ref_sigmaXSquared,
   5068                 u1_cu_size,
   5069                 u1_base_block_size,
   5070                 part_id,
   5071                 (u1_cu_size / u1_base_block_size));
   5072 
   5073             u8_ref_X_Square = (au8_final_ref_sigmaX[part_id] * au8_final_ref_sigmaX[part_id]);
   5074             u8_ref_var = (au8_final_ref_sigmaXSquared[part_id] - u8_ref_X_Square);
   5075 
   5076             u4_shift_val = ihevce_calc_stim_injected_variance(
   5077                 pu8_src_sigmaX,
   5078                 pu8_src_sigmaXSquared,
   5079                 &u8_src_var,
   5080                 i4_inv_wt,
   5081                 i4_inv_wt_shift_val,
   5082                 i4_wpred_log_wdc,
   5083                 part_id);
   5084 
   5085             u8_ref_var = u8_ref_var >> u4_shift_val;
   5086 
   5087             GETRANGE64(i4_bits_req, u8_ref_var);
   5088 
   5089             if(i4_bits_req > 27)
   5090             {
   5091                 u8_ref_var = u8_ref_var >> (i4_bits_req - 27);
   5092                 u8_src_var = u8_src_var >> (i4_bits_req - 27);
   5093             }
   5094 
   5095             if(u8_src_var == u8_ref_var)
   5096             {
   5097                 u8_temp_var = (1 << STIM_Q_FORMAT);
   5098             }
   5099             else
   5100             {
   5101                 u8_temp_var = (u8_src_var * u8_ref_var * (1 << STIM_Q_FORMAT));
   5102                 u8_temp_var1 = (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var);
   5103                 u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
   5104                 u8_temp_var = (u8_temp_var / u8_temp_var1);
   5105                 u8_temp_var = (2 * u8_temp_var);
   5106             }
   5107 
   5108             i4_noise_term = (UWORD32)u8_temp_var;
   5109 
   5110             ASSERT(i4_noise_term >= 0);
   5111 
   5112             i4_noise_term *= i4_alpha_stim_multiplier;
   5113         }
   5114         else
   5115         {
   5116             i4_noise_term = 0;
   5117         }
   5118 
   5119         u8_pure_dist = pi4_sad_array[part_id];
   5120         u8_pure_dist *= ((1 << (i4_q_level)) - (i4_noise_term));
   5121         u8_pure_dist += (1 << ((i4_q_level)-1));
   5122         pi4_sad_array[part_id] = (UWORD32)(u8_pure_dist >> (i4_q_level));
   5123     }
   5124 }
   5125 
   5126 void hme_compute_sigmaX_and_sigmaXSquared(
   5127     U08 *pu1_data,
   5128     S32 i4_buf_stride,
   5129     void *pv_sigmaX,
   5130     void *pv_sigmaXSquared,
   5131     U08 u1_base_blk_wd,
   5132     U08 u1_base_blk_ht,
   5133     U08 u1_blk_wd,
   5134     U08 u1_blk_ht,
   5135     U08 u1_is_sigma_pointer_size_32_bit,
   5136     U08 u1_array_stride)
   5137 {
   5138     U08 i, j, k, l;
   5139     U08 u1_num_base_blks_in_row;
   5140     U08 u1_num_base_blks_in_column;
   5141 
   5142     u1_num_base_blks_in_row = u1_blk_wd / u1_base_blk_wd;
   5143     u1_num_base_blks_in_column = u1_blk_ht / u1_base_blk_ht;
   5144 
   5145     if(u1_is_sigma_pointer_size_32_bit)
   5146     {
   5147         U32 *sigmaX, *sigmaXSquared;
   5148 
   5149         sigmaX = (U32 *)pv_sigmaX;
   5150         sigmaXSquared = (U32 *)pv_sigmaXSquared;
   5151 
   5152         /* Loop to compute the sigma_X and sigma_X_Squared */
   5153         for(i = 0; i < u1_num_base_blks_in_column; i++)
   5154         {
   5155             for(j = 0; j < u1_num_base_blks_in_row; j++)
   5156             {
   5157                 U32 u4_sigmaX = 0, u4_sigmaXSquared = 0;
   5158                 U08 *pu1_buf =
   5159                     pu1_data + (u1_base_blk_wd * j) + (u1_base_blk_ht * i * i4_buf_stride);
   5160 
   5161                 for(k = 0; k < u1_base_blk_ht; k++)
   5162                 {
   5163                     for(l = 0; l < u1_base_blk_wd; l++)
   5164                     {
   5165                         u4_sigmaX += pu1_buf[l + k * i4_buf_stride];
   5166                         u4_sigmaXSquared +=
   5167                             (pu1_buf[l + k * i4_buf_stride] * pu1_buf[l + k * i4_buf_stride]);
   5168                     }
   5169                 }
   5170 
   5171                 sigmaX[j + i * u1_array_stride] = u4_sigmaX;
   5172                 sigmaXSquared[j + i * u1_array_stride] = u4_sigmaXSquared;
   5173             }
   5174         }
   5175     }
   5176     else
   5177     {
   5178         ULWORD64 *sigmaX, *sigmaXSquared;
   5179 
   5180         sigmaX = (ULWORD64 *)pv_sigmaX;
   5181         sigmaXSquared = (ULWORD64 *)pv_sigmaXSquared;
   5182 
   5183         /* Loop to compute the sigma_X and sigma_X_Squared */
   5184         for(i = 0; i < u1_num_base_blks_in_column; i++)
   5185         {
   5186             for(j = 0; j < u1_num_base_blks_in_row; j++)
   5187             {
   5188                 ULWORD64 u8_sigmaX = 0, u8_sigmaXSquared = 0;
   5189                 U08 *pu1_buf =
   5190                     pu1_data + (u1_base_blk_wd * j) + (u1_base_blk_ht * i * i4_buf_stride);
   5191 
   5192                 for(k = 0; k < u1_base_blk_ht; k++)
   5193                 {
   5194                     for(l = 0; l < u1_base_blk_wd; l++)
   5195                     {
   5196                         u8_sigmaX += pu1_buf[l + k * i4_buf_stride];
   5197                         u8_sigmaXSquared +=
   5198                             (pu1_buf[l + k * i4_buf_stride] * pu1_buf[l + k * i4_buf_stride]);
   5199                     }
   5200                 }
   5201 
   5202                 u8_sigmaXSquared = u8_sigmaXSquared * u1_blk_wd * u1_blk_ht;
   5203 
   5204                 sigmaX[j + i * u1_array_stride] = u8_sigmaX;
   5205                 sigmaXSquared[j + i * u1_array_stride] = u8_sigmaXSquared;
   5206             }
   5207         }
   5208     }
   5209 }
   5210 
#if TEMPORAL_NOISE_DETECT
/**
 * Temporal noise check for one 16x16 block.
 *
 * For each prediction direction (0 .. num_pred_dir - 1) the function:
 *   1. picks the first MV / ref idx stored in the coarse layer MV bank at the
 *      projected block position (falling back to a zero MV on ref idx 0 when
 *      the stored candidate is invalid),
 *   2. forms the 16x16 residue between the weighted input and the reference
 *      displaced by that MV,
 *   3. computes the residue variance via ihevce_calc_variance_signed() and
 *      compares it with
 *      (TEMPORAL_VARIANCE_FACTOR * au4_variance_src_16x16[i4_16x16_index])
 *          >> Q_TEMPORAL_VARIANCE_FACTOR.
 *
 * If every direction exceeds the threshold, the four au1_is_8x8Blk_noisy[]
 * entries covering the 16x16 block are set to 1 and 1 is returned. As soon as
 * one direction does not exceed it, those four entries are set to 0, the
 * remaining directions are skipped and 0 is returned. With num_pred_dir <= 0
 * nothing is written and 0 is returned.
 *
 * Parameters that are not read in the body: ctb_width, ctb_height,
 * i4_num_act_ref_l1, input_stride, num_horz_blocks. had_block_size is only
 * checked by the assert. The ps_wt_inp_prms argument is hidden by a local of
 * the same name that is set to &ps_ctxt->s_wt_pred.
 *
 * NOTE(review): variance_16x16[] and as_search_node[] have two entries, so
 * num_pred_dir is presumably expected to be at most 2 - confirm with callers.
 */
WORD32 ihevce_16x16block_temporal_noise_detect(
    WORD32 had_block_size,
    WORD32 ctb_width,
    WORD32 ctb_height,
    ihevce_ctb_noise_params *ps_ctb_noise_params,
    fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
    hme_search_prms_t *s_search_prms_blk,
    me_frm_ctxt_t *ps_ctxt,
    WORD32 num_pred_dir,
    WORD32 i4_num_act_ref_l0,
    WORD32 i4_num_act_ref_l1,
    WORD32 i4_cu_x_off,
    WORD32 i4_cu_y_off,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    WORD32 input_stride,
    WORD32 index_8x8_block,
    WORD32 num_horz_blocks,
    WORD32 num_8x8_in_ctb_row,
    WORD32 i4_16x16_index)
{
    /* i, mean and variance_8x8 are assigned below but never read */
    WORD32 i;
    WORD32 noise_detected;

    /* Despite the l0/l1 names: pu1_l0_block is the (weighted) input block  */
    /* and pu1_l1_block is the motion displaced reference block             */
    UWORD8 *pu1_l0_block;
    UWORD8 *pu1_l1_block;

    WORD32 mean;
    UWORD32 variance_8x8;

    /* 16x16 residue (input - reference), its mean, and one residue         */
    /* variance per prediction direction                                     */
    WORD16 pi2_residue_16x16[256];
    WORD32 mean_16x16;
    UWORD32 variance_16x16[2];

    /* Reject unsupported arguments: the block size must be 8, 16 or 32 */
    assert(
        (had_block_size == 8) || (had_block_size == 16) || (had_block_size == 32));  //ihevc_assert

    /* initialize the variables */
    noise_detected = 0;
    variance_8x8 = 0;

    mean = 0;

    {
        i = 0;
        /* Get the ref/pred and the source using the MV of each direction */
        /* Pick the best candidate in each direction */
        /* Colocated cands */
        {
            // Steps performed below:
            /* pick the candidates */
            /* do motion compensation using the candidates from the previous step : pick from the offset */
            /* get the ref or the pred from the offset */
            /* get the source data */
            /* form the residue (source - pred) */
            /* do noise detection on the residue of source and pred */

            layer_mv_t *ps_layer_mvbank;
            hme_mv_t *ps_mv;

            //S32 i;
            S32 wd_c, ht_c, wd_p, ht_p;
            S32 blksize_p, blk_x, blk_y, i4_offset;
            S08 *pi1_ref_idx;
            fpel_srch_cand_init_data_t *ps_ctxt_2 = s_proj_srch_cand_init_data;
            layer_ctxt_t *ps_curr_layer = ps_ctxt_2->ps_curr_layer;
            layer_ctxt_t *ps_coarse_layer = ps_ctxt_2->ps_coarse_layer;
            err_prms_t s_err_prms;
            S32 i4_blk_wd;
            S32 i4_blk_ht;
            BLK_SIZE_T e_blk_size;
            hme_search_prms_t *ps_search_prms;
            S32 i4_part_mask;
            S32 *pi4_valid_part_ids;

            /* List of valid partitions to search, terminated by -1.        */
            /* Only its address is stored in s_err_prms; it is never filled */
            /* in this function                                              */
            S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];

            /*SEARCH_COMPLEXITY_T e_search_complexity = ps_ctxt->e_search_complexity;*/

            S32 i4_pos_x;
            S32 i4_pos_y;
            U08 u1_pred_dir;  // = ps_ctxt_2->u1_pred_dir;
            U08 u1_default_ref_id = 0;  //ps_ctxt_2->u1_default_ref_id;
            S32 i4_inp_off, i4_ref_offset, i4_ref_stride;

            /* The reference is actually an array of ptrs since there are several    */
            /* reference ids. So an array gets passed from the calling function      */
            U08 **ppu1_ref;

            /* Attributes of the input candidates, one per prediction direction */
            search_node_t as_search_node[2];
            /* NOTE: this local hides the function parameter of the same name */
            wgt_pred_ctxt_t *ps_wt_inp_prms;

            S32 posx;
            S32 posy;
            S32 i4_num_results_to_proj;
            S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
            S32 i4_inp_stride;

            /* initialize variables */
            /* Width and ht of current and prev layers */
            wd_c = ps_curr_layer->i4_wd;
            ht_c = ps_curr_layer->i4_ht;
            wd_p = ps_coarse_layer->i4_wd;
            ht_p = ps_coarse_layer->i4_ht;

            ps_search_prms = s_search_prms_blk;

            ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
            e_blk_size = ps_search_prms->e_blk_size;
            i4_part_mask = ps_search_prms->i4_part_mask;

            i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
            i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];

            ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
            /* blksize_p is the width *shift* of the MV bank block size, */
            /* not a size in pixels                                       */
            blksize_p = gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

            /* ASSERT for valid sizes */
            ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

            i4_pos_x = i4_cu_x_off;
            i4_pos_y = i4_cu_y_off;
            posx = i4_pos_x + 2;
            posy = i4_pos_y + 2;

            i4_inp_stride = ps_search_prms->i4_inp_stride;
            /* Move to the location of the search blk in inp buffer: the    */
            /* 16x16 index is treated as a raster index in a grid that is   */
            /* four 16x16 blocks wide                                        */
            //i4_inp_off = i4_cu_x_off;
            //i4_inp_off += i4_cu_y_off * i4_inp_stride;
            i4_inp_off = (i4_16x16_index % 4) * 16;
            i4_inp_off += (i4_16x16_index / 4) * 16 * i4_inp_stride;

            /***********pick the candidates**************************************/
            for(u1_pred_dir = 0; u1_pred_dir < num_pred_dir; u1_pred_dir++)
            {
                /* Direction whose MVs are actually fetched: direction 0    */
                /* maps to 1 when there are no active L0 references         */
                WORD32 actual_pred_dir = 0;

                if(u1_pred_dir == 0 && i4_num_act_ref_l0 == 0)
                {
                    actual_pred_dir = 1;
                }
                else if(u1_pred_dir == 0 && i4_num_act_ref_l0 != 0)
                {
                    actual_pred_dir = 0;
                }
                else if(u1_pred_dir == 1)
                {
                    actual_pred_dir = 1;
                }

                i4_num_results_to_proj = 1;  // only the best proj

                /* Safety check to avoid uninitialized access across temporal layers */
                /* NOTE(review): the clip margin uses blksize_p, which is a */
                /* shift value (3..5), not a pixel size - confirm intent    */
                posx = CLIP3(posx, 0, (wd_c - blksize_p)); /* block position within frame */
                posy = CLIP3(posy, 0, (ht_c - blksize_p));

                /* Project the positions to prev layer */
                blk_x = posx >> blksize_p;
                blk_y = posy >> blksize_p;

                /* Pick up the mvs from the location */
                i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
                i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

                ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
                pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

                /* Direction 1 entries follow the entries of all active L0 refs */
                if(actual_pred_dir == 1)
                {
                    ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
                    pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
                }

                {
                    /* Take the first stored MV and double it */
                    as_search_node[actual_pred_dir].s_mv.i2_mvx = ps_mv[0].i2_mv_x << 1;
                    as_search_node[actual_pred_dir].s_mv.i2_mvy = ps_mv[0].i2_mv_y << 1;
                    as_search_node[actual_pred_dir].i1_ref_idx = pi1_ref_idx[0];

                    /* Invalid candidate: fall back to zero MV on the default ref. */
                    /* Note the INTRA_MV comparison is done on the doubled value   */
                    if((as_search_node[actual_pred_dir].i1_ref_idx < 0) ||
                       (as_search_node[actual_pred_dir].s_mv.i2_mvx == INTRA_MV))
                    {
                        as_search_node[actual_pred_dir].i1_ref_idx = u1_default_ref_id;
                        as_search_node[actual_pred_dir].s_mv.i2_mvx = 0;
                        as_search_node[actual_pred_dir].s_mv.i2_mvy = 0;
                    }
                }

                /********************************************************************************************/
                {
                    /* Fill the error params; only pu1_inp and pu1_ref are  */
                    /* consumed later in this function                       */
                    //ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;

                    pi4_valid_part_ids = ai4_valid_part_ids;
                    i4_ref_stride = ps_curr_layer->i4_rec_stride;
                    s_err_prms.i4_inp_stride = i4_inp_stride;
                    s_err_prms.i4_ref_stride = i4_ref_stride;
                    s_err_prms.i4_part_mask = i4_part_mask;
                    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
                    s_err_prms.i4_blk_wd = i4_blk_wd;
                    s_err_prms.i4_blk_ht = i4_blk_ht;
                    s_err_prms.i4_step = 1;
                    s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
                    //s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;

                    /*************************************************************************/
                    /* The reference used here is always the recon plane list of the        */
                    /* current layer (ppu1_list_rec_fxfy).                                   */
                    i4_ref_stride = ps_curr_layer->i4_rec_stride;
                    ppu1_ref = ps_curr_layer->ppu1_list_rec_fxfy;  // pointer to the pred

                    i4_ref_offset = (i4_ref_stride * i4_cu_y_off) + i4_cu_x_off;  //i4_x_off;

                    /* The MV components are applied directly as sample offsets */
                    s_err_prms.pu1_ref =
                        ppu1_ref[as_search_node[actual_pred_dir].i1_ref_idx] + i4_ref_offset;
                    s_err_prms.pu1_ref += as_search_node[actual_pred_dir].s_mv.i2_mvx;
                    s_err_prms.pu1_ref +=
                        as_search_node[actual_pred_dir].s_mv.i2_mvy * i4_ref_stride;

                    /* Get the source: the weighted input plane for the chosen ref idx */
                    s_err_prms.pu1_inp =
                        ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] +
                        i4_inp_off;  //pu1_src_input + i4_inp_off;//ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] + i4_inp_off;

                    /* send the pred - source to noise detect */
                    // noise_detect_hme(noise_structure, s_err_prms.pu1_inp, s_err_prms.pu1_ref);
                }
                /* TODO: rename the l0/l1 block pointers to reflect input/reference */

                /* get memory pointers to the input and the reference */
                pu1_l0_block = s_err_prms.pu1_inp;
                pu1_l1_block = s_err_prms.pu1_ref;

                {
                    WORD32 i2, j2;
                    WORD32 dim = 16;
                    UWORD8 *buf1;
                    UWORD8 *buf2;
                    /* Build the 16x16 residue: input minus reference */
                    for(i2 = 0; i2 < dim; i2++)
                    {
                        buf1 = pu1_l0_block + i2 * i4_inp_stride;
                        buf2 = pu1_l1_block + i2 * i4_ref_stride;

                        for(j2 = 0; j2 < dim; j2++)
                        {
                            pi2_residue_16x16[i2 * dim + j2] = (WORD16)(buf1[j2] - buf2[j2]);
                        }
                    }

                    /* Presumably returns the mean and variance of the 16x16 */
                    /* residue through the two output pointers               */
                    ihevce_calc_variance_signed(
                        pi2_residue_16x16, 16, &mean_16x16, &variance_16x16[u1_pred_dir], 16, 16);

                    /* Compare the residue variance with the scaled source variance of this 16x16 block */
                    if(variance_16x16[u1_pred_dir] >
                       ((TEMPORAL_VARIANCE_FACTOR *
                         ps_ctb_noise_params->au4_variance_src_16x16[i4_16x16_index]) >>
                        Q_TEMPORAL_VARIANCE_FACTOR))
                    {
                        /* Mark the four 8x8 blocks noisy only once the last direction also indicates noise */
                        if(u1_pred_dir == num_pred_dir - 1)
                        {
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 1;
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 1;
                            ps_ctb_noise_params
                                ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 1;
                            ps_ctb_noise_params
                                ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 1;
                            noise_detected = 1;
                        }
                    }
                    else /* If any direction's MV says the block is not noisy, do not check the remaining directions */
                    {
                        noise_detected = 0;
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 0;
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 0;
                        ps_ctb_noise_params
                            ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 0;
                        ps_ctb_noise_params
                            ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 0;
                        break;
                    }
                }  // variance analysis and calculation
            }  // for each direction
        }  // HME code

    }  // end of the scope handling this 16x16 block (no loop here)

    return (noise_detected);
}
#endif
   5505 
   5506 void hme_qpel_interp_avg_1pt(
   5507     interp_prms_t *ps_prms,
   5508     S32 i4_mv_x,
   5509     S32 i4_mv_y,
   5510     S32 i4_buf_id,
   5511     U08 **ppu1_final,
   5512     S32 *pi4_final_stride)
   5513 {
   5514     U08 *pu1_src1, *pu1_src2, *pu1_dst;
   5515     qpel_input_buf_cfg_t *ps_inp_cfg;
   5516     S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset;
   5517 
   5518     /*************************************************************************/
   5519     /* For a given QPEL pt, we need to determine the 2 source pts that are   */
   5520     /* needed to do the QPEL averaging. The logic to do this is as follows   */
   5521     /* i4_mv_x and i4_mv_y are the motion vectors in QPEL units that are     */
   5522     /* pointing to the pt of interest. Obviously, they are w.r.t. the 0,0    */
   5523     /* pt of th reference blk that is colocated to the inp blk.              */
   5524     /*    A j E k B                                                          */
   5525     /*    l m n o p                                                          */
   5526     /*    F q G r H                                                          */
   5527     /*    s t u v w                                                          */
   5528     /*    C x I y D                                                          */
   5529     /* In above diagram, A. B, C, D are full pts at offsets (0,0),(1,0),(0,1)*/
   5530     /* and (1,1) respectively in the fpel buffer (id = 0)                    */
   5531     /* E and I are hxfy pts in offsets (0,0),(0,1) respectively in hxfy buf  */
   5532     /* F and H are fxhy pts in offsets (0,0),(1,0) respectively in fxhy buf  */
   5533     /* G is hxhy pt in offset 0,0 in hxhy buf                                */
   5534     /* All above offsets are computed w.r.t. motion displaced pt in          */
   5535     /* respective bufs. This means that A corresponds to (i4_mv_x >> 2) and  */
   5536     /* (i4_mv_y >> 2) in fxfy buf. Ditto with E, F and G                     */
   5537     /* fxfy buf is buf id 0, hxfy is buf id 1, fxhy is buf id 2, hxhy is 3   */
   5538     /* If we consider pt v to be derived. v has a fractional comp of 3, 3    */
   5539     /* v is avg of H and I. So the table look up of v should give following  */
   5540     /* buf 1 (H) : offset = (1, 0) buf id = 2.                               */
   5541     /* buf 2 (I) : offset = 0 , 1) buf id = 1.                               */
   5542     /* NOTE: For pts that are fxfy/hxfy/fxhy/hxhy, bufid 1 will be -1.       */
   5543     /*************************************************************************/
   5544     i4_mv_x_frac = i4_mv_x & 3;
   5545     i4_mv_y_frac = i4_mv_y & 3;
   5546 
   5547     i4_offset = (i4_mv_x >> 2) + (i4_mv_y >> 2) * ps_prms->i4_ref_stride;
   5548 
   5549     /* Derive the descriptor that has all offset and size info */
   5550     ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
   5551 
   5552     pu1_src1 = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
   5553     pu1_src1 += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
   5554     pu1_src1 += (ps_inp_cfg->i1_buf_yoff1 * ps_prms->i4_ref_stride);
   5555 
   5556     pu1_src2 = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id2];
   5557     pu1_src2 += ps_inp_cfg->i1_buf_xoff2 + i4_offset;
   5558     pu1_src2 += (ps_inp_cfg->i1_buf_yoff2 * ps_prms->i4_ref_stride);
   5559 
   5560     pu1_dst = ps_prms->apu1_interp_out[i4_buf_id];
   5561     hevc_avg_2d(
   5562         pu1_src1,
   5563         pu1_src2,
   5564         ps_prms->i4_ref_stride,
   5565         ps_prms->i4_ref_stride,
   5566         ps_prms->i4_blk_wd,
   5567         ps_prms->i4_blk_ht,
   5568         pu1_dst,
   5569         ps_prms->i4_out_stride);
   5570     ppu1_final[i4_buf_id] = pu1_dst;
   5571     pi4_final_stride[i4_buf_id] = ps_prms->i4_out_stride;
   5572 }
   5573 
   5574 void hme_qpel_interp_avg_2pt_vert_with_reuse(
   5575     interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
   5576 {
   5577     hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y + 1, 3, ppu1_final, pi4_final_stride);
   5578 
   5579     hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y - 1, 1, ppu1_final, pi4_final_stride);
   5580 }
   5581 
   5582 void hme_qpel_interp_avg_2pt_horz_with_reuse(
   5583     interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
   5584 {
   5585     hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x + 1, i4_mv_y, 2, ppu1_final, pi4_final_stride);
   5586 
   5587     hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x - 1, i4_mv_y, 0, ppu1_final, pi4_final_stride);
   5588 }
   5589 
   5590 void hme_set_mv_limit_using_dvsr_data(
   5591     me_frm_ctxt_t *ps_ctxt,
   5592     layer_ctxt_t *ps_curr_layer,
   5593     range_prms_t *ps_mv_limit,
   5594     S16 *pi2_prev_enc_frm_max_mv_y,
   5595     U08 u1_num_act_ref_pics)
   5596 {
   5597     WORD32 ref_ctr;
   5598 
   5599     /* Only for B/b pic. */
   5600     if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
   5601     {
   5602         WORD16 i2_mv_y_per_poc, i2_max_mv_y;
   5603         WORD32 cur_poc, prev_poc, ref_poc, abs_poc_diff;
   5604         WORD32 prev_poc_count = 0;
   5605         WORD32 i4_p_idx;
   5606 
   5607         pi2_prev_enc_frm_max_mv_y[0] = 0;
   5608 
   5609         cur_poc = ps_ctxt->i4_curr_poc;
   5610 
   5611         i4_p_idx = 0;
   5612 
   5613         /* Get abs MAX for symmetric search */
   5614         i2_mv_y_per_poc = ps_curr_layer->i2_max_mv_y;
   5615         /* Assuming P to P distance as 4 */
   5616         i2_mv_y_per_poc = (i2_mv_y_per_poc + 2) >> 2;
   5617 
   5618         for(ref_ctr = 0; ref_ctr < u1_num_act_ref_pics; ref_ctr++)
   5619         {
   5620             /* Get the prev. encoded frame POC */
   5621             prev_poc = ps_ctxt->i4_prev_poc;
   5622 
   5623             ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
   5624             abs_poc_diff = ABS((cur_poc - ref_poc));
   5625             /* Get the cur. max MV based on POC distance */
   5626             i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
   5627             i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
   5628 
   5629             ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
   5630             ps_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
   5631             ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
   5632             ps_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
   5633 
   5634             /* Find the MAX MV for the prev. encoded frame to optimize */
   5635             /* the reverse dependency of ME on Enc.Loop                */
   5636             if(ref_poc == prev_poc)
   5637             {
   5638                 /* TO DO : Same thing for horz. search also */
   5639                 pi2_prev_enc_frm_max_mv_y[0] = i2_max_mv_y;
   5640                 prev_poc_count++;
   5641             }
   5642         }
   5643     }
   5644     else
   5645     {
   5646         ASSERT(0 == ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
   5647 
   5648         /* Set the Config. File Params for P pic. */
   5649         for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
   5650         {
   5651             ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
   5652             ps_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
   5653             ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
   5654             ps_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
   5655         }
   5656 
   5657         /* For P PIC., go with  Config. File Params */
   5658         pi2_prev_enc_frm_max_mv_y[0] = ps_curr_layer->i2_max_mv_y;
   5659     }
   5660 }
   5661 
   5662 S32 hme_part_mask_populator(
   5663     U08 *pu1_inp,
   5664     S32 i4_inp_stride,
   5665     U08 u1_limit_active_partitions,
   5666     U08 u1_is_bPic,
   5667     U08 u1_is_refPic,
   5668     U08 u1_blk_8x8_mask,
   5669     ME_QUALITY_PRESETS_T e_me_quality_preset)
   5670 {
   5671     if(15 != u1_blk_8x8_mask)
   5672     {
   5673         return ENABLE_NxN;
   5674     }
   5675     else
   5676     {
   5677         U08 u1_call_inp_segmentation_based_part_mask_populator =
   5678             (ME_XTREME_SPEED_25 != e_me_quality_preset) ||
   5679             (!u1_is_bPic && !DISABLE_8X8CUS_IN_PPICS_IN_P6) ||
   5680             (u1_is_bPic && u1_is_refPic && !DISABLE_8X8CUS_IN_REFBPICS_IN_P6) ||
   5681             (u1_is_bPic && !u1_is_refPic && !DISABLE_8X8CUS_IN_NREFBPICS_IN_P6);
   5682 
   5683         if(u1_call_inp_segmentation_based_part_mask_populator)
   5684         {
   5685             S32 i4_part_mask =
   5686                 hme_study_input_segmentation(pu1_inp, i4_inp_stride, u1_limit_active_partitions);
   5687 
   5688             if(e_me_quality_preset == ME_XTREME_SPEED)
   5689             {
   5690                 i4_part_mask &= ~ENABLE_AMP;
   5691             }
   5692 
   5693             if(e_me_quality_preset == ME_XTREME_SPEED_25)
   5694             {
   5695                 i4_part_mask &= ~ENABLE_AMP;
   5696 
   5697                 i4_part_mask &= ~ENABLE_SMP;
   5698             }
   5699 
   5700             return i4_part_mask;
   5701         }
   5702         else
   5703         {
   5704             return ENABLE_2Nx2N;
   5705         }
   5706     }
   5707 }
   5708