Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /**
     22 ******************************************************************************
     23 * @file hme_coarse.c
     24 *
     25 * @brief
     26 *    Contains ME algorithm for the coarse layer.
     27 *
     28 * @author
     29 *    Ittiam
     30 *
     31 *
     32 * List of Functions
     33 * hme_update_mv_bank_coarse()
     34 * hme_coarse()
     35 ******************************************************************************
     36 */
     37 
     38 /*****************************************************************************/
     39 /* File Includes                                                             */
     40 /*****************************************************************************/
     41 /* System include files */
     42 #include <stdio.h>
     43 #include <string.h>
     44 #include <stdlib.h>
     45 #include <assert.h>
     46 #include <stdarg.h>
     47 #include <math.h>
     48 #include <limits.h>
     49 
     50 /* User include files */
     51 #include "ihevc_typedefs.h"
     52 #include "itt_video_api.h"
     53 #include "ihevce_api.h"
     54 
     55 #include "rc_cntrl_param.h"
     56 #include "rc_frame_info_collector.h"
     57 #include "rc_look_ahead_params.h"
     58 
     59 #include "ihevc_defs.h"
     60 #include "ihevc_structs.h"
     61 #include "ihevc_platform_macros.h"
     62 #include "ihevc_deblk.h"
     63 #include "ihevc_itrans_recon.h"
     64 #include "ihevc_chroma_itrans_recon.h"
     65 #include "ihevc_chroma_intra_pred.h"
     66 #include "ihevc_intra_pred.h"
     67 #include "ihevc_inter_pred.h"
     68 #include "ihevc_mem_fns.h"
     69 #include "ihevc_padding.h"
     70 #include "ihevc_weighted_pred.h"
     71 #include "ihevc_sao.h"
     72 #include "ihevc_resi_trans.h"
     73 #include "ihevc_quant_iquant_ssd.h"
     74 #include "ihevc_cabac_tables.h"
     75 
     76 #include "ihevce_defs.h"
     77 #include "ihevce_lap_enc_structs.h"
     78 #include "ihevce_multi_thrd_structs.h"
     79 #include "ihevce_multi_thrd_funcs.h"
     80 #include "ihevce_me_common_defs.h"
     81 #include "ihevce_had_satd.h"
     82 #include "ihevce_error_codes.h"
     83 #include "ihevce_bitstream.h"
     84 #include "ihevce_cabac.h"
     85 #include "ihevce_rdoq_macros.h"
     86 #include "ihevce_function_selector.h"
     87 #include "ihevce_enc_structs.h"
     88 #include "ihevce_entropy_structs.h"
     89 #include "ihevce_cmn_utils_instr_set_router.h"
     90 #include "ihevce_enc_loop_structs.h"
     91 #include "ihevce_bs_compute_ctb.h"
     92 #include "ihevce_global_tables.h"
     93 #include "ihevce_dep_mngr_interface.h"
     94 #include "hme_datatype.h"
     95 #include "hme_interface.h"
     96 #include "hme_common_defs.h"
     97 #include "hme_defs.h"
     98 #include "ihevce_me_instr_set_router.h"
     99 #include "hme_globals.h"
    100 #include "hme_utils.h"
    101 #include "hme_coarse.h"
    102 #include "hme_refine.h"
    103 #include "hme_err_compute.h"
    104 #include "hme_common_utils.h"
    105 #include "hme_search_algo.h"
    106 
    107 /*******************************************************************************
    108 *                             MACROS
    109 *******************************************************************************/
    110 #define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    111     {                                                                                              \
    112         ps_mv->i2_mv_x = ps_search_node->s_mv.i2_mvx >> (shift);                                   \
    113         ps_mv->i2_mv_y = ps_search_node->s_mv.i2_mvy >> (shift);                                   \
    114         *pi1_ref_idx = ps_search_node->i1_ref_idx;                                                 \
    115     }
    116 
    117 /*****************************************************************************/
    118 /* Function Definitions                                                      */
    119 /*****************************************************************************/
    120 
    121 /**
    122 ********************************************************************************
    123 *  @fn     void hme_update_mv_bank_coarse(search_results_t *ps_search_results,
    124 *                                   layer_mv_t *ps_layer_mv,
    125 *                                   S32 i4_blk_x,
    126 *                                   S32 i4_blk_y,
    127 *                                   search_node_t *ps_search_node_4x8_l,
    128 *                                   search_node_t *ps_search_node_8x4_t,
    129 *                                   S08 i1_ref_idx,
    130 *                                   mvbank_update_prms_t *ps_prms
    131 *
    132 *  @brief  Updates the coarse layer MV Bank for a given ref id and blk pos
    133 *
    134 *  @param[in]  ps_search_results: Search results data structure
    135 *
    136 *  @param[in, out]  ps_layer_mv : MV Bank for this layer
    137 *
    138 *  @param[in]  i4_search_blk_x: column number of the 4x4 blk searched
    139 *
    140 *  @param[in]  i4_search_blk_y: row number of the 4x4 blk searched
    141 *
    142 *  @param[in]  ps_search_node_4x8_t: Best MV of the 4x8T blk
    143 *
    144 *  @param[in]  ps_search_node_8x4_l: Best MV of the 8x4L blk
    145 *
    146 *  @param[in]  i1_ref_idx : Reference ID that has been searched
    147 *
    148 *  @param[in]  ps_prms : Parameters pertaining to the MV Bank update
    149 *
    150 *  @return None
    151 ********************************************************************************
    152 */
    153 void hme_update_mv_bank_coarse(
    154     search_results_t *ps_search_results,
    155     layer_mv_t *ps_layer_mv,
    156     S32 i4_search_blk_x,
    157     S32 i4_search_blk_y,
    158     search_node_t *ps_search_node_4x8_t,
    159     search_node_t *ps_search_node_8x4_l,
    160     S08 i1_ref_idx,
    161     mvbank_update_prms_t *ps_prms)
    162 {
    163     /* These point to the MV and ref idx posn to be udpated */
    164     hme_mv_t *ps_mv;
    165     S08 *pi1_ref_idx;
    166 
    167     /* Offset within the bank */
    168     S32 i4_offset;
    169 
    170     S32 i, j, i4_blk_x, i4_blk_y;
    171 
    172     /* Best results for 8x4R and 4x8B blocks */
    173     search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
    174 
    175     /* Number of MVs in a block */
    176     S32 num_mvs = ps_layer_mv->i4_num_mvs_per_ref;
    177 
    178     search_node_t *aps_search_nodes[4];
    179 
    180     /* The search blk may be different in size from the blk used to hold MV */
    181     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
    182     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
    183 
    184     /* Compute the offset in the MV bank */
    185     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
    186     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
    187 
    188     /* Identify the correct offset in the mvbank and the reference id buf */
    189     ps_mv = ps_layer_mv->ps_mv + (i4_offset + (num_mvs * i1_ref_idx));
    190     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + (i4_offset + (num_mvs * i1_ref_idx));
    191 
    192     /*************************************************************************/
    193     /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */
    194     /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp.      */
    195     /* If number of results to be stored is 4, then we store all these 4     */
    196     /* results, else we pick best ones                                       */
    197     /*************************************************************************/
    198     ps_search_node_8x4_r = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
    199     ps_search_node_4x8_b = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
    200 
    201     ASSERT(num_mvs <= 4);
    202 
    203     /* Doing this to sort best results */
    204     aps_search_nodes[0] = ps_search_node_8x4_r;
    205     aps_search_nodes[1] = ps_search_node_4x8_b;
    206     aps_search_nodes[2] = ps_search_node_8x4_l;
    207     aps_search_nodes[3] = ps_search_node_4x8_t;
    208     if(num_mvs == 4)
    209     {
    210         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[0], 0);
    211         ps_mv++;
    212         pi1_ref_idx++;
    213         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[1], 0);
    214         ps_mv++;
    215         pi1_ref_idx++;
    216         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[2], 0);
    217         ps_mv++;
    218         pi1_ref_idx++;
    219         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[3], 0);
    220         ps_mv++;
    221         pi1_ref_idx++;
    222         return;
    223     }
    224 
    225     /* Run through the results, store them in best to worst order */
    226     for(i = 0; i < num_mvs; i++)
    227     {
    228         for(j = i + 1; j < 4; j++)
    229         {
    230             if(aps_search_nodes[j]->i4_tot_cost < aps_search_nodes[i]->i4_tot_cost)
    231             {
    232                 SWAP_HME(aps_search_nodes[j], aps_search_nodes[i], search_node_t *);
    233             }
    234         }
    235         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[i], 0);
    236         ps_mv++;
    237         pi1_ref_idx++;
    238     }
    239 }
    240 
    241 /**
    242 ********************************************************************************
    243 *  @fn     void hme_coarse_frm_init(me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
    244 *
    245 *  @brief  Frame init entry point Coarse ME.
    246 *
    247 *  @param[in,out]  ps_ctxt: ME Handle
    248 *
    249 *  @param[in]  ps_coarse_prms : Coarse layer config params
    250 *
    251 *  @return None
    252 ********************************************************************************
    253 */
    254 void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
    255 {
    256     layer_ctxt_t *ps_curr_layer;
    257 
    258     S32 i4_pic_wd, i4_pic_ht;
    259 
    260     S32 num_blks_in_pic, num_blks_in_row;
    261 
    262     BLK_SIZE_T e_search_blk_size = BLK_4x4;
    263 
    264     S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4;
    265 
    266     /* Number of references to search */
    267     S32 i4_num_ref;
    268 
    269     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
    270     i4_num_ref = ps_coarse_prms->i4_num_ref;
    271 
    272     i4_pic_wd = ps_curr_layer->i4_wd;
    273     i4_pic_ht = ps_curr_layer->i4_ht;
    274     /* Macro updates num_blks_in_pic and num_blks_in_row*/
    275     GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
    276 
    277     /************************************************************************/
    278     /* Initialize the mv bank that holds results of this layer.             */
    279     /************************************************************************/
    280     hme_init_mv_bank(
    281         ps_curr_layer,
    282         BLK_4x4,
    283         i4_num_ref,
    284         ps_coarse_prms->num_results,
    285         ps_ctxt->u1_encode[ps_coarse_prms->i4_layer_id]);
    286 
    287     return;
    288 }
    289 
    290 /**
    291 ********************************************************************************
    292 *  @fn    void hme_derive_worst_case_search_range(range_prms_t *ps_range,
    293 *                                   range_prms_t *ps_pic_limit,
    294 *                                   range_prms_t *ps_mv_limit,
    295 *                                   S32 i4_x,
    296 *                                   S32 i4_y,
    297 *                                   S32 blk_wd,
    298 *                                   S32 blk_ht)
    299 *
    300 *  @brief  given picture limits and blk dimensions and mv search limits, obtains
    301 *          teh valid search range such that the blk stays within pic boundaries,
    302 *          where picture boundaries include padded portions of picture
    303 *
    304 *  @param[out] ps_range: updated with actual search range
    305 *
    306 *  @param[in] ps_pic_limit : picture boundaries
    307 *
    308 *  @param[in] ps_mv_limit: Search range limits for the mvs
    309 *
    310 *  @param[in] i4_x : x coordinate of the blk
    311 *
    312 *  @param[in] i4_y : y coordinate of the blk
    313 *
    314 *  @param[in] blk_wd : blk width
    315 *
    316 *  @param[in] blk_ht : blk height
    317 *
    318 *  @return void
    319 ********************************************************************************
    320 */
    321 void hme_derive_worst_case_search_range(
    322     range_prms_t *ps_range,
    323     range_prms_t *ps_pic_limit,
    324     range_prms_t *ps_mv_limit,
    325     S32 i4_x,
    326     S32 i4_y,
    327     S32 blk_wd,
    328     S32 blk_ht)
    329 {
    330     /* Taking max x of left block, min x of current block */
    331     ps_range->i2_max_x =
    332         MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)(i4_x - 4)), ps_mv_limit->i2_max_x);
    333     ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
    334     /* Taking max y of top block, min y of current block */
    335     ps_range->i2_max_y =
    336         MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)(i4_y - 4)), ps_mv_limit->i2_max_y);
    337     ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
    338 }
    339 
    340 /**
    341 ********************************************************************************
    342 * @fn void hme_combine_4x4_sads_and_compute_cost(S08 i1_ref_idx,
    343 *                                           range_prms_t *ps_mv_range,
    344 *                                           range_prms_t *ps_mv_limit,
    345 *                                           hme_mv_t *ps_best_mv_4x8,
    346 *                                           hme_mv_t *ps_best_mv_8x4,
    347 *                                           pred_ctxt_t *ps_pred_ctxt,
    348 *                                           PF_MV_COST_FXN pf_mv_cost_compute,
    349 *                                           ME_QUALITY_PRESETS_T e_me_quality_preset,
    350 *                                           S16 *pi2_sads_4x4_current,
    351 *                                           S16 *pi2_sads_4x4_east,
    352 *                                           S16 *pi2_sads_4x4_south,
    353 *                                           FILE *fp_dump_sad)
    354 *
    355 *  @brief  Does a full search on entire srch window with a given step size in coarse layer
    356 *
    357 *  @param[in] i1_ref_idx : Cur ref idx
    358 *
    359 *  @param[in] ps_layer_ctxt: All info about this layer
    360 *
    361 *  @param[out] ps_best_mv  : type hme_mv_t contains best mv x and y
    362 *
    363 *  @param[in] ps_pred_ctxt : Prediction ctxt for cost computation
    364 *
    365 *  @param[in] pf_mv_cost_compute : mv cost computation function
    366 *
    367 *  @return void
    368 ********************************************************************************
    369 */
    370 void hme_combine_4x4_sads_and_compute_cost_high_quality(
    371     S08 i1_ref_idx,
    372     range_prms_t *ps_mv_range,
    373     range_prms_t *ps_mv_limit,
    374     hme_mv_t *ps_best_mv_4x8,
    375     hme_mv_t *ps_best_mv_8x4,
    376     pred_ctxt_t *ps_pred_ctxt,
    377     PF_MV_COST_FXN pf_mv_cost_compute,
    378     S16 *pi2_sads_4x4_current,
    379     S16 *pi2_sads_4x4_east,
    380     S16 *pi2_sads_4x4_south)
    381 {
    382     /* These control number of parts and number of pts in grid to search */
    383     S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4;
    384     S32 step_shift_x, step_shift_y;
    385     S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
    386 
    387     S32 min_cost_4x8 = MAX_32BIT_VAL;
    388     S32 min_cost_8x4 = MAX_32BIT_VAL;
    389 
    390     search_node_t s_search_node;
    391     s_search_node.i1_ref_idx = i1_ref_idx;
    392 
    393     stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
    394     /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
    395     step_shift_x = step_shift_y = 1;
    396 
    397     mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x);
    398     mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y);
    399     mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
    400     mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
    401 
    402     /* Run 2loops to sweep over the reference area */
    403     for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
    404     {
    405         for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
    406         {
    407             S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4;
    408             S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
    409                           ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
    410 
    411             /* Get SAD by adding SAD for current and neighbour S  */
    412             sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
    413             sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
    414 
    415             //          fprintf(fp_dump_sad,"%d\t",sad);
    416             s_search_node.s_mv.i2_mvx = mvx;
    417             s_search_node.s_mv.i2_mvy = mvy;
    418 
    419             cost_4x8 = cost_8x4 =
    420                 pf_mv_cost_compute(&s_search_node, ps_pred_ctxt, PART_ID_2Nx2N, MV_RES_FPEL);
    421 
    422             cost_4x8 += sad_4x8;
    423             cost_8x4 += sad_8x4;
    424 
    425             if(cost_4x8 < min_cost_4x8)
    426             {
    427                 best_mv_x_4x8 = mvx;
    428                 best_mv_y_4x8 = mvy;
    429                 min_cost_4x8 = cost_4x8;
    430             }
    431             if(cost_8x4 < min_cost_8x4)
    432             {
    433                 best_mv_x_8x4 = mvx;
    434                 best_mv_y_8x4 = mvy;
    435                 min_cost_8x4 = cost_8x4;
    436             }
    437         }
    438     }
    439 
    440     ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
    441     ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
    442 
    443     ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
    444     ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
    445 }
    446 
    447 void hme_combine_4x4_sads_and_compute_cost_high_speed(
    448     S08 i1_ref_idx,
    449     range_prms_t *ps_mv_range,
    450     range_prms_t *ps_mv_limit,
    451     hme_mv_t *ps_best_mv_4x8,
    452     hme_mv_t *ps_best_mv_8x4,
    453     pred_ctxt_t *ps_pred_ctxt,
    454     PF_MV_COST_FXN pf_mv_cost_compute,
    455     S16 *pi2_sads_4x4_current,
    456     S16 *pi2_sads_4x4_east,
    457     S16 *pi2_sads_4x4_south)
    458 {
    459     /* These control number of parts and number of pts in grid to search */
    460     S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4;
    461     S32 step_shift_x, step_shift_y;
    462     S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
    463 
    464     S32 rnd, lambda, lambda_q_shift;
    465 
    466     S32 min_cost_4x8 = MAX_32BIT_VAL;
    467     S32 min_cost_8x4 = MAX_32BIT_VAL;
    468 
    469     (void)pf_mv_cost_compute;
    470     stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
    471     /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
    472     step_shift_x = step_shift_y = 2;
    473 
    474     mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x);
    475     mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y);
    476     mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
    477     mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
    478 
    479     lambda = ps_pred_ctxt->lambda;
    480     lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
    481     rnd = 1 << (lambda_q_shift - 1);
    482 
    483     ASSERT(MAX_MVX_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_x));
    484     ASSERT(MAX_MVY_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_y));
    485 
    486     /* Run 2loops to sweep over the reference area */
    487     for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
    488     {
    489         for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
    490         {
    491             S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4;
    492 
    493             S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
    494                           ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
    495 
    496             /* Get SAD by adding SAD for current and neighbour S  */
    497             sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
    498             sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
    499 
    500             //          fprintf(fp_dump_sad,"%d\t",sad);
    501 
    502             cost_4x8 = cost_8x4 =
    503                 (2 * hme_get_range(ABS(mvx)) - 1) + (2 * hme_get_range(ABS(mvy)) - 1) + i1_ref_idx;
    504 
    505             cost_4x8 += (mvx != 0) ? 1 : 0;
    506             cost_4x8 += (mvy != 0) ? 1 : 0;
    507             cost_4x8 = (cost_4x8 * lambda + rnd) >> lambda_q_shift;
    508 
    509             cost_8x4 += (mvx != 0) ? 1 : 0;
    510             cost_8x4 += (mvy != 0) ? 1 : 0;
    511             cost_8x4 = (cost_8x4 * lambda + rnd) >> lambda_q_shift;
    512 
    513             cost_4x8 += sad_4x8;
    514             cost_8x4 += sad_8x4;
    515 
    516             if(cost_4x8 < min_cost_4x8)
    517             {
    518                 best_mv_x_4x8 = mvx;
    519                 best_mv_y_4x8 = mvy;
    520                 min_cost_4x8 = cost_4x8;
    521             }
    522             if(cost_8x4 < min_cost_8x4)
    523             {
    524                 best_mv_x_8x4 = mvx;
    525                 best_mv_y_8x4 = mvy;
    526                 min_cost_8x4 = cost_8x4;
    527             }
    528         }
    529     }
    530 
    531     ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
    532     ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
    533 
    534     ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
    535     ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
    536 }
    537 
    538 /**
    539 ********************************************************************************
    540 *  @fn     hme_store_4x4_sads(hme_search_prms_t *ps_search_prms,
    541 *                               layer_ctxt_t *ps_layer_ctxt)
    542 *
    543 *  @brief  Does a 4x4 sad computation on a given range and stores it in memory
    544 *
    545 *  @param[in] ps_search_prms : Search prms structure containing info like
    546 *               blk dimensions, search range etc
    547 *
    548 *  @param[in] ps_layer_ctxt: All info about this layer
    549 *
    550 *  @param[in] ps_wt_inp_prms: All info about weighted input
    551 *
    552 *  @param[in] e_me_quality_preset: motion estimation quality preset
    553 *
    554 *  @param[in] pi2_sads_4x4: Memory to store all 4x4 SADs for given range
    555 *
    556 *  @return void
    557 ********************************************************************************
    558 */
    559 
    560 void hme_store_4x4_sads_high_quality(
    561     hme_search_prms_t *ps_search_prms,
    562     layer_ctxt_t *ps_layer_ctxt,
    563     range_prms_t *ps_mv_limit,
    564     wgt_pred_ctxt_t *ps_wt_inp_prms,
    565     S16 *pi2_sads_4x4)
    566 {
    567     S32 sad, i, j;
    568 
    569     /* Input and reference attributes */
    570     U08 *pu1_inp, *pu1_inp_orig, *pu1_ref;
    571     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
    572 
    573     /* The reference is actually an array of ptrs since there are several    */
    574     /* reference id. So an array gets passed form calling function           */
    575     U08 **ppu1_ref, *pu1_ref_coloc;
    576 
    577     S32 stepy, stepx, step_shift_x, step_shift_y;
    578     S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
    579 
    580     /* Points to the range limits for mv */
    581     range_prms_t *ps_range_prms;
    582 
    583     /* Reference index to be searched */
    584     S32 i4_search_idx = ps_search_prms->i1_ref_idx;
    585     /* Using the member 0 to store for all ref. idx. */
    586     ps_range_prms = ps_search_prms->aps_mv_range[0];
    587     pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx];
    588     i4_inp_stride = ps_search_prms->i4_inp_stride;
    589 
    590     /* Move to the location of the search blk in inp buffer */
    591     pu1_inp_orig += ps_search_prms->i4_cu_x_off;
    592     pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride;
    593 
    594     /*************************************************************************/
    595     /* we use either input of previously encoded pictures as reference       */
    596     /* in coarse layer                                                       */
    597     /*************************************************************************/
    598     i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
    599     ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
    600 
    601     /* colocated position in reference picture */
    602     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
    603     pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset;
    604 
    605     stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
    606     /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
    607     step_shift_x = step_shift_y = 1;
    608 
    609     mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
    610     mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);
    611     mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
    612     mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
    613 
    614     /* Run 2loops to sweep over the reference area */
    615     for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy)
    616     {
    617         for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx)
    618         {
    619             /* Set up the reference and inp ptr */
    620             pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride);
    621             pu1_inp = pu1_inp_orig;
    622             /* SAD computation */
    623             {
    624                 sad = 0;
    625                 for(i = 0; i < 4; i++)
    626                 {
    627                     for(j = 0; j < 4; j++)
    628                     {
    629                         sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j])));
    630                     }
    631                     pu1_inp += i4_inp_stride;
    632                     pu1_ref += i4_ref_stride;
    633                 }
    634             }
    635 
    636             pi2_sads_4x4
    637                 [((mvx >> step_shift_x) + mv_x_offset) +
    638                  ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad;
    639         }
    640     }
    641 }
    642 
    643 void hme_store_4x4_sads_high_speed(
    644     hme_search_prms_t *ps_search_prms,
    645     layer_ctxt_t *ps_layer_ctxt,
    646     range_prms_t *ps_mv_limit,
    647     wgt_pred_ctxt_t *ps_wt_inp_prms,
    648     S16 *pi2_sads_4x4)
    649 {
    650     S32 sad, i, j;
    651 
    652     /* Input and reference attributes */
    653     U08 *pu1_inp, *pu1_inp_orig, *pu1_ref;
    654     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
    655 
    656     /* The reference is actually an array of ptrs since there are several    */
    657     /* reference id. So an array gets passed form calling function           */
    658     U08 **ppu1_ref, *pu1_ref_coloc;
    659 
    660     S32 stepy, stepx, step_shift_x, step_shift_y;
    661     S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
    662 
    663     /* Points to the range limits for mv */
    664     range_prms_t *ps_range_prms;
    665 
    666     /* Reference index to be searched */
    667     S32 i4_search_idx = ps_search_prms->i1_ref_idx;
    668 
    669     /* Using the member 0 for all ref. idx */
    670     ps_range_prms = ps_search_prms->aps_mv_range[0];
    671     pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx];
    672     i4_inp_stride = ps_search_prms->i4_inp_stride;
    673 
    674     /* Move to the location of the search blk in inp buffer */
    675     pu1_inp_orig += ps_search_prms->i4_cu_x_off;
    676     pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride;
    677 
    678     /*************************************************************************/
    679     /* we use either input of previously encoded pictures as reference       */
    680     /* in coarse layer                                                       */
    681     /*************************************************************************/
    682     i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
    683     ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
    684 
    685     /* colocated position in reference picture */
    686     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
    687     pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset;
    688 
    689     stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
    690     /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
    691     step_shift_x = step_shift_y = 2;
    692 
    693     mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
    694     mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);
    695     mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
    696     mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
    697 
    698     /* Run 2loops to sweep over the reference area */
    699     for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy)
    700     {
    701         for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx)
    702         {
    703             /* Set up the reference and inp ptr */
    704             pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride);
    705             pu1_inp = pu1_inp_orig;
    706             /* SAD computation */
    707             {
    708                 sad = 0;
    709                 for(i = 0; i < 4; i++)
    710                 {
    711                     for(j = 0; j < 4; j++)
    712                     {
    713                         sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j])));
    714                     }
    715                     pu1_inp += i4_inp_stride;
    716                     pu1_ref += i4_ref_stride;
    717                 }
    718             }
    719 
    720             pi2_sads_4x4
    721                 [((mvx >> step_shift_x) + mv_x_offset) +
    722                  ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad;
    723         }
    724     }
    725 }
    726 /**
    727 ********************************************************************************
    728 *  @fn     void hme_coarsest(coarse_me_ctxt_t *ps_ctxt,
    729 *              coarse_prms_t *ps_coarse_prms, multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
    730 *              WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync)
    731 *  @brief  Top level entry point for Coarse ME. Runs across blks and searches
    732 *          at a 4x4 blk granularity by using 4x8 and 8x4 patterns.
    733 *  @param[in,out]  ps_ctxt : Coarse ME handle
    734 *  @param[in]  ps_coarse_prms : Coarse layer config params
    735 *  @param[in]  ps_multi_thrd_ctxt : Multi thread context (used to fetch row jobs)
    736 *  @param[in]  i4_ping_pong : Ping-pong index forwarded to the job queue calls
    737 *  @param[in]  ppv_dep_mngr_hme_sync : Per-layer HME dependency managers, indexed
    738 *                                      with (layer id - 1)
    739 *  @return None
    740 ********************************************************************************
    741 */
    742 void hme_coarsest(
    743     coarse_me_ctxt_t *ps_ctxt,
    744     coarse_prms_t *ps_coarse_prms,
    745     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
    746     WORD32 i4_ping_pong,
    747     void **ppv_dep_mngr_hme_sync)
    748 {
    749     S16 *pi2_cur_ref_sads_4x4;
    750     S32 ai4_sad_4x4_block_size[MAX_NUM_REF], ai4_sad_4x4_block_stride[MAX_NUM_REF];
    751     S32 num_rows_coarse;
    752     S32 sad_top_offset, sad_current_offset;
    753     S32 search_node_top_offset, search_node_left_offset;
    754 
    755     ME_QUALITY_PRESETS_T e_me_quality_preset =
    756         ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
    757 
    758     search_results_t *ps_search_results;
    759     mvbank_update_prms_t s_mv_update_prms;
    760     BLK_SIZE_T e_search_blk_size = BLK_4x4;
    761     hme_search_prms_t s_search_prms_4x8, s_search_prms_8x4, s_search_prms_4x4;
    762 
    763     S32 global_id_8x4, global_id_4x8;
    764 
    765     /*************************************************************************/
    766     /* These directly point to the best search result nodes that will be     */
    767     /* updated by the search algorithm, rather than have to go through an    */
    768     /* elaborate structure                                                   */
    769     /*************************************************************************/
    770     search_node_t *aps_best_search_node_8x4[MAX_NUM_REF];
    771     search_node_t *aps_best_search_node_4x8[MAX_NUM_REF];
    772 
    773     /* These point to various spatial candts */
    774     search_node_t *ps_candt_8x4_l, *ps_candt_8x4_t, *ps_candt_8x4_tl;
    775     search_node_t *ps_candt_4x8_l, *ps_candt_4x8_t, *ps_candt_4x8_tl;
    776     search_node_t *ps_candt_zeromv_8x4, *ps_candt_zeromv_4x8;
    777     search_node_t *ps_candt_fs_8x4, *ps_candt_fs_4x8;
    778     search_node_t as_top_neighbours[4], as_left_neighbours[3];
    779 
    780     /* Holds the global mv for a given ref index */
    781     search_node_t s_candt_global[MAX_NUM_REF];
    782 
    783     /* All the search candidates */
    784     search_candt_t as_search_candts_8x4[MAX_INIT_CANDTS];
    785     search_candt_t as_search_candts_4x8[MAX_INIT_CANDTS];
    786     search_candt_t *ps_search_candts_8x4, *ps_search_candts_4x8;
    787 
    788     /* Actual range per blk and the pic level boundaries */
    789     range_prms_t s_range_prms, s_pic_limit, as_mv_limit[MAX_NUM_REF];
    790 
    791     /* Current and prev pic layer ctxt at the coarsest layer */
    792     layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
    793 
    794     /* best mv of full search */
    795     hme_mv_t best_mv_4x8, best_mv_8x4;
    796 
    797     /* Book keeping at blk level */
    798     S32 blk_x, num_blks_in_pic, num_blks_in_row, num_4x4_blks_in_row;
    799 
    800     S32 blk_y;
    801 
    802     /* Block dimensions */
    803     S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4;
    804 
    805     S32 lambda = ps_coarse_prms->lambda;
    806 
    807     /* Number of references to search */
    808     S32 i4_num_ref;
    809 
    810     S32 i4_i, id, i;
    811     S08 i1_ref_idx;
    812 
    813     S32 i4_pic_wd, i4_pic_ht;
    814     S32 i4_layer_id;
    815 
    816     S32 end_of_frame;
    817 
    818     pf_get_wt_inp fp_get_wt_inp;
    819 
    820     /* Maximum search iterations around any candidate */
    821     S32 i4_max_iters = ps_coarse_prms->i4_max_iters;
    822 
    823     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
    824     ps_prev_layer = hme_coarse_get_past_layer_ctxt(ps_ctxt, ps_coarse_prms->i4_layer_id);
    825 
    826     /* We need only one instance of search results structure */
    827     ps_search_results = &ps_ctxt->s_search_results_8x8;
    828 
    829     ps_search_candts_8x4 = &as_search_candts_8x4[0];
    830     ps_search_candts_4x8 = &as_search_candts_4x8[0];
    831 
    832     end_of_frame = 0;
    833 
    834     i4_pic_wd = ps_curr_layer->i4_wd;
    835     i4_pic_ht = ps_curr_layer->i4_ht;
    836 
    837     fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
    838                         ->pf_get_wt_inp_8x8;
    839 
    840     num_rows_coarse = ps_ctxt->i4_num_row_bufs;
    841 
    842     /*************************************************************************/
    843     /* Coarse Layer always does explicit search. Number of reference frames  */
    844     /* to search is a configurable parameter supplied by the application     */
    845     /*************************************************************************/
    846     i4_num_ref = ps_coarse_prms->i4_num_ref;
    847     i4_layer_id = ps_coarse_prms->i4_layer_id;
    848 
    849     /*************************************************************************/
    850     /*  The search algorithm goes as follows:                                */
    851     /*                                                                       */
    852     /*          ___                                                          */
    853     /*         | e |                                                         */
    854     /*      ___|___|___                                                      */
    855     /*     | c | a | b |                                                     */
    856     /*     |___|___|___|                                                     */
    857     /*         | d |                                                         */
    858     /*         |___|                                                         */
    859     /*                                                                       */
    860     /* For the target block a, we collect best results from 2 8x4 blks       */
    861     /* These are c-a and a-b. The 4x8 blks are e-a and a-d                   */
    862     /* c-a result is already available from results of blk c. a-b is         */
    863     /* evaluated in this blk. Likewise e-a result is stored in a row buffer  */
    864     /* a-d is evaluated this blk                                             */
    865     /* So we store a row buffer which stores best 4x8 results of all top blk */
    866     /*************************************************************************/
    867 
    868     /************************************************************************/
    869     /* Initialize the pointers to the best node.                            */
    870     /************************************************************************/
    871     for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
    872     {
    873         aps_best_search_node_8x4[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_2NxN_B];
    874         aps_best_search_node_4x8[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_Nx2N_R];
    875     }
    876 
    877     /************************************************************************/
    878     /* Initialize the "searchresults" structure. This will set up the number*/
    879     /* of search types, result updates etc                                  */
    880     /************************************************************************/
    881     {
    882         S32 num_results_per_part;
    883         /* We evaluate 4 types of results per 4x4 blk. 8x4L and 8x4R and     */
    884         /* 4x8 T and 4x8B. So if we are to give 4 results, then we need to   */
    885         /* only evaluate 1 result per part. In the coarse layer, we are      */
    886         /* limited to 2 results max per part, and max of 8 results.          */
    887         num_results_per_part = (ps_coarse_prms->num_results + 3) >> 2;
    888         hme_init_search_results(
    889             ps_search_results,
    890             i4_num_ref,
    891             ps_coarse_prms->num_results,
    892             num_results_per_part,
    893             BLK_8x8,
    894             0,
    895             0,
    896             ps_ctxt->au1_is_past);
    897     }
    898 
    899     /* Macro updates num_blks_in_pic and num_blks_in_row*/
    900     GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
    901 
    902     num_4x4_blks_in_row = num_blks_in_row + 1;
    903 
    904     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
    905     s_mv_update_prms.i4_num_ref = i4_num_ref;
    906     s_mv_update_prms.i4_shift = 0;
    907 
    908     /* For full search, support 2 or 4 step size */
    909     if(ps_coarse_prms->do_full_search)
    910     {
    911         ASSERT((ps_coarse_prms->full_search_step == 2) || (ps_coarse_prms->full_search_step == 4));
    912     }
    913 
    914     for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
    915     {
    916         S32 blk, delta_poc;
    917         S32 mv_x_clip, mv_y_clip;
    918         /* Initialize only the first row */
    919         for(blk = 0; blk < num_blks_in_row; blk++)
    920         {
    921             INIT_SEARCH_NODE(&ps_ctxt->aps_best_search_nodes_4x8_n_rows[i4_i][blk], i4_i);
    922         }
    923 
    924         delta_poc = ABS(ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i4_i]);
    925 
    926         /* Setting search range for different references based on the delta poc */
    927         /*************************************************************************/
    928         /* set the MV limit per ref. pic.                                        */
    929         /*    - P pic. : Based on the config params.                             */
    930         /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
    931         /*************************************************************************/
    932         {
    933             /* TO DO : Remove hard coding of P-P dist. of 4 */
    934             mv_x_clip = (ps_curr_layer->i2_max_mv_x * delta_poc) / 4;
    935 
    936             /* Only for B/b pic. */
    937             if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
    938             {
    939                 WORD16 i2_mv_y_per_poc;
    940 
    941                 /* Get abs MAX for symmetric search */
    942                 i2_mv_y_per_poc =
    943                     MAX(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id],
    944                         (ABS(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id])));
    945 
    946                 mv_y_clip = i2_mv_y_per_poc * delta_poc;
    947             }
    948             /* Set the Config. File Params for P pic. */
    949             else
    950             {
    951                 /* TO DO : Remove hard coding of P-P dist. of 4 */
    952                 mv_y_clip = (ps_curr_layer->i2_max_mv_y * delta_poc) / 4;
    953             }
    954 
    955             /* Making mv_x and mv_y range multiple of 4 */
    956             mv_x_clip = (((mv_x_clip + 3) >> 2) << 2);
    957             mv_y_clip = (((mv_y_clip + 3) >> 2) << 2);
    958             /* Clipping the range of mv_x and mv_y */
    959             mv_x_clip = CLIP3(mv_x_clip, 4, MAX_MVX_SUPPORTED_IN_COARSE_LAYER);
    960             mv_y_clip = CLIP3(mv_y_clip, 4, MAX_MVY_SUPPORTED_IN_COARSE_LAYER);
    961 
    962             as_mv_limit[i4_i].i2_min_x = -mv_x_clip;
    963             as_mv_limit[i4_i].i2_min_y = -mv_y_clip;
    964             as_mv_limit[i4_i].i2_max_x = mv_x_clip;
    965             as_mv_limit[i4_i].i2_max_y = mv_y_clip;
    966         }
    967         /*Populating SAD block size based on search range */
    968         ai4_sad_4x4_block_size[i4_i] = ((2 * mv_x_clip) / ps_coarse_prms->full_search_step) *
    969                                        ((2 * mv_y_clip) / ps_coarse_prms->full_search_step);
    970         ai4_sad_4x4_block_stride[i4_i] = (num_blks_in_row + 1) * ai4_sad_4x4_block_size[i4_i];
    971     }
    972 
    973     for(i = 0; i < 2 * MAX_INIT_CANDTS; i++)
    974     {
    975         search_node_t *ps_search_node;
    976         ps_search_node = &ps_ctxt->s_init_search_node[i];
    977         INIT_SEARCH_NODE(ps_search_node, 0);
    978     }
    979     for(i = 0; i < 3; i++)
    980     {
    981         search_node_t *ps_search_node;
    982         ps_search_node = &as_left_neighbours[i];
    983         INIT_SEARCH_NODE(ps_search_node, 0);
    984         ps_search_node = &as_top_neighbours[i];
    985         INIT_SEARCH_NODE(ps_search_node, 0);
    986     }
    987     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
    988     /* Set up place holders to hold the search nodes of each initial candt */
    989     for(i = 0; i < MAX_INIT_CANDTS; i++)
    990     {
    991         ps_search_candts_8x4[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
    992 
    993         ps_search_candts_4x8[i].ps_search_node = &ps_ctxt->s_init_search_node[MAX_INIT_CANDTS + i];
    994 
    995         ps_search_candts_8x4[i].u1_num_steps_refine = (U08)i4_max_iters;
    996         ps_search_candts_4x8[i].u1_num_steps_refine = (U08)i4_max_iters;
    997     }
    998 
    999     /* For Top,TopLeft and Left cand., no need for refinement */
   1000     id = 0;
   1001     if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
   1002     {
   1003         /* This search candt has the full search result */
   1004         ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
   1005         id++;
   1006     }
   1007 
   1008     ps_candt_8x4_l = ps_search_candts_8x4[id].ps_search_node;
   1009     ps_search_candts_8x4[id].u1_num_steps_refine = 0;
   1010     id++;
   1011     ps_candt_8x4_t = ps_search_candts_8x4[id].ps_search_node;
   1012     ps_search_candts_8x4[id].u1_num_steps_refine = 0;
   1013     id++;
   1014     ps_candt_8x4_tl = ps_search_candts_8x4[id].ps_search_node;
   1015     ps_search_candts_8x4[id].u1_num_steps_refine = 0;
   1016     id++;
   1017     /* This search candt stores the global candt */
   1018     global_id_8x4 = id;
   1019     id++;
   1020 
   1021     if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
   1022     {
   1023         /* This search candt has the full search result */
   1024         ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
   1025         id++;
   1026     }
   1027     /* Don't increment id as (0,0) is removed from cand. list. Initializing */
   1028     /* the pointer for hme_init_pred_ctxt_no_encode()                       */
   1029     ps_candt_zeromv_8x4 = ps_search_candts_8x4[id].ps_search_node;
   1030 
   1031     /* For Top,TopLeft and Left cand., no need for refinement */
   1032     id = 0;
   1033     if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
   1034     {
   1035         /* This search candt has the full search result */
   1036         ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
   1037         id++;
   1038     }
   1039 
   1040     ps_candt_4x8_l = ps_search_candts_4x8[id].ps_search_node;
   1041     ps_search_candts_4x8[id].u1_num_steps_refine = 0;
   1042     id++;
   1043     ps_candt_4x8_t = ps_search_candts_4x8[id].ps_search_node;
   1044     ps_search_candts_4x8[id].u1_num_steps_refine = 0;
   1045     id++;
   1046     ps_candt_4x8_tl = ps_search_candts_4x8[id].ps_search_node;
   1047     ps_search_candts_4x8[id].u1_num_steps_refine = 0;
   1048     id++;
   1049     /* This search candt stores the global candt */
   1050     global_id_4x8 = id;
   1051     id++;
   1052     if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
   1053     {
   1054         /* This search candt has the full search result */
   1055         ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
   1056         id++;
   1057     }
   1058     /* Don't increment id as (0,0) is removed from cand. list. Initializing */
   1059     /* the pointer for hme_init_pred_ctxt_no_encode()                       */
   1060     ps_candt_zeromv_4x8 = ps_search_candts_4x8[id].ps_search_node;
   1061 
   1062     /* Zero mv always has 0 mvx and y component, ref idx initialized inside */
   1063     ps_candt_zeromv_8x4->s_mv.i2_mvx = 0;
   1064     ps_candt_zeromv_8x4->s_mv.i2_mvy = 0;
   1065     ps_candt_zeromv_4x8->s_mv.i2_mvx = 0;
   1066     ps_candt_zeromv_4x8->s_mv.i2_mvy = 0;
   1067 
   1068     /* SET UP THE PRED CTXT FOR L0 AND L1 */
   1069     {
   1070         S32 pred_lx;
   1071 
   1072         /* Bottom left always not available */
   1073         as_left_neighbours[2].u1_is_avail = 0;
   1074 
   1075         for(pred_lx = 0; pred_lx < 2; pred_lx++)
   1076         {
   1077             pred_ctxt_t *ps_pred_ctxt;
   1078 
   1079             ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
   1080             hme_init_pred_ctxt_no_encode(
   1081                 ps_pred_ctxt,
   1082                 ps_search_results,
   1083                 as_top_neighbours,
   1084                 as_left_neighbours,
   1085                 NULL,
   1086                 ps_candt_zeromv_8x4,
   1087                 ps_candt_zeromv_8x4,
   1088                 pred_lx,
   1089                 lambda,
   1090                 ps_coarse_prms->lambda_q_shift,
   1091                 ps_ctxt->apu1_ref_bits_tlu_lc,
   1092                 ps_ctxt->ai2_ref_scf);
   1093         }
   1094     }
   1095 
   1096     /*************************************************************************/
   1097     /* Initialize the search parameters for search algo with the following   */
   1098     /* parameters: No SATD, calculated number of initial candidates,         */
   1099     /* No post refinement, initial step size and number of iterations as     */
   1100     /* passed by the calling function.                                       */
   1101     /* Also, we use input for this layer search, and not recon.              */
   1102     /*************************************************************************/
   1103     if(e_me_quality_preset == ME_XTREME_SPEED_25)
   1104         s_search_prms_8x4.i4_num_init_candts = 1;
   1105     else
   1106         s_search_prms_8x4.i4_num_init_candts = id;
   1107     s_search_prms_8x4.i4_use_satd = 0;
   1108     s_search_prms_8x4.i4_start_step = ps_coarse_prms->i4_start_step;
   1109     s_search_prms_8x4.i4_num_steps_post_refine = 0;
   1110     s_search_prms_8x4.i4_use_rec = 0;
   1111     s_search_prms_8x4.ps_search_candts = ps_search_candts_8x4;
   1112     s_search_prms_8x4.e_blk_size = BLK_8x4;
   1113     s_search_prms_8x4.i4_max_iters = ps_coarse_prms->i4_max_iters;
   1114     /* Coarse layer is always explicit */
   1115     if(ME_MEDIUM_SPEED > e_me_quality_preset)
   1116     {
   1117         s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse;
   1118     }
   1119     else
   1120     {
   1121         s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
   1122     }
   1123 
   1124     s_search_prms_8x4.i4_inp_stride = 8;
   1125     s_search_prms_8x4.i4_cu_x_off = s_search_prms_8x4.i4_cu_y_off = 0;
   1126     if(ps_coarse_prms->do_full_search)
   1127         s_search_prms_8x4.i4_max_iters = 1;
   1128     s_search_prms_8x4.i4_part_mask = (1 << PART_ID_2NxN_B);
   1129     /* Using the member 0 to store for all ref. idx. */
   1130     s_search_prms_8x4.aps_mv_range[0] = &s_range_prms;
   1131     s_search_prms_8x4.ps_search_results = ps_search_results;
   1132     s_search_prms_8x4.full_search_step = ps_coarse_prms->full_search_step;
   1133 
   1134     s_search_prms_4x8 = s_search_prms_8x4;
   1135     s_search_prms_4x8.ps_search_candts = ps_search_candts_4x8;
   1136     s_search_prms_4x8.e_blk_size = BLK_4x8;
   1137     s_search_prms_4x8.i4_part_mask = (1 << PART_ID_Nx2N_R);
   1138 
   1139     s_search_prms_4x4 = s_search_prms_8x4;
   1140     /* Since s_search_prms_4x4 is used only to compute SAD at 4x4 level, search candidate is not used */
   1141     s_search_prms_4x4.ps_search_candts = ps_search_candts_4x8;
   1142     s_search_prms_4x4.e_blk_size = BLK_4x4;
   1143     s_search_prms_4x4.i4_part_mask = (1 << PART_ID_2Nx2N);
   1144     /*************************************************************************/
   1145     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
   1146     /* every block given its coordinate.                                     */
   1147     /*************************************************************************/
   1148     SET_PIC_LIMIT(
   1149         s_pic_limit,
   1150         ps_curr_layer->i4_pad_x_inp,
   1151         ps_curr_layer->i4_pad_y_inp,
   1152         ps_curr_layer->i4_wd,
   1153         ps_curr_layer->i4_ht,
   1154         s_search_prms_4x4.i4_num_steps_post_refine);
   1155 
   1156     /* Pick the global mv from previous reference */
   1157     for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
   1158     {
   1159         if(ME_XTREME_SPEED_25 != e_me_quality_preset)
   1160         {
   1161             /* Distance of current pic from reference */
   1162             S32 i4_delta_poc;
   1163 
   1164             hme_mv_t s_mv;
   1165             i4_delta_poc = ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx];
   1166 
   1167             hme_get_global_mv(ps_prev_layer, &s_mv, i4_delta_poc);
   1168 
   1169             s_candt_global[i1_ref_idx].s_mv.i2_mvx = s_mv.i2_mv_x;
   1170             s_candt_global[i1_ref_idx].s_mv.i2_mvy = s_mv.i2_mv_y;
   1171             s_candt_global[i1_ref_idx].i1_ref_idx = i1_ref_idx;
   1172 
   1173             /*********************************************************************/
   1174             /* Initialize the histogram for each reference index in current      */
   1175             /* layer ctxt                                                        */
   1176             /*********************************************************************/
   1177             hme_init_histogram(
   1178                 ps_ctxt->aps_mv_hist[i1_ref_idx],
   1179                 (S32)as_mv_limit[i1_ref_idx].i2_max_x,
   1180                 (S32)as_mv_limit[i1_ref_idx].i2_max_y);
   1181         }
   1182 
   1183         /*********************************************************************/
   1184         /* Initialize the dyn. search range params. for each reference index */
   1185         /* in current layer ctxt                                             */
   1186         /*********************************************************************/
   1187         /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
   1188         if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
   1189         {
   1190             INIT_DYN_SEARCH_PRMS(
   1191                 &ps_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][i1_ref_idx],
   1192                 ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx]);
   1193         }
   1194     }
   1195 
   1196     /*************************************************************************/
   1197     /* if exhaustive algorithm then we use only 1 candt 0, 0                 */
   1198     /* else we use a lot of causal and non causal candts                     */
   1199     /* finally set number to the configured number of candts                 */
   1200     /*************************************************************************/
   1201 
   1202     /* Loop in raster order over each 4x4 blk in a given row till end of frame */
   1203     while(0 == end_of_frame)
   1204     {
   1205         job_queue_t *ps_job;
   1206         void *pv_hme_dep_mngr;
   1207         WORD32 offset_val, check_dep_pos, set_dep_pos;
   1208 
   1209         /* Get the current layer HME Dep Mngr       */
   1210         /* Note : Use layer_id - 1 in HME layers    */
   1211         pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_coarse_prms->i4_layer_id - 1];
   1212 
   1213         /* Get the current row from the job queue */
   1214         ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
   1215             ps_multi_thrd_ctxt, ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type, 1, i4_ping_pong);
   1216 
   1217         /* If all rows are done, set the end of process flag to 1, */
   1218         /* and the current row to -1 */
   1219         if(NULL == ps_job)
   1220         {
   1221             blk_y = -1;
   1222             end_of_frame = 1;
   1223         }
   1224         else
   1225         {
   1226             ASSERT(ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type == ps_job->i4_pre_enc_task_type);
   1227 
   1228             /* Obtain the current row's details from the job */
   1229             blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
   1230 
   1231             if(1 == ps_ctxt->s_frm_prms.is_i_pic)
   1232             {
   1233                 /* set the output dependency of current row */
   1234                 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
   1235                 continue;
   1236             }
   1237 
   1238             /* Set Variables for Dep. Checking and Setting */
   1239             set_dep_pos = blk_y + 1;
   1240             if(blk_y > 0)
   1241             {
   1242                 offset_val = 2;
   1243                 check_dep_pos = blk_y - 1;
   1244             }
   1245             else
   1246             {
   1247                 /* First row should run without waiting */
   1248                 offset_val = -1;
   1249                 check_dep_pos = 0;
   1250             }
   1251 
   1252             /* Loop over all the blocks in current row */
   1253             /* One block extra, since the last block in a row needs East block */
   1254             for(blk_x = 0; blk_x < (num_blks_in_row + 1); blk_x++)
   1255             {
   1256                 /* Wait till top row block is processed   */
   1257                 /* Currently checking till top right block*/
   1258                 if(blk_x < (num_blks_in_row))
   1259                 {
   1260                     ihevce_dmgr_chk_row_row_sync(
   1261                         pv_hme_dep_mngr,
   1262                         blk_x,
   1263                         offset_val,
   1264                         check_dep_pos,
   1265                         0, /* Col Tile No. : Not supported in PreEnc*/
   1266                         ps_ctxt->thrd_id);
   1267                 }
   1268 
   1269                 /***************************************************************/
   1270                 /* Get Weighted input for all references                       */
   1271                 /***************************************************************/
   1272                 fp_get_wt_inp(
   1273                     ps_curr_layer,
   1274                     &ps_ctxt->s_wt_pred,
   1275                     1 << (blk_size_shift + 1),
   1276                     blk_x << blk_size_shift,
   1277                     (blk_y - 1) << blk_size_shift,
   1278                     1 << (blk_size_shift + 1),
   1279                     i4_num_ref,
   1280                     ps_ctxt->i4_wt_pred_enable_flag);
   1281 
   1282                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
   1283                 hme_reset_search_results(
   1284                     ps_search_results,
   1285                     s_search_prms_8x4.i4_part_mask | s_search_prms_4x8.i4_part_mask,
   1286                     MV_RES_FPEL);
   1287 
   1288                 /* Compute the search node offsets */
   1289                 /* MAX is used to clip when left and top neighbours are not available at coarse boundaries  */
   1290                 search_node_top_offset =
   1291                     blk_x + ps_ctxt->ai4_row_index[MAX((blk_y - 2), 0)] * num_blks_in_row;
   1292                 search_node_left_offset =
   1293                     MAX((blk_x - 1), 0) +
   1294                     ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] * num_blks_in_row;
   1295 
   1296                 /* Input offset: wrt CU start. Offset for South block */
   1297                 s_search_prms_4x4.i4_cu_x_off = 0;
   1298                 s_search_prms_4x4.i4_cu_y_off = 4;
   1299                 s_search_prms_4x4.i4_inp_stride = 8;
   1300                 s_search_prms_4x4.i4_x_off = blk_x << blk_size_shift;
   1301                 s_search_prms_4x4.i4_y_off = blk_y << blk_size_shift;
   1302 
   1303                 s_search_prms_4x8.i4_x_off = s_search_prms_8x4.i4_x_off = blk_x << blk_size_shift;
   1304                 s_search_prms_4x8.i4_y_off = s_search_prms_8x4.i4_y_off = (blk_y - 1)
   1305                                                                           << blk_size_shift;
   1306 
   1307                 /* This layer will always use explicit ME */
   1308                 /* Loop across different Ref IDx */
   1309                 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
   1310                 {
   1311                     sad_top_offset = (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
   1312                                      ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] *
   1313                                          ai4_sad_4x4_block_stride[i1_ref_idx];
   1314                     sad_current_offset =
   1315                         (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
   1316                         ps_ctxt->ai4_row_index[blk_y] * ai4_sad_4x4_block_stride[i1_ref_idx];
   1317 
   1318                     /* Initialize search node if blk_x == 0, as it doesn't have left neighbours */
   1319                     if(0 == blk_x)
   1320                         INIT_SEARCH_NODE(
   1321                             &ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx][blk_x],
   1322                             i1_ref_idx);
   1323 
   1324                     pi2_cur_ref_sads_4x4 = ps_ctxt->api2_sads_4x4_n_rows[i1_ref_idx];
   1325 
   1326                     /* Initialize changing params here */
   1327                     s_search_prms_8x4.i1_ref_idx = i1_ref_idx;
   1328                     s_search_prms_4x8.i1_ref_idx = i1_ref_idx;
   1329                     s_search_prms_4x4.i1_ref_idx = i1_ref_idx;
   1330 
   1331                     if(num_blks_in_row == blk_x)
   1332                     {
   1333                         S16 *pi2_sads_4x4_current;
   1334                         /* The current 4x4 block lies in a padded region, which may not match any of the references, so clear its SAD storage */
   1335                         pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
   1336 
   1337                         memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
   1338                     }
   1339 
   1340                     /* SAD to be computed and stored for the 4x4 block in 1st row and the last block of all rows*/
   1341                     if((0 == blk_y) || (num_blks_in_row == blk_x))
   1342                     {
   1343                         S16 *pi2_sads_4x4_current;
   1344                         /* Compute 4x4 SADs for current block */
   1345                         /* Pointer to store SADs */
   1346                         pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
   1347 
   1348                         hme_derive_worst_case_search_range(
   1349                             &s_range_prms,
   1350                             &s_pic_limit,
   1351                             &as_mv_limit[i1_ref_idx],
   1352                             blk_x << blk_size_shift,
   1353                             blk_y << blk_size_shift,
   1354                             blk_wd,
   1355                             blk_ht);
   1356 
   1357                         if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
   1358                         {
   1359                             ((ihevce_me_optimised_function_list_t *)
   1360                                  ps_ctxt->pv_me_optimised_function_list)
   1361                                 ->pf_store_4x4_sads_high_quality(
   1362                                     &s_search_prms_4x4,
   1363                                     ps_curr_layer,
   1364                                     &as_mv_limit[i1_ref_idx],
   1365                                     &ps_ctxt->s_wt_pred,
   1366                                     pi2_sads_4x4_current);
   1367                         }
   1368                         else
   1369                         {
   1370                             ((ihevce_me_optimised_function_list_t *)
   1371                                  ps_ctxt->pv_me_optimised_function_list)
   1372                                 ->pf_store_4x4_sads_high_speed(
   1373                                     &s_search_prms_4x4,
   1374                                     ps_curr_layer,
   1375                                     &as_mv_limit[i1_ref_idx],
   1376                                     &ps_ctxt->s_wt_pred,
   1377                                     pi2_sads_4x4_current);
   1378                         }
   1379                     }
   1380                     else
   1381                     {
   1382                         /* For the zero mv candt, the ref idx to be modified */
   1383                         ps_candt_zeromv_8x4->i1_ref_idx = i1_ref_idx;
   1384                         ps_candt_zeromv_4x8->i1_ref_idx = i1_ref_idx;
   1385 
   1386                         if(ME_XTREME_SPEED_25 != e_me_quality_preset)
   1387                         {
   1388                             /* For the global mvs alone, the search node points to a local variable */
   1389                             ps_search_candts_8x4[global_id_8x4].ps_search_node =
   1390                                 &s_candt_global[i1_ref_idx];
   1391                             ps_search_candts_4x8[global_id_4x8].ps_search_node =
   1392                                 &s_candt_global[i1_ref_idx];
   1393                         }
   1394 
   1395                         hme_get_spatial_candt(
   1396                             ps_curr_layer,
   1397                             BLK_4x4,
   1398                             blk_x,
   1399                             blk_y - 1,
   1400                             i1_ref_idx,
   1401                             as_top_neighbours,
   1402                             as_left_neighbours,
   1403                             0,
   1404                             1,
   1405                             0,
   1406                             0);
   1407                         /* set up the various candts */
   1408                         *ps_candt_4x8_l = as_left_neighbours[0];
   1409                         *ps_candt_4x8_t = as_top_neighbours[1];
   1410                         *ps_candt_4x8_tl = as_top_neighbours[0];
   1411                         *ps_candt_8x4_l = *ps_candt_4x8_l;
   1412                         *ps_candt_8x4_tl = *ps_candt_4x8_tl;
   1413                         *ps_candt_8x4_t = *ps_candt_4x8_t;
   1414 
   1415                         {
   1416                             S32 pred_lx;
   1417                             S16 *pi2_sads_4x4_current, *pi2_sads_4x4_top;
   1418                             pred_ctxt_t *ps_pred_ctxt;
   1419                             PF_MV_COST_FXN pf_mv_cost_compute;
   1420 
   1421                             /* Compute 4x4 SADs for current block */
   1422                             /* Pointer to store SADs */
   1423                             pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
   1424 
   1425                             hme_derive_worst_case_search_range(
   1426                                 &s_range_prms,
   1427                                 &s_pic_limit,
   1428                                 &as_mv_limit[i1_ref_idx],
   1429                                 blk_x << blk_size_shift,
   1430                                 blk_y << blk_size_shift,
   1431                                 blk_wd,
   1432                                 blk_ht);
   1433                             if(i4_pic_ht == blk_y)
   1434                             {
   1435                                 memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
   1436                             }
   1437                             else
   1438                             {
   1439                                 if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
   1440                                 {
   1441                                     ((ihevce_me_optimised_function_list_t *)
   1442                                          ps_ctxt->pv_me_optimised_function_list)
   1443                                         ->pf_store_4x4_sads_high_quality(
   1444                                             &s_search_prms_4x4,
   1445                                             ps_curr_layer,
   1446                                             &as_mv_limit[i1_ref_idx],
   1447                                             &ps_ctxt->s_wt_pred,
   1448                                             pi2_sads_4x4_current);
   1449                                 }
   1450                                 else
   1451                                 {
   1452                                     ((ihevce_me_optimised_function_list_t *)
   1453                                          ps_ctxt->pv_me_optimised_function_list)
   1454                                         ->pf_store_4x4_sads_high_speed(
   1455                                             &s_search_prms_4x4,
   1456                                             ps_curr_layer,
   1457                                             &as_mv_limit[i1_ref_idx],
   1458                                             &ps_ctxt->s_wt_pred,
   1459                                             pi2_sads_4x4_current);
   1460                                 }
   1461                             }
   1462                             /* Set pred direction to L0 or L1 */
   1463                             pred_lx = 1 - ps_search_results->pu1_is_past[i1_ref_idx];
   1464 
   1465                             /* Suitable context (L0 or L1) */
   1466                             ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
   1467 
   1468                             /* Coarse layer is always explicit */
   1469                             if(ME_PRISTINE_QUALITY > e_me_quality_preset)
   1470                             {
   1471                                 pf_mv_cost_compute = compute_mv_cost_coarse;
   1472                             }
   1473                             else
   1474                             {
   1475                                 /* Cost function is not called in high speed case. Below one is just a dummy function */
   1476                                 pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
   1477                             }
   1478 
   1479                             /*********************************************************************/
   1480                             /* Now, compute the mv for the top block                             */
   1481                             /*********************************************************************/
   1482                             pi2_sads_4x4_top = pi2_cur_ref_sads_4x4 + sad_top_offset;
   1483 
   1484                             /*********************************************************************/
   1485                             /* For every blk in the picture, the search range needs to be derived*/
   1486                             /* Any blk can have any mv, but practical search constraints are     */
   1487                             /* imposed by the picture boundary and amt of padding.               */
   1488                             /*********************************************************************/
   1489                             hme_derive_search_range(
   1490                                 &s_range_prms,
   1491                                 &s_pic_limit,
   1492                                 &as_mv_limit[i1_ref_idx],
   1493                                 blk_x << blk_size_shift,
   1494                                 (blk_y - 1) << blk_size_shift,
   1495                                 blk_wd,
   1496                                 blk_ht);
   1497 
   1498                             /* Compute the mv for the top block */
   1499                             if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
   1500                             {
   1501                                 ((ihevce_me_optimised_function_list_t *)
   1502                                      ps_ctxt->pv_me_optimised_function_list)
   1503                                     ->pf_combine_4x4_sads_and_compute_cost_high_quality(
   1504                                         i1_ref_idx,
   1505                                         &s_range_prms, /* Both 4x8 and 8x4 has same search range */
   1506                                         &as_mv_limit[i1_ref_idx],
   1507                                         &best_mv_4x8,
   1508                                         &best_mv_8x4,
   1509                                         ps_pred_ctxt,
   1510                                         pf_mv_cost_compute,
   1511                                         pi2_sads_4x4_top, /* Current SAD block */
   1512                                         (pi2_sads_4x4_top +
   1513                                          ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
   1514                                         pi2_sads_4x4_current); /* South SAD block */
   1515                             }
   1516                             else
   1517                             {
   1518                                 ((ihevce_me_optimised_function_list_t *)
   1519                                      ps_ctxt->pv_me_optimised_function_list)
   1520                                     ->pf_combine_4x4_sads_and_compute_cost_high_speed(
   1521                                         i1_ref_idx,
   1522                                         &s_range_prms, /* Both 4x8 and 8x4 has same search range */
   1523                                         &as_mv_limit[i1_ref_idx],
   1524                                         &best_mv_4x8,
   1525                                         &best_mv_8x4,
   1526                                         ps_pred_ctxt,
   1527                                         pf_mv_cost_compute,
   1528                                         pi2_sads_4x4_top, /* Current SAD block */
   1529                                         (pi2_sads_4x4_top +
   1530                                          ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
   1531                                         pi2_sads_4x4_current); /* South SAD block */
   1532                             }
   1533 
   1534                             ps_candt_fs_4x8->s_mv.i2_mvx = best_mv_4x8.i2_mv_x;
   1535                             ps_candt_fs_4x8->s_mv.i2_mvy = best_mv_4x8.i2_mv_y;
   1536                             ps_candt_fs_4x8->i1_ref_idx = i1_ref_idx;
   1537 
   1538                             ps_candt_fs_8x4->s_mv.i2_mvx = best_mv_8x4.i2_mv_x;
   1539                             ps_candt_fs_8x4->s_mv.i2_mvy = best_mv_8x4.i2_mv_y;
   1540                             ps_candt_fs_8x4->i1_ref_idx = i1_ref_idx;
   1541                         }
   1542 
   1543                         /* call the appropriate Search Algo for 4x8S. The 4x8N would  */
   1544                         /* have already been called by top block */
   1545                         hme_pred_search_square_stepn(
   1546                             &s_search_prms_8x4,
   1547                             ps_curr_layer,
   1548                             &ps_ctxt->s_wt_pred,
   1549                             e_me_quality_preset,
   1550                             (ihevce_me_optimised_function_list_t *)
   1551                                 ps_ctxt->pv_me_optimised_function_list
   1552 
   1553                         );
   1554 
   1555                         /* Call the appropriate search algo for 8x4E */
   1556                         hme_pred_search_square_stepn(
   1557                             &s_search_prms_4x8,
   1558                             ps_curr_layer,
   1559                             &ps_ctxt->s_wt_pred,
   1560                             e_me_quality_preset,
   1561                             (ihevce_me_optimised_function_list_t *)
   1562                                 ps_ctxt->pv_me_optimised_function_list);
   1563 
   1564                         if(ME_XTREME_SPEED_25 != e_me_quality_preset)
   1565                         {
   1566                             /* Histogram updates across different Ref ID for global MV */
   1567                             hme_update_histogram(
   1568                                 ps_ctxt->aps_mv_hist[i1_ref_idx],
   1569                                 aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvx,
   1570                                 aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvy);
   1571                             hme_update_histogram(
   1572                                 ps_ctxt->aps_mv_hist[i1_ref_idx],
   1573                                 aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvx,
   1574                                 aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvy);
   1575                         }
   1576 
   1577                         /* update the best results to the mv bank */
   1578                         hme_update_mv_bank_coarse(
   1579                             ps_search_results,
   1580                             ps_curr_layer->ps_layer_mvbank,
   1581                             blk_x,
   1582                             (blk_y - 1),
   1583                             ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
   1584                                 search_node_top_offset, /* Top Candidate */
   1585                             ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
   1586                                 search_node_left_offset, /* Left candidate */
   1587                             i1_ref_idx,
   1588                             &s_mv_update_prms);
   1589 
   1590                         /* Copy the best search result to 5 row array for future use */
   1591                         *(ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + blk_x +
   1592                           ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
   1593                             *(aps_best_search_node_4x8[i1_ref_idx]);
   1594 
   1595                         *(ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + blk_x +
   1596                           ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
   1597                             *(aps_best_search_node_8x4[i1_ref_idx]);
   1598 
   1599                         /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
   1600                         /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
   1601                         if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
   1602                         {
   1603                             WORD32 num_mvs, i, j;
   1604                             search_node_t *aps_search_nodes[4];
   1605                             /* Best results for 8x4R and 4x8B blocks */
   1606                             search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
   1607 
   1608                             num_mvs = ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
   1609 
   1610                             /*************************************************************************/
   1611                             /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */
   1612                             /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp.      */
   1613                             /* If number of results to be stored is 4, then we store all these 4     */
   1614                             /* results, else we pick best ones                                       */
   1615                             /*************************************************************************/
   1616                             ps_search_node_8x4_r =
   1617                                 ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
   1618                             ps_search_node_4x8_b =
   1619                                 ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
   1620 
   1621                             ASSERT(num_mvs <= 4);
   1622 
   1623                             /* Doing this to sort best results */
   1624                             aps_search_nodes[0] = ps_search_node_8x4_r;
   1625                             aps_search_nodes[1] = ps_search_node_4x8_b;
   1626                             aps_search_nodes[2] =
   1627                                 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
   1628                                 search_node_left_offset; /* Left candidate */
   1629                             aps_search_nodes[3] =
   1630                                 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
   1631                                 search_node_top_offset; /* Top Candidate */
   1632 
   1633                             /* Note : Need to be resolved!!! */
   1634                             /* Added this to match with "hme_update_mv_bank_coarse" */
   1635                             if(num_mvs != 4)
   1636                             {
   1637                                 /* Run through the results, store them in best to worst order */
   1638                                 for(i = 0; i < num_mvs; i++)
   1639                                 {
   1640                                     for(j = i + 1; j < 4; j++)
   1641                                     {
   1642                                         if(aps_search_nodes[j]->i4_tot_cost <
   1643                                            aps_search_nodes[i]->i4_tot_cost)
   1644                                         {
   1645                                             SWAP_HME(
   1646                                                 aps_search_nodes[j],
   1647                                                 aps_search_nodes[i],
   1648                                                 search_node_t *);
   1649                                         }
   1650                                     }
   1651                                 }
   1652                             }
   1653 
   1654                             /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
   1655                             for(i = 0; i < num_mvs; i++)
   1656                             {
   1657                                 hme_update_dynamic_search_params(
   1658                                     &ps_ctxt->s_coarse_dyn_range_prms
   1659                                          .as_dyn_range_prms[i4_layer_id][i1_ref_idx],
   1660                                     aps_search_nodes[i]->s_mv.i2_mvy);
   1661                             }
   1662                         }
   1663                     }
   1664                 }
   1665 
   1666                 /* Update the number of blocks processed in the current row */
   1667                 ihevce_dmgr_set_row_row_sync(
   1668                     pv_hme_dep_mngr,
   1669                     (blk_x + 1),
   1670                     blk_y,
   1671                     0 /* Col Tile No. : Not supported in PreEnc*/);
   1672             }
   1673 
   1674             /* set the output dependency after completion of row */
   1675             ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
   1676         }
   1677     }
   1678 
   1679     return;
   1680 }
   1681