Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /*!
     21 ******************************************************************************
     22 * \file hme_utils.h
     23 *
     24 * \brief
     25 *    Prototypes for various utilities used by coarse/refinement/subpel fxns
     26 *
     27 * \date
     28 *    18/09/2012
     29 *
     30 * \author
     31 *    Ittiam
     32 *
     33 ******************************************************************************
     34 */
     35 
     36 #ifndef _HME_UTILS_H_
     37 #define _HME_UTILS_H_
     38 
     39 /*****************************************************************************/
     40 /* Functions                                                                 */
     41 /*****************************************************************************/
     42 
     43 /**
     44 ********************************************************************************
     45 *  @fn     hme_init_histogram
     46 *
     47 *  @brief  Initialises the mv histogram structure (presumably for the given
     48 *          maximum mv range; confirm against the definition).
     49 *
     50 *  @param[in,out]  ps_hist : the histogram structure
     51 *
     52 *  @param[in]  i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
     53 *
     54 *  @param[in]  i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
     55 *
     56 *  @return None
     57 ********************************************************************************
     58 */
     59 void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y);
     60 
     61 /**
     62 ********************************************************************************
     63 *  @fn     hme_update_histogram
     64 *
     65 *  @brief  Updates the histogram given an mv entry
     66 *
     67 *  @param[in,out]  ps_hist : the histogram structure
     68 *
     69 *  @param[in]  i4_mv_x : x component of the mv (fpel units)
     70 *
     71 *  @param[in]  i4_mv_y : y component of the mv (fpel units)
     72 *
     73 *  @return None
     74 ********************************************************************************
     75 */
     76 void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y);
     77 
     78 /**
     79 ********************************************************************************
     80 *  @fn     hme_get_global_mv
     81 *
     82 *  @brief  returns the global mv of a previous picture. Accounts for the fact
     83 *          that the delta poc of the previous picture may have been different
     84 *          from delta poc of current picture. Delta poc is POC difference
     85 *          between a picture and its reference.
     86 *
     87 *  @param[in]  ps_prev_layer: layer ctxt of the previous picture (TODO confirm)
     88 *  @param[out]  ps_mv: hme_mv_t structure where the motion vector is returned
     89 *  @param[in]  i4_delta_poc: the delta poc for the current pic w.r.t. reference
     90 *
     91 *  @return None
     92 ********************************************************************************
     93 */
     94 void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc);
     95 
     96 /**
     97 ********************************************************************************
     98 *  @fn     hme_calculate_global_mv
     99 *
    100 *  @brief  Calculates global mv for a given histogram
    101 *
    102 *  @param[in]  ps_hist : the histogram structure
    103 *
    104 *  @param[out]  ps_mv : used to return the global mv
    105 *
    106 *  @param[in]  e_lobe_type : refer to GMV_MVTYPE_T
    107 *
    108 *  @return None
    109 ********************************************************************************
    110 */
    111 void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type);
    112 
    113 /**
    114 ********************************************************************************
    115 *  @fn     hme_collate_fpel_results(search_results_t *ps_search_results,
    116 *           S08 i1_ref_idx, S08 i1_idx_to_merge)
    117 *
    118 *  @brief  After full pel search and result seeding in every search iteration
    119 *          results, this function is called to collapse a given search iteration
    120 *          results into another.
    121 *
    122 *  @param[in,out] ps_search_results : Search results data structure
    123 *  @param[in]     i1_ref_idx: id of the search iteration where the results
    124 *                   will be collapsed
    125 *  @param[in]     i1_idx_to_merge : id of the search iteration from which the
    126 *                   results are picked up.
    127 *
    128 *
    129 *  @return None
    130 ********************************************************************************
    131 */
    132 void hme_collate_fpel_results(
    133     search_results_t *ps_search_results, S08 i1_ref_idx, S08 i1_idx_to_merge);
    134 
    135 /**
    136 ********************************************************************************
    137 *  @fn     hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid, search_results_t *ps_search_results,
    138 *              U08 *pu1_pred_dir_searched, S32 i4_num_pred_dir)
    139 *
    140 *  @brief  For a given CU whose results are in ps_search_results, the 17x17
    141 *          mv grid is updated for future use within the CTB
    142 *
    143 *  @param[in] ps_search_results : Search results data structure
    144 *
    145 *  @param[out] pps_mv_grid: The mv grid (as many as num ref)
    146 *
    147 *  @param[in]  pu1_pred_dir_searched: presumably the pred dirs searched (TODO confirm)
    148 *
    149 *  @param[in]  i4_num_pred_dir: presumably the number of pred dirs to update (TODO confirm)
    150 *
    151 *  @return None
    152 ********************************************************************************
    153 */
    154 void hme_map_mvs_to_grid(
    155     mv_grid_t **pps_mv_grid,
    156     search_results_t *ps_search_results,
    157     U08 *pu1_pred_dir_searched,
    158     S32 i4_num_pred_dir);
    159 
    160 /**
    161 ********************************************************************************
    162 *  @fn     hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
    163 *
    164 *  @brief  Expands the part mask to a list of valid part ids terminated by -1
    165 *
    166 *  @param[in] i4_part_mask : bit mask of active partition ids
    167 *
    168 *  @param[out] pi4_valid_part_ids : array, each entry has one valid part id
    169 *               Terminated by -1 to signal end.
    170 *
    171 *  @return number of partitions
    172 ********************************************************************************
    173 */
    174 S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids);
    175 
    176 /**
    177 ********************************************************************************
    178 *  @fn     get_num_blks_in_ctb(S32 i4_ctb_x,
    179 *                      S32 i4_ctb_y,
    180 *                      S32 i4_pic_wd,
    181 *                      S32 i4_pic_ht,
    182 *                      S32 i4_blk_size)
    183 *
    184 *  @brief  returns the number of blks in the ctb (64x64 ctb)
    185 *
    186 *  @param[in] i4_ctb_x : pixel x offset of the top left corner of ctb in pic
    187 *
    188 *  @param[in] i4_ctb_y : pixel y offset of the top left corner of ctb in pic
    189 *
    190 *  @param[in] i4_pic_wd : width of the picture in pixels
    191 *
    192 *  @param[in] i4_pic_ht : height of the picture in pixels
    193 *
    194 *  @param[in] i4_blk_size : Size of the blk in pixels
    195 *
    196 *  @return number of blks in the ctb
    197 ********************************************************************************
    198 */
    199 S32 get_num_blks_in_ctb(S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_pic_wd, S32 i4_pic_ht, S32 i4_blk_size);
    200 
    201 /**
    202 ********************************************************************************
    203 *  @fn     hevc_avg_2d(U08 *pu1_src1,
    204 *                   U08 *pu1_src2,
    205 *                   S32 i4_src1_stride,
    206 *                   S32 i4_src2_stride,
    207 *                   S32 i4_blk_wd,
    208 *                   S32 i4_blk_ht,
    209 *                   U08 *pu1_dst,
    210 *                   S32 i4_dst_stride)
    211 *
    212 *
    213 *  @brief  point wise average of two buffers into a third buffer
    214 *
    215 *  @param[in] pu1_src1 : first source buffer
    216 *
    217 *  @param[in] pu1_src2 : 2nd source buffer
    218 *
    219 *  @param[in] i4_src1_stride : stride of source 1 buffer
    220 *
    221 *  @param[in] i4_src2_stride : stride of source 2 buffer
    222 *
    223 *  @param[in] i4_blk_wd : block width
    224 *
    225 *  @param[in] i4_blk_ht : block height
    226 *
    227 *  @param[out] pu1_dst : destination buffer
    228 *
    229 *  @param[in] i4_dst_stride : stride of the destination buffer
    230 *
    231 *  @return void
    232 ********************************************************************************
    233 */
    234 void hevc_avg_2d(
    235     U08 *pu1_src1,
    236     U08 *pu1_src2,
    237     S32 i4_src1_stride,
    238     S32 i4_src2_stride,
    239     S32 i4_blk_wd,
    240     S32 i4_blk_ht,
    241     U08 *pu1_dst,
    242     S32 i4_dst_stride);
    243 
    244 /**
    245 ********************************************************************************
    246 *  @fn     hme_pick_back_search_node(search_results_t *ps_search_results,
    247 *                                   search_node_t *ps_search_node_fwd,
    248 *                                   S32 i4_part_idx,
    249 *                                   layer_ctxt_t *ps_curr_layer)
    250 *
    251 *
    252 *  @brief  returns the search node corresponding to a ref idx in same or
    253 *          opp direction. Preference is given to opp direction, but if that
    254 *          does not yield results, same direction is attempted.
    255 *
    256 *  @param[in] ps_search_results: search results overall
    257 *
    258 *  @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
    259 *
    260 *  @param[in] i4_part_idx : partition id
    261 *
    262 *  @param[in] ps_curr_layer : layer context for current layer.
    263 *
    264 *  @return search node corresponding to the "other direction"
    265 ********************************************************************************
    266 */
    267 search_node_t *hme_pick_back_search_node(
    268     search_results_t *ps_search_results,
    269     search_node_t *ps_search_node_fwd,
    270     S32 i4_part_idx,
    271     layer_ctxt_t *ps_curr_layer);
    272 
    273 /**
    274 ********************************************************************************
    275 *  @fn     hme_study_input_segmentation(U08 *pu1_inp,
    276 *                                       S32 i4_inp_stride,
    277 *                                       S32 limit_active_partitions)
    278 *
    279 *
    280 *  @brief  Examines input 16x16 for possible edges and orientations of those,
    281 *          and returns a bit mask of partitions that should be searched for
    282 *
    283 *  @param[in] pu1_inp : input buffer
    284 *
    285 *  @param[in] i4_inp_stride: input stride
    286 *
    287 *  @param[in] limit_active_partitions : 1: Edge algo done and partitions are
    288 *               limited, 0 : Brute force, all partitions considered
    289 *
    290 *  @return part mask (bit mask of active partitions to search)
    291 ********************************************************************************
    292 */
    293 S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions);
    294 
    295 /**
    296 ********************************************************************************
    297 *  @fn     hme_init_search_results(search_results_t *ps_search_results,
    298 *                           S32 i4_num_ref,
    299 *                           S32 i4_num_best_results,
    300 *                           S32 i4_num_results_per_part,
    301 *                           BLK_SIZE_T e_blk_size,
    302 *                           S32 i4_x_off, S32 i4_y_off,
    303 *                           U08 *pu1_is_past)
    304 *
    305 *  @brief  Initializes the search results structure with some key attributes
    306 *
    307 *  @param[out] ps_search_results : search results structure to initialise
    308 *
    309 *  @param[in] i4_num_ref: corresponds to the number of ref ids searched
    310 *
    311 *  @param[in] i4_num_best_results: Number of best results for the CU to
    312 *               be maintained in the result structure
    313 *
    314 *  @param[in] i4_num_results_per_part: Per active partition the number of best
    315 *               results to be maintained
    316 *
    317 *  @param[in] e_blk_size: blk size of the CU for which this structure used
    318 *
    319 *  @param[in] i4_x_off: x offset of the top left of CU from CTB top left
    320 *
    321 *  @param[in] i4_y_off: y offset of the top left of CU from CTB top left
    322 *  @param[in] pu1_is_past: presumably per-ref "is past" flags; TODO confirm
    323 *  @return void
    324 ********************************************************************************
    325 */
    326 void hme_init_search_results(
    327     search_results_t *ps_search_results,
    328     S32 i4_num_ref,
    329     S32 i4_num_best_results,
    330     S32 i4_num_results_per_part,
    331     BLK_SIZE_T e_blk_size,
    332     S32 i4_x_off,
    333     S32 i4_y_off,
    334     U08 *pu1_is_past);
    335 
    336 /**
    337 ********************************************************************************
    338 *  @fn     hme_reset_search_results(search_results_t *ps_search_results,
    339 *                               S32 i4_part_mask, S32 mv_res)
    340 *
    341 *
    342 *  @brief  Resets the best results to maximum values, so as to allow search
    343 *          for the new CU's partitions. The existing results may be from an
    344 *          older CU using same structure.
    345 *
    346 *  @param[in,out] ps_search_results: search results structure
    347 *
    348 *  @param[in] i4_part_mask : bit mask of active partitions
    349 *
    350 *  @param[in] mv_res : Resolution of the mv predictors (fpel/qpel)
    351 *
    352 *  @return void
    353 ********************************************************************************
    354 */
    355 void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res);
    356 
    357 /**
    358 ********************************************************************************
    359 *  @fn     hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
    360 *                               S32 i4_step,
    361 *                               range_prms_t *ps_mvrange)
    362 *
    363 *  @brief  Given a central pt within mv range, and a grid of points surrounding
    364 *           this pt, this function returns a grid mask of pts within search rng
    365 *
    366 *  @param[in] ps_search_node: the centre pt of the grid
    367 *
    368 *  @param[in] i4_step: step size of grid
    369 *
    370 *  @param[in] ps_mvrange: structure containing the current mv range
    371 *
    372 *  @return bitmask of the  pts in grid within search range
    373 ********************************************************************************
    374 */
    375 S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange);
    376 
    377 /**
    378 ********************************************************************************
    379 *  @fn    layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
    380 *              me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel)
    381 *
    382 *  @brief  returns the layer ctxt of the layer with given id from the temporally
    383 *          previous frame
    384 *
    385 *  @param[in] ps_ctxt : ME context
    386 *  @param[in] ps_frm_ctxt : ME frame context (role not described here; TODO confirm)
    387 *  @param[in] i4_layer_id : id of layer required
    388 *  @param[in] i4_num_me_frm_pllel : presumably number of ME frames in parallel; TODO confirm
    389 *  @return layer ctxt of given layer id in temporally previous frame
    390 ********************************************************************************
    391 */
    392 layer_ctxt_t *hme_get_past_layer_ctxt(
    393     me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel);
    394 
    395 layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id);
    396 
    397 /**
    398 ********************************************************************************
    399 *  @fn    void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt,
    400 *                      BLK_SIZE_T e_blk_size,
    401 *                      S32 i4_num_ref,
    402 *                      S32 i4_num_results_per_part, U08 u1_enc)
    403 *
    404 *  @brief  Given a blk size to be used for this layer, this function initialize
    405 *          the mv bank to make it ready to store and return results.
    406 *
    407 *  @param[in,out] ps_layer_ctxt: pointer to layer ctxt
    408 *
    409 *  @param[in] e_blk_size : resolution at which mvs are stored
    410 *
    411 *  @param[in] i4_num_ref: number of reference frames corresponding to which
    412 *              results are stored.
    413 *
    414 *  @param[in] i4_num_results_per_part : Number of results to be stored per
    415 *               ref idx. So these many best results stored
    416 *
    417 *  @param[in] u1_enc : presumably flags an encode layer; TODO confirm
    418 *
    419 *  @return void
    420 ********************************************************************************
    421 */
    422 void hme_init_mv_bank(
    423     layer_ctxt_t *ps_layer_ctxt,
    424     BLK_SIZE_T e_blk_size,
    425     S32 i4_num_ref,
    426     S32 i4_num_results_per_part,
    427     U08 u1_enc);
    428 
    429 /**
    430 ********************************************************************************
    431 *  @fn    void hme_derive_search_range(range_prms_t *ps_range,
    432 *                                   range_prms_t *ps_pic_limit,
    433 *                                   range_prms_t *ps_mv_limit,
    434 *                                   S32 i4_x,
    435 *                                   S32 i4_y,
    436 *                                   S32 blk_wd,
    437 *                                   S32 blk_ht)
    438 *
    439 *  @brief  given picture limits and blk dimensions and mv search limits, obtains
    440 *          the valid search range such that the blk stays within pic boundaries,
    441 *          where picture boundaries include padded portions of picture
    442 *
    443 *  @param[out] ps_range: updated with actual search range
    444 *
    445 *  @param[in] ps_pic_limit : picture boundaries
    446 *
    447 *  @param[in] ps_mv_limit: Search range limits for the mvs
    448 *
    449 *  @param[in] i4_x : x coordinate of the blk
    450 *
    451 *  @param[in] i4_y : y coordinate of the blk
    452 *
    453 *  @param[in] blk_wd : blk width
    454 *
    455 *  @param[in] blk_ht : blk height
    456 *
    457 *  @return void
    458 ********************************************************************************
    459 */
    460 void hme_derive_search_range(
    461     range_prms_t *ps_range,
    462     range_prms_t *ps_pic_limit,
    463     range_prms_t *ps_mv_limit,
    464     S32 i4_x,
    465     S32 i4_y,
    466     S32 blk_wd,
    467     S32 blk_ht);
    468 
    469 /**
    470 ********************************************************************************
    471 *  @fn    void hme_get_spatial_candt(layer_ctxt_t *ps_curr_layer,
    472 *                                   BLK_SIZE_T e_search_blk_size,
    473 *                                   S32 blk_x,
    474 *                                   S32 blk_y,
    475 *                                   S08 i1_ref_idx,
    476 *                                   search_node_t *ps_top_neighbours,
    477 *                                   search_node_t *ps_left_neighbours, S32 i4_result_id,
    478 *                                   S32 i4_tr_avail, S32 i4_bl_avail, S32 encode)
    479 *
    480 *  @brief  Obtains top, top left, top right and left and bottom left candts
    481 *
    482 *  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
    483 *
    484 *  @param[in] e_search_blk_size : search blk size of current layer
    485 *
    486 *  @param[in] blk_x : x coordinate of the block in mv bank
    487 *
    488 *  @param[in] blk_y : y coordinate of the block in mv bank
    489 *
    490 *  @param[in] i1_ref_idx : Corresponds to ref idx from which to pick up mv
    491 *              results, useful if multiple ref idx candts maintained separately.
    492 *
    493 *  @param[out] ps_top_neighbours : T, TL, TR candts are output here
    494 *
    495 *  @param[out] ps_left_neighbours : L BL candts output here
    496 *
    497 *  @param[in] i4_result_id : If multiple results stored per ref idx, this
    498 *              pts to the id of the result
    499 *  @param[in] i4_tr_avail,i4_bl_avail,encode : undocumented here; TODO confirm semantics
    500 *  @return void
    501 ********************************************************************************
    502 */
    503 void hme_get_spatial_candt(
    504     layer_ctxt_t *ps_curr_layer,
    505     BLK_SIZE_T e_search_blk_size,
    506     S32 blk_x,
    507     S32 blk_y,
    508     S08 i1_ref_idx,
    509     search_node_t *ps_top_neighbours,
    510     search_node_t *ps_left_neighbours,
    511     S32 i4_result_id,
    512     S32 i4_tr_avail,
    513     S32 i4_bl_avail,
    514     S32 encode);
    515 
    516 void hme_get_spatial_candt_in_l1_me(
    517     layer_ctxt_t *ps_curr_layer,
    518     BLK_SIZE_T e_search_blk_size,
    519     S32 i4_blk_x,
    520     S32 i4_blk_y,
    521     S08 i1_ref_idx,
    522     U08 u1_pred_dir,
    523     search_node_t *ps_top_neighbours,
    524     search_node_t *ps_left_neighbours,
    525     S32 i4_result_id,
    526     S32 tr_avail,
    527     S32 bl_avail,
    528     S32 i4_num_act_ref_l0,
    529     S32 i4_num_act_ref_l1);
    530 
    531 /**
    532 ********************************************************************************
    533 *  @fn    void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
    534 *                                   S32 blk_x, S32 blk_y,
    535 *                                   mv_grid_t *ps_mv_grid,
    536 *                                   U08 u1_pred_dir_ctr, U08 u1_default_ref_id,
    537 *                                   S32 i4_num_act_ref_l0)
    538 *
    539 *  @brief  The 18x18 MV grid for a ctb, is filled in first row and 1st col
    540 *          this corresponds to neighbours (TL, T, TR, L, BL)
    541 *
    542 *  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
    543 *
    544 *  @param[in] blk_x : x coordinate of the block in mv bank
    545 *
    546 *  @param[in] blk_y : y coordinate of the block in mv bank
    547 *
    548 *  @param[in,out] ps_mv_grid : Grid (18x18 mvs at 4x4 level)
    549 *
    550 *  @param[in] u1_pred_dir_ctr : presumably the pred dir from which to pick up mv
    551 *              results (TODO confirm against the definition)
    552 *  @param[in] u1_default_ref_id,i4_num_act_ref_l0 : undocumented here; TODO confirm semantics
    553 *  @return void
    554 ********************************************************************************
    555 */
    556 void hme_fill_ctb_neighbour_mvs(
    557     layer_ctxt_t *ps_curr_layer,
    558     S32 blk_x,
    559     S32 blk_y,
    560     mv_grid_t *ps_mv_grid,
    561     U08 u1_pred_dir_ctr,
    562     U08 u1_default_ref_id,
    563     S32 i4_num_act_ref_l0);
    564 
    565 /**
    566 ********************************************************************************
    567 *  @fn     void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
    568 *
    569 *  @brief  Allocates a block of size = i4_size from working memory and returns
    570 *          a pointer to it
    571 *  @param[in,out] ps_buf_mgr: Buffer manager for wkg memory
    572 *
    573 *  @param[in]  i4_size : size required
    574 *
    575 *  @return void pointer to allocated memory, NULL if failure
    576 ********************************************************************************
    577 */
    578 void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size);
    579 
    580 void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr);
    581 
    582 void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size);
    583 
    584 void hme_reset_ctb_mem_mgr(ctb_mem_mgr_t *ps_ctb_mem_mgr);
    585 
    586 void hme_init_ctb_mem_mgr(ctb_mem_mgr_t *ps_ctb_mem_mgr, U08 *pu1_mem, S32 size);
    587 
    588 void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt);
    589 
    590 void hme_scale_mv_grid(mv_grid_t *ps_mv_grid);
    591 
    592 void hme_downscale_mv_grid(mv_grid_t *ps_mv_grid);
    593 
    594 void hme_create_parent_ctb(
    595     ctb_node_t *ps_ctb_node_parent,
    596     ctb_node_t *ps_ctb_child_tl,
    597     ctb_node_t *ps_ctb_child_tr,
    598     ctb_node_t *ps_ctb_child_bl,
    599     ctb_node_t *ps_ctb_child_br,
    600     CU_SIZE_T e_cu_size_parent,
    601     buf_mgr_t *ps_buf_mgr);
    602 
    603 void hme_create_merged_ctbs(
    604     search_results_t *ps_results_merged,
    605     ctb_mem_mgr_t *ps_ctb_mem_mgr,
    606     buf_mgr_t *ps_buf_mgr,
    607     ctb_node_t **pps_ctb_list_unified,
    608     S32 num_candts);
    609 
    610 void hme_init_mv_grid(mv_grid_t *ps_mv_grid);
    611 
    612 typedef void (*pf_get_wt_inp)(
    613     layer_ctxt_t *ps_curr_layer,
    614     wgt_pred_ctxt_t *ps_wt_inp_prms,
    615     S32 dst_stride,
    616     S32 pos_x,
    617     S32 pos_y,
    618     S32 size,
    619     S32 num_ref,
    620     U08 u1_is_wt_pred_on);
    621 
    622 /**
    623 ********************************************************************************
    624 *  @fn    void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
    625 *
    626 *  @brief  Pads horizontally to left side. Each pixel replicated across a line
    627 *
    628 *  @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
    629 *
    630 *  @param[in] stride : stride of destination buffer
    631 *
    632 *  @param[in] pad_wd : Amt of horizontal padding to be done
    633 *
    634 *  @param[in] pad_ht : Number of lines for which horizontal padding to be done
    635 *
    636 *  @return void
    637 ********************************************************************************
    638 */
    639 void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht);
    640 
    641 /**
    642 ********************************************************************************
    643 *  @fn    void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
    644 *
    645 *  @brief  Pads horizontally to rt side. Each pixel replicated across a line
    646 *
    647 *  @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
    648 *
    649 *  @param[in] stride : stride of destination buffer
    650 *
    651 *  @param[in] pad_wd : Amt of horizontal padding to be done
    652 *
    653 *  @param[in] pad_ht : Number of lines for which horizontal padding to be done
    654 *
    655 *  @return void
    656 ********************************************************************************
    657 */
    658 void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht);
    659 
    660 /**
    661 ********************************************************************************
    662 *  @fn    void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
    663 *
    664 *  @brief  Pads vertically on the top. Repeats the top line for top padding
    665 *
    666 *  @param[in] pu1_dst : destination pointer. Points to the line to be repeated
    667 *
    668 *  @param[in] stride : stride of destination buffer
    669 *
    670 *  @param[in] pad_ht : Amt of vertical padding to be done
    671 *
    672 *  @param[in] pad_wd : Number of columns for which vertical padding to be done
    673 *
    674 *  @return void
    675 ********************************************************************************
    676 */
    677 void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd);
    678 
    679 /**
    680 ********************************************************************************
    681 *  @fn    void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
    682 *
    683 *  @brief  Pads vertically on the bot. Repeats the line at pu1_dst for bot padding
    684 *
    685 *  @param[in] pu1_dst : destination pointer. Points to the line to be repeated
    686 *
    687 *  @param[in] stride : stride of destination buffer
    688 *
    689 *  @param[in] pad_ht : Amt of vertical padding to be done
    690 *
    691 *  @param[in] pad_wd : Number of columns for which vertical padding to be done
    692 *
    693 *  @return void
    694 ********************************************************************************
    695 */
    696 void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd);
    697 
    698 /**
    699 **************************************************************************************************
    700 *  @fn     hme_populate_pus
    701 *
    702 *  @brief  Populates the pu_results structure with the results after the subpel refinement
    703 *
    704 *          This is called post subpel refinement for 16x16s, 8x8s and
    705 *          for post merge evaluation for 32x32,64x64 CUs
    706 *
    707 *  @param[in,out] ps_search_results : Search results data structure
    708 *  @param         ps_cu_results : cu_results data structure
    709 *  @param         ps_pu_result  : Pointer to the memory for storing PU's
    710 *  NOTE(review): the remaining parameters are not described here.
    711 ****************************************************************************************************
    712 */
    713 void hme_populate_pus(
    714     me_ctxt_t *ps_thrd_ctxt,
    715     me_frm_ctxt_t *ps_ctxt,
    716     hme_subpel_prms_t *ps_subpel_prms,
    717     search_results_t *ps_search_results,
    718     inter_cu_results_t *ps_cu_results,
    719     inter_pu_results_t *ps_pu_results,
    720     pu_result_t *ps_pu_result,
    721     inter_ctb_prms_t *ps_inter_ctb_prms,
    722     wgt_pred_ctxt_t *ps_wt_prms,
    723     layer_ctxt_t *ps_curr_layer,
    724     U08 *pu1_pred_dir_searched,
    725     WORD32 i4_num_active_ref);
    726 
    727 void hme_populate_pus_8x8_cu(
    728     me_ctxt_t *ps_thrd_ctxt,
    729     me_frm_ctxt_t *ps_ctxt,
    730     hme_subpel_prms_t *ps_subpel_prms,
    731     search_results_t *ps_search_results,
    732     inter_cu_results_t *ps_cu_results,
    733     inter_pu_results_t *ps_pu_results,
    734     pu_result_t *ps_pu_result,
    735     inter_ctb_prms_t *ps_inter_ctb_prms,
    736     U08 *pu1_pred_dir_searched,
    737     WORD32 i4_num_active_ref,
    738     U08 u1_blk_8x8_mask);
    739 
    /**
    ********************************************************************************
    *  @fn     hme_recompute_lambda_from_min_8x8_act_in_ctb
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name, derives a lambda value from the minimum 8x8 activity of the
    *          CTB described by ps_cur_ipe_ctb - confirm against the definition.
    *
    *  @param  ps_ctxt        : pointer to the me_frm_ctxt_t in use
    *          ps_cur_ipe_ctb : pointer to the ipe_l0_ctb_analyse_for_me_t of the
    *                           current CTB (presumably read-only - TODO confirm)
    *
    *  @return S32; meaning not stated in this header
    ********************************************************************************
    */
    740 S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
    741     me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb);
    742 
    743 /**
    744 ********************************************************************************
    745 *  @fn     hme_update_dynamic_search_params
    746 *
    747 *  @brief  Updates the dynamic search parameters based on the current MV
    748 *
    749 *  @param[in,out]  ps_dyn_range_prms    [inout] : Dynamic range parameter structure
    750 *                  i2_mvy               [in]    : y component of the current MV
    751 *
    752 *  @return None
    753 ********************************************************************************
    754 */
    755 void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy);
    756 
    /**
    ********************************************************************************
    *  @fn     hme_create_child_nodes_cu_tree
    *
    *  @brief  NOTE(review): no description is given in this header. Presumably
    *          creates the child nodes of ps_cu_tree_cur_node inside the CU tree
    *          rooted at ps_cu_tree_root - confirm against the definition.
    *
    *  @param  ps_cu_tree_root       : pointer to the root cur_ctb_cu_tree_t node
    *          ps_cu_tree_cur_node   : pointer to the node being processed
    *          nodes_already_created : presumably the number of nodes created so
    *                                  far (TODO confirm)
    *
    *  @return S32; meaning not stated in this header (possibly the updated node
    *          count - TODO confirm)
    ********************************************************************************
    */
    757 S32 hme_create_child_nodes_cu_tree(
    758     cur_ctb_cu_tree_t *ps_cu_tree_root,
    759     cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
    760     S32 nodes_already_created);
    761 
    /**
    ********************************************************************************
    *  @fn     hme_add_new_node_to_a_sorted_array
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name, inserts ps_result_node into pps_sorted_array while keeping
    *          the array ordered - confirm the sort key against the definition.
    *
    *  @param  ps_result_node         : pointer to the search_node_t to add
    *          pps_sorted_array       : array of search_node_t pointers
    *          pu1_shifts             : U08 array; presumably holds one shift value
    *                                   per array entry (TODO confirm)
    *          u4_num_results_updated : presumably the number of entries already
    *                                   present (TODO confirm)
    *          u1_shift               : shift value associated with the new node
    *                                   (presumably - TODO confirm)
    *
    *  @return None
    ********************************************************************************
    */
    762 void hme_add_new_node_to_a_sorted_array(
    763     search_node_t *ps_result_node,
    764     search_node_t **pps_sorted_array,
    765     U08 *pu1_shifts,
    766     U32 u4_num_results_updated,
    767     U08 u1_shift);
    768 
    /**
    ********************************************************************************
    *  @fn     hme_find_pos_of_implicitly_stored_ref_id
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name, looks up i1_ref_idx within pi1_ref_idx and reports a
    *          position - confirm against the definition.
    *
    *  @param  pi1_ref_idx    : pointer to S08 reference indices
    *          i1_ref_idx     : reference index of interest
    *          i4_result_id   : result id (role not stated here)
    *          i4_num_results : presumably the number of results per reference
    *                           (TODO confirm)
    *
    *  @return S32; presumably the position found - the value returned when no
    *          match exists is not stated in this header
    ********************************************************************************
    */
    769 S32 hme_find_pos_of_implicitly_stored_ref_id(
    770     S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results);
    771 
    /**
    ********************************************************************************
    *  @fn     hme_populate_search_candidates
    *
    *  @brief  NOTE(review): no description is given in this header. All inputs
    *          and outputs travel through the single fpel_srch_cand_init_data_t
    *          argument; presumably fills in the search candidates it refers to -
    *          confirm against the definition.
    *
    *  @param  ps_ctxt : pointer to the fpel_srch_cand_init_data_t to operate on
    *
    *  @return S32; meaning not stated in this header
    ********************************************************************************
    */
    772 S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt);
    773 
    /**
    ********************************************************************************
    *  @fn     hme_init_pred_buf_info
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name, initialises prediction buffer info entries using the buffer
    *          manager - confirm against the definition.
    *
    *  @param  ps_info     : pointer to an array of MAX_NUM_INTER_PARTS
    *                        hme_pred_buf_info_t entries
    *          ps_buf_mngr : pointer to the hme_pred_buf_mngr_t in use
    *          u1_pu1_wd   : a width, U08 (units not stated here)
    *          u1_pu1_ht   : a height, U08 (units not stated here)
    *          e_part_type : partition type (PART_TYPE_T)
    *
    *  @return None
    ********************************************************************************
    */
    774 void hme_init_pred_buf_info(
    775     hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS],
    776     hme_pred_buf_mngr_t *ps_buf_mngr,
    777     U08 u1_pu1_wd,
    778     U08 u1_pu1_ht,
    779     PART_TYPE_T e_part_type);
    780 
    /**
    ********************************************************************************
    *  @fn     hme_debrief_bipred_eval
    *
    *  @brief  NOTE(review): no description is given in this header. Presumably a
    *          post-processing step run after bi-pred evaluation of one partition
    *          type result - confirm against the definition.
    *
    *  @param  ps_part_type_result  : pointer to the part_type_results_t involved
    *          ps_pred_buf_info     : pointer to an array of MAX_NUM_INTER_PARTS
    *                                 hme_pred_buf_info_t entries
    *          ps_pred_buf_mngr     : pointer to the hme_pred_buf_mngr_t in use
    *          pu1_allocated_pred_buf_array_indixes
    *                               : U08 array; presumably indices of the pred
    *                                 buffers allocated so far (TODO confirm)
    *          ps_cmn_utils_optimised_function_list
    *                               : pointer to an ihevce_cmn_opt_func_t
    *
    *  @return None
    ********************************************************************************
    */
    781 void hme_debrief_bipred_eval(
    782     part_type_results_t *ps_part_type_result,
    783     hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS],
    784     hme_pred_buf_mngr_t *ps_pred_buf_mngr,
    785     U08 *pu1_allocated_pred_buf_array_indixes,
    786     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list);
    787 
    /**
    ********************************************************************************
    *  @fn     hme_decide_search_candidate_priority_in_l1_and_l2_me
    *  @fn     hme_decide_search_candidate_priority_in_l0_me
    *
    *  @brief  NOTE(review): no description is given in this header. Both take a
    *          SEARCH_CANDIDATE_TYPE_T and return a U08; going by the names, the
    *          return value is the priority assigned to that candidate type in
    *          the named ME layer(s) - confirm against the definitions.
    *
    *  @param  e_cand_type      : search candidate type
    *          e_quality_preset : ME quality preset (l1_and_l2 variant only)
    *          u1_index         : U08 index (l0 variant only; role not stated
    *                             here)
    *
    *  @return U08; presumably the priority (ordering convention not stated in
    *          this header)
    ********************************************************************************
    */
    788 U08 hme_decide_search_candidate_priority_in_l1_and_l2_me(
    789     SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset);
    790 
    791 U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index);
    792 
    /**
    ********************************************************************************
    *  @fn     hme_search_cand_data_init
    *
    *  @brief  NOTE(review): no description is given in this header. Takes four
    *          pointer arguments (presumably outputs) and four by-value
    *          configuration inputs; going by the name, initialises search
    *          candidate bookkeeping - confirm against the definition.
    *
    *  @param  pi4_id_Z, pi4_id_coloc, pi4_num_coloc_cands,
    *          pu1_search_candidate_list_index
    *                              : pointers (presumably written by the call -
    *                                TODO confirm)
    *          i4_num_act_ref_l0   : presumably number of active L0 references
    *          i4_num_act_ref_l1   : presumably number of active L1 references
    *          u1_is_bidir_enabled : U08 flag
    *          u1_4x4_blk_in_l1me  : U08 flag
    *
    *  @return None
    ********************************************************************************
    */
    793 void hme_search_cand_data_init(
    794     S32 *pi4_id_Z,
    795     S32 *pi4_id_coloc,
    796     S32 *pi4_num_coloc_cands,
    797     U08 *pu1_search_candidate_list_index,
    798     S32 i4_num_act_ref_l0,
    799     S32 i4_num_act_ref_l1,
    800     U08 u1_is_bidir_enabled,
    801     U08 u1_4x4_blk_in_l1me);
    802 
    /**
    ********************************************************************************
    *  @fn     hme_compute_variance_for_all_parts
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name, computes a variance for each partition listed in
    *          pi4_valid_part_array over the block at pu1_data - confirm against
    *          the definition.
    *
    *  @param  pu1_data             : pointer to U08 sample data
    *          i4_data_stride       : stride of pu1_data
    *          pi4_valid_part_array : S32 array of partition ids
    *          pu4_variance         : U32 array (presumably the output; indexing
    *                                 not stated here)
    *          i4_num_valid_parts   : presumably the entry count of
    *                                 pi4_valid_part_array
    *          u1_cu_size           : CU size (units not stated here)
    *
    *  @return None
    ********************************************************************************
    */
    803 void hme_compute_variance_for_all_parts(
    804     U08 *pu1_data,
    805     S32 i4_data_stride,
    806     S32 *pi4_valid_part_array,
    807     U32 *pu4_variance,
    808     S32 i4_num_valid_parts,
    809     U08 u1_cu_size);
    810 
    /**
    ********************************************************************************
    *  @fn     hme_compute_sigmaX_and_sigmaXSquared
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name, accumulates the sum of samples (sigmaX) and the sum of
    *          squared samples (sigmaXSquared) of pu1_data per base block -
    *          confirm against the definition.
    *
    *  @param  pu1_data         : pointer to U08 sample data
    *          i4_buf_stride    : stride of pu1_data
    *          pv_sigmaX        : untyped pointer for the sigmaX values
    *          pv_sigmaXSquared : untyped pointer for the sigmaXSquared values
    *          u1_base_blk_wd, u1_base_blk_ht : base block width and height
    *          u1_blk_wd, u1_blk_ht           : block width and height
    *          u1_is_sigma_pointer_size_32_bit
    *                           : U08 flag; presumably selects whether the two
    *                             void pointers address 32-bit elements (TODO
    *                             confirm the alternative width)
    *          u1_array_stride  : stride of the sigma arrays (presumably in
    *                             elements - TODO confirm)
    *
    *  @return None
    ********************************************************************************
    */
    811 void hme_compute_sigmaX_and_sigmaXSquared(
    812     U08 *pu1_data,
    813     S32 i4_buf_stride,
    814     void *pv_sigmaX,
    815     void *pv_sigmaXSquared,
    816     U08 u1_base_blk_wd,
    817     U08 u1_base_blk_ht,
    818     U08 u1_blk_wd,
    819     U08 u1_blk_ht,
    820     U08 u1_is_sigma_pointer_size_32_bit,
    821     U08 u1_array_stride);
    822 
    /**
    ********************************************************************************
    *  @fn     hme_compute_final_sigma_of_pu_from_base_blocks
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name, combines per-base-block sigma values (32-bit inputs) into
    *          the sigma values of one PU (64-bit outputs) - confirm against the
    *          definition.
    *
    *  @param  pu4_SigmaX               : U32 array of base block sigmaX
    *          pu4_SigmaXSquared        : U32 array of base block sigmaXSquared
    *          pu8_final_sigmaX         : ULWORD64 pointer (presumably output)
    *          pu8_final_sigmaX_Squared : ULWORD64 pointer (presumably output)
    *          u1_cu_size               : CU size
    *          u1_base_block_size       : base block size
    *          i4_part_id               : partition id
    *          u1_base_blk_array_stride : stride of the base block arrays
    *
    *  @return None
    ********************************************************************************
    */
    823 void hme_compute_final_sigma_of_pu_from_base_blocks(
    824     U32 *pu4_SigmaX,
    825     U32 *pu4_SigmaXSquared,
    826     ULWORD64 *pu8_final_sigmaX,
    827     ULWORD64 *pu8_final_sigmaX_Squared,
    828     U08 u1_cu_size,
    829     U08 u1_base_block_size,
    830     S32 i4_part_id,
    831     U08 u1_base_blk_array_stride);
    832 
    /**
    ********************************************************************************
    *  @fn     hme_compute_stim_injected_distortion_for_all_parts
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name and arguments, adjusts the per-partition distortions in
    *          pi4_sad_array using the prediction at pu1_pred and the source
    *          sigma values - confirm against the definition.
    *
    *  @param  pu1_pred, i4_pred_stride : prediction samples and their stride
    *          pi4_valid_part_array     : S32 array of partition ids
    *          pu8_src_sigmaX, pu8_src_sigmaXSquared
    *                                   : ULWORD64 source sigma arrays
    *          pi4_sad_array            : S32 array (presumably updated in place
    *                                     - TODO confirm)
    *          i4_alpha_stim_multiplier : multiplier (scale not stated here)
    *          i4_inv_wt, i4_inv_wt_shift_val, i4_wpred_log_wdc
    *                                   : presumably weighted prediction terms
    *                                     (TODO confirm)
    *          i4_num_valid_parts       : presumably the entry count of
    *                                     pi4_valid_part_array
    *          u1_cu_size               : CU size
    *
    *  @return None
    ********************************************************************************
    */
    833 void hme_compute_stim_injected_distortion_for_all_parts(
    834     U08 *pu1_pred,
    835     S32 i4_pred_stride,
    836     S32 *pi4_valid_part_array,
    837     ULWORD64 *pu8_src_sigmaX,
    838     ULWORD64 *pu8_src_sigmaXSquared,
    839     S32 *pi4_sad_array,
    840     S32 i4_alpha_stim_multiplier,
    841     S32 i4_inv_wt,
    842     S32 i4_inv_wt_shift_val,
    843     S32 i4_num_valid_parts,
    844     S32 i4_wpred_log_wdc,
    845     U08 u1_cu_size);
    846 
    /**
    ********************************************************************************
    *  @fn     sigma_for_cusize_<N>_and_baseblock_size_<M>
    *
    *  @brief  Family of five prototypes sharing the arguments (pu1_data,
    *          i4_data_stride, pu4_sigmaX, pu4_sigmaXSquared); only the
    *          cusize 16 / baseblock 8 variant takes an extra diff_cu_size.
    *
    *          NOTE(review): no description is given in this header. Going by the
    *          names, each fills pu4_sigmaX / pu4_sigmaXSquared for the CU size
    *          and base block size in its name - confirm against the definitions.
    *
    *  @param  pu1_data          : pointer to U08 sample data
    *          i4_data_stride    : stride of pu1_data
    *          pu4_sigmaX        : U32 array (presumably output)
    *          pu4_sigmaXSquared : U32 array (presumably output)
    *          diff_cu_size      : U08, 16/8 variant only (role not stated here)
    *
    *  @return None
    ********************************************************************************
    */
    847 void sigma_for_cusize_16_and_baseblock_size_16(
    848     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
    849 
    850 void sigma_for_cusize_16_and_baseblock_size_8(
    851     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared, U08 diff_cu_size);
    852 
    853 void sigma_for_cusize_16_and_baseblock_size_4(
    854     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
    855 
    856 void sigma_for_cusize_32_and_baseblock_size_32(
    857     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
    858 
    859 void sigma_for_cusize_64_and_baseblock_size_64(
    860     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
    861 
    /**
    ********************************************************************************
    *  @fn     hme_choose_best_noise_preserver_amongst_fpel_and_subpel_winners
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name, compares the full-pel winners in ps_fullpel_winner_data with
    *          the sub-pel winners in pps_part_results and keeps whichever better
    *          preserves noise - confirm the criterion against the definition.
    *
    *  @param  ps_fullpel_winner_data   : pointer to a fullpel_refine_ctxt_t
    *          pps_part_results         : array of search_node_t pointers
    *          ps_curr_layer            : pointer to the current layer_ctxt_t
    *          ps_wt_inp_prms           : pointer to a wgt_pred_ctxt_t
    *          pu4_src_variance         : U32 array of source variances
    *          i4_cu_x_off_in_ctb, i4_cu_y_off_in_ctb
    *                                   : CU offset within the CTB
    *          i4_ctb_x_off, i4_ctb_y_off
    *                                   : CTB offset (reference frame of the
    *                                     offset not stated here)
    *          i4_inp_stride            : input stride
    *          i4_alpha_stim_multiplier : multiplier (scale not stated here)
    *          u1_subpel_uses_satd      : U08 flag
    *
    *  @return None
    ********************************************************************************
    */
    862 void hme_choose_best_noise_preserver_amongst_fpel_and_subpel_winners(
    863     fullpel_refine_ctxt_t *ps_fullpel_winner_data,
    864     search_node_t **pps_part_results,
    865     layer_ctxt_t *ps_curr_layer,
    866     wgt_pred_ctxt_t *ps_wt_inp_prms,
    867     U32 *pu4_src_variance,
    868     S32 i4_cu_x_off_in_ctb,
    869     S32 i4_cu_y_off_in_ctb,
    870     S32 i4_ctb_x_off,
    871     S32 i4_ctb_y_off,
    872     S32 i4_inp_stride,
    873     S32 i4_alpha_stim_multiplier,
    874     U08 u1_subpel_uses_satd);
    875 
    876 #if TEMPORAL_NOISE_DETECT
    /**
    ********************************************************************************
    *  @fn     ihevce_16x16block_temporal_noise_detect
    *
    *  @brief  Declared only when TEMPORAL_NOISE_DETECT evaluates to non-zero.
    *
    *          NOTE(review): no description is given in this header. Going by the
    *          name, performs temporal noise detection for a 16x16 block -
    *          confirm the behaviour and the meaning of each argument against the
    *          definition.
    *
    *  @return WORD32; meaning not stated in this header
    ********************************************************************************
    */
    877 WORD32 ihevce_16x16block_temporal_noise_detect(
    878     WORD32 had_block_size,
    879     WORD32 ctb_width,
    880     WORD32 ctb_height,
    881     ihevce_ctb_noise_params *ps_ctb_noise_params,
    882     fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
    883     hme_search_prms_t *s_search_prms_blk,
    884     me_frm_ctxt_t *ps_ctxt,
    885     WORD32 num_pred_dir,
    886     WORD32 i4_num_act_ref_l0,
    887     WORD32 i4_num_act_ref_l1,
    888     WORD32 i4_cu_x_off,
    889     WORD32 i4_cu_y_off,
    890     wgt_pred_ctxt_t *ps_wt_inp_prms,
    891     WORD32 input_stride,
    892     WORD32 index_8x8_block,
    893     WORD32 num_horz_blocks,
    894     WORD32 num_8x8_in_ctb_row,
    895     WORD32 i4_index_variance);
    896 #endif
    897 
    898 /**
    899 ********************************************************************************
    900 *  @fn     hme_decide_part_types
    901 *
    902 *  @brief  Does uni/bi evaluation across various partition types,
    903 *          decides best inter partition types for the CU, compares
    904 *          intra cost and decides the best K results for the CU
    905 *
    906 *          This is called post subpel refinement for 16x16s, 8x8s and
    907 *          for post merge evaluation for 32x32,64x64 CUs
    908 *
    909 *  @param[in,out] ps_search_results : Search results data structure
    910 *                 - In : 2 lists of up to 2 mvs & refids, active partition mask
    911 *                 - Out: Best results for final rdo evaluation of the cu
    912 *
    913 *  @param[in]     ps_subpel_prms : Sub pel params data structure
    914
    915 *
        *  NOTE(review): ps_search_results and ps_subpel_prms are not parameters
        *  of the prototype below, which takes ps_cu_results, ps_pu_results,
        *  ps_inter_ctb_prms, ps_ctxt and two optimised-function-list pointers.
        *  The parameter documentation above appears to predate the current
        *  signature - confirm against the definition and update.
        *
    916 *  @par Description
    917 *    --------------------------------------------------------------------------------
    918 *     Flow:
    919 *            for each category (SMP,AMP,2Nx2N based on part mask)
    920 *            {
    921 *                for each part_type
    922 *                {
    923 *                    for each part
    924 *                        pick best candidate from each list
    925 *                    combine uni part type
    926 *                    update best results for part type
    927 *                }
    928 *                pick the best part type for given category (for SMP & AMP)
    929 *            }
    930 *                    ||
    931 *                    ||
    932 *                    \/
    933 *            for up to 3 best part types
    934 *            {
    935 *                for each part
    936 *                {
    937 *                    compute fixed size had for all uni and remember coeffs
    938 *                    compute bisatd
    939 *                    uni vs bi and gives up to two results
    940 *                    also gives the pt level pred buffer
    941 *                }
    942 *             }
    943 *                    ||
    944 *                    ||
    945 *                    \/
    946 *            select X candidates for tu recursion as per the Note below
    947 *               tu_rec_on_part_type (reuse transform coeffs)
    948 *                    ||
    949 *                    ||
    950 *                    \/
    951 *            insert intra nodes at appropriate result id
    952 *                    ||
    953 *                    ||
    954 *                    \/
    955 *            populate y best results for rdo based on preset
    956 *
    957 *     Note :
    958 *     number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq
    959 *     number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq
    960 *     --------------------------------------------------------------------------------
    961 *
    962 *  @return None
    963 ********************************************************************************
    964 */
    965 void hme_decide_part_types(
    966     inter_cu_results_t *ps_cu_results,
    967     inter_pu_results_t *ps_pu_results,
    968     inter_ctb_prms_t *ps_inter_ctb_prms,
    969     me_frm_ctxt_t *ps_ctxt,
    970     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
    971     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
    972 
    /**
    ********************************************************************************
    *  @fn     hme_compute_pred_and_evaluate_bi
    *
    *  @brief  NOTE(review): no description is given in this header. Going by the
    *          name, computes the prediction for one partition type result and
    *          evaluates bi-prediction for it - confirm against the definition.
    *
    *  @param  ps_cu_results       : pointer to the inter_cu_results_t of the CU
    *          ps_pu_results       : pointer to the inter_pu_results_t of the CU
    *          ps_inter_ctb_prms   : pointer to the inter_ctb_prms_t in use
    *          ps_part_type_result : pointer to the part_type_results_t evaluated
    *          pu8_winning_pred_sigmaXSquare, pu8_winning_pred_sigmaX
    *                              : ULWORD64 pointers (presumably outputs for the
    *                                winning prediction - TODO confirm)
    *          ps_cmn_utils_optimised_function_list
    *                              : pointer to an ihevce_cmn_opt_func_t
    *          ps_me_optimised_function_list
    *                              : pointer to an
    *                                ihevce_me_optimised_function_list_t
    *
    *  @return None
    ********************************************************************************
    */
    973 void hme_compute_pred_and_evaluate_bi(
    974     inter_cu_results_t *ps_cu_results,
    975     inter_pu_results_t *ps_pu_results,
    976     inter_ctb_prms_t *ps_inter_ctb_prms,
    977     part_type_results_t *ps_part_type_result,
    978     ULWORD64 *pu8_winning_pred_sigmaXSquare,
    979     ULWORD64 *pu8_winning_pred_sigmaX,
    980     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
    981     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
    982 
    983 /**
    984 ********************************************************************************
    985 *  @fn     hme_insert_intra_nodes_post_bipred
    986 *
    987 *  @brief  Compares intra costs (populated by IPE) with the best inter costs
    988 *          (populated after evaluating bi-pred) and updates the best results
    989 *          if intra cost is better
    990 *
    991 *  @param[in,out]  ps_cu_results    [inout] : Best results structure of the CU
    992 *                  ps_cur_ipe_ctb   [in]    : Intra results for the current CTB
    993 *                  i4_frm_qstep     [in]    : Current frame quantizer (qscale)
    994 *
    995 *  @return None
    996 ********************************************************************************
    997 */
    998 void hme_insert_intra_nodes_post_bipred(
    999     inter_cu_results_t *ps_cu_results,
   1000     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
   1001     WORD32 i4_frm_qstep);
   1002 
   /**
   ********************************************************************************
   *  @fn     hme_set_mv_limit_using_dvsr_data
   *
   *  @brief  NOTE(review): no description is given in this header. Going by the
   *          name, sets the MV limits in ps_mv_limit from DVSR data (presumably
   *          the dynamic vertical search range) - confirm against the
   *          definition.
   *
   *  @param  ps_ctxt                   : pointer to the me_frm_ctxt_t in use
   *          ps_curr_layer             : pointer to the current layer_ctxt_t
   *          ps_mv_limit               : pointer to range_prms_t (presumably
   *                                      the output - TODO confirm)
   *          pi2_prev_enc_frm_max_mv_y : S16 array; presumably the maximum MV y
   *                                      of previously encoded frames
   *          u1_num_act_ref_pics       : number of active reference pictures
   *
   *  @return None
   ********************************************************************************
   */
   1003 void hme_set_mv_limit_using_dvsr_data(
   1004     me_frm_ctxt_t *ps_ctxt,
   1005     layer_ctxt_t *ps_curr_layer,
   1006     range_prms_t *ps_mv_limit,
   1007     S16 *pi2_prev_enc_frm_max_mv_y,
   1008     U08 u1_num_act_ref_pics);
   1009 
   /**
   ********************************************************************************
   *  @fn     hme_part_mask_populator
   *
   *  @brief  NOTE(review): no description is given in this header. Going by the
   *          name and the S32 return type, derives a partition mask for the
   *          block at pu1_inp - confirm against the definition.
   *
   *  @param  pu1_inp                    : pointer to U08 input samples
   *          i4_inp_stride              : stride of pu1_inp
   *          u1_limit_active_partitions : U08 flag
   *          u1_is_bPic                 : U08 flag
   *          u1_is_refPic               : U08 flag
   *          u1_blk_8x8_mask            : U08 mask; presumably one bit per 8x8
   *                                       block (TODO confirm bit layout)
   *          e_me_quality_preset        : ME quality preset
   *
   *  @return S32; presumably the partition mask (bit layout not stated in this
   *          header)
   ********************************************************************************
   */
   1010 S32 hme_part_mask_populator(
   1011     U08 *pu1_inp,
   1012     S32 i4_inp_stride,
   1013     U08 u1_limit_active_partitions,
   1014     U08 u1_is_bPic,
   1015     U08 u1_is_refPic,
   1016     U08 u1_blk_8x8_mask,
   1017     ME_QUALITY_PRESETS_T e_me_quality_preset);
   1018 
   1019 #endif /* #ifndef _HME_UTILS_H_ */
   1020