Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /*****************************************************************************/
     22 /* File Includes                                                             */
     23 /*****************************************************************************/
     24 /* System include files */
     25 #include <stdio.h>
     26 #include <string.h>
     27 #include <stdlib.h>
     28 #include <assert.h>
     29 #include <stdarg.h>
     30 #include <math.h>
     31 #include <limits.h>
     32 
     33 /* User include files */
     34 #include "ihevc_typedefs.h"
     35 #include "itt_video_api.h"
     36 #include "ihevce_api.h"
     37 
     38 #include "rc_cntrl_param.h"
     39 #include "rc_frame_info_collector.h"
     40 #include "rc_look_ahead_params.h"
     41 
     42 #include "ihevc_defs.h"
     43 #include "ihevc_structs.h"
     44 #include "ihevc_platform_macros.h"
     45 #include "ihevc_deblk.h"
     46 #include "ihevc_itrans_recon.h"
     47 #include "ihevc_chroma_itrans_recon.h"
     48 #include "ihevc_chroma_intra_pred.h"
     49 #include "ihevc_intra_pred.h"
     50 #include "ihevc_inter_pred.h"
     51 #include "ihevc_mem_fns.h"
     52 #include "ihevc_padding.h"
     53 #include "ihevc_weighted_pred.h"
     54 #include "ihevc_sao.h"
     55 #include "ihevc_resi_trans.h"
     56 #include "ihevc_quant_iquant_ssd.h"
     57 #include "ihevc_cabac_tables.h"
     58 
     59 #include "ihevce_defs.h"
     60 #include "ihevce_lap_enc_structs.h"
     61 #include "ihevce_multi_thrd_structs.h"
     62 #include "ihevce_multi_thrd_funcs.h"
     63 #include "ihevce_me_common_defs.h"
     64 #include "ihevce_had_satd.h"
     65 #include "ihevce_error_codes.h"
     66 #include "ihevce_bitstream.h"
     67 #include "ihevce_cabac.h"
     68 #include "ihevce_rdoq_macros.h"
     69 #include "ihevce_function_selector.h"
     70 #include "ihevce_enc_structs.h"
     71 #include "ihevce_entropy_structs.h"
     72 #include "ihevce_cmn_utils_instr_set_router.h"
     73 #include "ihevce_enc_loop_structs.h"
     74 #include "ihevce_bs_compute_ctb.h"
     75 #include "ihevce_global_tables.h"
     76 #include "ihevce_dep_mngr_interface.h"
     77 #include "hme_datatype.h"
     78 #include "hme_interface.h"
     79 #include "hme_common_defs.h"
     80 #include "hme_defs.h"
     81 #include "ihevce_me_instr_set_router.h"
     82 #include "hme_globals.h"
     83 #include "hme_utils.h"
     84 #include "hme_coarse.h"
     85 #include "hme_refine.h"
     86 #include "hme_err_compute.h"
     87 #include "hme_common_utils.h"
     88 #include "hme_search_algo.h"
     89 #include "ihevce_profile.h"
     90 
     91 /*****************************************************************************/
     92 /* Function Definitions                                                      */
     93 /*****************************************************************************/
     94 
     95 void hme_init_globals()
     96 {
     97     GRID_PT_T id;
     98     S32 i, j;
     99     /*************************************************************************/
    100     /* Initialize the lookup table for x offset, y offset, optimized mask    */
    101     /* based on grid id. The design is as follows:                           */
    102     /*                                                                       */
    103     /*     a  b  c  d                                                        */
    104     /*    TL  T TR  e                                                        */
    105     /*     L  C  R  f                                                        */
    106     /*    BL  B BR                                                           */
    107     /*                                                                       */
    108     /*  IF a non corner pt, like T is the new minima, then we need to        */
    109     /*  evaluate only 3 new pts, in this case, a, b, c. So the optimal       */
    110     /*  grid mask would reflect this. If a corner pt like TR is the new      */
    111     /*  minima, then we need to evaluate 5 new pts, in this case, b, c, d,   */
    112     /*  e and f. So the grid mask will have 5 pts enabled.                   */
    113     /*************************************************************************/
    114 
    115     id = PT_C;
    116     gai4_opt_grid_mask[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
    117     gai1_grid_id_to_x[id] = 0;
    118     gai1_grid_id_to_y[id] = 0;
    119     gai4_opt_grid_mask_diamond[id] = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C));
    120     gai4_opt_grid_mask_conventional[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
    121 
    122     id = PT_L;
    123     gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL);
    124     gai1_grid_id_to_x[id] = -1;
    125     gai1_grid_id_to_y[id] = 0;
    126     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
    127     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
    128 
    129     id = PT_R;
    130     gai4_opt_grid_mask[id] = BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR);
    131     gai1_grid_id_to_x[id] = 1;
    132     gai1_grid_id_to_y[id] = 0;
    133     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
    134     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
    135 
    136     id = PT_T;
    137     gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR);
    138     gai1_grid_id_to_x[id] = 0;
    139     gai1_grid_id_to_y[id] = -1;
    140     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
    141     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
    142 
    143     id = PT_B;
    144     gai4_opt_grid_mask[id] = BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR);
    145     gai1_grid_id_to_x[id] = 0;
    146     gai1_grid_id_to_y[id] = 1;
    147     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
    148     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
    149 
    150     id = PT_TL;
    151     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_T];
    152     gai1_grid_id_to_x[id] = -1;
    153     gai1_grid_id_to_y[id] = -1;
    154     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L);
    155 
    156     id = PT_TR;
    157     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_T];
    158     gai1_grid_id_to_x[id] = 1;
    159     gai1_grid_id_to_y[id] = -1;
    160     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R);
    161 
    162     id = PT_BL;
    163     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_B];
    164     gai1_grid_id_to_x[id] = -1;
    165     gai1_grid_id_to_y[id] = 1;
    166     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_L) | BIT_EN(PT_B);
    167 
    168     id = PT_BR;
    169     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_B];
    170     gai1_grid_id_to_x[id] = 1;
    171     gai1_grid_id_to_y[id] = 1;
    172     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_B);
    173 
    174     ge_part_id_to_blk_size[CU_8x8][PART_ID_2Nx2N] = BLK_8x8;
    175     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_T] = BLK_8x4;
    176     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_B] = BLK_8x4;
    177     ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_L] = BLK_4x8;
    178     ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_R] = BLK_4x8;
    179     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TL] = BLK_4x4;
    180     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TR] = BLK_4x4;
    181     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BL] = BLK_4x4;
    182     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BR] = BLK_4x4;
    183     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_T] = BLK_INVALID;
    184     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_B] = BLK_INVALID;
    185     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_T] = BLK_INVALID;
    186     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_B] = BLK_INVALID;
    187     ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_L] = BLK_INVALID;
    188     ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_R] = BLK_INVALID;
    189     ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_L] = BLK_INVALID;
    190     ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_R] = BLK_INVALID;
    191 
    192     ge_part_id_to_blk_size[CU_16x16][PART_ID_2Nx2N] = BLK_16x16;
    193     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_T] = BLK_16x8;
    194     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_B] = BLK_16x8;
    195     ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_L] = BLK_8x16;
    196     ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_R] = BLK_8x16;
    197     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TL] = BLK_8x8;
    198     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TR] = BLK_8x8;
    199     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BL] = BLK_8x8;
    200     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BR] = BLK_8x8;
    201     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_T] = BLK_16x4;
    202     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_B] = BLK_16x12;
    203     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_T] = BLK_16x12;
    204     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_B] = BLK_16x4;
    205     ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_L] = BLK_4x16;
    206     ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_R] = BLK_12x16;
    207     ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_L] = BLK_12x16;
    208     ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_R] = BLK_4x16;
    209 
    210     ge_part_id_to_blk_size[CU_32x32][PART_ID_2Nx2N] = BLK_32x32;
    211     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_T] = BLK_32x16;
    212     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_B] = BLK_32x16;
    213     ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_L] = BLK_16x32;
    214     ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_R] = BLK_16x32;
    215     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TL] = BLK_16x16;
    216     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TR] = BLK_16x16;
    217     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BL] = BLK_16x16;
    218     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BR] = BLK_16x16;
    219     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_T] = BLK_32x8;
    220     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_B] = BLK_32x24;
    221     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_T] = BLK_32x24;
    222     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_B] = BLK_32x8;
    223     ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_L] = BLK_8x32;
    224     ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_R] = BLK_24x32;
    225     ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_L] = BLK_24x32;
    226     ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_R] = BLK_8x32;
    227 
    228     ge_part_id_to_blk_size[CU_64x64][PART_ID_2Nx2N] = BLK_64x64;
    229     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_T] = BLK_64x32;
    230     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_B] = BLK_64x32;
    231     ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_L] = BLK_32x64;
    232     ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_R] = BLK_32x64;
    233     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TL] = BLK_32x32;
    234     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TR] = BLK_32x32;
    235     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BL] = BLK_32x32;
    236     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BR] = BLK_32x32;
    237     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_T] = BLK_64x16;
    238     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_B] = BLK_64x48;
    239     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_T] = BLK_64x48;
    240     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_B] = BLK_64x16;
    241     ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_L] = BLK_16x64;
    242     ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_R] = BLK_48x64;
    243     ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_L] = BLK_48x64;
    244     ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_R] = BLK_16x64;
    245 
    246     gau1_num_parts_in_part_type[PRT_2Nx2N] = 1;
    247     gau1_num_parts_in_part_type[PRT_2NxN] = 2;
    248     gau1_num_parts_in_part_type[PRT_Nx2N] = 2;
    249     gau1_num_parts_in_part_type[PRT_NxN] = 4;
    250     gau1_num_parts_in_part_type[PRT_2NxnU] = 2;
    251     gau1_num_parts_in_part_type[PRT_2NxnD] = 2;
    252     gau1_num_parts_in_part_type[PRT_nLx2N] = 2;
    253     gau1_num_parts_in_part_type[PRT_nRx2N] = 2;
    254 
    255     for(i = 0; i < MAX_PART_TYPES; i++)
    256         for(j = 0; j < MAX_NUM_PARTS; j++)
    257             ge_part_type_to_part_id[i][j] = PART_ID_INVALID;
    258 
    259     /* 2Nx2N only one partition */
    260     ge_part_type_to_part_id[PRT_2Nx2N][0] = PART_ID_2Nx2N;
    261 
    262     /* 2NxN 2 partitions */
    263     ge_part_type_to_part_id[PRT_2NxN][0] = PART_ID_2NxN_T;
    264     ge_part_type_to_part_id[PRT_2NxN][1] = PART_ID_2NxN_B;
    265 
    266     /* Nx2N 2 partitions */
    267     ge_part_type_to_part_id[PRT_Nx2N][0] = PART_ID_Nx2N_L;
    268     ge_part_type_to_part_id[PRT_Nx2N][1] = PART_ID_Nx2N_R;
    269 
    270     /* NxN 4 partitions */
    271     ge_part_type_to_part_id[PRT_NxN][0] = PART_ID_NxN_TL;
    272     ge_part_type_to_part_id[PRT_NxN][1] = PART_ID_NxN_TR;
    273     ge_part_type_to_part_id[PRT_NxN][2] = PART_ID_NxN_BL;
    274     ge_part_type_to_part_id[PRT_NxN][3] = PART_ID_NxN_BR;
    275 
    276     /* AMP 2Nx (N/2 + 3N/2) 2 partitions */
    277     ge_part_type_to_part_id[PRT_2NxnU][0] = PART_ID_2NxnU_T;
    278     ge_part_type_to_part_id[PRT_2NxnU][1] = PART_ID_2NxnU_B;
    279 
    280     /* AMP 2Nx (3N/2 + N/2) 2 partitions */
    281     ge_part_type_to_part_id[PRT_2NxnD][0] = PART_ID_2NxnD_T;
    282     ge_part_type_to_part_id[PRT_2NxnD][1] = PART_ID_2NxnD_B;
    283 
    284     /* AMP (N/2 + 3N/2) x 2N 2 partitions */
    285     ge_part_type_to_part_id[PRT_nLx2N][0] = PART_ID_nLx2N_L;
    286     ge_part_type_to_part_id[PRT_nLx2N][1] = PART_ID_nLx2N_R;
    287 
    288     /* AMP (3N/2 + N/2) x 2N 2 partitions */
    289     ge_part_type_to_part_id[PRT_nRx2N][0] = PART_ID_nRx2N_L;
    290     ge_part_type_to_part_id[PRT_nRx2N][1] = PART_ID_nRx2N_R;
    291 
    292     /*************************************************************************/
    293     /* initialize attributes for each partition id within the cu.            */
    294     /*************************************************************************/
    295     {
    296         part_attr_t *ps_part_attr;
    297 
    298         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2Nx2N];
    299         ps_part_attr->u1_x_start = 0;
    300         ps_part_attr->u1_y_start = 0;
    301         ps_part_attr->u1_x_count = 8;
    302         ps_part_attr->u1_y_count = 8;
    303 
    304         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_T];
    305         ps_part_attr->u1_x_start = 0;
    306         ps_part_attr->u1_y_start = 0;
    307         ps_part_attr->u1_x_count = 8;
    308         ps_part_attr->u1_y_count = 4;
    309 
    310         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_B];
    311         ps_part_attr->u1_x_start = 0;
    312         ps_part_attr->u1_y_start = 4;
    313         ps_part_attr->u1_x_count = 8;
    314         ps_part_attr->u1_y_count = 4;
    315 
    316         ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_L];
    317         ps_part_attr->u1_x_start = 0;
    318         ps_part_attr->u1_y_start = 0;
    319         ps_part_attr->u1_x_count = 4;
    320         ps_part_attr->u1_y_count = 8;
    321 
    322         ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_R];
    323         ps_part_attr->u1_x_start = 4;
    324         ps_part_attr->u1_y_start = 0;
    325         ps_part_attr->u1_x_count = 4;
    326         ps_part_attr->u1_y_count = 8;
    327 
    328         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TL];
    329         ps_part_attr->u1_x_start = 0;
    330         ps_part_attr->u1_y_start = 0;
    331         ps_part_attr->u1_x_count = 4;
    332         ps_part_attr->u1_y_count = 4;
    333 
    334         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TR];
    335         ps_part_attr->u1_x_start = 4;
    336         ps_part_attr->u1_y_start = 0;
    337         ps_part_attr->u1_x_count = 4;
    338         ps_part_attr->u1_y_count = 4;
    339 
    340         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BL];
    341         ps_part_attr->u1_x_start = 0;
    342         ps_part_attr->u1_y_start = 4;
    343         ps_part_attr->u1_x_count = 4;
    344         ps_part_attr->u1_y_count = 4;
    345 
    346         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BR];
    347         ps_part_attr->u1_x_start = 4;
    348         ps_part_attr->u1_y_start = 4;
    349         ps_part_attr->u1_x_count = 4;
    350         ps_part_attr->u1_y_count = 4;
    351 
    352         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T];
    353         ps_part_attr->u1_x_start = 0;
    354         ps_part_attr->u1_y_start = 0;
    355         ps_part_attr->u1_x_count = 8;
    356         ps_part_attr->u1_y_count = 2;
    357 
    358         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_B];
    359         ps_part_attr->u1_x_start = 0;
    360         ps_part_attr->u1_y_start = 2;
    361         ps_part_attr->u1_x_count = 8;
    362         ps_part_attr->u1_y_count = 6;
    363 
    364         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_T];
    365         ps_part_attr->u1_x_start = 0;
    366         ps_part_attr->u1_y_start = 0;
    367         ps_part_attr->u1_x_count = 8;
    368         ps_part_attr->u1_y_count = 6;
    369 
    370         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_B];
    371         ps_part_attr->u1_x_start = 0;
    372         ps_part_attr->u1_y_start = 6;
    373         ps_part_attr->u1_x_count = 8;
    374         ps_part_attr->u1_y_count = 2;
    375 
    376         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_L];
    377         ps_part_attr->u1_x_start = 0;
    378         ps_part_attr->u1_y_start = 0;
    379         ps_part_attr->u1_x_count = 2;
    380         ps_part_attr->u1_y_count = 8;
    381 
    382         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_R];
    383         ps_part_attr->u1_x_start = 2;
    384         ps_part_attr->u1_y_start = 0;
    385         ps_part_attr->u1_x_count = 6;
    386         ps_part_attr->u1_y_count = 8;
    387 
    388         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_L];
    389         ps_part_attr->u1_x_start = 0;
    390         ps_part_attr->u1_y_start = 0;
    391         ps_part_attr->u1_x_count = 6;
    392         ps_part_attr->u1_y_count = 8;
    393 
    394         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_R];
    395         ps_part_attr->u1_x_start = 6;
    396         ps_part_attr->u1_y_start = 0;
    397         ps_part_attr->u1_x_count = 2;
    398         ps_part_attr->u1_y_count = 8;
    399     }
    400     for(i = 0; i < NUM_BLK_SIZES; i++)
    401         ge_blk_size_to_cu_size[i] = CU_INVALID;
    402 
    403     ge_blk_size_to_cu_size[BLK_8x8] = CU_8x8;
    404     ge_blk_size_to_cu_size[BLK_16x16] = CU_16x16;
    405     ge_blk_size_to_cu_size[BLK_32x32] = CU_32x32;
    406     ge_blk_size_to_cu_size[BLK_64x64] = CU_64x64;
    407 
    408     /* This is the reverse, given cU size, get blk size */
    409     ge_cu_size_to_blk_size[CU_8x8] = BLK_8x8;
    410     ge_cu_size_to_blk_size[CU_16x16] = BLK_16x16;
    411     ge_cu_size_to_blk_size[CU_32x32] = BLK_32x32;
    412     ge_cu_size_to_blk_size[CU_64x64] = BLK_64x64;
    413 
    414     gau1_is_vert_part[PRT_2Nx2N] = 0;
    415     gau1_is_vert_part[PRT_2NxN] = 0;
    416     gau1_is_vert_part[PRT_Nx2N] = 1;
    417     gau1_is_vert_part[PRT_NxN] = 1;
    418     gau1_is_vert_part[PRT_2NxnU] = 0;
    419     gau1_is_vert_part[PRT_2NxnD] = 0;
    420     gau1_is_vert_part[PRT_nLx2N] = 1;
    421     gau1_is_vert_part[PRT_nRx2N] = 1;
    422 
    423     /* Initialise the number of best results for the full pell refinement */
    424     gau1_num_best_results_PQ[PART_ID_2Nx2N] = 2;
    425     gau1_num_best_results_PQ[PART_ID_2NxN_T] = 0;
    426     gau1_num_best_results_PQ[PART_ID_2NxN_B] = 0;
    427     gau1_num_best_results_PQ[PART_ID_Nx2N_L] = 0;
    428     gau1_num_best_results_PQ[PART_ID_Nx2N_R] = 0;
    429     gau1_num_best_results_PQ[PART_ID_NxN_TL] = 1;
    430     gau1_num_best_results_PQ[PART_ID_NxN_TR] = 1;
    431     gau1_num_best_results_PQ[PART_ID_NxN_BL] = 1;
    432     gau1_num_best_results_PQ[PART_ID_NxN_BR] = 1;
    433     gau1_num_best_results_PQ[PART_ID_2NxnU_T] = 1;
    434     gau1_num_best_results_PQ[PART_ID_2NxnU_B] = 0;
    435     gau1_num_best_results_PQ[PART_ID_2NxnD_T] = 0;
    436     gau1_num_best_results_PQ[PART_ID_2NxnD_B] = 1;
    437     gau1_num_best_results_PQ[PART_ID_nLx2N_L] = 1;
    438     gau1_num_best_results_PQ[PART_ID_nLx2N_R] = 0;
    439     gau1_num_best_results_PQ[PART_ID_nRx2N_L] = 0;
    440     gau1_num_best_results_PQ[PART_ID_nRx2N_R] = 1;
    441 
    442     gau1_num_best_results_HQ[PART_ID_2Nx2N] = 2;
    443     gau1_num_best_results_HQ[PART_ID_2NxN_T] = 0;
    444     gau1_num_best_results_HQ[PART_ID_2NxN_B] = 0;
    445     gau1_num_best_results_HQ[PART_ID_Nx2N_L] = 0;
    446     gau1_num_best_results_HQ[PART_ID_Nx2N_R] = 0;
    447     gau1_num_best_results_HQ[PART_ID_NxN_TL] = 1;
    448     gau1_num_best_results_HQ[PART_ID_NxN_TR] = 1;
    449     gau1_num_best_results_HQ[PART_ID_NxN_BL] = 1;
    450     gau1_num_best_results_HQ[PART_ID_NxN_BR] = 1;
    451     gau1_num_best_results_HQ[PART_ID_2NxnU_T] = 1;
    452     gau1_num_best_results_HQ[PART_ID_2NxnU_B] = 0;
    453     gau1_num_best_results_HQ[PART_ID_2NxnD_T] = 0;
    454     gau1_num_best_results_HQ[PART_ID_2NxnD_B] = 1;
    455     gau1_num_best_results_HQ[PART_ID_nLx2N_L] = 1;
    456     gau1_num_best_results_HQ[PART_ID_nLx2N_R] = 0;
    457     gau1_num_best_results_HQ[PART_ID_nRx2N_L] = 0;
    458     gau1_num_best_results_HQ[PART_ID_nRx2N_R] = 1;
    459 
    460     gau1_num_best_results_MS[PART_ID_2Nx2N] = 2;
    461     gau1_num_best_results_MS[PART_ID_2NxN_T] = 0;
    462     gau1_num_best_results_MS[PART_ID_2NxN_B] = 0;
    463     gau1_num_best_results_MS[PART_ID_Nx2N_L] = 0;
    464     gau1_num_best_results_MS[PART_ID_Nx2N_R] = 0;
    465     gau1_num_best_results_MS[PART_ID_NxN_TL] = 1;
    466     gau1_num_best_results_MS[PART_ID_NxN_TR] = 1;
    467     gau1_num_best_results_MS[PART_ID_NxN_BL] = 1;
    468     gau1_num_best_results_MS[PART_ID_NxN_BR] = 1;
    469     gau1_num_best_results_MS[PART_ID_2NxnU_T] = 1;
    470     gau1_num_best_results_MS[PART_ID_2NxnU_B] = 0;
    471     gau1_num_best_results_MS[PART_ID_2NxnD_T] = 0;
    472     gau1_num_best_results_MS[PART_ID_2NxnD_B] = 1;
    473     gau1_num_best_results_MS[PART_ID_nLx2N_L] = 1;
    474     gau1_num_best_results_MS[PART_ID_nLx2N_R] = 0;
    475     gau1_num_best_results_MS[PART_ID_nRx2N_L] = 0;
    476     gau1_num_best_results_MS[PART_ID_nRx2N_R] = 1;
    477 
    478     gau1_num_best_results_HS[PART_ID_2Nx2N] = 2;
    479     gau1_num_best_results_HS[PART_ID_2NxN_T] = 0;
    480     gau1_num_best_results_HS[PART_ID_2NxN_B] = 0;
    481     gau1_num_best_results_HS[PART_ID_Nx2N_L] = 0;
    482     gau1_num_best_results_HS[PART_ID_Nx2N_R] = 0;
    483     gau1_num_best_results_HS[PART_ID_NxN_TL] = 0;
    484     gau1_num_best_results_HS[PART_ID_NxN_TR] = 0;
    485     gau1_num_best_results_HS[PART_ID_NxN_BL] = 0;
    486     gau1_num_best_results_HS[PART_ID_NxN_BR] = 0;
    487     gau1_num_best_results_HS[PART_ID_2NxnU_T] = 0;
    488     gau1_num_best_results_HS[PART_ID_2NxnU_B] = 0;
    489     gau1_num_best_results_HS[PART_ID_2NxnD_T] = 0;
    490     gau1_num_best_results_HS[PART_ID_2NxnD_B] = 0;
    491     gau1_num_best_results_HS[PART_ID_nLx2N_L] = 0;
    492     gau1_num_best_results_HS[PART_ID_nLx2N_R] = 0;
    493     gau1_num_best_results_HS[PART_ID_nRx2N_L] = 0;
    494     gau1_num_best_results_HS[PART_ID_nRx2N_R] = 0;
    495 
    496     gau1_num_best_results_XS[PART_ID_2Nx2N] = 2;
    497     gau1_num_best_results_XS[PART_ID_2NxN_T] = 0;
    498     gau1_num_best_results_XS[PART_ID_2NxN_B] = 0;
    499     gau1_num_best_results_XS[PART_ID_Nx2N_L] = 0;
    500     gau1_num_best_results_XS[PART_ID_Nx2N_R] = 0;
    501     gau1_num_best_results_XS[PART_ID_NxN_TL] = 0;
    502     gau1_num_best_results_XS[PART_ID_NxN_TR] = 0;
    503     gau1_num_best_results_XS[PART_ID_NxN_BL] = 0;
    504     gau1_num_best_results_XS[PART_ID_NxN_BR] = 0;
    505     gau1_num_best_results_XS[PART_ID_2NxnU_T] = 0;
    506     gau1_num_best_results_XS[PART_ID_2NxnU_B] = 0;
    507     gau1_num_best_results_XS[PART_ID_2NxnD_T] = 0;
    508     gau1_num_best_results_XS[PART_ID_2NxnD_B] = 0;
    509     gau1_num_best_results_XS[PART_ID_nLx2N_L] = 0;
    510     gau1_num_best_results_XS[PART_ID_nLx2N_R] = 0;
    511     gau1_num_best_results_XS[PART_ID_nRx2N_L] = 0;
    512     gau1_num_best_results_XS[PART_ID_nRx2N_R] = 0;
    513 
    514     gau1_num_best_results_XS25[PART_ID_2Nx2N] = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25;
    515     gau1_num_best_results_XS25[PART_ID_2NxN_T] = 0;
    516     gau1_num_best_results_XS25[PART_ID_2NxN_B] = 0;
    517     gau1_num_best_results_XS25[PART_ID_Nx2N_L] = 0;
    518     gau1_num_best_results_XS25[PART_ID_Nx2N_R] = 0;
    519     gau1_num_best_results_XS25[PART_ID_NxN_TL] = 0;
    520     gau1_num_best_results_XS25[PART_ID_NxN_TR] = 0;
    521     gau1_num_best_results_XS25[PART_ID_NxN_BL] = 0;
    522     gau1_num_best_results_XS25[PART_ID_NxN_BR] = 0;
    523     gau1_num_best_results_XS25[PART_ID_2NxnU_T] = 0;
    524     gau1_num_best_results_XS25[PART_ID_2NxnU_B] = 0;
    525     gau1_num_best_results_XS25[PART_ID_2NxnD_T] = 0;
    526     gau1_num_best_results_XS25[PART_ID_2NxnD_B] = 0;
    527     gau1_num_best_results_XS25[PART_ID_nLx2N_L] = 0;
    528     gau1_num_best_results_XS25[PART_ID_nLx2N_R] = 0;
    529     gau1_num_best_results_XS25[PART_ID_nRx2N_L] = 0;
    530     gau1_num_best_results_XS25[PART_ID_nRx2N_R] = 0;
    531 
    532     /* Top right validity for each part id */
    533     gau1_partid_tr_valid[PART_ID_2Nx2N] = 1;
    534     gau1_partid_tr_valid[PART_ID_2NxN_T] = 1;
    535     gau1_partid_tr_valid[PART_ID_2NxN_B] = 0;
    536     gau1_partid_tr_valid[PART_ID_Nx2N_L] = 1;
    537     gau1_partid_tr_valid[PART_ID_Nx2N_R] = 1;
    538     gau1_partid_tr_valid[PART_ID_NxN_TL] = 1;
    539     gau1_partid_tr_valid[PART_ID_NxN_TR] = 1;
    540     gau1_partid_tr_valid[PART_ID_NxN_BL] = 1;
    541     gau1_partid_tr_valid[PART_ID_NxN_BR] = 0;
    542     gau1_partid_tr_valid[PART_ID_2NxnU_T] = 1;
    543     gau1_partid_tr_valid[PART_ID_2NxnU_B] = 0;
    544     gau1_partid_tr_valid[PART_ID_2NxnD_T] = 1;
    545     gau1_partid_tr_valid[PART_ID_2NxnD_B] = 0;
    546     gau1_partid_tr_valid[PART_ID_nLx2N_L] = 1;
    547     gau1_partid_tr_valid[PART_ID_nLx2N_R] = 1;
    548     gau1_partid_tr_valid[PART_ID_nRx2N_L] = 1;
    549     gau1_partid_tr_valid[PART_ID_nRx2N_R] = 1;
    550 
    551     /* Bot Left validity for each part id */
    552     gau1_partid_bl_valid[PART_ID_2Nx2N] = 1;
    553     gau1_partid_bl_valid[PART_ID_2NxN_T] = 1;
    554     gau1_partid_bl_valid[PART_ID_2NxN_B] = 1;
    555     gau1_partid_bl_valid[PART_ID_Nx2N_L] = 1;
    556     gau1_partid_bl_valid[PART_ID_Nx2N_R] = 0;
    557     gau1_partid_bl_valid[PART_ID_NxN_TL] = 1;
    558     gau1_partid_bl_valid[PART_ID_NxN_TR] = 0;
    559     gau1_partid_bl_valid[PART_ID_NxN_BL] = 1;
    560     gau1_partid_bl_valid[PART_ID_NxN_BR] = 0;
    561     gau1_partid_bl_valid[PART_ID_2NxnU_T] = 1;
    562     gau1_partid_bl_valid[PART_ID_2NxnU_B] = 1;
    563     gau1_partid_bl_valid[PART_ID_2NxnD_T] = 1;
    564     gau1_partid_bl_valid[PART_ID_2NxnD_B] = 1;
    565     gau1_partid_bl_valid[PART_ID_nLx2N_L] = 1;
    566     gau1_partid_bl_valid[PART_ID_nLx2N_R] = 0;
    567     gau1_partid_bl_valid[PART_ID_nRx2N_L] = 1;
    568     gau1_partid_bl_valid[PART_ID_nRx2N_R] = 0;
    569 
    570     /*Part id to part num of this partition id in the CU */
    571     gau1_part_id_to_part_num[PART_ID_2Nx2N] = 0;
    572     gau1_part_id_to_part_num[PART_ID_2NxN_T] = 0;
    573     gau1_part_id_to_part_num[PART_ID_2NxN_B] = 1;
    574     gau1_part_id_to_part_num[PART_ID_Nx2N_L] = 0;
    575     gau1_part_id_to_part_num[PART_ID_Nx2N_R] = 1;
    576     gau1_part_id_to_part_num[PART_ID_NxN_TL] = 0;
    577     gau1_part_id_to_part_num[PART_ID_NxN_TR] = 1;
    578     gau1_part_id_to_part_num[PART_ID_NxN_BL] = 2;
    579     gau1_part_id_to_part_num[PART_ID_NxN_BR] = 3;
    580     gau1_part_id_to_part_num[PART_ID_2NxnU_T] = 0;
    581     gau1_part_id_to_part_num[PART_ID_2NxnU_B] = 1;
    582     gau1_part_id_to_part_num[PART_ID_2NxnD_T] = 0;
    583     gau1_part_id_to_part_num[PART_ID_2NxnD_B] = 1;
    584     gau1_part_id_to_part_num[PART_ID_nLx2N_L] = 0;
    585     gau1_part_id_to_part_num[PART_ID_nLx2N_R] = 1;
    586     gau1_part_id_to_part_num[PART_ID_nRx2N_L] = 0;
    587     gau1_part_id_to_part_num[PART_ID_nRx2N_R] = 1;
    588 
    589     /*Which partition type does this partition id belong to */
    590     ge_part_id_to_part_type[PART_ID_2Nx2N] = PRT_2Nx2N;
    591     ge_part_id_to_part_type[PART_ID_2NxN_T] = PRT_2NxN;
    592     ge_part_id_to_part_type[PART_ID_2NxN_B] = PRT_2NxN;
    593     ge_part_id_to_part_type[PART_ID_Nx2N_L] = PRT_Nx2N;
    594     ge_part_id_to_part_type[PART_ID_Nx2N_R] = PRT_Nx2N;
    595     ge_part_id_to_part_type[PART_ID_NxN_TL] = PRT_NxN;
    596     ge_part_id_to_part_type[PART_ID_NxN_TR] = PRT_NxN;
    597     ge_part_id_to_part_type[PART_ID_NxN_BL] = PRT_NxN;
    598     ge_part_id_to_part_type[PART_ID_NxN_BR] = PRT_NxN;
    599     ge_part_id_to_part_type[PART_ID_2NxnU_T] = PRT_2NxnU;
    600     ge_part_id_to_part_type[PART_ID_2NxnU_B] = PRT_2NxnU;
    601     ge_part_id_to_part_type[PART_ID_2NxnD_T] = PRT_2NxnD;
    602     ge_part_id_to_part_type[PART_ID_2NxnD_B] = PRT_2NxnD;
    603     ge_part_id_to_part_type[PART_ID_nLx2N_L] = PRT_nLx2N;
    604     ge_part_id_to_part_type[PART_ID_nLx2N_R] = PRT_nLx2N;
    605     ge_part_id_to_part_type[PART_ID_nRx2N_L] = PRT_nRx2N;
    606     ge_part_id_to_part_type[PART_ID_nRx2N_R] = PRT_nRx2N;
    607 
    608     /*************************************************************************/
    609     /* Set up the bits to be taken up for the part type. This is equally     */
    610     /* divided up between the various partitions in the part-type.           */
    611     /* For NxN @ CU 16x16, we assume it as CU 8x8, so consider it as         */
    612     /* partition 2Nx2N.                                                      */
    613     /*************************************************************************/
    614     /* 1 bit for 2Nx2N partition */
    615     gau1_bits_for_part_id_q1[PART_ID_2Nx2N] = 2;
    616 
    617     /* 3 bits for symmetric part types, so 1.5 bits per partition */
    618     gau1_bits_for_part_id_q1[PART_ID_2NxN_T] = 3;
    619     gau1_bits_for_part_id_q1[PART_ID_2NxN_B] = 3;
    620     gau1_bits_for_part_id_q1[PART_ID_Nx2N_L] = 3;
    621     gau1_bits_for_part_id_q1[PART_ID_Nx2N_R] = 3;
    622 
    623     /* 1 bit for NxN partitions, assuming these to be 2Nx2N CUs of lower level */
    624     gau1_bits_for_part_id_q1[PART_ID_NxN_TL] = 2;
    625     gau1_bits_for_part_id_q1[PART_ID_NxN_TR] = 2;
    626     gau1_bits_for_part_id_q1[PART_ID_NxN_BL] = 2;
    627     gau1_bits_for_part_id_q1[PART_ID_NxN_BR] = 2;
    628 
    629     /* 4 bits for AMP so 2 bits per partition */
    630     gau1_bits_for_part_id_q1[PART_ID_2NxnU_T] = 4;
    631     gau1_bits_for_part_id_q1[PART_ID_2NxnU_B] = 4;
    632     gau1_bits_for_part_id_q1[PART_ID_2NxnD_T] = 4;
    633     gau1_bits_for_part_id_q1[PART_ID_2NxnD_B] = 4;
    634     gau1_bits_for_part_id_q1[PART_ID_nLx2N_L] = 4;
    635     gau1_bits_for_part_id_q1[PART_ID_nLx2N_R] = 4;
    636     gau1_bits_for_part_id_q1[PART_ID_nRx2N_L] = 4;
    637     gau1_bits_for_part_id_q1[PART_ID_nRx2N_R] = 4;
    638 }
    639 
    640 /**
    641 ********************************************************************************
    642 *  @fn     hme_enc_num_alloc()
    643 *
    644 *  @brief  returns number of memtabs that is required by hme module
    645 *
    646 *  @return   Number of memtabs required
    647 ********************************************************************************
    648 */
    649 S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)
    650 {
    651     if(i4_num_me_frm_pllel > 1)
    652     {
    653         return ((S32)MAX_HME_ENC_TOT_MEMTABS);
    654     }
    655     else
    656     {
    657         return ((S32)MIN_HME_ENC_TOT_MEMTABS);
    658     }
    659 }
    660 
    661 /**
    662 ********************************************************************************
    663 *  @fn     hme_coarse_num_alloc()
    664 *
    665 *  @brief  returns number of memtabs that is required by hme module
    666 *
    667 *  @return   Number of memtabs required
    668 ********************************************************************************
    669 */
    670 S32 hme_coarse_num_alloc()
    671 {
    672     return ((S32)HME_COARSE_TOT_MEMTABS);
    673 }
    674 
    675 /**
    676 ********************************************************************************
    677 *  @fn     hme_coarse_dep_mngr_num_alloc()
    678 *
    679 *  @brief  returns number of memtabs that is required by Dep Mngr for hme module
    680 *
    681 *  @return   Number of memtabs required
    682 ********************************************************************************
    683 */
    684 WORD32 hme_coarse_dep_mngr_num_alloc()
    685 {
    686     return ((WORD32)((MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs()));
    687 }
    688 
    689 S32 hme_validate_init_prms(hme_init_prms_t *ps_prms)
    690 {
    691     S32 n_layers = ps_prms->num_simulcast_layers;
    692 
    693     /* The final layer has got to be a non encode coarse layer */
    694     if(n_layers > (MAX_NUM_LAYERS - 1))
    695         return (-1);
    696 
    697     if(n_layers < 1)
    698         return (-1);
    699 
    700     /* Width of the coarsest encode layer got to be >= 2*min_wd where min_Wd */
    701     /* represents the min allowed width in any layer. Ditto with ht          */
    702     if(ps_prms->a_wd[n_layers - 1] < 2 * (MIN_WD_COARSE))
    703         return (-1);
    704     if(ps_prms->a_ht[n_layers - 1] < 2 * (MIN_HT_COARSE))
    705         return (-1);
    706     if(ps_prms->max_num_ref > MAX_NUM_REF)
    707         return (-1);
    708     if(ps_prms->max_num_ref < 0)
    709         return (-1);
    710 
    711     return (0);
    712 }
    713 void hme_set_layer_res_attrs(
    714     layer_ctxt_t *ps_layer, S32 wd, S32 ht, S32 disp_wd, S32 disp_ht, U08 u1_enc)
    715 {
    716     ps_layer->i4_wd = wd;
    717     ps_layer->i4_ht = ht;
    718     ps_layer->i4_disp_wd = disp_wd;
    719     ps_layer->i4_disp_ht = disp_ht;
    720     if(0 == u1_enc)
    721     {
    722         ps_layer->i4_inp_stride = wd + 32 + 4;
    723         ps_layer->i4_inp_offset = (ps_layer->i4_inp_stride * 16) + 16;
    724         ps_layer->i4_pad_x_inp = 16;
    725         ps_layer->i4_pad_y_inp = 16;
    726         ps_layer->pu1_inp = ps_layer->pu1_inp_base + ps_layer->i4_inp_offset;
    727     }
    728 }
    729 
    730 /**
    731 ********************************************************************************
    732 *  @fn     hme_coarse_get_layer1_mv_bank_ref_idx_size()
    733 *
    734 *  @brief  returns the MV bank and ref idx size of Layer 1 (penultimate)
    735 *
    736 *  @return   none
    737 ********************************************************************************
    738 */
    739 void hme_coarse_get_layer1_mv_bank_ref_idx_size(
    740     S32 n_tot_layers,
    741     S32 *a_wd,
    742     S32 *a_ht,
    743     S32 max_num_ref,
    744     S32 *pi4_mv_bank_size,
    745     S32 *pi4_ref_idx_size)
    746 {
    747     S32 num_blks, num_mvs_per_blk, num_ref;
    748     S32 num_cols, num_rows, num_mvs_per_row;
    749     S32 is_explicit_store = 1;
    750     S32 wd, ht, num_layers_explicit_search;
    751     S32 num_results, use_4x4;
    752     wd = a_wd[1];
    753     ht = a_ht[1];
    754 
    755     /* Assuming abt 4 layers for 1080p, we do explicit search across all ref */
    756     /* frames in all but final layer In final layer, it could be 1/2 */
    757     //ps_hme_init_prms->num_layers_explicit_search = 3;
    758     num_layers_explicit_search = 3;
    759 
    760     if(num_layers_explicit_search <= 0)
    761         num_layers_explicit_search = n_tot_layers - 1;
    762 
    763     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
    764 
    765     /* Possibly implicit search for lower (finer) layers */
    766     if(n_tot_layers - 1 > num_layers_explicit_search)
    767         is_explicit_store = 0;
    768 
    769     /* coarsest layer alwasy uses 4x4 blks to store results */
    770     if(1 == (n_tot_layers - 1))
    771     {
    772         /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */
    773         //ps_hme_init_prms->max_num_results_coarse = 4;
    774         //vijay : with new algo in coarseset layer this has to be revisited
    775         num_results = 4;
    776     }
    777     else
    778     {
    779         /* Every refinement layer stores a max of 2 results per partition */
    780         //ps_hme_init_prms->max_num_results = 2;
    781         num_results = 2;
    782     }
    783     use_4x4 = hme_get_mv_blk_size(1, 1, n_tot_layers, 0);
    784 
    785     num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
    786     num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
    787 
    788     if(is_explicit_store)
    789         num_ref = max_num_ref;
    790     else
    791         num_ref = 2;
    792 
    793     num_blks = num_cols * num_rows;
    794     num_mvs_per_blk = num_ref * num_results;
    795     num_mvs_per_row = num_mvs_per_blk * num_cols;
    796 
    797     /* stroe the sizes */
    798     *pi4_mv_bank_size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
    799     *pi4_ref_idx_size = num_blks * num_mvs_per_blk * sizeof(S08);
    800 
    801     return;
    802 }
    803 /**
    804 ********************************************************************************
    805 *  @fn     hme_alloc_init_layer_mv_bank()
    806 *
    807 *  @brief  memory alloc and init function for MV bank
    808 *
    809 *  @return   Number of memtabs required
    810 ********************************************************************************
    811 */
    812 S32 hme_alloc_init_layer_mv_bank(
    813     hme_memtab_t *ps_memtab,
    814     S32 max_num_results,
    815     S32 max_num_ref,
    816     S32 use_4x4,
    817     S32 mem_avail,
    818     S32 u1_enc,
    819     S32 wd,
    820     S32 ht,
    821     S32 is_explicit_store,
    822     hme_mv_t **pps_mv_base,
    823     S08 **pi1_ref_idx_base,
    824     S32 *pi4_num_mvs_per_row)
    825 {
    826     S32 count = 0;
    827     S32 size;
    828     S32 num_blks, num_mvs_per_blk;
    829     S32 num_ref;
    830     S32 num_cols, num_rows, num_mvs_per_row;
    831 
    832     if(is_explicit_store)
    833         num_ref = max_num_ref;
    834     else
    835         num_ref = 2;
    836 
    837     /* MV Bank allocation takes into consideration following */
    838     /* number of results per reference x max num refrences is the amount     */
    839     /* bufffered up per blk. Numbero f blks in pic deps on the blk size,     */
    840     /* which could be either 4x4 or 8x8.                                     */
    841     num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
    842     num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
    843 
    844     if(u1_enc)
    845     {
    846         /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
    847         WORD32 num_ctb_cols = ((wd + 63) >> 6);
    848         WORD32 num_ctb_rows = ((ht + 63) >> 6);
    849 
    850         num_cols = (num_ctb_cols << 3) + 2;
    851         num_rows = (num_ctb_rows << 3) + 2;
    852     }
    853     num_blks = num_cols * num_rows;
    854     num_mvs_per_blk = num_ref * max_num_results;
    855     num_mvs_per_row = num_mvs_per_blk * num_cols;
    856 
    857     size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
    858     if(mem_avail)
    859     {
    860         /* store this for run time verifications */
    861         *pi4_num_mvs_per_row = num_mvs_per_row;
    862         ASSERT(ps_memtab[count].size == size);
    863         *pps_mv_base = (hme_mv_t *)ps_memtab[count].pu1_mem;
    864     }
    865     else
    866     {
    867         ps_memtab[count].size = size;
    868         ps_memtab[count].align = 4;
    869         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
    870     }
    871 
    872     count++;
    873     /* Ref idx takes the same route as mvbase */
    874 
    875     size = num_blks * num_mvs_per_blk * sizeof(S08);
    876     if(mem_avail)
    877     {
    878         ASSERT(ps_memtab[count].size == size);
    879         *pi1_ref_idx_base = (S08 *)ps_memtab[count].pu1_mem;
    880     }
    881     else
    882     {
    883         ps_memtab[count].size = size;
    884         ps_memtab[count].align = 4;
    885         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
    886     }
    887     count++;
    888 
    889     return (count);
    890 }
    891 /**
    892 ********************************************************************************
    893 *  @fn     hme_alloc_init_layer()
    894 *
    895 *  @brief  memory alloc and init function
    896 *
    897 *  @return   Number of memtabs required
    898 ********************************************************************************
    899 */
    900 S32 hme_alloc_init_layer(
    901     hme_memtab_t *ps_memtab,
    902     S32 max_num_results,
    903     S32 max_num_ref,
    904     S32 use_4x4,
    905     S32 mem_avail,
    906     S32 u1_enc,
    907     S32 wd,
    908     S32 ht,
    909     S32 disp_wd,
    910     S32 disp_ht,
    911     S32 segment_layer,
    912     S32 is_explicit_store,
    913     layer_ctxt_t **pps_layer)
    914 {
    915     S32 count = 0;
    916     layer_ctxt_t *ps_layer = NULL;
    917     S32 size;
    918     S32 num_ref;
    919 
    920     ARG_NOT_USED(segment_layer);
    921 
    922     if(is_explicit_store)
    923         num_ref = max_num_ref;
    924     else
    925         num_ref = 2;
    926 
    927     /* We do not store 4x4 results for encoding layers */
    928     if(u1_enc)
    929         use_4x4 = 0;
    930 
    931     size = sizeof(layer_ctxt_t);
    932     if(mem_avail)
    933     {
    934         ASSERT(ps_memtab[count].size == size);
    935         ps_layer = (layer_ctxt_t *)ps_memtab[count].pu1_mem;
    936         *pps_layer = ps_layer;
    937     }
    938     else
    939     {
    940         ps_memtab[count].size = size;
    941         ps_memtab[count].align = 8;
    942         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
    943     }
    944 
    945     count++;
    946 
    947     /* Input luma buffer allocated only for non encode case */
    948     if(0 == u1_enc)
    949     {
    950         /* Allocate input with padding of 16 pixels */
    951         size = (wd + 32 + 4) * (ht + 32 + 4);
    952         if(mem_avail)
    953         {
    954             ASSERT(ps_memtab[count].size == size);
    955             ps_layer->pu1_inp_base = ps_memtab[count].pu1_mem;
    956         }
    957         else
    958         {
    959             ps_memtab[count].size = size;
    960             ps_memtab[count].align = 16;
    961             ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
    962         }
    963         count++;
    964     }
    965 
    966     /* Allocate memory or just the layer mvbank strcture. */
    967     /* TODO : see if this can be removed by moving it to layer_ctxt */
    968     size = sizeof(layer_mv_t);
    969 
    970     if(mem_avail)
    971     {
    972         ASSERT(ps_memtab[count].size == size);
    973         ps_layer->ps_layer_mvbank = (layer_mv_t *)ps_memtab[count].pu1_mem;
    974     }
    975     else
    976     {
    977         ps_memtab[count].size = size;
    978         ps_memtab[count].align = 8;
    979         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
    980     }
    981 
    982     count++;
    983 
    984     if(mem_avail)
    985     {
    986         hme_set_layer_res_attrs(ps_layer, wd, ht, disp_wd, disp_ht, u1_enc);
    987     }
    988 
    989     return (count);
    990 }
    991 
    992 S32 hme_alloc_init_search_nodes(
    993     search_results_t *ps_search_results,
    994     hme_memtab_t *ps_memtabs,
    995     S32 mem_avail,
    996     S32 max_num_ref,
    997     S32 max_num_results)
    998 {
    999     S32 size = max_num_results * sizeof(search_node_t) * max_num_ref * TOT_NUM_PARTS;
   1000     S32 j, k;
   1001     search_node_t *ps_search_node;
   1002 
   1003     if(mem_avail == 0)
   1004     {
   1005         ps_memtabs->size = size;
   1006         ps_memtabs->align = 4;
   1007         ps_memtabs->e_mem_attr = HME_SCRATCH_OVLY_MEM;
   1008         return (1);
   1009     }
   1010 
   1011     ps_search_node = (search_node_t *)ps_memtabs->pu1_mem;
   1012     ASSERT(ps_memtabs->size == size);
   1013     /****************************************************************************/
   1014     /* For each CU, we search and store N best results, per partition, per ref  */
   1015     /* So, number of memtabs is  num_refs * num_parts                           */
   1016     /****************************************************************************/
   1017     for(j = 0; j < max_num_ref; j++)
   1018     {
   1019         for(k = 0; k < TOT_NUM_PARTS; k++)
   1020         {
   1021             ps_search_results->aps_part_results[j][k] = ps_search_node;
   1022             ps_search_node += max_num_results;
   1023         }
   1024     }
   1025     return (1);
   1026 }
   1027 
   1028 S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht)
   1029 {
   1030     S32 i;
   1031     /* We keep downscaling by 2 till we hit one of the conditions:           */
   1032     /* 1. MAX_NUM_LAYERS reached.                                            */
   1033     /* 2. Width or ht goes below min width and ht allowed at coarsest layer  */
   1034     ASSERT(n_enc_layers < MAX_NUM_LAYERS);
   1035     ASSERT(n_enc_layers > 0);
   1036     ASSERT(p_wd[0] <= HME_MAX_WIDTH);
   1037     ASSERT(p_ht[0] <= HME_MAX_HEIGHT);
   1038 
   1039     p_disp_wd[0] = p_wd[0];
   1040     p_disp_ht[0] = p_ht[0];
   1041     /*************************************************************************/
   1042     /* Verify that for simulcast, lower layer to higher layer ratio is bet   */
   1043     /* 2 (dyadic) and 1.33. Typically it should be 1.5.                      */
   1044     /* TODO : for interlace, we may choose to have additional downscaling for*/
   1045     /* width alone in coarsest layer to next layer.                          */
   1046     /*************************************************************************/
   1047     for(i = 1; i < n_enc_layers; i++)
   1048     {
   1049         S32 wd1, wd2, ht1, ht2;
   1050         wd1 = FLOOR16(p_wd[i - 1] >> 1);
   1051         wd2 = CEIL16((p_wd[i - 1] * 3) >> 2);
   1052         ASSERT(p_wd[i] >= wd1);
   1053         ASSERT(p_wd[i] <= wd2);
   1054         ht1 = FLOOR16(p_ht[i - 1] >> 1);
   1055         ht2 = CEIL16((p_ht[i - 1] * 3) >> 2);
   1056         ASSERT(p_ht[i] >= ht1);
   1057         ASSERT(p_ht[i] <= ht2);
   1058     }
   1059     ASSERT(p_wd[n_enc_layers - 1] >= 2 * MIN_WD_COARSE);
   1060     ASSERT(p_ht[n_enc_layers - 1] >= 2 * MIN_HT_COARSE);
   1061 
   1062     for(i = n_enc_layers; i < MAX_NUM_LAYERS; i++)
   1063     {
   1064         if((p_wd[i - 1] < 2 * MIN_WD_COARSE) || (p_ht[i - 1] < 2 * MIN_HT_COARSE))
   1065         {
   1066             return (i);
   1067         }
   1068         /* Use CEIL16 to facilitate 16x16 searches in future, or to do       */
   1069         /* segmentation study in future                                      */
   1070         p_wd[i] = CEIL16(p_wd[i - 1] >> 1);
   1071         p_ht[i] = CEIL16(p_ht[i - 1] >> 1);
   1072 
   1073         p_disp_wd[i] = p_disp_wd[i - 1] >> 1;
   1074         p_disp_ht[i] = p_disp_ht[i - 1] >> 1;
   1075     }
   1076     return (i);
   1077 }
   1078 
   1079 /**
   1080 ********************************************************************************
   1081 *  @fn     hme_get_mv_blk_size()
   1082 *
   1083 *  @brief  returns whether blk uses 4x4 size or something else.
   1084 *
   1085 *  @param[in] enable_4x4 : input param from application to enable 4x4
   1086 *
   1087 *  @param[in] layer_id : id of current layer (0 finest)
   1088 *
   1089 *  @param[in] num_layeers : total num layers
   1090 *
   1091 *  @param[in] is_enc : Whether encoding enabled for layer
   1092 *
   1093 *  @return   1 for 4x4 blks, 0 for 8x8
   1094 ********************************************************************************
   1095 */
   1096 S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc)
   1097 {
   1098     S32 use_4x4 = enable_4x4;
   1099 
   1100     if((layer_id <= 1) && (num_layers >= 4))
   1101         use_4x4 = USE_4x4_IN_L1;
   1102     if(layer_id == num_layers - 1)
   1103         use_4x4 = 1;
   1104     if(is_enc)
   1105         use_4x4 = 0;
   1106 
   1107     return (use_4x4);
   1108 }
   1109 
   1110 /**
   1111 ********************************************************************************
   1112 *  @fn     hme_enc_alloc_init_mem()
   1113 *
   1114 *  @brief  Requests/ assign memory based on mem avail
   1115 *
   1116 *  @param[in] ps_memtabs : memtab array
   1117 *
   1118 *  @param[in] ps_prms : init prms
   1119 *
   1120 *  @param[in] pv_ctxt : ME ctxt
   1121 *
   1122 *  @param[in] mem_avail : request/assign flag
   1123 *
   1124 *  @return   1 for 4x4 blks, 0 for 8x8
   1125 ********************************************************************************
   1126 */
   1127 S32 hme_enc_alloc_init_mem(
   1128     hme_memtab_t *ps_memtabs,
   1129     hme_init_prms_t *ps_prms,
   1130     void *pv_ctxt,
   1131     S32 mem_avail,
   1132     S32 i4_num_me_frm_pllel)
   1133 {
   1134     me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_ctxt;
   1135     me_ctxt_t *ps_ctxt;
   1136     S32 count = 0, size, i, j, use_4x4;
   1137     S32 n_tot_layers, n_enc_layers;
   1138     S32 num_layers_explicit_search;
   1139     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
   1140     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
   1141     S32 num_results;
   1142     S32 num_thrds;
   1143     S32 ctb_wd = 1 << ps_prms->log_ctb_size;
   1144 
   1145     /* MV bank changes */
   1146     hme_mv_t *aps_mv_bank[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
   1147     S32 i4_num_mvs_per_row = 0;
   1148     S08 *api1_ref_idx[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
   1149 
   1150     n_enc_layers = ps_prms->num_simulcast_layers;
   1151 
   1152     /* Memtab 0: handle */
   1153     size = sizeof(me_master_ctxt_t);
   1154     if(mem_avail)
   1155     {
   1156         /* store the number of processing threads */
   1157         ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
   1158     }
   1159     else
   1160     {
   1161         ps_memtabs[count].size = size;
   1162         ps_memtabs[count].align = 8;
   1163         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
   1164     }
   1165 
   1166     count++;
   1167 
   1168     /* Memtab 1: ME threads ctxt */
   1169     size = ps_prms->i4_num_proc_thrds * sizeof(me_ctxt_t);
   1170     if(mem_avail)
   1171     {
   1172         me_ctxt_t *ps_me_tmp_ctxt = (me_ctxt_t *)ps_memtabs[count].pu1_mem;
   1173 
   1174         /* store the indivisual thread ctxt pointers */
   1175         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1176         {
   1177             ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
   1178         }
   1179     }
   1180     else
   1181     {
   1182         ps_memtabs[count].size = size;
   1183         ps_memtabs[count].align = 8;
   1184         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
   1185     }
   1186 
   1187     count++;
   1188 
   1189     /* Memtab 2: ME frame ctxts */
   1190     size = sizeof(me_frm_ctxt_t) * MAX_NUM_ME_PARALLEL * ps_prms->i4_num_proc_thrds;
   1191     if(mem_avail)
   1192     {
   1193         me_frm_ctxt_t *ps_me_frm_tmp_ctxt = (me_frm_ctxt_t *)ps_memtabs[count].pu1_mem;
   1194 
   1195         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
   1196         {
   1197             /* store the indivisual thread ctxt pointers */
   1198             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1199             {
   1200                 ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[i] = ps_me_frm_tmp_ctxt;
   1201 
   1202                 ps_me_frm_tmp_ctxt++;
   1203             }
   1204         }
   1205     }
   1206     else
   1207     {
   1208         ps_memtabs[count].size = size;
   1209         ps_memtabs[count].align = 8;
   1210         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
   1211     }
   1212 
   1213     count++;
   1214 
   1215     memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
   1216     memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
   1217     /*************************************************************************/
   1218     /* Derive the number of HME layers, including both encoded and non encode*/
   1219     /* This function also derives the width and ht of each layer.            */
   1220     /*************************************************************************/
   1221     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
   1222     num_layers_explicit_search = ps_prms->num_layers_explicit_search;
   1223     if(num_layers_explicit_search <= 0)
   1224         num_layers_explicit_search = n_tot_layers - 1;
   1225 
   1226     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
   1227 
   1228     if(mem_avail)
   1229     {
   1230         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1231         {
   1232             me_frm_ctxt_t *ps_frm_ctxt;
   1233             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1234 
   1235             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
   1236             {
   1237                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
   1238 
   1239                 memset(ps_frm_ctxt->u1_encode, 0, n_tot_layers);
   1240                 memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);
   1241 
    1242                 /* only one encode layer is used */
   1243                 ps_frm_ctxt->num_layers = 1;
   1244 
   1245                 ps_frm_ctxt->i4_wd = a_wd[0];
   1246                 ps_frm_ctxt->i4_ht = a_ht[0];
   1247                 /*
   1248             memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32)*n_tot_layers);
   1249             memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32)*n_tot_layers);
   1250 */
   1251                 ps_frm_ctxt->num_layers_explicit_search = num_layers_explicit_search;
   1252                 ps_frm_ctxt->max_num_results = ps_prms->max_num_results;
   1253                 ps_frm_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
   1254                 ps_frm_ctxt->max_num_ref = ps_prms->max_num_ref;
   1255             }
   1256         }
   1257     }
   1258 
   1259     /* Memtabs : Layers MV bank for encode layer */
    1260     /* Each ref_descr in master ctxt will have a separate layer ctxt */
   1261 
   1262     for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
   1263     {
   1264         for(j = 0; j < 1; j++)
   1265         {
   1266             S32 is_explicit_store = 1;
   1267             S32 wd, ht;
   1268             U08 u1_enc = 1;
   1269             wd = a_wd[j];
   1270             ht = a_ht[j];
   1271 
   1272             /* Possibly implicit search for lower (finer) layers */
   1273             if(n_tot_layers - j > num_layers_explicit_search)
   1274                 is_explicit_store = 0;
   1275 
   1276             /* Even if explicit search, we store only 2 results (L0 and L1) */
   1277             /* in finest layer */
   1278             if(j == 0)
   1279             {
   1280                 is_explicit_store = 0;
   1281             }
   1282 
    1283             /* coarsest layer always uses 4x4 blks to store results */
   1284             if(j == n_tot_layers - 1)
   1285             {
   1286                 num_results = ps_prms->max_num_results_coarse;
   1287             }
   1288             else
   1289             {
   1290                 num_results = ps_prms->max_num_results;
   1291                 if(j == 0)
   1292                     num_results = 1;
   1293             }
   1294             use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
   1295 
   1296             count += hme_alloc_init_layer_mv_bank(
   1297                 &ps_memtabs[count],
   1298                 num_results,
   1299                 ps_prms->max_num_ref,
   1300                 use_4x4,
   1301                 mem_avail,
   1302                 u1_enc,
   1303                 wd,
   1304                 ht,
   1305                 is_explicit_store,
   1306                 &aps_mv_bank[i],
   1307                 &api1_ref_idx[i],
   1308                 &i4_num_mvs_per_row);
   1309         }
   1310     }
   1311 
   1312     /* Memtabs : Layers * num-ref + 1 */
   1313     for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
   1314     {
    1315         /* layer memory is allocated only for the encode layer */
   1316         for(j = 0; j < 1; j++)
   1317         {
   1318             layer_ctxt_t *ps_layer;
   1319             S32 is_explicit_store = 1;
   1320             S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
   1321             S32 wd, ht;
   1322             U08 u1_enc = 1;
   1323             wd = a_wd[j];
   1324             ht = a_ht[j];
   1325 
   1326             /* Possibly implicit search for lower (finer) layers */
   1327             if(n_tot_layers - j > num_layers_explicit_search)
   1328                 is_explicit_store = 0;
   1329 
   1330             /* Even if explicit search, we store only 2 results (L0 and L1) */
   1331             /* in finest layer */
   1332             if(j == 0)
   1333             {
   1334                 is_explicit_store = 0;
   1335             }
   1336 
    1337             /* coarsest layer always uses 4x4 blks to store results */
   1338             if(j == n_tot_layers - 1)
   1339             {
   1340                 num_results = ps_prms->max_num_results_coarse;
   1341             }
   1342             else
   1343             {
   1344                 num_results = ps_prms->max_num_results;
   1345                 if(j == 0)
   1346                     num_results = 1;
   1347             }
   1348             use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
   1349 
   1350             count += hme_alloc_init_layer(
   1351                 &ps_memtabs[count],
   1352                 num_results,
   1353                 ps_prms->max_num_ref,
   1354                 use_4x4,
   1355                 mem_avail,
   1356                 u1_enc,
   1357                 wd,
   1358                 ht,
   1359                 a_disp_wd[j],
   1360                 a_disp_ht[j],
   1361                 segment_this_layer,
   1362                 is_explicit_store,
   1363                 &ps_layer);
   1364             if(mem_avail)
   1365             {
   1366                 /* same ps_layer memory pointer is stored in all the threads */
   1367                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1368                 {
   1369                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1370                     ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
   1371                 }
   1372 
   1373                 /* store the MV bank pointers */
   1374                 ps_layer->ps_layer_mvbank->max_num_mvs_per_row = i4_num_mvs_per_row;
   1375                 ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[i];
   1376                 ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[i];
   1377             }
   1378         }
   1379     }
   1380 
   1381     /* Memtabs : Buf Mgr for predictor bufs and working mem */
   1382     /* TODO : Parameterise this appropriately */
   1383     size = MAX_WKG_MEM_SIZE_PER_THREAD * ps_prms->i4_num_proc_thrds * i4_num_me_frm_pllel;
   1384 
   1385     if(mem_avail)
   1386     {
   1387         U08 *pu1_mem = ps_memtabs[count].pu1_mem;
   1388 
   1389         ASSERT(ps_memtabs[count].size == size);
   1390 
   1391         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1392         {
   1393             me_frm_ctxt_t *ps_frm_ctxt;
   1394             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1395 
   1396             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
   1397             {
   1398                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
   1399 
   1400                 hme_init_wkg_mem(&ps_frm_ctxt->s_buf_mgr, pu1_mem, MAX_WKG_MEM_SIZE_PER_THREAD);
   1401 
   1402                 if(i4_num_me_frm_pllel != 1)
   1403                 {
   1404                     /* update the memory buffer pointer */
   1405                     pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
   1406                 }
   1407             }
   1408             if(i4_num_me_frm_pllel == 1)
   1409             {
   1410                 pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
   1411             }
   1412         }
   1413     }
   1414     else
   1415     {
   1416         ps_memtabs[count].size = size;
   1417         ps_memtabs[count].align = 4;
   1418         ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   1419     }
   1420     count++;
   1421 
   1422     /*************************************************************************/
   1423     /* Memtab : We need 64x64 buffer to store the entire CTB input for bidir */
   1424     /* refinement. This memtab stores 2I - P0, I is input and P0 is L0 pred  */
   1425     /*************************************************************************/
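             /* Illustrative sketch only (not part of the allocator): the bidir
              * target is formed per pixel as 2*I - P0, so that searching P1
              * against it minimises the true bi-pred error I - (P0 + P1)/2.
              * With hypothetical input/prediction pointers and strides, the
              * per-CTB fill of pi2_inp_bck would look like:
              *
              *   for(y = 0; y < CTB_BLK_SIZE; y++)
              *       for(x = 0; x < CTB_BLK_SIZE; x++)
              *           pi2_inp_bck[y * CTB_BLK_SIZE + x] =
              *               2 * (S16)pu1_inp[y * i4_inp_stride + x]
              *               - (S16)pu1_pred_l0[y * i4_pred_stride + x];
              */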
   1426     size = sizeof(S16) * CTB_BLK_SIZE * CTB_BLK_SIZE * ps_prms->i4_num_proc_thrds *
   1427            i4_num_me_frm_pllel;
   1428 
   1429     if(mem_avail)
   1430     {
   1431         S16 *pi2_mem = (S16 *)ps_memtabs[count].pu1_mem;
   1432 
   1433         ASSERT(ps_memtabs[count].size == size);
   1434 
   1435         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1436         {
   1437             me_frm_ctxt_t *ps_frm_ctxt;
   1438             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1439 
   1440             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
   1441             {
   1442                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
   1443 
   1444                 ps_frm_ctxt->pi2_inp_bck = pi2_mem;
    1445                 /** If no ME frames run in parallel, the other aps_me_frm_prms indices share the same memory **/
   1446                 if(i4_num_me_frm_pllel != 1)
   1447                 {
   1448                     pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
   1449                 }
   1450             }
   1451             if(i4_num_me_frm_pllel == 1)
   1452             {
   1453                 pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
   1454             }
   1455         }
   1456     }
   1457     else
   1458     {
   1459         ps_memtabs[count].size = size;
   1460         ps_memtabs[count].align = 16;
   1461         ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   1462     }
   1463 
   1464     count++;
   1465 
    1466     /* Allocate a memtab for each histogram : one per reference, each sized for all threads */
    1467     /* Loop over each ME frame running in parallel */
   1468     for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
   1469     {
   1470         for(i = 0; i < ps_prms->max_num_ref; i++)
   1471         {
   1472             size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
   1473             if(mem_avail)
   1474             {
   1475                 mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
   1476 
   1477                 ASSERT(size == ps_memtabs[count].size);
   1478 
    1479                 /* divide the memory across the threads */
   1480                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1481                 {
   1482                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1483 
   1484                     ps_ctxt->aps_me_frm_prms[j]->aps_mv_hist[i] = ps_mv_hist;
   1485                     ps_mv_hist++;
   1486                 }
   1487             }
   1488             else
   1489             {
   1490                 ps_memtabs[count].size = size;
   1491                 ps_memtabs[count].align = 8;
   1492                 ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
   1493             }
   1494             count++;
   1495         }
   1496         if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
   1497         {
    1498             /** If no ME frames run in parallel, the other aps_me_frm_prms indices share the same memory **/
    1499             /** so bring the count back to its earlier value; don't decrement for the last pass of the loop **/
   1500             count -= ps_prms->max_num_ref;
   1501         }
   1502     }
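     /* Worked example of the rollback above (values assumed): with
      * max_num_ref = 4 the inner loop requests 4 histogram memtabs per pass
      * of j. When i4_num_me_frm_pllel == 1, count is wound back after every
      * pass except the last, so both parallel-frame contexts reuse the same
      * 4 memtabs instead of requesting 4 * MAX_NUM_ME_PARALLEL of them. */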
   1503 
   1504     /* Memtabs : Search nodes for 16x16 CUs, 32x32 and 64x64 CUs */
   1505     for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
   1506     {
   1507         S32 count_cpy = count;
   1508         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1509         {
   1510             if(mem_avail)
   1511             {
   1512                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1513             }
   1514 
   1515             for(i = 0; i < 21; i++)
   1516             {
   1517                 search_results_t *ps_search_results = NULL;
   1518                 if(mem_avail)
   1519                 {
   1520                     if(i < 16)
   1521                     {
   1522                         ps_search_results =
   1523                             &ps_ctxt->aps_me_frm_prms[j]->as_search_results_16x16[i];
   1524                     }
   1525                     else if(i < 20)
   1526                     {
   1527                         ps_search_results =
   1528                             &ps_ctxt->aps_me_frm_prms[j]->as_search_results_32x32[i - 16];
   1529                         ps_search_results->ps_cu_results =
   1530                             &ps_ctxt->aps_me_frm_prms[j]->as_cu32x32_results[i - 16];
   1531                     }
   1532                     else if(i == 20)
   1533                     {
   1534                         ps_search_results = &ps_ctxt->aps_me_frm_prms[j]->s_search_results_64x64;
   1535                         ps_search_results->ps_cu_results =
   1536                             &ps_ctxt->aps_me_frm_prms[j]->s_cu64x64_results;
   1537                     }
   1538                     else
   1539                     {
    1540                         /* 8x8 search results are not required in L0 ME */
   1541                         ASSERT(0);
   1542                     }
   1543                 }
   1544                 count += hme_alloc_init_search_nodes(
   1545                     ps_search_results, &ps_memtabs[count], mem_avail, 2, ps_prms->max_num_results);
   1546             }
   1547         }
   1548 
   1549         if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
   1550         {
   1551             count = count_cpy;
   1552         }
   1553     }
   1554 
   1555     /* Weighted inputs, one for each ref + one non weighted */
   1556     for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
   1557     {
   1558         size = (ps_prms->max_num_ref + 1) * ctb_wd * ctb_wd * ps_prms->i4_num_proc_thrds;
   1559         if(mem_avail)
   1560         {
   1561             U08 *pu1_mem;
   1562             ASSERT(ps_memtabs[count].size == size);
   1563             pu1_mem = ps_memtabs[count].pu1_mem;
   1564 
   1565             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1566             {
   1567                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1568 
   1569                 for(i = 0; i < ps_prms->max_num_ref + 1; i++)
   1570                 {
   1571                     ps_ctxt->aps_me_frm_prms[j]->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
   1572                     pu1_mem += (ctb_wd * ctb_wd);
   1573                 }
   1574             }
   1575         }
   1576         else
   1577         {
   1578             ps_memtabs[count].size = size;
   1579             ps_memtabs[count].align = 16;
   1580             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   1581         }
   1582         if((i4_num_me_frm_pllel != 1) || (j == (MAX_NUM_ME_PARALLEL - 1)))
   1583         {
   1584             count++;
   1585         }
   1586     }
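     /* Sizing sketch (values assumed): with a 64x64 CTB (ctb_wd = 64),
      * max_num_ref = 4 and 8 processing threads, each memtab requested above
      * is (4 + 1) * 64 * 64 * 8 = 163840 bytes, i.e. one weighted-input CTB
      * buffer per reference plus one unweighted buffer, per thread. */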
   1587 
    1588     /* if memory is allocated then initialise the frm prms pointer in each thread */
   1589     if(mem_avail)
   1590     {
   1591         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1592         {
   1593             me_frm_ctxt_t *ps_frm_ctxt;
   1594             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1595 
   1596             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
   1597             {
   1598                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
   1599 
   1600                 ps_frm_ctxt->ps_hme_frm_prms = &ps_master_ctxt->as_frm_prms[i];
   1601                 ps_frm_ctxt->ps_hme_ref_map = &ps_master_ctxt->as_ref_map[i];
   1602             }
   1603         }
   1604     }
   1605 
   1606     /* Memory allocation for use in Clustering */
   1607     if(ps_prms->s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY)
   1608     {
   1609         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
   1610         {
   1611             size = 16 * sizeof(cluster_16x16_blk_t) + 4 * sizeof(cluster_32x32_blk_t) +
   1612                    sizeof(cluster_64x64_blk_t) + sizeof(ctb_cluster_info_t);
   1613             size *= ps_prms->i4_num_proc_thrds;
   1614 
   1615             if(mem_avail)
   1616             {
   1617                 U08 *pu1_mem;
   1618 
   1619                 ASSERT(ps_memtabs[count].size == size);
   1620                 pu1_mem = ps_memtabs[count].pu1_mem;
   1621 
   1622                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1623                 {
   1624                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1625 
   1626                     ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = (cluster_16x16_blk_t *)pu1_mem;
   1627                     pu1_mem += (16 * sizeof(cluster_16x16_blk_t));
   1628 
   1629                     ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = (cluster_32x32_blk_t *)pu1_mem;
   1630                     pu1_mem += (4 * sizeof(cluster_32x32_blk_t));
   1631 
   1632                     ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = (cluster_64x64_blk_t *)pu1_mem;
   1633                     pu1_mem += (sizeof(cluster_64x64_blk_t));
   1634 
   1635                     ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info =
   1636                         (ctb_cluster_info_t *)pu1_mem;
   1637                     pu1_mem += (sizeof(ctb_cluster_info_t));
   1638                 }
   1639             }
   1640             else
   1641             {
   1642                 ps_memtabs[count].size = size;
   1643                 ps_memtabs[count].align = 16;
   1644                 ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   1645             }
   1646 
   1647             if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
   1648             {
   1649                 count++;
   1650             }
   1651         }
   1652     }
   1653     else if(mem_avail)
   1654     {
   1655         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
   1656         {
   1657             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1658             {
   1659                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1660 
   1661                 ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = NULL;
   1662 
   1663                 ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = NULL;
   1664 
   1665                 ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = NULL;
   1666 
   1667                 ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = NULL;
   1668             }
   1669         }
   1670     }
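     /* Note on the clustering workspace sizing above: a 64x64 CTB contains
      * sixteen 16x16 blocks and four 32x32 blocks, hence the 16x / 4x / 1x
      * multipliers on the cluster block structures, plus one
      * ctb_cluster_info_t, all replicated per processing thread. */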
   1671 
   1672     for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
   1673     {
   1674         size = sizeof(fullpel_refine_ctxt_t);
   1675         size *= ps_prms->i4_num_proc_thrds;
   1676 
   1677         if(mem_avail)
   1678         {
   1679             U08 *pu1_mem;
   1680 
   1681             ASSERT(ps_memtabs[count].size == size);
   1682             pu1_mem = ps_memtabs[count].pu1_mem;
   1683 
   1684             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1685             {
   1686                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1687 
   1688                 ps_ctxt->aps_me_frm_prms[i]->ps_fullpel_refine_ctxt =
   1689                     (fullpel_refine_ctxt_t *)pu1_mem;
   1690                 pu1_mem += (sizeof(fullpel_refine_ctxt_t));
   1691             }
   1692         }
   1693         else
   1694         {
   1695             ps_memtabs[count].size = size;
   1696             ps_memtabs[count].align = 16;
   1697             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   1698         }
   1699 
   1700         if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
   1701         {
   1702             count++;
   1703         }
   1704     }
   1705 
   1706     /* Memory for ihevce_me_optimised_function_list_t struct  */
   1707     if(mem_avail)
   1708     {
   1709         ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
   1710     }
   1711     else
   1712     {
   1713         ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
   1714         ps_memtabs[count].align = 16;
   1715         ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   1716     }
   1717 
   1718     ASSERT(count < hme_enc_num_alloc(i4_num_me_frm_pllel));
   1719     return (count);
   1720 }
   1721 
   1722 /**
   1723 ********************************************************************************
   1724 *  @fn     hme_coarse_alloc_init_mem()
   1725 *
    1726 *  @brief  Requests / assigns memory based on mem_avail
   1727 *
   1728 *  @param[in] ps_memtabs : memtab array
   1729 *
   1730 *  @param[in] ps_prms : init prms
   1731 *
   1732 *  @param[in] pv_ctxt : ME ctxt
   1733 *
   1734 *  @param[in] mem_avail : request/assign flag
   1735 *
   1736 *  @return  number of memtabs
   1737 ********************************************************************************
   1738 */
   1739 S32 hme_coarse_alloc_init_mem(
   1740     hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, void *pv_ctxt, S32 mem_avail)
   1741 {
   1742     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
   1743     coarse_me_ctxt_t *ps_ctxt;
   1744     S32 count = 0, size, i, j, use_4x4, wd;
   1745     S32 n_tot_layers;
   1746     S32 num_layers_explicit_search;
   1747     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
   1748     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
   1749     S32 num_results;
   1750     S32 num_thrds;
   1751     //S32 ctb_wd = 1 << ps_prms->log_ctb_size;
   1752     S32 sad_4x4_block_size, sad_4x4_block_stride, search_step, num_rows;
   1753     S32 layer1_blk_width = 8;  // 8x8 search
   1754     S32 blk_shift;
   1755 
   1756     /* MV bank changes */
   1757     hme_mv_t *aps_mv_bank[MAX_NUM_LAYERS] = { NULL };
   1758     S32 ai4_num_mvs_per_row[MAX_NUM_LAYERS] = { 0 };
   1759     S08 *api1_ref_idx[MAX_NUM_LAYERS] = { NULL };
   1760 
   1761     /* Memtab 0: handle */
   1762     size = sizeof(coarse_me_master_ctxt_t);
   1763     if(mem_avail)
   1764     {
   1765         /* store the number of processing threads */
   1766         ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
   1767     }
   1768     else
   1769     {
   1770         ps_memtabs[count].size = size;
   1771         ps_memtabs[count].align = 8;
   1772         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
   1773     }
   1774 
   1775     count++;
   1776 
   1777     /* Memtab 1: ME threads ctxt */
   1778     size = ps_prms->i4_num_proc_thrds * sizeof(coarse_me_ctxt_t);
   1779     if(mem_avail)
   1780     {
   1781         coarse_me_ctxt_t *ps_me_tmp_ctxt = (coarse_me_ctxt_t *)ps_memtabs[count].pu1_mem;
   1782 
    1783         /* store the individual thread ctxt pointers */
   1784         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1785         {
   1786             ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
   1787         }
   1788     }
   1789     else
   1790     {
   1791         ps_memtabs[count].size = size;
   1792         ps_memtabs[count].align = 8;
   1793         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
   1794     }
   1795 
   1796     count++;
   1797 
   1798     memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
   1799     memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
   1800     /*************************************************************************/
    1801     /* Derive the number of HME layers, including encode and non-encode ones.*/
   1802     /* This function also derives the width and ht of each layer.            */
   1803     /*************************************************************************/
   1804     n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
   1805 
   1806     num_layers_explicit_search = ps_prms->num_layers_explicit_search;
   1807 
   1808     if(num_layers_explicit_search <= 0)
   1809         num_layers_explicit_search = n_tot_layers - 1;
   1810 
   1811     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
   1812 
   1813     if(mem_avail)
   1814     {
   1815         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1816         {
   1817             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1818             memset(ps_ctxt->u1_encode, 0, n_tot_layers);
   1819 
   1820             /* encode layer should be excluded during processing */
   1821             ps_ctxt->num_layers = n_tot_layers;
   1822 
   1823             memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
   1824             memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
   1825 
   1826             ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
   1827             ps_ctxt->max_num_results = ps_prms->max_num_results;
   1828             ps_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
   1829             ps_ctxt->max_num_ref = ps_prms->max_num_ref;
   1830         }
   1831     }
   1832 
   1833     /* Memtabs : Layers MV bank for total layers - 2  */
    1834     /* for penultimate layer MV bank will be initialised at every frame level */
   1835     for(j = 1; j < n_tot_layers; j++)
   1836     {
   1837         S32 is_explicit_store = 1;
   1838         S32 wd, ht;
   1839         U08 u1_enc = 0;
   1840         wd = a_wd[j];
   1841         ht = a_ht[j];
   1842 
   1843         /* Possibly implicit search for lower (finer) layers */
   1844         if(n_tot_layers - j > num_layers_explicit_search)
   1845             is_explicit_store = 0;
   1846 
   1847         /* Even if explicit search, we store only 2 results (L0 and L1) */
   1848         /* in finest layer */
   1849         if(j == 0)
   1850         {
   1851             is_explicit_store = 0;
   1852         }
   1853 
    1854         /* coarsest layer always uses 4x4 blks to store results */
   1855         if(j == n_tot_layers - 1)
   1856         {
   1857             num_results = ps_prms->max_num_results_coarse;
   1858         }
   1859         else
   1860         {
   1861             num_results = ps_prms->max_num_results;
   1862             if(j == 0)
   1863                 num_results = 1;
   1864         }
   1865         use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
   1866 
    1867         /* for the penultimate layer, compute the parameters and store them */
   1868         if(j == 1)
   1869         {
   1870             S32 num_blks, num_mvs_per_blk, num_ref;
   1871             S32 num_cols, num_rows, num_mvs_per_row;
   1872 
   1873             num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
   1874             num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
   1875 
   1876             if(is_explicit_store)
   1877                 num_ref = ps_prms->max_num_ref;
   1878             else
   1879                 num_ref = 2;
   1880 
   1881             num_blks = num_cols * num_rows;
   1882             num_mvs_per_blk = num_ref * num_results;
   1883             num_mvs_per_row = num_mvs_per_blk * num_cols;
   1884 
   1885             ai4_num_mvs_per_row[j] = num_mvs_per_row;
   1886             aps_mv_bank[j] = NULL;
   1887             api1_ref_idx[j] = NULL;
   1888         }
   1889         else
   1890         {
   1891             count += hme_alloc_init_layer_mv_bank(
   1892                 &ps_memtabs[count],
   1893                 num_results,
   1894                 ps_prms->max_num_ref,
   1895                 use_4x4,
   1896                 mem_avail,
   1897                 u1_enc,
   1898                 wd,
   1899                 ht,
   1900                 is_explicit_store,
   1901                 &aps_mv_bank[j],
   1902                 &api1_ref_idx[j],
   1903                 &ai4_num_mvs_per_row[j]);
   1904         }
   1905     }
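     /* Worked example for the j == 1 (penultimate layer) sizing above, with
      * assumed values: a 960-pel wide layer searched with 8x8 blocks
      * (use_4x4 == 0) gives num_cols = (960 >> 3) + 2 = 122; with implicit
      * storage (num_ref = 2) and num_results = 2, num_mvs_per_blk = 4 and
      * num_mvs_per_row = 4 * 122 = 488. Only this count is recorded here;
      * the penultimate layer's MV bank itself is initialised at frame level. */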
   1906 
   1907     /* Memtabs : Layers * num-ref + 1 */
   1908     for(i = 0; i < ps_prms->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
   1909     {
    1910         /* for all layers except the encode layer */
   1911         for(j = 1; j < n_tot_layers; j++)
   1912         {
   1913             layer_ctxt_t *ps_layer;
   1914             S32 is_explicit_store = 1;
   1915             S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
   1916             S32 wd, ht;
   1917             U08 u1_enc = 0;
   1918             wd = a_wd[j];
   1919             ht = a_ht[j];
   1920 
   1921             /* Possibly implicit search for lower (finer) layers */
   1922             if(n_tot_layers - j > num_layers_explicit_search)
   1923                 is_explicit_store = 0;
   1924 
   1925             /* Even if explicit search, we store only 2 results (L0 and L1) */
   1926             /* in finest layer */
   1927             if(j == 0)
   1928             {
   1929                 is_explicit_store = 0;
   1930             }
   1931 
    1932             /* coarsest layer always uses 4x4 blks to store results */
   1933             if(j == n_tot_layers - 1)
   1934             {
   1935                 num_results = ps_prms->max_num_results_coarse;
   1936             }
   1937             else
   1938             {
   1939                 num_results = ps_prms->max_num_results;
   1940                 if(j == 0)
   1941                     num_results = 1;
   1942             }
   1943             use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
   1944 
   1945             count += hme_alloc_init_layer(
   1946                 &ps_memtabs[count],
   1947                 num_results,
   1948                 ps_prms->max_num_ref,
   1949                 use_4x4,
   1950                 mem_avail,
   1951                 u1_enc,
   1952                 wd,
   1953                 ht,
   1954                 a_disp_wd[j],
   1955                 a_disp_ht[j],
   1956                 segment_this_layer,
   1957                 is_explicit_store,
   1958                 &ps_layer);
   1959             if(mem_avail)
   1960             {
   1961                 /* same ps_layer memory pointer is stored in all the threads */
   1962                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   1963                 {
   1964                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   1965                     ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
   1966                 }
   1967 
   1968                 /* store the MV bank pointers */
   1969                 ps_layer->ps_layer_mvbank->max_num_mvs_per_row = ai4_num_mvs_per_row[j];
   1970                 ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[j];
   1971                 ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[j];
   1972             }
   1973         }
   1974     }
   1975 
   1976     /* Memtabs : Prev Row search node at coarsest layer */
   1977     wd = a_wd[n_tot_layers - 1];
   1978 
   1979     /* Allocate a memtab for storing 4x4 SADs for n rows. As many as num ref and number of threads */
   1980     num_rows = ps_prms->i4_num_proc_thrds + 1;
   1981     if(ps_prms->s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
   1982         search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
   1983     else
   1984         search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
   1985 
   1986     /*shift factor*/
   1987     blk_shift = 2; /*4x4*/
   1988     search_step >>= 1;
   1989 
   1990     sad_4x4_block_size = ((2 * MAX_MVX_SUPPORTED_IN_COARSE_LAYER) >> search_step) *
   1991                          ((2 * MAX_MVY_SUPPORTED_IN_COARSE_LAYER) >> search_step);
   1992     sad_4x4_block_stride = ((wd >> blk_shift) + 1) * sad_4x4_block_size;
   1993 
   1994     size = num_rows * sad_4x4_block_stride * sizeof(S16);
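     /* Sizing sketch (values assumed): if the coarse search range were +/-64
      * in X and +/-32 in Y with a 2-pel step (search_step of 1 after the
      * shift above), each 4x4 block keeps (128 >> 1) * (64 >> 1) = 2048 SAD
      * entries; for a 240-pel wide coarsest layer the row stride is then
      * ((240 >> 2) + 1) * 2048 = 124928 S16 entries, replicated over
      * num_rows = i4_num_proc_thrds + 1 row buffers. */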
   1995     for(i = 0; i < ps_prms->max_num_ref; i++)
   1996     {
   1997         if(mem_avail)
   1998         {
   1999             ASSERT(size == ps_memtabs[count].size);
   2000 
   2001             /* same row memory pointer is stored in all the threads */
   2002             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   2003             {
   2004                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   2005                 ps_ctxt->api2_sads_4x4_n_rows[i] = (S16 *)ps_memtabs[count].pu1_mem;
   2006             }
   2007         }
   2008         else
   2009         {
   2010             ps_memtabs[count].size = size;
   2011             ps_memtabs[count].align = 4;
   2012             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   2013         }
   2014         count++;
   2015     }
   2016 
   2017     /* Allocate a memtab for storing best search nodes 8x4 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
   2018     size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
   2019     for(i = 0; i < ps_prms->max_num_ref; i++)
   2020     {
   2021         if(mem_avail)
   2022         {
   2023             ASSERT(size == ps_memtabs[count].size);
   2024 
   2025             /* same row memory pointer is stored in all the threads */
   2026             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   2027             {
   2028                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   2029                 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i] =
   2030                     (search_node_t *)ps_memtabs[count].pu1_mem;
   2031             }
   2032         }
   2033         else
   2034         {
   2035             ps_memtabs[count].size = size;
   2036             ps_memtabs[count].align = 4;
   2037             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   2038         }
   2039         count++;
   2040     }
   2041     /* Allocate a memtab for storing best search nodes 4x8 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
   2042     size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
   2043     for(i = 0; i < ps_prms->max_num_ref; i++)
   2044     {
   2045         if(mem_avail)
   2046         {
   2047             ASSERT(size == ps_memtabs[count].size);
   2048 
   2049             /* same row memory pointer is stored in all the threads */
   2050             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   2051             {
   2052                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   2053                 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i] =
   2054                     (search_node_t *)ps_memtabs[count].pu1_mem;
   2055             }
   2056         }
   2057         else
   2058         {
   2059             ps_memtabs[count].size = size;
   2060             ps_memtabs[count].align = 4;
   2061             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   2062         }
   2063         count++;
   2064     }
   2065 
   2066     /* Allocate a memtab for each histogram. As many as num ref and number of threads */
    2067     /* Allocate a memtab for each histogram : one per reference, each sized for all threads */
   2068     {
   2069         size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
   2070         if(mem_avail)
   2071         {
   2072             mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
   2073 
   2074             ASSERT(size == ps_memtabs[count].size);
   2075 
    2076             /* divide the memory across the threads */
   2077             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   2078             {
   2079                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   2080                 ps_ctxt->aps_mv_hist[i] = ps_mv_hist;
   2081                 ps_mv_hist++;
   2082             }
   2083         }
   2084         else
   2085         {
   2086             ps_memtabs[count].size = size;
   2087             ps_memtabs[count].align = 8;
   2088             ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
   2089         }
   2090         count++;
   2091     }
   2092 
   2093     /* Memtabs : Search nodes for 8x8 blks */
   2094     for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   2095     {
   2096         search_results_t *ps_search_results = NULL;
   2097 
   2098         if(mem_avail)
   2099         {
   2100             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   2101         }
   2102 
   2103         if(mem_avail)
   2104         {
   2105             ps_search_results = &ps_ctxt->s_search_results_8x8;
   2106         }
   2107         count += hme_alloc_init_search_nodes(
   2108             ps_search_results,
   2109             &ps_memtabs[count],
   2110             mem_avail,
   2111             ps_prms->max_num_ref,
   2112             ps_prms->max_num_results);
   2113     }
   2114 
   2115     /* Weighted inputs, one for each ref  */
   2116     size = (ps_prms->max_num_ref + 1) * layer1_blk_width * layer1_blk_width *
   2117            ps_prms->i4_num_proc_thrds;
   2118     if(mem_avail)
   2119     {
   2120         U08 *pu1_mem;
   2121         ASSERT(ps_memtabs[count].size == size);
   2122         pu1_mem = ps_memtabs[count].pu1_mem;
   2123 
   2124         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   2125         {
   2126             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   2127 
   2128             for(i = 0; i < ps_prms->max_num_ref + 1; i++)
   2129             {
   2130                 ps_ctxt->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
   2131                 pu1_mem += (layer1_blk_width * layer1_blk_width);
   2132             }
   2133         }
   2134     }
   2135     else
   2136     {
   2137         ps_memtabs[count].size = size;
   2138         ps_memtabs[count].align = 16;
   2139         ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   2140     }
   2141     count++;
   2142 
    2143     /* if memory is allocated then initialise the frm prms pointer in each thread */
   2144     if(mem_avail)
   2145     {
   2146         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
   2147         {
   2148             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   2149 
   2150             ps_ctxt->ps_hme_frm_prms = &ps_master_ctxt->s_frm_prms;
   2151             ps_ctxt->ps_hme_ref_map = &ps_master_ctxt->s_ref_map;
   2152         }
   2153     }
   2154 
   2155     /* Memory for ihevce_me_optimised_function_list_t struct  */
   2156     if(mem_avail)
   2157     {
   2158         ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
   2159     }
   2160     else
   2161     {
   2162         ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
   2163         ps_memtabs[count].align = 16;
   2164         ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
   2165     }
   2166 
   2168     ASSERT(count < hme_coarse_num_alloc());
   2169     return (count);
   2170 }
   2171 
   2172 /*!
   2173 ******************************************************************************
   2174 * \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif
   2175 *
   2176 * \brief Returns to the caller key attributes relevant for dependency manager,
    2177 *        i.e., the number of vertical units in each layer
    2178 *
    2179 * \par Description:
    2180 *    This function requires, as a precondition, that the width and height of
    2181 *    the encode layer are known.
    2182 *    The dependency manager needs the number of layers, the number of vertical
    2183 *    units in each layer, and, for each vertical unit, its dependency on the
    2184 *    previous layer's units. From ME's perspective, a vertical unit is the
    2185 *    smallest unit vertically (spanning the entire row horizontally): a CTB
    2186 *    for the encode layer, and an 8x8 / 4x4 block for the non-encode layers.
   2187 *
   2188 * \param[in] num_layers : Number of ME Layers
   2189 * \param[in] pai4_ht    : Array storing ht at each layer
   2190 * \param[in] pai4_wd    : Array storing wd at each layer
   2191 * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each
   2192 *                     entry has num vertical units in that particular layer
   2193 *
   2194 * \return
   2195 *    None
   2196 *
   2197 * \author
   2198 *  Ittiam
   2199 *
   2200 *****************************************************************************
   2201 */
   2202 void ihevce_coarse_me_get_lyr_prms_dep_mngr(
   2203     WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr)
   2204 {
   2205     /* Height of current and next layers */
   2206     WORD32 ht_c, ht_n;
   2207     /* Blk ht at a given layer and next layer*/
   2208     WORD32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n;
   2209     /* Number of vertical units in current and next layer */
   2210     WORD32 num_vert_c, num_vert_n;
   2211 
   2212     WORD32 ctb_size = 64, num_enc_layers = 1, use_4x4 = 1, i;
   2213     UWORD8 au1_encode[MAX_NUM_LAYERS];
   2214 
   2215     memset(au1_encode, 0, num_layers);
   2216     memset(au1_encode, 1, num_enc_layers);
   2217 
   2218     ht_n = pai4_ht[num_layers - 2];
   2219     ht_c = pai4_ht[num_layers - 1];
   2220 
   2221     /* compute blk ht and unit ht for c and n */
   2222     if(au1_encode[num_layers - 1])
   2223     {
   2224         blk_ht_c = 16;
   2225         unit_ht_c = ctb_size;
   2226     }
   2227     else
   2228     {
   2229         blk_ht_c = hme_get_blk_size(use_4x4, num_layers - 1, num_layers, 0);
   2230         unit_ht_c = blk_ht_c;
   2231     }
   2232 
   2233     num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c;
    2234     /* In the new design the coarsest HME layer needs one */
    2235     /* additional row at the end of the frame, hence      */
    2236     /* num_vert_c is incremented by 1                     */
   2237     num_vert_c++;
   2238 
   2239     /*************************************************************************/
   2240     /* Run through each layer, set the number of vertical units              */
   2241     /*************************************************************************/
   2242     for(i = num_layers - 1; i > 0; i--)
   2243     {
   2244         pai4_num_vert_units_in_lyr[i] = num_vert_c;
   2245 
   2246         /* "n" is computed for first time */
   2247         ht_n = pai4_ht[i - 1];
   2248         blk_ht_n = hme_get_blk_size(use_4x4, i - 1, num_layers, 0);
   2249         unit_ht_n = blk_ht_n;
   2250         if(au1_encode[i - 1])
   2251             unit_ht_n = ctb_size;
   2252 
   2253         num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n;
   2254 
   2255         /* Compute the blk size and vert unit size in each layer             */
   2256         /* "c" denotes curr layer, and "n" denotes the layer to which result */
   2257         /* is projected to                                                   */
   2258         ht_c = ht_n;
   2259         blk_ht_c = blk_ht_n;
   2260         unit_ht_c = unit_ht_n;
   2261         num_vert_c = num_vert_n;
   2262     }
   2263 
   2264     /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
    2265     /* set the number of vertical units */
   2266     pai4_num_vert_units_in_lyr[0] = num_vert_c;
   2267 }
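/*
 * Worked example (layer sizes assumed, with 8-pel refine-layer units as used in
 * hme_coarse_dep_mngr_init() below): for a CTB-aligned 1920x1088 encode layer
 * with the usual halving per layer and 4 total layers (heights 1088, 544, 272,
 * 136), the coarsest layer uses 4-pel vertical units, giving (136 + 3) / 4 = 34
 * units plus the 1 extra row = 35; the refine layers use 8-pel units, giving 34
 * and 68; and the encode layer uses 64-pel CTB rows, giving
 * (1088 + 63) / 64 = 17.
 */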
   2268 
   2269 /**
   2270 ********************************************************************************
   2271 *  @fn     hme_coarse_dep_mngr_alloc_mem()
   2272 *
   2273 *  @brief  Requests memory for HME Dep Mngr
   2274 *
   2275 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
   2276 * \param[in] ps_init_prms : Create time static parameters
    2277 * \param[in] i4_mem_space : memspace in which the memory request should be done
   2278 *
   2279 *  @return  number of memtabs
   2280 ********************************************************************************
   2281 */
   2282 WORD32 hme_coarse_dep_mngr_alloc_mem(
   2283     iv_mem_rec_t *ps_mem_tab,
   2284     ihevce_static_cfg_params_t *ps_init_prms,
   2285     WORD32 i4_mem_space,
   2286     WORD32 i4_num_proc_thrds,
   2287     WORD32 i4_resolution_id)
   2288 {
   2289     WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
   2290     WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
   2291     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
   2292     WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
   2293     WORD32 min_cu_size;
   2294 
   2295     /* get the min cu size from config params */
   2296     min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
   2297 
   2298     min_cu_size = 1 << min_cu_size;
   2299 
   2300     /* Get the width and heights of different decomp layers */
   2301     *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
   2302             SET_CTB_ALIGN(
   2303                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
   2304 
   2305     *a_ht =
   2306         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
   2307         SET_CTB_ALIGN(
   2308             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
   2309 
   2310     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
   2311     ASSERT(n_tot_layers >= 3);
   2312 
    2313     /* --- Get the number of vertical units in each layer for dep. mngr -- */
   2314     ihevce_coarse_me_get_lyr_prms_dep_mngr(
   2315         n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
   2316 
    2317     /* Fill memtabs for HME layers, except for the L0 layer */
   2318     for(i = 1; i < n_tot_layers; i++)
   2319     {
   2320         n_dep_tabs += ihevce_dmgr_get_mem_recs(
   2321             &ps_mem_tab[n_dep_tabs],
   2322             DEP_MNGR_ROW_ROW_SYNC,
   2323             ai4_num_vert_units_in_lyr[i],
   2324             1, /* Number of Col Tiles :  Not supported in PreEnc */
   2325             i4_num_proc_thrds,
   2326             i4_mem_space);
   2327     }
   2328 
   2329     ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc());
   2330 
   2331     return (n_dep_tabs);
   2332 }
   2333 
   2334 /**
   2335 ********************************************************************************
   2336 *  @fn     hme_coarse_dep_mngr_init()
   2337 *
   2338 *  @brief  Assign memory for HME Dep Mngr
   2339 *
   2340 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
   2341 * \param[in] ps_init_prms : Create time static parameters
   2342 *  @param[in] pv_ctxt : ME ctxt
   2343 * \param[in] pv_osal_handle : Osal handle
   2344 *
   2345 *  @return  number of memtabs
   2346 ********************************************************************************
   2347 */
   2348 WORD32 hme_coarse_dep_mngr_init(
   2349     iv_mem_rec_t *ps_mem_tab,
   2350     ihevce_static_cfg_params_t *ps_init_prms,
   2351     void *pv_ctxt,
   2352     void *pv_osal_handle,
   2353     WORD32 i4_num_proc_thrds,
   2354     WORD32 i4_resolution_id)
   2355 {
   2356     WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
   2357     WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
   2358     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
   2359     WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
   2360     WORD32 min_cu_size;
   2361 
   2362     coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
   2363 
   2364     /* get the min cu size from config params */
   2365     min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
   2366 
   2367     min_cu_size = 1 << min_cu_size;
   2368 
   2369     /* Get the width and heights of different decomp layers */
   2370     *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
   2371             SET_CTB_ALIGN(
   2372                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
   2373     *a_ht =
   2374         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
   2375         SET_CTB_ALIGN(
   2376             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
   2377 
   2378     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
   2379     ASSERT(n_tot_layers >= 3);
   2380 
    2381     /* --- Get the number of vertical units in each layer for dep. mngr -- */
   2382     ihevce_coarse_me_get_lyr_prms_dep_mngr(
   2383         n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
   2384 
   2385     /* --- HME sync Dep Mngr Mem init --    */
   2386     for(i = 1; i < n_tot_layers; i++)
   2387     {
   2388         WORD32 num_blks_in_row, num_blks_in_pic, blk_size_shift;
   2389 
   2390         if(i == (n_tot_layers - 1)) /* coarsest layer */
   2391             blk_size_shift = 2;
   2392         else
   2393             blk_size_shift = 3; /* refine layers */
   2394 
   2395         GET_NUM_BLKS_IN_PIC(a_wd[i], a_ht[i], blk_size_shift, num_blks_in_row, num_blks_in_pic);
   2396 
   2397         /* Coarsest layer : 1 block extra, since the last block */
   2398         if(i == (n_tot_layers - 1)) /*  in a row needs East block */
   2399             num_blks_in_row += 1;
   2400 
   2401         /* Note : i-1, only for HME layers, L0 is separate */
   2402         ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1] = ihevce_dmgr_init(
   2403             &ps_mem_tab[n_dep_tabs],
   2404             pv_osal_handle,
   2405             DEP_MNGR_ROW_ROW_SYNC,
   2406             ai4_num_vert_units_in_lyr[i],
   2407             num_blks_in_row,
   2408             1, /* Number of Col Tiles : Not supported in PreEnc */
   2409             i4_num_proc_thrds,
   2410             1 /*Sem disabled*/
   2411         );
   2412 
   2413         n_dep_tabs += ihevce_dmgr_get_num_mem_recs();
   2414     }
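     /* Example with assumed layer sizes: a 240x136 coarsest layer with
      * blk_size_shift = 2 gives num_blks_in_row = 240 >> 2 = 60, plus the
      * extra East block = 61; a 480x272 refine layer with blk_size_shift = 3
      * gives 480 >> 3 = 60 blocks per row. */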
   2415 
   2416     return n_dep_tabs;
   2417 }
   2418 
   2419 /**
   2420 ********************************************************************************
   2421 *  @fn     hme_coarse_dep_mngr_reg_sem()
   2422 *
   2423 *  @brief  Assign semaphores for HME Dep Mngr
   2424 *
   2425 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
    2426 * \param[in] ppv_sem_hdls : Array of semaphore handles
   2427 * \param[in] i4_num_proc_thrds : Number of processing threads
   2428 *
    2429 *  @return  none
   2430 ********************************************************************************
   2431 */
   2432 void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
   2433 {
   2434     WORD32 i;
   2435     coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
   2436     coarse_me_ctxt_t *ps_ctxt = ps_me_ctxt->aps_me_ctxt[0];
   2437 
   2438     /* --- HME sync Dep Mngr semaphore init --    */
   2439     for(i = 1; i < ps_ctxt->num_layers; i++)
   2440     {
   2441         ihevce_dmgr_reg_sem_hdls(
   2442             ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1], ppv_sem_hdls, i4_num_proc_thrds);
   2443     }
   2444 
   2445     return;
   2446 }
   2447 
   2448 /**
   2449 ********************************************************************************
   2450 *  @fn     hme_coarse_dep_mngr_delete()
   2451 *
    2452 *    Destroy the Coarse ME Dep Mngr module.
    2453 *   Note : Only destroys the resources allocated in the module, such as
    2454 *   semaphores, etc. Memory is freed separately using memtabs
   2455 *
   2456 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
   2457 * \param[in] ps_init_prms : Create time static parameters
   2458 *
   2459 *  @return  none
   2460 ********************************************************************************
   2461 */
   2462 void hme_coarse_dep_mngr_delete(
   2463     void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
   2464 {
   2465     WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
   2466     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
   2467     WORD32 n_enc_layers = 1, n_tot_layers, i;
   2468     WORD32 min_cu_size;
   2469 
   2470     coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
   2471 
   2472     /* get the min cu size from config params */
   2473     min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
   2474 
   2475     min_cu_size = 1 << min_cu_size;
   2476 
   2477     /* Get the width and heights of different decomp layers */
   2478     *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
   2479             SET_CTB_ALIGN(
   2480                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
   2481     *a_ht =
   2482         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
   2483         SET_CTB_ALIGN(
   2484             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
   2485     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
   2486     ASSERT(n_tot_layers >= 3);
   2487 
   2488     /* --- HME sync Dep Mngr Delete --    */
   2489     for(i = 1; i < n_tot_layers; i++)
   2490     {
   2491         /* Note : i-1, only for HME layers, L0 is separate */
   2492         ihevce_dmgr_del(ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1]);
   2493     }
   2494 }
   2495 
   2496 /**
   2497 *******************************************************************************
   2498 *  @fn     S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
   2499 *
   2500 *  @brief  Fills up memtabs with memory information details required by HME
   2501 *
    2502 *  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
   2503 *              up its requirements of memory
   2504 *
    2505 *  @param[in] ps_prms : Input parameters to the module, needed to calculate the
    2506 *                       required amount of memory
   2507 *
   2508 *  @return   Number of memtabs required
   2509 *******************************************************************************
   2510 */
   2511 S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel)
   2512 {
   2513     S32 num, tot, i;
   2514 
   2515     /* Validation of init params */
   2516     if(-1 == hme_validate_init_prms(ps_prms))
   2517         return (-1);
   2518 
   2519     num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0, i4_num_me_frm_pllel);
   2520     tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
   2521     for(i = num; i < tot; i++)
   2522     {
   2523         ps_memtabs[i].size = 4;
   2524         ps_memtabs[i].align = 4;
   2525         ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
   2526     }
   2527     return (tot);
   2528 }
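/*
 * Caller-side sketch of the two-pass memtab protocol used above (illustrative
 * only; in the encoder this is driven by the higher-level memory manager, and
 * a real allocator must also honour ps_memtabs[i].align, which malloc() below
 * does not guarantee):
 *
 *   S32 i, tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
 *
 *   hme_enc_alloc(ps_memtabs, ps_prms, i4_num_me_frm_pllel);      // query pass
 *
 *   for(i = 0; i < tot; i++)                                      // caller allocates
 *       ps_memtabs[i].pu1_mem = (U08 *)malloc(ps_memtabs[i].size);
 *
 *   hme_enc_alloc_init_mem(                                       // assign pass
 *       ps_memtabs, ps_prms, ps_memtabs[0].pu1_mem, 1, i4_num_me_frm_pllel);
 *
 * As in hme_coarse_alloc_init_mem(), memtab 0 holds the module handle (master
 * context), so its memory serves as the pv_ctxt argument for the assign pass.
 */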
   2529 
   2530 /**
   2531 *******************************************************************************
   2532 *  @fn     S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
   2533 *
   2534 *  @brief  Fills up memtabs with memory information details required by Coarse HME
   2535 *
    2536 *  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
   2537 *              up its requirements of memory
   2538 *
    2539 *  @param[in] ps_prms : Input parameters to the module, needed to calculate the
    2540 *                       required amount of memory
   2541 *
   2542 *  @return   Number of memtabs required
   2543 *******************************************************************************
   2544 */
   2545 S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
   2546 {
   2547     S32 num, tot, i;
   2548 
   2549     /* Validation of init params */
   2550     if(-1 == hme_validate_init_prms(ps_prms))
   2551         return (-1);
   2552 
   2553     num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0);
   2554     tot = hme_coarse_num_alloc();
   2555     for(i = num; i < tot; i++)
   2556     {
   2557         ps_memtabs[i].size = 4;
   2558         ps_memtabs[i].align = 4;
   2559         ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
   2560     }
   2561     return (tot);
   2562 }
   2563 
   2564 /**
   2565 *******************************************************************************
   2566 *  @fn hme_coarse_dep_mngr_alloc
   2567 *
   2568 *  @brief  Fills up memtabs with memory information details required by Coarse HME
   2569 *
   2570 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
   2571 * \param[in] ps_init_prms : Create time static parameters
    2572 * \param[in] i4_mem_space : memory space in which the memory request should be made
   2573 *
   2574 *  @return   Number of memtabs required
   2575 *******************************************************************************
   2576 */
   2577 WORD32 hme_coarse_dep_mngr_alloc(
   2578     iv_mem_rec_t *ps_mem_tab,
   2579     ihevce_static_cfg_params_t *ps_init_prms,
   2580     WORD32 i4_mem_space,
   2581     WORD32 i4_num_proc_thrds,
   2582     WORD32 i4_resolution_id)
   2583 {
   2584     S32 num, tot, i;
   2585 
   2586     num = hme_coarse_dep_mngr_alloc_mem(
   2587         ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
   2588     tot = hme_coarse_dep_mngr_num_alloc();
   2589     for(i = num; i < tot; i++)
   2590     {
   2591         ps_mem_tab[i].i4_mem_size = 4;
   2592         ps_mem_tab[i].i4_mem_alignment = 4;
   2593         ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   2594     }
   2595     return (tot);
   2596 }
   2597 
   2598 /**
   2599 ********************************************************************************
   2600 *  @fn     hme_coarse_init_ctxt()
   2601 *
   2602 *  @brief  initialise context memory
   2603 *
   2604 *  @param[in] ps_prms : init prms
   2605 *
   2606 *  @param[in] pv_ctxt : ME ctxt
   2607 *
   2608 *  @return  number of memtabs
   2609 ********************************************************************************
   2610 */
   2611 void hme_coarse_init_ctxt(coarse_me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms)
   2612 {
   2613     S32 i, j, num_thrds;
   2614     coarse_me_ctxt_t *ps_ctxt;
   2615     S32 num_rows_coarse;
   2616 
    2617     /* initialise the parameters into the context of all threads */
   2618     for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
   2619     {
   2620         ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   2621 
   2622         /* Copy the init prms to context */
   2623         ps_ctxt->s_init_prms = *ps_prms;
   2624 
   2625         /* Initialize some other variables in ctxt */
   2626         ps_ctxt->i4_prev_poc = -1;
   2627 
   2628         ps_ctxt->num_b_frms = ps_prms->num_b_frms;
   2629 
   2630         ps_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_ctxt->au1_ref_bits_tlu_lc[0][0];
   2631         ps_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_ctxt->au1_ref_bits_tlu_lc[1][0];
   2632 
    2633         /* Initialize the row index lookup table */
   2634         ps_ctxt->i4_num_row_bufs = ps_prms->i4_num_proc_thrds + 1;
   2635         num_rows_coarse = ps_ctxt->i4_num_row_bufs;
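                 /* e.g. with 4 processing threads, i4_num_row_bufs = 5 and the  */
                 /* coarse layer rows map to row buffers 0,1,2,3,4,0,1,... so    */
                 /* adjacent rows never share a buffer.                          */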
   2636         for(i = 0; i < ((HEVCE_MAX_HEIGHT >> 1) >> 2); i++)
   2637         {
   2638             ps_ctxt->ai4_row_index[i] = (i % num_rows_coarse);
   2639         }
   2640     }
   2641 
   2642     /* since same layer desc pointer is stored in all the threads ctxt */
   2643     /* layer init is done only using 0th thread ctxt                   */
   2644     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
   2645 
    2646     /* Initialize all layer descriptors with poc = -1, meaning unfilled */
   2647     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
   2648     {
   2649         for(j = 1; j < ps_ctxt->num_layers; j++)
   2650         {
   2651             layer_ctxt_t *ps_layer;
   2652             ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
   2653             ps_layer->i4_poc = -1;
   2654             ps_layer->ppu1_list_inp = &ps_ctxt->apu1_list_inp[j][0];
   2655             memset(
   2656                 ps_layer->s_global_mv, 0, sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES);
   2657         }
   2658     }
   2659 }
   2660 
   2661 /**
   2662 ********************************************************************************
   2663 *  @fn     hme_enc_init_ctxt()
   2664 *
   2665 *  @brief  initialise context memory
   2666 *
    2667 *  @param[in] ps_master_ctxt : ME master ctxt
    2668 *
    2669 *  @param[in] ps_prms : init prms
    2670 *
    2671 *  @return  void
   2672 ********************************************************************************
   2673 */
   2674 void hme_enc_init_ctxt(
   2675     me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt)
   2676 {
   2677     S32 i, j, num_thrds;
   2678     me_ctxt_t *ps_ctxt;
   2679     me_frm_ctxt_t *ps_frm_ctxt;
   2680 
    2681     /* initialise the parameters in the context of all threads */
   2682     for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
   2683     {
   2684         ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   2685         /* Store Tile params base into ME context */
   2686         ps_ctxt->pv_tile_params_base = ps_master_ctxt->pv_tile_params_base;
   2687 
   2688         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
   2689         {
   2690             ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
   2691 
   2692             /* Copy the init prms to context */
   2693             ps_ctxt->s_init_prms = *ps_prms;
   2694 
   2695             /* Initialize some other variables in ctxt */
   2696             ps_frm_ctxt->i4_prev_poc = INVALID_POC;
   2697 
   2698             ps_frm_ctxt->log_ctb_size = ps_prms->log_ctb_size;
   2699 
   2700             ps_frm_ctxt->num_b_frms = ps_prms->num_b_frms;
   2701 
   2702             ps_frm_ctxt->i4_is_prev_frame_reference = 0;
   2703 
   2704             ps_frm_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
   2705 
   2706             /* Initialize mv grids for L0 and L1 used in final refinement layer */
   2707             {
   2708                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[0]);
   2709                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[1]);
   2710                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[0]);
   2711                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[1]);
   2712                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[0]);
   2713                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[1]);
   2714             }
   2715 
   2716             ps_frm_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[0][0];
   2717             ps_frm_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[1][0];
   2718         }
   2719     }
   2720 
   2721     /* since same layer desc pointer is stored in all the threads ctxt */
   2722     /* layer init is done only using 0th thread ctxt                   */
   2723     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
   2724 
   2725     ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[0];
   2726 
    2727     /* Initialize all layer descriptors with poc = INVALID_POC, meaning unfilled */
   2728     for(i = 0; i < (ps_frm_ctxt->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1; i++)
   2729     {
    2730         /* only encode layer is processed */
   2731         for(j = 0; j < 1; j++)
   2732         {
   2733             layer_ctxt_t *ps_layer;
   2734             ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
   2735             ps_layer->i4_poc = INVALID_POC;
   2736             ps_layer->i4_is_free = 1;
   2737             ps_layer->ppu1_list_inp = &ps_frm_ctxt->apu1_list_inp[j][0];
   2738             ps_layer->ppu1_list_rec_fxfy = &ps_frm_ctxt->apu1_list_rec_fxfy[j][0];
   2739             ps_layer->ppu1_list_rec_hxfy = &ps_frm_ctxt->apu1_list_rec_hxfy[j][0];
   2740             ps_layer->ppu1_list_rec_fxhy = &ps_frm_ctxt->apu1_list_rec_fxhy[j][0];
   2741             ps_layer->ppu1_list_rec_hxhy = &ps_frm_ctxt->apu1_list_rec_hxhy[j][0];
   2742             ps_layer->ppv_dep_mngr_recon = &ps_frm_ctxt->apv_list_dep_mngr[j][0];
   2743 
   2744             memset(
   2745                 ps_layer->s_global_mv,
   2746                 0,
   2747                 sizeof(hme_mv_t) * ps_frm_ctxt->max_num_ref * NUM_GMV_LOBES);
   2748         }
   2749     }
   2750 }
   2751 
   2752 /**
   2753 *******************************************************************************
    2754 *  @fn     S32 hme_enc_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt, WORD32 i4_num_me_frm_pllel)
   2755 *
   2756 *  @brief  Initialises the Encode Layer HME ctxt
   2757 *
   2758 *  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
   2759 *              up its requirements of memory
   2760 *
   2761 *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
   2762 *                       amt of memory
   2763 *
   2764 *  @return   Number of memtabs required
   2765 *******************************************************************************
   2766 */
   2767 S32 hme_enc_init(
   2768     void *pv_ctxt,
   2769     hme_memtab_t *ps_memtabs,
   2770     hme_init_prms_t *ps_prms,
   2771     rc_quant_t *ps_rc_quant_ctxt,
   2772     WORD32 i4_num_me_frm_pllel)
   2773 {
   2774     S32 num, tot;
   2775     me_master_ctxt_t *ps_ctxt = (me_master_ctxt_t *)pv_ctxt;
   2776 
   2777     tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
   2778     /* Validation of init params */
   2779     if(-1 == hme_validate_init_prms(ps_prms))
   2780         return (-1);
   2781 
   2782     num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1, i4_num_me_frm_pllel);
   2783     if(num > tot)
   2784         return (-1);
   2785 
   2786     /* Initialize all enumerations based globals */
   2787     //hme_init_globals(); /* done as part of coarse me */
   2788 
   2789     /* Copy the memtabs into the context for returning during free */
   2790     memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
   2791 
   2792     /* initialize the context and related buffers */
   2793     hme_enc_init_ctxt(ps_ctxt, ps_prms, ps_rc_quant_ctxt);
   2794     return (0);
   2795 }
   2796 
   2797 /**
   2798 *******************************************************************************
   2799 *  @fn     S32 hme_coarse_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
   2800 *
   2801 *  @brief  Initialises the Coarse HME ctxt
   2802 *
   2803 *  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
   2804 *              up its requirements of memory
   2805 *
   2806 *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
   2807 *                       amt of memory
   2808 *
   2809 *  @return   Number of memtabs required
   2810 *******************************************************************************
   2811 */
   2812 S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
   2813 {
   2814     S32 num, tot;
   2815     coarse_me_master_ctxt_t *ps_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
   2816 
   2817     tot = hme_coarse_num_alloc();
   2818     /* Validation of init params */
   2819     if(-1 == hme_validate_init_prms(ps_prms))
   2820         return (-1);
   2821 
   2822     num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1);
   2823     if(num > tot)
   2824         return (-1);
   2825 
   2826     /* Initialize all enumerations based globals */
   2827     hme_init_globals();
   2828 
   2829     /* Copy the memtabs into the context for returning during free */
   2830     memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
   2831 
   2832     /* initialize the context and related buffers */
   2833     hme_coarse_init_ctxt(ps_ctxt, ps_prms);
   2834 
   2835     return (0);
   2836 }
   2837 
   2838 /**
   2839 *******************************************************************************
    2840 *  @fn     void hme_set_resolution(void *pv_me_ctxt,
   2841 *                                   S32 n_enc_layers,
   2842 *                                   S32 *p_wd,
    2843 *                                   S32 *p_ht, S32 me_frm_id)
   2844 *
   2845 *  @brief  Sets up the layers based on resolution information.
   2846 *
   2847 *  @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
   2848 *
   2849 *  @param[in] n_enc_layers : Number of layers encoded
   2850 *
   2851 *  @param[in] p_wd : Pointer to an array having widths for each encode layer
   2852 *
   2853 *  @param[in] p_ht : Pointer to an array having heights for each encode layer
   2854 *
   2855 *  @return   void
   2856 *******************************************************************************
   2857 */
   2858 
   2859 void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id)
   2860 {
   2861     S32 n_tot_layers, num_layers_explicit_search, i, j;
   2862     me_ctxt_t *ps_thrd_ctxt;
   2863     me_frm_ctxt_t *ps_ctxt;
   2864 
   2865     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
   2866     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
   2867     memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
   2868     memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));
   2869 
   2870     ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
   2871 
   2872     ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];
   2873 
   2874     /*************************************************************************/
    2875     /* Derive the number of HME layers, including both encode and non-encode. */
   2876     /* This function also derives the width and ht of each layer.            */
   2877     /*************************************************************************/
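             /* e.g. a single 1920x1080 encode layer typically yields non-encode  */
             /* HME layers at 960x540, 480x270, ... with each successive layer    */
             /* half the width and height of the one below it.                    */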
   2878     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
   2879     num_layers_explicit_search = ps_thrd_ctxt->s_init_prms.num_layers_explicit_search;
   2880     if(num_layers_explicit_search <= 0)
   2881         num_layers_explicit_search = n_tot_layers - 1;
   2882 
   2883     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
   2884     ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
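             /* Mark the encode layers : clear the flag for all layers, then set */
             /* it for the first n_enc_layers entries (layer 0 upwards).         */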
   2885     memset(ps_ctxt->u1_encode, 0, n_tot_layers);
   2886     memset(ps_ctxt->u1_encode, 1, n_enc_layers);
   2887 
   2888     /* only encode layer should be processed */
   2889     ps_ctxt->num_layers = n_tot_layers;
   2890 
   2891     ps_ctxt->i4_wd = a_wd[0];
   2892     ps_ctxt->i4_ht = a_ht[0];
   2893 
   2894     /* Memtabs : Layers * num-ref + 1 */
   2895     for(i = 0; i < ps_ctxt->max_num_ref + 1; i++)
   2896     {
   2897         for(j = 0; j < 1; j++)
   2898         {
   2899             S32 wd, ht;
   2900             layer_ctxt_t *ps_layer;
   2901             U08 u1_enc = ps_ctxt->u1_encode[j];
   2902             wd = a_wd[j];
   2903             ht = a_ht[j];
   2904             ps_layer = ps_thrd_ctxt->as_ref_descr[i].aps_layers[j];
   2905             hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
   2906         }
   2907     }
   2908 }
   2909 
   2910 /**
   2911 *******************************************************************************
    2912 *  @fn     void hme_coarse_set_resolution(void *pv_me_ctxt,
   2913 *                                   S32 n_enc_layers,
   2914 *                                   S32 *p_wd,
    2915 *                                   S32 *p_ht)
   2916 *
   2917 *  @brief  Sets up the layers based on resolution information.
   2918 *
   2919 *  @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
   2920 *
   2921 *  @param[in] n_enc_layers : Number of layers encoded
   2922 *
   2923 *  @param[in] p_wd : Pointer to an array having widths for each encode layer
   2924 *
   2925 *  @param[in] p_ht : Pointer to an array having heights for each encode layer
   2926 *
   2927 *  @return   void
   2928 *******************************************************************************
   2929 */
   2930 
   2931 void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht)
   2932 {
   2933     S32 n_tot_layers, num_layers_explicit_search, i, j;
   2934     coarse_me_ctxt_t *ps_ctxt;
   2935     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
   2936     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
   2937     memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
   2938     memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));
   2939 
   2940     ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
   2941     /*************************************************************************/
    2942     /* Derive the number of HME layers, including both encode and non-encode. */
   2943     /* This function also derives the width and ht of each layer.            */
   2944     /*************************************************************************/
   2945     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
   2946     num_layers_explicit_search = ps_ctxt->s_init_prms.num_layers_explicit_search;
   2947     if(num_layers_explicit_search <= 0)
   2948         num_layers_explicit_search = n_tot_layers - 1;
   2949 
   2950     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
   2951     ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
   2952     memset(ps_ctxt->u1_encode, 0, n_tot_layers);
   2953     memset(ps_ctxt->u1_encode, 1, n_enc_layers);
   2954 
   2955     /* encode layer should be excluded */
   2956     ps_ctxt->num_layers = n_tot_layers;
   2957 
   2958     memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
   2959     memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
   2960 
   2961     /* Memtabs : Layers * num-ref + 1 */
   2962     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
   2963     {
   2964         for(j = 1; j < n_tot_layers; j++)
   2965         {
   2966             S32 wd, ht;
   2967             layer_ctxt_t *ps_layer;
   2968             U08 u1_enc = ps_ctxt->u1_encode[j];
   2969             wd = a_wd[j];
   2970             ht = a_ht[j];
   2971             ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
   2972             hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
   2973         }
   2974     }
   2975 }
   2976 
   2977 S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
   2978 {
   2979     S32 i;
   2980 
   2981     for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
   2982     {
   2983         if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc == i4_poc &&
   2984            ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num == i4_idr_gop_num)
   2985             return i;
   2986     }
   2987     /* Should not come here */
   2988     ASSERT(0);
   2989     return (-1);
   2990 }
   2991 
   2992 S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc)
   2993 {
   2994     S32 i;
   2995 
   2996     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
   2997     {
   2998         if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == i4_poc)
   2999             return i;
   3000     }
   3001     /* Should not come here */
   3002     ASSERT(0);
   3003     return (-1);
   3004 }
   3005 
   3006 S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_num_me_frm_pllel)
   3007 {
   3008     S32 i;
   3009 
   3010     for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
   3011     {
   3012         if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free == 1)
   3013         {
   3014             ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free = 0;
   3015             return i;
   3016         }
   3017     }
   3018     /* Should not come here */
   3019     ASSERT(0);
   3020     return (-1);
   3021 }
   3022 
   3023 S32 hme_coarse_find_free_descr_idx(void *pv_ctxt)
   3024 {
   3025     S32 i;
   3026 
   3027     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_ctxt;
   3028 
   3029     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
   3030     {
   3031         if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == -1)
   3032             return i;
   3033     }
   3034     /* Should not come here */
   3035     ASSERT(0);
   3036     return (-1);
   3037 }
   3038 
   3039 void hme_discard_frm(
   3040     void *pv_me_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
   3041 {
   3042     me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
   3043     S32 count = 0, idx, i;
   3044     layers_descr_t *ps_descr;
   3045 
   3046     /* Search for the id of the layer descriptor that has this poc */
   3047     while(p_pocs_to_remove[count] != INVALID_POC)
   3048     {
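                 /* At most one POC is expected to be discarded per call in the */
                 /* encode layer path, hence the assert on count below.         */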
   3049         ASSERT(count == 0);
   3050         idx = hme_find_descr_idx(
   3051             ps_ctxt, p_pocs_to_remove[count], i4_idr_gop_num, i4_num_me_frm_pllel);
   3052         ps_descr = &ps_ctxt->as_ref_descr[idx];
   3053         /*********************************************************************/
    3054         /* Setting i4_is_free = 1 in the encode layer invalidates this descriptor */
   3055         /* Now this can be used for a fresh picture.                         */
   3056         /*********************************************************************/
   3057         for(i = 0; i < 1; i++)
   3058         {
   3059             ps_descr->aps_layers[i]->i4_is_free = 1;
   3060         }
   3061         count++;
   3062     }
   3063 }
   3064 
   3065 void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove)
   3066 {
   3067     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
   3068     S32 count = 0, idx, i;
   3069     layers_descr_t *ps_descr;
   3070 
   3071     /* Search for the id of the layer descriptor that has this poc */
   3072     while(p_pocs_to_remove[count] != -1)
   3073     {
   3074         idx = hme_coarse_find_descr_idx(ps_ctxt, p_pocs_to_remove[count]);
   3075         ps_descr = &ps_ctxt->as_ref_descr[idx];
   3076         /*********************************************************************/
   3077         /* Setting poc = -1 in all layers invalidates this layer ctxt        */
   3078         /* Now this can be used for a fresh picture.                         */
   3079         /*********************************************************************/
   3080         for(i = 1; i < ps_ctxt->num_layers; i++)
   3081         {
   3082             ps_descr->aps_layers[i]->i4_poc = -1;
   3083         }
   3084         count++;
   3085     }
   3086 }
   3087 
   3088 void hme_update_layer_desc(
   3089     layers_descr_t *ps_layers_desc,
   3090     hme_ref_desc_t *ps_ref_desc,
   3091     S32 start_lyr_id,
   3092     S32 num_layers,
   3093     layers_descr_t *ps_curr_desc)
   3094 {
   3095     layer_ctxt_t *ps_layer_ctxt, *ps_curr_layer;
   3096     S32 i;
   3097     for(i = start_lyr_id; i < num_layers; i++)
   3098     {
   3099         ps_layer_ctxt = ps_layers_desc->aps_layers[i];
   3100         ps_curr_layer = ps_curr_desc->aps_layers[i];
   3101 
   3102         ps_layer_ctxt->i4_poc = ps_ref_desc->i4_poc;
   3103         ps_layer_ctxt->i4_idr_gop_num = ps_ref_desc->i4_GOP_num;
   3104 
   3105         /* Copy the recon planes for the given reference pic at given layer */
   3106         ps_layer_ctxt->pu1_rec_fxfy = ps_ref_desc->as_ref_info[i].pu1_rec_fxfy;
   3107         ps_layer_ctxt->pu1_rec_hxfy = ps_ref_desc->as_ref_info[i].pu1_rec_hxfy;
   3108         ps_layer_ctxt->pu1_rec_fxhy = ps_ref_desc->as_ref_info[i].pu1_rec_fxhy;
   3109         ps_layer_ctxt->pu1_rec_hxhy = ps_ref_desc->as_ref_info[i].pu1_rec_hxhy;
   3110 
   3111         /*********************************************************************/
   3112         /* reconstruction strides, offsets and padding info are copied for   */
   3113         /* this reference pic. It is assumed that these will be same across  */
   3114         /* pics, so even the current pic has this info updated, though the   */
   3115         /* current pic still does not have valid recon pointers.             */
   3116         /*********************************************************************/
   3117         ps_layer_ctxt->i4_rec_stride = ps_ref_desc->as_ref_info[i].luma_stride;
   3118         ps_layer_ctxt->i4_rec_offset = ps_ref_desc->as_ref_info[i].luma_offset;
   3119         ps_layer_ctxt->i4_pad_x_rec = ps_ref_desc->as_ref_info[i].u1_pad_x;
   3120         ps_layer_ctxt->i4_pad_y_rec = ps_ref_desc->as_ref_info[i].u1_pad_y;
   3121 
   3122         ps_curr_layer->i4_rec_stride = ps_ref_desc->as_ref_info[i].luma_stride;
   3123         ps_curr_layer->i4_pad_x_rec = ps_ref_desc->as_ref_info[i].u1_pad_x;
   3124         ps_curr_layer->i4_pad_y_rec = ps_ref_desc->as_ref_info[i].u1_pad_y;
   3125     }
   3126 }
   3127 
   3128 void hme_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, S32 i4_thrd_id)
   3129 {
   3130     layers_descr_t *ps_desc;
   3131     layer_ctxt_t *ps_layer_ctxt;
   3132     me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
   3133     me_ctxt_t *ps_thrd_ctxt;
   3134     me_frm_ctxt_t *ps_ctxt;
   3135 
   3136     hme_inp_buf_attr_t *ps_attr;
   3137     S32 i4_poc, idx, i, i4_prev_poc;
   3138     S32 num_thrds, prev_me_frm_id;
   3139     S32 i4_idr_gop_num, i4_is_reference;
   3140 
   3141     /* since same layer desc pointer is stored in all thread ctxt */
   3142     /* a free idx is obtained using 0th thread ctxt pointer */
   3143 
   3144     ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
   3145 
   3146     ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];
   3147 
   3148     /* Deriving the previous poc from previous frames context */
   3149     if(me_frm_id == 0)
   3150         prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1);
   3151     else
   3152         prev_me_frm_id = me_frm_id - 1;
   3153 
   3154     i4_prev_poc = ps_thrd_ctxt->aps_me_frm_prms[prev_me_frm_id]->i4_curr_poc;
   3155 
   3156     /* Obtain an empty layer descriptor */
   3157     idx = hme_find_free_descr_idx(ps_thrd_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
   3158     ps_desc = &ps_thrd_ctxt->as_ref_descr[idx];
   3159 
   3160     /* initialise the parameters for all the threads */
   3161     for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
   3162     {
   3163         me_frm_ctxt_t *ps_tmp_frm_ctxt;
   3164 
   3165         ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   3166         ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];
   3167 
   3168         ps_tmp_frm_ctxt->ps_curr_descr = &ps_thrd_ctxt->as_ref_descr[idx];
   3169 
    3170         /* Read the frame attributes from the input descriptor */
   3171         i4_poc = ps_inp_desc->i4_poc;
   3172         i4_idr_gop_num = ps_inp_desc->i4_idr_gop_num;
   3173         i4_is_reference = ps_inp_desc->i4_is_reference;
   3174         /*Update poc id of previously encoded frm and curr frm */
   3175         ps_tmp_frm_ctxt->i4_prev_poc = i4_prev_poc;
   3176         ps_tmp_frm_ctxt->i4_curr_poc = i4_poc;
   3177     }
   3178 
   3179     /* since same layer desc pointer is stored in all thread ctxt */
   3180     /* following processing is done using 0th thread ctxt pointer */
   3181     ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[0];
   3182 
   3183     /* only encode layer */
   3184     for(i = 0; i < 1; i++)
   3185     {
   3186         ps_layer_ctxt = ps_desc->aps_layers[i];
   3187         ps_attr = &ps_inp_desc->s_layer_desc[i];
   3188 
   3189         ps_layer_ctxt->i4_poc = i4_poc;
   3190         ps_layer_ctxt->i4_idr_gop_num = i4_idr_gop_num;
   3191         ps_layer_ctxt->i4_is_reference = i4_is_reference;
   3192         ps_layer_ctxt->i4_non_ref_free = 0;
   3193 
   3194         /* If this layer is encoded, copy input attributes */
   3195         if(ps_ctxt->u1_encode[i])
   3196         {
   3197             ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
   3198             ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
   3199             ps_layer_ctxt->i4_pad_x_inp = 0;
   3200             ps_layer_ctxt->i4_pad_y_inp = 0;
   3201         }
   3202         else
   3203         {
   3204             /* If not encoded, then ME owns the buffer.*/
   3205             S32 wd, dst_stride;
   3206 
   3207             ASSERT(i != 0);
   3208 
   3209             wd = ps_ctxt->i4_wd;
   3210 
    3211             /* destination has a padding of 16 on either side */
   3212             dst_stride = CEIL16((wd >> 1)) + 32 + 4;
   3213             ps_layer_ctxt->i4_inp_stride = dst_stride;
   3214         }
   3215     }
   3216 
   3217     return;
   3218 }
   3219 
   3220 void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx)
   3221 {
   3222     layers_descr_t *ps_desc;
   3223     layer_ctxt_t *ps_layer_ctxt;
   3224     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
   3225     coarse_me_ctxt_t *ps_ctxt;
   3226     hme_inp_buf_attr_t *ps_attr;
   3227     S32 i4_poc, i;
   3228     S32 num_thrds;
   3229 
   3230     /* since same layer desc pointer is stored in all thread ctxt */
   3231     /* a free idx is obtained using 0th thread ctxt pointer */
   3232     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
   3233 
   3234     ps_desc = &ps_ctxt->as_ref_descr[i4_curr_idx];
   3235 
   3236     /* initialise the parameters for all the threads */
   3237     for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
   3238     {
   3239         ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
   3240         ps_ctxt->ps_curr_descr = &ps_ctxt->as_ref_descr[i4_curr_idx];
   3241         i4_poc = ps_inp_desc->i4_poc;
   3242 
   3243         /*Update poc id of previously encoded frm and curr frm */
   3244         ps_ctxt->i4_prev_poc = ps_ctxt->i4_curr_poc;
   3245         ps_ctxt->i4_curr_poc = i4_poc;
   3246     }
   3247 
   3248     /* since same layer desc pointer is stored in all thread ctxt */
   3249     /* following processing is done using 0th thread ctxt pointer */
   3250     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
   3251 
    3252     /* only non-encode layers */
   3253     for(i = 1; i < ps_ctxt->num_layers; i++)
   3254     {
   3255         ps_layer_ctxt = ps_desc->aps_layers[i];
   3256         ps_attr = &ps_inp_desc->s_layer_desc[i];
   3257 
   3258         ps_layer_ctxt->i4_poc = i4_poc;
   3259         /* If this layer is encoded, copy input attributes */
   3260         if(ps_ctxt->u1_encode[i])
   3261         {
   3262             ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
   3263             ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
   3264             ps_layer_ctxt->i4_pad_x_inp = 0;
   3265             ps_layer_ctxt->i4_pad_y_inp = 0;
   3266         }
   3267         else
   3268         {
   3269             /* If not encoded, then ME owns the buffer.           */
    3270             /* decomp of lower layers happens on a separate pass  */
   3271             /* Coarse Me should export the pointers to the caller */
   3272             S32 wd, dst_stride;
   3273 
   3274             ASSERT(i != 0);
   3275 
   3276             wd = ps_ctxt->a_wd[i - 1];
   3277 
    3278             /* destination has a padding of 16 on either side */
   3279             dst_stride = CEIL16((wd >> 1)) + 32 + 4;
   3280             ps_layer_ctxt->i4_inp_stride = dst_stride;
   3281         }
   3282     }
   3283 }
   3284 
   3285 static __inline U08 hme_determine_num_results_per_part(
   3286     U08 u1_layer_id, U08 u1_num_layers, ME_QUALITY_PRESETS_T e_quality_preset)
   3287 {
   3288     U08 u1_num_results_per_part = MAX_RESULTS_PER_PART;
   3289 
   3290     if((u1_layer_id == 0) && !!RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1)
   3291     {
   3292         switch(e_quality_preset)
   3293         {
   3294         case ME_XTREME_SPEED_25:
   3295         case ME_XTREME_SPEED:
   3296         case ME_HIGH_SPEED:
   3297         case ME_MEDIUM_SPEED:
   3298         case ME_HIGH_QUALITY:
   3299         case ME_PRISTINE_QUALITY:
   3300         {
   3301             u1_num_results_per_part = 1;
   3302 
   3303             break;
   3304         }
   3305         default:
   3306         {
   3307             u1_num_results_per_part = MAX_RESULTS_PER_PART;
   3308 
   3309             break;
   3310         }
   3311         }
   3312     }
   3313     else if((u1_layer_id == 1) && !!RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1)
   3314     {
   3315         switch(e_quality_preset)
   3316         {
   3317         case ME_XTREME_SPEED_25:
   3318         case ME_HIGH_QUALITY:
   3319         case ME_PRISTINE_QUALITY:
   3320         {
   3321             u1_num_results_per_part = 1;
   3322 
   3323             break;
   3324         }
   3325         default:
   3326         {
   3327             u1_num_results_per_part = MAX_RESULTS_PER_PART;
   3328 
   3329             break;
   3330         }
   3331         }
   3332     }
   3333     else if((u1_layer_id == 2) && (u1_num_layers > 3) && !!RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1)
   3334     {
   3335         switch(e_quality_preset)
   3336         {
   3337         case ME_XTREME_SPEED_25:
   3338         case ME_XTREME_SPEED:
   3339         case ME_HIGH_SPEED:
   3340         case ME_MEDIUM_SPEED:
   3341         {
   3342             u1_num_results_per_part = 1;
   3343 
   3344             break;
   3345         }
   3346         default:
   3347         {
   3348             u1_num_results_per_part = MAX_RESULTS_PER_PART;
   3349 
   3350             break;
   3351         }
   3352         }
   3353     }
   3354 
   3355     return u1_num_results_per_part;
   3356 }
   3357 
   3358 static __inline void hme_max_search_cands_per_search_cand_loc_populator(
   3359     hme_frm_prms_t *ps_frm_prms,
   3360     U08 *pu1_num_fpel_search_cands,
   3361     U08 u1_layer_id,
   3362     ME_QUALITY_PRESETS_T e_quality_preset)
   3363 {
   3364     if(0 == u1_layer_id)
   3365     {
   3366         S32 i;
   3367 
   3368         for(i = 0; i < NUM_SEARCH_CAND_LOCATIONS; i++)
   3369         {
   3370             switch(e_quality_preset)
   3371             {
   3372 #if RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC
   3373             case ME_XTREME_SPEED_25:
   3374             case ME_XTREME_SPEED:
   3375             case ME_HIGH_SPEED:
   3376             case ME_MEDIUM_SPEED:
   3377             {
   3378                 pu1_num_fpel_search_cands[i] = 1;
   3379 
   3380                 break;
   3381             }
   3382 #endif
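                     /* Default : number of candidates is the larger of the L0/L1 */
                     /* active reference counts (doubled for the colocated        */
                     /* location), with a floor of 2.                             */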
   3383             default:
   3384             {
   3385                 pu1_num_fpel_search_cands[i] =
   3386                     MAX(2,
   3387                         MAX(ps_frm_prms->u1_num_active_ref_l0, ps_frm_prms->u1_num_active_ref_l1) *
   3388                             ((COLOCATED == (SEARCH_CAND_LOCATIONS_T)i) + 1));
   3389 
   3390                 break;
   3391             }
   3392             }
   3393         }
   3394     }
   3395 }
   3396 
   3397 static __inline U08
   3398     hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
   3399 {
   3400     U08 u1_num_cands = 2;
   3401 
   3402     if((u1_layer_id == 0) && !!RESTRICT_NUM_2NX2N_TU_RECUR_CANDS)
   3403     {
   3404         switch(e_quality_preset)
   3405         {
   3406         case ME_XTREME_SPEED_25:
   3407         case ME_XTREME_SPEED:
   3408         case ME_HIGH_SPEED:
   3409         case ME_MEDIUM_SPEED:
   3410         {
   3411             u1_num_cands = 1;
   3412 
   3413             break;
   3414         }
   3415         default:
   3416         {
   3417             u1_num_cands = 2;
   3418 
   3419             break;
   3420         }
   3421         }
   3422     }
   3423 
   3424     return u1_num_cands;
   3425 }
   3426 
   3427 static __inline U08
   3428     hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
   3429 {
   3430     U08 i;
   3431 
   3432     U08 u1_num_centers = 0;
   3433 
   3434     if(0 == u1_layer_id)
   3435     {
   3436         switch(e_quality_preset)
   3437         {
   3438         case ME_XTREME_SPEED_25:
   3439         {
   3440             for(i = 0; i < TOT_NUM_PARTS; i++)
   3441             {
   3442                 u1_num_centers += gau1_num_best_results_XS25[i];
   3443             }
   3444 
   3445             break;
   3446         }
   3447         case ME_XTREME_SPEED:
   3448         {
   3449             for(i = 0; i < TOT_NUM_PARTS; i++)
   3450             {
   3451                 u1_num_centers += gau1_num_best_results_XS[i];
   3452             }
   3453 
   3454             break;
   3455         }
   3456         case ME_HIGH_SPEED:
   3457         {
   3458             for(i = 0; i < TOT_NUM_PARTS; i++)
   3459             {
   3460                 u1_num_centers += gau1_num_best_results_HS[i];
   3461             }
   3462 
   3463             break;
   3464         }
   3465         case ME_MEDIUM_SPEED:
   3466         {
   3467             for(i = 0; i < TOT_NUM_PARTS; i++)
   3468             {
   3469                 u1_num_centers += gau1_num_best_results_MS[i];
   3470             }
   3471 
   3472             break;
   3473         }
   3474         case ME_HIGH_QUALITY:
   3475         {
   3476             for(i = 0; i < TOT_NUM_PARTS; i++)
   3477             {
   3478                 u1_num_centers += gau1_num_best_results_HQ[i];
   3479             }
   3480 
   3481             break;
   3482         }
   3483         case ME_PRISTINE_QUALITY:
   3484         {
   3485             for(i = 0; i < TOT_NUM_PARTS; i++)
   3486             {
   3487                 u1_num_centers += gau1_num_best_results_PQ[i];
   3488             }
   3489 
   3490             break;
   3491         }
   3492         }
   3493     }
   3494 
   3495     return u1_num_centers;
   3496 }
   3497 
   3498 static __inline U08 hme_determine_max_num_subpel_refine_centers(
   3499     U08 u1_layer_id, U08 u1_max_2Nx2N_subpel_cands, U08 u1_max_NxN_subpel_cands)
   3500 {
   3501     U08 u1_num_centers = 0;
   3502 
   3503     if(0 == u1_layer_id)
   3504     {
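                 /* One 2Nx2N partition plus four NxN partitions per block, hence */
                 /* the 4x weighting on the NxN sub-pel candidate count.          */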
   3505         u1_num_centers += u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands;
   3506     }
   3507 
   3508     return u1_num_centers;
   3509 }
   3510 
   3511 void hme_set_refine_prms(
   3512     void *pv_refine_prms,
   3513     U08 u1_encode,
   3514     S32 num_ref,
   3515     S32 layer_id,
   3516     S32 num_layers,
   3517     S32 num_layers_explicit_search,
   3518     S32 use_4x4,
   3519     hme_frm_prms_t *ps_frm_prms,
   3520     double **ppd_intra_costs,
   3521     me_coding_params_t *ps_me_coding_tools)
   3522 {
   3523     refine_prms_t *ps_refine_prms = (refine_prms_t *)pv_refine_prms;
   3524 
   3525     ps_refine_prms->i4_encode = u1_encode;
   3526     ps_refine_prms->bidir_enabled = ps_frm_prms->bidir_enabled;
   3527     ps_refine_prms->i4_layer_id = layer_id;
   3528     /*************************************************************************/
   3529     /* Refinement layers have two lambdas, one for closed loop, another for  */
   3530     /* open loop. Non encode layers use only open loop lambda.               */
   3531     /*************************************************************************/
   3532     ps_refine_prms->lambda_inp = ps_frm_prms->i4_ol_sad_lambda_qf;
   3533     ps_refine_prms->lambda_recon = ps_frm_prms->i4_cl_sad_lambda_qf;
   3534     ps_refine_prms->lambda_q_shift = ps_frm_prms->lambda_q_shift;
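             /* Both lambdas are scaled down by ME_LAMBDA_DISCOUNT percent */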
   3535     ps_refine_prms->lambda_inp =
   3536         ((float)ps_refine_prms->lambda_inp) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
   3537     ps_refine_prms->lambda_recon =
   3538         ((float)ps_refine_prms->lambda_recon) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
   3539 
   3540     if((u1_encode) && (NULL != ppd_intra_costs))
   3541     {
   3542         ps_refine_prms->pd_intra_costs = ppd_intra_costs[layer_id];
   3543     }
   3544 
    3545     /* Explicit or implicit search depends on the number of layers having explicit search */
   3546     if((layer_id == 0) || (num_layers - layer_id > num_layers_explicit_search))
   3547     {
   3548         ps_refine_prms->explicit_ref = 0;
   3549         ps_refine_prms->i4_num_ref_fpel = MIN(2, num_ref);
   3550     }
   3551     else
   3552     {
   3553         ps_refine_prms->explicit_ref = 1;
   3554         ps_refine_prms->i4_num_ref_fpel = num_ref;
   3555     }
   3556 
   3557     ps_refine_prms->e_search_complexity = SEARCH_CX_HIGH;
   3558 
   3559     ps_refine_prms->i4_num_steps_hpel_refine = ps_me_coding_tools->i4_num_steps_hpel_refine;
   3560     ps_refine_prms->i4_num_steps_qpel_refine = ps_me_coding_tools->i4_num_steps_qpel_refine;
   3561 
   3562     if(u1_encode)
   3563     {
   3564         ps_refine_prms->i4_num_mvbank_results = 1;
   3565         ps_refine_prms->i4_use_rec_in_fpel = 1;
   3566         ps_refine_prms->i4_num_steps_fpel_refine = 1;
   3567 
   3568         if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
   3569         {
   3570             ps_refine_prms->i4_num_fpel_results = 4;
   3571             ps_refine_prms->i4_num_32x32_merge_results = 4;
   3572             ps_refine_prms->i4_num_64x64_merge_results = 4;
   3573             ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
   3574             ps_refine_prms->i4_use_satd_subpel = 1;
   3575             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
   3576             ps_refine_prms->u1_max_subpel_candts_NxN = 1;
   3577             ps_refine_prms->u1_subpel_candt_threshold = 1;
   3578             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
   3579             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
   3580             ps_refine_prms->limit_active_partitions = 0;
   3581         }
   3582         else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
   3583         {
   3584             ps_refine_prms->i4_num_fpel_results = 4;
   3585             ps_refine_prms->i4_num_32x32_merge_results = 4;
   3586             ps_refine_prms->i4_num_64x64_merge_results = 4;
   3587             ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
   3588             ps_refine_prms->i4_use_satd_subpel = 1;
   3589             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
   3590             ps_refine_prms->u1_max_subpel_candts_NxN = 1;
   3591             ps_refine_prms->u1_subpel_candt_threshold = 2;
   3592             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
   3593             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
   3594             ps_refine_prms->limit_active_partitions = 0;
   3595         }
   3596         else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
   3597         {
   3598             ps_refine_prms->i4_num_fpel_results = 1;
   3599             ps_refine_prms->i4_num_32x32_merge_results = 2;
   3600             ps_refine_prms->i4_num_64x64_merge_results = 2;
   3601             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
   3602             ps_refine_prms->i4_use_satd_subpel = 1;
   3603             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
   3604             ps_refine_prms->u1_max_subpel_candts_NxN = 1;
   3605             ps_refine_prms->u1_subpel_candt_threshold = 3;
   3606             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
   3607             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
   3608             ps_refine_prms->limit_active_partitions = 1;
   3609         }
   3610         else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
   3611         {
   3612             ps_refine_prms->i4_num_fpel_results = 1;
   3613             ps_refine_prms->i4_num_32x32_merge_results = 2;
   3614             ps_refine_prms->i4_num_64x64_merge_results = 2;
   3615             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
   3616             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
   3617             ps_refine_prms->u1_max_subpel_candts_NxN = 1;
   3618             ps_refine_prms->i4_use_satd_subpel = 0;
   3619             ps_refine_prms->u1_subpel_candt_threshold = 0;
   3620             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
   3621             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
   3622             ps_refine_prms->limit_active_partitions = 1;
   3623         }
   3624         else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
   3625         {
   3626             ps_refine_prms->i4_num_fpel_results = 1;
   3627             ps_refine_prms->i4_num_32x32_merge_results = 2;
   3628             ps_refine_prms->i4_num_64x64_merge_results = 2;
   3629             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
   3630             ps_refine_prms->i4_use_satd_subpel = 0;
   3631             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
   3632             ps_refine_prms->u1_max_subpel_candts_NxN = 0;
   3633             ps_refine_prms->u1_subpel_candt_threshold = 0;
   3634             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
   3635             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
   3636             ps_refine_prms->limit_active_partitions = 1;
   3637         }
   3638         else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
   3639         {
   3640             ps_refine_prms->i4_num_fpel_results = 1;
   3641             ps_refine_prms->i4_num_32x32_merge_results = 2;
   3642             ps_refine_prms->i4_num_64x64_merge_results = 2;
   3643             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
   3644             ps_refine_prms->i4_use_satd_subpel = 0;
   3645             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
   3646             ps_refine_prms->u1_max_subpel_candts_NxN = 0;
   3647             ps_refine_prms->u1_subpel_candt_threshold = 0;
   3648             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
   3649             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
   3650             ps_refine_prms->limit_active_partitions = 1;
   3651         }
   3652     }
   3653     else
   3654     {
   3655         ps_refine_prms->i4_num_fpel_results = 2;
   3656         ps_refine_prms->i4_use_rec_in_fpel = 0;
   3657         ps_refine_prms->i4_num_steps_fpel_refine = 1;
   3658         ps_refine_prms->i4_num_steps_hpel_refine = 0;
   3659         ps_refine_prms->i4_num_steps_qpel_refine = 0;
   3660 
   3661         if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
   3662         {
   3663             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
   3664             ps_refine_prms->i4_use_satd_subpel = 1;
   3665             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
   3666             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
   3667         }
   3668         else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
   3669         {
   3670             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
   3671             ps_refine_prms->i4_use_satd_subpel = 0;
   3672             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
   3673             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
   3674         }
   3675         else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
   3676         {
   3677             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
   3678             ps_refine_prms->i4_use_satd_subpel = 0;
   3679             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
   3680             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
   3681         }
   3682         else if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
   3683         {
   3684             ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
   3685             ps_refine_prms->i4_use_satd_subpel = 1;
   3686             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
   3687             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
   3688         }
   3689         else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
   3690         {
   3691             ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
   3692             ps_refine_prms->i4_use_satd_subpel = 1;
   3693             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
   3694             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
   3695         }
   3696         else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
   3697         {
   3698             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
   3699             ps_refine_prms->i4_use_satd_subpel = 1;
   3700             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
   3701             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
   3702         }
   3703 
    3704         /* The following fields are unused in the non-encode layers, */
    3705         /* but are still set to default values.                      */
   3706         ps_refine_prms->i4_num_32x32_merge_results = 4;
   3707         ps_refine_prms->i4_num_64x64_merge_results = 4;
   3708 
   3709         if(!ps_frm_prms->bidir_enabled)
   3710         {
   3711             ps_refine_prms->limit_active_partitions = 0;
   3712         }
   3713         else
   3714         {
   3715             ps_refine_prms->limit_active_partitions = 1;
   3716         }
   3717     }
   3718 
   3719     ps_refine_prms->i4_enable_4x4_part =
   3720         hme_get_mv_blk_size(use_4x4, layer_id, num_layers, u1_encode);
   3721 
   3722     if(!ps_me_coding_tools->u1_l0_me_controlled_via_cmd_line)
   3723     {
   3724         ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
   3725             layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
   3726 
   3727         hme_max_search_cands_per_search_cand_loc_populator(
   3728             ps_frm_prms,
   3729             ps_refine_prms->au1_num_fpel_search_cands,
   3730             layer_id,
   3731             ps_me_coding_tools->e_me_quality_presets);
   3732 
   3733         ps_refine_prms->u1_max_2nx2n_tu_recur_cands = hme_determine_max_2nx2n_tu_recur_cands(
   3734             layer_id, ps_me_coding_tools->e_me_quality_presets);
   3735 
   3736         ps_refine_prms->u1_max_num_fpel_refine_centers = hme_determine_max_num_fpel_refine_centers(
   3737             layer_id, ps_me_coding_tools->e_me_quality_presets);
   3738 
   3739         ps_refine_prms->u1_max_num_subpel_refine_centers =
   3740             hme_determine_max_num_subpel_refine_centers(
   3741                 layer_id,
   3742                 ps_refine_prms->u1_max_subpel_candts_2Nx2N,
   3743                 ps_refine_prms->u1_max_subpel_candts_NxN);
   3744     }
   3745     else
   3746     {
   3747         if(0 == layer_id)
   3748         {
   3749             ps_refine_prms->i4_num_results_per_part =
   3750                 ps_me_coding_tools->u1_num_results_per_part_in_l0me;
   3751         }
   3752         else if(1 == layer_id)
   3753         {
   3754             ps_refine_prms->i4_num_results_per_part =
   3755                 ps_me_coding_tools->u1_num_results_per_part_in_l1me;
   3756         }
   3757         else if((2 == layer_id) && (num_layers > 3))
   3758         {
   3759             ps_refine_prms->i4_num_results_per_part =
   3760                 ps_me_coding_tools->u1_num_results_per_part_in_l2me;
   3761         }
   3762         else
   3763         {
   3764             ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
   3765                 layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
   3766         }
   3767 
   3768         memset(
   3769             ps_refine_prms->au1_num_fpel_search_cands,
   3770             ps_me_coding_tools->u1_max_num_coloc_cands,
   3771             sizeof(ps_refine_prms->au1_num_fpel_search_cands));
   3772 
   3773         ps_refine_prms->u1_max_2nx2n_tu_recur_cands =
   3774             ps_me_coding_tools->u1_max_2nx2n_tu_recur_cands;
   3775 
   3776         ps_refine_prms->u1_max_num_fpel_refine_centers =
   3777             ps_me_coding_tools->u1_max_num_fpel_refine_centers;
   3778 
   3779         ps_refine_prms->u1_max_num_subpel_refine_centers =
   3780             ps_me_coding_tools->u1_max_num_subpel_refine_centers;
   3781     }
   3782 
   3783     if(layer_id != 0)
   3784     {
   3785         ps_refine_prms->i4_num_mvbank_results = ps_refine_prms->i4_num_results_per_part;
   3786     }
   3787 
    3788     /* sdi_threshold is approximately 4 * lambda (lambda_recon is in Q(lambda_q_shift)) */
   3789     ps_refine_prms->sdi_threshold =
   3790         (ps_refine_prms->lambda_recon + (1 << (ps_frm_prms->lambda_q_shift - 1))) >>
   3791         (ps_frm_prms->lambda_q_shift - 2);
   3792 
   3793     ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb =
   3794         MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && ps_frm_prms->u1_is_cu_qp_delta_enabled;
   3795 }
   3796 
   3797 void hme_set_ctb_boundary_attrs(ctb_boundary_attrs_t *ps_attrs, S32 num_8x8_horz, S32 num_8x8_vert)
   3798 {
   3799     S32 cu_16x16_valid_flag = 0, merge_pattern_x, merge_pattern_y;
   3800     S32 blk, blk_x, blk_y;
   3801     S32 num_16x16_horz, num_16x16_vert;
   3802     blk_ctb_attrs_t *ps_blk_attrs = &ps_attrs->as_blk_attrs[0];
   3803 
   3804     num_16x16_horz = (num_8x8_horz + 1) >> 1;
   3805     num_16x16_vert = (num_8x8_vert + 1) >> 1;
   3806     ps_attrs->u1_num_blks_in_ctb = (U08)(num_16x16_horz * num_16x16_vert);
   3807 
   3808     /*************************************************************************/
   3809     /* Run through each blk assuming all 16x16 CUs valid. The order would be */
   3810     /* 0   1   4   5                                                         */
   3811     /* 2   3   6   7                                                         */
   3812     /* 8   9   12  13                                                        */
   3813     /* 10  11  14  15                                                        */
   3814     /* Out of these some may not be valid. For example, if num_16x16_horz is */
   3815     /* 2 and num_16x16_vert is 4, then right 2 columns not valid. In this    */
   3816     /* case, blks 8-11 get encoding number of 4-7. Further, the variable     */
   3817     /* cu_16x16_valid_flag will be 1111 0000 1111 0000. Also, the variable   */
   3818     /* u1_merge_to_32x32_flag will be 1010, and u1_merge_to_64x64_flag 0     */
   3819     /*************************************************************************/
   3820     for(blk = 0; blk < 16; blk++)
   3821     {
   3822         U08 u1_blk_8x8_mask = 0xF;
   3823         blk_x = gau1_encode_to_raster_x[blk];
   3824         blk_y = gau1_encode_to_raster_y[blk];
   3825         if((blk_x >= num_16x16_horz) || (blk_y >= num_16x16_vert))
   3826         {
   3827             continue;
   3828         }
   3829 
   3830         /* The CU at encode location blk is valid */
   3831         cu_16x16_valid_flag |= (1 << blk);
   3832         ps_blk_attrs->u1_blk_id_in_full_ctb = blk;
   3833         ps_blk_attrs->u1_blk_x = blk_x;
   3834         ps_blk_attrs->u1_blk_y = blk_y;
   3835 
   3836         /* Disable blks 1 and 3 if the 16x16 blk overshoots on rt border */
   3837         if(((blk_x << 1) + 2) > num_8x8_horz)
   3838             u1_blk_8x8_mask &= 0x5;
   3839         /* Disable blks 2 and 3 if the 16x16 blk overshoots on bot border */
   3840         if(((blk_y << 1) + 2) > num_8x8_vert)
   3841             u1_blk_8x8_mask &= 0x3;
   3842         ps_blk_attrs->u1_blk_8x8_mask = u1_blk_8x8_mask;
   3843         ps_blk_attrs++;
   3844     }
   3845 
   3846     ps_attrs->cu_16x16_valid_flag = cu_16x16_valid_flag;
   3847 
   3848     /* 32x32 merge is logical combination of what merge is possible          */
   3849     /* horizontally as well as vertically.                                   */
   3850     if(num_8x8_horz < 4)
   3851         merge_pattern_x = 0x0;
   3852     else if(num_8x8_horz < 8)
   3853         merge_pattern_x = 0x5;
   3854     else
   3855         merge_pattern_x = 0xF;
   3856 
   3857     if(num_8x8_vert < 4)
   3858         merge_pattern_y = 0x0;
   3859     else if(num_8x8_vert < 8)
   3860         merge_pattern_y = 0x3;
   3861     else
   3862         merge_pattern_y = 0xF;
   3863 
   3864     ps_attrs->u1_merge_to_32x32_flag = (U08)(merge_pattern_x & merge_pattern_y);
   3865 
   3866     /* Do not attempt 64x64 merge if any blk invalid */
   3867     if(ps_attrs->u1_merge_to_32x32_flag != 0xF)
   3868         ps_attrs->u1_merge_to_64x64_flag = 0;
   3869     else
   3870         ps_attrs->u1_merge_to_64x64_flag = 1;
   3871 }
   3872 
   3873 void hme_set_ctb_attrs(ctb_boundary_attrs_t *ps_attrs, S32 wd, S32 ht)
   3874 {
   3875     S32 is_cropped_rt, is_cropped_bot;
   3876 
   3877     is_cropped_rt = ((wd & 63) != 0) ? 1 : 0;
   3878     is_cropped_bot = ((ht & 63) != 0) ? 1 : 0;
   3879 
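             /* e.g. for a 1920x1080 picture: wd & 63 = 0 and ht & 63 = 56, so only  */
             /* the bottom boundary attrs are cropped (num_8x8_horz = 8,             */
             /* num_8x8_vert = 56 >> 3 = 7); the rt and bot-rt attrs are not needed  */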
   3880     if(is_cropped_rt)
   3881     {
   3882         hme_set_ctb_boundary_attrs(&ps_attrs[CTB_RT_PIC_BOUNDARY], (wd & 63) >> 3, 8);
   3883     }
   3884     if(is_cropped_bot)
   3885     {
   3886         hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_PIC_BOUNDARY], 8, (ht & 63) >> 3);
   3887     }
   3888     if(is_cropped_rt & is_cropped_bot)
   3889     {
   3890         hme_set_ctb_boundary_attrs(
   3891             &ps_attrs[CTB_BOT_RT_PIC_BOUNDARY], (wd & 63) >> 3, (ht & 63) >> 3);
   3892     }
   3893     hme_set_ctb_boundary_attrs(&ps_attrs[CTB_CENTRE], 8, 8);
   3894 }
   3895 
   3896 /**
   3897 ********************************************************************************
   3898 *  @fn     hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
   3899 *
    3900 *  @brief  When we have an mv with ref id "poc_to" for which a predictor is to
    3901 *          be computed, and the predictor has ref id "poc_from", this function
    3902 *          returns the scale factor in Q8 format for that purpose
   3903 *
   3904 *  @param[in] curr_poc : input picture poc
   3905 *
   3906 *  @param[in] poc_from : POC of the pic, pointed to by ref id to be scaled
   3907 *
    3908 *  @param[in] poc_to : POC of the pic, pointed to by ref id to be scaled to
   3909 *
   3910 *  @return Scale factor in Q8 format
   3911 ********************************************************************************
   3912 */
   3913 S16 hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
   3914 {
   3915     S32 td, tx, tb;
   3916     S16 i2_scf;
   3917     /*************************************************************************/
   3918     /* Approximate scale factor: 256 * num / denom                           */
   3919     /* num = curr_poc - poc_to, denom = curr_poc - poc_from                  */
   3920     /* Exact implementation as per standard.                                 */
   3921     /*************************************************************************/
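             /* Worked example (illustration only): with curr_poc = 8, poc_from = 4  */
             /* and poc_to = 6: tb = 2, td = 4, tx = (16384 + 2) / 4 = 4096, and     */
             /* i2_scf = ((2 * 4096) + 32) >> 6 = 128, i.e. 0.5 in Q8, matching the  */
             /* POC distance ratio 2 / 4                                             */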
   3922 
   3923     tb = HME_CLIP((curr_poc - poc_to), -128, 127);
   3924     td = HME_CLIP((curr_poc - poc_from), -128, 127);
   3925 
   3926     tx = (16384 + (ABS(td) >> 1)) / td;
   3927     //i2_scf = HME_CLIP((((tb*tx)+32)>>6), -128, 127);
   3928     i2_scf = HME_CLIP((((tb * tx) + 32) >> 6), -4096, 4095);
   3929 
   3930     return (i2_scf);
   3931 }
   3932 
   3933 /**
   3934 ********************************************************************************
   3935 *  @fn     hme_process_frm_init
   3936 *
    3937 *  @brief  HME frame level initialisation processing function
   3938 *
   3939 *  @param[in] pv_me_ctxt : ME ctxt pointer
   3940 *
   3941 *  @param[in] ps_ref_map : Reference map prms pointer
   3942 *
   3943 *  @param[in] ps_frm_prms :Pointer to frame params
   3944 *
    3945 *  @remarks Called only for the encode layer
    3946 *
    3947 *  @return None
   3948 ********************************************************************************
   3949 */
   3950 void hme_process_frm_init(
   3951     void *pv_me_ctxt,
   3952     hme_ref_map_t *ps_ref_map,
   3953     hme_frm_prms_t *ps_frm_prms,
   3954     WORD32 i4_me_frm_id,
   3955     WORD32 i4_num_me_frm_pllel)
   3956 {
   3957     me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
   3958     me_frm_ctxt_t *ps_ctxt = (me_frm_ctxt_t *)ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
   3959 
   3960     S32 i, j, desc_idx;
   3961     S16 i2_max_x = 0, i2_max_y = 0;
   3962 
   3963     /* Set the Qp of current frm passed by caller. Required for intra cost */
   3964     ps_ctxt->frm_qstep = ps_frm_prms->qstep;
   3965     ps_ctxt->qstep_ls8 = ps_frm_prms->qstep_ls8;
   3966 
   3967     /* Bidir enabled or not */
   3968     ps_ctxt->s_frm_prms = *ps_frm_prms;
   3969 
   3970     /*************************************************************************/
    3971     /* Set up the ref pic parameters across all layers. For this, we do the  */
    3972     /* following: the application has given us a ref pic list; we go index   */
    3973     /* by index and pick up each picture. A picture can be uniquely mapped   */
    3974     /* to a POC, so we search the layer descriptor array to find that POC.   */
    3975     /* Once found, we update all attributes in this descriptor. During this  */
    3976     /* update process we also create an index of descriptor id to ref id     */
    3977     /* mapping. It is important to find the same POC in the layer descr      */
    3978     /* structure since it holds the pyramid inputs for non encode layers.    */
    3979     /* Apart from this, we also update the array containing the index of     */
    3980     /* the descr. For ease of access during processing, each layer has a     */
    3981     /* pointer to an array of pointers containing fxfy, fxhy, hxfy, hxhy     */
    3982     /* and the inputs for each ref; we update this too.                      */
   3983     /*************************************************************************/
   3984     ps_ctxt->num_ref_past = 0;
   3985     ps_ctxt->num_ref_future = 0;
   3986     for(i = 0; i < ps_ref_map->i4_num_ref; i++)
   3987     {
   3988         S32 ref_id_lc, idx;
   3989         hme_ref_desc_t *ps_ref_desc;
   3990 
   3991         ps_ref_desc = &ps_ref_map->as_ref_desc[i];
   3992         ref_id_lc = ps_ref_desc->i1_ref_id_lc;
   3993         /* Obtain the id of descriptor that contains this POC */
   3994         idx = hme_find_descr_idx(
   3995             ps_thrd_ctxt, ps_ref_desc->i4_poc, ps_ref_desc->i4_GOP_num, i4_num_me_frm_pllel);
   3996 
   3997         /* Update all layers in this descr with the reference attributes */
   3998         hme_update_layer_desc(
   3999             &ps_thrd_ctxt->as_ref_descr[idx],
   4000             ps_ref_desc,
   4001             0,
   4002             1,  //ps_ctxt->num_layers,
   4003             ps_ctxt->ps_curr_descr);
   4004 
   4005         /* Update the pointer holder for the recon planes */
   4006         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_inp = &ps_ctxt->apu1_list_inp[0][0];
   4007         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxfy =
   4008             &ps_ctxt->apu1_list_rec_fxfy[0][0];
   4009         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxfy =
   4010             &ps_ctxt->apu1_list_rec_hxfy[0][0];
   4011         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxhy =
   4012             &ps_ctxt->apu1_list_rec_fxhy[0][0];
   4013         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxhy =
   4014             &ps_ctxt->apu1_list_rec_hxhy[0][0];
   4015         ps_ctxt->ps_curr_descr->aps_layers[0]->ppv_dep_mngr_recon =
   4016             &ps_ctxt->apv_list_dep_mngr[0][0];
   4017 
   4018         /* Update the array having ref id lc to descr id mapping */
   4019         ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;
   4020 
    4021         /* From ref id lc we need to work out the POC, so update this array */
   4022         ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
   4023 
   4024         /* When computing costs in L0 and L1 directions, we need the */
   4025         /* respective ref id L0 and L1, so update this mapping */
   4026         ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
   4027         ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
   4028         if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
   4029         {
   4030             ps_ctxt->au1_is_past[ref_id_lc] = 1;
   4031             ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
   4032             ps_ctxt->num_ref_past++;
   4033         }
   4034         else
   4035         {
   4036             ps_ctxt->au1_is_past[ref_id_lc] = 0;
   4037             ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
   4038             ps_ctxt->num_ref_future++;
   4039         }
   4040 
   4041         if(1 == ps_ctxt->i4_wt_pred_enable_flag)
   4042         {
   4043             /* copy the weight and offsets from current ref desc */
   4044             ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;
   4045 
   4046             /* inv weight is stored in Q15 format */
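                     /* e.g. an i2_weight of 64 gives ((1 << 15) + 32) / 64 = 512, i.e. 1/64 in Q15 */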
   4047             ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
   4048                 ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
   4049             ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
   4050         }
   4051         else
   4052         {
   4053             /* store default wt and offset*/
   4054             ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;
   4055 
   4056             /* inv weight is stored in Q15 format */
   4057             ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
   4058                 ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
   4059 
   4060             ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
   4061         }
   4062     }
   4063 
   4064     ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
   4065     ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;
   4066 
   4067     /*************************************************************************/
   4068     /* Preparation of the TLU for bits for reference indices.                */
   4069     /* Special case is that of numref = 2. (TEV)                             */
    4070     /* Other cases use UEV                                                   */
   4071     /*************************************************************************/
   4072     for(i = 0; i < MAX_NUM_REF; i++)
   4073     {
   4074         ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
   4075         ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
   4076     }
   4077 
   4078     if(ps_ref_map->i4_num_ref == 2)
   4079     {
   4080         ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
   4081         ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
   4082         ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
   4083         ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
   4084     }
   4085     else if(ps_ref_map->i4_num_ref > 2)
   4086     {
   4087         for(i = 0; i < ps_ref_map->i4_num_ref; i++)
   4088         {
   4089             S32 l0, l1;
   4090             l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
   4091             l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
   4092             ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
   4093             ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
   4094         }
   4095     }
   4096 
   4097     /*************************************************************************/
   4098     /* Preparation of the scaling factors for reference indices. The scale   */
   4099     /* factor depends on distance of the two ref indices from current input  */
   4100     /* in terms of poc delta.                                                */
   4101     /*************************************************************************/
   4102     for(i = 0; i < ps_ref_map->i4_num_ref; i++)
   4103     {
   4104         for(j = 0; j < ps_ref_map->i4_num_ref; j++)
   4105         {
   4106             S16 i2_scf_q8;
   4107             S32 poc_from, poc_to;
   4108 
   4109             poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
   4110             poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];
   4111 
   4112             i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
   4113             ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
   4114         }
   4115     }
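             /* Worked example (illustration only): with curr_poc = 8 and ref POCs   */
             /* {0, 4}, the entry that scales an mv pointing to POC 0 into a         */
             /* candidate for POC 4 is approx 256 * (8 - 4) / (8 - 0) = 128, i.e.    */
             /* 0.5 in Q8                                                            */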
   4116 
   4117     /*************************************************************************/
   4118     /* We store simplified look ups for 4 hpel planes and inp y plane for    */
   4119     /* every layer and for every ref id in the layer. So update these lookups*/
   4120     /*************************************************************************/
   4121     for(i = 0; i < 1; i++)
   4122     {
   4123         U08 **ppu1_rec_fxfy, **ppu1_rec_hxfy, **ppu1_rec_fxhy, **ppu1_rec_hxhy;
   4124         U08 **ppu1_inp;
   4125         void **ppvlist_dep_mngr;
   4126         layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
   4127 
   4128         ppvlist_dep_mngr = &ps_ctxt->apv_list_dep_mngr[i][0];
   4129         ppu1_rec_fxfy = &ps_ctxt->apu1_list_rec_fxfy[i][0];
   4130         ppu1_rec_hxfy = &ps_ctxt->apu1_list_rec_hxfy[i][0];
   4131         ppu1_rec_fxhy = &ps_ctxt->apu1_list_rec_fxhy[i][0];
   4132         ppu1_rec_hxhy = &ps_ctxt->apu1_list_rec_hxhy[i][0];
   4133         ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
   4134         for(j = 0; j < ps_ref_map->i4_num_ref; j++)
   4135         {
   4136             hme_ref_desc_t *ps_ref_desc;
   4137             hme_ref_buf_info_t *ps_buf_info;
   4138             layer_ctxt_t *ps_layer;
   4139             S32 ref_id_lc;
   4140 
   4141             ps_ref_desc = &ps_ref_map->as_ref_desc[j];
   4142             ps_buf_info = &ps_ref_desc->as_ref_info[i];
   4143             ref_id_lc = ps_ref_desc->i1_ref_id_lc;
   4144 
   4145             desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
   4146             ps_layer = ps_thrd_ctxt->as_ref_descr[desc_idx].aps_layers[i];
   4147 
   4148             ppu1_inp[j] = ps_buf_info->pu1_ref_src;
   4149             ppu1_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
   4150             ppu1_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
   4151             ppu1_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
   4152             ppu1_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
   4153             ppvlist_dep_mngr[j] = ps_buf_info->pv_dep_mngr;
   4154 
   4155             /* Update the curr descriptors reference pointers here */
   4156             ps_layer_ctxt->ppu1_list_inp[j] = ps_buf_info->pu1_ref_src;
   4157             ps_layer_ctxt->ppu1_list_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
   4158             ps_layer_ctxt->ppu1_list_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
   4159             ps_layer_ctxt->ppu1_list_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
   4160             ps_layer_ctxt->ppu1_list_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
   4161         }
   4162     }
   4163     /*************************************************************************/
   4164     /* The mv range for each layer is computed. For dyadic layers it will    */
    4165     /* keep halving; for non dyadic layers it shrinks by the ratio of wd/ht. */
    4166     /* In general x is scaled by the ratio of widths and y by that of heights*/
   4167     /*************************************************************************/
   4168     for(i = 0; i < 1; i++)
   4169     {
   4170         layer_ctxt_t *ps_layer_ctxt;
   4171         if(i == 0)
   4172         {
   4173             i2_max_x = ps_frm_prms->i2_mv_range_x;
   4174             i2_max_y = ps_frm_prms->i2_mv_range_y;
   4175         }
   4176         else
   4177         {
   4178             i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->i4_wd) / ps_ctxt->i4_wd));
   4179             i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->i4_ht) / ps_ctxt->i4_ht));
   4180         }
   4181         ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
   4182         ps_layer_ctxt->i2_max_mv_x = i2_max_x;
   4183         ps_layer_ctxt->i2_max_mv_y = i2_max_y;
   4184 
   4185         /*********************************************************************/
   4186         /* Every layer maintains a reference id lc to POC mapping. This is   */
   4187         /* because the mapping is unique for every frm. Also, in next frm,   */
   4188         /* we require colocated mvs which means scaling according to temporal*/
    4189         /* distance. Hence this mapping needs to be maintained in every      */
   4190         /* layer ctxt                                                        */
   4191         /*********************************************************************/
   4192         memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
   4193         if(ps_ref_map->i4_num_ref)
   4194         {
   4195             memcpy(
   4196                 ps_layer_ctxt->ai4_ref_id_to_poc_lc,
   4197                 ps_ctxt->ai4_ref_idx_to_poc_lc,
   4198                 ps_ref_map->i4_num_ref * sizeof(S32));
   4199         }
   4200     }
   4201 
   4202     return;
   4203 }
   4204 
   4205 /**
   4206 ********************************************************************************
   4207 *  @fn     hme_coarse_process_frm_init
   4208 *
    4209 *  @brief  HME coarse layer frame level initialisation processing function
   4210 *
   4211 *  @param[in] pv_me_ctxt : ME ctxt pointer
   4212 *
   4213 *  @param[in] ps_ref_map : Reference map prms pointer
   4214 *
   4215 *  @param[in] ps_frm_prms :Pointer to frame params
   4216 *
    4217 *  @return None
   4218 ********************************************************************************
   4219 */
   4220 void hme_coarse_process_frm_init(
   4221     void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms)
   4222 {
   4223     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
   4224     S32 i, j, desc_idx;
   4225     S16 i2_max_x = 0, i2_max_y = 0;
   4226 
   4227     /* Set the Qp of current frm passed by caller. Required for intra cost */
   4228     ps_ctxt->frm_qstep = ps_frm_prms->qstep;
   4229 
   4230     /* Bidir enabled or not */
   4231     ps_ctxt->s_frm_prms = *ps_frm_prms;
   4232 
   4233     /*************************************************************************/
    4234     /* Set up the ref pic parameters across all layers. For this, we do the  */
    4235     /* following: the application has given us a ref pic list; we go index   */
    4236     /* by index and pick up each picture. A picture can be uniquely mapped   */
    4237     /* to a POC, so we search the layer descriptor array to find that POC.   */
    4238     /* Once found, we update all attributes in this descriptor. During this  */
    4239     /* update process we also create an index of descriptor id to ref id     */
    4240     /* mapping. It is important to find the same POC in the layer descr      */
    4241     /* structure since it holds the pyramid inputs for non encode layers.    */
    4242     /* Apart from this, we also update the array containing the index of     */
    4243     /* the descr. For ease of access during processing, each layer has a     */
    4244     /* pointer to an array of pointers containing fxfy, fxhy, hxfy, hxhy     */
    4245     /* and the inputs for each ref; we update this too.                      */
   4246     /*************************************************************************/
   4247     ps_ctxt->num_ref_past = 0;
   4248     ps_ctxt->num_ref_future = 0;
   4249     for(i = 0; i < ps_ref_map->i4_num_ref; i++)
   4250     {
   4251         S32 ref_id_lc, idx;
   4252         hme_ref_desc_t *ps_ref_desc;
   4253 
   4254         ps_ref_desc = &ps_ref_map->as_ref_desc[i];
   4255         ref_id_lc = ps_ref_desc->i1_ref_id_lc;
   4256         /* Obtain the id of descriptor that contains this POC */
   4257         idx = hme_coarse_find_descr_idx(ps_ctxt, ps_ref_desc->i4_poc);
   4258 
   4259         /* Update all layers in this descr with the reference attributes */
   4260         hme_update_layer_desc(
   4261             &ps_ctxt->as_ref_descr[idx],
   4262             ps_ref_desc,
   4263             1,
   4264             ps_ctxt->num_layers - 1,
   4265             ps_ctxt->ps_curr_descr);
   4266 
   4267         /* Update the array having ref id lc to descr id mapping */
   4268         ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;
   4269 
   4270         /* From ref id lc we need to work out the POC, So update this array */
   4271         ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
   4272 
   4273         /* From ref id lc we need to work out the display num, So update this array */
   4274         ps_ctxt->ai4_ref_idx_to_disp_num[ref_id_lc] = ps_ref_desc->i4_display_num;
   4275 
   4276         /* When computing costs in L0 and L1 directions, we need the */
   4277         /* respective ref id L0 and L1, so update this mapping */
   4278         ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
   4279         ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
   4280         if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
   4281         {
   4282             ps_ctxt->au1_is_past[ref_id_lc] = 1;
   4283             ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
   4284             ps_ctxt->num_ref_past++;
   4285         }
   4286         else
   4287         {
   4288             ps_ctxt->au1_is_past[ref_id_lc] = 0;
   4289             ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
   4290             ps_ctxt->num_ref_future++;
   4291         }
   4292         if(1 == ps_ctxt->i4_wt_pred_enable_flag)
   4293         {
   4294             /* copy the weight and offsets from current ref desc */
   4295             ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;
   4296 
   4297             /* inv weight is stored in Q15 format */
   4298             ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
   4299                 ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
   4300 
   4301             ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
   4302         }
   4303         else
   4304         {
   4305             /* store default wt and offset*/
   4306             ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;
   4307 
   4308             /* inv weight is stored in Q15 format */
   4309             ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
   4310                 ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
   4311 
   4312             ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
   4313         }
   4314     }
   4315 
   4316     ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
   4317     ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;
   4318 
   4319     /*************************************************************************/
   4320     /* Preparation of the TLU for bits for reference indices.                */
   4321     /* Special case is that of numref = 2. (TEV)                             */
    4322     /* Other cases use UEV                                                   */
   4323     /*************************************************************************/
   4324     for(i = 0; i < MAX_NUM_REF; i++)
   4325     {
   4326         ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
   4327         ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
   4328     }
   4329 
   4330     if(ps_ref_map->i4_num_ref == 2)
   4331     {
   4332         ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
   4333         ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
   4334         ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
   4335         ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
   4336     }
   4337     else if(ps_ref_map->i4_num_ref > 2)
   4338     {
   4339         for(i = 0; i < ps_ref_map->i4_num_ref; i++)
   4340         {
   4341             S32 l0, l1;
   4342             l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
   4343             l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
   4344             ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
   4345             ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
   4346         }
   4347     }
   4348 
   4349     /*************************************************************************/
   4350     /* Preparation of the scaling factors for reference indices. The scale   */
   4351     /* factor depends on distance of the two ref indices from current input  */
   4352     /* in terms of poc delta.                                                */
   4353     /*************************************************************************/
   4354     for(i = 0; i < ps_ref_map->i4_num_ref; i++)
   4355     {
   4356         for(j = 0; j < ps_ref_map->i4_num_ref; j++)
   4357         {
   4358             S16 i2_scf_q8;
   4359             S32 poc_from, poc_to;
   4360 
   4361             poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
   4362             poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];
   4363 
   4364             i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
   4365             ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
   4366         }
   4367     }
   4368 
   4369     /*************************************************************************/
   4370     /* We store simplified look ups for inp y plane for                      */
   4371     /* every layer and for every ref id in the layer.                        */
   4372     /*************************************************************************/
   4373     for(i = 1; i < ps_ctxt->num_layers; i++)
   4374     {
   4375         U08 **ppu1_inp;
   4376 
   4377         ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
   4378         for(j = 0; j < ps_ref_map->i4_num_ref; j++)
   4379         {
   4380             hme_ref_desc_t *ps_ref_desc;
   4381             hme_ref_buf_info_t *ps_buf_info;
   4382             layer_ctxt_t *ps_layer;
   4383             S32 ref_id_lc;
   4384 
   4385             ps_ref_desc = &ps_ref_map->as_ref_desc[j];
   4386             ps_buf_info = &ps_ref_desc->as_ref_info[i];
   4387             ref_id_lc = ps_ref_desc->i1_ref_id_lc;
   4388 
   4389             desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
   4390             ps_layer = ps_ctxt->as_ref_descr[desc_idx].aps_layers[i];
   4391 
   4392             ppu1_inp[j] = ps_layer->pu1_inp;
   4393         }
   4394     }
   4395     /*************************************************************************/
   4396     /* The mv range for each layer is computed. For dyadic layers it will    */
    4397     /* keep halving; for non dyadic layers it shrinks by the ratio of wd/ht. */
    4398     /* In general x is scaled by the ratio of widths and y by that of heights*/
   4399     /*************************************************************************/
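             /* e.g. assuming dyadic layers (each a_wd/a_ht half of the layer below) */
             /* and a layer 0 range of +/-512: layer 1 gets FLOOR8(512 / 2) = 256,   */
             /* layer 2 gets 128, and so on                                          */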
   4400 
   4401     /* set to layer 0 search range params */
   4402     i2_max_x = ps_frm_prms->i2_mv_range_x;
   4403     i2_max_y = ps_frm_prms->i2_mv_range_y;
   4404 
   4405     for(i = 1; i < ps_ctxt->num_layers; i++)
   4406     {
   4407         layer_ctxt_t *ps_layer_ctxt;
   4408 
   4409         {
   4410             i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->a_wd[i]) / ps_ctxt->a_wd[i - 1]));
   4411             i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->a_ht[i]) / ps_ctxt->a_ht[i - 1]));
   4412         }
   4413         ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
   4414         ps_layer_ctxt->i2_max_mv_x = i2_max_x;
   4415         ps_layer_ctxt->i2_max_mv_y = i2_max_y;
   4416 
   4417         /*********************************************************************/
   4418         /* Every layer maintains a reference id lc to POC mapping. This is   */
   4419         /* because the mapping is unique for every frm. Also, in next frm,   */
   4420         /* we require colocated mvs which means scaling according to temporal*/
    4421         /* distance. Hence this mapping needs to be maintained in every      */
   4422         /* layer ctxt                                                        */
   4423         /*********************************************************************/
   4424         memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
   4425         if(ps_ref_map->i4_num_ref)
   4426         {
   4427             memcpy(
   4428                 ps_layer_ctxt->ai4_ref_id_to_poc_lc,
   4429                 ps_ctxt->ai4_ref_idx_to_poc_lc,
   4430                 ps_ref_map->i4_num_ref * sizeof(S32));
   4431             memcpy(
   4432                 ps_layer_ctxt->ai4_ref_id_to_disp_num,
   4433                 ps_ctxt->ai4_ref_idx_to_disp_num,
   4434                 ps_ref_map->i4_num_ref * sizeof(S32));
   4435         }
   4436     }
   4437 
   4438     return;
   4439 }
   4440 
   4441 /**
   4442 ********************************************************************************
   4443 *  @fn     hme_process_frm
   4444 *
   4445 *  @brief  HME frame level processing function
   4446 *
   4447 *  @param[in] pv_me_ctxt : ME ctxt pointer
   4448 *
   4449 *  @param[in] ps_ref_map : Reference map prms pointer
   4450 *
    4451 *  @param[in] ppd_intra_costs : pointer to array of intra cost buffers for each layer
   4452 *
   4453 *  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
   4454 *
    4455 *  @param[in] pf_ext_update_fxn : function pointer to update CTB results
   4456 *
   4457 *  @param[in] pf_get_intra_cu_and_cost :function pointer to get intra cu size and cost
   4458 *
    4459 *  @param[in] ps_multi_thrd_ctxt : pointer to multi thread context
    4460 *
    4461 *  @return None
   4462 ********************************************************************************
   4463 */
   4464 
   4465 void hme_process_frm(
   4466     void *pv_me_ctxt,
   4467     pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
   4468     hme_ref_map_t *ps_ref_map,
   4469     double **ppd_intra_costs,
   4470     hme_frm_prms_t *ps_frm_prms,
   4471     PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
   4472     void *pv_coarse_layer,
   4473     void *pv_multi_thrd_ctxt,
   4474     S32 i4_frame_parallelism_level,
   4475     S32 thrd_id,
   4476     S32 i4_me_frm_id)
   4477 {
   4478     refine_prms_t s_refine_prms;
   4479     me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
   4480     me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
   4481 
   4482     S32 lyr_job_type;
   4483     multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
   4484     layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
   4485 
   4486     ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
   4487 
   4488     lyr_job_type = ME_JOB_ENC_LYR;
   4489     /*************************************************************************/
   4490     /* Final L0 layer ME call                                                */
   4491     /*************************************************************************/
   4492     {
    4493         /* Set the CTB attributes depending on corner/rt edge/bot edge/center */
   4494         hme_set_ctb_attrs(ps_ctxt->as_ctb_bound_attrs, ps_ctxt->i4_wd, ps_ctxt->i4_ht);
   4495 
   4496         hme_set_refine_prms(
   4497             &s_refine_prms,
   4498             ps_ctxt->u1_encode[0],
   4499             ps_ref_map->i4_num_ref,
   4500             0,
   4501             ps_ctxt->num_layers,
   4502             ps_ctxt->num_layers_explicit_search,
   4503             ps_thrd_ctxt->s_init_prms.use_4x4,
   4504             ps_frm_prms,
   4505             ppd_intra_costs,
   4506             &ps_thrd_ctxt->s_init_prms.s_me_coding_tools);
   4507 
   4508         hme_refine(
   4509             ps_thrd_ctxt,
   4510             &s_refine_prms,
   4511             pf_ext_update_fxn,
   4512             ps_coarse_layer,
   4513             ps_multi_thrd_ctxt,
   4514             lyr_job_type,
   4515             thrd_id,
   4516             i4_me_frm_id,
   4517             ps_l0_ipe_input);
   4518 
    4519         /* Set current ref pic status which will be used as prev frame ref pic */
   4520         if(i4_frame_parallelism_level)
   4521         {
   4522             ps_ctxt->i4_is_prev_frame_reference = 0;
   4523         }
   4524         else
   4525         {
   4526             ps_ctxt->i4_is_prev_frame_reference =
   4527                 ps_multi_thrd_ctxt->aps_cur_inp_me_prms[i4_me_frm_id]
   4528                     ->ps_curr_inp->s_lap_out.i4_is_ref_pic;
   4529         }
   4530     }
   4531 
   4532     return;
   4533 }
   4534 
   4535 /**
   4536 ********************************************************************************
   4537 *  @fn     hme_coarse_process_frm
   4538 *
   4539 *  @brief  HME frame level processing function (coarse + refine)
   4540 *
   4541 *  @param[in] pv_me_ctxt : ME ctxt pointer
   4542 *
   4543 *  @param[in] ps_ref_map : Reference map prms pointer
   4544 *
   4545 *  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
   4546 *
   4547 *  @param[in] ps_multi_thrd_ctxt :Multi thread related ctxt
   4548 *
    4549 *  @return None
   4550 ********************************************************************************
   4551 */
   4552 
   4553 void hme_coarse_process_frm(
   4554     void *pv_me_ctxt,
   4555     hme_ref_map_t *ps_ref_map,
   4556     hme_frm_prms_t *ps_frm_prms,
   4557     void *pv_multi_thrd_ctxt,
   4558     WORD32 i4_ping_pong,
   4559     void **ppv_dep_mngr_hme_sync)
   4560 {
   4561     S16 i2_max;
   4562     S32 layer_id;
   4563     coarse_prms_t s_coarse_prms;
   4564     refine_prms_t s_refine_prms;
   4565     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
   4566     S32 lyr_job_type;
   4567     multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
   4568 
   4569     ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
   4570     /*************************************************************************/
   4571     /* Fire processing of all layers, starting with coarsest layer.          */
   4572     /*************************************************************************/
   4573     layer_id = ps_ctxt->num_layers - 1;
   4574     i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
   4575     i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
   4576     s_coarse_prms.i4_layer_id = layer_id;
   4577     {
   4578         S32 log_start_step;
   4579         /* Based on Preset, set the starting step size for Refinement */
   4580         if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets)
   4581         {
   4582             log_start_step = 0;
   4583         }
   4584         else
   4585         {
   4586             log_start_step = 1;
   4587         }
   4588 
   4589         s_coarse_prms.i4_max_iters = i2_max >> log_start_step;
   4590         s_coarse_prms.i4_start_step = 1 << log_start_step;
   4591     }
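             /* e.g. with i2_max = 64: presets below ME_MEDIUM_SPEED get             */
             /* start_step = 1 and max_iters = 64; presets at or above it get        */
             /* start_step = 2 and max_iters = 32                                    */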
   4592     s_coarse_prms.i4_num_ref = ps_ref_map->i4_num_ref;
   4593     s_coarse_prms.do_full_search = 1;
   4594     if(s_coarse_prms.do_full_search)
   4595     {
   4596         /* Set to 2 or 4 */
   4597         if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
   4598             s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
   4599         else if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets >= ME_MEDIUM_SPEED)
   4600             s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
   4601     }
   4602     s_coarse_prms.num_results = ps_ctxt->max_num_results_coarse;
   4603 
   4604     /* Coarse layer uses only 1 lambda, i.e. the one for open loop ME */
   4605     s_coarse_prms.lambda = ps_frm_prms->i4_ol_sad_lambda_qf;
   4606     s_coarse_prms.lambda_q_shift = ps_frm_prms->lambda_q_shift;
   4607     s_coarse_prms.lambda = ((float)s_coarse_prms.lambda * (100.0 - ME_LAMBDA_DISCOUNT) / 100.0);
   4608 
   4609     hme_coarsest(ps_ctxt, &s_coarse_prms, ps_multi_thrd_ctxt, i4_ping_pong, ppv_dep_mngr_hme_sync);
   4610 
    4611     /* all refinement layers are processed in the loop below */
   4612     layer_id--;
   4613     lyr_job_type = ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type + 1;
   4614 
   4615     /*************************************************************************/
    4616     /* This loop will run for all refine layers (non-encode layers)           */
   4617     /*************************************************************************/
   4618     while(layer_id > 0)
   4619     {
   4620         hme_set_refine_prms(
   4621             &s_refine_prms,
   4622             ps_ctxt->u1_encode[layer_id],
   4623             ps_ref_map->i4_num_ref,
   4624             layer_id,
   4625             ps_ctxt->num_layers,
   4626             ps_ctxt->num_layers_explicit_search,
   4627             ps_ctxt->s_init_prms.use_4x4,
   4628             ps_frm_prms,
   4629             NULL,
   4630             &ps_ctxt->s_init_prms.s_me_coding_tools);
   4631 
   4632         hme_refine_no_encode(
   4633             ps_ctxt,
   4634             &s_refine_prms,
   4635             ps_multi_thrd_ctxt,
   4636             lyr_job_type,
   4637             i4_ping_pong,
   4638             ppv_dep_mngr_hme_sync);
   4639 
   4640         layer_id--;
   4641         lyr_job_type++;
   4642     }
   4643 }
   4644 /**
   4645 ********************************************************************************
   4646 *  @fn     hme_fill_neighbour_mvs
   4647 *
   4648 *  @brief  HME neighbour MV population function
   4649 *
   4650 *  @param[in] pps_mv_grid : MV grid array pointer
   4651 *
   4652 *  @param[in] i4_ctb_x : CTB pos X
    4653 *
   4654 *  @param[in] i4_ctb_y : CTB pos Y
   4655 *
   4656 *  @remarks :  Needs to be populated for proper implementation of cost fxn
   4657 *
    4658 *  @return None
   4659 ********************************************************************************
   4660 */
   4661 void hme_fill_neighbour_mvs(
   4662     mv_grid_t **pps_mv_grid, S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_num_ref, void *pv_ctxt)
   4663 {
   4664     /* TODO : Needs to be populated for proper implementation of cost fxn */
   4665     ARG_NOT_USED(pps_mv_grid);
   4666     ARG_NOT_USED(i4_ctb_x);
   4667     ARG_NOT_USED(i4_ctb_y);
   4668     ARG_NOT_USED(i4_num_ref);
   4669     ARG_NOT_USED(pv_ctxt);
   4670 }
   4671 
   4672 /**
   4673 *******************************************************************************
    4674 *  @fn     WORD32 hme_get_active_pocs_list(void *pv_me_ctxt,
    4675 *                                          S32 i4_num_me_frm_pllel)
    4676 *
    4677 *  @brief  Checks whether all reference descriptors in the ME ctxt are in use
    4678 *
    4679 *  @param[in] pv_me_ctxt : handle to ME context
    4680 *
    4681 *  @param[in] i4_num_me_frm_pllel : number of ME frames processed in parallel
    4682 *
    4683 *  @return   1 if every descriptor holds a valid, in-use POC, else 0
   4685 *******************************************************************************
   4686 */
   4687 WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel)
   4688 {
   4689     me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
   4690     S32 i, count = 0;
   4691 
   4692     for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
   4693     {
   4694         S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
   4695         S32 i4_is_free = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free;
   4696 
   4697         if((i4_is_free == 0) && (poc != INVALID_POC))
   4698         {
   4699             count++;
   4700         }
   4701     }
   4702     if(count == (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
   4703     {
   4704         return 1;
   4705     }
   4706     else
   4707     {
   4708         return 0;
   4709     }
   4710 }
   4711 
   4712 /**
   4713 *******************************************************************************
   4714 *  @fn     void hme_coarse_get_active_pocs_list(void *pv_me_ctxt,
   4715 *                                       S32 *p_pocs_buffered_in_me)
   4716 *
   4717 *  @brief  Returns the list of active POCs in ME ctxt
   4718 *
   4719 *  @param[in] pv_me_ctxt : handle to ME context
   4720 *
   4721 *  @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
   4722 *                                      populates with pocs active
   4723 *
   4724 *  @return   void
   4725 *******************************************************************************
   4726 */
   4727 void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me)
   4728 {
   4729     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
   4730     S32 i, count = 0;
   4731 
   4732     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
   4733     {
   4734         S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc;
   4735 
   4736         if(poc != -1)
   4737         {
   4738             p_pocs_buffered_in_me[count] = poc;
   4739             count++;
   4740         }
   4741     }
   4742     p_pocs_buffered_in_me[count] = -1;
   4743 }
   4744 
   4745 S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode)
   4746 {
   4747     /* coarsest layer uses 4x4 blks, lowermost layer/encode layer uses 16x16 */
   4748     if(layer_id == n_layers - 1)
   4749         return 4;
   4750     else if((layer_id == 0) || (encode))
   4751         return 16;
   4752 
   4753     /* Intermediate non encode layers use 8 */
   4754     return 8;
   4755 }
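         /* e.g. with 4 layers and encode done only in layer 0: layer 3 uses 4x4 */
         /* blks, layers 2 and 1 use 8x8, and layer 0 uses 16x16                 */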
   4756