Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /*!
     21 ******************************************************************************
     22 * \file hme_defs.h
     23 *
     24 * \brief
     25 *    Important definitions, enumerations, macros and structures used by ME
     26 *
     27 * \date
     28 *    18/09/2012
     29 *
     30 * \author
     31 *    Ittiam
     32 *
     33 ******************************************************************************
     34 */
     35 
     36 #ifndef _HME_DEFS_H_
     37 #define _HME_DEFS_H_
     38 
     39 /*****************************************************************************/
     40 /* Constant Macros                                                           */
     41 /*****************************************************************************/
     42 /**
     43 *******************************************************************************
     44 @brief Blk size of the CTB in the max possible case
     45 *******************************************************************************
     46  */
     47 #define CTB_BLK_SIZE 64
     48 
     49 /**
     50 *******************************************************************************
     51 @brief Maximum number of results per partition
     52 *******************************************************************************
     53  */
     54 #define MAX_RESULTS_PER_PART 2
     55 
     56 /**
     57 *******************************************************************************
     58 @brief Not used currently
     59 *******************************************************************************
     60  */
     61 #define MAX_NUM_UNIFIED_RESULTS 10
     62 #define MAX_NUM_CTB_NODES 10
     63 
     64 /**
     65 *******************************************************************************
     66 @brief For 64x64 CTB, we have 16x16 MV grid for prediction purposes (cost calc)
     67 This has 1 padding at boundaries for causal neighbours
     68 *******************************************************************************
     69  */
     70 #define CTB_MV_GRID_PAD 1
     71 
     72 /**
     73 *******************************************************************************
     74 @brief number of bits per bin
     75 *******************************************************************************
     76  */
     77 #define HME_CABAC_BITS_PER_BIN 0.5
     78 
     79 /**
     80 *******************************************************************************
     81 @brief bin count to bit count conversion
     82 *******************************************************************************
     83  */
     84 #define HME_GET_CAB_BIT(x) (U08(((x)*HME_CABAC_BITS_PER_BIN + 0.5)))
     85 
     86 /**
     87 *******************************************************************************
     88 @brief Columns in the MV grid
     89 *******************************************************************************
     90  */
     91 #define NUM_COLUMNS_IN_CTB_GRID (((CTB_BLK_SIZE) >> 2) + (2 * CTB_MV_GRID_PAD))
     92 
     93 /**
     94 *******************************************************************************
     95 @brief Rows in MV grid
     96 *******************************************************************************
     97  */
     98 #define NUM_ROWS_IN_CTB_GRID (NUM_COLUMNS_IN_CTB_GRID)
     99 
    100 /**
    101 *******************************************************************************
    102 @brief Total number of MVs held in CTB grid for prediction purposes
    103 *******************************************************************************
    104  */
    105 #define NUM_MVS_IN_CTB_GRID ((NUM_COLUMNS_IN_CTB_GRID) * (NUM_ROWS_IN_CTB_GRID))
    106 
    107 /**
    108 *******************************************************************************
    109 @brief Max number of candidates used for refinement during CU merge stage
    110 *******************************************************************************
    111  */
    112 #define MAX_MERGE_CANDTS 64
    113 
    114 /**
    115 *******************************************************************************
    116 @brief For BIDIR refinement, we use 2I-P0 as input, done max at CTB level, so
    117 stride for this input is 64
    118 *******************************************************************************
    119  */
    120 #define BACK_PREDICTION_INPUT_STRIDE 64
    121 
    122 /**
    123 *******************************************************************************
    124 @brief We basically store an impossible and unique MV to identify intra blks
    125 or CUs
    126 *******************************************************************************
    127  */
    128 #define INTRA_MV 0x4000
    129 
    130 /**
    131 *******************************************************************************
    132 @brief Defines the largest CTB supported by HME
    133 *******************************************************************************
    134  */
    135 #define HME_MAX_CTB_SIZE 64
    136 
    137 /**
    138 *******************************************************************************
    139 @brief Maximum number of 16x16 blks possible in a CTB. The basic search unit
    140 in the encode layer is 16x16
    141 *******************************************************************************
    142  */
    143 #define HME_MAX_16x16_IN_CTB ((HME_MAX_CTB_SIZE >> 4) * (HME_MAX_CTB_SIZE >> 4))
    144 
    145 /**
    146 *******************************************************************************
    147 @brief Max number of 8x8s possible in a CTB, this in other words is also the
    148 maximum number of CUs possible in a CTB
    149 *******************************************************************************
    150  */
    151 #define HME_MAX_8x8_IN_CTB ((HME_MAX_CTB_SIZE >> 3) * (HME_MAX_CTB_SIZE >> 3))
    152 
    153 /**
    154 *******************************************************************************
    155 @brief Maximum number of init candts supported for refinement search.
    156 *******************************************************************************
    157  */
    158 #define MAX_INIT_CANDTS 60
    159 
    160 /**
    161 *******************************************************************************
    162 @brief Maximum MV in X and Y directions in fullpel units allowed in any layer
    163 Any computed range for MV has to be within this
    164 *******************************************************************************
    165  */
    166 #define MAX_MV_X_FINEST 1024
    167 #define MAX_MV_Y_FINEST 512
    168 
    169 #define MAX_NUM_RESULTS 10
    170 
    171 #define USE_MODIFIED 1
    172 
    173 #define ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0 1
    174 
    175 #define ENABLE_EXPLICIT_SEARCH_IN_PQ 0
    176 
    177 /**
    178 *******************************************************************************
    179 @brief Driven by reasoning that we can tolerate an error of 4 in global mv
    180  in coarsest layer per comp, assuming we have search range of 1024x512, the mv
    181  range in coarse layer is 128x64, total bins is then 256/4 x 128/4 or 2K bins
    182 *******************************************************************************
    183  */
    184 #define LOG_MAX_NUM_BINS 11
    185 #define MAX_NUM_BINS (1 << LOG_MAX_NUM_BINS)
    186 
    187 #define NEXT_BLOCK_OFFSET_IN_L0_ME 22
    188 
    189 #define PREV_BLOCK_OFFSET_IN_L0_ME 6
    190 
    191 #define COLOCATED_BLOCK_OFFSET 2
    192 
    193 #define COLOCATED_4X4_NEXT_BLOCK_OFFSET 14
    194 
    195 #define MAP_X_MAX 16
    196 
    197 #define MAP_Y_MAX 16
    198 
    199 #define NUM_POINTS_IN_RECTANGULAR_GRID 9
    200 
    201 /*
    202 ******************************************************************************
    203 @brief Maximum number of elements in the sigmaX and sigmaX-Square array
    204 computed at 4x4 level for any CU size
    205 ******************************************************************************
    206 */
    207 #define MAX_NUM_SIGMAS_4x4 256
    208 
    209 /*****************************************************************************/
    210 /* Function Macros                                                           */
    211 /*****************************************************************************/
    212 
    213 /**
    214 *******************************************************************************
    215 @brief Calculates number of blks in picture, given width, ht, and a variable
    216 shift that controls basic blk size
    217 *******************************************************************************
    218  */
    219 #define GET_NUM_BLKS_IN_PIC(wd, ht, shift, num_cols, num_blks)                                     \
    220     {                                                                                              \
    221         S32 y, rnd;                                                                                \
    222         rnd = (1 << shift) - 1;                                                                    \
    223         num_cols = (wd + rnd) >> shift;                                                            \
    224         y = (ht + rnd) >> shift;                                                                   \
    225         num_blks = num_cols * y;                                                                   \
    226     }
    227 
    228 #define COUNT_CANDS(a, b)                                                                          \
    229     {                                                                                              \
    230         b = (((a) & (1))) + (((a >> 1) & (1))) + (((a >> 2) & (1))) + (((a >> 3) & (1))) +         \
    231             (((a >> 4) & (1))) + (((a >> 5) & (1))) + (((a >> 6) & (1))) + (((a >> 7) & (1))) +    \
    232             (((a >> 8) & (1)));                                                                    \
    233     }
    234 
    235 #define COPY_MV_TO_SEARCH_NODE(node, mv, pref, refid, shift)                                       \
    236     {                                                                                              \
    237         (node)->s_mv.i2_mvx = (mv)->i2_mv_x;                                                       \
    238         (node)->s_mv.i2_mvy = (mv)->i2_mv_y;                                                       \
    239         (node)->i1_ref_idx = *pref;                                                                \
    240         (node)->u1_is_avail = 1;                                                                   \
    241                                                                                                    \
    242         /* Can set the availability flag for MV Pred purposes */                                   \
    243         if(((node)->i1_ref_idx < 0) || ((node)->s_mv.i2_mvx == INTRA_MV))                          \
    244         {                                                                                          \
    245             (node)->u1_is_avail = 0;                                                               \
    246             (node)->i1_ref_idx = refid;                                                            \
    247             (node)->s_mv.i2_mvx = 0;                                                               \
    248             (node)->s_mv.i2_mvy = 0;                                                               \
    249         }                                                                                          \
    250         (node)->s_mv.i2_mvx >>= (shift);                                                           \
    251         (node)->s_mv.i2_mvy >>= (shift);                                                           \
    252         (node)->u1_subpel_done = (shift) ? 0 : 1;                                                  \
    253     }
    254 
    255 #define COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance)                                        \
    256     {                                                                                              \
    257         S32 mvx_q8 = (ps_mv)->mvx << 8;                                                            \
    258         S32 mvy_q8 = (ps_mv)->mvy << 8;                                                            \
    259         S32 mvcx_q8 = (ps_data)->s_centroid.i4_pos_x_q8;                                           \
    260         S32 mvcy_q8 = (ps_data)->s_centroid.i4_pos_y_q8;                                           \
    261                                                                                                    \
    262         S32 mvdx_q8 = mvx_q8 - mvcx_q8;                                                            \
    263         S32 mvdy_q8 = mvy_q8 - mvcy_q8;                                                            \
    264                                                                                                    \
    265         S32 mvdx = (mvdx_q8 + (1 << 7)) >> 8;                                                      \
    266         S32 mvdy = (mvdy_q8 + (1 << 7)) >> 8;                                                      \
    267                                                                                                    \
    268         S32 mvd = ABS(mvdx) + ABS(mvdy);                                                           \
    269                                                                                                    \
    270         cumulative_mv_distance += mvd;                                                             \
    271     }
    272 
    273 #define STATS_COLLECTOR_MV_INSERT(                                                                 \
    274     ps_mv_store, num_mvs_stored, mvx_cur, mvy_cur, stats_struct, check_for_duplicate, ref_idx)     \
    275     {                                                                                              \
    276         S32 i4_j;                                                                                  \
    277         (stats_struct).f_num_cands_being_processed++;                                              \
    278         check_for_duplicate = 0;                                                                   \
    279                                                                                                    \
    280         for(i4_j = 0; i4_j < (num_mvs_stored); i4_j++)                                             \
    281         {                                                                                          \
    282             if(((ps_mv_store)[i4_j].s_mv.i2_mvx == (mvx_cur)) &&                                   \
    283                ((ps_mv_store)[i4_j].s_mv.i2_mvy == (mvy_cur)) &&                                   \
    284                ((ps_mv_store)[i4_j].i1_ref_idx == ref_idx))                                        \
    285             {                                                                                      \
    286                 (stats_struct).f_num_duplicates_amongst_processed++;                               \
    287                 check_for_duplicate = 0;                                                           \
    288                 break;                                                                             \
    289             }                                                                                      \
    290         }                                                                                          \
    291                                                                                                    \
    292         if(i4_j == (num_mvs_stored))                                                               \
    293         {                                                                                          \
    294             (ps_mv_store)[i4_j].s_mv.i2_mvx = (mvx_cur);                                           \
    295             (ps_mv_store)[i4_j].s_mv.i2_mvy = (mvy_cur);                                           \
    296             (ps_mv_store)[i4_j].i1_ref_idx = ref_idx;                                              \
    297             (num_mvs_stored)++;                                                                    \
    298         }                                                                                          \
    299     }
    300 
    301 #define UPDATE_CLUSTER_METADATA_POST_MERGE(ps_cluster)                                             \
    302     {                                                                                              \
    303         S32 m;                                                                                     \
    304                                                                                                    \
    305         S32 num_clusters_evaluated = 0;                                                            \
    306                                                                                                    \
    307         for(m = 0; num_clusters_evaluated < (ps_cluster)->num_clusters; m++)                       \
    308         {                                                                                          \
    309             if(!((ps_cluster)->as_cluster_data[m].is_valid_cluster))                               \
    310             {                                                                                      \
    311                 if(-1 != (ps_cluster)->as_cluster_data[m].ref_id)                                  \
    312                 {                                                                                  \
    313                     (ps_cluster)->au1_num_clusters[(ps_cluster)->as_cluster_data[m].ref_id]--;     \
    314                 }                                                                                  \
    315             }                                                                                      \
    316             else                                                                                   \
    317             {                                                                                      \
    318                 num_clusters_evaluated++;                                                          \
    319             }                                                                                      \
    320         }                                                                                          \
    321     }
    322 
    323 #define SET_VALUES_FOR_TOP_REF_IDS(ps_cluster_blk, best_uni_ref, best_alt_ref, num_ref)            \
    324     {                                                                                              \
    325         ps_cluster_blk->best_uni_ref = best_uni_ref;                                               \
    326         ps_cluster_blk->best_alt_ref = best_alt_ref;                                               \
    327         ps_cluster_blk->num_refs = num_ref;                                                        \
    328     }
    329 
    330 #define MAP_X_MAX 16
    331 #define MAP_Y_MAX 16
    332 
    333 #define CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(                                                   \
    334     ps_dedup_enabler, num_cands, mvx, mvy, check_for_duplicate)                                    \
    335     {                                                                                              \
    336         S32 center_mvx;                                                                            \
    337         S32 center_mvy;                                                                            \
    338         S32 mvdx;                                                                                  \
    339         S32 mvdy;                                                                                  \
    340         U32 *pu4_node_map;                                                                         \
    341         S32 columnar_presence;                                                                     \
    342                                                                                                    \
    343         (check_for_duplicate) = 0;                                                                 \
    344         {                                                                                          \
    345             subpel_dedup_enabler_t *ps_dedup = &(ps_dedup_enabler)[0];                             \
    346             center_mvx = ps_dedup->i2_mv_x;                                                        \
    347             center_mvy = ps_dedup->i2_mv_y;                                                        \
    348             pu4_node_map = ps_dedup->au4_node_map;                                                 \
    349                                                                                                    \
    350             mvdx = (mvx)-center_mvx;                                                               \
    351             mvdy = (mvy)-center_mvy;                                                               \
    352                                                                                                    \
    353             if(((mvdx < MAP_X_MAX) && (mvdx >= -MAP_X_MAX)) &&                                     \
    354                ((mvdy < MAP_Y_MAX) && (mvdy >= -MAP_Y_MAX)))                                       \
    355             {                                                                                      \
    356                 columnar_presence = pu4_node_map[MAP_X_MAX + mvdx];                                \
    357                                                                                                    \
    358                 if(0 == (columnar_presence & (1U << (MAP_Y_MAX + mvdy))))                          \
    359                 {                                                                                  \
    360                     columnar_presence |= (1U << (MAP_Y_MAX + mvdy));                               \
    361                     pu4_node_map[MAP_X_MAX + mvdx] = columnar_presence;                            \
    362                 }                                                                                  \
    363                 else                                                                               \
    364                 {                                                                                  \
    365                     (check_for_duplicate) = 1;                                                     \
    366                 }                                                                                  \
    367             }                                                                                      \
    368         }                                                                                          \
    369     }
    370 
    371 #define BUMP_OUTLIER_CLUSTERS(ps_cluster_blk, sdi_threshold)                                       \
    372     {                                                                                              \
    373         outlier_data_t as_outliers[MAX_NUM_CLUSTERS_64x64 + 1];                                    \
    374                                                                                                    \
    375         S32 j, k;                                                                                  \
    376                                                                                                    \
    377         S32 num_clusters_evaluated = 0;                                                            \
    378         S32 num_clusters = ps_cluster_blk->num_clusters;                                           \
    379         S32 num_outliers_present = 0;                                                              \
    380                                                                                                    \
    381         for(j = 0; num_clusters_evaluated < num_clusters; j++)                                     \
    382         {                                                                                          \
    383             cluster_data_t *ps_data = &ps_cluster_blk->as_cluster_data[j];                         \
    384                                                                                                    \
    385             if(!ps_data->is_valid_cluster)                                                         \
    386             {                                                                                      \
    387                 continue;                                                                          \
    388             }                                                                                      \
    389                                                                                                    \
    390             num_clusters_evaluated++;                                                              \
    391                                                                                                    \
    392             if((ps_data->num_mvs == 1) && (ps_data->as_mv[0].sdi < sdi_threshold) &&               \
    393                (ps_cluster_blk->au1_num_clusters[ps_data->ref_id] >                                \
    394                 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX))                                                  \
    395             {                                                                                      \
    396                 as_outliers[num_outliers_present].cluster_id = j;                                  \
    397                 as_outliers[num_outliers_present].ref_idx = ps_data->ref_id;                       \
    398                 as_outliers[num_outliers_present].sdi = ps_data->as_mv[0].sdi;                     \
    399                 num_outliers_present++;                                                            \
    400             }                                                                                      \
    401         }                                                                                          \
    402                                                                                                    \
    403         for(j = 0; j < (num_outliers_present - 1); j++)                                            \
    404         {                                                                                          \
    405             for(k = (j + 1); k < num_outliers_present; k++)                                        \
    406             {                                                                                      \
    407                 if(as_outliers[j].sdi > as_outliers[k].sdi)                                        \
    408                 {                                                                                  \
    409                     as_outliers[MAX_NUM_CLUSTERS_64x64] = as_outliers[j];                          \
    410                     as_outliers[j] = as_outliers[k];                                               \
    411                     as_outliers[k] = as_outliers[MAX_NUM_CLUSTERS_64x64];                          \
    412                 }                                                                                  \
    413             }                                                                                      \
    414         }                                                                                          \
    415                                                                                                    \
    416         for(j = 0; j < (num_outliers_present); j++)                                                \
    417         {                                                                                          \
    418             S32 ref_idx = as_outliers[j].ref_idx;                                                  \
    419                                                                                                    \
    420             if((ps_cluster_blk->au1_num_clusters[ref_idx] > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX))      \
    421             {                                                                                      \
    422                 ps_cluster_blk->as_cluster_data[as_outliers[j].cluster_id].is_valid_cluster = 0;   \
    423                 ps_cluster_blk->num_clusters--;                                                    \
    424                 ps_cluster_blk->au1_num_clusters[ref_idx]--;                                       \
    425             }                                                                                      \
    426         }                                                                                          \
    427     }
    428 
    429 #define ADD_CLUSTER_CENTROID_AS_CANDS_FOR_BLK_MERGE(                                               \
    430     ps_cluster_data, ps_range_prms, ps_list, ps_mv, is_ref_in_l0, ref_idx)                         \
    431     {                                                                                              \
    432         ps_list = &(ps_cluster_data)->as_mv_list[!(is_ref_in_l0)][(ref_idx)];                      \
    433         ps_mv = &ps_list->as_mv[ps_list->num_mvs];                                                 \
    434                                                                                                    \
    435         ps_mv->i2_mvx = (ps_centroid->i4_pos_x_q8 + (1 << 7)) >> 8;                                \
    436         ps_mv->i2_mvy = (ps_centroid->i4_pos_y_q8 + (1 << 7)) >> 8;                                \
    437                                                                                                    \
    438         CLIP_MV_WITHIN_RANGE(ps_mv->i2_mvx, ps_mv->i2_mvy, (ps_range_prms), 0, 0, 0);              \
    439                                                                                                    \
    440         ps_cluster_data->ai4_ref_id_valid[!(is_ref_in_l0)][(ref_idx)] = 1;                         \
    441                                                                                                    \
    442         ps_list->num_mvs++;                                                                        \
    443     }
    444 
    445 #define COPY_SEARCH_CANDIDATE_DATA(node, mv, pref, refid, shift)                                   \
    446     {                                                                                              \
    447         (node)->ps_mv->i2_mvx = (mv)->i2_mv_x;                                                     \
    448         (node)->ps_mv->i2_mvy = (mv)->i2_mv_y;                                                     \
    449         (node)->i1_ref_idx = *pref;                                                                \
    450         (node)->u1_is_avail = 1;                                                                   \
    451                                                                                                    \
    452         /* Can set the availability flag for MV Pred purposes */                                   \
    453         if(((node)->i1_ref_idx < 0) || ((node)->ps_mv->i2_mvx == INTRA_MV))                        \
    454         {                                                                                          \
    455             (node)->u1_is_avail = 0;                                                               \
    456             (node)->i1_ref_idx = refid;                                                            \
    457             (node)->ps_mv->i2_mvx = 0;                                                             \
    458             (node)->ps_mv->i2_mvy = 0;                                                             \
    459         }                                                                                          \
    460         (node)->ps_mv->i2_mvx >>= (shift);                                                         \
    461         (node)->ps_mv->i2_mvy >>= (shift);                                                         \
    462         (node)->u1_subpel_done = (shift) ? 0 : 1;                                                  \
    463     }
    464 /**
    465 *******************************************************************************
    466 * @macro MIN_NODE
    467 * @brief Returns the search node with lesser cost
    468 *******************************************************************************
    469  */
    470 #define MIN_NODE(a, b) (((a)->i4_tot_cost < (b)->i4_tot_cost) ? (a) : (b))
    471 
    472 /**
    473 *******************************************************************************
    474 * @macro MAX_NODE
    475 * @brief Returns search node with higher cost
    476 *******************************************************************************
    477  */
    478 #define MAX_NODE(a, b) (((a)->i4_tot_cost >= (b)->i4_tot_cost) ? (a) : (b))
    479 
/**
******************************************************************************
 *  @macro  HME_INV_WT_PRED
 *  @brief Implements the inverse of the wt pred formula. The actual wt pred
 *  formula is (((input * wt) + rnd) >> shift) + offset
******************************************************************************
*/
    487 #define HME_INV_WT_PRED(inp, wt, off, shift) (((((inp) - (off)) << (shift)) + ((wt) >> 1)) / (wt))
    488 #define HME_INV_WT_PRED1(inp, wt, off, shift)                                                      \
    489     (((((inp) - (off)) << (shift)) * wt + (1 << 14)) >> 15)
    490 
    491 /**
    492 ******************************************************************************
    493  *  @macro  HME_WT_PRED
    494  *  @brief Implements wt pred formula as per spec
    495 ******************************************************************************
    496 */
    497 #define HME_WT_PRED(p0, p1, w0, w1, rnd, shift)                                                    \
    498     (((((S32)w0) * ((S32)p0) + ((S32)w1) * ((S32)p1)) >> shift) + rnd)
    499 
/**
******************************************************************************
 *  @macro PREFETCH_BLK
 *  @brief Prefetches a block of data into cache beforehand
******************************************************************************
*/
    506 
    507 /**
    508 ******************************************************************************
    509  *  @macro INSERT_NEW_NODE
    510  *  @brief Inserts a new search node in a list if it is unique; helps in
    511            removing duplicate nodes/candidates
    512 ******************************************************************************
    513 */
    514 #define PREFETCH_BLK(pu1_src, src_stride, lines, type)                                             \
    515     {                                                                                              \
    516         WORD32 ctr;                                                                                \
    517         for(ctr = 0; ctr < lines; ctr++)                                                           \
    518         {                                                                                          \
    519             PREFETCH((char const *)pu1_src, type);                                                 \
    520             pu1_src += src_stride;                                                                 \
    521         }                                                                                          \
    522     }
    523 
    524 #define INSERT_UNIQUE_NODE(                                                                        \
    525     as_nodes, num_nodes, new_node, au4_map, center_x, center_y, use_hashing)                       \
    526     {                                                                                              \
    527         WORD32 k;                                                                                  \
    528         UWORD32 map;                                                                               \
    529         WORD32 delta_x, delta_y;                                                                   \
    530         delta_x = (new_node).ps_mv->i2_mvx - (center_x);                                           \
    531         delta_y = (new_node).ps_mv->i2_mvy - (center_y);                                           \
    532         map = 0;                                                                                   \
    533                                                                                                    \
    534         if((use_hashing) && (delta_x < MAP_X_MAX) && (delta_x >= (-MAP_X_MAX)) &&                  \
    535            (delta_y < MAP_Y_MAX) && (delta_y >= (-MAP_Y_MAX)))                                     \
    536         {                                                                                          \
    537             map = (au4_map)[delta_x + MAP_X_MAX];                                                  \
    538             if(0 == (map & (1U << (delta_y + MAP_Y_MAX))))                                         \
    539             {                                                                                      \
    540                 (new_node).s_mv = (new_node).ps_mv[0];                                             \
    541                 (as_nodes)[(num_nodes)] = (new_node);                                              \
    542                 ((num_nodes))++;                                                                   \
    543                 map |= 1U << (delta_y + MAP_Y_MAX);                                                \
    544                 (au4_map)[delta_x + MAP_X_MAX] = map;                                              \
    545             }                                                                                      \
    546         }                                                                                          \
    547         else                                                                                       \
    548         {                                                                                          \
    549             for(k = 0; k < ((num_nodes)); k++)                                                     \
    550             {                                                                                      \
    551                 /* Search is this node is already present in unique list */                        \
    552                 if(((as_nodes)[k].s_mv.i2_mvx == (new_node).ps_mv->i2_mvx) &&                      \
    553                    ((as_nodes)[k].s_mv.i2_mvy == (new_node).ps_mv->i2_mvy) &&                      \
    554                    ((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx))                            \
    555                 {                                                                                  \
    556                     /* This is duplicate node; need not be inserted */                             \
    557                     break;                                                                         \
    558                 }                                                                                  \
    559             }                                                                                      \
    560             if(k == ((num_nodes)))                                                                 \
    561             {                                                                                      \
    562                 /* Insert new node only if it is not duplicate node */                             \
    563                 (new_node).s_mv = (new_node).ps_mv[0];                                             \
    564                 (as_nodes)[k] = (new_node);                                                        \
    565                 ((num_nodes))++;                                                                   \
    566             }                                                                                      \
    567         }                                                                                          \
    568     }
    569 
/**
******************************************************************************
 *  @macro INSERT_NEW_NODE_NOMAP
 *  @brief Inserts a new search node in a list if it is unique; helps in
 *         removing duplicate nodes/candidates
******************************************************************************
*/
    577 #define INSERT_NEW_NODE_NOMAP(as_nodes, num_nodes, new_node, implicit_layer)                       \
    578     {                                                                                              \
    579         WORD32 k;                                                                                  \
    580         if(!implicit_layer)                                                                        \
    581         {                                                                                          \
    582             for(k = 0; k < (num_nodes); k++)                                                       \
    583             {                                                                                      \
    584                 /* Search is this node is already present in unique list */                        \
    585                 if((as_nodes[k].s_mv.i2_mvx == new_node.s_mv.i2_mvx) &&                            \
    586                    (as_nodes[k].s_mv.i2_mvy == new_node.s_mv.i2_mvy))                              \
    587                 {                                                                                  \
    588                     /* This is duplicate node; need not be inserted */                             \
    589                     break;                                                                         \
    590                 }                                                                                  \
    591             }                                                                                      \
    592         }                                                                                          \
    593         else                                                                                       \
    594         {                                                                                          \
    595             for(k = 0; k < (num_nodes); k++)                                                       \
    596             {                                                                                      \
    597                 /* Search is this node is already present in unique list */                        \
    598                 if((as_nodes[k].s_mv.i2_mvx == new_node.s_mv.i2_mvx) &&                            \
    599                    (as_nodes[k].s_mv.i2_mvy == new_node.s_mv.i2_mvy) &&                            \
    600                    (as_nodes[k].i1_ref_idx == new_node.i1_ref_idx))                                \
    601                 {                                                                                  \
    602                     /* This is duplicate node; need not be inserted */                             \
    603                     break;                                                                         \
    604                 }                                                                                  \
    605             }                                                                                      \
    606         }                                                                                          \
    607                                                                                                    \
    608         if(k == (num_nodes))                                                                       \
    609         {                                                                                          \
    610             /* Insert new node only if it is not duplicate node */                                 \
    611             as_nodes[k] = new_node;                                                                \
    612             (num_nodes)++;                                                                         \
    613         }                                                                                          \
    614     }
    615 /**
    616 ******************************************************************************
    617  *  @macro INSERT_NEW_NODE_NOMAP_ALTERNATE
    618  *  @brief Inserts a new search node in a list if it is unique; helps in
    619            removing duplicate nodes/candidates
    620 ******************************************************************************
    621 */
    622 #define INSERT_NEW_NODE_NOMAP_ALTERNATE(as_nodes, num_nodes, new_node, result_num, part_id)        \
    623     {                                                                                              \
    624         WORD32 k;                                                                                  \
    625         WORD32 part_id_1 = (new_node->i4_num_valid_parts > 8) ? new_node->ai4_part_id[part_id]     \
    626                                                               : part_id;                           \
    627         for(k = 0; k < (num_nodes); k++)                                                           \
    628         {                                                                                          \
    629             /* Search is this node is already present in unique list */                            \
    630             if((as_nodes[k].s_mv.i2_mvx == new_node->i2_mv_x[result_num][part_id_1]) &&            \
    631                (as_nodes[k].s_mv.i2_mvy == new_node->i2_mv_y[result_num][part_id_1]) &&            \
    632                (as_nodes[k].i1_ref_idx == new_node->i2_ref_idx[result_num][part_id_1]))            \
    633             {                                                                                      \
    634                 /* This is duplicate node; need not be inserted */                                 \
    635                 break;                                                                             \
    636             }                                                                                      \
    637         }                                                                                          \
    638                                                                                                    \
    639         if(k == (num_nodes))                                                                       \
    640         {                                                                                          \
    641             /* Insert new node only if it is not duplicate node */                                 \
    642             as_nodes[k].i4_tot_cost = (WORD32)new_node->i2_tot_cost[result_num][part_id_1];        \
    643             as_nodes[k].i4_mv_cost = (WORD32)new_node->i2_mv_cost[result_num][part_id_1];          \
    644             as_nodes[k].s_mv.i2_mvx = new_node->i2_mv_x[result_num][part_id_1];                    \
    645             as_nodes[k].s_mv.i2_mvy = new_node->i2_mv_y[result_num][part_id_1];                    \
    646             as_nodes[k].i1_ref_idx = (WORD8)new_node->i2_ref_idx[result_num][part_id_1];           \
    647             as_nodes[k].u1_part_id = new_node->ai4_part_id[part_id];                               \
    648             (num_nodes)++;                                                                         \
    649         }                                                                                          \
    650     }
    651 
    652 #define INSERT_NEW_NODE(                                                                           \
    653     as_nodes, num_nodes, new_node, implicit_layer, au4_map, center_x, center_y, use_hashing)       \
    654     {                                                                                              \
    655         WORD32 k;                                                                                  \
    656         UWORD32 map;                                                                               \
    657         WORD32 delta_x, delta_y;                                                                   \
    658         delta_x = (new_node).s_mv.i2_mvx - center_x;                                               \
    659         delta_y = (new_node).s_mv.i2_mvy - center_y;                                               \
    660         map = 0;                                                                                   \
    661         if((delta_x < MAP_X_MAX) && (delta_x >= (-MAP_X_MAX)) && (delta_y < MAP_Y_MAX) &&          \
    662            (delta_y >= (-MAP_Y_MAX)) && (use_hashing))                                             \
    663         {                                                                                          \
    664             map = (au4_map)[delta_x + MAP_X_MAX];                                                  \
    665             if(0 == (map & (1U << (delta_y + MAP_Y_MAX))))                                         \
    666             {                                                                                      \
    667                 (as_nodes)[(num_nodes)] = (new_node);                                              \
    668                 (num_nodes)++;                                                                     \
    669                 map |= 1U << (delta_y + MAP_Y_MAX);                                                \
    670                 (au4_map)[delta_x + MAP_X_MAX] = map;                                              \
    671             }                                                                                      \
    672         }                                                                                          \
    673         else if(!(implicit_layer))                                                                 \
    674         {                                                                                          \
    675             for(k = 0; k < (num_nodes); k++)                                                       \
    676             {                                                                                      \
    677                 /* Search is this node is already present in unique list */                        \
    678                 if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) &&                        \
    679                    ((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy))                          \
    680                 {                                                                                  \
    681                     /* This is duplicate node; need not be inserted */                             \
    682                     break;                                                                         \
    683                 }                                                                                  \
    684             }                                                                                      \
    685             if(k == (num_nodes))                                                                   \
    686             {                                                                                      \
    687                 /* Insert new node only if it is not duplicate node */                             \
    688                 (as_nodes)[k] = (new_node);                                                        \
    689                 (num_nodes)++;                                                                     \
    690             }                                                                                      \
    691         }                                                                                          \
    692         else                                                                                       \
    693         {                                                                                          \
    694             for(k = 0; k < (num_nodes); k++)                                                       \
    695             {                                                                                      \
    696                 /* Search is this node is already present in unique list */                        \
    697                 if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) &&                        \
    698                    ((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy) &&                        \
    699                    ((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx))                            \
    700                 {                                                                                  \
    701                     /* This is duplicate node; need not be inserted */                             \
    702                     break;                                                                         \
    703                 }                                                                                  \
    704             }                                                                                      \
    705             if(k == (num_nodes))                                                                   \
    706             {                                                                                      \
    707                 /* Insert new node only if it is not duplicate node */                             \
    708                 (as_nodes)[k] = (new_node);                                                        \
    709                 (num_nodes)++;                                                                     \
    710             }                                                                                      \
    711         }                                                                                          \
    712     }
    713 
    714 #define COMPUTE_DIFF_MV(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh)                     \
    715     {                                                                                              \
    716         mvdx = (inp_node)->s_mv.i2_mvx << (inp_sh);                                                \
    717         mvdy = (inp_node)->s_mv.i2_mvy << (inp_sh);                                                \
    718         mvdx -= ((mv_p_x) << (pred_sh));                                                           \
    719         mvdy -= ((mv_p_y) << (pred_sh));                                                           \
    720     }
    721 
    722 #define COMPUTE_MV_DIFFERENCE(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh)               \
    723     {                                                                                              \
    724         mvdx = (inp_node)->ps_mv->i2_mvx << (inp_sh);                                              \
    725         mvdy = (inp_node)->ps_mv->i2_mvy << (inp_sh);                                              \
    726         mvdx -= ((mv_p_x) << (pred_sh));                                                           \
    727         mvdy -= ((mv_p_y) << (pred_sh));                                                           \
    728     }
    729 
    730 /**
    731 ******************************************************************************
    732  *  @enum  CU_MERGE_RESULT_T
    733  *  @brief Describes the results of merge, whether successful or not
    734 ******************************************************************************
    735 */
    736 typedef enum
    737 {
    738     CU_MERGED,
    739     CU_SPLIT
    740 } CU_MERGE_RESULT_T;
    741 
    742 /**
    743 ******************************************************************************
    744  *  @enum  PART_ORIENT_T
    745  *  @brief Describes the orientation of partition (vert/horz, left/rt)
    746 ******************************************************************************
    747 */
    748 typedef enum
    749 {
    750     VERT_LEFT,
    751     VERT_RIGHT,
    752     HORZ_TOP,
    753     HORZ_BOT
    754 } PART_ORIENT_T;
    755 
/**
******************************************************************************
 *  @enum  GRID_PT_T
 *  @brief For a 3x3 rect grid, numbers each point as shown
 *     5   2   6
 *     1   0   3
 *     7   4   8
******************************************************************************
*/
    765 typedef enum
    766 {
    767     PT_C = 0,
    768     PT_L = 1,
    769     PT_T = 2,
    770     PT_R = 3,
    771     PT_B = 4,
    772     PT_TL = 5,
    773     PT_TR = 6,
    774     PT_BL = 7,
    775     PT_BR = 8,
    776     NUM_GRID_PTS
    777 } GRID_PT_T;
    778 
/**
******************************************************************************
 *  @macro  IS_POW_2
 *  @brief Returns whether a number is a power of 2
******************************************************************************
*/
    785 #define IS_POW_2(x) (!((x) & ((x)-1)))
    786 
    787 /**
    788 ******************************************************************************
    789  *  @macro  GRID_ALL_PTS_VALID
    790  *  @brief For a 3x3 rect grid, this can be used to enable all pts in grid
    791 ******************************************************************************
    792 */
    793 #define GRID_ALL_PTS_VALID 0x1ff
    794 
    795 /**
    796 ******************************************************************************
    797  *  @macro  GRID_DIAMOND_ENABLE_ALL
    798  *  @brief If we search diamond, this enables all 5 pts of diamond (including centre)
    799 ******************************************************************************
    800 */
    801 #define GRID_DIAMOND_ENABLE_ALL                                                                    \
    802     (BIT_EN(PT_C) | BIT_EN(PT_L) | BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B))
    803 
    804 /**
    805 ******************************************************************************
    806  *  @macro  GRID_RT_3_INVALID, GRID_LT_3_INVALID,GRID_TOP_3_INVALID,GRID_BOT_3_INVALID
    807  *  @brief For a square grid search, depending on where the best result is
    808  *  we can optimise search for next iteration by invalidating some pts
    809 ******************************************************************************
    810 */
    811 #define GRID_RT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR)))
    812 #define GRID_LT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL)))
    813 #define GRID_TOP_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR)))
    814 #define GRID_BOT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR)))
    815 
    816 /**
    817 ******************************************************************************
    818  *  @enum  GMV_MVTYPE_T
    819  *  @brief Defines what type of GMV we need (thin lobe for a very spiky
    820  * distribution of mv or thick lobe for a blurred distrib of mvs
    821 ******************************************************************************
    822 */
    823 typedef enum
    824 {
    825     GMV_THICK_LOBE,
    826     GMV_THIN_LOBE,
    827     NUM_GMV_LOBES
    828 } GMV_MVTYPE_T;
    829 
/**
******************************************************************************
 *  @enum  BLK_SIZE_T
 *  @brief Defines all possible inter blk sizes
******************************************************************************
*/
    836 typedef enum
    837 {
    838     BLK_INVALID = -1,
    839     BLK_4x4 = 0,
    840     BLK_4x8,
    841     BLK_8x4,
    842     BLK_8x8,
    843     BLK_4x16,
    844     BLK_8x16,
    845     BLK_12x16,
    846     BLK_16x4,
    847     BLK_16x8,
    848     BLK_16x12,
    849     BLK_16x16,
    850     BLK_8x32,
    851     BLK_16x32,
    852     BLK_24x32,
    853     BLK_32x8,
    854     BLK_32x16,
    855     BLK_32x24,
    856     BLK_32x32,
    857     BLK_16x64,
    858     BLK_32x64,
    859     BLK_48x64,
    860     BLK_64x16,
    861     BLK_64x32,
    862     BLK_64x48,
    863     BLK_64x64,
    864     NUM_BLK_SIZES
    865 } BLK_SIZE_T;
    866 
    867 /**
    868 ******************************************************************************
    869  *  @enum  SEARCH_COMPLEXITY_T
    870  *  @brief For refinement layer, this decides the number of refinement candts
    871 ******************************************************************************
    872 */
    873 typedef enum
    874 {
    875     SEARCH_CX_LOW = 0,
    876     SEARCH_CX_MED = 1,
    877     SEARCH_CX_HIGH = 2
    878 } SEARCH_COMPLEXITY_T;
    879 
    880 /**
    881 ******************************************************************************
    882  *  @enum  CTB_BOUNDARY_TYPES_T
    883  *  @brief For pictures not a multiples of CTB horizontally or vertically, we
    884  *  define 4 unique cases, centre (full ctbs), bottom boundary (64x8k CTBs),
    885  *  right boundary (8mx64 CTBs), and bottom rt corner (8mx8k CTB)
    886 ******************************************************************************
    887 */
    888 typedef enum
    889 {
    890     CTB_CENTRE,
    891     CTB_BOT_PIC_BOUNDARY,
    892     CTB_RT_PIC_BOUNDARY,
    893     CTB_BOT_RT_PIC_BOUNDARY,
    894     NUM_CTB_BOUNDARY_TYPES,
    895 } CTB_BOUNDARY_TYPES_T;
    896 
    897 /**
    898 ******************************************************************************
    899  *  @enum  SEARCH_CANDIDATE_TYPE_T
    900  *  @brief Monikers for all sorts of search candidates used in ME
    901 ******************************************************************************
    902 */
    903 typedef enum
    904 {
    905     ILLUSORY_CANDIDATE = -1,
    906     ZERO_MV = 0,
    907     ZERO_MV_ALTREF,
    908     SPATIAL_LEFT0,
    909     SPATIAL_TOP0,
    910     SPATIAL_TOP_RIGHT0,
    911     SPATIAL_TOP_LEFT0,
    912     SPATIAL_LEFT1,
    913     SPATIAL_TOP1,
    914     SPATIAL_TOP_RIGHT1,
    915     SPATIAL_TOP_LEFT1,
    916     PROJECTED_COLOC0,
    917     PROJECTED_COLOC1,
    918     PROJECTED_COLOC2,
    919     PROJECTED_COLOC3,
    920     PROJECTED_COLOC4,
    921     PROJECTED_COLOC5,
    922     PROJECTED_COLOC6,
    923     PROJECTED_COLOC7,
    924     PROJECTED_COLOC_TR0,
    925     PROJECTED_COLOC_TR1,
    926     PROJECTED_COLOC_BL0,
    927     PROJECTED_COLOC_BL1,
    928     PROJECTED_COLOC_BR0,
    929     PROJECTED_COLOC_BR1,
    930     PROJECTED_TOP0,
    931     PROJECTED_TOP1,
    932     PROJECTED_TOP_RIGHT0,
    933     PROJECTED_TOP_RIGHT1,
    934     PROJECTED_TOP_LEFT0,
    935     PROJECTED_TOP_LEFT1,
    936     PROJECTED_RIGHT0,
    937     PROJECTED_RIGHT1,
    938     PROJECTED_BOTTOM0,
    939     PROJECTED_BOTTOM1,
    940     PROJECTED_BOTTOM_RIGHT0,
    941     PROJECTED_BOTTOM_RIGHT1,
    942     PROJECTED_BOTTOM_LEFT0,
    943     PROJECTED_BOTTOM_LEFT1,
    944     COLOCATED_GLOBAL_MV0,
    945     COLOCATED_GLOBAL_MV1,
    946     PROJECTED_TOP2,
    947     PROJECTED_TOP3,
    948     PROJECTED_TOP_RIGHT2,
    949     PROJECTED_TOP_RIGHT3,
    950     PROJECTED_TOP_LEFT2,
    951     PROJECTED_TOP_LEFT3,
    952     PROJECTED_RIGHT2,
    953     PROJECTED_RIGHT3,
    954     PROJECTED_BOTTOM2,
    955     PROJECTED_BOTTOM3,
    956     PROJECTED_BOTTOM_RIGHT2,
    957     PROJECTED_BOTTOM_RIGHT3,
    958     PROJECTED_BOTTOM_LEFT2,
    959     PROJECTED_BOTTOM_LEFT3,
    960     NUM_SEARCH_CAND_TYPES
    961 } SEARCH_CANDIDATE_TYPE_T;
    962 
    963 typedef enum
    964 {
    965     ILLUSORY_LOCATION = -1,
    966     COLOCATED,
    967     COLOCATED_4x4_TR,
    968     COLOCATED_4x4_BL,
    969     COLOCATED_4x4_BR,
    970     LEFT,
    971     TOPLEFT,
    972     TOP,
    973     TOPRIGHT,
    974     RIGHT,
    975     BOTTOMRIGHT,
    976     BOTTOM,
    977     BOTTOMLEFT,
    978     NUM_SEARCH_CAND_LOCATIONS
    979 } SEARCH_CAND_LOCATIONS_T;
    980 
    981 /**
    982 ******************************************************************************
    983  *  @macros  ENABLE_mxn
    984  *  @brief Enables a type or a group of partitions. ENABLE_ALL_PARTS, enables all
    985  *  partitions, while others enable selected partitions. These can be used
    986  *  to set the mask of active partitions
    987 ******************************************************************************
    988 */
    989 #define ENABLE_2Nx2N (BIT_EN(PART_ID_2Nx2N))
    990 #define ENABLE_2NxN (BIT_EN(PART_ID_2NxN_T) | BIT_EN(PART_ID_2NxN_B))
    991 #define ENABLE_Nx2N (BIT_EN(PART_ID_Nx2N_L) | BIT_EN(PART_ID_Nx2N_R))
    992 #define ENABLE_NxN                                                                                 \
    993     (BIT_EN(PART_ID_NxN_TL) | BIT_EN(PART_ID_NxN_TR) | BIT_EN(PART_ID_NxN_BL) |                    \
    994      BIT_EN(PART_ID_NxN_BR))
    995 #define ENABLE_2NxnU (BIT_EN(PART_ID_2NxnU_T) | BIT_EN(PART_ID_2NxnU_B))
    996 #define ENABLE_2NxnD (BIT_EN(PART_ID_2NxnD_T) | BIT_EN(PART_ID_2NxnD_B))
    997 #define ENABLE_nLx2N (BIT_EN(PART_ID_nLx2N_L) | BIT_EN(PART_ID_nLx2N_R))
    998 #define ENABLE_nRx2N (BIT_EN(PART_ID_nRx2N_L) | BIT_EN(PART_ID_nRx2N_R))
    999 #define ENABLE_AMP ((ENABLE_2NxnU) | (ENABLE_2NxnD) | (ENABLE_nLx2N) | (ENABLE_nRx2N))
   1000 #define ENABLE_SMP ((ENABLE_2NxN) | (ENABLE_Nx2N))
   1001 #define ENABLE_ALL_PARTS                                                                           \
   1002     ((ENABLE_2Nx2N) | (ENABLE_NxN) | (ENABLE_2NxN) | (ENABLE_Nx2N) | (ENABLE_AMP))
   1003 #define ENABLE_SQUARE_PARTS ((ENABLE_2Nx2N) | (ENABLE_NxN))
   1004 
   1005 /**
   1006 ******************************************************************************
   1007  *  @enum  MV_PEL_RES_T
   1008  *  @brief Resolution of MV fpel/hpel/qpel units. Useful for maintaining
   1009  *  predictors. During fpel search, candts, predictors etc are in fpel units,
   1010  *  in subpel search, they are in subpel units
   1011 ******************************************************************************
   1012 */
   1013 typedef enum
   1014 {
   1015     MV_RES_FPEL,
   1016     MV_RES_HPEL,
   1017     MV_RES_QPEL
   1018 } MV_PEL_RES_T;
   1019 
   1020 /**
   1021 ******************************************************************************
   1022  *  @enum  HME_SET_MVPRED_RES
   1023  *  @brief Sets resolution for predictor bank (fpel/qpel/hpel units)
   1024 ******************************************************************************
   1025 */
   1026 #define HME_SET_MVPRED_RES(ps_pred_ctxt, mv_pel_res) ((ps_pred_ctxt)->mv_pel = mv_pel_res)
   1027 
   1028 /**
   1029 ******************************************************************************
   1030  *  @enum  HME_SET_MVPRED_DIR
   1031  *  @brief Sets the direction, meaning L0/L1. Since L0 and L1 use separate
   1032  *  candts, the pred ctxt for them hasto be maintained separately
   1033 ******************************************************************************
   1034 */
   1035 #define HME_SET_MVPRED_DIR(ps_pred_ctxt, pred_lx) ((ps_pred_ctxt)->pred_lx = pred_lx)
   1036 
   1037 /**
   1038 ******************************************************************************
   1039  *  @brief macros to clip / check mv within specified range
   1040 ******************************************************************************
   1041  */
   1042 #define CHECK_MV_WITHIN_RANGE(x, y, range)                                                         \
   1043     (((x) > (range)->i2_min_x) && ((x) < (range)->i2_max_x) && ((y) > (range)->i2_min_y) &&        \
   1044      ((y) < (range)->i2_max_y))
   1045 
   1046 #define CONVERT_MV_LIMIT_TO_QPEL(range)                                                            \
   1047     {                                                                                              \
   1048         (range)->i2_max_x <<= 2;                                                                   \
   1049         (range)->i2_max_y <<= 2;                                                                   \
   1050         (range)->i2_min_x <<= 2;                                                                   \
   1051         (range)->i2_min_y <<= 2;                                                                   \
   1052     }
   1053 
   1054 #define CONVERT_MV_LIMIT_TO_FPEL(range)                                                            \
   1055     {                                                                                              \
   1056         (range)->i2_max_x >>= 2;                                                                   \
   1057         (range)->i2_max_y >>= 2;                                                                   \
   1058         (range)->i2_min_x >>= 2;                                                                   \
   1059         (range)->i2_min_y >>= 2;                                                                   \
   1060     }
   1061 
   1062 /**
   1063 ******************************************************************************
   1064  *  @brief Swicth to debug the number of subpel search nodes
   1065 ******************************************************************************
   1066 */
   1067 #define DEBUG_SUBPEL_SEARCH_NODE_HS_COUNT 0
   1068 
   1069 /**
   1070 ******************************************************************************
   1071  *  @typedef  SAD_GRID_T
   1072  *  @brief Defines a 2D array type used to store SADs across grid and across
   1073  * partition types
   1074 ******************************************************************************
   1075 */
   1076 typedef S32 SAD_GRID_T[9][MAX_NUM_PARTS];
   1077 
   1078 /*****************************************************************************/
   1079 /* Structures                                                                */
   1080 /*****************************************************************************/
   1081 
   1082 /**
   1083 ******************************************************************************
   1084  *  @struct  grid_node_t
   1085  *  @brief stores a complete info for a candt
   1086 ******************************************************************************
   1087 */
   1088 typedef struct
   1089 {
   1090     S16 i2_mv_x;
   1091     S16 i2_mv_y;
   1092     S08 i1_ref_idx;
   1093 } grid_node_t;
   1094 
   1095 /**
   1096 ******************************************************************************
   1097  *  @struct  search_node_t
   1098  *  @brief   Basic structure used for storage of search results, specification
   1099  *  of init candidates for search etc. This structure is complete for
   1100  *  specification of mv and cost for a given direction of search (L0/L1) but
   1101  *  does not carry information of what type of partition it represents.
   1102 ******************************************************************************
   1103  */
   1104 typedef struct
   1105 {
   1106     /** Motion vector */
   1107     mv_t s_mv;
   1108 
   1109     /** Used in the hme_mv_clipper function to reduce loads and stores */
   1110     mv_t *ps_mv;
   1111 
   1112     /** Ref id, as specified in terms of Lc, unified list */
   1113     S08 i1_ref_idx;
   1114 
   1115     /** Flag to indicate whether mv is in fpel or QPEL units */
   1116     U08 u1_subpel_done;
   1117 
   1118     /**
   1119      * Indicates whether this node constitutes a valid predictor candt.
   1120      * Since this structure also used for predictor candts, some candts may
   1121      * not be available (anti causal or outside pic boundary). Availabilit
   1122      * can be inferred using this flag.
   1123      */
   1124     U08 u1_is_avail;
   1125 
   1126     /**
   1127      * Indicates partition Id to which this node belongs. Useful during
   1128      * subpel / fullpel refinement search to identify partition whose
   1129      * cost needs to be minimized
   1130      */
   1131     U08 u1_part_id;
   1132 
   1133     /** SAD / SATD stored here */
   1134     S32 i4_sad;
   1135 
   1136     /**
   1137      * Cost related to coding MV, multiplied by lambda
   1138      * TODO : Entry may be redundant, can be removed
   1139      */
   1140     S32 i4_mv_cost;
   1141 
   1142     /** Total cost, (SAD + MV Cost) */
   1143     S32 i4_tot_cost;
   1144 
   1145     /** Subpel_Dist_Improvement.
   1146         It is the reduction in distortion (SAD or SATD) achieved
   1147         from the full-pel stage to the sub-pel stage
   1148     */
   1149     S32 i4_sdi;
   1150 
   1151 } search_node_t;
   1152 
   1153 /**
   1154 ******************************************************************************
   1155  *  @macro  INIT_SEARCH_NODE
   1156  *  @brief   Initializes this search_node_t structure. Can be used to zero
   1157  *          out candts, set max costs in results etc
   1158 ******************************************************************************
   1159  */
   1160 #define INIT_SEARCH_NODE(x, a)                                                                     \
   1161     {                                                                                              \
   1162         (x)->s_mv.i2_mvx = 0;                                                                      \
   1163         (x)->s_mv.i2_mvy = 0;                                                                      \
   1164         (x)->i1_ref_idx = a;                                                                       \
   1165         (x)->i4_tot_cost = MAX_32BIT_VAL;                                                          \
   1166         (x)->i4_sad = MAX_32BIT_VAL;                                                               \
   1167         (x)->u1_subpel_done = 0;                                                                   \
   1168         (x)->u1_is_avail = 1;                                                                      \
   1169     }
   1170 
   1171 /**
   1172 ******************************************************************************
   1173  *  @struct  part_attr_t
   1174  *  @brief   Geometric description of a partition w.r.t. CU start. Note that
   1175  *           since this is used across various CU sizes, the inference of
   1176  *           these members is to be done in the context of specific usage
   1177 ******************************************************************************
   1178  */
   1179 typedef struct
   1180 {
   1181     /** Start of partition w.r.t. CU start in x dirn */
   1182     U08 u1_x_start;
   1183     /** Size of partitino w.r.t. CU start in x dirn */
   1184     U08 u1_x_count;
   1185     /** Start of partition w.r.t. CU start in y dirn */
   1186     U08 u1_y_start;
   1187     /** Size of partitino w.r.t. CU start in y dirn */
   1188     U08 u1_y_count;
   1189 } part_attr_t;
   1190 
   1191 /**
   1192 ******************************************************************************
   1193  *  @struct  search_candt_t
   1194  *  @brief   Complete information for a given candt in any refinement srch
   1195 ******************************************************************************
   1196  */
   1197 typedef struct
   1198 {
   1199     /** Points to the mv, ref id info. */
   1200     search_node_t *ps_search_node;
   1201     /** Number of refinemnts to be done for this candt */
   1202     U08 u1_num_steps_refine;
   1203 } search_candt_t;
   1204 
   1205 /**
   1206 ******************************************************************************
   1207  *  @struct  result_node_t
   1208  *  @brief   Contains complete search result for a CU for a given type of
   1209  *           partition split. Holds ptrs to results for each partition, with
   1210  *           information of partition type.
   1211 ******************************************************************************
   1212  */
   1213 typedef struct
   1214 {
   1215     /**
   1216      * Type of partition that the CU is split into, for which this
   1217      * result is relevant
   1218      */
   1219     PART_TYPE_T e_part_type;
   1220 
   1221     /**
   1222      * Total cost of coding the CU (sum of costs of individual partitions
   1223      * plus other possible CU level overheads)
   1224      */
   1225     S32 i4_tot_cost;
   1226 
   1227     /**
   1228      * Pointer to results of each individual partitions. Note that max
   1229      * number of partitions a CU can be split into is MAX_NUM_PARTS
   1230      */
   1231     search_node_t *ps_part_result[MAX_NUM_PARTS];
   1232 
   1233     /* TU split flag : tu_split_flag[0] represents the transform splits
   1234      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
   1235      *  to respective 32x32  */
   1236     S32 ai4_tu_split_flag[4];
   1237 
   1238 } result_node_t;
   1239 
   1240 /**
   1241 ******************************************************************************
   1242  *  @struct  ctb_node_t
   1243  *  @brief   Finalized information for a given CU or CTB. This is a recursive
   1244  *           structure and can hence start at CTB level, recursing for every
   1245  *           level of split till we hit leaf CUs in the CTB. At leaf node
   1246  *           it contains info for coded non split CU, with child nodes being
   1247  *           set to NULL
   1248 ******************************************************************************
   1249  */
   1250 typedef struct ctb_node_t
   1251 {
   1252     /** x offset of this CU w.r.t. CTB start (0-63) */
   1253     U08 u1_x_off;
   1254     /** y offset of this C U w.r.t. CTB start (0-63) */
   1255     U08 u1_y_off;
   1256     /** Results of each partition in both directions L0,L1 */
   1257     search_node_t as_part_results[MAX_NUM_PARTS][2];
   1258     /**
   1259      * Pointers to pred buffers. Note that the buffer may be allocated
   1260      * at parent level or at this level
   1261      */
   1262     U08 *apu1_pred[2];
   1263     /** Prediction direction for each partition: 0-L0, 1-L1, 2-BI */
   1264     U08 u1_pred_dir[MAX_NUM_PARTS];
   1265     /**
   1266      * When pred direction is decided to be BI, we still store the best
   1267      * uni pred dir (L0/L1) in this array, for RD Opt purposes
   1268      */
   1269     U08 u1_best_uni_dir[MAX_NUM_PARTS];
   1270     /** Stride of pred buffer pointed to by apu1_pred member */
   1271     S32 i4_pred_stride;
   1272     /** Size of the CU that this node represents */
   1273     CU_SIZE_T e_cu_size;
   1274     /** For leaf CUs, this indicats type of partition (for e.g. PRT_2NxN) */
   1275     PART_TYPE_T e_part_type;
   1276     /** Below entries are for a CU level*/
   1277     S32 i4_sad;
   1278     S32 i4_satd;
   1279     S32 i4_mv_cost;
   1280     S32 i4_rate;
   1281     S32 i4_dist;
   1282     S32 i4_tot_cost;
   1283     /** Best costs of each partitions, if partition is BI, then best cost across uni/bi */
   1284     S32 ai4_part_costs[4];
   1285 
   1286     /* TU split flag : tu_split_flag[0] represents the transform splits
   1287      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
   1288      *  to respective 32x32  */
   1289     /* For a 8x8 TU - 1 bit used to indicate split */
   1290     /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
   1291     /* For a 32x32 TU - See above */
   1292     S32 ai4_tu_split_flag[4];
   1293 
   1294     /**
   1295      * pointers to child nodes. If this node is split, then the below point
   1296      * to children nodes (TL, TR, BL, BR) each of quarter size (w/2, h/2)
   1297      * If this node not split, then below point to null
   1298      */
   1299     struct ctb_node_t *ps_tl;
   1300     struct ctb_node_t *ps_tr;
   1301     struct ctb_node_t *ps_bl;
   1302     struct ctb_node_t *ps_br;
   1303 } ctb_node_t;
   1304 
   1305 /**
   1306 ******************************************************************************
   1307  *  @struct  ctb_mem_mgr_t
   1308  *  @brief   Memory manager structure for CTB level memory allocations of CTB
   1309  *           nodes
   1310 ******************************************************************************
   1311  */
   1312 typedef struct
   1313 {
   1314     /** Base memory ptr */
   1315     U08 *pu1_mem;
   1316     /** Amount used so far (running value) */
   1317     S32 i4_used;
   1318     /** Total memory available for this mem mgr */
   1319     S32 i4_tot;
   1320 
   1321     /** Size of CTB node, and alignment requiremnts */
   1322     S32 i4_size;
   1323     S32 i4_align;
   1324 } ctb_mem_mgr_t;
   1325 
   1326 /**
   1327 ******************************************************************************
   1328  *  @struct  buf_mgr_t
   1329  *  @brief   Memory manager structure for CTB level buffer allocations on the
   1330  *           fly, esp useful for pred bufs and working memory
   1331 ******************************************************************************
   1332  */
   1333 typedef struct
   1334 {
   1335     /** base memory ptr */
   1336     U08 *pu1_wkg_mem;
   1337     /** total memory available */
   1338     S32 i4_total;
   1339     /** Memory used so far */
   1340     S32 i4_used;
   1341 } buf_mgr_t;
   1342 
   1343 /**
   1344 ******************************************************************************
   1345  *  @struct  pred_candt_nodes_t
   1346  *  @brief   For a given partition and a given CU/blk, this has pointers to
   1347  *           all the neighbouring and coloc pred candts. All the pred candts
   1348  *           are stored as search_node_t structures itself.
   1349 ******************************************************************************
   1350  */
   1351 typedef struct
   1352 {
   1353     search_node_t *ps_tl;
   1354     search_node_t *ps_t;
   1355     search_node_t *ps_tr;
   1356     search_node_t *ps_bl;
   1357     search_node_t *ps_l;
   1358     search_node_t *ps_coloc;
   1359     search_node_t *ps_zeromv;
   1360     search_node_t **pps_proj_coloc;
   1361 
   1362     search_node_t *ps_mvp_node;
   1363 } pred_candt_nodes_t;
   1364 
   1365 /**
   1366 ******************************************************************************
   1367  *  @struct  pred_ctxt_t
   1368  *  @brief   For a given CU/blk, has complete prediction information for all
   1369  *           types of partitions. Note that the pred candts are only pointed
   1370  *           to, not actually stored here. This indirection is to avoid
   1371  *           copies after each partition search, this way, the result of
   1372  *           a partition is updated and the causally next partition
   1373  *           automatically uses this result
   1374 ******************************************************************************
   1375  */
   1376 typedef struct
   1377 {
   1378     pred_candt_nodes_t as_pred_nodes[TOT_NUM_PARTS];
   1379 
   1380     /**
   1381      *  We use S + lambda * R to evaluate cost. Here S = SAD/SATD and lambda
   1382      *  is the scaling of bits to S and R is bits of overhead (MV + mode).
   1383      *  Choice of lambda depends on open loop / closed loop, Qp, temporal id
   1384      *  and possibly CU depth. It is the caller's responsiblity to pass
   1385      *  to this module the appropriate lambda.
   1386      */
   1387     S32 lambda;
   1388 
   1389     /** lambda is in Q format, so this is the downshift reqd */
   1390     S32 lambda_q_shift;
   1391 
   1392     /** Prediction direction : PRED_L0 or PRED_L1 */
   1393     S32 pred_lx;
   1394 
   1395     /** MV resolution: FPEL, HPEL or QPEL */
   1396     S32 mv_pel;
   1397 
   1398     /** Points to the ref bits lookup 1 ptr for each PRED_Lx */
   1399     U08 **ppu1_ref_bits_tlu;
   1400 
   1401     /**
   1402      *  Points to the ref scale factor, for a given ref id k,
   1403      *  to scale as per ref id m, we use entry k+MAX_NUM_REF*m
   1404      */
   1405     S16 *pi2_ref_scf;
   1406 
   1407     /**
   1408      *  Flag that indicates whether T, TR and TL candidates used
   1409      *  are causal or projected
   1410      */
   1411     U08 proj_used;
   1412 
   1413 } pred_ctxt_t;
   1414 
   1415 /**
   1416 ******************************************************************************
   1417  *  @struct  search_results_t
   1418  *  @brief   For a given CU/blk, Stores all the results of ME search. Results
   1419  *           are stored per partition, also the best results for CU are stored
   1420  *           across partitions.
   1421 ******************************************************************************
   1422  */
   1423 typedef struct
   1424 {
   1425     /** Size of CU for which this structure used */
   1426     CU_SIZE_T e_cu_size;
   1427 
   1428     /**
   1429      * X and y offsets w.r.t. CTB start in encode layers. For non encode
   1430      * layers, these may typically be 0
   1431      */
   1432     U08 u1_x_off;
   1433     U08 u1_y_off;
   1434 
   1435     /** Number of best results for this CU stored */
   1436     U08 u1_num_best_results;
   1437 
   1438     /** Number of results stored per partition. */
   1439     U08 u1_num_results_per_part;
   1440 
   1441     /**
   1442      * Number of result planes active. This may be different from total
   1443      * number of active references during search. For example, we may
   1444      * have 4 active ref, 2 ineach dirn, but active result planes may
   1445      * only be 2, one for L0 and 1 for L1
   1446      */
   1447     U08 u1_num_active_ref;
   1448     /**
   1449      * mask of active partitions, Totally 17 bits. For a given partition
   1450      * id, as per PART_ID_T enum the corresponding bit position is 1/0
   1451      * indicating that partition is active or inactive
   1452      */
   1453     S32 i4_part_mask;
   1454 
   1455     /** Points to partial results for each partition id
   1456      *  Temporary hack for the bug: If +1 is not kept,
   1457      *  it doesn't bit match with older version
   1458      */
   1459     search_node_t *aps_part_results[MAX_NUM_REF][TOT_NUM_PARTS];
   1460 
   1461     /**
   1462      * Ptr to best results for the current CU post bi pred evaluation and
   1463      * intra mode insertions
   1464      */
   1465     inter_cu_results_t *ps_cu_results;
   1466 
   1467     /** 2 pred ctxts, one for L0 and one for L1 */
   1468     pred_ctxt_t as_pred_ctxt[2];
   1469 
   1470     /**
   1471      * Pointer to a table that indicates whether the ref id
   1472      * corresponds to past or future dirn. Input is ref id Lc form
   1473      */
   1474 
   1475     U08 *pu1_is_past;
   1476 
   1477     /**
   1478      * Overall best CU cost, while other entries store CU costs
   1479      * in single direction, this is best CU cost, where each
   1480      * partition cost is evaluated as best of uni/bi
   1481      */
   1482     S32 best_cu_cost;
   1483 
   1484     /**
   1485      * Split_flag which is used for deciding if 16x16 CU is split or not
   1486      */
   1487     U08 u1_split_flag;
   1488 } search_results_t;
   1489 
   1490 /**
   1491 ******************************************************************************
   1492  *  @struct  ctb_list_t
   1493  *  @brief   Tree structure containing info for entire CTB. At top level
   1494  *           it points to entire CTB results, with children nodes at each lvl
   1495  *           being non null if split.
   1496 ******************************************************************************
   1497  */
   1498 typedef struct ctb_list_t
   1499 {
   1500     /** Indicates whether this level split further */
   1501     U08 u1_is_split;
   1502 
   1503     /** Number of result candts present */
   1504     U08 u1_num_candts;
   1505 
   1506     /**
   1507      * Whether this level valid. E.g. if we are at boundary, where only
   1508      * left 2 32x32 are within pic boundary, then the parent is force split
   1509      * at the children level, TR and BR are invalid.
   1510      */
   1511     U08 u1_is_valid;
   1512 
   1513     /**
   1514      * IF this level is 16x16 then this mask indicates which 8x8 blks
   1515      * are valid
   1516      */
   1517     U08 u1_8x8_mask;
   1518 
   1519     /** Search results of this CU */
   1520     search_results_t *ps_search_results;
   1521 
   1522     /** Search results of this CU */
   1523     inter_cu_results_t *ps_cu_results;
   1524 
   1525     /** Pointers to leaf nodes, if CU is split further, else null */
   1526     struct ctb_list_t *ps_tl;
   1527     struct ctb_list_t *ps_tr;
   1528     struct ctb_list_t *ps_bl;
   1529     struct ctb_list_t *ps_br;
   1530 } ctb_list_t;
   1531 
   1532 /**
   1533 ******************************************************************************
   1534  *  @struct  layer_mv_t
   1535  *  @brief   mv bank structure for a particular layer
   1536 ******************************************************************************
   1537  */
   1538 typedef struct
   1539 {
   1540     /** Number of mvs for a given ref/pred dirn */
   1541     S32 i4_num_mvs_per_ref;
   1542     /** Number of reference for which results stored */
   1543     S32 i4_num_ref;
   1544     /** Number of mvs stored per blk. Product of above two */
   1545     S32 i4_num_mvs_per_blk;
   1546     /** Block size of the unit for which MVs stored */
   1547     BLK_SIZE_T e_blk_size;
   1548     /** Number of blocks present per row */
   1549     S32 i4_num_blks_per_row;
   1550 
   1551     /** Number of mvs stored every row */
   1552     S32 i4_num_mvs_per_row;
   1553 
   1554     /**
   1555      * Max number of mvs allowed per row. The main purpose of this variable
   1556      * is to resolve or detect discrepanceis between allocation time mem
   1557      * and run time mem, when alloc time resolution and run time resolution
   1558      * may be different
   1559      */
   1560     S32 max_num_mvs_per_row;
   1561 
   1562     /**
   1563      * Pointer to mvs of 0, 0 blk, This is different from base since the
   1564      * mv bank is padded all sides
   1565     */
   1566     hme_mv_t *ps_mv;
   1567 
   1568     /** Pointer to base of mv bank mvs */
   1569     hme_mv_t *ps_mv_base;
   1570 
   1571     /** Pointers to ref idx.One to one correspondence between this and ps_mv*/
   1572     S08 *pi1_ref_idx;
   1573     /** Base of ref ids just like in case of ps_mv */
   1574     S08 *pi1_ref_idx_base;
   1575 
   1576     /** Part mask for every blk, if stored, 1 per blk */
   1577     U08 *pu1_part_mask;
   1578 } layer_mv_t;
   1579 
   1580 /**
   1581 ******************************************************************************
   1582  *  @struct  mv_hist_t
   1583  *  @brief   Histogram structure to calculate global mvs
   1584 ******************************************************************************
   1585  */
   1586 typedef struct
   1587 {
   1588     S32 i4_num_rows;
   1589     S32 i4_num_cols;
   1590     S32 i4_shift_x;
   1591     S32 i4_shift_y;
   1592     S32 i4_lobe1_size;
   1593     S32 i4_lobe2_size;
   1594     S32 i4_min_x;
   1595     S32 i4_min_y;
   1596     S32 i4_num_bins;
   1597     S32 ai4_bin_count[MAX_NUM_BINS];
   1598 } mv_hist_t;
   1599 
   1600 typedef struct
   1601 {
   1602     U08 u1_is_past;
   1603 } ref_attr_t;
   1604 
   1605 /**
   1606 ******************************************************************************
   1607  *  @struct  layer_ctxt_t
   1608  *  @brief   Complete information for the layer
   1609 ******************************************************************************
   1610  */
   1611 typedef struct
   1612 {
   1613     /** Display width of this layer */
   1614     S32 i4_disp_wd;
   1615     /** Display height of this layer */
   1616     S32 i4_disp_ht;
   1617     /** Width of this layer */
   1618     S32 i4_wd;
   1619     /** Height of this layer */
   1620     S32 i4_ht;
   1621     /** Amount of padding of input in x direction */
   1622     S32 i4_pad_x_inp;
   1623     /** Amount of padding of input in y direction */
   1624     S32 i4_pad_y_inp;
   1625     /** Amount of padding of recon in x direction */
   1626     S32 i4_pad_x_rec;
   1627     /** Amount of padding of recon in y direction */
   1628     S32 i4_pad_y_rec;
   1629 
   1630     /**
   1631      * Offset for recon. Since recon has padding, the 0, 0 start differs
   1632      * from base of buffer
   1633      */
   1634     S32 i4_rec_offset;
   1635     /** Offset for input, same explanation as recon */
   1636     S32 i4_inp_offset;
   1637     /** Stride of input buffer */
   1638     S32 i4_inp_stride;
   1639     /** Stride of recon buffer */
   1640     S32 i4_rec_stride;
   1641     /** Pic order count */
   1642     S32 i4_poc;
   1643     /** Input pointer. NOTE(review): presumably pu1_inp_base + i4_inp_offset - confirm */
   1644     U08 *pu1_inp;
   1645     /** Base of input. Add inp_offset to go to 0, 0 location */
   1646     U08 *pu1_inp_base;
   1647 
   1648     /** Pointers to the 4 hpel recon planes */
   1649     U08 *pu1_rec_fxfy;
   1650     U08 *pu1_rec_hxfy;
   1651     U08 *pu1_rec_fxhy;
   1652     U08 *pu1_rec_hxhy;
   1653 
   1654     /** Global mv, one set per reference searched */
   1655     hme_mv_t s_global_mv[MAX_NUM_REF][NUM_GMV_LOBES];
   1656 
   1657     /** Layer MV bank */
   1658     layer_mv_t *ps_layer_mvbank;
   1659 
   1660     /** Pointer to list of recon buffers for each ref id, one ptr per plane */
   1661     U08 **ppu1_list_rec_fxfy;
   1662     U08 **ppu1_list_rec_hxfy;
   1663     U08 **ppu1_list_rec_fxhy;
   1664     U08 **ppu1_list_rec_hxhy;
   1665     /** NOTE(review): presumably dependency manager handles for the recon buffers - confirm */
   1666     void **ppv_dep_mngr_recon;
   1667 
   1668     /** Pointer to list of input buffers for each ref id, one ptr per plane */
   1669     U08 **ppu1_list_inp;
   1670 
   1671     /** Max MV in x and y direction supported at this layer resolution */
   1672     S16 i2_max_mv_x;
   1673     S16 i2_max_mv_y;
   1674 
   1675     /** Converts ref id (as per Lc list) to POC */
   1676     S32 ai4_ref_id_to_poc_lc[MAX_NUM_REF];
   1677     /** NOTE(review): by analogy with the table above, presumably ref id to display number - confirm */
   1678     S32 ai4_ref_id_to_disp_num[MAX_NUM_REF];
   1679 
   1680     /** Status of the buffer */
   1681     S32 i4_is_free;
   1682 
   1683     /** IDR GOP number */
   1684     S32 i4_idr_gop_num;
   1685 
   1686     /** Is reference picture */
   1687     S32 i4_is_reference;
   1688 
   1689     /** Is non reference picture processed by ME */
   1690     S32 i4_non_ref_free;
   1691 
   1692 } layer_ctxt_t;
   1693 
   1694 typedef S32 (*PF_MV_COST_FXN)(search_node_t *, pred_ctxt_t *, PART_ID_T, S32); /* Type of the pf_mv_cost_compute (MV cost compute) function pointers used in the prms structs of this file */
   1695 
   1696 /**
   1697  ******************************************************************************
   1698  *  @struct refine_prms_t
   1699  *  @brief  All the configurable input parameters for the refinement layer
   1700  *
   1701  *  @param i4_encode: Whether this layer is encoded or not
   1702  *  @param explicit_ref: If enabled, then the number of reference frames to
   1703  *                       be searched is a function of coarsest layer num ref
   1704  *                       frames. Else, number of references collapsed to 1/2
   1705  *  @param i4_num_fpel_results : Number of full pel results to be allowed
   1706  *  @param i4_num_results_per_part: Number of results stored per partition
   1707  *  @param e_search_complexity: Decides the number of initial candts, refer
   1708  *                               to SEARCH_COMPLEXITY_T
   1709  *  @param i4_use_rec_in_fpel: Whether to use input buf or recon buf in fpel
   1710  *  @param i4_enable_4x4_part : if i4_encode is 0, we use 8x8 blks, if this param
   1711  *                              enabled, then we do 4x4 partial sad update
   1712  *  @param i4_layer_id        : id of this layer (0 = finest)
   1713  *  @param i4_num_32x32_merge_results: number of 32x32 merged results stored
   1714  *  @param i4_num_64x64_merge_results: number of 64x64 merged results stored
   1715  *  @param i4_use_satd_cu_merge: Use SATD during CU merge
   1716  *  @param i4_num_steps_hpel_refine : Number of steps during hpel refinement
   1717  *  @param i4_num_steps_qpel_refine : Same as above but for qpel
   1718  *  @param i4_use_satd_subpel : Use of SATD or SAD for subpel
   1719  ******************************************************************************
   1720 */
   1721 typedef struct
   1722 {
   1723     /* This array is used to place upper bounds on the number of search candidates */
   1724     /* that can be used per 'search cand location' */
   1725     U08 au1_num_fpel_search_cands[NUM_SEARCH_CAND_LOCATIONS];
   1726 
   1727     U08 u1_max_2nx2n_tu_recur_cands;
   1728 
   1729     U08 u1_max_num_fpel_refine_centers;
   1730 
   1731     U08 u1_max_num_subpel_refine_centers;
   1732     /* Fields below that have a matching @param are described in the header comment of this struct */
   1733     S32 i4_encode;
   1734     S32 explicit_ref;
   1735     S32 i4_num_ref_fpel; /* No @param entry in the header comment */
   1736     S32 i4_num_fpel_results;
   1737 
   1738     S32 i4_num_results_per_part;
   1739 
   1740     S32 i4_num_mvbank_results; /* No @param entry in the header comment */
   1741     SEARCH_COMPLEXITY_T e_search_complexity;
   1742     S32 i4_use_rec_in_fpel;
   1743 
   1744     S32 i4_enable_4x4_part;
   1745     S32 i4_layer_id;
   1746 
   1747     S32 i4_num_32x32_merge_results;
   1748     S32 i4_num_64x64_merge_results;
   1749 
   1750     S32 i4_use_satd_cu_merge;
   1751 
   1752     S32 i4_num_steps_post_refine_fpel; /* No @param entry in the header comment */
   1753     S32 i4_num_steps_fpel_refine; /* No @param entry in the header comment */
   1754     S32 i4_num_steps_hpel_refine;
   1755     S32 i4_num_steps_qpel_refine;
   1756     S32 i4_use_satd_subpel;
   1757     /* NOTE(review): length and ownership of this array are not visible in this header */
   1758     double *pd_intra_costs;
   1759     S32 bidir_enabled;
   1760     S32 lambda_inp;
   1761     S32 lambda_recon;
   1762     S32 lambda_q_shift; /* NOTE(review): presumably the Q format of the lambdas above - confirm */
   1763 
   1764     S32 limit_active_partitions;
   1765 
   1766     S32 sdi_threshold;
   1767 
   1768     U08 u1_use_lambda_derived_from_min_8x8_act_in_ctb;
   1769 
   1770     U08 u1_max_subpel_candts;
   1771 
   1772     U08 u1_max_subpel_candts_2Nx2N;
   1773     U08 u1_max_subpel_candts_NxN;
   1774 
   1775     U08 u1_subpel_candt_threshold;
   1776 
   1777     /* Pointer to the array which has the number of best results for
   1778      * fpel refinement */
   1779     U08 *pu1_num_best_results;
   1780 
   1781 } refine_prms_t;
   1782 
   1783 /**
   1784 ******************************************************************************
   1785  *  @struct  coarse_prms_t
   1786  *  @brief   All the parameters passed to coarse layer search
   1787 ******************************************************************************
   1788  */
   1789 typedef struct
   1790 {
   1791     /** ID of this layer, typically N-1 where N is the total number of layers */
   1792     S32 i4_layer_id;
   1793 
   1794     /** Initial step size, valid if full search is disabled (see do_full_search) */
   1795     S32 i4_start_step;
   1796 
   1797     /** Maximum number of iterations to consider if full search is disabled */
   1798     S32 i4_max_iters;
   1799 
   1800     /** Number of reference frames to search */
   1801     S32 i4_num_ref;
   1802 
   1803     /** Number of best results to maintain at this layer for projection */
   1804     S32 num_results;
   1805 
   1806     /**
   1807      * Enable or disable full search. If disabled, we search around the
   1808      * initial candidates with early exit
   1809      */
   1810     S32 do_full_search;
   1811 
   1812     /** Values of lambda and the Q format */
   1813     S32 lambda;
   1814     S32 lambda_q_shift;
   1815 
   1816     /** Step size for full search 2/4 (NOTE(review): presumably "2 or 4" - confirm) */
   1817     S32 full_search_step;
   1818 
   1819 } coarse_prms_t;
   1820 
   1821 typedef struct
   1822 {
   1823     /**
   1824      * These pointers point to modified input, one each for one ref idx.
   1825      * Instead of weighting the reference, we weight the input with inverse
   1826      * wt and offset.
   1827      * +1 for storing non weighted input
   1828      */
   1829     U08 *apu1_wt_inp[MAX_NUM_REF + 1];
   1830 
   1831     /* These are allocated once at the start of encoding. */
   1832     /* They are necessary only if wt_pred is switched on; */
   1833     /* else, only a single buffer is used to store the */
   1834     /* unweighted input */
   1835     U08 *apu1_wt_inp_buf_array[MAX_NUM_REF + 1];
   1836 
   1837     /** Stores the weights and offsets for each ref */
   1838     S32 a_wpred_wt[MAX_NUM_REF];
   1839     S32 a_inv_wpred_wt[MAX_NUM_REF];
   1840     S32 a_wpred_off[MAX_NUM_REF];
   1841     S32 wpred_log_wdc;
   1842     /** One shift value per ref (MAX_NUM_REF entries) */
   1843     S32 ai4_shift_val[MAX_NUM_REF];
   1844 } wgt_pred_ctxt_t;
   1845 
   1846 /**
   1847 ******************************************************************************
   1848  *  @struct  mv_refine_ctxt_t
   1849  *  @brief   This structure contains important parameters used in motion
   1850  *           vector refinement
   1851 ******************************************************************************
   1852  */
   1853 typedef struct
   1854 {
   1855     /* Added +7 in the array sizes below to make every array dimension
   1856     16-byte aligned */
   1857     /** Cost of best candidate for each partition */
   1858     MEM_ALIGN16 WORD16 i2_tot_cost[2][TOT_NUM_PARTS + 7];
   1859 
   1860     MEM_ALIGN16 WORD16 i2_stim_injected_cost[2][TOT_NUM_PARTS + 7];
   1861 
   1862     /** Motion vector cost for the best candidate of each partition */
   1863     MEM_ALIGN16 WORD16 i2_mv_cost[2][TOT_NUM_PARTS + 7];
   1864     /** X component of the motion vector of the best candidate of each partition */
   1865     MEM_ALIGN16 WORD16 i2_mv_x[2][TOT_NUM_PARTS + 7];
   1866     /** Y component of the motion vector of the best candidate of each partition */
   1867     MEM_ALIGN16 WORD16 i2_mv_y[2][TOT_NUM_PARTS + 7];
   1868     /** Reference index of the best candidate of each partition */
   1869     MEM_ALIGN16 WORD16 i2_ref_idx[2][TOT_NUM_PARTS + 7];
   1870 
   1871     /** Partition id for the various partitions. NOTE(review): the +1 presumably holds a terminator entry - confirm */
   1872     WORD32 ai4_part_id[TOT_NUM_PARTS + 1];
   1873     /** Indicates the total number of valid partitions */
   1874     WORD32 i4_num_valid_parts;
   1875 
   1876     /** Number of candidates to refine through */
   1877     WORD32 i4_num_search_nodes;
   1878 
   1879     /** Stores the satd at the end of fullpel refinement */
   1880     WORD16 ai2_fullpel_satd[2][TOT_NUM_PARTS];
   1881 } mv_refine_ctxt_t;
   1882 
   1883 typedef mv_refine_ctxt_t fullpel_refine_ctxt_t; /* Alias of mv_refine_ctxt_t (same layout) */
   1884 typedef mv_refine_ctxt_t subpel_refine_ctxt_t; /* Alias of mv_refine_ctxt_t (same layout) */
   1885 /**
   1886 ******************************************************************************
   1887  *  @struct  hme_search_prms_t
   1888  *  @brief   All prms going to any fpel search
   1889 ******************************************************************************
   1890  */
   1891 typedef struct
   1892 {
   1893     /** for explicit search, indicates which ref frm to search */
   1894     /** for implicit search, indicates the prediction direction for search */
   1895     S08 i1_ref_idx;
   1896 
   1897     /** Blk size used for search, and for which the search is done */
   1898     BLK_SIZE_T e_blk_size;
   1899 
   1900     /** Number of init candts being searched */
   1901     S32 i4_num_init_candts;
   1902 
   1903     S32 i4_num_steps_post_refine;
   1904 
   1905     /**
   1906      * For coarser searches, bigger refinement is done around each candt
   1907      * in these cases, this prm has start step
   1908      */
   1909     S32 i4_start_step;
   1910 
   1911     /** whether SATD to be used for srch */
   1912     S32 i4_use_satd;
   1913 
   1914     /** if 1, we use recon frm for search (closed loop) */
   1915     S32 i4_use_rec;
   1916 
   1917     /** bitmask of active partitions */
   1918     S32 i4_part_mask;
   1919 
   1920     /** x and y offset of blk w.r.t. pic start */
   1921     S32 i4_x_off;
   1922     S32 i4_y_off;
   1923 
   1924     /**
   1925      * max number of iterations to search if early exit not hit
   1926      * relevant only for coarser searches
   1927      */
   1928     S32 i4_max_iters;
   1929 
   1930     /** pointer to str holding all results for this blk */
   1931     search_results_t *ps_search_results;
   1932 
   1933     /** pts to str having all search candt with refinement info */
   1934     search_candt_t *ps_search_candts;
   1935     /** pts to str having valid mv range info for this blk */
   1936     range_prms_t *aps_mv_range[MAX_NUM_REF];
   1937     /** cost compute fxnptr */
   1938     PF_MV_COST_FXN pf_mv_cost_compute;
   1939 
   1940     /** when this str is set up for full search, indicates step size for same */
   1941     S32 full_search_step;
   1942 
   1943     /** stride of inp buffer */
   1944     S32 i4_inp_stride;
   1945 
   1946     /** x and y offset of cu w.r.t. ctb start, set to 0 for non enc layer */
   1947     S32 i4_cu_x_off;
   1948     S32 i4_cu_y_off;
   1949 
   1950     /** base pointer to the de-duplicated search nodes */
   1951     search_node_t *ps_search_nodes;
   1952 
   1953     /** number of de-duplicated nodes to be searched */
   1954     S32 i4_num_search_nodes;
   1955 
   1956     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt;
   1957     /** TOT_NUM_PARTS entries. NOTE(review): presumably source variance per partition - confirm */
   1958     U32 au4_src_variance[TOT_NUM_PARTS];
   1959 
   1960     S32 i4_alpha_stim_multiplier;
   1961 
   1962     U08 u1_is_cu_noisy;
   1963 
   1964     ULWORD64 *pu8_part_src_sigmaX;
   1965     ULWORD64 *pu8_part_src_sigmaXSquared;
   1966 
   1967 } hme_search_prms_t;
   1968 
   1969 /**
   1970 ******************************************************************************
   1971  *  @struct  err_prms_t
   1972  *  @brief   This is input prms struct for SAD/SATD computation
   1973 ******************************************************************************
   1974  */
   1975 typedef struct
   1976 {
   1977     /** Ptr to input blk for which err computed */
   1978     U08 *pu1_inp;
   1979 
   1980     U16 *pu2_inp;
   1981 
   1982     /** Ptr to ref blk after adjusting for mv and coordinates in pic */
   1983     U08 *pu1_ref;
   1984 
   1985     U16 *pu2_ref;
   1986 
   1987     /** Stride of input buffer */
   1988     S32 i4_inp_stride;
   1989     /** Stride of ref buffer */
   1990     S32 i4_ref_stride;
   1991     /** Mask of active partitions. */
   1992     S32 i4_part_mask;
   1993     /** Mask of active grid pts. Refer to GRID_PT_T enum for bit posns */
   1994     S32 i4_grid_mask;
   1995     /**
   1996      * Pointer to SAD Grid where SADs for each partition are stored.
   1997      * The layout is as follows: If there are M total partitions
   1998      * and N active pts in the grid, then the first N results contain
   1999      * first partition, e.g. 2Nx2N. Next N results contain 2nd partition
   2000      * sad, e.g. 2NxN_T. Totally we have MxN results.
   2001      * Note: The active partition count may be lesser than M, still we
   2002      * have results for M partitions
   2003      */
   2004     S32 *pi4_sad_grid;
   2005 
   2006     /** Pointer to TU_SPLIT grid flags */
   2007     S32 *pi4_tu_split_flags;
   2008 
   2009     /** Pointer to the child's satd cost */
   2010     S32 *pi4_child_cost;
   2011 
   2012     /** Pointer to the child's TU_split flags */
   2013     S32 *pi4_child_tu_split_flags;
   2014 
   2015     /** Pointer to the child's TU_early_cbf flags */
   2016     S32 *pi4_child_tu_early_cbf;
   2017 
   2018     /** Pointer to TU early CBF flags */
   2019     S32 *pi4_tu_early_cbf;
   2020 
   2021     /** Pointer to the early cbf thresholds */
   2022     S32 *pi4_tu_early_cbf_threshold;
   2023 
   2024     /** Store the DC value */
   2025     S32 i4_dc_val;
   2026 
   2027     /** Block width and ht of the block being evaluated for SAD */
   2028     S32 i4_blk_wd;
   2029     S32 i4_blk_ht;
   2030 
   2031     /**
   2032      * Array of valid partition ids. E.g. if 2 partitions active,
   2033      * then there will be 3 entries, 3rd entry being -1
   2034      */
   2035     S32 *pi4_valid_part_ids;
   2036     /** Step size of the grid */
   2037     S32 i4_step;
   2038 
   2039     /** Number of partitions */
   2040     S32 i4_num_partitions;
   2041 
   2042     /** Store the tu_split_flag cost */
   2043     S32 i4_tu_split_cost;
   2044 
   2045     /** The max_depth for inter tu_tree */
   2046     U08 u1_max_tr_depth;
   2047 
   2048     U08 u1_max_tr_size;
   2049 
   2050     /** Scratch memory for doing hadamard */
   2051     U08 *pu1_wkg_mem;
   2052 
   2053     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
   2054 
   2055 } err_prms_t;
   2056 
   2057 typedef struct grid
   2058 { /* Describes the grid(s) evaluated in a grid search */
   2059     WORD32 num_grids; /* Number of grids to work with */
   2060     WORD32 ref_buf_stride; /* Buffer stride of reference buffer */
   2061     WORD32
   2062         grd_sz_y_x; /* Packed 16 bits indicating grid spacing in y & x direction <--grid-size-y--><--grid-size-x--> */
   2063     UWORD8 **ppu1_ref_ptr; /* Center point for the grid search */
   2064     WORD32 *pi4_grd_mask; /* Mask indicating which grid points need to be evaluated */
   2065     hme_mv_t *p_mv; /* <--MVy--><--MVx--> */
   2066     WORD32 *p_ref_idx; /* Ref idx to which the grid is pointing */
   2067 } grid_ctxt_t;
   2068 
   2069 typedef struct cand
   2070 { /* One candidate: its MV, ref idx, owning grid and reference pointer */
   2071     hme_mv_t mv; /* MV corresponding to the candidate <--MVy--><--MVx--> */
   2072     WORD32 ref_idx; /* Ref idx corresponding to the candidate */
   2073     WORD32 grid_ix; /* Grid to which this candidate belongs */
   2074     UWORD8 *pu1_ref_ptr; /* Pointer to the candidate */
   2075 } cand_t;
   2076 
   2077 /**
   2078 ******************************************************************************
   2079  *  @struct  hme_ctb_prms_t
   2080  *  @brief   Parameters to create the CTB list, which is a tree structure
   2081 ******************************************************************************
   2082  */
   2083 typedef struct
   2084 {
   2085     /**
   2086      * These parameters cover number of input 16x16, 32x32 and 64x64 results
   2087      * and the number of output results that are mix of all above CU sizes.
   2088      * i4_num_kxk_unified_out is relevant only if we are sending multiple CU
   2089      * sizes for same region for RD Opt.
   2090      */
   2091     S32 i4_num_16x16_in;
   2092     S32 i4_num_32x32_in;
   2093     S32 i4_num_32x32_unified_out;
   2094     S32 i4_num_64x64_in;
   2095     S32 i4_num_64x64_unified_out;
   2096 
   2097     /** Pointers to results at different CU sizes */
   2098     search_results_t *ps_search_results_16x16;
   2099     search_results_t *ps_search_results_32x32;
   2100     search_results_t *ps_search_results_64x64;
   2101 
   2102     S32 i4_num_part_type;
   2103 
   2104     /** Indicates whether we have split at 64x64 level */
   2105     S32 i4_cu_64x64_split;
   2106     /** Indicates whether each of the 32x32 CUs is split */
   2107     S32 ai4_cu_32x32_split[4];
   2108 
   2109     /** X and y offset of the CTB */
   2110     S32 i4_ctb_x;
   2111     S32 i4_ctb_y;
   2112 
   2113     /**
   2114      * Memory manager for the CTB that is responsible for node allocation
   2115      * at a CU level
   2116      */
   2117     ctb_mem_mgr_t *ps_ctb_mem_mgr;
   2118 
   2119     /** Buffer manager that is responsible for memory allocation (pred bufs) */
   2120     buf_mgr_t *ps_buf_mgr;
   2121 } hme_ctb_prms_t;
   2122 
   2123 /**
   2124 ******************************************************************************
   2125  *  @struct  result_upd_prms_t
   2126  *  @brief   Updation of results
   2127 ******************************************************************************
   2128  */
   2129 typedef struct
   2130 {
   2131     /** Cost computation function pointer */
   2132     PF_MV_COST_FXN pf_mv_cost_compute;
   2133 
   2134     /** Points to the SAD grid updated during SAD compute fxn */
   2135     S32 *pi4_sad_grid;
   2136 
   2137     /** Points to the TU_SPLIT grid updated during the SATD TU REC fxn */
   2138     S32 *pi4_tu_split_flags;
   2139 
   2140     /**
   2141      * This is the central mv of the grid. For e.g. if we have a 3x3 grid,
   2142      * this covers the central pt's mv in the grid.
   2143      */
   2144     const search_node_t *ps_search_node_base;
   2145 
   2146     /** Search results structure updated by the result update fxn */
   2147     search_results_t *ps_search_results;
   2148 
   2149     /** List of active partitions, only these are processed and updated */
   2150     S32 *pi4_valid_part_ids;
   2151 
   2152     /** Reference id for this candt and grid */
   2153     S08 i1_ref_idx;
   2154 
   2155     /** Mask of active pts in the grid */
   2156     S32 i4_grid_mask;
   2157 
   2158     /**
   2159      * For early exit reasons we may want to know the id of the least candt
   2160      * This will correspond to id of  candt with least cost for 2Nx2N part,
   2161      * if multiple partitions enabled, or if 1 part enabled, it will be for
   2162      * id of candt of that partition
   2163      */
   2164     S32 i4_min_id;
   2165 
   2166     /** Step size of the grid */
   2167     S32 i4_step;
   2168 
   2169     /** Mask of active partitions */
   2170     S32 i4_part_mask;
   2171 
   2172     /** Min cost corresponding to min id */
   2173     S32 i4_min_cost;
   2174 
   2175     /** Store the motion vectors in qpel unit */
   2176     S16 i2_mv_x;
   2177 
   2178     S16 i2_mv_y;
   2179 
   2180     U08 u1_pred_lx;
   2181 
   2182     subpel_refine_ctxt_t *ps_subpel_refine_ctxt;
   2183 
   2184     /** Current candidate in the subpel refinement process */
   2185     search_node_t *ps_search_node;
   2186 
   2187 } result_upd_prms_t;
   2188 
   2189 /**
   2190 ******************************************************************************
   2191  *  @struct  mv_grid_t
   2192  *  @brief   Grid of MVs storing results for a CTB and neighbours. For a CTB
   2193  *           of size 64x64, we may store upto 16x16 mvs (one for each 4x4)
   2194  *           along with 1 neighbour on each side. Valid only for encode layer
   2195 ******************************************************************************
   2196  */
   2197 typedef struct
   2198 {
   2199     /** All the mvs in the grid (NUM_MVS_IN_CTB_GRID nodes) */
   2200     search_node_t as_node[NUM_MVS_IN_CTB_GRID];
   2201 
   2202     /** Stride of the grid. NOTE(review): presumably in units of as_node entries - confirm */
   2203     S32 i4_stride;
   2204 
   2205     /** Start offset of the 0,0 location in CTB. */
   2206     S32 i4_start_offset;
   2207 } mv_grid_t;
   2208 
   2209 typedef struct
   2210 {
   2211     /* Centroid's x co-ordinate in Q8 format */
   2212     WORD32 i4_pos_x_q8;
   2213     /* Centroid's y co-ordinate in Q8 format */
   2214     WORD32 i4_pos_y_q8;
   2215 } centroid_t;
   2216 
   2217 typedef struct
   2218 {
   2219     S16 min_x; /* NOTE(review): min_x..max_y presumably bound the mvs of this cluster - confirm */
   2220 
   2221     S16 min_y;
   2222 
   2223     S16 max_x;
   2224 
   2225     S16 max_y;
   2226 
   2227     /* The cumulative sum of partition sizes of the mvs */
   2228     /* in this cluster */
   2229     S16 area_in_pixels;
   2230 
   2231     S16 uni_mv_pixel_area;
   2232 
   2233     S16 bi_mv_pixel_area;
   2234     /* Storage for the mvs of this cluster (128 entries) */
   2235     mv_data_t as_mv[128];
   2236     /* NOTE(review): presumably the number of valid entries in as_mv - confirm */
   2237     U08 num_mvs;
   2238 
   2239     /* Weighted average of all mvs in the cluster */
   2240     centroid_t s_centroid;
   2241 
   2242     S08 ref_id;
   2243 
   2244     S32 max_dist_from_centroid;
   2245 
   2246     U08 is_valid_cluster;
   2247 
   2248 } cluster_data_t;
   2249 
   2250 typedef struct
   2251 { /* Cluster storage with room for MAX_NUM_CLUSTERS_16x16 clusters */
   2252     cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_16x16];
   2253     /* NOTE(review): presumably the number of entries of as_cluster_data in use - confirm */
   2254     U08 num_clusters;
   2255     /* One count per ref (MAX_NUM_REF entries) */
   2256     U08 au1_num_clusters[MAX_NUM_REF];
   2257 
   2258     S16 intra_mv_area;
   2259 
   2260     S32 best_inter_cost;
   2261 
   2262 } cluster_16x16_blk_t;
   2263 
   2264 typedef struct
   2265 { /* Cluster storage with room for MAX_NUM_CLUSTERS_32x32 clusters */
   2266     cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_32x32];
   2267     /* NOTE(review): presumably the number of entries of as_cluster_data in use - confirm */
   2268     U08 num_clusters;
   2269     /* One count per ref (MAX_NUM_REF entries) */
   2270     U08 au1_num_clusters[MAX_NUM_REF];
   2271 
   2272     S16 intra_mv_area;
   2273 
   2274     S08 best_uni_ref;
   2275 
   2276     S08 best_alt_ref;
   2277 
   2278     S32 best_inter_cost;
   2279 
   2280     U08 num_refs;
   2281 
   2282     U08 num_clusters_with_weak_sdi_density;
   2283 
   2284 } cluster_32x32_blk_t;
   2285 
   2286 typedef struct
   2287 { /* Cluster storage with room for MAX_NUM_CLUSTERS_64x64 clusters */
   2288     cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_64x64];
   2289     /* NOTE(review): presumably the number of entries of as_cluster_data in use - confirm */
   2290     U08 num_clusters;
   2291     /* One count per ref (MAX_NUM_REF entries) */
   2292     U08 au1_num_clusters[MAX_NUM_REF];
   2293 
   2294     S16 intra_mv_area;
   2295 
   2296     S08 best_uni_ref;
   2297 
   2298     S08 best_alt_ref;
   2299 
   2300     S32 best_inter_cost;
   2301 
   2302     U08 num_refs;
   2303 
   2304 } cluster_64x64_blk_t;
   2305 
   2306 typedef struct
   2307 { /* Pointers to the 16x16, 32x32 and 64x64 cluster blks plus related CTB state */
   2308     cluster_16x16_blk_t *ps_16x16_blk;
   2309 
   2310     cluster_32x32_blk_t *ps_32x32_blk;
   2311 
   2312     cluster_64x64_blk_t *ps_64x64_blk;
   2313 
   2314     cur_ctb_cu_tree_t *ps_cu_tree_root;
   2315     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
   2316     S32 nodes_created_in_cu_tree;
   2317 
   2318     S32 *pi4_blk_8x8_mask;
   2319 
   2320     S32 blk_32x32_mask;
   2321 
   2322     S32 sdi_threshold;
   2323 
   2324     S32 i4_frame_qstep;
   2325 
   2326     S32 i4_frame_qstep_multiplier;
   2327     /* 16 entries. NOTE(review): presumably one per 16x16 blk of a 64x64 CTB - confirm */
   2328     U08 au1_is_16x16_blk_split[16];
   2329     /* 16 entries. NOTE(review): presumably one per 16x16 blk of a 64x64 CTB - confirm */
   2330     S32 ai4_part_mask[16];
   2331 
   2332 } ctb_cluster_info_t;
   2333 
   2334 /**
   2335 ******************************************************************************
   2336  *  @struct  hme_merge_prms_t
   2337  *  @brief   All parameters related to the merge process
   2338 ******************************************************************************
   2339  */
   2340 typedef struct
   2341 {
   2342     /**
   2343      * MV Range prms for the merged CU, this may have to be conservative
   2344      * in comparison to individual CUs
   2345      */
   2346     range_prms_t *aps_mv_range[MAX_NUM_REF];
   2347 
   2348     /** Pointers to search results of 4 children CUs to be merged */
   2349     search_results_t *ps_results_tl;
   2350     search_results_t *ps_results_tr;
   2351     search_results_t *ps_results_bl;
   2352     search_results_t *ps_results_br;
   2353 
   2354     search_results_t *ps_results_grandchild;
   2355 
   2356     /** Pointer to search results of the parent CU updated during merge */
   2357     search_results_t *ps_results_merge;
   2358 
   2359     inter_cu_results_t *ps_8x8_cu_results;
   2360 
   2361     /** Layer related context */
   2362     layer_ctxt_t *ps_layer_ctxt;
   2363 
   2364     inter_ctb_prms_t *ps_inter_ctb_prms;
   2365 
   2366     /**
   2367      * Points to an array of pointers. This array in turn points to
   2368      * the active mv grid in each direction (L0/L1)
   2369      */
   2370     mv_grid_t **pps_mv_grid;
   2371 
   2372     ctb_cluster_info_t *ps_cluster_info;
   2373 
   2374     S08 *pi1_past_list;
   2375 
   2376     S08 *pi1_future_list;
   2377 
   2378     /** MV cost compute function */
   2379     PF_MV_COST_FXN pf_mv_cost_compute;
   2380 
   2381     /** If segmentation info available for the parent block */
   2382     S32 i4_seg_info_avail;
   2383 
   2384     /** Partition mask (if segmentation info available) */
   2385     S32 i4_part_mask;
   2386 
   2387     /** Number of input results available for the merge proc from children */
   2388     S32 i4_num_inp_results;
   2389 
   2390     /** Whether SATD to be used for fpel searches */
   2391     S32 i4_use_satd;
   2392 
   2393     /**
   2394      * Number of result planes valid for this merge process. For example,
   2395      * for fpel search in encode layer, we may have only L0 and L1
   2396      */
   2397     S32 i4_num_ref;
   2398 
   2399     /** Whether to use input or recon frm for search */
   2400     S32 i4_use_rec;
   2401 
   2402     /** Optimized mv grid flag: indicates if same mv grid is used for both fpel and qpel.
   2403      *  This helps in copying fpel and qpel mv grid in pred context mv grid
   2404      */
   2405     S32 i4_mv_grid_opt;
   2406 
   2407     /** ctb size, typically 32 or 64. NOTE(review): the name suggests a log2 value is stored - confirm */
   2408     S32 log_ctb_size;
   2409 
   2410     S32 i4_ctb_x_off;
   2411 
   2412     S32 i4_ctb_y_off;
   2413 
   2414     ME_QUALITY_PRESETS_T e_quality_preset;
   2415 
   2416     S32 i4_num_pred_dir_actual;
   2417 
   2418     U08 au1_pred_dir_searched[2];
   2419 
   2420     S32 i4_alpha_stim_multiplier;
   2421 
   2422     U08 u1_is_cu_noisy;
   2423 
   2424 } hme_merge_prms_t;
   2425 
   2426 /**
   2427 ******************************************************************************
   2428  *  @struct  mvbank_update_prms_t
   2429  *  @brief   Useful prms for updating the mv bank
   2430 ******************************************************************************
   2431  */
   2432 typedef struct
   2433 {
   2434     /** Number of references for which update to be done */
   2435     S32 i4_num_ref;
   2436 
   2437     /**
   2438      * Search blk size that was used, if this is different from the blk
   2439      * size used in mv bank, then some replications or reductions may
   2440      * have to be done. E.g. if search blk size is 8x8 and result blk
   2441      * size is 4x4, then we have to update part NxN results to be
   2442      * used for update along with replication of 2Nx2N result in each
   2443      * of the 4 4x4 blk.
   2444      */
   2445     BLK_SIZE_T e_search_blk_size;
   2446 
   2447     /**
   2448      * Redundant prm as it reflects differences between search blk size
   2449      * and mv blk size if any
   2450      */
   2451     S32 i4_shift;
   2452     /** NOTE(review): presumably the number of active references in list L0 - confirm */
   2453     S32 i4_num_active_ref_l0;
   2454     /** NOTE(review): presumably the number of active references in list L1 - confirm */
   2455     S32 i4_num_active_ref_l1;
   2456 
   2457     S32 i4_num_results_to_store;
   2458 } mvbank_update_prms_t;
   2459 
   2460 /**
   2461 ******************************************************************************
   2462  *  @struct  hme_subpel_prms_t
   2463  *  @brief   input and control prms for subpel refinement
   2464 ******************************************************************************
   2465  */
   2466 typedef struct
   2467 {
   2468     /** Relevant only for the case where we mix up results of diff cu sizes */
   2469     S32 i4_num_16x16_candts;
   2470     S32 i4_num_32x32_candts;
   2471     S32 i4_num_64x64_candts;
   2472 
   2473     /** X and y offset of ctb w.r.t. start of pic */
   2474     S32 i4_ctb_x_off;
   2475     S32 i4_ctb_y_off;
   2476 
   2477     /** Max Number of diamond steps for hpel and qpel refinement */
   2478     S32 i4_num_steps_hpel_refine;
   2479     S32 i4_num_steps_qpel_refine;
   2480 
   2481     /** Whether SATD to be used or SAD to be used */
   2482     S32 i4_use_satd;
   2483 
   2484     /**
   2485      * Input ptr. This is updated inside the subpel refinement by picking
   2486      * up correct address
   2487      */
   2488     void *pv_inp;
   2489 
   2490     /**
   2491      * Pred buffer ptr, updated inside subpel refinement process. This
   2492      * location passed to the leaf fxn for copying the winner pred buf
   2493      */
   2494     U08 *pu1_pred;
   2495 
   2496     /** Interpolation fxn sent by top layer, should exact qpel be desired */
   2497     PF_INTERP_FXN_T pf_qpel_interp;
   2498 
   2499     /** Working mem passed to leaf fxns */
   2500     U08 *pu1_wkg_mem;
   2501 
   2502     /** Prediction buffer stride for leaf fxns to copy the pred winner buf */
   2503     S32 i4_pred_stride;
   2504 
   2505     /** Type of input ; sizeof(UWORD8) => unidir refinement, else BIDIR */
   2506     S32 i4_inp_type;
   2507 
   2508     /** Stride of input buf, updated inside subpel fxn */
   2509     S32 i4_inp_stride;
   2510 
   2511     /**
   2512      * Pointer to the backward input ptr. This is also updated inside
   2513      * the subpel fxn. Needed for BIDIR refinement where modified input
   2514      * is 2I - P0
   2515      */
   2516     S16 *pi2_inp_bck;
   2517 
   2518     /** Indicates if CU merge uses SATD / SAD */
   2519     S32 i4_use_satd_cu_merge;
   2520 
   2521     /** Valid MV range in hpel and qpel units */
   2522     range_prms_t *aps_mv_range_hpel[MAX_NUM_REF];
   2523     range_prms_t *aps_mv_range_qpel[MAX_NUM_REF];
   2524     /** Relevant only for mixed CU cases */
   2525     search_results_t *ps_search_results_16x16;
   2526     search_results_t *ps_search_results_32x32;
   2527     search_results_t *ps_search_results_64x64;
   2528 
   2529     /** Cost computation fxn ptr */
   2530     PF_MV_COST_FXN pf_mv_cost_compute;
   2531 
   2532     /** Whether BI mode is allowed for this pic (not allowed in P) */
   2533     S32 bidir_enabled;
   2534 
   2535     /**
   2536      * Total number of references of current picture which is encoded
   2537      */
   2538     U08 u1_num_ref;
   2539 
   2540     /**
   2541      * Number of candidates used for refinement
   2542      * If given 1 candidate, then 2Nx2N is chosen as the best candidate
   2543      */
   2544     U08 u1_max_subpel_candts;
   2545 
   2546     U08 u1_subpel_candt_threshold;
   2547 
   2548     ME_QUALITY_PRESETS_T e_me_quality_presets;
   2549 
   2550     U08 u1_max_subpel_candts_2Nx2N;
   2551     U08 u1_max_subpel_candts_NxN;
   2552 
   2553     U08 u1_max_num_subpel_refine_centers;
   2554 
   2555     subpel_refine_ctxt_t *ps_subpel_refine_ctxt;
   2556 
   2557     S32 i4_num_act_ref_l0;
   2558 
   2559     S32 i4_num_act_ref_l1;
   2560 
   2561     U08 u1_is_cu_noisy;
   2562 } hme_subpel_prms_t;
   2563 
   2564 /**
   2565 ******************************************************************************
   2566  *  @struct  layers_descr_t
   2567  *  @brief   One such str exists for each ref and curr input in the me ctxt
   2568  *           Has ctxt handles for all layers of a given POC
   2569 ******************************************************************************
   2570  */
   2571 typedef struct
   2572 {
   2573     /** Per-layer context handles, MAX_NUM_LAYERS slots; entry 0 is the finest layer */
   2574     layer_ctxt_t *aps_layers[MAX_NUM_LAYERS];
   2575 } layers_descr_t;
   2576 
   2577 /**
   2578 ******************************************************************************
   2579  *  @struct  blk_ctb_attrs_t
   2580  *  @brief   The CTB is split into 16x16 blks. For each such blk, this str
   2581  *           stores attributes of this blk w.r.t. ctb
   2582 ******************************************************************************
   2583  */
   2584 typedef struct
   2585 {
   2586     /**
   2587      * ID of the blk in the full ctb. Assuming the full ctb were coded,
   2588      * this is the blk number of this blk (in encode order)
   2589      * within the full ctb
   2590      */
   2591     U08 u1_blk_id_in_full_ctb;
   2592
   2593     /** x and y coordinates of this blk w.r.t. ctb base (units not stated here) */
   2594     U08 u1_blk_x;
   2595     U08 u1_blk_y;
   2596     /**
   2597      * Mask of active 8x8 blks: bits 0-3 map to blks 0-3 in raster order
   2598      * within a 16x16 blk. It is 0xf in interiors and less than 0xf at right/bottom
   2599      * boundaries or at bottom-right corners, where a full 16x16 blk may be absent
   2600      */
   2601     U08 u1_blk_8x8_mask;
   2602 } blk_ctb_attrs_t;
   2603 
   2604 /**
   2605 ******************************************************************************
   2606  *  @struct  ctb_boundary_attrs_t
   2607  *  @brief   Depending on the location of ctb (rt boundary, bot boundary,
   2608  *           bot rt corner, elsewhere) this picks out the appropriate
   2609  *           attributes of the ctb
   2610 ******************************************************************************
   2611  */
   2612 typedef struct
   2613 {
   2614     /**
   2615      * 4 bit variable, one bit for each of the 4 possible 32x32s in a full
   2616      * ctb. If a 32x32 is partially present / not present at boundaries, its
   2617      * bit position will be 0
   2618      */
   2619     U08 u1_merge_to_32x32_flag;
   2620
   2621     /**
   2622      * 1 bit flag indicating whether it is a complete ctb or not, and
   2623      * consequently whether it can be merged to a full 64x64
   2624      */
   2625     U08 u1_merge_to_64x64_flag;
   2626
   2627     /** Number of valid 16x16 blks (includes those partially/fully present) */
   2628     U08 u1_num_blks_in_ctb;
   2629
   2630     /** 16 bit variable (stored in an S32) indicating whether the corresponding 16x16 is valid */
   2631     S32 cu_16x16_valid_flag;
   2632
   2633     /**
   2634      * For the 16 possible 16x16 blks in a CTB, there is one attribute struct
   2635      * for every valid blk, tightly packed. For example, with blk numbering
   2636      *  0  1  4  5
   2637      *  2  3  6  7
   2638      *  8  9 12 13
   2639      * 10 11 14 15
   2640      * and a ctb width of only 48, blks 5,7,13,15 are invalid.
   2641      * The attributes are then stored in the order: 0,1,2,3,4,6,8,9,10,11,12,14
   2642      */
   2643     blk_ctb_attrs_t as_blk_attrs[16];
   2644 } ctb_boundary_attrs_t;
   2645 
   2646 typedef struct
   2647 {
            /*
             * NOTE(review): this header does not document this struct. The remarks
             * on the members are inferred from their names only - confirm them
             * against the code that fills and reads this struct.
             */
   2648     S32 sdi; /* meaning of "sdi" is not stated in this header - TODO confirm */
   2649
   2650     S32 ref_idx; /* presumably a reference picture index - TODO confirm */
   2651
   2652     S32 cluster_id; /* presumably identifies the cluster this entry refers to - TODO confirm */
   2653 } outlier_data_t;
   2654 
   2655 /**
   2656 ******************************************************************************
   2657  *  @struct  coarse_dyn_range_prms_t
   2658  *  @brief   The parameters for Dyn. Search Range in coarse ME
   2659 ******************************************************************************
   2660  */
   2661 
   2662 typedef struct
   2663 {
   2664     /* TODO: size can be reduced, as this is not used for L0 */
   2665
   2666     /** Dynamical Search Range parameters, indexed by [layer][ref_pic] */
   2667     dyn_range_prms_t as_dyn_range_prms[MAX_NUM_LAYERS][MAX_NUM_REF];
   2668
   2669     /** Min y value normalized per POC distance, one entry per layer */
   2670     WORD16 i2_dyn_min_y_per_poc[MAX_NUM_LAYERS];
   2671     /** Max y value normalized per POC distance, one entry per layer */
   2672     WORD16 i2_dyn_max_y_per_poc[MAX_NUM_LAYERS];
   2673
   2674 } coarse_dyn_range_prms_t;
   2675 
   2676 /**
   2677 ******************************************************************************
   2678  *  @struct  coarse_me_ctxt_t
   2679  *  @brief   Handle for Coarse ME
   2680 ******************************************************************************
   2681  */
   2682 typedef struct
   2683 {
   2684     /** Init search candidates, 2 sets, one for 4x8 and one for 8x4 */
   2685     search_node_t s_init_search_node[MAX_INIT_CANDTS * 2];
   2686
   2687     /** For non enc layer, we search 8x8 blks and store results here */
   2688     search_results_t s_search_results_8x8;
   2689     /**
   2690      * The array below stores the input plane for each ref pic, per layer.
   2691      * These are duplications, and are present within layer ctxts, but
   2692      * kept here together for faster indexing during search
   2693      */
   2694     U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF];
   2695
   2696     /** Ptr to the all-layer context placeholder of the current pic being encoded */
   2697     layers_descr_t *ps_curr_descr;
   2698
   2699     /** All-layer ctxt placeholders for all pics (array of descriptors, not a pointer) */
   2700     layers_descr_t as_ref_descr[MAX_NUM_REF + 1 + NUM_BUFS_DECOMP_HME];
   2701
   2702     /**
   2703      * ME uses ref id lc to search multi ref. This table look-up (TLU) gets
   2704      * the POC of the pic w.r.t. a given ref id
   2705      */
   2706     S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF];
   2707
   2708     /** Use this array to get the display number from ref_idx. Used for L1 TRAQO */
   2709     S32 ai4_ref_idx_to_disp_num[MAX_NUM_REF];
   2710
   2711     /** POC of pic encoded just before current */
   2712     S32 i4_prev_poc;
   2713
   2714     /** POC of current pic being encoded */
   2715     S32 i4_curr_poc;
   2716
   2717     /** Number of HME layers, encode + non encode */
   2718     S32 num_layers;
   2719
   2720     /** Alloc time parameter, max ref frms used for this session */
   2721     S32 max_num_ref;
   2722
   2723     /**
   2724      * Number of layers that use explicit search. Explicit search means
   2725      * that each ref id is searched separately
   2726      */
   2727     S32 num_layers_explicit_search;
   2728
   2729     /**
   2730      * Maximum number of results maintained at any refinement layer
   2731      * search. Important from mem alloc perspective
   2732      */
   2733     S32 max_num_results;
   2734
   2735     /** Same as above but for coarse layer */
   2736     S32 max_num_results_coarse;
   2737
   2738     /** Array of flags, one per layer, indicating whether the layer is encoded */
   2739     U08 u1_encode[MAX_NUM_LAYERS];
   2740
   2741     /** Init prms sent by encoder during create time */
   2742     hme_init_prms_t s_init_prms;
   2743
   2744     /**
   2745      * Array look up created each frm, maintaining the corresponding
   2746      * layer descr look up for each ref id
   2747      */
   2748     S32 a_ref_to_descr_id[MAX_NUM_REF];
   2749
   2750     /**
   2751      * Array lookup created each frame that maps a given ref id
   2752      * pertaining to unified list to a L0/L1 list. Encoder searches in terms
   2753      * of LC list or in other words does not differentiate between L0
   2754      * and L1 frames for most of search. Finally to report results to
   2755      * encoder, the ref id has to be remapped to suitable list
   2756      */
   2757     S32 a_ref_idx_lc_to_l0[MAX_NUM_REF];
   2758     S32 a_ref_idx_lc_to_l1[MAX_NUM_REF];
   2759
   2760     /** Width and ht of each layer */
   2761     S32 a_wd[MAX_NUM_LAYERS];
   2762     S32 a_ht[MAX_NUM_LAYERS];
   2763
   2764     /** Histogram, one for each ref, allocated during create time */
   2765     mv_hist_t *aps_mv_hist[MAX_NUM_REF];
   2766
   2767     /** Whether a given ref id in Lc list is past frm or future frm */
   2768     U08 au1_is_past[MAX_NUM_REF];
   2769
   2770     /** These are L0 and L1 lists, storing ref id Lc in them */
   2771     S08 ai1_past_list[MAX_NUM_REF];
   2772     S08 ai1_future_list[MAX_NUM_REF];
   2773
   2774     /** Number of past and future ref pics sent this frm */
   2775     S32 num_ref_past;
   2776     S32 num_ref_future;
   2777
   2778     void *pv_ext_frm_prms; /* opaque handle; the concrete type is not visible in this header */
   2779
   2780     hme_frm_prms_t *ps_hme_frm_prms; /* presumably the caller-owned frame params - TODO confirm */
   2781
   2782     hme_ref_map_t *ps_hme_ref_map; /* presumably the caller-owned reference map - TODO confirm */
   2783     /**
   2784      *  Scale factor of any given ref lc to another ref in Q8
   2785      *  First MAX_NUM_REF entries are to scale an mv of ref id k
   2786      *  w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k))
   2787      *  Next MAX_NUM_REF entries are to scale mv of ref id 1 w.r.t. 0
   2788      *  And so on
   2789      */
   2790     S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF];
   2791
   2792     /** bits for a given ref id, in either list L0/L1 */
   2793     U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF];
   2794
   2795     /** Points to above: 1 ptr for each list */
   2796     U08 *apu1_ref_bits_tlu_lc[2];
   2797
   2798     /** number of b frames between P, depends on number of hierarchy layers */
   2799     S32 num_b_frms;
   2800
   2801     /** Frame level qp passed every frame by ME's caller */
   2802     S32 frm_qstep;
   2803
   2804     /** Backup of frame parameters */
   2805     hme_frm_prms_t s_frm_prms;
   2806
   2807     /** Weighted prediction parameters for all references are stored here.
   2808      *  Scratch buffers for the populated weighted inputs are also stored in this
   2809      */
   2810     wgt_pred_ctxt_t s_wt_pred;
   2811
   2812     /** Weighted pred enable flag */
   2813     S32 i4_wt_pred_enable_flag;
   2814
   2815     /* Per-ref pointers to hold 5 rows of best search node information (4x8 and 8x4) */
   2816     search_node_t *aps_best_search_nodes_4x8_n_rows[MAX_NUM_REF];
   2817
   2818     search_node_t *aps_best_search_nodes_8x4_n_rows[MAX_NUM_REF];
   2819
   2820     /* NOTE(review): original comment repeated the best-search-node text; presumably holds the rows of 4x4 SADs */
   2821     S16 *api2_sads_4x4_n_rows[MAX_NUM_REF];
   2822
   2823     /*  Number of row buffers to store SADs and best search nodes */
   2824     S32 i4_num_row_bufs;
   2825
   2826     /* (HEVCE_MAX_HEIGHT>>1) assuming layer 1 is the coarse layer, and >>2 assuming the block size is 4x4 */
   2827     S32 ai4_row_index[(HEVCE_MAX_HEIGHT >> 1) >> 2];
   2828
   2829     /* store L1 cost required for rate control for enc decision */
   2830     S32 i4_L1_hme_best_cost;
   2831
   2832     /* store L1 cost required for modulation index calc (member below is disabled) */
   2833     //S32 i4_L1_hme_best_cost_for_ref;
   2834
   2835     /* store L1 satd. NOTE(review): member is named ..._sad - confirm whether SAD or SATD is stored */
   2836     S32 i4_L1_hme_sad;
   2837     /* EIID: layer1 buffer to store the early inter intra costs and decisions */
   2838     /* pic_level pointer stored here */
   2839     ihevce_ed_blk_t *ps_ed_blk;
   2840     /* EIID: layer1 buffer to store the sad/cost information for rate control
   2841     or cu level qp modulation */
   2842     ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
   2843     /** Dynamical Search Range parameters */
   2844     coarse_dyn_range_prms_t s_coarse_dyn_range_prms;
   2845
   2846     /** Dependency manager for Row level sync in HME pass */
   2847     void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1];
   2848
   2849     /* pointer buffers for memory mapping */
   2850     UWORD8 *pu1_me_reverse_map_info;
   2851
   2852     /* count of blks which have higher SAD (threshold not stated here) */
   2853     S32 i4_num_blks_high_sad;
   2854
   2855     /* num of 8x8 blocks in nearest poc */
   2856     S32 i4_num_blks;
   2857
   2858     /* thread id of the current context */
   2859     WORD32 thrd_id;
   2860
   2861     /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
   2862     void *pv_me_optimised_function_list;
   2863
   2864     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list; /* presumably the common-utils optimised function list - TODO confirm */
   2865
   2866 } coarse_me_ctxt_t;
   2867 
   2868 /**
   2869 ******************************************************************************
   2870  *  @struct  l0_dyn_range_prms_t
   2871  *  @brief   The parameters for Dyn. Search Range in L0 ME
   2872 ******************************************************************************
   2873  */
   2874 typedef struct
   2875 {
   2876     /** Dynamical Search Range parameters per ref_pic */
   2877     dyn_range_prms_t as_dyn_range_prms[MAX_NUM_REF];
   2878
   2879     /** Min y value normalized per POC distance */
   2880     WORD16 i2_dyn_min_y_per_poc;
   2881     /** Max y value normalized per POC distance */
   2882     WORD16 i2_dyn_max_y_per_poc;
   2883
   2884     /* The number of ref. pics actually used in L0. Used to communicate */
   2885     /* to ihevce_l0_me_frame_end and frame process                      */
   2886     WORD32 i4_num_act_ref_in_l0;
   2887
   2888     /* display number */
   2889     WORD32 i4_display_num;
   2890
   2891 } l0_dyn_range_prms_t;
   2892 
   2893 /**
   2894 ******************************************************************************
   2895  *  @brief inter prediction (MC) context for me loop
   2896 ******************************************************************************
   2897  */
   2898 /* IMPORTANT: please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t identical */
   2899 typedef struct
   2900 {
            /*
             * Layout note: the comment preceding this struct requires it to stay
             * identical to inter_pred_ctxt_t, so do not add, remove or reorder
             * members here without making the same change there.
             */
   2901     /** pointer to the reference lists: points to an array of (HEVCE_MAX_REF_PICS * 2) recon_pic_buf_t pointers */
   2902     recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
   2903
   2904     /** scratch buffer for horizontal interpolation destination */
   2905     WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];
   2906
   2907     /** scratch 16 bit buffer for interpolation in l0 direction */
   2908     WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];
   2909
   2910     /** scratch 16 bit buffer for interpolation in l1 direction */
   2911     WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];
   2912
   2913     /** Pointer to struct containing function pointers to
   2914         functions in the 'common' library */
   2915     func_selector_t *ps_func_selector;
   2916
   2917     /** common denominator used for luma weights (log2, per the member name) */
   2918     WORD32 i4_log2_luma_wght_denom;
   2919
   2920     /** common denominator used for chroma weights (log2, per the member name) */
   2921     WORD32 i4_log2_chroma_wght_denom;
   2922
   2923     /**  offset w.r.t frame start in horz direction (pels) */
   2924     WORD32 i4_ctb_frm_pos_x;
   2925
   2926     /**  offset w.r.t frame start in vert direction (pels) */
   2927     WORD32 i4_ctb_frm_pos_y;
   2928
   2929     /* Bit Depth of Input */
   2930     WORD32 i4_bit_depth;
   2931
   2932     /* Chroma format: 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
   2933     UWORD8 u1_chroma_array_type;
   2934
   2935     /** weighted_pred_flag      */
   2936     WORD8 i1_weighted_pred_flag;
   2937
   2938     /** weighted_bipred_flag    */
   2939     WORD8 i1_weighted_bipred_flag;
   2940
   2941     /** Describes the extra CTBs around the frame due to the search
   2942         range associated with distributed-mode. Entries are top, left,
   2943         right and bottom. NOTE(review): member name says pels, comment says CTBs - confirm units */
   2944     WORD32 ai4_tile_xtra_pel[4];
   2945
   2946 } inter_pred_me_ctxt_t;
   2947 
   2948 typedef void FT_CALC_SATD_AND_RESULT(err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms);
        /*
         * The typedef above names a function type (not a pointer type): it takes
         * an err_prms_t pointer and a result_upd_prms_t pointer and returns nothing.
         * Per its name it presumably evaluates SATD and updates the results -
         * TODO confirm against the implementations.
         */
   2949 
   2950 typedef struct
   2951 {
            /*
             * Table of function pointers used by ME. The first six entries share the
             * FT_CALC_SATD_AND_RESULT signature; going by their names they presumably
             * differ in how many best results are kept (1 or 2) and in the number of
             * partitions handled (eq_1 / lt_9 / lt_17) - TODO confirm.
             */
   2952     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_eq_1;
   2953     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_9;
   2954     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_17;
   2955     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_eq_1;
   2956     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_9;
   2957     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_17;
            /* Remaining entries: presumably Hadamard (HAD) kernels for 8x8, 16x16 and 32x32 - TODO confirm */
   2958     FT_HAD_8X8_USING_4_4X4_R *pf_had_8x8_using_4_4x4_r;
   2959     FT_HAD_16X16_R *pf_had_16x16_r;
   2960     FT_HAD_32X32_USING_16X16 *pf_compute_32x32HAD_using_16x16;
   2961 } me_func_selector_t;
   2962 
   2963 /**
   2964 ******************************************************************************
   2965  *  @struct  me_frm_ctxt_t
   2966  *  @brief   Frame level handle for ME
   2967 ******************************************************************************
   2968  */
   2969 typedef struct
   2970 {
   2971     /** Init search candidates. NOTE(review): original comment says "2 sets, one for 4x8 and one for 8x4", but the array holds MAX_INIT_CANDTS entries - confirm */
   2972     search_node_t s_init_search_node[MAX_INIT_CANDTS];
   2973
   2974     /** Motion Vectors array */
   2975     mv_t as_search_cand_mv[MAX_INIT_CANDTS];
   2976
   2977     /** Results of 16 16x16 blks within a CTB used in enc layer */
   2978     search_results_t as_search_results_16x16[16];
   2979
   2980     /** Results of 4 32x32 blks in a ctb for enc layer merge stage */
   2981     search_results_t as_search_results_32x32[4];
   2982
   2983     /** Same as above but for 64x64 blk */
   2984     search_results_t s_search_results_64x64;
   2985
   2986     /**
   2987      * The arrays below store the input and 4 recon planes for each ref pic.
   2988      * These are duplications, and are present within layer ctxts, but
   2989      * kept here together for faster indexing during search
   2990      */
   2991
   2992     U08 *apu1_list_rec_fxfy[MAX_NUM_LAYERS][MAX_NUM_REF];
   2993     U08 *apu1_list_rec_hxfy[MAX_NUM_LAYERS][MAX_NUM_REF];
   2994     U08 *apu1_list_rec_fxhy[MAX_NUM_LAYERS][MAX_NUM_REF];
   2995     U08 *apu1_list_rec_hxhy[MAX_NUM_LAYERS][MAX_NUM_REF];
   2996     U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF];
   2997
   2998     void *apv_list_dep_mngr[MAX_NUM_LAYERS][MAX_NUM_REF]; /* presumably one dependency manager handle per layer and ref - TODO confirm */
   2999
   3000     /** Ptr to the all-layer context placeholder of the current pic being encoded */
   3001     layers_descr_t *ps_curr_descr;
   3002
   3003     /**
   3004      * ME uses ref id lc to search multi ref. This table look-up (TLU) gets
   3005      * the POC of the pic w.r.t. a given ref id
   3006      */
   3007     S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF];
   3008
   3009     /** POC of pic encoded just before current */
   3010     S32 i4_prev_poc;
   3011
   3012     /** POC of current pic being encoded */
   3013     S32 i4_curr_poc;
   3014
   3015     /** Buf mgr for memory allocation */
   3016     buf_mgr_t s_buf_mgr;
   3017
   3018     /** MV Grid for L0 and L1, this is active one used */
   3019     mv_grid_t as_mv_grid[2];
   3020
   3021     /**
   3022      * MV grid for FPEL and QPEL maintained separately. Depending on the
   3023      * correct prediction res. being used, copy appropriate results to
   3024      * the as_mv_grid structure
   3025      */
   3026     mv_grid_t as_mv_grid_fpel[2];
   3027     mv_grid_t as_mv_grid_qpel[2];
   3028
   3029     /** Number of HME layers, encode + non encode */
   3030     S32 num_layers;
   3031
   3032     /** Alloc time parameter, max ref frms used for this session */
   3033     S32 max_num_ref;
   3034
   3035     /**
   3036      * Number of layers that use explicit search. Explicit search means
   3037      * that each ref id is searched separately
   3038      */
   3039     S32 num_layers_explicit_search;
   3040
   3041     /**
   3042      * Maximum number of results maintained at any refinement layer
   3043      * search. Important from mem alloc perspective
   3044      */
   3045     S32 max_num_results;
   3046
   3047     /** Same as above but for coarse layer */
   3048     S32 max_num_results_coarse;
   3049
   3050     /** Array of flags, one per layer, indicating whether the layer is encoded */
   3051     U08 u1_encode[MAX_NUM_LAYERS];
   3052
   3053     /* Parameters used for lambda computation */
   3054     frm_lambda_ctxt_t s_frm_lambda_ctxt;
   3055
   3056     /**
   3057      * Array look up created each frm, maintaining the corresponding
   3058      * layer descr look up for each ref id
   3059      */
   3060     S32 a_ref_to_descr_id[MAX_NUM_REF];
   3061
   3062     /**
   3063      * Array lookup created each frame that maps a given ref id
   3064      * pertaining to unified list to a L0/L1 list. Encoder searches in terms
   3065      * of LC list or in other words does not differentiate between L0
   3066      * and L1 frames for most of search. Finally to report results to
   3067      * encoder, the ref id has to be remapped to suitable list
   3068      */
   3069     S32 a_ref_idx_lc_to_l0[MAX_NUM_REF];
   3070     S32 a_ref_idx_lc_to_l1[MAX_NUM_REF];
   3071
   3072     /** Width and ht. NOTE(review): original comment says "of each layer" but these are scalars - confirm which layer */
   3073     S32 i4_wd;
   3074     S32 i4_ht;
   3075
   3076     /** Histogram, one for each ref, allocated during create time */
   3077     mv_hist_t *aps_mv_hist[MAX_NUM_REF];
   3078
   3079     /**
   3080      * Back input requiring > 8  bit precision, allocated during
   3081      * create time, storing 2I-P0 for Bidir refinement
   3082      */
   3083     S16 *pi2_inp_bck;
   3084     ctb_boundary_attrs_t as_ctb_bound_attrs[NUM_CTB_BOUNDARY_TYPES]; /* one attribute set per CTB boundary type */
   3085
   3086     /** Whether a given ref id in Lc list is past frm or future frm */
   3087     U08 au1_is_past[MAX_NUM_REF];
   3088
   3089     /** These are L0 and L1 lists, storing ref id Lc in them */
   3090     S08 ai1_past_list[MAX_NUM_REF];
   3091     S08 ai1_future_list[MAX_NUM_REF];
   3092
   3093     /** Number of past and future ref pics sent this frm */
   3094     S32 num_ref_past;
   3095     S32 num_ref_future;
   3096
   3097     /**
   3098      * Frm prms passed by encoder: base pointers to the ctb_analyse_t,
   3099      * cu tree and me ctb data structures, followed by the
   3100      * corresponding running (current row) ptrs.
   3101      * NOTE(review): original comment says these are "stored as void", but the members are typed pointers
   3102      */
   3103
   3104     ctb_analyse_t *ps_ctb_analyse_base;
   3105     cur_ctb_cu_tree_t *ps_cu_tree_base;
   3106     me_ctb_data_t *ps_me_ctb_data_base;
   3107
   3108     ctb_analyse_t *ps_ctb_analyse_curr_row;
   3109     cu_analyse_t *ps_cu_analyse_curr_row;
   3110     cur_ctb_cu_tree_t *ps_cu_tree_curr_row;
   3111     me_ctb_data_t *ps_me_ctb_data_curr_row;
   3112
   3113     /** Log2 of ctb size e.g. for 64 size, it will be 6 */
   3114     S32 log_ctb_size;
   3115
   3116     hme_frm_prms_t *ps_hme_frm_prms; /* presumably the caller-owned frame params - TODO confirm */
   3117
   3118     hme_ref_map_t *ps_hme_ref_map; /* presumably the caller-owned reference map - TODO confirm */
   3119
   3120     /**
   3121      *  Scale factor of any given ref lc to another ref in Q8
   3122      *  First MAX_NUM_REF entries are to scale an mv of ref id k
   3123      *  w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k))
   3124      *  Next MAX_NUM_REF entries are to scale mv of ref id 1 w.r.t. 0
   3125      *  And so on
   3126      */
   3127     S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF];
   3128
   3129     /** bits for a given ref id, in either list L0/L1 */
   3130     U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF];
   3131
   3132     /** Points to above: 1 ptr for each list */
   3133     U08 *apu1_ref_bits_tlu_lc[2];
   3134
   3135     /**
   3136      *  Frame level base pointer to L0 IPE ctb analyze structures.
   3137      *  These structures include the following
   3138      *
   3139      *  1. Best costs and modes at all levels of CTB (CU=8,16,32,64)
   3140      *  2. Recommended IPE intra CU sizes for this CTB size
   3141      *  3. Early intra/inter decision structures for all 8x8 blocks of CTB
   3142      *     populated by L1-ME and L1-IPE
   3143      *
   3144      */
   3145     ipe_l0_ctb_analyse_for_me_t *ps_ipe_l0_ctb_frm_base;
   3146
   3147     /** array of ptrs to intra cost per layer encoded, stored at 8x8 */
   3148     double *apd_intra_cost[MAX_NUM_LAYERS];
   3149
   3150     /** number of b frames between P, depends on number of hierarchy layers */
   3151     S32 num_b_frms;
   3152
   3153     /** Frame level qp passed every frame by ME's caller */
   3154     S32 frm_qstep;
   3155
   3156     /** Frame level qp with higher precision : left shifted by 8 */
   3157     S32 qstep_ls8;
   3158
   3159     /** Backup of frame parameters */
   3160     hme_frm_prms_t s_frm_prms;
   3161
   3162     /** Weighted prediction parameters for all references are stored here.
   3163      *  Scratch buffers for the populated weighted inputs are also stored in this
   3164      */
   3165     wgt_pred_ctxt_t s_wt_pred;
   3166
   3167     /** Weighted pred enable flag */
   3168     S32 i4_wt_pred_enable_flag;
   3169
   3170     /** Results of 16 16x16 blks within a CTB used in enc layer */
   3171     inter_cu_results_t as_cu16x16_results[16];
   3172
   3173     /** Results of 4 32x32 blks in a ctb for enc layer merge stage */
   3174     inter_cu_results_t as_cu32x32_results[4];
   3175
   3176     /** Same as above but for 64x64 blk */
   3177     inter_cu_results_t s_cu64x64_results;
   3178
   3179     /** Results of 64 8x8 blks within a CTB used in enc layer */
   3180     inter_cu_results_t as_cu8x8_results[64];
   3181
   3182     WORD32 i4_is_prev_frame_reference; /* presumably non-zero when the previous frame is a reference - TODO confirm */
   3183
   3184     rc_quant_t *ps_rc_quant_ctxt; /* presumably the rate-control quant context - TODO confirm */
   3185
   3186     /** Dynamical Search Range parameters */
   3187     l0_dyn_range_prms_t as_l0_dyn_range_prms[NUM_SG_INTERLEAVED];
   3188
   3189     /** Dependency manager for Row level sync in L0 ME pass */
   3190     void *pv_dep_mngr_l0_me_sync;
   3191
   3192     /** Pointer to structure containing function pointers of encoder */
   3193     me_func_selector_t *ps_func_selector;
   3194
   3195     cluster_16x16_blk_t *ps_blk_16x16; /* cluster data for 16x16 blks, per the type name - TODO confirm */
   3196
   3197     cluster_32x32_blk_t *ps_blk_32x32; /* cluster data for 32x32 blks, per the type name - TODO confirm */
   3198
   3199     cluster_64x64_blk_t *ps_blk_64x64; /* cluster data for the 64x64 blk, per the type name - TODO confirm */
   3200
   3201     ctb_cluster_info_t *ps_ctb_cluster_info;
   3202
   3203     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt;
   3204
   3205     /* thread id of the current context */
   3206     WORD32 thrd_id;
   3207
   3208     /* dependency manager for forward ME sync */
   3209     void *pv_dep_mngr_encloop_dep_me;
   3210     WORD32 i4_l0me_qp_mod; /* presumably an L0 ME qp-modulation setting - TODO confirm */
   3211
   3212     /* mc ctxt to reuse the luma inter pred function
   3213     for the purpose of TRAQO */
   3214     inter_pred_me_ctxt_t s_mc_ctxt;
   3215
   3216     WORD32 i4_rc_pass; /* presumably the rate-control pass - TODO confirm */
   3217     /* pic type */
   3218     WORD32 i4_pic_type;
   3219
   3220     WORD32 i4_temporal_layer; /* presumably the temporal layer of the current pic - TODO confirm */
   3221
   3222     WORD32 i4_count; /* NOTE(review): what is counted is not stated in this header */
   3223
   3224     WORD32 i4_use_const_lamda_modifier; /* presumably a flag selecting a constant lambda modifier - TODO confirm */
   3225
   3226     double f_i_pic_lamda_modifier; /* presumably the lambda modifier for I pics - TODO confirm */
   3227
   3228     UWORD8 u1_is_curFrame_a_refFrame; /* presumably non-zero when the current frame is used as a reference - TODO confirm */
   3229
   3230     /* src_var related variables */
   3231     U32 au4_4x4_src_sigmaX[MAX_NUM_SIGMAS_4x4];
   3232     U32 au4_4x4_src_sigmaXSquared[MAX_NUM_SIGMAS_4x4];
   3233 } me_frm_ctxt_t;
   3234 
   3235 /**
   3236 ******************************************************************************
   3237  *  @struct  me_ctxt_t
   3238  *  @brief   Handle for ME
   3239 ******************************************************************************
   3240  */
   3241 typedef struct
   3242 {
   3243     /** Init prms sent by encoder during create time */
   3244     hme_init_prms_t s_init_prms;
   3245
   3246     /** Not used in encoder, relevant to test bench */
   3247     U08 *pu1_debug_out;
   3248
   3249     void *pv_ext_frm_prms; /* opaque handle; the concrete type is not visible in this header */
   3250
   3251     /* Frame level ME ctxts, MAX_NUM_ME_PARALLEL slots */
   3252     me_frm_ctxt_t *aps_me_frm_prms[MAX_NUM_ME_PARALLEL];
   3253
   3254     /** All-layer ctxt placeholders for all pics (array of descriptors, not a pointer) */
   3255     /** number of reference descriptors should be equal to max number of active references **/
   3256     layers_descr_t as_ref_descr[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1];
   3257
   3258     /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
   3259     void *pv_me_optimised_function_list;
   3260
   3261     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list; /* presumably the common-utils optimised function list - TODO confirm */
   3262
   3263     /* Pointer to Tile params base */
   3264     void *pv_tile_params_base;
   3265
   3266 } me_ctxt_t;
   3267 
   3268 typedef struct
   3269 {
            /* Master context for coarse ME: holds the per-thread coarse_me_ctxt_t handles plus the state declared below */
   3270     /** array of contexts, one for each thread */
   3271     coarse_me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
   3272
   3273     /** memtabs storage memory */
   3274     hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
   3275
   3276     /** Frame level parameters for ME */
   3277     hme_frm_prms_t s_frm_prms;
   3278
   3279     /** Holds all reference mapping */
   3280     hme_ref_map_t s_ref_map;
   3281
   3282     /** number of threads created at run time */
   3283     WORD32 i4_num_proc_thrds;
   3284
   3285     /** Dependency manager for Row level sync in HME pass */
   3286     /* Note : index this array with (layer_id - 1)        */
   3287     void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1];
   3288     /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
   3289     void *pv_me_optimised_function_list;
   3290
   3291     ihevce_cmn_opt_func_t s_cmn_opt_func; /* embedded by value, unlike the pointer member of the same type in coarse_me_ctxt_t */
   3292 } coarse_me_master_ctxt_t;
   3293 
   3294 typedef struct
   3295 {
            /* Master context for ME: holds the per-thread me_ctxt_t handles plus the state declared below */
   3296     /** array of contexts, one for each thread */
   3297     me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];
   3298
   3299     /** memtabs storage memory */
   3300     hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
   3301
   3302     /** Frame level parameters for ME, MAX_NUM_ME_PARALLEL entries */
   3303     hme_frm_prms_t as_frm_prms[MAX_NUM_ME_PARALLEL];
   3304
   3305     /** Holds all reference mapping, MAX_NUM_ME_PARALLEL entries */
   3306     hme_ref_map_t as_ref_map[MAX_NUM_ME_PARALLEL];
   3307
   3308     /** number of threads created at run time */
   3309     WORD32 i4_num_proc_thrds;
   3310
   3311     /** number of me frames running in parallel */
   3312     WORD32 i4_num_me_frm_pllel;
   3313
   3314     /** Structure containing function pointers of encoder (embedded by value, not a pointer) */
   3315     me_func_selector_t s_func_selector;
   3316     /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
   3317     void *pv_me_optimised_function_list;
   3318
   3319     ihevce_cmn_opt_func_t s_cmn_opt_func; /* embedded by value, unlike the pointer member of the same type in me_ctxt_t */
   3320
   3321     /* Pointer to Tile params base */
   3322     void *pv_tile_params_base;
   3323
   3324 } me_master_ctxt_t;
   3325 /** Bundles one MV, a reference index and a node map; NOTE(review): per the type name this supports de-duplicating subpel search points - confirm against users */
   3326 typedef struct
   3327 {
   3328     S16 i2_mv_x; /**< x component of the MV (units not visible here) */
   3329 
   3330     S16 i2_mv_y; /**< y component of the MV (units not visible here) */
   3331 
   3332     U08 u1_ref_idx; /**< Reference index associated with the MV */
   3333 
   3334     U32 au4_node_map[2 * MAP_Y_MAX]; /**< 2 * MAP_Y_MAX 32-bit words; presumably a bitmap of visited nodes - TODO confirm */
   3335 
   3336 } subpel_dedup_enabler_t;
   3337 /** Alias: hme_dedup_enabler_t is the same type as subpel_dedup_enabler_t */
   3338 typedef subpel_dedup_enabler_t hme_dedup_enabler_t;
   3339 /** Inputs for initialising full-pel search candidates (per the type name); member notes below are derived from member names - confirm against the code that fills this struct */
   3340 typedef struct
   3341 {
   3342     layer_ctxt_t *ps_curr_layer; /**< Context of the current layer */
   3343 
   3344     layer_ctxt_t *ps_coarse_layer; /**< Context of the coarse layer */
   3345 
   3346     U08 *pu1_num_fpel_search_cands; /**< Pointer to full-pel search candidate count(s); extent not visible here */
   3347 
   3348     S32 *pi4_ref_id_lc_to_l0_map; /**< Presumably maps an LC ref id to its L0 ref id - TODO confirm */
   3349 
   3350     S32 *pi4_ref_id_lc_to_l1_map; /**< Presumably maps an LC ref id to its L1 ref id - TODO confirm */
   3351 
   3352     S32 i4_pos_x; /**< x position (units and origin not visible here) */
   3353 
   3354     S32 i4_pos_y; /**< y position (units and origin not visible here) */
   3355 
   3356     S32 i4_num_act_ref_l0; /**< Number of active references in L0 */
   3357 
   3358     S32 i4_num_act_ref_l1; /**< Number of active references in L1 */
   3359 
   3360     search_candt_t *ps_search_cands; /**< Pointer to the search candidates */
   3361 
   3362     U08 u1_search_candidate_list_index; /**< Index of the search candidate list in use */
   3363 
   3364     S32 i4_max_num_init_cands; /**< Maximum number of initial candidates */
   3365 
   3366     U08 u1_pred_dir; /**< Prediction direction (value encoding not visible here) */
   3367 
   3368     /* Indicates the position of the current predDir in the processing order of predDir */
   3369     U08 u1_pred_dir_ctr;
   3370 
   3371     /* The following 4 flags apply exclusively to spatial candidates */
   3372     U08 u1_is_topRight_available; /**< Top-right neighbour availability flag */
   3373 
   3374     U08 u1_is_topLeft_available; /**< Top-left neighbour availability flag */
   3375 
   3376     U08 u1_is_top_available; /**< Top neighbour availability flag */
   3377 
   3378     U08 u1_is_left_available; /**< Left neighbour availability flag */
   3379 
   3380     S08 i1_default_ref_id; /**< Default reference id (signed) */
   3381 
   3382     S08 i1_alt_default_ref_id; /**< Alternate default reference id (signed) */
   3383 
   3384     U08 u1_num_results_in_mvbank; /**< Number of results held in the MV bank */
   3385 
   3386     BLK_SIZE_T e_search_blk_size; /**< Block size used for the search */
   3387 
   3388 } fpel_srch_cand_init_data_t;
   3389 /** Describes one prediction buffer: data pointer, stride and an array id */
   3390 typedef struct
   3391 {
   3392     U08 *pu1_pred; /**< Pointer to the prediction samples */
   3393 
   3394     S32 i4_pred_stride; /**< Stride of the prediction buffer (units not visible here) */
   3395 
   3396     U08 u1_pred_buf_array_id; /**< Id of the prediction buffer array; NOTE(review): which array it indexes is not visible here */
   3397 
   3398 } hme_pred_buf_info_t;
   3399 
   3400 /*****************************************************************************/
   3401 /* Typedefs                                                                  */
   3402 /*****************************************************************************/
   3403 typedef void (*PF_SAD_FXN_T)(err_prms_t *); /**< Pointer to a void function taking only an err_prms_t pointer */
   3404 /** Pointer to a void function taking an err_prms_t pointer and a result_upd_prms_t pointer */
   3405 typedef void (*PF_SAD_RESULT_FXN_T)(err_prms_t *, result_upd_prms_t *ps_result_prms);
   3406 /** Pointer to a function returning WORD32; takes err params, lambda, its Q shift, frame qstep and the function selector (meaning of the return value not visible here) */
   3407 typedef WORD32 (*PF_SAD_FXN_TU_REC)(
   3408     err_prms_t *,
   3409     WORD32 lambda,
   3410     WORD32 lamda_q_shift,
   3411     WORD32 i4_frm_qstep,
   3412     me_func_selector_t *ps_func_selector);
   3413 /** Pointer to a void function taking only a result_upd_prms_t pointer */
   3414 typedef void (*PF_RESULT_FXN_T)(result_upd_prms_t *);
   3415 /** Pointer to a void function taking search params, weighted-pred context, err params, result-update params, a U08 ** and an S32; roles of the last two unnamed parameters are not visible here */
   3416 typedef void (*PF_CALC_SAD_AND_RESULT)(
   3417     hme_search_prms_t *, wgt_pred_ctxt_t *, err_prms_t *, result_upd_prms_t *, U08 **, S32);
   3418 
   3419 #endif /* _HME_DEFS_H_ */
   3420