Home | History | Annotate | Download | only in encoder
      1 /*
      2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
      3  *
      4  * This source code is subject to the terms of the BSD 2 Clause License and
      5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6  * was not distributed with this source code in the LICENSE file, you can
      7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8  * Media Patent License 1.0 was not distributed with this source code in the
      9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10  */
     11 
     12 #include <limits.h>
     13 #include <math.h>
     14 #include <stdio.h>
     15 
     16 #include "config/aom_config.h"
     17 #include "config/aom_dsp_rtcd.h"
     18 
     19 #include "aom_dsp/aom_dsp_common.h"
     20 #include "aom_mem/aom_mem.h"
     21 #include "aom_ports/mem.h"
     22 #include "aom_ports/system_state.h"
     23 
     24 #include "av1/common/common.h"
     25 #include "av1/common/mvref_common.h"
     26 #include "av1/common/onyxc_int.h"
     27 #include "av1/common/reconinter.h"
     28 
     29 #include "av1/encoder/encoder.h"
     30 #include "av1/encoder/encodemv.h"
     31 #include "av1/encoder/mcomp.h"
     32 #include "av1/encoder/partition_strategy.h"
     33 #include "av1/encoder/rdopt.h"
     34 #include "av1/encoder/reconinter_enc.h"
     35 
     36 // #define NEW_DIAMOND_SEARCH
     37 
     38 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
     39                                              const MV *mv) {
     40   return &buf->buf[mv->row * buf->stride + mv->col];
     41 }
     42 
     43 void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
     44   int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
     45   int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
     46   int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
     47   int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
     48 
     49   col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1);
     50   row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1);
     51   col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1);
     52   row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1);
     53 
     54   // Get intersection of UMV window and valid MV window to reduce # of checks
     55   // in diamond search.
     56   if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
     57   if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
     58   if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
     59   if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
     60 }
     61 
     62 static void set_subpel_mv_search_range(const MvLimits *mv_limits, int *col_min,
     63                                        int *col_max, int *row_min, int *row_max,
     64                                        const MV *ref_mv) {
     65   const int max_mv = MAX_FULL_PEL_VAL * 8;
     66   const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv);
     67   const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv);
     68   const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv);
     69   const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv);
     70 
     71   *col_min = AOMMAX(MV_LOW + 1, minc);
     72   *col_max = AOMMIN(MV_UPP - 1, maxc);
     73   *row_min = AOMMAX(MV_LOW + 1, minr);
     74   *row_max = AOMMIN(MV_UPP - 1, maxr);
     75 }
     76 
     77 int av1_init_search_range(int size) {
     78   int sr = 0;
     79   // Minimum search size no matter what the passed in value.
     80   size = AOMMAX(16, size);
     81 
     82   while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
     83 
     84   sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2);
     85   return sr;
     86 }
     87 
     88 static INLINE int mv_cost(const MV *mv, const int *joint_cost,
     89                           int *const comp_cost[2]) {
     90   return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
     91          comp_cost[1][mv->col];
     92 }
     93 
     94 int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
     95                     int *mvcost[2], int weight) {
     96   const MV diff = { mv->row - ref->row, mv->col - ref->col };
     97   return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
     98 }
     99 
    100 #define PIXEL_TRANSFORM_ERROR_SCALE 4
    101 static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
    102                        int *mvcost[2], int error_per_bit) {
    103   if (mvcost) {
    104     const MV diff = { mv->row - ref->row, mv->col - ref->col };
    105     return (int)ROUND_POWER_OF_TWO_64(
    106         (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
    107         RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
    108             PIXEL_TRANSFORM_ERROR_SCALE);
    109   }
    110   return 0;
    111 }
    112 
    113 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
    114                           int sad_per_bit) {
    115   const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
    116   return ROUND_POWER_OF_TWO(
    117       (unsigned)mv_cost(&diff, x->nmv_vec_cost, x->mv_cost_stack) * sad_per_bit,
    118       AV1_PROB_COST_SHIFT);
    119 }
    120 
    121 void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) {
    122   int len, ss_count = 1;
    123 
    124   cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
    125   cfg->ss[0].offset = 0;
    126 
    127   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    128     // Generate offsets for 4 search sites per step.
    129     const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
    130     int i;
    131     for (i = 0; i < 4; ++i) {
    132       search_site *const ss = &cfg->ss[ss_count++];
    133       ss->mv = ss_mvs[i];
    134       ss->offset = ss->mv.row * stride + ss->mv.col;
    135     }
    136   }
    137 
    138   cfg->ss_count = ss_count;
    139   cfg->searches_per_step = 4;
    140 }
    141 
    142 void av1_init3smotion_compensation(search_site_config *cfg, int stride) {
    143   int len, ss_count = 1;
    144 
    145   cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
    146   cfg->ss[0].offset = 0;
    147 
    148   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    149     // Generate offsets for 8 search sites per step.
    150     const MV ss_mvs[8] = { { -len, 0 },   { len, 0 },     { 0, -len },
    151                            { 0, len },    { -len, -len }, { -len, len },
    152                            { len, -len }, { len, len } };
    153     int i;
    154     for (i = 0; i < 8; ++i) {
    155       search_site *const ss = &cfg->ss[ss_count++];
    156       ss->mv = ss_mvs[i];
    157       ss->offset = ss->mv.row * stride + ss->mv.col;
    158     }
    159   }
    160 
    161   cfg->ss_count = ss_count;
    162   cfg->searches_per_step = 8;
    163 }
    164 
/*
 * To avoid the penalty of reads that cross a cache line, preload the
 * reference area into a small aligned buffer, so that reads from this buffer
 * never cross a cache line. This reduces the CPU cycles spent on reading
 * reference data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
    174 
    175 // convert motion vector component to offset for sv[a]f calc
    176 static INLINE int sp(int x) { return x & 7; }
    177 
    178 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
    179   const int offset = (r >> 3) * stride + (c >> 3);
    180   return buf + offset;
    181 }
    182 
/* Checks if (r, c) has a better score than the previous best.
 *
 * If (r, c) lies inside [minr, maxr] x [minc, maxc], v is set to the motion
 * vector cost of (r, c) relative to ref_mv plus the value returned by one of
 * the sub-pixel variance functions of vfp:
 *   - svf  when second_pred is NULL,
 *   - msvf when second_pred and mask are both set,
 *   - svaf when second_pred is set and mask is not.
 * If v is below besterr, then besterr, br, bc, *distortion and *sse1 are
 * updated. If (r, c) is outside the window, v is set to INT_MAX.
 *
 * The macro relies on these names being in scope at the expansion site:
 * minc, maxc, minr, maxr, ref_mv, mvjcost, mvcost, error_per_bit,
 * second_pred, mask, mask_stride, invert_mask, vfp, y, y_stride,
 * src_address, src_stride, sse, thismse, besterr, br, bc, distortion, sse1.
 *
 * It expands to a bare if/else statement (not do { } while (0)), and r and c
 * are pasted unparenthesized and evaluated several times, so pass only simple
 * side-effect-free expressions.
 */
#define CHECK_BETTER(v, r, c)                                             \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                 \
    MV this_mv = { r, c };                                                \
    v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);    \
    if (second_pred == NULL) {                                            \
      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r),  \
                         src_address, src_stride, &sse);                  \
    } else if (mask) {                                                    \
      thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
                          src_address, src_stride, second_pred, mask,     \
                          mask_stride, invert_mask, &sse);                \
    } else {                                                              \
      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
                          src_address, src_stride, &sse, second_pred);    \
    }                                                                     \
    v += thismse;                                                         \
    if (v < besterr) {                                                    \
      besterr = v;                                                        \
      br = r;                                                             \
      bc = c;                                                             \
      *distortion = thismse;                                              \
      *sse1 = sse;                                                        \
    }                                                                     \
  } else {                                                                \
    v = INT_MAX;                                                          \
  }
    210 
/* Alias of CHECK_BETTER. SECOND_LEVEL_CHECKS_BEST(k) builds the macro name by
 * pasting k onto CHECK_BETTER, so k == 0 resolves to this definition. */
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
    212 
/* Checks if (r, c) has a better score than the previous best, measuring the
 * prediction error with upsampled_pref_error() instead of the sub-pixel
 * variance functions used by CHECK_BETTER.
 *
 * If (r, c) lies inside [minr, maxr] x [minc, maxc], v is set to the motion
 * vector cost of (r, c) relative to ref_mv plus the upsampled prediction
 * error. If v is below besterr, then besterr, br, bc, *distortion and *sse1
 * are updated. If (r, c) is outside the window, v is set to INT_MAX.
 *
 * In addition to the names CHECK_BETTER needs, the expansion site must
 * provide xd, cm, mi_row, mi_col, w, h and use_accurate_subpel_search.
 *
 * Like CHECK_BETTER, this expands to a bare if/else and evaluates r and c
 * several times.
 */
#define CHECK_BETTER1(v, r, c)                                             \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                  \
    MV this_mv = { r, c };                                                 \
    thismse = upsampled_pref_error(                                        \
        xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,    \
        pre(y, y_stride, r, c), y_stride, sp(c), sp(r), second_pred, mask, \
        mask_stride, invert_mask, w, h, &sse, use_accurate_subpel_search); \
    v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);     \
    v += thismse;                                                          \
    if (v < besterr) {                                                     \
      besterr = v;                                                         \
      br = r;                                                              \
      bc = c;                                                              \
      *distortion = thismse;                                               \
      *sse1 = sse;                                                         \
    }                                                                      \
  } else {                                                                 \
    v = INT_MAX;                                                           \
  }
    233 
/* Evaluates the four axis-aligned neighbours of (tr, tc) at distance hstep
 * (left, right, up, down) with CHECK_BETTER, then one diagonal neighbour.
 *
 * whichdir records which side was cheaper on each axis: bit 0 is 0 when left
 * beat right (1 otherwise) and bit 1 is 0 when up beat down (2 otherwise).
 * The diagonal tested is the one lying on the cheaper side of both axes.
 *
 * Requires tr, tc, hstep and whichdir in scope, plus everything CHECK_BETTER
 * needs; besterr, br and bc are updated through CHECK_BETTER.
 */
#define FIRST_LEVEL_CHECKS                                       \
  {                                                              \
    unsigned int left, right, up, down, diag;                    \
    CHECK_BETTER(left, tr, tc - hstep);                          \
    CHECK_BETTER(right, tr, tc + hstep);                         \
    CHECK_BETTER(up, tr - hstep, tc);                            \
    CHECK_BETTER(down, tr + hstep, tc);                          \
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);     \
    switch (whichdir) {                                          \
      case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
      case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
      case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
      case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
    }                                                            \
  }
    249 
/* Follow-up search after FIRST_LEVEL_CHECKS. The extra points tested depend
 * on how the current best (br, bc) moved away from the start point (tr, tc):
 *   - moved on both axes: two points one further step along each axis
 *     (kr = br - tr, kc = bc - tc);
 *   - moved in column only: two points at column tc + 2 * kc, one hstep above
 *     and below tr, then one point at column tc + kc whose row side is
 *     selected by whichdir;
 *   - moved in row only: the mirror image of the previous case;
 *   - did not move: nothing is tested.
 *
 * Requires tr, tc, br, bc, hstep and whichdir in scope, plus everything
 * CHECK_BETTER needs. Because CHECK_BETTER may update br and bc, later checks
 * in the same expansion still use the kr/kc captured at the top of the branch.
 */
#define SECOND_LEVEL_CHECKS                                       \
  {                                                               \
    int kr, kc;                                                   \
    unsigned int second;                                          \
    if (tr != br && tc != bc) {                                   \
      kr = br - tr;                                               \
      kc = bc - tc;                                               \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);                 \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);                 \
    } else if (tr == br && tc != bc) {                            \
      kc = bc - tc;                                               \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);              \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);              \
      switch (whichdir) {                                         \
        case 0:                                                   \
        case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
        case 2:                                                   \
        case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
      }                                                           \
    } else if (tr != br && tc == bc) {                            \
      kr = br - tr;                                               \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);              \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);              \
      switch (whichdir) {                                         \
        case 0:                                                   \
        case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
        case 1:                                                   \
        case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
      }                                                           \
    }                                                             \
  }
    281 
// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST is a rewrite of
// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
// later in the same way.
//
// Follow-up search around the current best (br, bc), which must share a row
// or a column with the start point (tr, tc) (asserted). k selects the check
// macro by token pasting: CHECK_BETTER0 or CHECK_BETTER1.
//
// kr and kc are NOT declared here; they must already exist in the enclosing
// scope. Only the one matching the axis that moved is overwritten, so the
// other keeps whatever value the caller gave it (and both keep their values
// when the best point did not move).
//
// The points tested are (br0 + kr, bc0) and (br0, bc0 + kc), where br0/bc0
// snapshot the best point on entry; if either check improved the best, the
// diagonal (br0 + kr, bc0 + kc) is tested as well.
#define SECOND_LEVEL_CHECKS_BEST(k)                \
  {                                                \
    unsigned int second;                           \
    int br0 = br;                                  \
    int bc0 = bc;                                  \
    assert(tr == br || tc == bc);                  \
    if (tr == br && tc != bc) {                    \
      kc = bc - tc;                                \
    } else if (tr != br && tc == bc) {             \
      kr = br - tr;                                \
    }                                              \
    CHECK_BETTER##k(second, br0 + kr, bc0);        \
    CHECK_BETTER##k(second, br0, bc0 + kc);        \
    if (br0 != br || bc0 != bc) {                  \
      CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
    }                                              \
  }
    302 
/* Declares and initialises the locals shared by the sub-pel search functions
 * and the CHECK_BETTER family of macros.
 *
 * Requires x (MACROBLOCK *), iters_per_step and ref_mv in scope.
 *
 * Declares: src_address, src_stride, xd, besterr, sse, whichdir, thismse,
 * bestmv, halfiters, quarteriters, eighthiters, y_stride, offset, y, br, bc,
 * hstep, minc, maxc, minr, maxr, tr, tc.
 *
 * Notes:
 *   - offset is computed from bestmv BEFORE bestmv is scaled, i.e. from the
 *     row/col values x->best_mv held on entry.
 *   - br/bc (and tr/tc) start at bestmv * 8, and hstep starts at 4.
 *   - halfiters, quarteriters and eighthiters are all set to iters_per_step.
 *   - minc/maxc/minr/maxr are filled in by set_subpel_mv_search_range().
 *   - As a side effect, x->best_mv.as_mv is multiplied by 8 in place.
 *
 * The expansion mixes declarations and statements, so it must appear where
 * both are permitted.
 */
#define SETUP_SUBPEL_SEARCH                                             \
  const uint8_t *const src_address = x->plane[0].src.buf;               \
  const int src_stride = x->plane[0].src.stride;                        \
  const MACROBLOCKD *xd = &x->e_mbd;                                    \
  unsigned int besterr = INT_MAX;                                       \
  unsigned int sse;                                                     \
  unsigned int whichdir;                                                \
  int thismse;                                                          \
  MV *bestmv = &x->best_mv.as_mv;                                       \
  const unsigned int halfiters = iters_per_step;                        \
  const unsigned int quarteriters = iters_per_step;                     \
  const unsigned int eighthiters = iters_per_step;                      \
  const int y_stride = xd->plane[0].pre[0].stride;                      \
  const int offset = bestmv->row * y_stride + bestmv->col;              \
  const uint8_t *const y = xd->plane[0].pre[0].buf;                     \
                                                                        \
  int br = bestmv->row * 8;                                             \
  int bc = bestmv->col * 8;                                             \
  int hstep = 4;                                                        \
  int minc, maxc, minr, maxr;                                           \
  int tr = br;                                                          \
  int tc = bc;                                                          \
                                                                        \
  set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, \
                             ref_mv);                                   \
                                                                        \
  bestmv->row *= 8;                                                     \
  bestmv->col *= 8;
    331 
    332 static unsigned int setup_center_error(
    333     const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
    334     int error_per_bit, const aom_variance_fn_ptr_t *vfp,
    335     const uint8_t *const src, const int src_stride, const uint8_t *const y,
    336     int y_stride, const uint8_t *second_pred, const uint8_t *mask,
    337     int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
    338     int *mvcost[2], unsigned int *sse1, int *distortion) {
    339   unsigned int besterr;
    340   if (second_pred != NULL) {
    341     if (is_cur_buf_hbd(xd)) {
    342       DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
    343       uint8_t *comp_pred = CONVERT_TO_BYTEPTR(comp_pred16);
    344       if (mask) {
    345         aom_highbd_comp_mask_pred(comp_pred, second_pred, w, h, y + offset,
    346                                   y_stride, mask, mask_stride, invert_mask);
    347       } else {
    348         aom_highbd_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
    349                                  y_stride);
    350       }
    351       besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
    352     } else {
    353       DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
    354       if (mask) {
    355         aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
    356                            mask, mask_stride, invert_mask);
    357       } else {
    358         aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
    359       }
    360       besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
    361     }
    362   } else {
    363     besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
    364   }
    365   *distortion = besterr;
    366   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
    367   return besterr;
    368 }
    369 
    370 static INLINE int divide_and_round(int n, int d) {
    371   return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
    372 }
    373 
    374 static INLINE int is_cost_list_wellbehaved(int *cost_list) {
    375   return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
    376          cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
    377 }
    378 
    379 // Returns surface minima estimate at given precision in 1/2^n bits.
    380 // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
    381 // For a given set of costs S0, S1, S2, S3, S4 at points
    382 // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
    383 // the solution for the location of the minima (x0, y0) is given by:
    384 // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
    385 // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
    386 // The code below is an integerized version of that.
    387 static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
    388   *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
    389                          (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
    390   *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
    391                          (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
    392 }
    393 
// Sub-pel refinement of the motion vector in x->best_mv ("pruned evenmore"
// variant).
//
// After SETUP_SUBPEL_SEARCH and the centre-error evaluation:
//   - If cost_list is non-NULL, none of its five entries is INT_MAX and
//     cost_list[0] is strictly the smallest, a single candidate predicted by
//     get_cost_surf_min() is tested in place of the first two search levels.
//   - Otherwise FIRST_LEVEL_CHECKS (plus SECOND_LEVEL_CHECKS when
//     iters_per_step > 1) run with hstep == 4 and, unless forced_stop == 2,
//     again with hstep halved.
//   - Finally, when allow_hp is set and forced_stop == 0, one more pass runs
//     with hstep halved again.
//
// On return x->best_mv.as_mv holds the best (row, col) found, *distortion and
// *sse1 hold the values recorded for it, and the function returns besterr.
//
// cm, mi_row, mi_col, use_accurate_subpel_search and do_reset_fractional_mv
// are not used by this variant.
int av1_find_best_sub_pixel_tree_pruned_evenmore(
    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h,
    int use_accurate_subpel_search, const int do_reset_fractional_mv) {
  SETUP_SUBPEL_SEARCH;
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
  // Silence unused-variable warnings. Several of these names are in fact
  // used further down; the casts are harmless.
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)allow_hp;
  (void)forced_stop;
  (void)hstep;
  (void)use_accurate_subpel_search;
  (void)cm;
  (void)mi_row;
  (void)mi_col;
  (void)do_reset_fractional_mv;

  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    int ir, ic;
    unsigned int minpt;
    // Predict the minimum from the five costs (ir/ic computed with bits == 2)
    // and test only that point, skipping it when it is the centre itself.
    get_cost_surf_min(cost_list, &ir, &ic, 2);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }

    // Re-centre on the best point found so far.
    tr = br;
    tc = bc;

    // Each subsequent iteration checks at least one point in common with
    // the last iteration could be 2 ( if diag selected) 1/4 pel
    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    if (forced_stop != 2) {
      hstep >>= 1;
      FIRST_LEVEL_CHECKS;
      if (quarteriters > 1) {
        SECOND_LEVEL_CHECKS;
      }
    }
  }

  tr = br;
  tc = bc;

  // NOTE(review): when the cost_list branch above was taken, hstep is still 4
  // here, so this pass runs with hstep == 2; after the else branch (with
  // forced_stop == 0) it runs with hstep == 1. Confirm this is intended.
  if (allow_hp && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
    466 
// Sub-pel refinement of the motion vector in x->best_mv ("pruned more"
// variant).
//
// After SETUP_SUBPEL_SEARCH and the centre-error evaluation:
//   - If cost_list is non-NULL, none of its five entries is INT_MAX and
//     cost_list[0] is strictly the smallest, the first search level is
//     replaced by a single candidate predicted by get_cost_surf_min().
//     Otherwise FIRST_LEVEL_CHECKS (plus SECOND_LEVEL_CHECKS when
//     iters_per_step > 1) run with hstep == 4.
//   - Unless forced_stop == 2, a second pass runs with hstep halved.
//   - When allow_hp is set and forced_stop == 0, a third pass runs with hstep
//     halved again.
//
// On return x->best_mv.as_mv holds the best (row, col) found, *distortion and
// *sse1 hold the values recorded for it, and the function returns besterr.
//
// cm, mi_row, mi_col, use_accurate_subpel_search and do_reset_fractional_mv
// are not used by this variant.
int av1_find_best_sub_pixel_tree_pruned_more(
    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h,
    int use_accurate_subpel_search, const int do_reset_fractional_mv) {
  SETUP_SUBPEL_SEARCH;
  (void)use_accurate_subpel_search;
  (void)cm;
  (void)mi_row;
  (void)mi_col;
  (void)do_reset_fractional_mv;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    unsigned int minpt;
    int ir, ic;
    // Predict the minimum from the five costs (ir/ic computed with bits == 1,
    // then scaled by hstep) and test only that point, skipping it when it is
    // the centre itself.
    get_cost_surf_min(cost_list, &ir, &ic, 1);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    // Re-centre on the best point found so far before halving the step.
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  if (allow_hp && forced_stop == 0) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
    535 
// Sub-pel refinement of the motion vector in x->best_mv ("pruned" variant).
//
// After SETUP_SUBPEL_SEARCH and the centre-error evaluation:
//   - If cost_list is non-NULL and none of its five entries is INT_MAX, the
//     first level tests only three points: the cheaper horizontal neighbour,
//     the cheaper vertical neighbour and the diagonal between them, where
//     "cheaper" is decided from cost_list rather than by measuring.
//     Otherwise FIRST_LEVEL_CHECKS (plus SECOND_LEVEL_CHECKS when
//     iters_per_step > 1) run with hstep == 4.
//   - Unless forced_stop == 2, a second pass runs with hstep halved.
//   - When allow_hp is set and forced_stop == 0, a third pass runs with hstep
//     halved again.
//
// On return x->best_mv.as_mv holds the best (row, col) found, *distortion and
// *sse1 hold the values recorded for it, and the function returns besterr.
//
// cm, mi_row, mi_col, use_accurate_subpel_search and do_reset_fractional_mv
// are not used by this variant.
int av1_find_best_sub_pixel_tree_pruned(
    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h,
    int use_accurate_subpel_search, const int do_reset_fractional_mv) {
  SETUP_SUBPEL_SEARCH;
  (void)use_accurate_subpel_search;
  (void)cm;
  (void)mi_row;
  (void)mi_col;
  (void)do_reset_fractional_mv;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX) {
    unsigned int left, right, up, down, diag;
    // Bit 0: 0 when cost_list[1] < cost_list[3], else 1.
    // Bit 1: 0 when cost_list[2] < cost_list[4], else 2.
    // Each case below tests the neighbours on the cheaper side of each axis
    // and the diagonal between them.
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
               (cost_list[2] < cost_list[4] ? 0 : 2);
    switch (whichdir) {
      case 0:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
        break;
      case 1:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
        break;
      case 2:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
        break;
      case 3:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
        break;
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  // Re-centre on the best point found so far.
  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (allow_hp && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
    626 
/* clang-format off */
// Step displacements as { row, col } pairs. The table holds three groups of
// four entries, one group per step size (4, then 2, then 1); within each
// group the order is left, right, up, down.
static const MV search_step_table[12] = {
  // left, right, up, down
  { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
};
/* clang-format on */
    635 
    636 static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *const cm,
    637                                 int mi_row, int mi_col, const MV *const mv,
    638                                 const aom_variance_fn_ptr_t *vfp,
    639                                 const uint8_t *const src, const int src_stride,
    640                                 const uint8_t *const y, int y_stride,
    641                                 int subpel_x_q3, int subpel_y_q3,
    642                                 const uint8_t *second_pred, const uint8_t *mask,
    643                                 int mask_stride, int invert_mask, int w, int h,
    644                                 unsigned int *sse, int subpel_search) {
    645   unsigned int besterr;
    646   if (is_cur_buf_hbd(xd)) {
    647     DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
    648     uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16);
    649     if (second_pred != NULL) {
    650       if (mask) {
    651         aom_highbd_comp_mask_upsampled_pred(
    652             xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
    653             subpel_y_q3, y, y_stride, mask, mask_stride, invert_mask, xd->bd,
    654             subpel_search);
    655       } else {
    656         aom_highbd_comp_avg_upsampled_pred(
    657             xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
    658             subpel_y_q3, y, y_stride, xd->bd, subpel_search);
    659       }
    660     } else {
    661       aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
    662                                 subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
    663                                 subpel_search);
    664     }
    665     besterr = vfp->vf(pred8, w, src, src_stride, sse);
    666   } else {
    667     DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
    668     if (second_pred != NULL) {
    669       if (mask) {
    670         aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
    671                                      second_pred, w, h, subpel_x_q3,
    672                                      subpel_y_q3, y, y_stride, mask,
    673                                      mask_stride, invert_mask, subpel_search);
    674       } else {
    675         aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
    676                                     second_pred, w, h, subpel_x_q3, subpel_y_q3,
    677                                     y, y_stride, subpel_search);
    678       }
    679     } else {
    680       aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
    681                          subpel_y_q3, y, y_stride, subpel_search);
    682     }
    683 
    684     besterr = vfp->vf(pred, w, src, src_stride, sse);
    685   }
    686   return besterr;
    687 }
    688 
    689 static unsigned int upsampled_setup_center_error(
    690     MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
    691     const MV *bestmv, const MV *ref_mv, int error_per_bit,
    692     const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
    693     const int src_stride, const uint8_t *const y, int y_stride,
    694     const uint8_t *second_pred, const uint8_t *mask, int mask_stride,
    695     int invert_mask, int w, int h, int offset, int *mvjcost, int *mvcost[2],
    696     unsigned int *sse1, int *distortion, int subpel_search) {
    697   unsigned int besterr =
    698       upsampled_pref_error(xd, cm, mi_row, mi_col, bestmv, vfp, src, src_stride,
    699                            y + offset, y_stride, 0, 0, second_pred, mask,
    700                            mask_stride, invert_mask, w, h, sse1, subpel_search);
    701   *distortion = besterr;
    702   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
    703   return besterr;
    704 }
    705 
    706 // when use_accurate_subpel_search == 0
    707 static INLINE unsigned int estimate_upsampled_pref_error(
    708     const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
    709     const int src_stride, const uint8_t *const pre, int y_stride,
    710     int subpel_x_q3, int subpel_y_q3, const uint8_t *second_pred,
    711     const uint8_t *mask, int mask_stride, int invert_mask, unsigned int *sse) {
    712   if (second_pred == NULL) {
    713     return vfp->svf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
    714                     sse);
    715   } else if (mask) {
    716     return vfp->msvf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
    717                      second_pred, mask, mask_stride, invert_mask, sse);
    718   } else {
    719     return vfp->svaf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
    720                      sse, second_pred);
    721   }
    722 }
    723 
    724 int av1_find_best_sub_pixel_tree(
    725     MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
    726     const MV *ref_mv, int allow_hp, int error_per_bit,
    727     const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    728     int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    729     unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    730     int mask_stride, int invert_mask, int w, int h,
    731     int use_accurate_subpel_search, const int do_reset_fractional_mv) {
    732   const uint8_t *const src_address = x->plane[0].src.buf;
    733   const int src_stride = x->plane[0].src.stride;
    734   MACROBLOCKD *xd = &x->e_mbd;
    735   unsigned int besterr = INT_MAX;
    736   unsigned int sse;
    737   unsigned int thismse;
    738   const int y_stride = xd->plane[0].pre[0].stride;
    739   MV *bestmv = &x->best_mv.as_mv;
    740   const int offset = bestmv->row * y_stride + bestmv->col;
    741   const uint8_t *const y = xd->plane[0].pre[0].buf;
    742 
    743   int br = bestmv->row * 8;
    744   int bc = bestmv->col * 8;
    745   int hstep = 4;
    746   int iter, round = 3 - forced_stop;
    747   int tr = br;
    748   int tc = bc;
    749   const MV *search_step = search_step_table;
    750   int idx, best_idx = -1;
    751   unsigned int cost_array[5];
    752   int kr, kc;
    753   int minc, maxc, minr, maxr;
    754 
    755   set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);
    756 
    757   if (!allow_hp)
    758     if (round == 3) round = 2;
    759 
    760   bestmv->row *= 8;
    761   bestmv->col *= 8;
    762 
    763   if (use_accurate_subpel_search)
    764     besterr = upsampled_setup_center_error(
    765         xd, cm, mi_row, mi_col, bestmv, ref_mv, error_per_bit, vfp, src_address,
    766         src_stride, y, y_stride, second_pred, mask, mask_stride, invert_mask, w,
    767         h, offset, mvjcost, mvcost, sse1, distortion,
    768         use_accurate_subpel_search);
    769   else
    770     besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
    771                                  src_address, src_stride, y, y_stride,
    772                                  second_pred, mask, mask_stride, invert_mask, w,
    773                                  h, offset, mvjcost, mvcost, sse1, distortion);
    774 
    775   (void)cost_list;  // to silence compiler warning
    776 
    777   if (do_reset_fractional_mv) {
    778     av1_set_fractional_mv(x->fractional_best_mv);
    779   }
    780 
    781   for (iter = 0; iter < round; ++iter) {
    782     if ((x->fractional_best_mv[iter].as_mv.row == br) &&
    783         (x->fractional_best_mv[iter].as_mv.col == bc))
    784       return INT_MAX;
    785     x->fractional_best_mv[iter].as_mv.row = br;
    786     x->fractional_best_mv[iter].as_mv.col = bc;
    787     // Check vertical and horizontal sub-pixel positions.
    788     for (idx = 0; idx < 4; ++idx) {
    789       tr = br + search_step[idx].row;
    790       tc = bc + search_step[idx].col;
    791       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
    792         MV this_mv = { tr, tc };
    793 
    794         if (use_accurate_subpel_search) {
    795           thismse = upsampled_pref_error(
    796               xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
    797               pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
    798               mask, mask_stride, invert_mask, w, h, &sse,
    799               use_accurate_subpel_search);
    800         } else {
    801           thismse = estimate_upsampled_pref_error(
    802               vfp, src_address, src_stride, pre(y, y_stride, tr, tc), y_stride,
    803               sp(tc), sp(tr), second_pred, mask, mask_stride, invert_mask,
    804               &sse);
    805         }
    806 
    807         cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
    808                                                 mvcost, error_per_bit);
    809 
    810         if (cost_array[idx] < besterr) {
    811           best_idx = idx;
    812           besterr = cost_array[idx];
    813           *distortion = thismse;
    814           *sse1 = sse;
    815         }
    816       } else {
    817         cost_array[idx] = INT_MAX;
    818       }
    819     }
    820 
    821     // Check diagonal sub-pixel position
    822     kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    823     kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
    824 
    825     tc = bc + kc;
    826     tr = br + kr;
    827     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
    828       MV this_mv = { tr, tc };
    829 
    830       if (use_accurate_subpel_search) {
    831         thismse = upsampled_pref_error(
    832             xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
    833             pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
    834             mask, mask_stride, invert_mask, w, h, &sse,
    835             use_accurate_subpel_search);
    836       } else {
    837         thismse = estimate_upsampled_pref_error(
    838             vfp, src_address, src_stride, pre(y, y_stride, tr, tc), y_stride,
    839             sp(tc), sp(tr), second_pred, mask, mask_stride, invert_mask, &sse);
    840       }
    841 
    842       cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
    843                                             error_per_bit);
    844 
    845       if (cost_array[4] < besterr) {
    846         best_idx = 4;
    847         besterr = cost_array[4];
    848         *distortion = thismse;
    849         *sse1 = sse;
    850       }
    851     } else {
    852       cost_array[idx] = INT_MAX;
    853     }
    854 
    855     if (best_idx < 4 && best_idx >= 0) {
    856       br += search_step[best_idx].row;
    857       bc += search_step[best_idx].col;
    858     } else if (best_idx == 4) {
    859       br = tr;
    860       bc = tc;
    861     }
    862 
    863     if (iters_per_step > 1 && best_idx != -1) {
    864       if (use_accurate_subpel_search) {
    865         SECOND_LEVEL_CHECKS_BEST(1);
    866       } else {
    867         SECOND_LEVEL_CHECKS_BEST(0);
    868       }
    869     }
    870 
    871     search_step += 4;
    872     hstep >>= 1;
    873     best_idx = -1;
    874   }
    875 
    876   // These lines insure static analysis doesn't warn that
    877   // tr and tc aren't used after the above point.
    878   (void)tr;
    879   (void)tc;
    880 
    881   bestmv->row = br;
    882   bestmv->col = bc;
    883 
    884   return besterr;
    885 }
    886 
    887 #undef PRE
    888 #undef CHECK_BETTER
    889 
    890 unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
    891                                      BLOCK_SIZE bsize, int mi_row, int mi_col,
    892                                      const MV *this_mv) {
    893   const AV1_COMMON *const cm = &cpi->common;
    894   MACROBLOCKD *xd = &x->e_mbd;
    895   const uint8_t *const src = x->plane[0].src.buf;
    896   const int src_stride = x->plane[0].src.stride;
    897   uint8_t *const dst = xd->plane[0].dst.buf;
    898   const int dst_stride = xd->plane[0].dst.stride;
    899   const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize];
    900   const int_mv ref_mv = av1_get_ref_mv(x, 0);
    901   unsigned int mse;
    902   unsigned int sse;
    903 
    904   av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
    905                                 AOM_PLANE_Y, AOM_PLANE_Y);
    906   mse = vfp->vf(dst, dst_stride, src, src_stride, &sse);
    907   mse += mv_err_cost(this_mv, &ref_mv.as_mv, x->nmv_vec_cost, x->mv_cost_stack,
    908                      x->errorperbit);
    909   return mse;
    910 }
    911 
    912 // Refine MV in a small range
    913 unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
    914                                   BLOCK_SIZE bsize, int mi_row, int mi_col,
    915                                   int *pts0, int *pts_inref0,
    916                                   int total_samples) {
    917   const AV1_COMMON *const cm = &cpi->common;
    918   MACROBLOCKD *xd = &x->e_mbd;
    919   MB_MODE_INFO *mbmi = xd->mi[0];
    920   const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 },
    921                             { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } };
    922   const int_mv ref_mv = av1_get_ref_mv(x, 0);
    923   int16_t br = mbmi->mv[0].as_mv.row;
    924   int16_t bc = mbmi->mv[0].as_mv.col;
    925   int16_t *tr = &mbmi->mv[0].as_mv.row;
    926   int16_t *tc = &mbmi->mv[0].as_mv.col;
    927   WarpedMotionParams best_wm_params = mbmi->wm_params;
    928   int best_num_proj_ref = mbmi->num_proj_ref;
    929   unsigned int bestmse;
    930   int minc, maxc, minr, maxr;
    931   const int start = cm->allow_high_precision_mv ? 0 : 4;
    932   int ite;
    933 
    934   set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
    935                              &ref_mv.as_mv);
    936 
    937   // Calculate the center position's error
    938   assert(bc >= minc && bc <= maxc && br >= minr && br <= maxr);
    939   bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col,
    940                                     &mbmi->mv[0].as_mv);
    941 
    942   // MV search
    943   for (ite = 0; ite < 2; ++ite) {
    944     int best_idx = -1;
    945     int idx;
    946 
    947     for (idx = start; idx < start + 4; ++idx) {
    948       unsigned int thismse;
    949 
    950       *tr = br + neighbors[idx].row;
    951       *tc = bc + neighbors[idx].col;
    952 
    953       if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) {
    954         MV this_mv = { *tr, *tc };
    955         int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    956 
    957         memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
    958         memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
    959         if (total_samples > 1)
    960           mbmi->num_proj_ref =
    961               selectSamples(&this_mv, pts, pts_inref, total_samples, bsize);
    962 
    963         if (!find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, *tr,
    964                              *tc, &mbmi->wm_params, mi_row, mi_col)) {
    965           thismse =
    966               av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv);
    967 
    968           if (thismse < bestmse) {
    969             best_idx = idx;
    970             best_wm_params = mbmi->wm_params;
    971             best_num_proj_ref = mbmi->num_proj_ref;
    972             bestmse = thismse;
    973           }
    974         }
    975       }
    976     }
    977 
    978     if (best_idx == -1) break;
    979 
    980     if (best_idx >= 0) {
    981       br += neighbors[best_idx].row;
    982       bc += neighbors[best_idx].col;
    983     }
    984   }
    985 
    986   *tr = br;
    987   *tc = bc;
    988   mbmi->wm_params = best_wm_params;
    989   mbmi->num_proj_ref = best_num_proj_ref;
    990   return bestmse;
    991 }
    992 
    993 static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
    994                                int range) {
    995   return ((row - range) >= mv_limits->row_min) &
    996          ((row + range) <= mv_limits->row_max) &
    997          ((col - range) >= mv_limits->col_min) &
    998          ((col + range) <= mv_limits->col_max);
    999 }
   1000 
   1001 static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
   1002   return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
   1003          (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
   1004 }
   1005 
// Candidate-evaluation step shared by the integer-pel pattern search.
//
// Expands in place and relies on these names being in scope at the use site:
//   thissad     - SAD of the candidate just evaluated (may be updated)
//   bestsad     - best cost so far (updated on improvement)
//   best_site   - set to the loop index i on improvement
//   i           - index of the candidate within the current loop
//   this_mv, fcenter_mv, x, sad_per_bit, use_mvcost - inputs to the MV cost
//
// The MV cost is only added (and only when use_mvcost is set) if the raw SAD
// already beats bestsad; the candidate is accepted only if the resulting
// total still beats bestsad.
//
// The expansion is a bare brace block, so it is used without a trailing
// semicolon.
#define CHECK_BETTER                                                      \
  {                                                                       \
    if (thissad < bestsad) {                                              \
      if (use_mvcost)                                                     \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
      if (thissad < bestsad) {                                            \
        bestsad = thissad;                                                \
        best_site = i;                                                    \
      }                                                                   \
    }                                                                     \
  }
   1017 
// Dimensions of the num_candidates / candidates tables taken by
// pattern_search().
#define MAX_PATTERN_SCALES 11     // number of scales in a pattern
#define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
   1021 
   1022 // Calculate and return a sad+mvcost list around an integer best pel.
   1023 static INLINE void calc_int_cost_list(const MACROBLOCK *x,
   1024                                       const MV *const ref_mv, int sadpb,
   1025                                       const aom_variance_fn_ptr_t *fn_ptr,
   1026                                       const MV *best_mv, int *cost_list) {
   1027   static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
   1028   const struct buf_2d *const what = &x->plane[0].src;
   1029   const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
   1030   const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
   1031   const int br = best_mv->row;
   1032   const int bc = best_mv->col;
   1033   int i;
   1034   unsigned int sse;
   1035   const MV this_mv = { br, bc };
   1036 
   1037   cost_list[0] =
   1038       fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
   1039                  in_what->stride, &sse) +
   1040       mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
   1041   if (check_bounds(&x->mv_limits, br, bc, 1)) {
   1042     for (i = 0; i < 4; i++) {
   1043       const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
   1044       cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
   1045                                     get_buf_from_mv(in_what, &neighbor_mv),
   1046                                     in_what->stride, &sse) +
   1047                          mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmv_vec_cost,
   1048                                      x->mv_cost_stack, x->errorperbit);
   1049     }
   1050   } else {
   1051     for (i = 0; i < 4; i++) {
   1052       const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
   1053       if (!is_mv_in(&x->mv_limits, &neighbor_mv))
   1054         cost_list[i + 1] = INT_MAX;
   1055       else
   1056         cost_list[i + 1] =
   1057             fn_ptr->vf(what->buf, what->stride,
   1058                        get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
   1059                        &sse) +
   1060             mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmv_vec_cost,
   1061                         x->mv_cost_stack, x->errorperbit);
   1062     }
   1063   }
   1064 }
   1065 
   1066 static INLINE void calc_int_sad_list(const MACROBLOCK *x,
   1067                                      const MV *const ref_mv, int sadpb,
   1068                                      const aom_variance_fn_ptr_t *fn_ptr,
   1069                                      const MV *best_mv, int *cost_list,
   1070                                      const int use_mvcost, const int bestsad) {
   1071   static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
   1072   const struct buf_2d *const what = &x->plane[0].src;
   1073   const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
   1074   const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
   1075   int i;
   1076   const int br = best_mv->row;
   1077   const int bc = best_mv->col;
   1078 
   1079   if (cost_list[0] == INT_MAX) {
   1080     cost_list[0] = bestsad;
   1081     if (check_bounds(&x->mv_limits, br, bc, 1)) {
   1082       for (i = 0; i < 4; i++) {
   1083         const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
   1084         cost_list[i + 1] =
   1085             fn_ptr->sdf(what->buf, what->stride,
   1086                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1087       }
   1088     } else {
   1089       for (i = 0; i < 4; i++) {
   1090         const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
   1091         if (!is_mv_in(&x->mv_limits, &this_mv))
   1092           cost_list[i + 1] = INT_MAX;
   1093         else
   1094           cost_list[i + 1] =
   1095               fn_ptr->sdf(what->buf, what->stride,
   1096                           get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1097       }
   1098     }
   1099   } else {
   1100     if (use_mvcost) {
   1101       for (i = 0; i < 4; i++) {
   1102         const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
   1103         if (cost_list[i + 1] != INT_MAX) {
   1104           cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
   1105         }
   1106       }
   1107     }
   1108   }
   1109 }
   1110 
// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function.
//
// Parameters:
//   x              - macroblock; the result is stored in x->best_mv.as_mv
//   start_mv       - starting full-pel MV; clamped in place to x->mv_limits
//   search_param   - selects the starting scale via search_param_to_steps
//                    (0 -> scale 10, ..., 10 -> scale 0)
//   sad_per_bit    - weight passed to mvsad_err_cost()
//   do_init_search - when non-zero, every scale from 0 up to the starting
//                    scale is probed once around the start point and the
//                    search continues from the scale of the best hit
//   cost_list      - optional 5-entry output (see the comment near the end
//                    of the function); may be NULL
//   vfp            - SAD / variance function table for the block size
//   use_mvcost     - when non-zero, CHECK_BETTER adds the MV cost to each
//                    candidate's SAD
//   center_mv      - MV the cost is measured against (used as >> 3)
//   num_candidates - number of candidates at each scale
//   candidates     - candidate offsets for each scale
//
// Returns the best SAD-based cost found.
//
// NOTE(review): check_bounds() is called with range 1 << s, which presumes
// no candidate at scale s reaches further than 1 << s from the center --
// confirm against the pattern tables.
//
// The locals thissad, bestsad, best_site, i, this_mv and fcenter_mv are
// referenced by the CHECK_BETTER macro and must keep their names.
static int pattern_search(
    MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit,
    int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp,
    int use_mvcost, const MV *center_mv,
    const int num_candidates[MAX_PATTERN_SCALES],
    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, s, t;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  // True when the finest scale has 4 candidates; in that case the scale-0
  // SADs can double as the cost list.
  const int last_is_4 = num_candidates[0] == 4;
  int br, bc;
  int bestsad = INT_MAX;
  int thissad;
  // Index of the most recently selected candidate within its scale.
  int k = -1;
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  assert(search_param < MAX_MVSEARCH_STEPS);
  int best_init_s = search_param_to_steps[search_param];
  // adjust ref_mv to make sure it is within MV range
  clamp_mv(start_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
  br = start_mv->row;
  bc = start_mv->col;
  if (cost_list != NULL) {
    // INT_MAX in cost_list[0] marks the list as "not yet populated".
    cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
        INT_MAX;
  }

  // Work out the start point for the search
  bestsad = vfp->sdf(what->buf, what->stride,
                     get_buf_from_mv(in_what, start_mv), in_what->stride) +
            mvsad_err_cost(x, start_mv, &fcenter_mv, sad_per_bit);

  // Search all possible scales up to the search param around the center
  // point and pick the scale of the point that is best as the starting scale
  // of further steps around it.
  if (do_init_search) {
    s = best_init_s;
    // -1 means "the center is still the best point".
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      int best_site = -1;
      // If the whole pattern fits inside the limits, skip the per-candidate
      // range test.
      if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
    }
    // Move to the best point found by the initial sweep, if any.
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
    }
  }

  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
    // When the scale-0 pass is to fill cost_list, stop the generic loop at
    // scale 1 and handle scale 0 separately below.
    const int last_s = (last_is_4 && cost_list != NULL);
    int best_site = -1;
    s = best_init_s;

    for (; s >= last_s; s--) {
      // No need to search all points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }

      // Keep stepping at this scale: after moving to candidate k only k and
      // its two neighbours in the candidate list are checked again.
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    }

    // Note: If we enter the if below, then cost_list must be non-NULL.
    // (s only ends at 0 when last_s == 1, i.e. last_is_4 && cost_list.)
    if (s == 0) {
      // Same search as above at scale 0, but every SAD is also recorded in
      // cost_list.
      cost_list[0] = bestsad;
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            cost_list[i + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
            cost_list[i + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }
      while (best_site != -1) {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
        // The center has moved to candidate k: reset the neighbour entries,
        // store the previous center's cost in the slot opposite to k, and
        // make the new best the center entry.
        cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
        cost_list[((k + 2) % 4) + 1] = cost_list[0];
        cost_list[0] = bestsad;

        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            cost_list[next_chkpts_indices[i] + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            if (!is_mv_in(&x->mv_limits, &this_mv)) {
              cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
              continue;
            }
            cost_list[next_chkpts_indices[i] + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      }
    }
  }

  // Returns the one-away integer pel cost/sad around the best as follows:
  // cost_list[0]: cost/sad at the best integer pel
  // cost_list[1]: cost/sad at delta {0, -1} (left)   from the best integer pel
  // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel
  // cost_list[3]: cost/sad at delta { 0, 1} (right)  from the best integer pel
  // cost_list[4]: cost/sad at delta {-1, 0} (top)    from the best integer pel
  if (cost_list) {
    const MV best_int_mv = { br, bc };
    if (last_is_4) {
      calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
                        use_mvcost, bestsad);
    } else {
      calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
                         cost_list);
    }
  }
  x->best_mv.as_mv.row = br;
  x->best_mv.as_mv.col = bc;
  return bestsad;
}
   1371 
   1372 int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
   1373                        const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
   1374                        int use_mvcost) {
   1375   const MACROBLOCKD *const xd = &x->e_mbd;
   1376   const struct buf_2d *const what = &x->plane[0].src;
   1377   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1378   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
   1379   unsigned int unused;
   1380 
   1381   return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
   1382                  in_what->stride, &unused) +
   1383          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
   1384                                    x->mv_cost_stack, x->errorperbit)
   1385                      : 0);
   1386 }
   1387 
   1388 int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
   1389                           const MV *center_mv, const uint8_t *second_pred,
   1390                           const aom_variance_fn_ptr_t *vfp, int use_mvcost) {
   1391   const MACROBLOCKD *const xd = &x->e_mbd;
   1392   const struct buf_2d *const what = &x->plane[0].src;
   1393   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1394   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
   1395   unsigned int unused;
   1396 
   1397   return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
   1398                    what->buf, what->stride, &unused, second_pred) +
   1399          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
   1400                                    x->mv_cost_stack, x->errorperbit)
   1401                      : 0);
   1402 }
   1403 
   1404 int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
   1405                             const MV *center_mv, const uint8_t *second_pred,
   1406                             const uint8_t *mask, int mask_stride,
   1407                             int invert_mask, const aom_variance_fn_ptr_t *vfp,
   1408                             int use_mvcost) {
   1409   const MACROBLOCKD *const xd = &x->e_mbd;
   1410   const struct buf_2d *const what = &x->plane[0].src;
   1411   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1412   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
   1413   unsigned int unused;
   1414 
   1415   return vfp->msvf(what->buf, what->stride, 0, 0,
   1416                    get_buf_from_mv(in_what, best_mv), in_what->stride,
   1417                    second_pred, mask, mask_stride, invert_mask, &unused) +
   1418          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
   1419                                    x->mv_cost_stack, x->errorperbit)
   1420                      : 0);
   1421 }
   1422 
   1423 int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
   1424                    int sad_per_bit, int do_init_search, int *cost_list,
   1425                    const aom_variance_fn_ptr_t *vfp, int use_mvcost,
   1426                    const MV *center_mv) {
   1427   // First scale has 8-closest points, the rest have 6 points in hex shape
   1428   // at increasing scales
   1429   static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
   1430                                                               6, 6, 6, 6, 6 };
   1431   // Note that the largest candidate step at each scale is 2^scale
   1432   /* clang-format off */
   1433   static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
   1434     { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
   1435       { -1, 0 } },
   1436     { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
   1437     { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
   1438     { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
   1439     { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
   1440     { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
   1441       { -32, 0 } },
   1442     { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
   1443       { -64, 0 } },
   1444     { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
   1445       { -128, 0 } },
   1446     { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
   1447       { -256, 0 } },
   1448     { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
   1449       { -512, 0 } },
   1450     { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
   1451       { -512, 1024 }, { -1024, 0 } },
   1452   };
   1453   /* clang-format on */
   1454   return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
   1455                         cost_list, vfp, use_mvcost, center_mv,
   1456                         hex_num_candidates, hex_candidates);
   1457 }
   1458 
   1459 static int bigdia_search(MACROBLOCK *x, MV *start_mv, int search_param,
   1460                          int sad_per_bit, int do_init_search, int *cost_list,
   1461                          const aom_variance_fn_ptr_t *vfp, int use_mvcost,
   1462                          const MV *center_mv) {
   1463   // First scale has 4-closest points, the rest have 8 points in diamond
   1464   // shape at increasing scales
   1465   static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
   1466     4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
   1467   };
   1468   // Note that the largest candidate step at each scale is 2^scale
   1469   /* clang-format off */
   1470   static const MV
   1471       bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
   1472         { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
   1473         { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
   1474           { -1, 1 }, { -2, 0 } },
   1475         { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
   1476           { -2, 2 }, { -4, 0 } },
   1477         { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
   1478           { -4, 4 }, { -8, 0 } },
   1479         { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
   1480           { -8, 8 }, { -16, 0 } },
   1481         { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
   1482           { 0, 32 }, { -16, 16 }, { -32, 0 } },
   1483         { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
   1484           { 0, 64 }, { -32, 32 }, { -64, 0 } },
   1485         { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
   1486           { 0, 128 }, { -64, 64 }, { -128, 0 } },
   1487         { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
   1488           { 0, 256 }, { -128, 128 }, { -256, 0 } },
   1489         { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
   1490           { 0, 512 }, { -256, 256 }, { -512, 0 } },
   1491         { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
   1492           { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } },
   1493       };
   1494   /* clang-format on */
   1495   return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
   1496                         cost_list, vfp, use_mvcost, center_mv,
   1497                         bigdia_num_candidates, bigdia_candidates);
   1498 }
   1499 
   1500 static int square_search(MACROBLOCK *x, MV *start_mv, int search_param,
   1501                          int sad_per_bit, int do_init_search, int *cost_list,
   1502                          const aom_variance_fn_ptr_t *vfp, int use_mvcost,
   1503                          const MV *center_mv) {
   1504   // All scales have 8 closest points in square shape
   1505   static const int square_num_candidates[MAX_PATTERN_SCALES] = {
   1506     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
   1507   };
   1508   // Note that the largest candidate step at each scale is 2^scale
   1509   /* clang-format off */
   1510   static const MV
   1511       square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
   1512         { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
   1513           { -1, 1 }, { -1, 0 } },
   1514         { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
   1515           { -2, 2 }, { -2, 0 } },
   1516         { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
   1517           { -4, 4 }, { -4, 0 } },
   1518         { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
   1519           { -8, 8 }, { -8, 0 } },
   1520         { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
   1521           { 0, 16 }, { -16, 16 }, { -16, 0 } },
   1522         { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
   1523           { 0, 32 }, { -32, 32 }, { -32, 0 } },
   1524         { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
   1525           { 0, 64 }, { -64, 64 }, { -64, 0 } },
   1526         { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
   1527           { 0, 128 }, { -128, 128 }, { -128, 0 } },
   1528         { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
   1529           { 0, 256 }, { -256, 256 }, { -256, 0 } },
   1530         { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
   1531           { 0, 512 }, { -512, 512 }, { -512, 0 } },
   1532         { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
   1533           { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
   1534       };
   1535   /* clang-format on */
   1536   return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
   1537                         cost_list, vfp, use_mvcost, center_mv,
   1538                         square_num_candidates, square_candidates);
   1539 }
   1540 
   1541 static int fast_hex_search(MACROBLOCK *x, MV *ref_mv, int search_param,
   1542                            int sad_per_bit,
   1543                            int do_init_search,  // must be zero for fast_hex
   1544                            int *cost_list, const aom_variance_fn_ptr_t *vfp,
   1545                            int use_mvcost, const MV *center_mv) {
   1546   return av1_hex_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
   1547                         sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
   1548                         center_mv);
   1549 }
   1550 
   1551 static int fast_dia_search(MACROBLOCK *x, MV *ref_mv, int search_param,
   1552                            int sad_per_bit, int do_init_search, int *cost_list,
   1553                            const aom_variance_fn_ptr_t *vfp, int use_mvcost,
   1554                            const MV *center_mv) {
   1555   return bigdia_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
   1556                        sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
   1557                        center_mv);
   1558 }
   1559 
   1560 #undef CHECK_BETTER
   1561 
   1562 // Exhuastive motion search around a given centre position with a given
   1563 // step size.
   1564 static int exhuastive_mesh_search(MACROBLOCK *x, MV *ref_mv, MV *best_mv,
   1565                                   int range, int step, int sad_per_bit,
   1566                                   const aom_variance_fn_ptr_t *fn_ptr,
   1567                                   const MV *center_mv) {
   1568   const MACROBLOCKD *const xd = &x->e_mbd;
   1569   const struct buf_2d *const what = &x->plane[0].src;
   1570   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1571   MV fcenter_mv = { center_mv->row, center_mv->col };
   1572   unsigned int best_sad = INT_MAX;
   1573   int r, c, i;
   1574   int start_col, end_col, start_row, end_row;
   1575   int col_step = (step > 1) ? step : 4;
   1576 
   1577   assert(step >= 1);
   1578 
   1579   clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   1580            x->mv_limits.row_min, x->mv_limits.row_max);
   1581   *best_mv = fcenter_mv;
   1582   best_sad =
   1583       fn_ptr->sdf(what->buf, what->stride,
   1584                   get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
   1585       mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
   1586   start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
   1587   start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
   1588   end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.row);
   1589   end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.col);
   1590 
   1591   for (r = start_row; r <= end_row; r += step) {
   1592     for (c = start_col; c <= end_col; c += col_step) {
   1593       // Step > 1 means we are not checking every location in this pass.
   1594       if (step > 1) {
   1595         const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
   1596         unsigned int sad =
   1597             fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
   1598                         in_what->stride);
   1599         if (sad < best_sad) {
   1600           sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
   1601           if (sad < best_sad) {
   1602             best_sad = sad;
   1603             x->second_best_mv.as_mv = *best_mv;
   1604             *best_mv = mv;
   1605           }
   1606         }
   1607       } else {
   1608         // 4 sads in a single call if we are checking every location
   1609         if (c + 3 <= end_col) {
   1610           unsigned int sads[4];
   1611           const uint8_t *addrs[4];
   1612           for (i = 0; i < 4; ++i) {
   1613             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
   1614             addrs[i] = get_buf_from_mv(in_what, &mv);
   1615           }
   1616           fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
   1617 
   1618           for (i = 0; i < 4; ++i) {
   1619             if (sads[i] < best_sad) {
   1620               const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
   1621               const unsigned int sad =
   1622                   sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
   1623               if (sad < best_sad) {
   1624                 best_sad = sad;
   1625                 x->second_best_mv.as_mv = *best_mv;
   1626                 *best_mv = mv;
   1627               }
   1628             }
   1629           }
   1630         } else {
   1631           for (i = 0; i < end_col - c; ++i) {
   1632             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
   1633             unsigned int sad =
   1634                 fn_ptr->sdf(what->buf, what->stride,
   1635                             get_buf_from_mv(in_what, &mv), in_what->stride);
   1636             if (sad < best_sad) {
   1637               sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
   1638               if (sad < best_sad) {
   1639                 best_sad = sad;
   1640                 x->second_best_mv.as_mv = *best_mv;
   1641                 *best_mv = mv;
   1642               }
   1643             }
   1644           }
   1645         }
   1646       }
   1647     }
   1648   }
   1649 
   1650   return best_sad;
   1651 }
   1652 
// Multi-step full-pel SAD search driven by the search-site table in cfg
// (C reference implementation).
//
//   x             Macroblock context; supplies source/reference buffers and
//                 mv_limits. x->second_best_mv is set to the previous best
//                 vector whenever the best position moves.
//   cfg           Search-site configuration (sites, ss_count,
//                 searches_per_step).
//   ref_mv        Starting vector; clamped IN PLACE to x->mv_limits.
//   best_mv       Output: best vector found.
//   search_param  Number of leading (largest) steps to skip; see below.
//   sad_per_bit   Weight forwarded to mvsad_err_cost().
//   num00         Output: number of steps that found no better site while the
//                 best position was still the starting position.
//   fn_ptr        SAD function table (sdf and sdx4df are used).
//   center_mv     Vector whose >> 3 value is the reference for MV cost.
//
// Returns the best SAD including its MV cost.
int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg,
                             MV *ref_mv, MV *best_mv, int search_param,
                             int sad_per_bit, int *num00,
                             const aom_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv) {
  int i, j, step;

  const MACROBLOCKD *const xd = &x->e_mbd;
  uint8_t *what = x->plane[0].src.buf;
  const int what_stride = x->plane[0].src.stride;
  const uint8_t *in_what;
  const int in_what_stride = xd->plane[0].pre[0].stride;
  const uint8_t *best_address;

  unsigned int bestsad = INT_MAX;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;

  // search_param determines the length of the initial step and hence the number
  // of iterations.
  // 0 = initial step (MAX_FIRST_STEP) pel
  // 1 = (MAX_FIRST_STEP/2) pel,
  // 2 = (MAX_FIRST_STEP/4) pel...
  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;

  // MV cost is measured against center_mv shifted right by 3.
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
  ref_row = ref_mv->row;
  ref_col = ref_mv->col;
  *num00 = 0;
  best_mv->row = ref_row;
  best_mv->col = ref_col;

  // Work out the start point for the search
  in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
  best_address = in_what;

  // Check the starting position
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);

  // i is a running index into ss[] that is never reset between steps; site
  // indexing starts at 1 (NOTE(review): presumably ss[0] is the centre site --
  // confirm against the code that builds cfg).
  i = 1;

  for (step = 0; step < tot_steps; step++) {
    int all_in = 1, t;

    // All_in is true if every one of the points we are checking are within
    // the bounds of the image.
    // NOTE(review): this relies on the first four sites of each step being the
    // extreme up / down / left / right offsets, in that order -- confirm
    // against the code that builds cfg.
    all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_limits.row_min);
    all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_limits.row_max);
    all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_limits.col_min);
    all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_limits.col_max);

    // If all the pixels are within the bounds we don't check whether the
    // search point is valid in this loop,  otherwise we check each point
    // for validity..
    if (all_in) {
      unsigned int sad_array[4];

      // Sites are evaluated four at a time with the 4-way SAD function; the
      // inner loop below advances i once per site.
      for (j = 0; j < cfg->searches_per_step; j += 4) {
        unsigned char const *block_offset[4];

        for (t = 0; t < 4; t++)
          block_offset[t] = ss[i + t].offset + best_address;

        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                       sad_array);

        for (t = 0; t < 4; t++, i++) {
          // The MV cost is only computed when the raw SAD alone could still
          // beat the current best.
          if (sad_array[t] < bestsad) {
            const MV this_mv = { best_mv->row + ss[i].mv.row,
                                 best_mv->col + ss[i].mv.col };
            sad_array[t] +=
                mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (sad_array[t] < bestsad) {
              bestsad = sad_array[t];
              best_site = i;
            }
          }
        }
      }
    } else {
      for (j = 0; j < cfg->searches_per_step; j++) {
        // Trap illegal vectors
        const MV this_mv = { best_mv->row + ss[i].mv.row,
                             best_mv->col + ss[i].mv.col };

        if (is_mv_in(&x->mv_limits, &this_mv)) {
          const uint8_t *const check_here = ss[i].offset + best_address;
          unsigned int thissad =
              fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = i;
            }
          }
        }
        i++;
      }
    }
    // Because i is never reset, best_site differs from last_site exactly when
    // a site in this step improved on the best: move the best position there.
    if (best_site != last_site) {
      x->second_best_mv.as_mv = *best_mv;
      best_mv->row += ss[best_site].mv.row;
      best_mv->col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
      // Optional extension: keep stepping by the winning site's offset for as
      // long as doing so stays within limits and keeps improving the cost.
      while (1) {
        const MV this_mv = { best_mv->row + ss[best_site].mv.row,
                             best_mv->col + ss[best_site].mv.col };
        if (is_mv_in(&x->mv_limits, &this_mv)) {
          const uint8_t *const check_here = ss[best_site].offset + best_address;
          unsigned int thissad =
              fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
          if (thissad < bestsad) {
            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (thissad < bestsad) {
              bestsad = thissad;
              best_mv->row += ss[best_site].mv.row;
              best_mv->col += ss[best_site].mv.col;
              best_address += ss[best_site].offset;
              continue;
            }
          }
        }
        break;
      }
#endif
    } else if (best_address == in_what) {
      // No improvement in this step and the best is still the start position.
      (*num00)++;
    }
  }
  return bestsad;
}
   1795 
   1796 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
   1797               point as the best match, we will do a final 1-away diamond
   1798               refining search  */
   1799 static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x,
   1800                               MV *mvp_full, int step_param, int sadpb,
   1801                               int further_steps, int do_refine, int *cost_list,
   1802                               const aom_variance_fn_ptr_t *fn_ptr,
   1803                               const MV *ref_mv, const search_site_config *cfg) {
   1804   MV temp_mv;
   1805   int thissme, n, num00 = 0;
   1806   int bestsme = cpi->diamond_search_sad(x, cfg, mvp_full, &temp_mv, step_param,
   1807                                         sadpb, &n, fn_ptr, ref_mv);
   1808   if (bestsme < INT_MAX)
   1809     bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
   1810   x->best_mv.as_mv = temp_mv;
   1811 
   1812   // If there won't be more n-step search, check to see if refining search is
   1813   // needed.
   1814   if (n > further_steps) do_refine = 0;
   1815 
   1816   while (n < further_steps) {
   1817     ++n;
   1818 
   1819     if (num00) {
   1820       num00--;
   1821     } else {
   1822       thissme =
   1823           cpi->diamond_search_sad(x, cfg, mvp_full, &temp_mv, step_param + n,
   1824                                   sadpb, &num00, fn_ptr, ref_mv);
   1825       if (thissme < INT_MAX)
   1826         thissme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
   1827 
   1828       // check to see if refining search is needed.
   1829       if (num00 > further_steps - n) do_refine = 0;
   1830 
   1831       if (thissme < bestsme) {
   1832         bestsme = thissme;
   1833         x->best_mv.as_mv = temp_mv;
   1834       }
   1835     }
   1836   }
   1837 
   1838   // final 1-away diamond refining search
   1839   if (do_refine) {
   1840     const int search_range = 8;
   1841     MV best_mv = x->best_mv.as_mv;
   1842     thissme = av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
   1843                                       ref_mv);
   1844     if (thissme < INT_MAX)
   1845       thissme = av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
   1846     if (thissme < bestsme) {
   1847       bestsme = thissme;
   1848       x->best_mv.as_mv = best_mv;
   1849     }
   1850   }
   1851 
   1852   // Return cost list.
   1853   if (cost_list) {
   1854     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
   1855   }
   1856   return bestsme;
   1857 }
   1858 
   1859 #define MIN_RANGE 7
   1860 #define MAX_RANGE 256
   1861 #define MIN_INTERVAL 1
   1862 // Runs an limited range exhaustive mesh search using a pattern set
   1863 // according to the encode speed profile.
   1864 static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x,
   1865                                  const MV *centre_mv_full, int sadpb,
   1866                                  int *cost_list,
   1867                                  const aom_variance_fn_ptr_t *fn_ptr,
   1868                                  const MV *ref_mv, MV *dst_mv) {
   1869   const SPEED_FEATURES *const sf = &cpi->sf;
   1870   MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
   1871   MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
   1872   int bestsme;
   1873   int i;
   1874   int interval = sf->mesh_patterns[0].interval;
   1875   int range = sf->mesh_patterns[0].range;
   1876   int baseline_interval_divisor;
   1877 
   1878   // Keep track of number of exhaustive calls (this frame in this thread).
   1879   if (x->ex_search_count_ptr != NULL) ++(*x->ex_search_count_ptr);
   1880 
   1881   // Trap illegal values for interval and range for this function.
   1882   if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
   1883       (interval > range))
   1884     return INT_MAX;
   1885 
   1886   baseline_interval_divisor = range / interval;
   1887 
   1888   // Check size of proposed first range against magnitude of the centre
   1889   // value used as a starting point.
   1890   range = AOMMAX(range, (5 * AOMMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
   1891   range = AOMMIN(range, MAX_RANGE);
   1892   interval = AOMMAX(interval, range / baseline_interval_divisor);
   1893 
   1894   // initial search
   1895   bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
   1896                                    sadpb, fn_ptr, &temp_mv);
   1897 
   1898   if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
   1899     // Progressive searches with range and step size decreasing each time
   1900     // till we reach a step size of 1. Then break out.
   1901     for (i = 1; i < MAX_MESH_STEP; ++i) {
   1902       // First pass with coarser step and longer range
   1903       bestsme = exhuastive_mesh_search(
   1904           x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
   1905           sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
   1906 
   1907       if (sf->mesh_patterns[i].interval == 1) break;
   1908     }
   1909   }
   1910 
   1911   if (bestsme < INT_MAX)
   1912     bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
   1913   *dst_mv = temp_mv;
   1914 
   1915   // Return cost list.
   1916   if (cost_list) {
   1917     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
   1918   }
   1919   return bestsme;
   1920 }
   1921 
   1922 int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
   1923                             int search_range,
   1924                             const aom_variance_fn_ptr_t *fn_ptr,
   1925                             const MV *center_mv) {
   1926   const MACROBLOCKD *const xd = &x->e_mbd;
   1927   const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
   1928   const struct buf_2d *const what = &x->plane[0].src;
   1929   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1930   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
   1931   const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
   1932   unsigned int best_sad =
   1933       fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
   1934       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   1935   int i, j;
   1936 
   1937   for (i = 0; i < search_range; i++) {
   1938     int best_site = -1;
   1939     const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
   1940                        ((ref_mv->row + 1) < x->mv_limits.row_max) &
   1941                        ((ref_mv->col - 1) > x->mv_limits.col_min) &
   1942                        ((ref_mv->col + 1) < x->mv_limits.col_max);
   1943 
   1944     if (all_in) {
   1945       unsigned int sads[4];
   1946       const uint8_t *const positions[4] = { best_address - in_what->stride,
   1947                                             best_address - 1, best_address + 1,
   1948                                             best_address + in_what->stride };
   1949 
   1950       fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
   1951 
   1952       for (j = 0; j < 4; ++j) {
   1953         if (sads[j] < best_sad) {
   1954           const MV mv = { ref_mv->row + neighbors[j].row,
   1955                           ref_mv->col + neighbors[j].col };
   1956           sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
   1957           if (sads[j] < best_sad) {
   1958             best_sad = sads[j];
   1959             best_site = j;
   1960           }
   1961         }
   1962       }
   1963     } else {
   1964       for (j = 0; j < 4; ++j) {
   1965         const MV mv = { ref_mv->row + neighbors[j].row,
   1966                         ref_mv->col + neighbors[j].col };
   1967 
   1968         if (is_mv_in(&x->mv_limits, &mv)) {
   1969           unsigned int sad =
   1970               fn_ptr->sdf(what->buf, what->stride,
   1971                           get_buf_from_mv(in_what, &mv), in_what->stride);
   1972           if (sad < best_sad) {
   1973             sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
   1974             if (sad < best_sad) {
   1975               best_sad = sad;
   1976               best_site = j;
   1977             }
   1978           }
   1979         }
   1980       }
   1981     }
   1982 
   1983     if (best_site == -1) {
   1984       break;
   1985     } else {
   1986       x->second_best_mv.as_mv = *ref_mv;
   1987       ref_mv->row += neighbors[best_site].row;
   1988       ref_mv->col += neighbors[best_site].col;
   1989       best_address = get_buf_from_mv(in_what, ref_mv);
   1990     }
   1991   }
   1992 
   1993   return best_sad;
   1994 }
   1995 
   1996 // This function is called when we do joint motion search in comp_inter_inter
   1997 // mode, or when searching for one component of an ext-inter compound mode.
   1998 int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
   1999                              const aom_variance_fn_ptr_t *fn_ptr,
   2000                              const uint8_t *mask, int mask_stride,
   2001                              int invert_mask, const MV *center_mv,
   2002                              const uint8_t *second_pred) {
   2003   static const search_neighbors neighbors[8] = {
   2004     { { -1, 0 }, -1 * SEARCH_GRID_STRIDE_8P + 0 },
   2005     { { 0, -1 }, 0 * SEARCH_GRID_STRIDE_8P - 1 },
   2006     { { 0, 1 }, 0 * SEARCH_GRID_STRIDE_8P + 1 },
   2007     { { 1, 0 }, 1 * SEARCH_GRID_STRIDE_8P + 0 },
   2008     { { -1, -1 }, -1 * SEARCH_GRID_STRIDE_8P - 1 },
   2009     { { 1, -1 }, 1 * SEARCH_GRID_STRIDE_8P - 1 },
   2010     { { -1, 1 }, -1 * SEARCH_GRID_STRIDE_8P + 1 },
   2011     { { 1, 1 }, 1 * SEARCH_GRID_STRIDE_8P + 1 }
   2012   };
   2013   const MACROBLOCKD *const xd = &x->e_mbd;
   2014   const struct buf_2d *const what = &x->plane[0].src;
   2015   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   2016   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
   2017   MV *best_mv = &x->best_mv.as_mv;
   2018   unsigned int best_sad = INT_MAX;
   2019   int i, j;
   2020   uint8_t do_refine_search_grid[SEARCH_GRID_STRIDE_8P * SEARCH_GRID_STRIDE_8P] =
   2021       { 0 };
   2022   int grid_center = SEARCH_GRID_CENTER_8P;
   2023   int grid_coord = grid_center;
   2024 
   2025   clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   2026            x->mv_limits.row_min, x->mv_limits.row_max);
   2027   if (mask) {
   2028     best_sad = fn_ptr->msdf(what->buf, what->stride,
   2029                             get_buf_from_mv(in_what, best_mv), in_what->stride,
   2030                             second_pred, mask, mask_stride, invert_mask) +
   2031                mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
   2032   } else {
   2033     best_sad =
   2034         fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
   2035                      in_what->stride, second_pred) +
   2036         mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
   2037   }
   2038 
   2039   do_refine_search_grid[grid_coord] = 1;
   2040 
   2041   for (i = 0; i < search_range; ++i) {
   2042     int best_site = -1;
   2043 
   2044     for (j = 0; j < 8; ++j) {
   2045       grid_coord = grid_center + neighbors[j].coord_offset;
   2046       if (do_refine_search_grid[grid_coord] == 1) {
   2047         continue;
   2048       }
   2049       const MV mv = { best_mv->row + neighbors[j].coord.row,
   2050                       best_mv->col + neighbors[j].coord.col };
   2051 
   2052       do_refine_search_grid[grid_coord] = 1;
   2053       if (is_mv_in(&x->mv_limits, &mv)) {
   2054         unsigned int sad;
   2055         if (mask) {
   2056           sad = fn_ptr->msdf(what->buf, what->stride,
   2057                              get_buf_from_mv(in_what, &mv), in_what->stride,
   2058                              second_pred, mask, mask_stride, invert_mask);
   2059         } else {
   2060           sad = fn_ptr->sdaf(what->buf, what->stride,
   2061                              get_buf_from_mv(in_what, &mv), in_what->stride,
   2062                              second_pred);
   2063         }
   2064         if (sad < best_sad) {
   2065           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
   2066           if (sad < best_sad) {
   2067             best_sad = sad;
   2068             best_site = j;
   2069           }
   2070         }
   2071       }
   2072     }
   2073 
   2074     if (best_site == -1) {
   2075       break;
   2076     } else {
   2077       best_mv->row += neighbors[best_site].coord.row;
   2078       best_mv->col += neighbors[best_site].coord.col;
   2079       grid_center += neighbors[best_site].coord_offset;
   2080     }
   2081   }
   2082   return best_sad;
   2083 }
   2084 
   2085 #define MIN_EX_SEARCH_LIMIT 128
   2086 static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
   2087   const SPEED_FEATURES *const sf = &cpi->sf;
   2088   int is_allowed = sf->allow_exhaustive_searches &&
   2089                    (sf->exhaustive_searches_thresh < INT_MAX) &&
   2090                    !cpi->rc.is_src_frame_alt_ref;
   2091   if (x->m_search_count_ptr != NULL && x->ex_search_count_ptr != NULL) {
   2092     const int max_ex =
   2093         AOMMAX(MIN_EX_SEARCH_LIMIT,
   2094                (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
   2095     is_allowed = *x->ex_search_count_ptr <= max_ex && is_allowed;
   2096   }
   2097   return is_allowed;
   2098 }
   2099 
   // Finds the displacement of `src` within `ref` that minimises
   // aom_vector_var(), searching positions 0..bw where bw = 4 << bwl.
   //
   // A coarse pass probes every 16th position; the best position is then refined
   // by probing one step to either side with step sizes 8, 4, 2 and 1. The
   // result is returned relative to the middle of the range (position - bw / 2).
   static int vector_match(int16_t *ref, int16_t *src, int bwl) {
     const int bw = 4 << bwl;
     int best_sad = INT_MAX;
     int best_pos = 0;

     // Coarse pass.
     for (int pos = 0; pos <= bw; pos += 16) {
       const int sad = aom_vector_var(&ref[pos], src, bwl);
       if (sad < best_sad) {
         best_sad = sad;
         best_pos = pos;
       }
     }

     // Successive refinement around the best position of the previous stage.
     for (int step = 8; step >= 1; step >>= 1) {
       const int anchor = best_pos;
       for (int side = -1; side <= 1; side += 2) {
         const int pos = anchor + side * step;
         // Stay inside the searched range.
         if (pos < 0 || pos > bw) continue;
         const int sad = aom_vector_var(&ref[pos], src, bwl);
         if (sad < best_sad) {
           best_sad = sad;
           best_pos = pos;
         }
       }
     }

     return best_pos - (bw >> 1);
   }
   2164 
   2165 static const MV search_pos[4] = {
   2166   { -1, 0 },
   2167   { 0, -1 },
   2168   { 0, 1 },
   2169   { 1, 0 },
   2170 };
   2171 
   2172 unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
   2173                                            BLOCK_SIZE bsize, int mi_row,
   2174                                            int mi_col, const MV *ref_mv) {
   2175   MACROBLOCKD *xd = &x->e_mbd;
   2176   MB_MODE_INFO *mi = xd->mi[0];
   2177   struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
   2178   DECLARE_ALIGNED(16, int16_t, hbuf[256]);
   2179   DECLARE_ALIGNED(16, int16_t, vbuf[256]);
   2180   DECLARE_ALIGNED(16, int16_t, src_hbuf[128]);
   2181   DECLARE_ALIGNED(16, int16_t, src_vbuf[128]);
   2182   int idx;
   2183   const int bw = 4 << mi_size_wide_log2[bsize];
   2184   const int bh = 4 << mi_size_high_log2[bsize];
   2185   const int search_width = bw << 1;
   2186   const int search_height = bh << 1;
   2187   const int src_stride = x->plane[0].src.stride;
   2188   const int ref_stride = xd->plane[0].pre[0].stride;
   2189   uint8_t const *ref_buf, *src_buf;
   2190   MV *tmp_mv = &xd->mi[0]->mv[0].as_mv;
   2191   unsigned int best_sad, tmp_sad, this_sad[4];
   2192   MV this_mv;
   2193   const int norm_factor = 3 + (bw >> 5);
   2194   const YV12_BUFFER_CONFIG *scaled_ref_frame =
   2195       av1_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
   2196   MvLimits subpel_mv_limits;
   2197 
   2198   if (scaled_ref_frame) {
   2199     int i;
   2200     // Swap out the reference frame for a version that's been scaled to
   2201     // match the resolution of the current frame, allowing the existing
   2202     // motion search code to be used without additional modifications.
   2203     for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
   2204     av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL,
   2205                          MAX_MB_PLANE);
   2206   }
   2207 
   2208   if (xd->bd != 8) {
   2209     unsigned int sad;
   2210     tmp_mv->row = 0;
   2211     tmp_mv->col = 0;
   2212     sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
   2213                                  xd->plane[0].pre[0].buf, ref_stride);
   2214 
   2215     if (scaled_ref_frame) {
   2216       int i;
   2217       for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
   2218     }
   2219     return sad;
   2220   }
   2221 
   2222   // Set up prediction 1-D reference set
   2223   ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
   2224   for (idx = 0; idx < search_width; idx += 16) {
   2225     aom_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
   2226     ref_buf += 16;
   2227   }
   2228 
   2229   ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
   2230   for (idx = 0; idx < search_height; ++idx) {
   2231     vbuf[idx] = aom_int_pro_col(ref_buf, bw) >> norm_factor;
   2232     ref_buf += ref_stride;
   2233   }
   2234 
   2235   // Set up src 1-D reference set
   2236   for (idx = 0; idx < bw; idx += 16) {
   2237     src_buf = x->plane[0].src.buf + idx;
   2238     aom_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
   2239   }
   2240 
   2241   src_buf = x->plane[0].src.buf;
   2242   for (idx = 0; idx < bh; ++idx) {
   2243     src_vbuf[idx] = aom_int_pro_col(src_buf, bw) >> norm_factor;
   2244     src_buf += src_stride;
   2245   }
   2246 
   2247   // Find the best match per 1-D search
   2248   tmp_mv->col = vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize]);
   2249   tmp_mv->row = vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize]);
   2250 
   2251   this_mv = *tmp_mv;
   2252   src_buf = x->plane[0].src.buf;
   2253   ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
   2254   best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
   2255 
   2256   {
   2257     const uint8_t *const pos[4] = {
   2258       ref_buf - ref_stride,
   2259       ref_buf - 1,
   2260       ref_buf + 1,
   2261       ref_buf + ref_stride,
   2262     };
   2263 
   2264     cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
   2265   }
   2266 
   2267   for (idx = 0; idx < 4; ++idx) {
   2268     if (this_sad[idx] < best_sad) {
   2269       best_sad = this_sad[idx];
   2270       tmp_mv->row = search_pos[idx].row + this_mv.row;
   2271       tmp_mv->col = search_pos[idx].col + this_mv.col;
   2272     }
   2273   }
   2274 
   2275   if (this_sad[0] < this_sad[3])
   2276     this_mv.row -= 1;
   2277   else
   2278     this_mv.row += 1;
   2279 
   2280   if (this_sad[1] < this_sad[2])
   2281     this_mv.col -= 1;
   2282   else
   2283     this_mv.col += 1;
   2284 
   2285   ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
   2286 
   2287   tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
   2288   if (best_sad > tmp_sad) {
   2289     *tmp_mv = this_mv;
   2290     best_sad = tmp_sad;
   2291   }
   2292 
   2293   tmp_mv->row *= 8;
   2294   tmp_mv->col *= 8;
   2295 
   2296   set_subpel_mv_search_range(
   2297       &x->mv_limits, &subpel_mv_limits.col_min, &subpel_mv_limits.col_max,
   2298       &subpel_mv_limits.row_min, &subpel_mv_limits.row_max, ref_mv);
   2299   clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max,
   2300            subpel_mv_limits.row_min, subpel_mv_limits.row_max);
   2301 
   2302   if (scaled_ref_frame) {
   2303     int i;
   2304     for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
   2305   }
   2306 
   2307   return best_sad;
   2308 }
   2309 
   2310 int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
   2311                           MV *mvp_full, int step_param, int method,
   2312                           int run_mesh_search, int error_per_bit,
   2313                           int *cost_list, const MV *ref_mv, int var_max, int rd,
   2314                           int x_pos, int y_pos, int intra,
   2315                           const search_site_config *cfg) {
   2316   const SPEED_FEATURES *const sf = &cpi->sf;
   2317   const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
   2318   int var = 0;
   2319 
   2320   if (cost_list) {
   2321     cost_list[0] = INT_MAX;
   2322     cost_list[1] = INT_MAX;
   2323     cost_list[2] = INT_MAX;
   2324     cost_list[3] = INT_MAX;
   2325     cost_list[4] = INT_MAX;
   2326   }
   2327 
   2328   // Keep track of number of searches (this frame in this thread).
   2329   if (x->m_search_count_ptr != NULL) ++(*x->m_search_count_ptr);
   2330 
   2331   switch (method) {
   2332     case FAST_DIAMOND:
   2333       var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
   2334                             cost_list, fn_ptr, 1, ref_mv);
   2335       break;
   2336     case FAST_HEX:
   2337       var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
   2338                             cost_list, fn_ptr, 1, ref_mv);
   2339       break;
   2340     case HEX:
   2341       var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
   2342                            fn_ptr, 1, ref_mv);
   2343       break;
   2344     case SQUARE:
   2345       var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
   2346                           fn_ptr, 1, ref_mv);
   2347       break;
   2348     case BIGDIA:
   2349       var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
   2350                           fn_ptr, 1, ref_mv);
   2351       break;
   2352     case NSTEP:
   2353       var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
   2354                                MAX_MVSEARCH_STEPS - 1 - step_param, 1,
   2355                                cost_list, fn_ptr, ref_mv, cfg);
   2356 
   2357       // Should we allow a follow on exhaustive search?
   2358       if (is_exhaustive_allowed(cpi, x)) {
   2359         int exhuastive_thr = sf->exhaustive_searches_thresh;
   2360         exhuastive_thr >>=
   2361             10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
   2362 
   2363         // Threshold variance for an exhaustive full search.
   2364         if (var > exhuastive_thr) {
   2365           int var_ex;
   2366           MV tmp_mv_ex;
   2367           var_ex =
   2368               full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
   2369                                     cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
   2370 
   2371           if (var_ex < var) {
   2372             var = var_ex;
   2373             x->best_mv.as_mv = tmp_mv_ex;
   2374           }
   2375         }
   2376       }
   2377       break;
   2378     default: assert(0 && "Invalid search method.");
   2379   }
   2380 
   2381   // Should we allow a follow on exhaustive search?
   2382   if (!run_mesh_search) {
   2383     if (method == NSTEP) {
   2384       if (is_exhaustive_allowed(cpi, x)) {
   2385         int exhuastive_thr = sf->exhaustive_searches_thresh;
   2386         exhuastive_thr >>=
   2387             10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
   2388         // Threshold variance for an exhaustive full search.
   2389         if (var > exhuastive_thr) run_mesh_search = 1;
   2390       }
   2391     }
   2392   }
   2393 
   2394   if (run_mesh_search) {
   2395     int var_ex;
   2396     MV tmp_mv_ex;
   2397     var_ex = full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
   2398                                    cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
   2399     if (var_ex < var) {
   2400       var = var_ex;
   2401       x->best_mv.as_mv = tmp_mv_ex;
   2402     }
   2403   }
   2404 
   2405   if (method != NSTEP && rd && var < var_max)
   2406     var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);
   2407 
   2408   do {
   2409     if (!intra || !av1_use_hash_me(&cpi->common)) break;
   2410 
   2411     // already single ME
   2412     // get block size and original buffer of current block
   2413     const int block_height = block_size_high[bsize];
   2414     const int block_width = block_size_wide[bsize];
   2415     if (block_height == block_width && x_pos >= 0 && y_pos >= 0) {
   2416       if (block_width == 4 || block_width == 8 || block_width == 16 ||
   2417           block_width == 32 || block_width == 64 || block_width == 128) {
   2418         uint8_t *what = x->plane[0].src.buf;
   2419         const int what_stride = x->plane[0].src.stride;
   2420         uint32_t hash_value1, hash_value2;
   2421         MV best_hash_mv;
   2422         int best_hash_cost = INT_MAX;
   2423 
   2424         // for the hashMap
   2425         hash_table *ref_frame_hash =
   2426             intra ? &cpi->common.cur_frame->hash_table
   2427                   : av1_get_ref_frame_hash_map(&cpi->common,
   2428                                                x->e_mbd.mi[0]->ref_frame[0]);
   2429 
   2430         av1_get_block_hash_value(what, what_stride, block_width, &hash_value1,
   2431                                  &hash_value2, is_cur_buf_hbd(&x->e_mbd), x);
   2432 
   2433         const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
   2434         // for intra, at lest one matching can be found, itself.
   2435         if (count <= (intra ? 1 : 0)) {
   2436           break;
   2437         }
   2438 
   2439         Iterator iterator =
   2440             av1_hash_get_first_iterator(ref_frame_hash, hash_value1);
   2441         for (int i = 0; i < count; i++, iterator_increment(&iterator)) {
   2442           block_hash ref_block_hash = *(block_hash *)(iterator_get(&iterator));
   2443           if (hash_value2 == ref_block_hash.hash_value2) {
   2444             // For intra, make sure the prediction is from valid area.
   2445             if (intra) {
   2446               const int mi_col = x_pos / MI_SIZE;
   2447               const int mi_row = y_pos / MI_SIZE;
   2448               const MV dv = { 8 * (ref_block_hash.y - y_pos),
   2449                               8 * (ref_block_hash.x - x_pos) };
   2450               if (!av1_is_dv_valid(dv, &cpi->common, &x->e_mbd, mi_row, mi_col,
   2451                                    bsize, cpi->common.seq_params.mib_size_log2))
   2452                 continue;
   2453             }
   2454             MV hash_mv;
   2455             hash_mv.col = ref_block_hash.x - x_pos;
   2456             hash_mv.row = ref_block_hash.y - y_pos;
   2457             if (!is_mv_in(&x->mv_limits, &hash_mv)) continue;
   2458             const int refCost =
   2459                 av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1);
   2460             if (refCost < best_hash_cost) {
   2461               best_hash_cost = refCost;
   2462               best_hash_mv = hash_mv;
   2463             }
   2464           }
   2465         }
   2466         if (best_hash_cost < var) {
   2467           x->second_best_mv = x->best_mv;
   2468           x->best_mv.as_mv = best_hash_mv;
   2469           var = best_hash_cost;
   2470         }
   2471       }
   2472     }
   2473   } while (0);
   2474 
   2475   return var;
   2476 }
   2477 
   2478 /* Sub-pixel OBMC variance at (r, c) via vfp->osvf; also writes `sse`. */
   2479 #define DIST(r, c) \
   2480   vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
   2481
   2482 /* Motion vector cost of (r, c) relative to (rr, rc): joint cost plus the
        * row and column component costs, multiplied by error_per_bit, rounded
        * (+4096) and shifted right by 13. Evaluates to 0 when mvcost is NULL. */
   2483 #define MVC(r, c)                                                              \
   2484   (unsigned int)(mvcost                                                        \
   2485                      ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +              \
   2486                          mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \
   2487                             error_per_bit +                                    \
   2488                         4096) >>                                               \
   2489                            13                                                  \
   2490                      : 0)
   2491
        /* Checks whether (r, c) scores better than the previous best. When (r, c)
         * lies inside [minr, maxr] x [minc, maxc], v is set to MVC + DIST and, if
         * that beats besterr, besterr, br, bc, *distortion and *sse1 are updated.
         * Outside the limits v is set to INT_MAX. */
   2492 #define CHECK_BETTER(v, r, c)                             \
   2493   if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
   2494     thismse = (DIST(r, c));                               \
   2495     if ((v = MVC(r, c) + thismse) < besterr) {            \
   2496       besterr = v;                                        \
   2497       br = r;                                             \
   2498       bc = c;                                             \
   2499       *distortion = thismse;                              \
   2500       *sse1 = sse;                                        \
   2501     }                                                     \
   2502   } else {                                                \
   2503     v = INT_MAX;                                          \
   2504   }
   2505
        /* Variant 0 is the plain CHECK_BETTER above. */
   2506 #undef CHECK_BETTER0
   2507 #define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
   2508
        /* Variant 1 measures the error with upsampled_obmc_pref_error() and the
         * motion vector cost with mv_err_cost(). Unlike CHECK_BETTER, v holds only
         * the motion vector cost here; the comparison uses v + thismse. */
   2509 #undef CHECK_BETTER1
   2510 #define CHECK_BETTER1(v, r, c)                                                \
   2511   if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                     \
   2512     MV this_mv = { r, c };                                                    \
   2513     thismse = upsampled_obmc_pref_error(xd, cm, mi_row, mi_col, &this_mv,     \
   2514                                         mask, vfp, z, pre(y, y_stride, r, c), \
   2515                                         y_stride, sp(c), sp(r), w, h, &sse,   \
   2516                                         use_accurate_subpel_search);          \
   2517     v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);        \
   2518     if ((v + thismse) < besterr) {                                            \
   2519       besterr = v + thismse;                                                  \
   2520       br = r;                                                                 \
   2521       bc = c;                                                                 \
   2522       *distortion = thismse;                                                  \
   2523       *sse1 = sse;                                                            \
   2524     }                                                                         \
   2525   } else {                                                                    \
   2526     v = INT_MAX;                                                              \
   2527   }
   2528 
   2529 static unsigned int setup_obmc_center_error(
   2530     const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
   2531     const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
   2532     const uint8_t *const y, int y_stride, int offset, int *mvjcost,
   2533     int *mvcost[2], unsigned int *sse1, int *distortion) {
   2534   unsigned int besterr;
   2535   besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
   2536   *distortion = besterr;
   2537   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
   2538   return besterr;
   2539 }
   2540 
   2541 static int upsampled_obmc_pref_error(
   2542     MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
   2543     const MV *const mv, const int32_t *mask, const aom_variance_fn_ptr_t *vfp,
   2544     const int32_t *const wsrc, const uint8_t *const y, int y_stride,
   2545     int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse,
   2546     int subpel_search) {
   2547   unsigned int besterr;
   2548 
   2549   DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]);
   2550   if (is_cur_buf_hbd(xd)) {
   2551     uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred);
   2552     aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
   2553                               subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
   2554                               subpel_search);
   2555     besterr = vfp->ovf(pred8, w, wsrc, mask, sse);
   2556   } else {
   2557     aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
   2558                        subpel_y_q3, y, y_stride, subpel_search);
   2559 
   2560     besterr = vfp->ovf(pred, w, wsrc, mask, sse);
   2561   }
   2562   return besterr;
   2563 }
   2564 
   2565 static unsigned int upsampled_setup_obmc_center_error(
   2566     MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
   2567     const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
   2568     const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
   2569     const uint8_t *const y, int y_stride, int w, int h, int offset,
   2570     int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion,
   2571     int subpel_search) {
   2572   unsigned int besterr = upsampled_obmc_pref_error(
   2573       xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc, y + offset, y_stride, 0,
   2574       0, w, h, sse1, subpel_search);
   2575   *distortion = besterr;
   2576   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
   2577   return besterr;
   2578 }
   2579 
   // Sub-pixel refinement of an OBMC motion vector.
   //
   // On entry *bestmv is multiplied by 8 (presumably full-pel -> 1/8-pel units;
   // confirm against callers). Up to (3 - forced_stop) refinement rounds are run,
   // capped at 2 when allow_hp is zero. Each round tests the four axis-aligned
   // candidates from search_step[] and one diagonal candidate, then halves hstep
   // and advances search_step by four entries. Candidates outside the range set
   // by set_subpel_mv_search_range() are skipped.
   //
   // Distortion is measured with upsampled_obmc_pref_error() when
   // use_accurate_subpel_search is non-zero and with vfp->osvf() otherwise.
   //
   // Outputs: *bestmv (refined vector), *distortion and *sse1 (values of the best
   // candidate). Returns the best total cost (distortion + motion vector cost).
   2580 int av1_find_best_obmc_sub_pixel_tree_up(
   2581     MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
   2582     MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
   2583     const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
   2584     int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
   2585     int is_second, int use_accurate_subpel_search) {
   2586   const int32_t *wsrc = x->wsrc_buf;
   2587   const int32_t *mask = x->mask_buf;
          // z and src_address are aliases of wsrc; the CHECK_BETTER* macros refer
          // to the source by the name z.
   2588   const int *const z = wsrc;
   2589   const int *const src_address = z;
   2590   MACROBLOCKD *xd = &x->e_mbd;
   2591   struct macroblockd_plane *const pd = &xd->plane[0];
   2592   MB_MODE_INFO *mbmi = xd->mi[0];
   2593   unsigned int besterr = INT_MAX;
   2594   unsigned int sse;
   2595   unsigned int thismse;
   2596
          // (rr, rc): reference vector; (br, bc): current best; (tr, tc): candidate
          // under test. br/bc are initialised from bestmv scaled by 8.
   2597   int rr = ref_mv->row;
   2598   int rc = ref_mv->col;
   2599   int br = bestmv->row * 8;
   2600   int bc = bestmv->col * 8;
   2601   int hstep = 4;
   2602   int iter;
   2603   int round = 3 - forced_stop;
   2604   int tr = br;
   2605   int tc = bc;
   2606   const MV *search_step = search_step_table;
   2607   int idx, best_idx = -1;
   2608   unsigned int cost_array[5];
   2609   int kr, kc;
   2610   const int w = block_size_wide[mbmi->sb_type];
   2611   const int h = block_size_high[mbmi->sb_type];
   2612   int offset;
   2613   int y_stride;
   2614   const uint8_t *y;
   2615
   2616   int minc, maxc, minr, maxr;
   2617
   2618   set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);
   2619
   2620   y = pd->pre[is_second].buf;
   2621   y_stride = pd->pre[is_second].stride;
          // Computed before bestmv is scaled below, so it is a whole-pixel offset.
   2622   offset = bestmv->row * y_stride + bestmv->col;
   2623
   2624   if (!allow_hp)
   2625     if (round == 3) round = 2;
   2626
   2627   bestmv->row *= 8;
   2628   bestmv->col *= 8;
   2629   // use_accurate_subpel_search can be 0 or 1 or 2
   2630   if (use_accurate_subpel_search)
   2631     besterr = upsampled_setup_obmc_center_error(
   2632         xd, cm, mi_row, mi_col, mask, bestmv, ref_mv, error_per_bit, vfp, z, y,
   2633         y_stride, w, h, offset, mvjcost, mvcost, sse1, distortion,
   2634         use_accurate_subpel_search);
   2635   else
   2636     besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
   2637                                       z, y, y_stride, offset, mvjcost, mvcost,
   2638                                       sse1, distortion);
   2639
   2640   for (iter = 0; iter < round; ++iter) {
   2641     // Check vertical and horizontal sub-pixel positions.
   2642     for (idx = 0; idx < 4; ++idx) {
   2643       tr = br + search_step[idx].row;
   2644       tc = bc + search_step[idx].col;
   2645       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
   2646         MV this_mv = { tr, tc };
   2647         if (use_accurate_subpel_search) {
   2648           thismse = upsampled_obmc_pref_error(
   2649               xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
   2650               pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
   2651               use_accurate_subpel_search);
   2652         } else {
   2653           thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc),
   2654                               sp(tr), src_address, mask, &sse);
   2655         }
   2656
   2657         cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
   2658                                                 mvcost, error_per_bit);
   2659         if (cost_array[idx] < besterr) {
   2660           best_idx = idx;
   2661           besterr = cost_array[idx];
   2662           *distortion = thismse;
   2663           *sse1 = sse;
   2664         }
   2665       } else {
   2666         cost_array[idx] = INT_MAX;
   2667       }
   2668     }
   2669
   2670     // Check diagonal sub-pixel position
            // The diagonal is chosen from the cheaper of each candidate pair:
            // entries 0/1 decide the column direction, entries 2/3 the row direction.
   2671     kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
   2672     kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
   2673
   2674     tc = bc + kc;
   2675     tr = br + kr;
   2676     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
   2677       MV this_mv = { tr, tc };
   2678
   2679       if (use_accurate_subpel_search) {
   2680         thismse = upsampled_obmc_pref_error(
   2681             xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
   2682             pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
   2683             use_accurate_subpel_search);
   2684       } else {
   2685         thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr),
   2686                             src_address, mask, &sse);
   2687       }
   2688
   2689       cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
   2690                                             error_per_bit);
   2691
   2692       if (cost_array[4] < besterr) {
   2693         best_idx = 4;
   2694         besterr = cost_array[4];
   2695         *distortion = thismse;
   2696         *sse1 = sse;
   2697       }
   2698     } else {
              // idx is 4 here (its value when the loop above exits), so this marks
              // the diagonal slot cost_array[4] as unavailable.
   2699       cost_array[idx] = INT_MAX;
   2700     }
   2701
            // Move the best position to the winning candidate, if any.
   2702     if (best_idx < 4 && best_idx >= 0) {
   2703       br += search_step[best_idx].row;
   2704       bc += search_step[best_idx].col;
   2705     } else if (best_idx == 4) {
   2706       br = tr;
   2707       bc = tc;
   2708     }
   2709
            // Optional second-level checks around the new best position; the macro
            // argument selects CHECK_BETTER1 (upsampled error) or CHECK_BETTER0.
   2710     if (iters_per_step > 1 && best_idx != -1) {
   2711       if (use_accurate_subpel_search) {
   2712         SECOND_LEVEL_CHECKS_BEST(1);
   2713       } else {
   2714         SECOND_LEVEL_CHECKS_BEST(0);
   2715       }
   2716     }
   2717
   2718     tr = br;
   2719     tc = bc;
   2720
            // Next round: next four entries of the step table, half the step size.
   2721     search_step += 4;
   2722     hstep >>= 1;
   2723     best_idx = -1;
   2724   }
   2725
   2726   // These lines ensure static analysis doesn't warn that
   2727   // tr and tc aren't used after the above point.
   2728   (void)tr;
   2729   (void)tc;
   2730
   2731   bestmv->row = br;
   2732   bestmv->col = bc;
   2733
   2734   return besterr;
   2735 }
   2736 
   2737 #undef DIST
   2738 #undef MVC
   2739 #undef CHECK_BETTER
   2740 
   2741 static int get_obmc_mvpred_var(const MACROBLOCK *x, const int32_t *wsrc,
   2742                                const int32_t *mask, const MV *best_mv,
   2743                                const MV *center_mv,
   2744                                const aom_variance_fn_ptr_t *vfp, int use_mvcost,
   2745                                int is_second) {
   2746   const MACROBLOCKD *const xd = &x->e_mbd;
   2747   const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
   2748   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
   2749   unsigned int unused;
   2750 
   2751   return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride, wsrc,
   2752                   mask, &unused) +
   2753          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
   2754                                    x->mv_cost_stack, x->errorperbit)
   2755                      : 0);
   2756 }
   2757 
   2758 static int obmc_refining_search_sad(const MACROBLOCK *x, const int32_t *wsrc,
   2759                                     const int32_t *mask, MV *ref_mv,
   2760                                     int error_per_bit, int search_range,
   2761                                     const aom_variance_fn_ptr_t *fn_ptr,
   2762                                     const MV *center_mv, int is_second) {
   2763   const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
   2764   const MACROBLOCKD *const xd = &x->e_mbd;
   2765   const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
   2766   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
   2767   unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
   2768                                        in_what->stride, wsrc, mask) +
   2769                           mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   2770   int i, j;
   2771 
   2772   for (i = 0; i < search_range; i++) {
   2773     int best_site = -1;
   2774 
   2775     for (j = 0; j < 4; j++) {
   2776       const MV mv = { ref_mv->row + neighbors[j].row,
   2777                       ref_mv->col + neighbors[j].col };
   2778       if (is_mv_in(&x->mv_limits, &mv)) {
   2779         unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
   2780                                         in_what->stride, wsrc, mask);
   2781         if (sad < best_sad) {
   2782           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
   2783           if (sad < best_sad) {
   2784             best_sad = sad;
   2785             best_site = j;
   2786           }
   2787         }
   2788       }
   2789     }
   2790 
   2791     if (best_site == -1) {
   2792       break;
   2793     } else {
   2794       ref_mv->row += neighbors[best_site].row;
   2795       ref_mv->col += neighbors[best_site].col;
   2796     }
   2797   }
   2798   return best_sad;
   2799 }
   2800 
   2801 static int obmc_diamond_search_sad(const MACROBLOCK *x,
   2802                                    const search_site_config *cfg,
   2803                                    const int32_t *wsrc, const int32_t *mask,
   2804                                    MV *ref_mv, MV *best_mv, int search_param,
   2805                                    int sad_per_bit, int *num00,
   2806                                    const aom_variance_fn_ptr_t *fn_ptr,
   2807                                    const MV *center_mv, int is_second) {
   2808   const MACROBLOCKD *const xd = &x->e_mbd;
   2809   const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
   2810   // search_param determines the length of the initial step and hence the number
   2811   // of iterations
   2812   // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
   2813   // (MAX_FIRST_STEP/4) pel... etc.
   2814   const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
   2815   const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
   2816   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
   2817   const uint8_t *best_address, *in_what_ref;
   2818   int best_sad = INT_MAX;
   2819   int best_site = 0;
   2820   int last_site = 0;
   2821   int i, j, step;
   2822 
   2823   clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   2824            x->mv_limits.row_min, x->mv_limits.row_max);
   2825   in_what_ref = in_what->buf + ref_mv->row * in_what->stride + ref_mv->col;
   2826   best_address = in_what_ref;
   2827   *num00 = 0;
   2828   *best_mv = *ref_mv;
   2829 
   2830   // Check the starting position
   2831   best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) +
   2832              mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
   2833 
   2834   i = 1;
   2835 
   2836   for (step = 0; step < tot_steps; step++) {
   2837     for (j = 0; j < cfg->searches_per_step; j++) {
   2838       const MV mv = { best_mv->row + ss[i].mv.row,
   2839                       best_mv->col + ss[i].mv.col };
   2840       if (is_mv_in(&x->mv_limits, &mv)) {
   2841         int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
   2842                                wsrc, mask);
   2843         if (sad < best_sad) {
   2844           sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
   2845           if (sad < best_sad) {
   2846             best_sad = sad;
   2847             best_site = i;
   2848           }
   2849         }
   2850       }
   2851 
   2852       i++;
   2853     }
   2854 
   2855     if (best_site != last_site) {
   2856       best_mv->row += ss[best_site].mv.row;
   2857       best_mv->col += ss[best_site].mv.col;
   2858       best_address += ss[best_site].offset;
   2859       last_site = best_site;
   2860 #if defined(NEW_DIAMOND_SEARCH)
   2861       while (1) {
   2862         const MV this_mv = { best_mv->row + ss[best_site].mv.row,
   2863                              best_mv->col + ss[best_site].mv.col };
   2864         if (is_mv_in(&x->mv_limits, &this_mv)) {
   2865           int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
   2866                                  in_what->stride, wsrc, mask);
   2867           if (sad < best_sad) {
   2868             sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
   2869             if (sad < best_sad) {
   2870               best_sad = sad;
   2871               best_mv->row += ss[best_site].mv.row;
   2872               best_mv->col += ss[best_site].mv.col;
   2873               best_address += ss[best_site].offset;
   2874               continue;
   2875             }
   2876           }
   2877         }
   2878         break;
   2879       }
   2880 #endif
   2881     } else if (best_address == in_what_ref) {
   2882       (*num00)++;
   2883     }
   2884   }
   2885   return best_sad;
   2886 }
   2887 
   2888 static int obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
   2889                                    MV *mvp_full, int step_param, int sadpb,
   2890                                    int further_steps, int do_refine,
   2891                                    const aom_variance_fn_ptr_t *fn_ptr,
   2892                                    const MV *ref_mv, MV *dst_mv, int is_second,
   2893                                    const search_site_config *cfg) {
   2894   (void)cpi;  // to silence compiler warning
   2895   const int32_t *wsrc = x->wsrc_buf;
   2896   const int32_t *mask = x->mask_buf;
   2897   MV temp_mv;
   2898   int thissme, n, num00 = 0;
   2899   int bestsme =
   2900       obmc_diamond_search_sad(x, cfg, wsrc, mask, mvp_full, &temp_mv,
   2901                               step_param, sadpb, &n, fn_ptr, ref_mv, is_second);
   2902   if (bestsme < INT_MAX)
   2903     bestsme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1,
   2904                                   is_second);
   2905   *dst_mv = temp_mv;
   2906 
   2907   // If there won't be more n-step search, check to see if refining search is
   2908   // needed.
   2909   if (n > further_steps) do_refine = 0;
   2910 
   2911   while (n < further_steps) {
   2912     ++n;
   2913 
   2914     if (num00) {
   2915       num00--;
   2916     } else {
   2917       thissme = obmc_diamond_search_sad(x, cfg, wsrc, mask, mvp_full, &temp_mv,
   2918                                         step_param + n, sadpb, &num00, fn_ptr,
   2919                                         ref_mv, is_second);
   2920       if (thissme < INT_MAX)
   2921         thissme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr,
   2922                                       1, is_second);
   2923 
   2924       // check to see if refining search is needed.
   2925       if (num00 > further_steps - n) do_refine = 0;
   2926 
   2927       if (thissme < bestsme) {
   2928         bestsme = thissme;
   2929         *dst_mv = temp_mv;
   2930       }
   2931     }
   2932   }
   2933 
   2934   // final 1-away diamond refining search
   2935   if (do_refine) {
   2936     const int search_range = 8;
   2937     MV best_mv = *dst_mv;
   2938     thissme = obmc_refining_search_sad(x, wsrc, mask, &best_mv, sadpb,
   2939                                        search_range, fn_ptr, ref_mv, is_second);
   2940     if (thissme < INT_MAX)
   2941       thissme = get_obmc_mvpred_var(x, wsrc, mask, &best_mv, ref_mv, fn_ptr, 1,
   2942                                     is_second);
   2943     if (thissme < bestsme) {
   2944       bestsme = thissme;
   2945       *dst_mv = best_mv;
   2946     }
   2947   }
   2948   return bestsme;
   2949 }
   2950 
   2951 int av1_obmc_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
   2952                                int step_param, int sadpb, int further_steps,
   2953                                int do_refine,
   2954                                const aom_variance_fn_ptr_t *fn_ptr,
   2955                                const MV *ref_mv, MV *dst_mv, int is_second,
   2956                                const search_site_config *cfg) {
   2957   if (cpi->sf.obmc_full_pixel_search_level == 0) {
   2958     return obmc_full_pixel_diamond(cpi, x, mvp_full, step_param, sadpb,
   2959                                    further_steps, do_refine, fn_ptr, ref_mv,
   2960                                    dst_mv, is_second, cfg);
   2961   } else {
   2962     const int32_t *wsrc = x->wsrc_buf;
   2963     const int32_t *mask = x->mask_buf;
   2964     const int search_range = 8;
   2965     *dst_mv = *mvp_full;
   2966     clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   2967              x->mv_limits.row_min, x->mv_limits.row_max);
   2968     int thissme = obmc_refining_search_sad(
   2969         x, wsrc, mask, dst_mv, sadpb, search_range, fn_ptr, ref_mv, is_second);
   2970     if (thissme < INT_MAX)
   2971       thissme = get_obmc_mvpred_var(x, wsrc, mask, dst_mv, ref_mv, fn_ptr, 1,
   2972                                     is_second);
   2973     return thissme;
   2974   }
   2975 }
   2976 
   2977 // Note(yunqingwang): The following 2 functions are only used in the motion
   2978 // vector unit test, which return extreme motion vectors allowed by the MV
   2979 // limits.
   2980 #define COMMON_MV_TEST              \
   2981   SETUP_SUBPEL_SEARCH;              \
   2982                                     \
   2983   (void)error_per_bit;              \
   2984   (void)vfp;                        \
   2985   (void)src_address;                \
   2986   (void)src_stride;                 \
   2987   (void)y;                          \
   2988   (void)y_stride;                   \
   2989   (void)second_pred;                \
   2990   (void)w;                          \
   2991   (void)h;                          \
   2992   (void)use_accurate_subpel_search; \
   2993   (void)offset;                     \
   2994   (void)mvjcost;                    \
   2995   (void)mvcost;                     \
   2996   (void)sse1;                       \
   2997   (void)distortion;                 \
   2998                                     \
   2999   (void)halfiters;                  \
   3000   (void)quarteriters;               \
   3001   (void)eighthiters;                \
   3002   (void)whichdir;                   \
   3003   (void)forced_stop;                \
   3004   (void)hstep;                      \
   3005                                     \
   3006   (void)tr;                         \
   3007   (void)tc;                         \
   3008   (void)sse;                        \
   3009   (void)thismse;                    \
   3010   (void)cost_list;
   3011 // Return the maximum MV.
   3012 int av1_return_max_sub_pixel_mv(
   3013     MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
   3014     const MV *ref_mv, int allow_hp, int error_per_bit,
   3015     const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
   3016     int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
   3017     unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
   3018     int mask_stride, int invert_mask, int w, int h,
   3019     int use_accurate_subpel_search, const int do_reset_fractional_mv) {
   3020   COMMON_MV_TEST;
   3021   (void)mask;
   3022   (void)mask_stride;
   3023   (void)invert_mask;
   3024   (void)minr;
   3025   (void)minc;
   3026 
   3027   (void)cm;
   3028   (void)mi_row;
   3029   (void)mi_col;
   3030   (void)do_reset_fractional_mv;
   3031 
   3032   bestmv->row = maxr;
   3033   bestmv->col = maxc;
   3034   besterr = 0;
   3035   // In the sub-pel motion search, if hp is not used, then the last bit of mv
   3036   // has to be 0.
   3037   lower_mv_precision(bestmv, allow_hp, 0);
   3038   return besterr;
   3039 }
   3040 // Return the minimum MV.
   3041 int av1_return_min_sub_pixel_mv(
   3042     MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
   3043     const MV *ref_mv, int allow_hp, int error_per_bit,
   3044     const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
   3045     int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
   3046     unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
   3047     int mask_stride, int invert_mask, int w, int h,
   3048     int use_accurate_subpel_search, const int do_reset_fractional_mv) {
   3049   COMMON_MV_TEST;
   3050   (void)maxr;
   3051   (void)maxc;
   3052   (void)mask;
   3053   (void)mask_stride;
   3054   (void)invert_mask;
   3055 
   3056   (void)cm;
   3057   (void)mi_row;
   3058   (void)mi_col;
   3059   (void)do_reset_fractional_mv;
   3060 
   3061   bestmv->row = minr;
   3062   bestmv->col = minc;
   3063   besterr = 0;
   3064   // In the sub-pel motion search, if hp is not used, then the last bit of mv
   3065   // has to be 0.
   3066   lower_mv_precision(bestmv, allow_hp, 0);
   3067   return besterr;
   3068 }
   3069 
   3070 void av1_simple_motion_search(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
   3071                               int mi_col, BLOCK_SIZE bsize, int ref,
   3072                               MV ref_mv_full, int num_planes,
   3073                               int use_subpixel) {
   3074   assert(num_planes == 1 &&
   3075          "Currently simple_motion_search only supports luma plane");
   3076   assert(!frame_is_intra_only(&cpi->common) &&
   3077          "Simple motion search only enabled for non-key frames");
   3078   AV1_COMMON *const cm = &cpi->common;
   3079   MACROBLOCKD *xd = &x->e_mbd;
   3080 
   3081   set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize);
   3082 
   3083   MB_MODE_INFO *mbmi = xd->mi[0];
   3084   mbmi->sb_type = bsize;
   3085   mbmi->ref_frame[0] = ref;
   3086   mbmi->ref_frame[1] = NONE_FRAME;
   3087   mbmi->motion_mode = SIMPLE_TRANSLATION;
   3088 
   3089   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref);
   3090   const YV12_BUFFER_CONFIG *scaled_ref_frame =
   3091       av1_get_scaled_ref_frame(cpi, ref);
   3092   struct buf_2d backup_yv12;
   3093   // ref_mv is used to code the motion vector. ref_mv_full is the initial point.
   3094   // ref_mv is in units of 1/8 pel whereas ref_mv_full is in units of pel.
   3095   MV ref_mv = { 0, 0 };
   3096   const int step_param = cpi->mv_step_param;
   3097   const MvLimits tmp_mv_limits = x->mv_limits;
   3098   const SEARCH_METHODS search_methods = NSTEP;
   3099   const int do_mesh_search = 0;
   3100   const int sadpb = x->sadperbit16;
   3101   int cost_list[5];
   3102   const int ref_idx = 0;
   3103   int var;
   3104 
   3105   av1_setup_pre_planes(xd, ref_idx, yv12, mi_row, mi_col,
   3106                        get_ref_scale_factors(cm, ref), num_planes);
   3107   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   3108   if (scaled_ref_frame) {
   3109     backup_yv12 = xd->plane[AOM_PLANE_Y].pre[ref_idx];
   3110     av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL,
   3111                          num_planes);
   3112   }
   3113 
   3114   // This overwrites the mv_limits so we will need to restore it later.
   3115   av1_set_mv_search_range(&x->mv_limits, &ref_mv);
   3116   var = av1_full_pixel_search(
   3117       cpi, x, bsize, &ref_mv_full, step_param, search_methods, do_mesh_search,
   3118       sadpb, cond_cost_list(cpi, cost_list), &ref_mv, INT_MAX, 1,
   3119       mi_col * MI_SIZE, mi_row * MI_SIZE, 0, &cpi->ss_cfg[SS_CFG_SRC]);
   3120   // Restore
   3121   x->mv_limits = tmp_mv_limits;
   3122 
   3123   const int use_subpel_search =
   3124       var < INT_MAX && !cpi->common.cur_frame_force_integer_mv && use_subpixel;
   3125   if (scaled_ref_frame) {
   3126     xd->plane[AOM_PLANE_Y].pre[ref_idx] = backup_yv12;
   3127   }
   3128   if (use_subpel_search) {
   3129     int not_used = 0;
   3130     if (cpi->sf.use_accurate_subpel_search) {
   3131       const int pw = block_size_wide[bsize];
   3132       const int ph = block_size_high[bsize];
   3133       cpi->find_fractional_mv_step(
   3134           x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
   3135           x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
   3136           cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
   3137           x->nmv_vec_cost, x->mv_cost_stack, &not_used, &x->pred_sse[ref], NULL,
   3138           NULL, 0, 0, pw, ph, cpi->sf.use_accurate_subpel_search, 1);
   3139     } else {
   3140       cpi->find_fractional_mv_step(
   3141           x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
   3142           x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
   3143           cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
   3144           x->nmv_vec_cost, x->mv_cost_stack, &not_used, &x->pred_sse[ref], NULL,
   3145           NULL, 0, 0, 0, 0, 0, 1);
   3146     }
   3147   } else {
   3148     // Manually convert from units of pixel to 1/8-pixels if we are not doing
   3149     // subpel search
   3150     x->best_mv.as_mv.row *= 8;
   3151     x->best_mv.as_mv.col *= 8;
   3152   }
   3153 
   3154   mbmi->mv[0].as_mv = x->best_mv.as_mv;
   3155 
   3156   // Get a copy of the prediction output
   3157   av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
   3158                                 AOM_PLANE_Y, AOM_PLANE_Y);
   3159 
   3160   aom_clear_system_state();
   3161 
   3162   if (scaled_ref_frame) {
   3163     xd->plane[AOM_PLANE_Y].pre[ref_idx] = backup_yv12;
   3164   }
   3165 }
   3166 
   3167 void av1_simple_motion_sse_var(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
   3168                                int mi_col, BLOCK_SIZE bsize,
   3169                                const MV ref_mv_full, int use_subpixel,
   3170                                unsigned int *sse, unsigned int *var) {
   3171   MACROBLOCKD *xd = &x->e_mbd;
   3172   const MV_REFERENCE_FRAME ref =
   3173       cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
   3174 
   3175   av1_simple_motion_search(cpi, x, mi_row, mi_col, bsize, ref, ref_mv_full, 1,
   3176                            use_subpixel);
   3177 
   3178   const uint8_t *src = x->plane[0].src.buf;
   3179   const int src_stride = x->plane[0].src.stride;
   3180   const uint8_t *dst = xd->plane[0].dst.buf;
   3181   const int dst_stride = xd->plane[0].dst.stride;
   3182 
   3183   *var = cpi->fn_ptr[bsize].vf(src, src_stride, dst, dst_stride, sse);
   3184 }
   3185