Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <assert.h>
     12 #include <limits.h>
     13 #include <math.h>
     14 #include <stdio.h>
     15 
     16 #include "./vpx_config.h"
     17 #include "./vpx_dsp_rtcd.h"
     18 
     19 #include "vpx_dsp/vpx_dsp_common.h"
     20 #include "vpx_mem/vpx_mem.h"
     21 #include "vpx_ports/mem.h"
     22 
     23 #include "vp9/common/vp9_common.h"
     24 #include "vp9/common/vp9_mvref_common.h"
     25 #include "vp9/common/vp9_reconinter.h"
     26 
     27 #include "vp9/encoder/vp9_encoder.h"
     28 #include "vp9/encoder/vp9_mcomp.h"
     29 
     30 // #define NEW_DIAMOND_SEARCH
     31 
     32 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
     33                                              const MV *mv) {
     34   return &buf->buf[mv->row * buf->stride + mv->col];
     35 }
     36 
     37 void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
     38   int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
     39   int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
     40   int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
     41   int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
     42 
     43   col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
     44   row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
     45   col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
     46   row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
     47 
     48   // Get intersection of UMV window and valid MV window to reduce # of checks
     49   // in diamond search.
     50   if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
     51   if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
     52   if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
     53   if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
     54 }
     55 
     56 void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
     57                                     const MvLimits *umv_window_limits,
     58                                     const MV *ref_mv) {
     59   subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8,
     60                                      ref_mv->col - MAX_FULL_PEL_VAL * 8);
     61   subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8,
     62                                      ref_mv->col + MAX_FULL_PEL_VAL * 8);
     63   subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8,
     64                                      ref_mv->row - MAX_FULL_PEL_VAL * 8);
     65   subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8,
     66                                      ref_mv->row + MAX_FULL_PEL_VAL * 8);
     67 
     68   subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min);
     69   subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max);
     70   subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min);
     71   subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max);
     72 }
     73 
     74 int vp9_init_search_range(int size) {
     75   int sr = 0;
     76   // Minimum search size no matter what the passed in value.
     77   size = VPXMAX(16, size);
     78 
     79   while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
     80 
     81   sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
     82   return sr;
     83 }
     84 
     85 static INLINE int mv_cost(const MV *mv, const int *joint_cost,
     86                           int *const comp_cost[2]) {
     87   assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
     88   assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
     89   return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] +
     90          comp_cost[1][mv->col];
     91 }
     92 
     93 int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
     94                     int *mvcost[2], int weight) {
     95   const MV diff = { mv->row - ref->row, mv->col - ref->col };
     96   return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
     97 }
     98 
     99 #define PIXEL_TRANSFORM_ERROR_SCALE 4
    100 static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
    101                        int *mvcost[2], int error_per_bit) {
    102   if (mvcost) {
    103     const MV diff = { mv->row - ref->row, mv->col - ref->col };
    104     return (int)ROUND64_POWER_OF_TWO(
    105         (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
    106         RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
    107             PIXEL_TRANSFORM_ERROR_SCALE);
    108   }
    109   return 0;
    110 }
    111 
    112 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
    113                           int sad_per_bit) {
    114   const MV diff = { mv->row - ref->row, mv->col - ref->col };
    115   return ROUND_POWER_OF_TWO(
    116       (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
    117       VP9_PROB_COST_SHIFT);
    118 }
    119 
    120 void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
    121   int len;
    122   int ss_count = 0;
    123 
    124   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    125     // Generate offsets for 4 search sites per step.
    126     const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
    127     int i;
    128     for (i = 0; i < 4; ++i, ++ss_count) {
    129       cfg->ss_mv[ss_count] = ss_mvs[i];
    130       cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
    131     }
    132   }
    133 
    134   cfg->searches_per_step = 4;
    135   cfg->total_steps = ss_count / cfg->searches_per_step;
    136 }
    137 
    138 void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
    139   int len;
    140   int ss_count = 0;
    141 
    142   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    143     // Generate offsets for 8 search sites per step.
    144     const MV ss_mvs[8] = { { -len, 0 },   { len, 0 },     { 0, -len },
    145                            { 0, len },    { -len, -len }, { -len, len },
    146                            { len, -len }, { len, len } };
    147     int i;
    148     for (i = 0; i < 8; ++i, ++ss_count) {
    149       cfg->ss_mv[ss_count] = ss_mvs[i];
    150       cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
    151     }
    152   }
    153 
    154   cfg->searches_per_step = 8;
    155   cfg->total_steps = ss_count / cfg->searches_per_step;
    156 }
    157 
    158 // convert motion vector component to offset for sv[a]f calc
    159 static INLINE int sp(int x) { return x & 7; }
    160 
    161 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
    162   return &buf[(r >> 3) * stride + (c >> 3)];
    163 }
    164 
    165 #if CONFIG_VP9_HIGHBITDEPTH
    166 /* checks if (r, c) has better score than previous best */
    167 #define CHECK_BETTER(v, r, c)                                                \
    168   if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
    169     int64_t tmpmse;                                                          \
    170     const MV mv = { r, c };                                                  \
    171     const MV ref_mv = { rr, rc };                                            \
    172     if (second_pred == NULL) {                                               \
    173       thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z,  \
    174                          src_stride, &sse);                                  \
    175     } else {                                                                 \
    176       thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
    177                           src_stride, &sse, second_pred);                    \
    178     }                                                                        \
    179     tmpmse = thismse;                                                        \
    180     tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit);     \
    181     if (tmpmse >= INT_MAX) {                                                 \
    182       v = INT_MAX;                                                           \
    183     } else if ((v = (uint32_t)tmpmse) < besterr) {                           \
    184       besterr = v;                                                           \
    185       br = r;                                                                \
    186       bc = c;                                                                \
    187       *distortion = thismse;                                                 \
    188       *sse1 = sse;                                                           \
    189     }                                                                        \
    190   } else {                                                                   \
    191     v = INT_MAX;                                                             \
    192   }
    193 #else
    194 /* checks if (r, c) has better score than previous best */
    195 #define CHECK_BETTER(v, r, c)                                                \
    196   if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
    197     const MV mv = { r, c };                                                  \
    198     const MV ref_mv = { rr, rc };                                            \
    199     if (second_pred == NULL)                                                 \
    200       thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z,  \
    201                          src_stride, &sse);                                  \
    202     else                                                                     \
    203       thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
    204                           src_stride, &sse, second_pred);                    \
    205     if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) +     \
    206              thismse) < besterr) {                                           \
    207       besterr = v;                                                           \
    208       br = r;                                                                \
    209       bc = c;                                                                \
    210       *distortion = thismse;                                                 \
    211       *sse1 = sse;                                                           \
    212     }                                                                        \
    213   } else {                                                                   \
    214     v = INT_MAX;                                                             \
    215   }
    216 
    217 #endif
    218 #define FIRST_LEVEL_CHECKS                                       \
    219   {                                                              \
    220     unsigned int left, right, up, down, diag;                    \
    221     CHECK_BETTER(left, tr, tc - hstep);                          \
    222     CHECK_BETTER(right, tr, tc + hstep);                         \
    223     CHECK_BETTER(up, tr - hstep, tc);                            \
    224     CHECK_BETTER(down, tr + hstep, tc);                          \
    225     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);     \
    226     switch (whichdir) {                                          \
    227       case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
    228       case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
    229       case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
    230       case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
    231     }                                                            \
    232   }
    233 
    234 #define SECOND_LEVEL_CHECKS                                       \
    235   {                                                               \
    236     int kr, kc;                                                   \
    237     unsigned int second;                                          \
    238     if (tr != br && tc != bc) {                                   \
    239       kr = br - tr;                                               \
    240       kc = bc - tc;                                               \
    241       CHECK_BETTER(second, tr + kr, tc + 2 * kc);                 \
    242       CHECK_BETTER(second, tr + 2 * kr, tc + kc);                 \
    243     } else if (tr == br && tc != bc) {                            \
    244       kc = bc - tc;                                               \
    245       CHECK_BETTER(second, tr + hstep, tc + 2 * kc);              \
    246       CHECK_BETTER(second, tr - hstep, tc + 2 * kc);              \
    247       switch (whichdir) {                                         \
    248         case 0:                                                   \
    249         case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
    250         case 2:                                                   \
    251         case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
    252       }                                                           \
    253     } else if (tr != br && tc == bc) {                            \
    254       kr = br - tr;                                               \
    255       CHECK_BETTER(second, tr + 2 * kr, tc + hstep);              \
    256       CHECK_BETTER(second, tr + 2 * kr, tc - hstep);              \
    257       switch (whichdir) {                                         \
    258         case 0:                                                   \
    259         case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
    260         case 1:                                                   \
    261         case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
    262       }                                                           \
    263     }                                                             \
    264   }
    265 
    266 #define SETUP_SUBPEL_SEARCH                                                 \
    267   const uint8_t *const z = x->plane[0].src.buf;                             \
    268   const int src_stride = x->plane[0].src.stride;                            \
    269   const MACROBLOCKD *xd = &x->e_mbd;                                        \
    270   unsigned int besterr = UINT_MAX;                                          \
    271   unsigned int sse;                                                         \
    272   unsigned int whichdir;                                                    \
    273   int thismse;                                                              \
    274   const unsigned int halfiters = iters_per_step;                            \
    275   const unsigned int quarteriters = iters_per_step;                         \
    276   const unsigned int eighthiters = iters_per_step;                          \
    277   const int y_stride = xd->plane[0].pre[0].stride;                          \
    278   const int offset = bestmv->row * y_stride + bestmv->col;                  \
    279   const uint8_t *const y = xd->plane[0].pre[0].buf;                         \
    280                                                                             \
    281   int rr = ref_mv->row;                                                     \
    282   int rc = ref_mv->col;                                                     \
    283   int br = bestmv->row * 8;                                                 \
    284   int bc = bestmv->col * 8;                                                 \
    285   int hstep = 4;                                                            \
    286   int minc, maxc, minr, maxr;                                               \
    287   int tr = br;                                                              \
    288   int tc = bc;                                                              \
    289   MvLimits subpel_mv_limits;                                                \
    290                                                                             \
    291   vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \
    292   minc = subpel_mv_limits.col_min;                                          \
    293   maxc = subpel_mv_limits.col_max;                                          \
    294   minr = subpel_mv_limits.row_min;                                          \
    295   maxr = subpel_mv_limits.row_max;                                          \
    296                                                                             \
    297   bestmv->row *= 8;                                                         \
    298   bestmv->col *= 8;
    299 
    300 static unsigned int setup_center_error(
    301     const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
    302     int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
    303     const uint8_t *const src, const int src_stride, const uint8_t *const y,
    304     int y_stride, const uint8_t *second_pred, int w, int h, int offset,
    305     int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) {
    306 #if CONFIG_VP9_HIGHBITDEPTH
    307   uint64_t besterr;
    308   if (second_pred != NULL) {
    309     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    310       DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
    311       vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
    312                                h, CONVERT_TO_SHORTPTR(y + offset), y_stride);
    313       besterr =
    314           vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
    315     } else {
    316       DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
    317       vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
    318       besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
    319     }
    320   } else {
    321     besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
    322   }
    323   *distortion = (uint32_t)besterr;
    324   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
    325   if (besterr >= UINT_MAX) return UINT_MAX;
    326   return (uint32_t)besterr;
    327 #else
    328   uint32_t besterr;
    329   (void)xd;
    330   if (second_pred != NULL) {
    331     DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
    332     vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
    333     besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
    334   } else {
    335     besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
    336   }
    337   *distortion = besterr;
    338   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
    339   return besterr;
    340 #endif  // CONFIG_VP9_HIGHBITDEPTH
    341 }
    342 
    343 static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) {
    344   return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
    345 }
    346 
    347 static INLINE int is_cost_list_wellbehaved(int *cost_list) {
    348   return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
    349          cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
    350 }
    351 
    352 // Returns surface minima estimate at given precision in 1/2^n bits.
    353 // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
    354 // For a given set of costs S0, S1, S2, S3, S4 at points
    355 // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
    356 // the solution for the location of the minima (x0, y0) is given by:
    357 // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
    358 // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
    359 // The code below is an integerized version of that.
    360 static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
    361   const int64_t x0 = (int64_t)cost_list[1] - cost_list[3];
    362   const int64_t y0 = cost_list[1] - 2 * (int64_t)cost_list[0] + cost_list[3];
    363   const int64_t x1 = (int64_t)cost_list[4] - cost_list[2];
    364   const int64_t y1 = cost_list[4] - 2 * (int64_t)cost_list[0] + cost_list[2];
    365   const int b = 1 << (bits - 1);
    366   *ic = (int)divide_and_round(x0 * b, y0);
    367   *ir = (int)divide_and_round(x1 * b, y1);
    368 }
    369 
    370 uint32_t vp9_skip_sub_pixel_tree(
    371     const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    372     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    373     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    374     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    375     int h, int use_accurate_subpel_search) {
    376   SETUP_SUBPEL_SEARCH;
    377   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
    378                                src_stride, y, y_stride, second_pred, w, h,
    379                                offset, mvjcost, mvcost, sse1, distortion);
    380   (void)halfiters;
    381   (void)quarteriters;
    382   (void)eighthiters;
    383   (void)whichdir;
    384   (void)allow_hp;
    385   (void)forced_stop;
    386   (void)hstep;
    387   (void)rr;
    388   (void)rc;
    389   (void)minr;
    390   (void)minc;
    391   (void)maxr;
    392   (void)maxc;
    393   (void)tr;
    394   (void)tc;
    395   (void)sse;
    396   (void)thismse;
    397   (void)cost_list;
    398   (void)use_accurate_subpel_search;
    399 
    400   return besterr;
    401 }
    402 
    403 uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
    404     const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    405     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    406     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    407     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    408     int h, int use_accurate_subpel_search) {
    409   SETUP_SUBPEL_SEARCH;
    410   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
    411                                src_stride, y, y_stride, second_pred, w, h,
    412                                offset, mvjcost, mvcost, sse1, distortion);
    413   (void)halfiters;
    414   (void)quarteriters;
    415   (void)eighthiters;
    416   (void)whichdir;
    417   (void)allow_hp;
    418   (void)forced_stop;
    419   (void)hstep;
    420   (void)use_accurate_subpel_search;
    421 
    422   if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
    423       cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
    424       cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    425     int ir, ic;
    426     unsigned int minpt = INT_MAX;
    427     get_cost_surf_min(cost_list, &ir, &ic, 2);
    428     if (ir != 0 || ic != 0) {
    429       CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
    430     }
    431   } else {
    432     FIRST_LEVEL_CHECKS;
    433     if (halfiters > 1) {
    434       SECOND_LEVEL_CHECKS;
    435     }
    436 
    437     tr = br;
    438     tc = bc;
    439 
    440     // Each subsequent iteration checks at least one point in common with
    441     // the last iteration could be 2 ( if diag selected) 1/4 pel
    442     // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    443     if (forced_stop != 2) {
    444       hstep >>= 1;
    445       FIRST_LEVEL_CHECKS;
    446       if (quarteriters > 1) {
    447         SECOND_LEVEL_CHECKS;
    448       }
    449     }
    450   }
    451 
    452   tr = br;
    453   tc = bc;
    454 
    455   if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
    456     hstep >>= 1;
    457     FIRST_LEVEL_CHECKS;
    458     if (eighthiters > 1) {
    459       SECOND_LEVEL_CHECKS;
    460     }
    461   }
    462 
    463   bestmv->row = br;
    464   bestmv->col = bc;
    465 
    466   return besterr;
    467 }
    468 
    469 uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
    470     const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    471     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    472     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    473     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    474     int h, int use_accurate_subpel_search) {
    475   SETUP_SUBPEL_SEARCH;
    476   (void)use_accurate_subpel_search;
    477 
    478   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
    479                                src_stride, y, y_stride, second_pred, w, h,
    480                                offset, mvjcost, mvcost, sse1, distortion);
    481   if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
    482       cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
    483       cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    484     unsigned int minpt;
    485     int ir, ic;
    486     get_cost_surf_min(cost_list, &ir, &ic, 1);
    487     if (ir != 0 || ic != 0) {
    488       CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
    489     }
    490   } else {
    491     FIRST_LEVEL_CHECKS;
    492     if (halfiters > 1) {
    493       SECOND_LEVEL_CHECKS;
    494     }
    495   }
    496 
    497   // Each subsequent iteration checks at least one point in common with
    498   // the last iteration could be 2 ( if diag selected) 1/4 pel
    499 
    500   // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    501   if (forced_stop != 2) {
    502     tr = br;
    503     tc = bc;
    504     hstep >>= 1;
    505     FIRST_LEVEL_CHECKS;
    506     if (quarteriters > 1) {
    507       SECOND_LEVEL_CHECKS;
    508     }
    509   }
    510 
    511   if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
    512     tr = br;
    513     tc = bc;
    514     hstep >>= 1;
    515     FIRST_LEVEL_CHECKS;
    516     if (eighthiters > 1) {
    517       SECOND_LEVEL_CHECKS;
    518     }
    519   }
    520   // These lines insure static analysis doesn't warn that
    521   // tr and tc aren't used after the above point.
    522   (void)tr;
    523   (void)tc;
    524 
    525   bestmv->row = br;
    526   bestmv->col = bc;
    527 
    528   return besterr;
    529 }
    530 
    531 uint32_t vp9_find_best_sub_pixel_tree_pruned(
    532     const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    533     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    534     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    535     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    536     int h, int use_accurate_subpel_search) {
    537   SETUP_SUBPEL_SEARCH;
    538   (void)use_accurate_subpel_search;
    539 
    540   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
    541                                src_stride, y, y_stride, second_pred, w, h,
    542                                offset, mvjcost, mvcost, sse1, distortion);
    543   if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
    544       cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
    545       cost_list[4] != INT_MAX) {
    546     unsigned int left, right, up, down, diag;
    547     whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
    548                (cost_list[2] < cost_list[4] ? 0 : 2);
    549     switch (whichdir) {
    550       case 0:
    551         CHECK_BETTER(left, tr, tc - hstep);
    552         CHECK_BETTER(down, tr + hstep, tc);
    553         CHECK_BETTER(diag, tr + hstep, tc - hstep);
    554         break;
    555       case 1:
    556         CHECK_BETTER(right, tr, tc + hstep);
    557         CHECK_BETTER(down, tr + hstep, tc);
    558         CHECK_BETTER(diag, tr + hstep, tc + hstep);
    559         break;
    560       case 2:
    561         CHECK_BETTER(left, tr, tc - hstep);
    562         CHECK_BETTER(up, tr - hstep, tc);
    563         CHECK_BETTER(diag, tr - hstep, tc - hstep);
    564         break;
    565       case 3:
    566         CHECK_BETTER(right, tr, tc + hstep);
    567         CHECK_BETTER(up, tr - hstep, tc);
    568         CHECK_BETTER(diag, tr - hstep, tc + hstep);
    569         break;
    570     }
    571   } else {
    572     FIRST_LEVEL_CHECKS;
    573     if (halfiters > 1) {
    574       SECOND_LEVEL_CHECKS;
    575     }
    576   }
    577 
    578   tr = br;
    579   tc = bc;
    580 
    581   // Each subsequent iteration checks at least one point in common with
    582   // the last iteration could be 2 ( if diag selected) 1/4 pel
    583 
    584   // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    585   if (forced_stop != 2) {
    586     hstep >>= 1;
    587     FIRST_LEVEL_CHECKS;
    588     if (quarteriters > 1) {
    589       SECOND_LEVEL_CHECKS;
    590     }
    591     tr = br;
    592     tc = bc;
    593   }
    594 
    595   if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
    596     hstep >>= 1;
    597     FIRST_LEVEL_CHECKS;
    598     if (eighthiters > 1) {
    599       SECOND_LEVEL_CHECKS;
    600     }
    601     tr = br;
    602     tc = bc;
    603   }
    604   // These lines insure static analysis doesn't warn that
    605   // tr and tc aren't used after the above point.
    606   (void)tr;
    607   (void)tc;
    608 
    609   bestmv->row = br;
    610   bestmv->col = bc;
    611 
    612   return besterr;
    613 }
    614 
    615 /* clang-format off */
    616 static const MV search_step_table[12] = {
    617   // left, right, up, down
    618   { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
    619   { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
    620   { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
    621 };
    622 /* clang-format on */
    623 
    624 static int accurate_sub_pel_search(
    625     const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf,
    626     const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp,
    627     const uint8_t *const src_address, const int src_stride,
    628     const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred,
    629     int w, int h, uint32_t *sse) {
    630 #if CONFIG_VP9_HIGHBITDEPTH
    631   uint64_t besterr;
    632   assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
    633   assert(w != 0 && h != 0);
    634   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    635     DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
    636     vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride,
    637                                      pred16, w, this_mv, sf, w, h, 0, kernel,
    638                                      MV_PRECISION_Q3, 0, 0, xd->bd);
    639     if (second_pred != NULL) {
    640       DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
    641       vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
    642                                h, pred16, w);
    643       besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address,
    644                         src_stride, sse);
    645     } else {
    646       besterr =
    647           vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse);
    648     }
    649   } else {
    650     DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
    651     vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
    652                               0, kernel, MV_PRECISION_Q3, 0, 0);
    653     if (second_pred != NULL) {
    654       DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
    655       vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
    656       besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
    657     } else {
    658       besterr = vfp->vf(pred, w, src_address, src_stride, sse);
    659     }
    660   }
    661   if (besterr >= UINT_MAX) return UINT_MAX;
    662   return (int)besterr;
    663 #else
    664   int besterr;
    665   DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
    666   assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
    667   assert(w != 0 && h != 0);
    668   (void)xd;
    669 
    670   vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
    671                             0, kernel, MV_PRECISION_Q3, 0, 0);
    672   if (second_pred != NULL) {
    673     DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
    674     vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
    675     besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
    676   } else {
    677     besterr = vfp->vf(pred, w, src_address, src_stride, sse);
    678   }
    679   return besterr;
    680 #endif  // CONFIG_VP9_HIGHBITDEPTH
    681 }
    682 
    683 // TODO(yunqing): this part can be further refactored.
    684 #if CONFIG_VP9_HIGHBITDEPTH
    685 /* checks if (r, c) has better score than previous best */
    686 #define CHECK_BETTER1(v, r, c)                                                 \
    687   if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                      \
    688     int64_t tmpmse;                                                            \
    689     const MV mv = { r, c };                                                    \
    690     const MV ref_mv = { rr, rc };                                              \
    691     thismse =                                                                  \
    692         accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
    693                                 y, y_stride, second_pred, w, h, &sse);         \
    694     tmpmse = thismse;                                                          \
    695     tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit);       \
    696     if (tmpmse >= INT_MAX) {                                                   \
    697       v = INT_MAX;                                                             \
    698     } else if ((v = (uint32_t)tmpmse) < besterr) {                             \
    699       besterr = v;                                                             \
    700       br = r;                                                                  \
    701       bc = c;                                                                  \
    702       *distortion = thismse;                                                   \
    703       *sse1 = sse;                                                             \
    704     }                                                                          \
    705   } else {                                                                     \
    706     v = INT_MAX;                                                               \
    707   }
    708 #else
    709 /* checks if (r, c) has better score than previous best */
    710 #define CHECK_BETTER1(v, r, c)                                                 \
    711   if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                      \
    712     const MV mv = { r, c };                                                    \
    713     const MV ref_mv = { rr, rc };                                              \
    714     thismse =                                                                  \
    715         accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
    716                                 y, y_stride, second_pred, w, h, &sse);         \
    717     if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) +       \
    718              thismse) < besterr) {                                             \
    719       besterr = v;                                                             \
    720       br = r;                                                                  \
    721       bc = c;                                                                  \
    722       *distortion = thismse;                                                   \
    723       *sse1 = sse;                                                             \
    724     }                                                                          \
    725   } else {                                                                     \
    726     v = INT_MAX;                                                               \
    727   }
    728 
    729 #endif
    730 
    731 uint32_t vp9_find_best_sub_pixel_tree(
    732     const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    733     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    734     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    735     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    736     int h, int use_accurate_subpel_search) {
    737   const uint8_t *const z = x->plane[0].src.buf;
    738   const uint8_t *const src_address = z;
    739   const int src_stride = x->plane[0].src.stride;
    740   const MACROBLOCKD *xd = &x->e_mbd;
    741   unsigned int besterr = UINT_MAX;
    742   unsigned int sse;
    743   int thismse;
    744   const int y_stride = xd->plane[0].pre[0].stride;
    745   const int offset = bestmv->row * y_stride + bestmv->col;
    746   const uint8_t *const y = xd->plane[0].pre[0].buf;
    747 
    748   int rr = ref_mv->row;
    749   int rc = ref_mv->col;
    750   int br = bestmv->row * 8;
    751   int bc = bestmv->col * 8;
    752   int hstep = 4;
    753   int iter, round = 3 - forced_stop;
    754 
    755   int minc, maxc, minr, maxr;
    756   int tr = br;
    757   int tc = bc;
    758   const MV *search_step = search_step_table;
    759   int idx, best_idx = -1;
    760   unsigned int cost_array[5];
    761   int kr, kc;
    762   MvLimits subpel_mv_limits;
    763 
    764   // TODO(yunqing): need to add 4-tap filter optimization to speed up the
    765   // encoder.
    766   const InterpKernel *kernel =
    767       (use_accurate_subpel_search > 0)
    768           ? ((use_accurate_subpel_search == USE_4_TAPS)
    769                  ? vp9_filter_kernels[FOURTAP]
    770                  : ((use_accurate_subpel_search == USE_8_TAPS)
    771                         ? vp9_filter_kernels[EIGHTTAP]
    772                         : vp9_filter_kernels[EIGHTTAP_SHARP]))
    773           : vp9_filter_kernels[BILINEAR];
    774 
    775   vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
    776   minc = subpel_mv_limits.col_min;
    777   maxc = subpel_mv_limits.col_max;
    778   minr = subpel_mv_limits.row_min;
    779   maxr = subpel_mv_limits.row_max;
    780 
    781   if (!(allow_hp && use_mv_hp(ref_mv)))
    782     if (round == 3) round = 2;
    783 
    784   bestmv->row *= 8;
    785   bestmv->col *= 8;
    786 
    787   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
    788                                src_stride, y, y_stride, second_pred, w, h,
    789                                offset, mvjcost, mvcost, sse1, distortion);
    790 
    791   (void)cost_list;  // to silence compiler warning
    792 
    793   for (iter = 0; iter < round; ++iter) {
    794     // Check vertical and horizontal sub-pixel positions.
    795     for (idx = 0; idx < 4; ++idx) {
    796       tr = br + search_step[idx].row;
    797       tc = bc + search_step[idx].col;
    798       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
    799         MV this_mv;
    800         this_mv.row = tr;
    801         this_mv.col = tc;
    802 
    803         if (use_accurate_subpel_search) {
    804           thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
    805                                             src_address, src_stride, y,
    806                                             y_stride, second_pred, w, h, &sse);
    807         } else {
    808           const uint8_t *const pre_address =
    809               y + (tr >> 3) * y_stride + (tc >> 3);
    810           if (second_pred == NULL)
    811             thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
    812                                src_address, src_stride, &sse);
    813           else
    814             thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
    815                                 src_address, src_stride, &sse, second_pred);
    816         }
    817 
    818         cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
    819                                                 mvcost, error_per_bit);
    820 
    821         if (cost_array[idx] < besterr) {
    822           best_idx = idx;
    823           besterr = cost_array[idx];
    824           *distortion = thismse;
    825           *sse1 = sse;
    826         }
    827       } else {
    828         cost_array[idx] = UINT_MAX;
    829       }
    830     }
    831 
    832     // Check diagonal sub-pixel position
    833     kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    834     kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
    835 
    836     tc = bc + kc;
    837     tr = br + kr;
    838     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
    839       MV this_mv = { tr, tc };
    840       if (use_accurate_subpel_search) {
    841         thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
    842                                           src_address, src_stride, y, y_stride,
    843                                           second_pred, w, h, &sse);
    844       } else {
    845         const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
    846         if (second_pred == NULL)
    847           thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
    848                              src_stride, &sse);
    849         else
    850           thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
    851                               src_address, src_stride, &sse, second_pred);
    852       }
    853 
    854       cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
    855                                             error_per_bit);
    856 
    857       if (cost_array[4] < besterr) {
    858         best_idx = 4;
    859         besterr = cost_array[4];
    860         *distortion = thismse;
    861         *sse1 = sse;
    862       }
    863     } else {
    864       cost_array[idx] = UINT_MAX;
    865     }
    866 
    867     if (best_idx < 4 && best_idx >= 0) {
    868       br += search_step[best_idx].row;
    869       bc += search_step[best_idx].col;
    870     } else if (best_idx == 4) {
    871       br = tr;
    872       bc = tc;
    873     }
    874 
    875     if (iters_per_step > 0 && best_idx != -1) {
    876       unsigned int second;
    877       const int br0 = br;
    878       const int bc0 = bc;
    879       assert(tr == br || tc == bc);
    880 
    881       if (tr == br && tc != bc) {
    882         kc = bc - tc;
    883         if (iters_per_step == 1) {
    884           if (use_accurate_subpel_search) {
    885             CHECK_BETTER1(second, br0, bc0 + kc);
    886           } else {
    887             CHECK_BETTER(second, br0, bc0 + kc);
    888           }
    889         }
    890       } else if (tr != br && tc == bc) {
    891         kr = br - tr;
    892         if (iters_per_step == 1) {
    893           if (use_accurate_subpel_search) {
    894             CHECK_BETTER1(second, br0 + kr, bc0);
    895           } else {
    896             CHECK_BETTER(second, br0 + kr, bc0);
    897           }
    898         }
    899       }
    900 
    901       if (iters_per_step > 1) {
    902         if (use_accurate_subpel_search) {
    903           CHECK_BETTER1(second, br0 + kr, bc0);
    904           CHECK_BETTER1(second, br0, bc0 + kc);
    905           if (br0 != br || bc0 != bc) {
    906             CHECK_BETTER1(second, br0 + kr, bc0 + kc);
    907           }
    908         } else {
    909           CHECK_BETTER(second, br0 + kr, bc0);
    910           CHECK_BETTER(second, br0, bc0 + kc);
    911           if (br0 != br || bc0 != bc) {
    912             CHECK_BETTER(second, br0 + kr, bc0 + kc);
    913           }
    914         }
    915       }
    916     }
    917 
    918     search_step += 4;
    919     hstep >>= 1;
    920     best_idx = -1;
    921   }
    922 
    923   // Each subsequent iteration checks at least one point in common with
    924   // the last iteration could be 2 ( if diag selected) 1/4 pel
    925 
    926   // These lines insure static analysis doesn't warn that
    927   // tr and tc aren't used after the above point.
    928   (void)tr;
    929   (void)tc;
    930 
    931   bestmv->row = br;
    932   bestmv->col = bc;
    933 
    934   return besterr;
    935 }
    936 
    937 #undef CHECK_BETTER
    938 #undef CHECK_BETTER1
    939 
    940 static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
    941                                int range) {
    942   return ((row - range) >= mv_limits->row_min) &
    943          ((row + range) <= mv_limits->row_max) &
    944          ((col - range) >= mv_limits->col_min) &
    945          ((col + range) <= mv_limits->col_max);
    946 }
    947 
    948 static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
    949   return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
    950          (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
    951 }
    952 
    953 #define CHECK_BETTER                                                      \
    954   {                                                                       \
    955     if (thissad < bestsad) {                                              \
    956       if (use_mvcost)                                                     \
    957         thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
    958       if (thissad < bestsad) {                                            \
    959         bestsad = thissad;                                                \
    960         best_site = i;                                                    \
    961       }                                                                   \
    962     }                                                                     \
    963   }
    964 
    965 #define MAX_PATTERN_SCALES 11
    966 #define MAX_PATTERN_CANDIDATES 8  // max number of canddiates per scale
    967 #define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
    968 
    969 // Calculate and return a sad+mvcost list around an integer best pel.
    970 static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
    971                                       int sadpb,
    972                                       const vp9_variance_fn_ptr_t *fn_ptr,
    973                                       const MV *best_mv, int *cost_list) {
    974   static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
    975   const struct buf_2d *const what = &x->plane[0].src;
    976   const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
    977   const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
    978   int br = best_mv->row;
    979   int bc = best_mv->col;
    980   MV this_mv;
    981   int i;
    982   unsigned int sse;
    983 
    984   this_mv.row = br;
    985   this_mv.col = bc;
    986   cost_list[0] =
    987       fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
    988                  in_what->stride, &sse) +
    989       mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
    990   if (check_bounds(&x->mv_limits, br, bc, 1)) {
    991     for (i = 0; i < 4; i++) {
    992       const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
    993       cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
    994                                     get_buf_from_mv(in_what, &this_mv),
    995                                     in_what->stride, &sse) +
    996                          mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
    997                                      x->mvcost, x->errorperbit);
    998     }
    999   } else {
   1000     for (i = 0; i < 4; i++) {
   1001       const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
   1002       if (!is_mv_in(&x->mv_limits, &this_mv))
   1003         cost_list[i + 1] = INT_MAX;
   1004       else
   1005         cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
   1006                                       get_buf_from_mv(in_what, &this_mv),
   1007                                       in_what->stride, &sse) +
   1008                            mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
   1009                                        x->mvcost, x->errorperbit);
   1010     }
   1011   }
   1012 }
   1013 
   1014 // Generic pattern search function that searches over multiple scales.
   1015 // Each scale can have a different number of candidates and shape of
   1016 // candidates as indicated in the num_candidates and candidates arrays
   1017 // passed into this function
   1018 //
   1019 static int vp9_pattern_search(
   1020     const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
   1021     int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
   1022     int use_mvcost, const MV *center_mv, MV *best_mv,
   1023     const int num_candidates[MAX_PATTERN_SCALES],
   1024     const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
   1025   const MACROBLOCKD *const xd = &x->e_mbd;
   1026   static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
   1027     10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
   1028   };
   1029   int i, s, t;
   1030   const struct buf_2d *const what = &x->plane[0].src;
   1031   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1032   int br, bc;
   1033   int bestsad = INT_MAX;
   1034   int thissad;
   1035   int k = -1;
   1036   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
   1037   int best_init_s = search_param_to_steps[search_param];
   1038   // adjust ref_mv to make sure it is within MV range
   1039   clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   1040            x->mv_limits.row_min, x->mv_limits.row_max);
   1041   br = ref_mv->row;
   1042   bc = ref_mv->col;
   1043 
   1044   // Work out the start point for the search
   1045   bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
   1046                      in_what->stride) +
   1047             mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
   1048 
   1049   // Search all possible scales upto the search param around the center point
   1050   // pick the scale of the point that is best as the starting scale of
   1051   // further steps around it.
   1052   if (do_init_search) {
   1053     s = best_init_s;
   1054     best_init_s = -1;
   1055     for (t = 0; t <= s; ++t) {
   1056       int best_site = -1;
   1057       if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
   1058         for (i = 0; i < num_candidates[t]; i++) {
   1059           const MV this_mv = { br + candidates[t][i].row,
   1060                                bc + candidates[t][i].col };
   1061           thissad =
   1062               vfp->sdf(what->buf, what->stride,
   1063                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1064           CHECK_BETTER
   1065         }
   1066       } else {
   1067         for (i = 0; i < num_candidates[t]; i++) {
   1068           const MV this_mv = { br + candidates[t][i].row,
   1069                                bc + candidates[t][i].col };
   1070           if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
   1071           thissad =
   1072               vfp->sdf(what->buf, what->stride,
   1073                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1074           CHECK_BETTER
   1075         }
   1076       }
   1077       if (best_site == -1) {
   1078         continue;
   1079       } else {
   1080         best_init_s = t;
   1081         k = best_site;
   1082       }
   1083     }
   1084     if (best_init_s != -1) {
   1085       br += candidates[best_init_s][k].row;
   1086       bc += candidates[best_init_s][k].col;
   1087     }
   1088   }
   1089 
   1090   // If the center point is still the best, just skip this and move to
   1091   // the refinement step.
   1092   if (best_init_s != -1) {
   1093     int best_site = -1;
   1094     s = best_init_s;
   1095 
   1096     do {
   1097       // No need to search all 6 points the 1st time if initial search was used
   1098       if (!do_init_search || s != best_init_s) {
   1099         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
   1100           for (i = 0; i < num_candidates[s]; i++) {
   1101             const MV this_mv = { br + candidates[s][i].row,
   1102                                  bc + candidates[s][i].col };
   1103             thissad =
   1104                 vfp->sdf(what->buf, what->stride,
   1105                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1106             CHECK_BETTER
   1107           }
   1108         } else {
   1109           for (i = 0; i < num_candidates[s]; i++) {
   1110             const MV this_mv = { br + candidates[s][i].row,
   1111                                  bc + candidates[s][i].col };
   1112             if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
   1113             thissad =
   1114                 vfp->sdf(what->buf, what->stride,
   1115                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1116             CHECK_BETTER
   1117           }
   1118         }
   1119 
   1120         if (best_site == -1) {
   1121           continue;
   1122         } else {
   1123           br += candidates[s][best_site].row;
   1124           bc += candidates[s][best_site].col;
   1125           k = best_site;
   1126         }
   1127       }
   1128 
   1129       do {
   1130         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
   1131         best_site = -1;
   1132         next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
   1133         next_chkpts_indices[1] = k;
   1134         next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
   1135 
   1136         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
   1137           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
   1138             const MV this_mv = {
   1139               br + candidates[s][next_chkpts_indices[i]].row,
   1140               bc + candidates[s][next_chkpts_indices[i]].col
   1141             };
   1142             thissad =
   1143                 vfp->sdf(what->buf, what->stride,
   1144                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1145             CHECK_BETTER
   1146           }
   1147         } else {
   1148           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
   1149             const MV this_mv = {
   1150               br + candidates[s][next_chkpts_indices[i]].row,
   1151               bc + candidates[s][next_chkpts_indices[i]].col
   1152             };
   1153             if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
   1154             thissad =
   1155                 vfp->sdf(what->buf, what->stride,
   1156                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1157             CHECK_BETTER
   1158           }
   1159         }
   1160 
   1161         if (best_site != -1) {
   1162           k = next_chkpts_indices[best_site];
   1163           br += candidates[s][k].row;
   1164           bc += candidates[s][k].col;
   1165         }
   1166       } while (best_site != -1);
   1167     } while (s--);
   1168   }
   1169 
   1170   // Returns the one-away integer pel sad values around the best as follows:
   1171   // cost_list[0]: cost at the best integer pel
   1172   // cost_list[1]: cost at delta {0, -1} (left)   from the best integer pel
   1173   // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
   1174   // cost_list[3]: cost at delta { 0, 1} (right)  from the best integer pel
   1175   // cost_list[4]: cost at delta {-1, 0} (top)    from the best integer pel
   1176   if (cost_list) {
   1177     const MV best_mv = { br, bc };
   1178     calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
   1179   }
   1180   best_mv->row = br;
   1181   best_mv->col = bc;
   1182   return bestsad;
   1183 }
   1184 
   1185 // A specialized function where the smallest scale search candidates
   1186 // are 4 1-away neighbors, and cost_list is non-null
   1187 // TODO(debargha): Merge this function with the one above. Also remove
   1188 // use_mvcost option since it is always 1, to save unnecessary branches.
   1189 static int vp9_pattern_search_sad(
   1190     const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
   1191     int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
   1192     int use_mvcost, const MV *center_mv, MV *best_mv,
   1193     const int num_candidates[MAX_PATTERN_SCALES],
   1194     const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
   1195   const MACROBLOCKD *const xd = &x->e_mbd;
   1196   static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
   1197     10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
   1198   };
   1199   int i, s, t;
   1200   const struct buf_2d *const what = &x->plane[0].src;
   1201   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1202   int br, bc;
   1203   int bestsad = INT_MAX;
   1204   int thissad;
   1205   int k = -1;
   1206   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
   1207   int best_init_s = search_param_to_steps[search_param];
   1208   // adjust ref_mv to make sure it is within MV range
   1209   clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   1210            x->mv_limits.row_min, x->mv_limits.row_max);
   1211   br = ref_mv->row;
   1212   bc = ref_mv->col;
   1213   if (cost_list != NULL) {
   1214     cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
   1215         INT_MAX;
   1216   }
   1217 
   1218   // Work out the start point for the search
   1219   bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
   1220                      in_what->stride) +
   1221             mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
   1222 
   1223   // Search all possible scales upto the search param around the center point
   1224   // pick the scale of the point that is best as the starting scale of
   1225   // further steps around it.
   1226   if (do_init_search) {
   1227     s = best_init_s;
   1228     best_init_s = -1;
   1229     for (t = 0; t <= s; ++t) {
   1230       int best_site = -1;
   1231       if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
   1232         for (i = 0; i < num_candidates[t]; i++) {
   1233           const MV this_mv = { br + candidates[t][i].row,
   1234                                bc + candidates[t][i].col };
   1235           thissad =
   1236               vfp->sdf(what->buf, what->stride,
   1237                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1238           CHECK_BETTER
   1239         }
   1240       } else {
   1241         for (i = 0; i < num_candidates[t]; i++) {
   1242           const MV this_mv = { br + candidates[t][i].row,
   1243                                bc + candidates[t][i].col };
   1244           if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
   1245           thissad =
   1246               vfp->sdf(what->buf, what->stride,
   1247                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1248           CHECK_BETTER
   1249         }
   1250       }
   1251       if (best_site == -1) {
   1252         continue;
   1253       } else {
   1254         best_init_s = t;
   1255         k = best_site;
   1256       }
   1257     }
   1258     if (best_init_s != -1) {
   1259       br += candidates[best_init_s][k].row;
   1260       bc += candidates[best_init_s][k].col;
   1261     }
   1262   }
   1263 
   1264   // If the center point is still the best, just skip this and move to
   1265   // the refinement step.
   1266   if (best_init_s != -1) {
   1267     int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
   1268     int best_site = -1;
   1269     s = best_init_s;
   1270 
   1271     for (; s >= do_sad; s--) {
   1272       if (!do_init_search || s != best_init_s) {
   1273         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
   1274           for (i = 0; i < num_candidates[s]; i++) {
   1275             const MV this_mv = { br + candidates[s][i].row,
   1276                                  bc + candidates[s][i].col };
   1277             thissad =
   1278                 vfp->sdf(what->buf, what->stride,
   1279                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1280             CHECK_BETTER
   1281           }
   1282         } else {
   1283           for (i = 0; i < num_candidates[s]; i++) {
   1284             const MV this_mv = { br + candidates[s][i].row,
   1285                                  bc + candidates[s][i].col };
   1286             if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
   1287             thissad =
   1288                 vfp->sdf(what->buf, what->stride,
   1289                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1290             CHECK_BETTER
   1291           }
   1292         }
   1293 
   1294         if (best_site == -1) {
   1295           continue;
   1296         } else {
   1297           br += candidates[s][best_site].row;
   1298           bc += candidates[s][best_site].col;
   1299           k = best_site;
   1300         }
   1301       }
   1302 
   1303       do {
   1304         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
   1305         best_site = -1;
   1306         next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
   1307         next_chkpts_indices[1] = k;
   1308         next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
   1309 
   1310         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
   1311           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
   1312             const MV this_mv = {
   1313               br + candidates[s][next_chkpts_indices[i]].row,
   1314               bc + candidates[s][next_chkpts_indices[i]].col
   1315             };
   1316             thissad =
   1317                 vfp->sdf(what->buf, what->stride,
   1318                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1319             CHECK_BETTER
   1320           }
   1321         } else {
   1322           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
   1323             const MV this_mv = {
   1324               br + candidates[s][next_chkpts_indices[i]].row,
   1325               bc + candidates[s][next_chkpts_indices[i]].col
   1326             };
   1327             if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
   1328             thissad =
   1329                 vfp->sdf(what->buf, what->stride,
   1330                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1331             CHECK_BETTER
   1332           }
   1333         }
   1334 
   1335         if (best_site != -1) {
   1336           k = next_chkpts_indices[best_site];
   1337           br += candidates[s][k].row;
   1338           bc += candidates[s][k].col;
   1339         }
   1340       } while (best_site != -1);
   1341     }
   1342 
   1343     // Note: If we enter the if below, then cost_list must be non-NULL.
   1344     if (s == 0) {
   1345       cost_list[0] = bestsad;
   1346       if (!do_init_search || s != best_init_s) {
   1347         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
   1348           for (i = 0; i < num_candidates[s]; i++) {
   1349             const MV this_mv = { br + candidates[s][i].row,
   1350                                  bc + candidates[s][i].col };
   1351             cost_list[i + 1] = thissad =
   1352                 vfp->sdf(what->buf, what->stride,
   1353                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1354             CHECK_BETTER
   1355           }
   1356         } else {
   1357           for (i = 0; i < num_candidates[s]; i++) {
   1358             const MV this_mv = { br + candidates[s][i].row,
   1359                                  bc + candidates[s][i].col };
   1360             if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
   1361             cost_list[i + 1] = thissad =
   1362                 vfp->sdf(what->buf, what->stride,
   1363                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1364             CHECK_BETTER
   1365           }
   1366         }
   1367 
   1368         if (best_site != -1) {
   1369           br += candidates[s][best_site].row;
   1370           bc += candidates[s][best_site].col;
   1371           k = best_site;
   1372         }
   1373       }
   1374       while (best_site != -1) {
   1375         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
   1376         best_site = -1;
   1377         next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
   1378         next_chkpts_indices[1] = k;
   1379         next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
   1380         cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
   1381         cost_list[((k + 2) % 4) + 1] = cost_list[0];
   1382         cost_list[0] = bestsad;
   1383 
   1384         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
   1385           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
   1386             const MV this_mv = {
   1387               br + candidates[s][next_chkpts_indices[i]].row,
   1388               bc + candidates[s][next_chkpts_indices[i]].col
   1389             };
   1390             cost_list[next_chkpts_indices[i] + 1] = thissad =
   1391                 vfp->sdf(what->buf, what->stride,
   1392                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1393             CHECK_BETTER
   1394           }
   1395         } else {
   1396           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
   1397             const MV this_mv = {
   1398               br + candidates[s][next_chkpts_indices[i]].row,
   1399               bc + candidates[s][next_chkpts_indices[i]].col
   1400             };
   1401             if (!is_mv_in(&x->mv_limits, &this_mv)) {
   1402               cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
   1403               continue;
   1404             }
   1405             cost_list[next_chkpts_indices[i] + 1] = thissad =
   1406                 vfp->sdf(what->buf, what->stride,
   1407                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1408             CHECK_BETTER
   1409           }
   1410         }
   1411 
   1412         if (best_site != -1) {
   1413           k = next_chkpts_indices[best_site];
   1414           br += candidates[s][k].row;
   1415           bc += candidates[s][k].col;
   1416         }
   1417       }
   1418     }
   1419   }
   1420 
   1421   // Returns the one-away integer pel sad values around the best as follows:
   1422   // cost_list[0]: sad at the best integer pel
   1423   // cost_list[1]: sad at delta {0, -1} (left)   from the best integer pel
   1424   // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
   1425   // cost_list[3]: sad at delta { 0, 1} (right)  from the best integer pel
   1426   // cost_list[4]: sad at delta {-1, 0} (top)    from the best integer pel
   1427   if (cost_list) {
   1428     static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
   1429     if (cost_list[0] == INT_MAX) {
   1430       cost_list[0] = bestsad;
   1431       if (check_bounds(&x->mv_limits, br, bc, 1)) {
   1432         for (i = 0; i < 4; i++) {
   1433           const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
   1434           cost_list[i + 1] =
   1435               vfp->sdf(what->buf, what->stride,
   1436                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1437         }
   1438       } else {
   1439         for (i = 0; i < 4; i++) {
   1440           const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
   1441           if (!is_mv_in(&x->mv_limits, &this_mv))
   1442             cost_list[i + 1] = INT_MAX;
   1443           else
   1444             cost_list[i + 1] =
   1445                 vfp->sdf(what->buf, what->stride,
   1446                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
   1447         }
   1448       }
   1449     } else {
   1450       if (use_mvcost) {
   1451         for (i = 0; i < 4; i++) {
   1452           const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
   1453           if (cost_list[i + 1] != INT_MAX) {
   1454             cost_list[i + 1] +=
   1455                 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
   1456           }
   1457         }
   1458       }
   1459     }
   1460   }
   1461   best_mv->row = br;
   1462   best_mv->col = bc;
   1463   return bestsad;
   1464 }
   1465 
   1466 int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
   1467                        const MV *center_mv, const vp9_variance_fn_ptr_t *vfp,
   1468                        int use_mvcost) {
   1469   const MACROBLOCKD *const xd = &x->e_mbd;
   1470   const struct buf_2d *const what = &x->plane[0].src;
   1471   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1472   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
   1473   uint32_t unused;
   1474 #if CONFIG_VP9_HIGHBITDEPTH
   1475   uint64_t err =
   1476       vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
   1477               in_what->stride, &unused);
   1478   err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
   1479                                    x->errorperbit)
   1480                      : 0);
   1481   if (err >= INT_MAX) return INT_MAX;
   1482   return (int)err;
   1483 #else
   1484   return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
   1485                  in_what->stride, &unused) +
   1486          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
   1487                                    x->errorperbit)
   1488                      : 0);
   1489 #endif
   1490 }
   1491 
   1492 int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
   1493                           const MV *center_mv, const uint8_t *second_pred,
   1494                           const vp9_variance_fn_ptr_t *vfp, int use_mvcost) {
   1495   const MACROBLOCKD *const xd = &x->e_mbd;
   1496   const struct buf_2d *const what = &x->plane[0].src;
   1497   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1498   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
   1499   unsigned int unused;
   1500 
   1501   return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
   1502                    what->buf, what->stride, &unused, second_pred) +
   1503          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
   1504                                    x->errorperbit)
   1505                      : 0);
   1506 }
   1507 
   1508 static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
   1509                       int sad_per_bit, int do_init_search, int *cost_list,
   1510                       const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
   1511                       const MV *center_mv, MV *best_mv) {
   1512   // First scale has 8-closest points, the rest have 6 points in hex shape
   1513   // at increasing scales
   1514   static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
   1515                                                               6, 6, 6, 6, 6 };
   1516   // Note that the largest candidate step at each scale is 2^scale
   1517   /* clang-format off */
   1518   static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
   1519     { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
   1520       { -1, 0 } },
   1521     { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
   1522     { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
   1523     { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
   1524     { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
   1525     { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
   1526       { -32, 0 } },
   1527     { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
   1528       { -64, 0 } },
   1529     { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
   1530       { -128, 0 } },
   1531     { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
   1532       { -256, 0 } },
   1533     { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
   1534       { -512, 0 } },
   1535     { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
   1536       { -512, 1024 }, { -1024, 0 } }
   1537   };
   1538   /* clang-format on */
   1539   return vp9_pattern_search(
   1540       x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
   1541       use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates);
   1542 }
   1543 
   1544 static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
   1545                          int sad_per_bit, int do_init_search, int *cost_list,
   1546                          const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
   1547                          const MV *center_mv, MV *best_mv) {
   1548   // First scale has 4-closest points, the rest have 8 points in diamond
   1549   // shape at increasing scales
   1550   static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
   1551     4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
   1552   };
   1553   // Note that the largest candidate step at each scale is 2^scale
   1554   /* clang-format off */
   1555   static const MV
   1556       bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
   1557         { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
   1558         { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
   1559           { -1, 1 }, { -2, 0 } },
   1560         { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
   1561           { -2, 2 }, { -4, 0 } },
   1562         { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
   1563           { -4, 4 }, { -8, 0 } },
   1564         { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
   1565           { -8, 8 }, { -16, 0 } },
   1566         { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
   1567           { 0, 32 }, { -16, 16 }, { -32, 0 } },
   1568         { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
   1569           { 0, 64 }, { -32, 32 }, { -64, 0 } },
   1570         { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
   1571           { 0, 128 }, { -64, 64 }, { -128, 0 } },
   1572         { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
   1573           { 0, 256 }, { -128, 128 }, { -256, 0 } },
   1574         { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
   1575           { 0, 512 }, { -256, 256 }, { -512, 0 } },
   1576         { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
   1577           { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } }
   1578       };
   1579   /* clang-format on */
   1580   return vp9_pattern_search_sad(
   1581       x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
   1582       use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates);
   1583 }
   1584 
   1585 static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
   1586                          int sad_per_bit, int do_init_search, int *cost_list,
   1587                          const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
   1588                          const MV *center_mv, MV *best_mv) {
   1589   // All scales have 8 closest points in square shape
   1590   static const int square_num_candidates[MAX_PATTERN_SCALES] = {
   1591     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
   1592   };
   1593   // Note that the largest candidate step at each scale is 2^scale
   1594   /* clang-format off */
   1595   static const MV
   1596       square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
   1597         { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
   1598           { -1, 1 }, { -1, 0 } },
   1599         { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
   1600           { -2, 2 }, { -2, 0 } },
   1601         { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
   1602           { -4, 4 }, { -4, 0 } },
   1603         { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
   1604           { -8, 8 }, { -8, 0 } },
   1605         { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
   1606           { 0, 16 }, { -16, 16 }, { -16, 0 } },
   1607         { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
   1608           { 0, 32 }, { -32, 32 }, { -32, 0 } },
   1609         { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
   1610           { 0, 64 }, { -64, 64 }, { -64, 0 } },
   1611         { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
   1612           { 0, 128 }, { -128, 128 }, { -128, 0 } },
   1613         { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
   1614           { 0, 256 }, { -256, 256 }, { -256, 0 } },
   1615         { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
   1616           { 0, 512 }, { -512, 512 }, { -512, 0 } },
   1617         { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
   1618           { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } }
   1619       };
   1620   /* clang-format on */
   1621   return vp9_pattern_search(
   1622       x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
   1623       use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates);
   1624 }
   1625 
   1626 static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
   1627                            int sad_per_bit,
   1628                            int do_init_search,  // must be zero for fast_hex
   1629                            int *cost_list, const vp9_variance_fn_ptr_t *vfp,
   1630                            int use_mvcost, const MV *center_mv, MV *best_mv) {
   1631   return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
   1632                     sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
   1633                     center_mv, best_mv);
   1634 }
   1635 
   1636 static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
   1637                            int sad_per_bit, int do_init_search, int *cost_list,
   1638                            const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
   1639                            const MV *center_mv, MV *best_mv) {
   1640   return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
   1641                        sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
   1642                        center_mv, best_mv);
   1643 }
   1644 
   1645 #undef CHECK_BETTER
   1646 
   1647 // Exhuastive motion search around a given centre position with a given
   1648 // step size.
   1649 static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
   1650                                   int range, int step, int sad_per_bit,
   1651                                   const vp9_variance_fn_ptr_t *fn_ptr,
   1652                                   const MV *center_mv) {
   1653   const MACROBLOCKD *const xd = &x->e_mbd;
   1654   const struct buf_2d *const what = &x->plane[0].src;
   1655   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   1656   MV fcenter_mv = { center_mv->row, center_mv->col };
   1657   unsigned int best_sad = INT_MAX;
   1658   int r, c, i;
   1659   int start_col, end_col, start_row, end_row;
   1660   int col_step = (step > 1) ? step : 4;
   1661 
   1662   assert(step >= 1);
   1663 
   1664   clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   1665            x->mv_limits.row_min, x->mv_limits.row_max);
   1666   *best_mv = fcenter_mv;
   1667   best_sad =
   1668       fn_ptr->sdf(what->buf, what->stride,
   1669                   get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
   1670       mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
   1671   start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
   1672   start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
   1673   end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
   1674   end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
   1675 
   1676   for (r = start_row; r <= end_row; r += step) {
   1677     for (c = start_col; c <= end_col; c += col_step) {
   1678       // Step > 1 means we are not checking every location in this pass.
   1679       if (step > 1) {
   1680         const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
   1681         unsigned int sad =
   1682             fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
   1683                         in_what->stride);
   1684         if (sad < best_sad) {
   1685           sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
   1686           if (sad < best_sad) {
   1687             best_sad = sad;
   1688             *best_mv = mv;
   1689           }
   1690         }
   1691       } else {
   1692         // 4 sads in a single call if we are checking every location
   1693         if (c + 3 <= end_col) {
   1694           unsigned int sads[4];
   1695           const uint8_t *addrs[4];
   1696           for (i = 0; i < 4; ++i) {
   1697             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
   1698             addrs[i] = get_buf_from_mv(in_what, &mv);
   1699           }
   1700           fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
   1701 
   1702           for (i = 0; i < 4; ++i) {
   1703             if (sads[i] < best_sad) {
   1704               const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
   1705               const unsigned int sad =
   1706                   sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
   1707               if (sad < best_sad) {
   1708                 best_sad = sad;
   1709                 *best_mv = mv;
   1710               }
   1711             }
   1712           }
   1713         } else {
   1714           for (i = 0; i < end_col - c; ++i) {
   1715             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
   1716             unsigned int sad =
   1717                 fn_ptr->sdf(what->buf, what->stride,
   1718                             get_buf_from_mv(in_what, &mv), in_what->stride);
   1719             if (sad < best_sad) {
   1720               sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
   1721               if (sad < best_sad) {
   1722                 best_sad = sad;
   1723                 *best_mv = mv;
   1724               }
   1725             }
   1726           }
   1727         }
   1728       }
   1729     }
   1730   }
   1731 
   1732   return best_sad;
   1733 }
   1734 
   1735 #if CONFIG_NON_GREEDY_MV
   1736 double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
   1737                                 int mv_num) {
   1738   int i;
   1739   int update = 0;
   1740   double best_cost = 0;
   1741   vpx_clear_system_state();
   1742   for (i = 0; i < mv_num; ++i) {
   1743     if (nb_mvs[i].as_int != INVALID_MV) {
   1744       MV nb_mv = nb_mvs[i].as_mv;
   1745       const double row_diff = mv->row - nb_mv.row;
   1746       const double col_diff = mv->col - nb_mv.col;
   1747       double cost = row_diff * row_diff + col_diff * col_diff;
   1748       cost = log2(1 + cost);
   1749       if (update == 0) {
   1750         best_cost = cost;
   1751         update = 1;
   1752       } else {
   1753         best_cost = cost < best_cost ? cost : best_cost;
   1754       }
   1755     }
   1756   }
   1757   return best_cost;
   1758 }
   1759 
   1760 double vp9_diamond_search_sad_new(const MACROBLOCK *x,
   1761                                   const search_site_config *cfg,
   1762                                   const MV *init_full_mv, MV *best_full_mv,
   1763                                   double *best_mv_dist, double *best_mv_cost,
   1764                                   int search_param, double lambda, int *num00,
   1765                                   const vp9_variance_fn_ptr_t *fn_ptr,
   1766                                   const int_mv *nb_full_mvs, int full_mv_num) {
   1767   int i, j, step;
   1768 
   1769   const MACROBLOCKD *const xd = &x->e_mbd;
   1770   uint8_t *what = x->plane[0].src.buf;
   1771   const int what_stride = x->plane[0].src.stride;
   1772   const uint8_t *in_what;
   1773   const int in_what_stride = xd->plane[0].pre[0].stride;
   1774   const uint8_t *best_address;
   1775 
   1776   double bestsad;
   1777   int best_site = -1;
   1778   int last_site = -1;
   1779 
   1780   // search_param determines the length of the initial step and hence the number
   1781   // of iterations.
   1782   // 0 = initial step (MAX_FIRST_STEP) pel
   1783   // 1 = (MAX_FIRST_STEP/2) pel,
   1784   // 2 = (MAX_FIRST_STEP/4) pel...
   1785   //  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
   1786   const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
   1787   const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
   1788   const int tot_steps = cfg->total_steps - search_param;
   1789   vpx_clear_system_state();
   1790 
   1791   *best_full_mv = *init_full_mv;
   1792   clamp_mv(best_full_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   1793            x->mv_limits.row_min, x->mv_limits.row_max);
   1794   *num00 = 0;
   1795 
   1796   // Work out the start point for the search
   1797   in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride +
   1798             best_full_mv->col;
   1799   best_address = in_what;
   1800 
   1801   // Check the starting position
   1802   *best_mv_dist = fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
   1803   *best_mv_cost =
   1804       vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
   1805   bestsad = (*best_mv_dist) + lambda * (*best_mv_cost);
   1806 
   1807   i = 0;
   1808 
   1809   for (step = 0; step < tot_steps; step++) {
   1810     int all_in = 1, t;
   1811 
   1812     // All_in is true if every one of the points we are checking are within
   1813     // the bounds of the image.
   1814     all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
   1815     all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
   1816     all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
   1817     all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
   1818 
   1819     // If all the pixels are within the bounds we don't check whether the
   1820     // search point is valid in this loop,  otherwise we check each point
   1821     // for validity..
   1822     if (all_in) {
   1823       unsigned int sad_array[4];
   1824 
   1825       for (j = 0; j < cfg->searches_per_step; j += 4) {
   1826         unsigned char const *block_offset[4];
   1827 
   1828         for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
   1829 
   1830         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
   1831                        sad_array);
   1832 
   1833         for (t = 0; t < 4; t++, i++) {
   1834           const MV this_mv = { best_full_mv->row + ss_mv[i].row,
   1835                                best_full_mv->col + ss_mv[i].col };
   1836           const double mv_dist = sad_array[t];
   1837           const double mv_cost =
   1838               vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
   1839           double thissad = mv_dist + lambda * mv_cost;
   1840           if (thissad < bestsad) {
   1841             bestsad = thissad;
   1842             *best_mv_dist = mv_dist;
   1843             *best_mv_cost = mv_cost;
   1844             best_site = i;
   1845           }
   1846         }
   1847       }
   1848     } else {
   1849       for (j = 0; j < cfg->searches_per_step; j++) {
   1850         // Trap illegal vectors
   1851         const MV this_mv = { best_full_mv->row + ss_mv[i].row,
   1852                              best_full_mv->col + ss_mv[i].col };
   1853 
   1854         if (is_mv_in(&x->mv_limits, &this_mv)) {
   1855           const uint8_t *const check_here = ss_os[i] + best_address;
   1856           const double mv_dist =
   1857               fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1858           const double mv_cost =
   1859               vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
   1860           double thissad = mv_dist + lambda * mv_cost;
   1861           if (thissad < bestsad) {
   1862             bestsad = thissad;
   1863             *best_mv_dist = mv_dist;
   1864             *best_mv_cost = mv_cost;
   1865             best_site = i;
   1866           }
   1867         }
   1868         i++;
   1869       }
   1870     }
   1871     if (best_site != last_site) {
   1872       best_full_mv->row += ss_mv[best_site].row;
   1873       best_full_mv->col += ss_mv[best_site].col;
   1874       best_address += ss_os[best_site];
   1875       last_site = best_site;
   1876     } else if (best_address == in_what) {
   1877       (*num00)++;
   1878     }
   1879   }
   1880   return bestsad;
   1881 }
   1882 
   1883 void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
   1884                              int mi_col, int rf_idx, BLOCK_SIZE bsize,
   1885                              int_mv *nb_full_mvs) {
   1886   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   1887   const int mi_height = num_8x8_blocks_high_lookup[bsize];
   1888   const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
   1889   int i;
   1890   for (i = 0; i < NB_MVS_NUM; ++i) {
   1891     int r = dirs[i][0] * mi_height;
   1892     int c = dirs[i][1] * mi_width;
   1893     if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
   1894         mi_col + c < tpl_frame->mi_cols) {
   1895       const TplDepStats *tpl_ptr =
   1896           &tpl_frame
   1897                ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
   1898       int_mv *mv =
   1899           get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
   1900       if (tpl_ptr->ready[rf_idx]) {
   1901         nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);
   1902       } else {
   1903         nb_full_mvs[i].as_int = INVALID_MV;
   1904       }
   1905     } else {
   1906       nb_full_mvs[i].as_int = INVALID_MV;
   1907     }
   1908   }
   1909 }
   1910 #endif  // CONFIG_NON_GREEDY_MV
   1911 
   1912 int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
   1913                              MV *ref_mv, MV *best_mv, int search_param,
   1914                              int sad_per_bit, int *num00,
   1915                              const vp9_variance_fn_ptr_t *fn_ptr,
   1916                              const MV *center_mv) {
   1917   int i, j, step;
   1918 
   1919   const MACROBLOCKD *const xd = &x->e_mbd;
   1920   uint8_t *what = x->plane[0].src.buf;
   1921   const int what_stride = x->plane[0].src.stride;
   1922   const uint8_t *in_what;
   1923   const int in_what_stride = xd->plane[0].pre[0].stride;
   1924   const uint8_t *best_address;
   1925 
   1926   unsigned int bestsad = INT_MAX;
   1927   int best_site = -1;
   1928   int last_site = -1;
   1929 
   1930   int ref_row;
   1931   int ref_col;
   1932 
   1933   // search_param determines the length of the initial step and hence the number
   1934   // of iterations.
   1935   // 0 = initial step (MAX_FIRST_STEP) pel
   1936   // 1 = (MAX_FIRST_STEP/2) pel,
   1937   // 2 = (MAX_FIRST_STEP/4) pel...
   1938   //  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
   1939   const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
   1940   const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
   1941   const int tot_steps = cfg->total_steps - search_param;
   1942 
   1943   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
   1944   clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   1945            x->mv_limits.row_min, x->mv_limits.row_max);
   1946   ref_row = ref_mv->row;
   1947   ref_col = ref_mv->col;
   1948   *num00 = 0;
   1949   best_mv->row = ref_row;
   1950   best_mv->col = ref_col;
   1951 
   1952   // Work out the start point for the search
   1953   in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
   1954   best_address = in_what;
   1955 
   1956   // Check the starting position
   1957   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
   1958             mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
   1959 
   1960   i = 0;
   1961 
   1962   for (step = 0; step < tot_steps; step++) {
   1963     int all_in = 1, t;
   1964 
   1965     // All_in is true if every one of the points we are checking are within
   1966     // the bounds of the image.
   1967     all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
   1968     all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
   1969     all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
   1970     all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
   1971 
   1972     // If all the pixels are within the bounds we don't check whether the
   1973     // search point is valid in this loop,  otherwise we check each point
   1974     // for validity..
   1975     if (all_in) {
   1976       unsigned int sad_array[4];
   1977 
   1978       for (j = 0; j < cfg->searches_per_step; j += 4) {
   1979         unsigned char const *block_offset[4];
   1980 
   1981         for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
   1982 
   1983         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
   1984                        sad_array);
   1985 
   1986         for (t = 0; t < 4; t++, i++) {
   1987           if (sad_array[t] < bestsad) {
   1988             const MV this_mv = { best_mv->row + ss_mv[i].row,
   1989                                  best_mv->col + ss_mv[i].col };
   1990             sad_array[t] +=
   1991                 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
   1992             if (sad_array[t] < bestsad) {
   1993               bestsad = sad_array[t];
   1994               best_site = i;
   1995             }
   1996           }
   1997         }
   1998       }
   1999     } else {
   2000       for (j = 0; j < cfg->searches_per_step; j++) {
   2001         // Trap illegal vectors
   2002         const MV this_mv = { best_mv->row + ss_mv[i].row,
   2003                              best_mv->col + ss_mv[i].col };
   2004 
   2005         if (is_mv_in(&x->mv_limits, &this_mv)) {
   2006           const uint8_t *const check_here = ss_os[i] + best_address;
   2007           unsigned int thissad =
   2008               fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   2009 
   2010           if (thissad < bestsad) {
   2011             thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
   2012             if (thissad < bestsad) {
   2013               bestsad = thissad;
   2014               best_site = i;
   2015             }
   2016           }
   2017         }
   2018         i++;
   2019       }
   2020     }
   2021     if (best_site != last_site) {
   2022       best_mv->row += ss_mv[best_site].row;
   2023       best_mv->col += ss_mv[best_site].col;
   2024       best_address += ss_os[best_site];
   2025       last_site = best_site;
   2026 #if defined(NEW_DIAMOND_SEARCH)
   2027       while (1) {
   2028         const MV this_mv = { best_mv->row + ss_mv[best_site].row,
   2029                              best_mv->col + ss_mv[best_site].col };
   2030         if (is_mv_in(&x->mv_limits, &this_mv)) {
   2031           const uint8_t *const check_here = ss_os[best_site] + best_address;
   2032           unsigned int thissad =
   2033               fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   2034           if (thissad < bestsad) {
   2035             thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
   2036             if (thissad < bestsad) {
   2037               bestsad = thissad;
   2038               best_mv->row += ss_mv[best_site].row;
   2039               best_mv->col += ss_mv[best_site].col;
   2040               best_address += ss_os[best_site];
   2041               continue;
   2042             }
   2043           }
   2044         }
   2045         break;
   2046       }
   2047 #endif
   2048     } else if (best_address == in_what) {
   2049       (*num00)++;
   2050     }
   2051   }
   2052   return bestsad;
   2053 }
   2054 
   2055 static int vector_match(int16_t *ref, int16_t *src, int bwl) {
   2056   int best_sad = INT_MAX;
   2057   int this_sad;
   2058   int d;
   2059   int center, offset = 0;
   2060   int bw = 4 << bwl;  // redundant variable, to be changed in the experiments.
   2061   for (d = 0; d <= bw; d += 16) {
   2062     this_sad = vpx_vector_var(&ref[d], src, bwl);
   2063     if (this_sad < best_sad) {
   2064       best_sad = this_sad;
   2065       offset = d;
   2066     }
   2067   }
   2068   center = offset;
   2069 
   2070   for (d = -8; d <= 8; d += 16) {
   2071     int this_pos = offset + d;
   2072     // check limit
   2073     if (this_pos < 0 || this_pos > bw) continue;
   2074     this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
   2075     if (this_sad < best_sad) {
   2076       best_sad = this_sad;
   2077       center = this_pos;
   2078     }
   2079   }
   2080   offset = center;
   2081 
   2082   for (d = -4; d <= 4; d += 8) {
   2083     int this_pos = offset + d;
   2084     // check limit
   2085     if (this_pos < 0 || this_pos > bw) continue;
   2086     this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
   2087     if (this_sad < best_sad) {
   2088       best_sad = this_sad;
   2089       center = this_pos;
   2090     }
   2091   }
   2092   offset = center;
   2093 
   2094   for (d = -2; d <= 2; d += 4) {
   2095     int this_pos = offset + d;
   2096     // check limit
   2097     if (this_pos < 0 || this_pos > bw) continue;
   2098     this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
   2099     if (this_sad < best_sad) {
   2100       best_sad = this_sad;
   2101       center = this_pos;
   2102     }
   2103   }
   2104   offset = center;
   2105 
   2106   for (d = -1; d <= 1; d += 2) {
   2107     int this_pos = offset + d;
   2108     // check limit
   2109     if (this_pos < 0 || this_pos > bw) continue;
   2110     this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
   2111     if (this_sad < best_sad) {
   2112       best_sad = this_sad;
   2113       center = this_pos;
   2114     }
   2115   }
   2116 
   2117   return (center - (bw >> 1));
   2118 }
   2119 
   2120 static const MV search_pos[4] = {
   2121   { -1, 0 },
   2122   { 0, -1 },
   2123   { 0, 1 },
   2124   { 1, 0 },
   2125 };
   2126 
   2127 unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
   2128                                            BLOCK_SIZE bsize, int mi_row,
   2129                                            int mi_col, const MV *ref_mv) {
   2130   MACROBLOCKD *xd = &x->e_mbd;
   2131   MODE_INFO *mi = xd->mi[0];
   2132   struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
   2133   DECLARE_ALIGNED(16, int16_t, hbuf[128]);
   2134   DECLARE_ALIGNED(16, int16_t, vbuf[128]);
   2135   DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
   2136   DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
   2137   int idx;
   2138   const int bw = 4 << b_width_log2_lookup[bsize];
   2139   const int bh = 4 << b_height_log2_lookup[bsize];
   2140   const int search_width = bw << 1;
   2141   const int search_height = bh << 1;
   2142   const int src_stride = x->plane[0].src.stride;
   2143   const int ref_stride = xd->plane[0].pre[0].stride;
   2144   uint8_t const *ref_buf, *src_buf;
   2145   MV *tmp_mv = &xd->mi[0]->mv[0].as_mv;
   2146   unsigned int best_sad, tmp_sad, this_sad[4];
   2147   MV this_mv;
   2148   const int norm_factor = 3 + (bw >> 5);
   2149   const YV12_BUFFER_CONFIG *scaled_ref_frame =
   2150       vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
   2151   MvLimits subpel_mv_limits;
   2152 
   2153   if (scaled_ref_frame) {
   2154     int i;
   2155     // Swap out the reference frame for a version that's been scaled to
   2156     // match the resolution of the current frame, allowing the existing
   2157     // motion search code to be used without additional modifications.
   2158     for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
   2159     vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
   2160   }
   2161 
   2162 #if CONFIG_VP9_HIGHBITDEPTH
   2163   // TODO(jingning): Implement integral projection functions for high bit-depth
   2164   // setting and remove this part of code.
   2165   if (xd->bd != 8) {
   2166     unsigned int this_sad;
   2167     tmp_mv->row = 0;
   2168     tmp_mv->col = 0;
   2169     this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
   2170                                       xd->plane[0].pre[0].buf, ref_stride);
   2171 
   2172     if (scaled_ref_frame) {
   2173       int i;
   2174       for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
   2175     }
   2176     return this_sad;
   2177   }
   2178 #endif
   2179 
   2180   // Set up prediction 1-D reference set
   2181   ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
   2182   for (idx = 0; idx < search_width; idx += 16) {
   2183     vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
   2184     ref_buf += 16;
   2185   }
   2186 
   2187   ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
   2188   for (idx = 0; idx < search_height; ++idx) {
   2189     vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
   2190     ref_buf += ref_stride;
   2191   }
   2192 
   2193   // Set up src 1-D reference set
   2194   for (idx = 0; idx < bw; idx += 16) {
   2195     src_buf = x->plane[0].src.buf + idx;
   2196     vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
   2197   }
   2198 
   2199   src_buf = x->plane[0].src.buf;
   2200   for (idx = 0; idx < bh; ++idx) {
   2201     src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
   2202     src_buf += src_stride;
   2203   }
   2204 
   2205   // Find the best match per 1-D search
   2206   tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
   2207   tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
   2208 
   2209   this_mv = *tmp_mv;
   2210   src_buf = x->plane[0].src.buf;
   2211   ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
   2212   best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
   2213 
   2214   {
   2215     const uint8_t *const pos[4] = {
   2216       ref_buf - ref_stride,
   2217       ref_buf - 1,
   2218       ref_buf + 1,
   2219       ref_buf + ref_stride,
   2220     };
   2221 
   2222     cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
   2223   }
   2224 
   2225   for (idx = 0; idx < 4; ++idx) {
   2226     if (this_sad[idx] < best_sad) {
   2227       best_sad = this_sad[idx];
   2228       tmp_mv->row = search_pos[idx].row + this_mv.row;
   2229       tmp_mv->col = search_pos[idx].col + this_mv.col;
   2230     }
   2231   }
   2232 
   2233   if (this_sad[0] < this_sad[3])
   2234     this_mv.row -= 1;
   2235   else
   2236     this_mv.row += 1;
   2237 
   2238   if (this_sad[1] < this_sad[2])
   2239     this_mv.col -= 1;
   2240   else
   2241     this_mv.col += 1;
   2242 
   2243   ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
   2244 
   2245   tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
   2246   if (best_sad > tmp_sad) {
   2247     *tmp_mv = this_mv;
   2248     best_sad = tmp_sad;
   2249   }
   2250 
   2251   tmp_mv->row *= 8;
   2252   tmp_mv->col *= 8;
   2253 
   2254   vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
   2255   clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max,
   2256            subpel_mv_limits.row_min, subpel_mv_limits.row_max);
   2257 
   2258   if (scaled_ref_frame) {
   2259     int i;
   2260     for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
   2261   }
   2262 
   2263   return best_sad;
   2264 }
   2265 
   2266 #if CONFIG_NON_GREEDY_MV
   2267 // Runs sequence of diamond searches in smaller steps for RD.
   2268 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
   2269               point as the best match, we will do a final 1-away diamond
   2270               refining search  */
   2271 double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
   2272                                   MV *mvp_full, int step_param, double lambda,
   2273                                   int do_refine,
   2274                                   const vp9_variance_fn_ptr_t *fn_ptr,
   2275                                   const int_mv *nb_full_mvs, int full_mv_num,
   2276                                   MV *best_mv, double *best_mv_dist,
   2277                                   double *best_mv_cost) {
   2278   int n, num00 = 0;
   2279   double thissme;
   2280   double bestsme;
   2281   const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param;
   2282   vpx_clear_system_state();
   2283   bestsme = vp9_diamond_search_sad_new(
   2284       x, &cpi->ss_cfg, mvp_full, best_mv, best_mv_dist, best_mv_cost,
   2285       step_param, lambda, &n, fn_ptr, nb_full_mvs, full_mv_num);
   2286 
   2287   // If there won't be more n-step search, check to see if refining search is
   2288   // needed.
   2289   if (n > further_steps) do_refine = 0;
   2290 
   2291   while (n < further_steps) {
   2292     ++n;
   2293     if (num00) {
   2294       num00--;
   2295     } else {
   2296       MV temp_mv;
   2297       double mv_dist;
   2298       double mv_cost;
   2299       thissme = vp9_diamond_search_sad_new(
   2300           x, &cpi->ss_cfg, mvp_full, &temp_mv, &mv_dist, &mv_cost,
   2301           step_param + n, lambda, &num00, fn_ptr, nb_full_mvs, full_mv_num);
   2302       // check to see if refining search is needed.
   2303       if (num00 > further_steps - n) do_refine = 0;
   2304 
   2305       if (thissme < bestsme) {
   2306         bestsme = thissme;
   2307         *best_mv = temp_mv;
   2308         *best_mv_dist = mv_dist;
   2309         *best_mv_cost = mv_cost;
   2310       }
   2311     }
   2312   }
   2313 
   2314   // final 1-away diamond refining search
   2315   if (do_refine) {
   2316     const int search_range = 8;
   2317     MV temp_mv = *best_mv;
   2318     double mv_dist;
   2319     double mv_cost;
   2320     thissme = vp9_refining_search_sad_new(x, &temp_mv, &mv_dist, &mv_cost,
   2321                                           lambda, search_range, fn_ptr,
   2322                                           nb_full_mvs, full_mv_num);
   2323     if (thissme < bestsme) {
   2324       bestsme = thissme;
   2325       *best_mv = temp_mv;
   2326       *best_mv_dist = mv_dist;
   2327       *best_mv_cost = mv_cost;
   2328     }
   2329   }
   2330   return bestsme;
   2331 }
   2332 #endif  // CONFIG_NON_GREEDY_MV
   2333 
   2334 // Runs sequence of diamond searches in smaller steps for RD.
   2335 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
   2336               point as the best match, we will do a final 1-away diamond
   2337               refining search  */
   2338 static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
   2339                               int step_param, int sadpb, int further_steps,
   2340                               int do_refine, int *cost_list,
   2341                               const vp9_variance_fn_ptr_t *fn_ptr,
   2342                               const MV *ref_mv, MV *dst_mv) {
   2343   MV temp_mv;
   2344   int thissme, n, num00 = 0;
   2345   int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
   2346                                         step_param, sadpb, &n, fn_ptr, ref_mv);
   2347   if (bestsme < INT_MAX)
   2348     bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
   2349   *dst_mv = temp_mv;
   2350 
   2351   // If there won't be more n-step search, check to see if refining search is
   2352   // needed.
   2353   if (n > further_steps) do_refine = 0;
   2354 
   2355   while (n < further_steps) {
   2356     ++n;
   2357 
   2358     if (num00) {
   2359       num00--;
   2360     } else {
   2361       thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
   2362                                         step_param + n, sadpb, &num00, fn_ptr,
   2363                                         ref_mv);
   2364       if (thissme < INT_MAX)
   2365         thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
   2366 
   2367       // check to see if refining search is needed.
   2368       if (num00 > further_steps - n) do_refine = 0;
   2369 
   2370       if (thissme < bestsme) {
   2371         bestsme = thissme;
   2372         *dst_mv = temp_mv;
   2373       }
   2374     }
   2375   }
   2376 
   2377   // final 1-away diamond refining search
   2378   if (do_refine) {
   2379     const int search_range = 8;
   2380     MV best_mv = *dst_mv;
   2381     thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
   2382                                       ref_mv);
   2383     if (thissme < INT_MAX)
   2384       thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
   2385     if (thissme < bestsme) {
   2386       bestsme = thissme;
   2387       *dst_mv = best_mv;
   2388     }
   2389   }
   2390 
   2391   // Return cost list.
   2392   if (cost_list) {
   2393     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
   2394   }
   2395   return bestsme;
   2396 }
   2397 
   2398 #define MIN_RANGE 7
   2399 #define MAX_RANGE 256
   2400 #define MIN_INTERVAL 1
   2401 // Runs an limited range exhaustive mesh search using a pattern set
   2402 // according to the encode speed profile.
   2403 static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
   2404                                  MV *centre_mv_full, int sadpb, int *cost_list,
   2405                                  const vp9_variance_fn_ptr_t *fn_ptr,
   2406                                  const MV *ref_mv, MV *dst_mv) {
   2407   const SPEED_FEATURES *const sf = &cpi->sf;
   2408   MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
   2409   MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
   2410   int bestsme;
   2411   int i;
   2412   int interval = sf->mesh_patterns[0].interval;
   2413   int range = sf->mesh_patterns[0].range;
   2414   int baseline_interval_divisor;
   2415 
   2416   // Trap illegal values for interval and range for this function.
   2417   if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
   2418       (interval > range))
   2419     return INT_MAX;
   2420 
   2421   baseline_interval_divisor = range / interval;
   2422 
   2423   // Check size of proposed first range against magnitude of the centre
   2424   // value used as a starting point.
   2425   range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
   2426   range = VPXMIN(range, MAX_RANGE);
   2427   interval = VPXMAX(interval, range / baseline_interval_divisor);
   2428 
   2429   // initial search
   2430   bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
   2431                                    sadpb, fn_ptr, &temp_mv);
   2432 
   2433   if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
   2434     // Progressive searches with range and step size decreasing each time
   2435     // till we reach a step size of 1. Then break out.
   2436     for (i = 1; i < MAX_MESH_STEP; ++i) {
   2437       // First pass with coarser step and longer range
   2438       bestsme = exhuastive_mesh_search(
   2439           x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
   2440           sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
   2441 
   2442       if (sf->mesh_patterns[i].interval == 1) break;
   2443     }
   2444   }
   2445 
   2446   if (bestsme < INT_MAX)
   2447     bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
   2448   *dst_mv = temp_mv;
   2449 
   2450   // Return cost list.
   2451   if (cost_list) {
   2452     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
   2453   }
   2454   return bestsme;
   2455 }
   2456 
   2457 #if CONFIG_NON_GREEDY_MV
   2458 double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
   2459                                    double *best_mv_dist, double *best_mv_cost,
   2460                                    double lambda, int search_range,
   2461                                    const vp9_variance_fn_ptr_t *fn_ptr,
   2462                                    const int_mv *nb_full_mvs, int full_mv_num) {
   2463   const MACROBLOCKD *const xd = &x->e_mbd;
   2464   const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
   2465   const struct buf_2d *const what = &x->plane[0].src;
   2466   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   2467   const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv);
   2468   double best_sad;
   2469   int i, j;
   2470   vpx_clear_system_state();
   2471   *best_mv_dist =
   2472       fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride);
   2473   *best_mv_cost =
   2474       vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
   2475   best_sad = (*best_mv_dist) + lambda * (*best_mv_cost);
   2476 
   2477   for (i = 0; i < search_range; i++) {
   2478     int best_site = -1;
   2479     const int all_in = ((best_full_mv->row - 1) > x->mv_limits.row_min) &
   2480                        ((best_full_mv->row + 1) < x->mv_limits.row_max) &
   2481                        ((best_full_mv->col - 1) > x->mv_limits.col_min) &
   2482                        ((best_full_mv->col + 1) < x->mv_limits.col_max);
   2483 
   2484     if (all_in) {
   2485       unsigned int sads[4];
   2486       const uint8_t *const positions[4] = { best_address - in_what->stride,
   2487                                             best_address - 1, best_address + 1,
   2488                                             best_address + in_what->stride };
   2489 
   2490       fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
   2491 
   2492       for (j = 0; j < 4; ++j) {
   2493         const MV mv = { best_full_mv->row + neighbors[j].row,
   2494                         best_full_mv->col + neighbors[j].col };
   2495         const double mv_dist = sads[j];
   2496         const double mv_cost =
   2497             vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
   2498         const double thissad = mv_dist + lambda * mv_cost;
   2499         if (thissad < best_sad) {
   2500           best_sad = thissad;
   2501           *best_mv_dist = mv_dist;
   2502           *best_mv_cost = mv_cost;
   2503           best_site = j;
   2504         }
   2505       }
   2506     } else {
   2507       for (j = 0; j < 4; ++j) {
   2508         const MV mv = { best_full_mv->row + neighbors[j].row,
   2509                         best_full_mv->col + neighbors[j].col };
   2510 
   2511         if (is_mv_in(&x->mv_limits, &mv)) {
   2512           const double mv_dist =
   2513               fn_ptr->sdf(what->buf, what->stride,
   2514                           get_buf_from_mv(in_what, &mv), in_what->stride);
   2515           const double mv_cost =
   2516               vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
   2517           const double thissad = mv_dist + lambda * mv_cost;
   2518           if (thissad < best_sad) {
   2519             best_sad = thissad;
   2520             *best_mv_dist = mv_dist;
   2521             *best_mv_cost = mv_cost;
   2522             best_site = j;
   2523           }
   2524         }
   2525       }
   2526     }
   2527 
   2528     if (best_site == -1) {
   2529       break;
   2530     } else {
   2531       best_full_mv->row += neighbors[best_site].row;
   2532       best_full_mv->col += neighbors[best_site].col;
   2533       best_address = get_buf_from_mv(in_what, best_full_mv);
   2534     }
   2535   }
   2536 
   2537   return best_sad;
   2538 }
   2539 #endif  // CONFIG_NON_GREEDY_MV
   2540 
   2541 int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
   2542                             int search_range,
   2543                             const vp9_variance_fn_ptr_t *fn_ptr,
   2544                             const MV *center_mv) {
   2545   const MACROBLOCKD *const xd = &x->e_mbd;
   2546   const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
   2547   const struct buf_2d *const what = &x->plane[0].src;
   2548   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   2549   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
   2550   const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
   2551   unsigned int best_sad =
   2552       fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
   2553       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   2554   int i, j;
   2555 
   2556   for (i = 0; i < search_range; i++) {
   2557     int best_site = -1;
   2558     const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
   2559                        ((ref_mv->row + 1) < x->mv_limits.row_max) &
   2560                        ((ref_mv->col - 1) > x->mv_limits.col_min) &
   2561                        ((ref_mv->col + 1) < x->mv_limits.col_max);
   2562 
   2563     if (all_in) {
   2564       unsigned int sads[4];
   2565       const uint8_t *const positions[4] = { best_address - in_what->stride,
   2566                                             best_address - 1, best_address + 1,
   2567                                             best_address + in_what->stride };
   2568 
   2569       fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
   2570 
   2571       for (j = 0; j < 4; ++j) {
   2572         if (sads[j] < best_sad) {
   2573           const MV mv = { ref_mv->row + neighbors[j].row,
   2574                           ref_mv->col + neighbors[j].col };
   2575           sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
   2576           if (sads[j] < best_sad) {
   2577             best_sad = sads[j];
   2578             best_site = j;
   2579           }
   2580         }
   2581       }
   2582     } else {
   2583       for (j = 0; j < 4; ++j) {
   2584         const MV mv = { ref_mv->row + neighbors[j].row,
   2585                         ref_mv->col + neighbors[j].col };
   2586 
   2587         if (is_mv_in(&x->mv_limits, &mv)) {
   2588           unsigned int sad =
   2589               fn_ptr->sdf(what->buf, what->stride,
   2590                           get_buf_from_mv(in_what, &mv), in_what->stride);
   2591           if (sad < best_sad) {
   2592             sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
   2593             if (sad < best_sad) {
   2594               best_sad = sad;
   2595               best_site = j;
   2596             }
   2597           }
   2598         }
   2599       }
   2600     }
   2601 
   2602     if (best_site == -1) {
   2603       break;
   2604     } else {
   2605       ref_mv->row += neighbors[best_site].row;
   2606       ref_mv->col += neighbors[best_site].col;
   2607       best_address = get_buf_from_mv(in_what, ref_mv);
   2608     }
   2609   }
   2610 
   2611   return best_sad;
   2612 }
   2613 
   2614 // This function is called when we do joint motion search in comp_inter_inter
   2615 // mode.
   2616 int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
   2617                              int search_range,
   2618                              const vp9_variance_fn_ptr_t *fn_ptr,
   2619                              const MV *center_mv, const uint8_t *second_pred) {
   2620   const MV neighbors[8] = { { -1, 0 },  { 0, -1 }, { 0, 1 },  { 1, 0 },
   2621                             { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
   2622   const MACROBLOCKD *const xd = &x->e_mbd;
   2623   const struct buf_2d *const what = &x->plane[0].src;
   2624   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   2625   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
   2626   unsigned int best_sad = INT_MAX;
   2627   int i, j;
   2628   clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
   2629            x->mv_limits.row_min, x->mv_limits.row_max);
   2630   best_sad =
   2631       fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
   2632                    in_what->stride, second_pred) +
   2633       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   2634 
   2635   for (i = 0; i < search_range; ++i) {
   2636     int best_site = -1;
   2637 
   2638     for (j = 0; j < 8; ++j) {
   2639       const MV mv = { ref_mv->row + neighbors[j].row,
   2640                       ref_mv->col + neighbors[j].col };
   2641 
   2642       if (is_mv_in(&x->mv_limits, &mv)) {
   2643         unsigned int sad =
   2644             fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
   2645                          in_what->stride, second_pred);
   2646         if (sad < best_sad) {
   2647           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
   2648           if (sad < best_sad) {
   2649             best_sad = sad;
   2650             best_site = j;
   2651           }
   2652         }
   2653       }
   2654     }
   2655 
   2656     if (best_site == -1) {
   2657       break;
   2658     } else {
   2659       ref_mv->row += neighbors[best_site].row;
   2660       ref_mv->col += neighbors[best_site].col;
   2661     }
   2662   }
   2663   return best_sad;
   2664 }
   2665 
   2666 int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
   2667                           MV *mvp_full, int step_param, int search_method,
   2668                           int error_per_bit, int *cost_list, const MV *ref_mv,
   2669                           MV *tmp_mv, int var_max, int rd) {
   2670   const SPEED_FEATURES *const sf = &cpi->sf;
   2671   const SEARCH_METHODS method = (SEARCH_METHODS)search_method;
   2672   vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
   2673   int var = 0;
   2674   int run_exhaustive_search = 0;
   2675 
   2676   if (cost_list) {
   2677     cost_list[0] = INT_MAX;
   2678     cost_list[1] = INT_MAX;
   2679     cost_list[2] = INT_MAX;
   2680     cost_list[3] = INT_MAX;
   2681     cost_list[4] = INT_MAX;
   2682   }
   2683 
   2684   switch (method) {
   2685     case FAST_DIAMOND:
   2686       var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
   2687                             cost_list, fn_ptr, 1, ref_mv, tmp_mv);
   2688       break;
   2689     case FAST_HEX:
   2690       var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
   2691                             cost_list, fn_ptr, 1, ref_mv, tmp_mv);
   2692       break;
   2693     case HEX:
   2694       var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
   2695                        fn_ptr, 1, ref_mv, tmp_mv);
   2696       break;
   2697     case SQUARE:
   2698       var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
   2699                           fn_ptr, 1, ref_mv, tmp_mv);
   2700       break;
   2701     case BIGDIA:
   2702       var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
   2703                           fn_ptr, 1, ref_mv, tmp_mv);
   2704       break;
   2705     case NSTEP:
   2706     case MESH:
   2707       var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
   2708                                MAX_MVSEARCH_STEPS - 1 - step_param, 1,
   2709                                cost_list, fn_ptr, ref_mv, tmp_mv);
   2710       break;
   2711     default: assert(0 && "Unknown search method");
   2712   }
   2713 
   2714   if (method == NSTEP) {
   2715     if (sf->exhaustive_searches_thresh < INT_MAX &&
   2716         !cpi->rc.is_src_frame_alt_ref) {
   2717       const int64_t exhuastive_thr =
   2718           sf->exhaustive_searches_thresh >>
   2719           (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
   2720       if (var > exhuastive_thr) run_exhaustive_search = 1;
   2721     }
   2722   } else if (method == MESH) {
   2723     run_exhaustive_search = 1;
   2724   }
   2725 
   2726   if (run_exhaustive_search) {
   2727     int var_ex;
   2728     MV tmp_mv_ex;
   2729     var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list,
   2730                                    fn_ptr, ref_mv, &tmp_mv_ex);
   2731     if (var_ex < var) {
   2732       var = var_ex;
   2733       *tmp_mv = tmp_mv_ex;
   2734     }
   2735   }
   2736 
   2737   if (method != NSTEP && method != MESH && rd && var < var_max)
   2738     var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
   2739 
   2740   return var;
   2741 }
   2742 
   2743 // Note(yunqingwang): The following 2 functions are only used in the motion
   2744 // vector unit test, which return extreme motion vectors allowed by the MV
   2745 // limits.
   2746 #define COMMON_MV_TEST \
   2747   SETUP_SUBPEL_SEARCH; \
   2748                        \
   2749   (void)error_per_bit; \
   2750   (void)vfp;           \
   2751   (void)z;             \
   2752   (void)src_stride;    \
   2753   (void)y;             \
   2754   (void)y_stride;      \
   2755   (void)second_pred;   \
   2756   (void)w;             \
   2757   (void)h;             \
   2758   (void)offset;        \
   2759   (void)mvjcost;       \
   2760   (void)mvcost;        \
   2761   (void)sse1;          \
   2762   (void)distortion;    \
   2763                        \
   2764   (void)halfiters;     \
   2765   (void)quarteriters;  \
   2766   (void)eighthiters;   \
   2767   (void)whichdir;      \
   2768   (void)allow_hp;      \
   2769   (void)forced_stop;   \
   2770   (void)hstep;         \
   2771   (void)rr;            \
   2772   (void)rc;            \
   2773                        \
   2774   (void)tr;            \
   2775   (void)tc;            \
   2776   (void)sse;           \
   2777   (void)thismse;       \
   2778   (void)cost_list;     \
   2779   (void)use_accurate_subpel_search;
   2780 
   2781 // Return the maximum MV.
   2782 uint32_t vp9_return_max_sub_pixel_mv(
   2783     const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
   2784     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
   2785     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
   2786     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
   2787     int h, int use_accurate_subpel_search) {
   2788   COMMON_MV_TEST;
   2789 
   2790   (void)minr;
   2791   (void)minc;
   2792 
   2793   bestmv->row = maxr;
   2794   bestmv->col = maxc;
   2795   besterr = 0;
   2796 
   2797   // In the sub-pel motion search, if hp is not used, then the last bit of mv
   2798   // has to be 0.
   2799   lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
   2800 
   2801   return besterr;
   2802 }
   2803 // Return the minimum MV.
   2804 uint32_t vp9_return_min_sub_pixel_mv(
   2805     const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
   2806     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
   2807     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
   2808     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
   2809     int h, int use_accurate_subpel_search) {
   2810   COMMON_MV_TEST;
   2811 
   2812   (void)maxr;
   2813   (void)maxc;
   2814 
   2815   bestmv->row = minr;
   2816   bestmv->col = minc;
   2817   besterr = 0;
   2818 
   2819   // In the sub-pel motion search, if hp is not used, then the last bit of mv
   2820   // has to be 0.
   2821   lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
   2822 
   2823   return besterr;
   2824 }
   2825