Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <limits.h>
     12 #include <math.h>
     13 #include <stdio.h>
     14 
     15 #include "./vpx_config.h"
     16 
     17 #include "vpx_mem/vpx_mem.h"
     18 
     19 #include "vp9/common/vp9_findnearmv.h"
     20 #include "vp9/common/vp9_common.h"
     21 
     22 #include "vp9/encoder/vp9_onyx_int.h"
     23 #include "vp9/encoder/vp9_mcomp.h"
     24 
     25 // #define NEW_DIAMOND_SEARCH
     26 
     27 void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) {
     28   const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
     29   const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
     30   const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
     31   const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
     32 
     33   // Get intersection of UMV window and valid MV window to reduce # of checks
     34   // in diamond search.
     35   if (x->mv_col_min < col_min)
     36     x->mv_col_min = col_min;
     37   if (x->mv_col_max > col_max)
     38     x->mv_col_max = col_max;
     39   if (x->mv_row_min < row_min)
     40     x->mv_row_min = row_min;
     41   if (x->mv_row_max > row_max)
     42     x->mv_row_max = row_max;
     43 }
     44 
     45 int vp9_init_search_range(VP9_COMP *cpi, int size) {
     46   int sr = 0;
     47 
     48   // Minimum search size no matter what the passed in value.
     49   size = MAX(16, size);
     50 
     51   while ((size << sr) < MAX_FULL_PEL_VAL)
     52     sr++;
     53 
     54   if (sr)
     55     sr--;
     56 
     57   sr += cpi->sf.reduce_first_step_size;
     58   sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
     59   return sr;
     60 }
     61 
     62 static INLINE int mv_cost(const MV *mv,
     63                           const int *joint_cost, int *comp_cost[2]) {
     64   return joint_cost[vp9_get_mv_joint(mv)] +
     65              comp_cost[0][mv->row] + comp_cost[1][mv->col];
     66 }
     67 
     68 int vp9_mv_bit_cost(const MV *mv, const MV *ref,
     69                     const int *mvjcost, int *mvcost[2], int weight) {
     70   const MV diff = { mv->row - ref->row,
     71                     mv->col - ref->col };
     72   return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
     73 }
     74 
     75 static int mv_err_cost(const MV *mv, const MV *ref,
     76                        const int *mvjcost, int *mvcost[2],
     77                        int error_per_bit) {
     78   if (mvcost) {
     79     const MV diff = { mv->row - ref->row,
     80                       mv->col - ref->col };
     81     return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
     82                                   error_per_bit, 13);
     83   }
     84   return 0;
     85 }
     86 
     87 static int mvsad_err_cost(const MV *mv, const MV *ref,
     88                           const int *mvjsadcost, int *mvsadcost[2],
     89                           int error_per_bit) {
     90   if (mvsadcost) {
     91     const MV diff = { mv->row - ref->row,
     92                       mv->col - ref->col };
     93     return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
     94                                   error_per_bit, 8);
     95   }
     96   return 0;
     97 }
     98 
     99 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
    100   int len;
    101   int search_site_count = 0;
    102 
    103   // Generate offsets for 4 search sites per step.
    104   x->ss[search_site_count].mv.col = 0;
    105   x->ss[search_site_count].mv.row = 0;
    106   x->ss[search_site_count].offset = 0;
    107   search_site_count++;
    108 
    109   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    110     // Compute offsets for search sites.
    111     x->ss[search_site_count].mv.col = 0;
    112     x->ss[search_site_count].mv.row = -len;
    113     x->ss[search_site_count].offset = -len * stride;
    114     search_site_count++;
    115 
    116     // Compute offsets for search sites.
    117     x->ss[search_site_count].mv.col = 0;
    118     x->ss[search_site_count].mv.row = len;
    119     x->ss[search_site_count].offset = len * stride;
    120     search_site_count++;
    121 
    122     // Compute offsets for search sites.
    123     x->ss[search_site_count].mv.col = -len;
    124     x->ss[search_site_count].mv.row = 0;
    125     x->ss[search_site_count].offset = -len;
    126     search_site_count++;
    127 
    128     // Compute offsets for search sites.
    129     x->ss[search_site_count].mv.col = len;
    130     x->ss[search_site_count].mv.row = 0;
    131     x->ss[search_site_count].offset = len;
    132     search_site_count++;
    133   }
    134 
    135   x->ss_count = search_site_count;
    136   x->searches_per_step = 4;
    137 }
    138 
    139 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
    140   int len, ss_count = 1;
    141 
    142   x->ss[0].mv.col = x->ss[0].mv.row = 0;
    143   x->ss[0].offset = 0;
    144 
    145   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    146     // Generate offsets for 8 search sites per step.
    147     const MV ss_mvs[8] = {
    148       {-len,  0  }, {len,  0  }, { 0,   -len}, {0,    len},
    149       {-len, -len}, {-len, len}, {len,  -len}, {len,  len}
    150     };
    151     int i;
    152     for (i = 0; i < 8; ++i) {
    153       search_site *const ss = &x->ss[ss_count++];
    154       ss->mv = ss_mvs[i];
    155       ss->offset = ss->mv.row * stride + ss->mv.col;
    156     }
    157   }
    158 
    159   x->ss_count = ss_count;
    160   x->searches_per_step = 8;
    161 }
    162 
    163 /*
    164  * To avoid the penalty for crossing cache-line read, preload the reference
    165  * area in a small buffer, which is aligned to make sure there won't be crossing
    166  * cache-line read while reading from this buffer. This reduced the cpu
    167  * cycles spent on reading ref data in sub-pixel filter functions.
    168  * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
    169  * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
    170  * could reduce the area.
    171  */
    172 
    173 /* estimated cost of a motion vector (r,c) */
    174 #define MVC(r, c)                                       \
    175     (mvcost ?                                           \
    176      ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
    177        mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
    178       error_per_bit + 4096) >> 13 : 0)
    179 
    180 
    181 #define SP(x) (((x) & 7) << 1)  // convert motion vector component to offset
    182                                 // for svf calc
    183 
    184 #define IFMVCV(r, c, s, e)                                \
    185     if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
    186       s                                                   \
    187     else                                                  \
    188       e;
    189 
    190 /* pointer to predictor base of a motionvector */
    191 #define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
    192 
     193 /* Calls vfp->svf for motion vector (r, c): PRE() supplies the full-pel
          * predictor address and SP() the fractional offsets of c and r; z is the
          * source block and &sse is passed as the output argument.  Presumably the
          * result is the sub-pixel variance (per the original comment) -- confirm
          * against the vp9_variance_fn_ptr_t definition. */
     194 #define DIST(r, c) \
     195     vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse)
    196 
     197 /* Scores position (r, c) and keeps it if it beats the current best.
          * Inside the allowed window (see IFMVCV): v = MVC(r, c) + DIST(r, c); when
          * v < besterr the macro updates besterr, br, bc and stores thismse and sse
          * through the distortion and sse1 pointers.  Outside the window v is set
          * to INT_MAX and nothing else changes.  All other names are taken from the
          * enclosing function's scope. */
     198 #define CHECK_BETTER(v, r, c) \
     199     IFMVCV(r, c, {                                                       \
     200       thismse = (DIST(r, c));                                            \
     201       if ((v = MVC(r, c) + thismse) < besterr) {                         \
     202         besterr = v;                                                     \
     203         br = r;                                                          \
     204         bc = c;                                                          \
     205         *distortion = thismse;                                           \
     206         *sse1 = sse;                                                     \
     207       }                                                                  \
     208     },                                                                   \
     209     v = INT_MAX;)
    210 
     /* First-level probe around (tr, tc) at distance hstep.
      * Scores the left, right, up and down neighbours, then exactly one diagonal:
      * the one between the better horizontal and the better vertical neighbour.
      * whichdir encodes that choice: +1 when left is not better than right,
      * +2 when up is not better than down.  Updates the best position through
      * CHECK_BETTER; tr, tc, hstep and whichdir come from the enclosing scope. */
     211 #define FIRST_LEVEL_CHECKS                              \
     212   {                                                     \
     213     unsigned int left, right, up, down, diag;           \
     214     CHECK_BETTER(left, tr, tc - hstep);                 \
     215     CHECK_BETTER(right, tr, tc + hstep);                \
     216     CHECK_BETTER(up, tr - hstep, tc);                   \
     217     CHECK_BETTER(down, tr + hstep, tc);                 \
     218     whichdir = (left < right ? 0 : 1) +                 \
     219                (up < down ? 0 : 2);                     \
     220     switch (whichdir) {                                 \
     221       case 0:                                           \
     222         CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
     223         break;                                          \
     224       case 1:                                           \
     225         CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
     226         break;                                          \
     227       case 2:                                           \
     228         CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
     229         break;                                          \
     230       case 3:                                           \
     231         CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
     232         break;                                          \
     233     }                                                   \
     234   }
    235 
     /* Second-level probe, run after FIRST_LEVEL_CHECKS has possibly moved the
      * best position (br, bc) away from the centre (tr, tc).
      *  - Best moved diagonally: score the two points one further step along
      *    each axis from the best.
      *  - Best moved only in column, or only in row: score two points two steps
      *    along the move (one on each side at distance hstep), plus one point
      *    one step along the move whose side is selected by whichdir.
      *  - Best did not move: no probes.
      * whichdir must still hold the value set by FIRST_LEVEL_CHECKS. */
     236 #define SECOND_LEVEL_CHECKS                             \
     237   {                                                     \
     238     int kr, kc;                                         \
     239     unsigned int second;                                \
     240     if (tr != br && tc != bc) {                         \
     241       kr = br - tr;                                     \
     242       kc = bc - tc;                                     \
     243       CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
     244       CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
     245     } else if (tr == br && tc != bc) {                  \
     246       kc = bc - tc;                                     \
     247       CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
     248       CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
     249       switch (whichdir) {                               \
     250         case 0:                                         \
     251         case 1:                                         \
     252           CHECK_BETTER(second, tr + hstep, tc + kc);    \
     253           break;                                        \
     254         case 2:                                         \
     255         case 3:                                         \
     256           CHECK_BETTER(second, tr - hstep, tc + kc);    \
     257           break;                                        \
     258       }                                                 \
     259     } else if (tr != br && tc == bc) {                  \
     260       kr = br - tr;                                     \
     261       CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
     262       CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
     263       switch (whichdir) {                               \
     264         case 0:                                         \
     265         case 2:                                         \
     266           CHECK_BETTER(second, tr + kr, tc + hstep);    \
     267           break;                                        \
     268         case 1:                                         \
     269         case 3:                                         \
     270           CHECK_BETTER(second, tr + kr, tc - hstep);    \
     271           break;                                        \
     272       }                                                 \
     273     }                                                   \
     274   }
    275 
    276 int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
    277                                       MV *bestmv, const MV *ref_mv,
    278                                       int allow_hp,
    279                                       int error_per_bit,
    280                                       const vp9_variance_fn_ptr_t *vfp,
    281                                       int forced_stop,
    282                                       int iters_per_step,
    283                                       int *mvjcost, int *mvcost[2],
    284                                       int *distortion,
    285                                       unsigned int *sse1) {
    286   uint8_t *z = x->plane[0].src.buf;
    287   int src_stride = x->plane[0].src.stride;
    288   MACROBLOCKD *xd = &x->e_mbd;
    289 
    290   unsigned int besterr = INT_MAX;
    291   unsigned int sse;
    292   unsigned int whichdir;
    293   unsigned int halfiters = iters_per_step;
    294   unsigned int quarteriters = iters_per_step;
    295   unsigned int eighthiters = iters_per_step;
    296   int thismse;
    297 
    298   const int y_stride = xd->plane[0].pre[0].stride;
    299   const int offset = bestmv->row * y_stride + bestmv->col;
    300   uint8_t *y = xd->plane[0].pre[0].buf + offset;
    301 
    302   int rr = ref_mv->row;
    303   int rc = ref_mv->col;
    304   int br = bestmv->row * 8;
    305   int bc = bestmv->col * 8;
    306   int hstep = 4;
    307   const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
    308   const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
    309   const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
    310   const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
    311 
    312   int tr = br;
    313   int tc = bc;
    314 
    315   // central mv
    316   bestmv->row <<= 3;
    317   bestmv->col <<= 3;
    318 
    319   // calculate central point error
    320   besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
    321   *distortion = besterr;
    322   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
    323 
    324   // TODO(jbb): Each subsequent iteration checks at least one point in
    325   // common with the last iteration could be 2 if diagonal is selected.
    326   while (halfiters--) {
    327     // 1/2 pel
    328     FIRST_LEVEL_CHECKS;
    329     // no reason to check the same one again.
    330     if (tr == br && tc == bc)
    331       break;
    332     tr = br;
    333     tc = bc;
    334   }
    335 
    336   // TODO(yaowu): Each subsequent iteration checks at least one point in common
    337   // with the last iteration could be 2 if diagonal is selected.
    338 
    339   // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    340   if (forced_stop != 2) {
    341     hstep >>= 1;
    342     while (quarteriters--) {
    343       FIRST_LEVEL_CHECKS;
    344       // no reason to check the same one again.
    345       if (tr == br && tc == bc)
    346         break;
    347       tr = br;
    348       tc = bc;
    349     }
    350   }
    351 
    352   if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
    353     hstep >>= 1;
    354     while (eighthiters--) {
    355       FIRST_LEVEL_CHECKS;
    356       // no reason to check the same one again.
    357       if (tr == br && tc == bc)
    358         break;
    359       tr = br;
    360       tc = bc;
    361     }
    362   }
    363 
    364   bestmv->row = br;
    365   bestmv->col = bc;
    366 
    367   if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
    368       (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    369     return INT_MAX;
    370 
    371   return besterr;
    372 }
    373 
     // Sub-pixel refinement of a full-pel motion vector with a fixed probe
     // pattern: at each precision (hstep = 4, then 2, then 1) it runs
     // FIRST_LEVEL_CHECKS once and, when iters_per_step > 1,
     // SECOND_LEVEL_CHECKS once.
     //
     // *bestmv is multiplied by 8 on entry and overwritten with the refined
     // vector (br, bc), which is compared against ref_mv in the same units.
     // forced_stop == 2 skips everything after the hstep == 4 pass; the
     // hstep == 1 pass additionally needs forced_stop == 0, allow_hp and
     // vp9_use_mv_hp(ref_mv).
     // *distortion and *sse1 start as the centre-point values and are updated
     // by CHECK_BETTER whenever a better position is found.
     //
     // Returns besterr, or INT_MAX if the result is more than
     // (MAX_FULL_PEL_VAL << 3) away from ref_mv in either component.
     //
     // NOTE: the search macros use the locals below by name; do not rename them.
     374 int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
     375                                  MV *bestmv, const MV *ref_mv,
     376                                  int allow_hp,
     377                                  int error_per_bit,
     378                                  const vp9_variance_fn_ptr_t *vfp,
     379                                  int forced_stop,
     380                                  int iters_per_step,
     381                                  int *mvjcost, int *mvcost[2],
     382                                  int *distortion,
     383                                  unsigned int *sse1) {
     384   uint8_t *z = x->plane[0].src.buf;
     385   const int src_stride = x->plane[0].src.stride;
     386   MACROBLOCKD *xd = &x->e_mbd;
     387   unsigned int besterr = INT_MAX;
     388   unsigned int sse;
     389   unsigned int whichdir;
     390   int thismse;
     391   unsigned int halfiters = iters_per_step;
     392   unsigned int quarteriters = iters_per_step;
     393   unsigned int eighthiters = iters_per_step;
     394 
     395   const int y_stride = xd->plane[0].pre[0].stride;
     396   const int offset = bestmv->row * y_stride + bestmv->col;
     397   uint8_t *y = xd->plane[0].pre[0].buf + offset;
     398 
     399   int rr = ref_mv->row;
     400   int rc = ref_mv->col;
     401   int br = bestmv->row * 8;
     402   int bc = bestmv->col * 8;
     403   int hstep = 4;
     404   const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
     405   const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
     406   const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
     407   const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
     408 
     409   int tr = br;
     410   int tc = bc;
     411 
     412   // central mv: rescale from full-pel units to the units used by br/bc
     413   bestmv->row *= 8;
     414   bestmv->col *= 8;
     415 
     416   // calculate central point error
     417   besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
     418   *distortion = besterr;
     419   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
     420 
     421   // 1/2 pel
     422   FIRST_LEVEL_CHECKS;
     423   if (halfiters > 1) {
     424     SECOND_LEVEL_CHECKS;
     425   }
           // Re-centre the next, finer pass on the best position so far.
     426   tr = br;
     427   tc = bc;
     428 
     429   // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
     430   if (forced_stop != 2) {
     431     hstep >>= 1;
     432     FIRST_LEVEL_CHECKS;
     433     if (quarteriters > 1) {
     434       SECOND_LEVEL_CHECKS;
     435     }
     436     tr = br;
     437     tc = bc;
     438   }
     439 
     440   if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
     441     hstep >>= 1;
     442     FIRST_LEVEL_CHECKS;
     443     if (eighthiters > 1) {
     444       SECOND_LEVEL_CHECKS;
     445     }
     446     tr = br;
     447     tc = bc;
     448   }
     449 
     450   bestmv->row = br;
     451   bestmv->col = bc;
     452 
           // Reject results that drifted too far from the reference vector.
     453   if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
     454       (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
     455     return INT_MAX;
     456 
     457   return besterr;
     458 }
    459 
    460 #undef DIST
     461 /* Compound-prediction variant of DIST: calls vfp->svaf for motion vector
          * (r, c) with the same arguments as the svf form plus second_pred, which
          * the enclosing function receives as its second predictor.  Presumably
          * svaf averages the two predictors before measuring -- confirm against
          * the vp9_variance_fn_ptr_t definition. */
     462 #define DIST(r, c) \
     463     vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \
     464               z, src_stride, &sse, second_pred)
    465 
     // Iterative sub-pixel refinement for compound prediction.
     //
     // Same search as vp9_find_best_sub_pixel_iterative, but every candidate is
     // scored through the svaf form of DIST, which also takes second_pred.  The
     // centre point is scored by building comp_pred with comp_avg_pred() from
     // second_pred and the w x h reference block at y, then calling vfp->vf on
     // it with stride w.
     //
     // *bestmv is multiplied by 8 on entry and overwritten with the refined
     // vector.  At each precision (hstep = 4, 2, 1) FIRST_LEVEL_CHECKS repeats
     // up to iters_per_step times and stops early when the best position stops
     // moving.  forced_stop: 0 - full, 1 - qtr only, 2 - half only; the
     // hstep == 1 pass also needs allow_hp and vp9_use_mv_hp(ref_mv).
     //
     // Returns besterr, or INT_MAX if the result is more than
     // (MAX_FULL_PEL_VAL << 3) away from ref_mv in either component.
     //
     // NOTE: the search macros use the locals below by name; do not rename them.
     466 int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
     467                                            MV *bestmv, const MV *ref_mv,
     468                                            int allow_hp,
     469                                            int error_per_bit,
     470                                            const vp9_variance_fn_ptr_t *vfp,
     471                                            int forced_stop,
     472                                            int iters_per_step,
     473                                            int *mvjcost, int *mvcost[2],
     474                                            int *distortion,
     475                                            unsigned int *sse1,
     476                                            const uint8_t *second_pred,
     477                                            int w, int h) {
     478   uint8_t *const z = x->plane[0].src.buf;
     479   const int src_stride = x->plane[0].src.stride;
     480   MACROBLOCKD *const xd = &x->e_mbd;
     481 
     482   unsigned int besterr = INT_MAX;
     483   unsigned int sse;
     484   unsigned int whichdir;
     485   unsigned int halfiters = iters_per_step;
     486   unsigned int quarteriters = iters_per_step;
     487   unsigned int eighthiters = iters_per_step;
     488   int thismse;
     489 
           // Scratch buffer for the averaged centre-point predictor (64 * 64 bytes).
     490   DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
     491   const int y_stride = xd->plane[0].pre[0].stride;
     492   const int offset = bestmv->row * y_stride + bestmv->col;
     493   uint8_t *const y = xd->plane[0].pre[0].buf + offset;
     494 
     495   int rr = ref_mv->row;
     496   int rc = ref_mv->col;
     497   int br = bestmv->row * 8;
     498   int bc = bestmv->col * 8;
     499   int hstep = 4;
     500   const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
     501   const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
     502   const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
     503   const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
     504 
     505   int tr = br;
     506   int tc = bc;
     507 
     508   // central mv: rescale from full-pel units to the units used by br/bc
     509   bestmv->row *= 8;
     510   bestmv->col *= 8;
     511 
     512   // calculate central point error
     513   // TODO(yunqingwang): central pointer error was already calculated in full-
     514   // pixel search, and can be passed in this function.
     515   comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
     516   besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
     517   *distortion = besterr;
     518   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
     519 
     520   // Each subsequent iteration checks at least one point in common with
     521   // the last iteration; it could be 2 if the diagonal was selected.
     522   while (halfiters--) {
     523     // 1/2 pel
     524     FIRST_LEVEL_CHECKS;
     525     // no reason to check the same one again.
     526     if (tr == br && tc == bc)
     527       break;
     528     tr = br;
     529     tc = bc;
     530   }
     531 
     532   // 1/4 pel: as above, each iteration shares at least one point with
     533   // the previous one (2 if the diagonal was selected).
     534 
     535   // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
     536   if (forced_stop != 2) {
     537     hstep >>= 1;
     538     while (quarteriters--) {
     539       FIRST_LEVEL_CHECKS;
     540       // no reason to check the same one again.
     541       if (tr == br && tc == bc)
     542         break;
     543       tr = br;
     544       tc = bc;
     545     }
     546   }
     547 
     548   if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
     549     hstep >>= 1;
     550     while (eighthiters--) {
     551       FIRST_LEVEL_CHECKS;
     552       // no reason to check the same one again.
     553       if (tr == br && tc == bc)
     554         break;
     555       tr = br;
     556       tc = bc;
     557     }
     558   }
     559   bestmv->row = br;
     560   bestmv->col = bc;
     561 
           // Reject results that drifted too far from the reference vector.
     562   if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
     563       (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
     564     return INT_MAX;
     565 
     566   return besterr;
     567 }
    568 
     // Fixed-pattern sub-pixel refinement for compound prediction.
     //
     // Same search as vp9_find_best_sub_pixel_tree, but every candidate is
     // scored through the svaf form of DIST, which also takes second_pred.  The
     // centre point is scored by building comp_pred with comp_avg_pred() from
     // second_pred and the w x h reference block at y, then calling vfp->vf on
     // it with stride w.
     //
     // *bestmv is multiplied by 8 on entry and overwritten with the refined
     // vector.  At each precision (hstep = 4, 2, 1) FIRST_LEVEL_CHECKS runs once
     // and SECOND_LEVEL_CHECKS runs once when iters_per_step > 1.
     // forced_stop: 0 - full, 1 - qtr only, 2 - half only; the hstep == 1 pass
     // also needs allow_hp and vp9_use_mv_hp(ref_mv).
     //
     // Returns besterr, or INT_MAX if the result is more than
     // (MAX_FULL_PEL_VAL << 3) away from ref_mv in either component.
     //
     // NOTE: the search macros use the locals below by name; do not rename them.
     569 int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
     570                                       MV *bestmv, const MV *ref_mv,
     571                                       int allow_hp,
     572                                       int error_per_bit,
     573                                       const vp9_variance_fn_ptr_t *vfp,
     574                                       int forced_stop,
     575                                       int iters_per_step,
     576                                       int *mvjcost, int *mvcost[2],
     577                                       int *distortion,
     578                                       unsigned int *sse1,
     579                                       const uint8_t *second_pred,
     580                                       int w, int h) {
     581   uint8_t *z = x->plane[0].src.buf;
     582   const int src_stride = x->plane[0].src.stride;
     583   MACROBLOCKD *xd = &x->e_mbd;
     584   unsigned int besterr = INT_MAX;
     585   unsigned int sse;
     586   unsigned int whichdir;
     587   int thismse;
     588   unsigned int halfiters = iters_per_step;
     589   unsigned int quarteriters = iters_per_step;
     590   unsigned int eighthiters = iters_per_step;
     591 
           // Scratch buffer for the averaged centre-point predictor (64 * 64 bytes).
     592   DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
     593   const int y_stride = xd->plane[0].pre[0].stride;
     594   const int offset = bestmv->row * y_stride + bestmv->col;
     595   uint8_t *y = xd->plane[0].pre[0].buf + offset;
     596 
     597   int rr = ref_mv->row;
     598   int rc = ref_mv->col;
     599   int br = bestmv->row * 8;
     600   int bc = bestmv->col * 8;
     601   int hstep = 4;
     602   const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
     603   const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
     604   const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
     605   const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
     606 
     607   int tr = br;
     608   int tc = bc;
     609 
     610   // central mv: rescale from full-pel units to the units used by br/bc
     611   bestmv->row *= 8;
     612   bestmv->col *= 8;
     613 
     614   // calculate central point error
     615   // TODO(yunqingwang): central pointer error was already calculated in full-
     616   // pixel search, and can be passed in this function.
     617   comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
     618   besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
     619   *distortion = besterr;
     620   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
     621 
     622   // 1/2 pel: one first-level pass around the centre, followed by a
     623   // second-level pass around the first-level winner when
     624   // iters_per_step > 1.
     625   FIRST_LEVEL_CHECKS;
     626   if (halfiters > 1) {
     627     SECOND_LEVEL_CHECKS;
     628   }
           // Re-centre the next, finer pass on the best position so far.
     629   tr = br;
     630   tc = bc;
     631 
     632   // 1/4 pel: same two-level pattern at half the step, unless the caller
     633   // asked to stop at 1/2 pel.
     634 
     635   // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
     636   if (forced_stop != 2) {
     637     hstep >>= 1;
     638     FIRST_LEVEL_CHECKS;
     639     if (quarteriters > 1) {
     640       SECOND_LEVEL_CHECKS;
     641     }
     642     tr = br;
     643     tc = bc;
     644   }
     645 
     646   if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
     647     hstep >>= 1;
     648     FIRST_LEVEL_CHECKS;
     649     if (eighthiters > 1) {
     650       SECOND_LEVEL_CHECKS;
     651     }
     652     tr = br;
     653     tc = bc;
     654   }
     655   bestmv->row = br;
     656   bestmv->col = bc;
     657 
           // Reject results that drifted too far from the reference vector.
     658   if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
     659       (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
     660     return INT_MAX;
     661 
     662   return besterr;
     663 }
    664 
    665 #undef MVC
    666 #undef PRE
    667 #undef DIST
    668 #undef IFMVCV
    669 #undef CHECK_BETTER
    670 #undef SP
    671 
    672 #define CHECK_BOUNDS(range) \
    673   {\
    674     all_in = 1;\
    675     all_in &= ((br-range) >= x->mv_row_min);\
    676     all_in &= ((br+range) <= x->mv_row_max);\
    677     all_in &= ((bc-range) >= x->mv_col_min);\
    678     all_in &= ((bc+range) <= x->mv_col_max);\
    679   }
    680 
    681 #define CHECK_POINT \
    682   {\
    683     if (this_mv.col < x->mv_col_min) continue;\
    684     if (this_mv.col > x->mv_col_max) continue;\
    685     if (this_mv.row < x->mv_row_min) continue;\
    686     if (this_mv.row > x->mv_row_max) continue;\
    687   }
    688 
     // SAD-search variant of CHECK_BETTER.  If thissad already beats bestsad,
     // optionally (use_mvcost) adds the motion vector cost of this_mv relative
     // to fcenter_mv and, if the total still beats bestsad, records it:
     // bestsad takes the new total and best_site takes the loop index i.
     // The cost is only added after the first comparison passes, so thissad
     // may or may not include it afterwards.  All names come from the
     // enclosing scope.
     689 #define CHECK_BETTER \
     690   {\
     691     if (thissad < bestsad)\
     692     {\
     693       if (use_mvcost) \
     694         thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, \
     695                                   mvjsadcost, mvsadcost, \
     696                                   sad_per_bit);\
     697       if (thissad < bestsad)\
     698       {\
     699         bestsad = thissad;\
     700         best_site = i;\
     701       }\
     702     }\
     703   }
    704 
    705 #define get_next_chkpts(list, i, n)   \
    706     list[0] = ((i) == 0 ? (n) - 1 : (i) - 1);  \
    707     list[1] = (i);                             \
    708     list[2] = ((i) == (n) - 1 ? 0 : (i) + 1);
    709 
     710 #define MAX_PATTERN_SCALES         11  // size of the per-scale tables
     711 #define MAX_PATTERN_CANDIDATES      8  // max number of candidates per scale
     712 #define PATTERN_CANDIDATES_REF      3  // number of refinement candidates
    713 
    714 // Generic pattern search function that searches over multiple scales.
    715 // Each scale can have a different number of candidates and shape of
    716 // candidates as indicated in the num_candidates and candidates arrays
    717 // passed into this function
    718 static int vp9_pattern_search(MACROBLOCK *x,
    719                               MV *ref_mv,
    720                               int search_param,
    721                               int sad_per_bit,
    722                               int do_init_search,
    723                               int do_refine,
    724                               const vp9_variance_fn_ptr_t *vfp,
    725                               int use_mvcost,
    726                               const MV *center_mv, MV *best_mv,
    727                               const int num_candidates[MAX_PATTERN_SCALES],
    728                               const MV candidates[MAX_PATTERN_SCALES]
    729                                                  [MAX_PATTERN_CANDIDATES]) {
    730   const MACROBLOCKD* const xd = &x->e_mbd;
    731   static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    732     10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
    733   };
    734   int i, j, s, t;
    735   uint8_t *what = x->plane[0].src.buf;
    736   int what_stride = x->plane[0].src.stride;
    737   int in_what_stride = xd->plane[0].pre[0].stride;
    738   int br, bc;
    739   MV this_mv;
    740   int bestsad = INT_MAX;
    741   int thissad;
    742   uint8_t *base_offset;
    743   uint8_t *this_offset;
    744   int k = -1;
    745   int all_in;
    746   int best_site = -1;
    747   int_mv fcenter_mv;
    748   int best_init_s = search_param_to_steps[search_param];
    749   int *mvjsadcost = x->nmvjointsadcost;
    750   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
    751 
    752   fcenter_mv.as_mv.row = center_mv->row >> 3;
    753   fcenter_mv.as_mv.col = center_mv->col >> 3;
    754 
    755   // adjust ref_mv to make sure it is within MV range
    756   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    757   br = ref_mv->row;
    758   bc = ref_mv->col;
    759 
    760   // Work out the start point for the search
    761   base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
    762   this_offset = base_offset + (br * in_what_stride) + bc;
    763   this_mv.row = br;
    764   this_mv.col = bc;
    765   bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
    766                 + mvsad_err_cost(&this_mv, &fcenter_mv.as_mv,
    767                                  mvjsadcost, mvsadcost, sad_per_bit);
    768 
    769   // Search all possible scales upto the search param around the center point
    770   // pick the scale of the point that is best as the starting scale of
    771   // further steps around it.
    772   if (do_init_search) {
    773     s = best_init_s;
    774     best_init_s = -1;
    775     for (t = 0; t <= s; ++t) {
    776       best_site = -1;
    777       CHECK_BOUNDS((1 << t))
    778       if (all_in) {
    779         for (i = 0; i < num_candidates[t]; i++) {
    780           this_mv.row = br + candidates[t][i].row;
    781           this_mv.col = bc + candidates[t][i].col;
    782           this_offset = base_offset + (this_mv.row * in_what_stride) +
    783                                        this_mv.col;
    784           thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
    785                              bestsad);
    786           CHECK_BETTER
    787         }
    788       } else {
    789         for (i = 0; i < num_candidates[t]; i++) {
    790           this_mv.row = br + candidates[t][i].row;
    791           this_mv.col = bc + candidates[t][i].col;
    792           CHECK_POINT
    793           this_offset = base_offset + (this_mv.row * in_what_stride) +
    794                                        this_mv.col;
    795           thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
    796                              bestsad);
    797           CHECK_BETTER
    798         }
    799       }
    800       if (best_site == -1) {
    801         continue;
    802       } else {
    803         best_init_s = t;
    804         k = best_site;
    805       }
    806     }
    807     if (best_init_s != -1) {
    808       br += candidates[best_init_s][k].row;
    809       bc += candidates[best_init_s][k].col;
    810     }
    811   }
    812 
    813   // If the center point is still the best, just skip this and move to
    814   // the refinement step.
    815   if (best_init_s != -1) {
    816     s = best_init_s;
    817     best_site = -1;
    818     do {
    819       // No need to search all 6 points the 1st time if initial search was used
    820       if (!do_init_search || s != best_init_s) {
    821         CHECK_BOUNDS((1 << s))
    822         if (all_in) {
    823           for (i = 0; i < num_candidates[s]; i++) {
    824             this_mv.row = br + candidates[s][i].row;
    825             this_mv.col = bc + candidates[s][i].col;
    826             this_offset = base_offset + (this_mv.row * in_what_stride) +
    827                                          this_mv.col;
    828             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
    829                                bestsad);
    830             CHECK_BETTER
    831           }
    832         } else {
    833           for (i = 0; i < num_candidates[s]; i++) {
    834             this_mv.row = br + candidates[s][i].row;
    835             this_mv.col = bc + candidates[s][i].col;
    836             CHECK_POINT
    837             this_offset = base_offset + (this_mv.row * in_what_stride) +
    838                                          this_mv.col;
    839             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
    840                                bestsad);
    841             CHECK_BETTER
    842           }
    843         }
    844 
    845         if (best_site == -1) {
    846           continue;
    847         } else {
    848           br += candidates[s][best_site].row;
    849           bc += candidates[s][best_site].col;
    850           k = best_site;
    851         }
    852       }
    853 
    854       do {
    855         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
    856         best_site = -1;
    857         CHECK_BOUNDS((1 << s))
    858 
    859         get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
    860         if (all_in) {
    861           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
    862             this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
    863             this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
    864             this_offset = base_offset + (this_mv.row * (in_what_stride)) +
    865                                          this_mv.col;
    866             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
    867                                bestsad);
    868             CHECK_BETTER
    869           }
    870         } else {
    871           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
    872             this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
    873             this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
    874             CHECK_POINT
    875             this_offset = base_offset + (this_mv.row * (in_what_stride)) +
    876                                          this_mv.col;
    877             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
    878                                bestsad);
    879             CHECK_BETTER
    880           }
    881         }
    882 
    883         if (best_site != -1) {
    884           k = next_chkpts_indices[best_site];
    885           br += candidates[s][k].row;
    886           bc += candidates[s][k].col;
    887         }
    888       } while (best_site != -1);
    889     } while (s--);
    890   }
    891 
    892   // Check 4 1-away neighbors if do_refine is true.
    893   // For most well-designed schemes do_refine will not be necessary.
    894   if (do_refine) {
    895     static const MV neighbors[4] = {
    896       {0, -1}, { -1, 0}, {1, 0}, {0, 1},
    897     };
    898     for (j = 0; j < 16; j++) {
    899       best_site = -1;
    900       CHECK_BOUNDS(1)
    901       if (all_in) {
    902         for (i = 0; i < 4; i++) {
    903           this_mv.row = br + neighbors[i].row;
    904           this_mv.col = bc + neighbors[i].col;
    905           this_offset = base_offset + (this_mv.row * (in_what_stride)) +
    906                                        this_mv.col;
    907           thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
    908                              bestsad);
    909           CHECK_BETTER
    910         }
    911       } else {
    912         for (i = 0; i < 4; i++) {
    913           this_mv.row = br + neighbors[i].row;
    914           this_mv.col = bc + neighbors[i].col;
    915           CHECK_POINT
    916           this_offset = base_offset + (this_mv.row * (in_what_stride)) +
    917                                        this_mv.col;
    918           thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
    919                              bestsad);
    920           CHECK_BETTER
    921         }
    922           }
    923 
    924       if (best_site == -1) {
    925         break;
    926       } else {
    927         br += neighbors[best_site].row;
    928         bc += neighbors[best_site].col;
    929       }
    930     }
    931   }
    932 
    933   best_mv->row = br;
    934   best_mv->col = bc;
    935 
    936   this_offset = base_offset + (best_mv->row * in_what_stride) +
    937                                best_mv->col;
    938   this_mv.row = best_mv->row * 8;
    939   this_mv.col = best_mv->col * 8;
    940   if (bestsad == INT_MAX)
    941     return INT_MAX;
    942 
    943   return vfp->vf(what, what_stride, this_offset, in_what_stride,
    944                  (unsigned int *)&bestsad) +
    945          use_mvcost ? mv_err_cost(&this_mv, center_mv,
    946                                   x->nmvjointcost, x->mvcost, x->errorperbit)
    947                     : 0;
    948 }
    949 
    950 
    951 int vp9_hex_search(MACROBLOCK *x,
    952                    MV *ref_mv,
    953                    int search_param,
    954                    int sad_per_bit,
    955                    int do_init_search,
    956                    const vp9_variance_fn_ptr_t *vfp,
    957                    int use_mvcost,
    958                    const MV *center_mv, MV *best_mv) {
    959   // First scale has 8-closest points, the rest have 6 points in hex shape
    960   // at increasing scales
    961   static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
    962     8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
    963   };
    964   // Note that the largest candidate step at each scale is 2^scale
    965   static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    966     {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
    967     {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
    968     {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
    969     {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
    970     {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
    971     {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
    972     {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
    973     {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
    974     {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
    975     {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
    976     {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
    977       { -1024, 0}},
    978   };
    979   return
    980       vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
    981                          do_init_search, 0, vfp, use_mvcost,
    982                          center_mv, best_mv,
    983                          hex_num_candidates, hex_candidates);
    984 }
    985 
    986 int vp9_bigdia_search(MACROBLOCK *x,
    987                       MV *ref_mv,
    988                       int search_param,
    989                       int sad_per_bit,
    990                       int do_init_search,
    991                       const vp9_variance_fn_ptr_t *vfp,
    992                       int use_mvcost,
    993                       const MV *center_mv,
    994                       MV *best_mv) {
    995   // First scale has 4-closest points, the rest have 8 points in diamond
    996   // shape at increasing scales
    997   static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
    998     4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    999   };
   1000   // Note that the largest candidate step at each scale is 2^scale
   1001   static const MV bigdia_candidates[MAX_PATTERN_SCALES]
   1002                                    [MAX_PATTERN_CANDIDATES] = {
   1003     {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
   1004     {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
   1005     {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
   1006     {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
   1007     {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
   1008     {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
   1009       {-16, 16}, {-32, 0}},
   1010     {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
   1011       {-32, 32}, {-64, 0}},
   1012     {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
   1013       {-64, 64}, {-128, 0}},
   1014     {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
   1015       {-128, 128}, {-256, 0}},
   1016     {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
   1017       {-256, 256}, {-512, 0}},
   1018     {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
   1019       {-512, 512}, {-1024, 0}},
   1020   };
   1021   return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
   1022                             do_init_search, 0, vfp, use_mvcost,
   1023                             center_mv, best_mv,
   1024                             bigdia_num_candidates, bigdia_candidates);
   1025 }
   1026 
   1027 int vp9_square_search(MACROBLOCK *x,
   1028                       MV *ref_mv,
   1029                       int search_param,
   1030                       int sad_per_bit,
   1031                       int do_init_search,
   1032                       const vp9_variance_fn_ptr_t *vfp,
   1033                       int use_mvcost,
   1034                       const MV *center_mv,
   1035                       MV *best_mv) {
   1036   // All scales have 8 closest points in square shape
   1037   static const int square_num_candidates[MAX_PATTERN_SCALES] = {
   1038     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
   1039   };
   1040   // Note that the largest candidate step at each scale is 2^scale
   1041   static const MV square_candidates[MAX_PATTERN_SCALES]
   1042                                    [MAX_PATTERN_CANDIDATES] = {
   1043     {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
   1044     {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
   1045     {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
   1046     {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
   1047     {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
   1048       {-16, 16}, {-16, 0}},
   1049     {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
   1050       {-32, 32}, {-32, 0}},
   1051     {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
   1052       {-64, 64}, {-64, 0}},
   1053     {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
   1054       {-128, 128}, {-128, 0}},
   1055     {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
   1056       {-256, 256}, {-256, 0}},
   1057     {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
   1058       {-512, 512}, {-512, 0}},
   1059     {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
   1060       {0, 1024}, {-1024, 1024}, {-1024, 0}},
   1061   };
   1062   return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
   1063                             do_init_search, 0, vfp, use_mvcost,
   1064                             center_mv, best_mv,
   1065                             square_num_candidates, square_candidates);
   1066 };
   1067 
   1068 #undef CHECK_BOUNDS
   1069 #undef CHECK_POINT
   1070 #undef CHECK_BETTER
   1071 
   1072 int vp9_diamond_search_sad_c(MACROBLOCK *x,
   1073                              int_mv *ref_mv, int_mv *best_mv,
   1074                              int search_param, int sad_per_bit, int *num00,
   1075                              vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
   1076                              int *mvcost[2], int_mv *center_mv) {
   1077   int i, j, step;
   1078 
   1079   const MACROBLOCKD* const xd = &x->e_mbd;
   1080   uint8_t *what = x->plane[0].src.buf;
   1081   int what_stride = x->plane[0].src.stride;
   1082   uint8_t *in_what;
   1083   int in_what_stride = xd->plane[0].pre[0].stride;
   1084   uint8_t *best_address;
   1085 
   1086   int tot_steps;
   1087   int_mv this_mv;
   1088 
   1089   int bestsad = INT_MAX;
   1090   int best_site = 0;
   1091   int last_site = 0;
   1092 
   1093   int ref_row, ref_col;
   1094   int this_row_offset, this_col_offset;
   1095   search_site *ss;
   1096 
   1097   uint8_t *check_here;
   1098   int thissad;
   1099   int_mv fcenter_mv;
   1100 
   1101   int *mvjsadcost = x->nmvjointsadcost;
   1102   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   1103 
   1104   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1105   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1106 
   1107   clamp_mv(&ref_mv->as_mv,
   1108            x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1109   ref_row = ref_mv->as_mv.row;
   1110   ref_col = ref_mv->as_mv.col;
   1111   *num00 = 0;
   1112   best_mv->as_mv.row = ref_row;
   1113   best_mv->as_mv.col = ref_col;
   1114 
   1115   // Work out the start point for the search
   1116   in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
   1117                         (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
   1118   best_address = in_what;
   1119 
   1120   // Check the starting position
   1121   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
   1122                 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
   1123                                  mvjsadcost, mvsadcost, sad_per_bit);
   1124 
   1125   // search_param determines the length of the initial step and hence the number
   1126   // of iterations
   1127   // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
   1128   // (MAX_FIRST_STEP/4) pel... etc.
   1129   ss = &x->ss[search_param * x->searches_per_step];
   1130   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1131 
   1132   i = 1;
   1133 
   1134   for (step = 0; step < tot_steps; step++) {
   1135     for (j = 0; j < x->searches_per_step; j++) {
   1136       // Trap illegal vectors
   1137       this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1138       this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1139 
   1140       if ((this_col_offset > x->mv_col_min) &&
   1141           (this_col_offset < x->mv_col_max) &&
   1142           (this_row_offset > x->mv_row_min) &&
   1143           (this_row_offset < x->mv_row_max)) {
   1144         check_here = ss[i].offset + best_address;
   1145         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
   1146                               bestsad);
   1147 
   1148         if (thissad < bestsad) {
   1149           this_mv.as_mv.row = this_row_offset;
   1150           this_mv.as_mv.col = this_col_offset;
   1151           thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1152                                     mvjsadcost, mvsadcost, sad_per_bit);
   1153 
   1154           if (thissad < bestsad) {
   1155             bestsad = thissad;
   1156             best_site = i;
   1157           }
   1158         }
   1159       }
   1160 
   1161       i++;
   1162     }
   1163 
   1164     if (best_site != last_site) {
   1165       best_mv->as_mv.row += ss[best_site].mv.row;
   1166       best_mv->as_mv.col += ss[best_site].mv.col;
   1167       best_address += ss[best_site].offset;
   1168       last_site = best_site;
   1169 #if defined(NEW_DIAMOND_SEARCH)
   1170       while (1) {
   1171         this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
   1172         this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
   1173         if ((this_col_offset > x->mv_col_min) &&
   1174             (this_col_offset < x->mv_col_max) &&
   1175             (this_row_offset > x->mv_row_min) &&
   1176             (this_row_offset < x->mv_row_max)) {
   1177           check_here = ss[best_site].offset + best_address;
   1178           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
   1179                                 bestsad);
   1180           if (thissad < bestsad) {
   1181             this_mv.as_mv.row = this_row_offset;
   1182             this_mv.as_mv.col = this_col_offset;
   1183             thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1184                                       mvjsadcost, mvsadcost, sad_per_bit);
   1185             if (thissad < bestsad) {
   1186               bestsad = thissad;
   1187               best_mv->as_mv.row += ss[best_site].mv.row;
   1188               best_mv->as_mv.col += ss[best_site].mv.col;
   1189               best_address += ss[best_site].offset;
   1190               continue;
   1191             }
   1192           }
   1193         }
   1194         break;
   1195       };
   1196 #endif
   1197     } else if (best_address == in_what) {
   1198       (*num00)++;
   1199     }
   1200   }
   1201 
   1202   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1203   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1204 
   1205   if (bestsad == INT_MAX)
   1206     return INT_MAX;
   1207 
   1208   return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
   1209                     (unsigned int *)(&thissad)) +
   1210                        mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
   1211                                    mvjcost, mvcost, x->errorperbit);
   1212 }
   1213 
   1214 int vp9_diamond_search_sadx4(MACROBLOCK *x,
   1215                              int_mv *ref_mv, int_mv *best_mv, int search_param,
   1216                              int sad_per_bit, int *num00,
   1217                              vp9_variance_fn_ptr_t *fn_ptr,
   1218                              int *mvjcost, int *mvcost[2], int_mv *center_mv) {
   1219   int i, j, step;
   1220 
   1221   const MACROBLOCKD* const xd = &x->e_mbd;
   1222   uint8_t *what = x->plane[0].src.buf;
   1223   int what_stride = x->plane[0].src.stride;
   1224   uint8_t *in_what;
   1225   int in_what_stride = xd->plane[0].pre[0].stride;
   1226   uint8_t *best_address;
   1227 
   1228   int tot_steps;
   1229   int_mv this_mv;
   1230 
   1231   unsigned int bestsad = INT_MAX;
   1232   int best_site = 0;
   1233   int last_site = 0;
   1234 
   1235   int ref_row;
   1236   int ref_col;
   1237   int this_row_offset;
   1238   int this_col_offset;
   1239   search_site *ss;
   1240 
   1241   uint8_t *check_here;
   1242   unsigned int thissad;
   1243   int_mv fcenter_mv;
   1244 
   1245   int *mvjsadcost = x->nmvjointsadcost;
   1246   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   1247 
   1248   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1249   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1250 
   1251   clamp_mv(&ref_mv->as_mv,
   1252            x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1253   ref_row = ref_mv->as_mv.row;
   1254   ref_col = ref_mv->as_mv.col;
   1255   *num00 = 0;
   1256   best_mv->as_mv.row = ref_row;
   1257   best_mv->as_mv.col = ref_col;
   1258 
   1259   // Work out the start point for the search
   1260   in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
   1261                         (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
   1262   best_address = in_what;
   1263 
   1264   // Check the starting position
   1265   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
   1266                 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
   1267                                  mvjsadcost, mvsadcost, sad_per_bit);
   1268 
   1269   // search_param determines the length of the initial step and hence the number
   1270   // of iterations.
   1271   // 0 = initial step (MAX_FIRST_STEP) pel
   1272   // 1 = (MAX_FIRST_STEP/2) pel,
   1273   // 2 = (MAX_FIRST_STEP/4) pel...
   1274   ss = &x->ss[search_param * x->searches_per_step];
   1275   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1276 
   1277   i = 1;
   1278 
   1279   for (step = 0; step < tot_steps; step++) {
   1280     int all_in = 1, t;
   1281 
   1282     // All_in is true if every one of the points we are checking are within
   1283     // the bounds of the image.
   1284     all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
   1285     all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
   1286     all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
   1287     all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
   1288 
   1289     // If all the pixels are within the bounds we don't check whether the
   1290     // search point is valid in this loop,  otherwise we check each point
   1291     // for validity..
   1292     if (all_in) {
   1293       unsigned int sad_array[4];
   1294 
   1295       for (j = 0; j < x->searches_per_step; j += 4) {
   1296         unsigned char const *block_offset[4];
   1297 
   1298         for (t = 0; t < 4; t++)
   1299           block_offset[t] = ss[i + t].offset + best_address;
   1300 
   1301         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
   1302                        sad_array);
   1303 
   1304         for (t = 0; t < 4; t++, i++) {
   1305           if (sad_array[t] < bestsad) {
   1306             this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
   1307             this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
   1308             sad_array[t] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1309                                            mvjsadcost, mvsadcost, sad_per_bit);
   1310 
   1311             if (sad_array[t] < bestsad) {
   1312               bestsad = sad_array[t];
   1313               best_site = i;
   1314             }
   1315           }
   1316         }
   1317       }
   1318     } else {
   1319       for (j = 0; j < x->searches_per_step; j++) {
   1320         // Trap illegal vectors
   1321         this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1322         this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1323 
   1324         if ((this_col_offset > x->mv_col_min) &&
   1325             (this_col_offset < x->mv_col_max) &&
   1326             (this_row_offset > x->mv_row_min) &&
   1327             (this_row_offset < x->mv_row_max)) {
   1328           check_here = ss[i].offset + best_address;
   1329           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
   1330                                 bestsad);
   1331 
   1332           if (thissad < bestsad) {
   1333             this_mv.as_mv.row = this_row_offset;
   1334             this_mv.as_mv.col = this_col_offset;
   1335             thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1336                                       mvjsadcost, mvsadcost, sad_per_bit);
   1337 
   1338             if (thissad < bestsad) {
   1339               bestsad = thissad;
   1340               best_site = i;
   1341             }
   1342           }
   1343         }
   1344         i++;
   1345       }
   1346     }
   1347     if (best_site != last_site) {
   1348       best_mv->as_mv.row += ss[best_site].mv.row;
   1349       best_mv->as_mv.col += ss[best_site].mv.col;
   1350       best_address += ss[best_site].offset;
   1351       last_site = best_site;
   1352 #if defined(NEW_DIAMOND_SEARCH)
   1353       while (1) {
   1354         this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
   1355         this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
   1356         if ((this_col_offset > x->mv_col_min) &&
   1357             (this_col_offset < x->mv_col_max) &&
   1358             (this_row_offset > x->mv_row_min) &&
   1359             (this_row_offset < x->mv_row_max)) {
   1360           check_here = ss[best_site].offset + best_address;
   1361           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
   1362                                 bestsad);
   1363           if (thissad < bestsad) {
   1364             this_mv.as_mv.row = this_row_offset;
   1365             this_mv.as_mv.col = this_col_offset;
   1366             thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1367                                       mvjsadcost, mvsadcost, sad_per_bit);
   1368             if (thissad < bestsad) {
   1369               bestsad = thissad;
   1370               best_mv->as_mv.row += ss[best_site].mv.row;
   1371               best_mv->as_mv.col += ss[best_site].mv.col;
   1372               best_address += ss[best_site].offset;
   1373               continue;
   1374             }
   1375           }
   1376         }
   1377         break;
   1378       };
   1379 #endif
   1380     } else if (best_address == in_what) {
   1381       (*num00)++;
   1382     }
   1383   }
   1384 
   1385   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1386   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1387 
   1388   if (bestsad == INT_MAX)
   1389     return INT_MAX;
   1390 
   1391   return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
   1392                     (unsigned int *)(&thissad)) +
   1393                     mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
   1394                                 mvjcost, mvcost, x->errorperbit);
   1395 }
   1396 
   1397 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
   1398               point as the best match, we will do a final 1-away diamond
   1399               refining search  */
   1400 
   1401 int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
   1402                            int_mv *mvp_full, int step_param,
   1403                            int sadpb, int further_steps,
   1404                            int do_refine, vp9_variance_fn_ptr_t *fn_ptr,
   1405                            int_mv *ref_mv, int_mv *dst_mv) {
   1406   int_mv temp_mv;
   1407   int thissme, n, num00;
   1408   int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
   1409                                         step_param, sadpb, &num00,
   1410                                         fn_ptr, x->nmvjointcost,
   1411                                         x->mvcost, ref_mv);
   1412   dst_mv->as_int = temp_mv.as_int;
   1413 
   1414   n = num00;
   1415   num00 = 0;
   1416 
   1417   /* If there won't be more n-step search, check to see if refining search is
   1418    * needed. */
   1419   if (n > further_steps)
   1420     do_refine = 0;
   1421 
   1422   while (n < further_steps) {
   1423     n++;
   1424 
   1425     if (num00) {
   1426       num00--;
   1427     } else {
   1428       thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
   1429                                         step_param + n, sadpb, &num00,
   1430                                         fn_ptr, x->nmvjointcost, x->mvcost,
   1431                                         ref_mv);
   1432 
   1433       /* check to see if refining search is needed. */
   1434       if (num00 > (further_steps - n))
   1435         do_refine = 0;
   1436 
   1437       if (thissme < bestsme) {
   1438         bestsme = thissme;
   1439         dst_mv->as_int = temp_mv.as_int;
   1440       }
   1441     }
   1442   }
   1443 
   1444   /* final 1-away diamond refining search */
   1445   if (do_refine == 1) {
   1446     int search_range = 8;
   1447     int_mv best_mv;
   1448     best_mv.as_int = dst_mv->as_int;
   1449     thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
   1450                                        fn_ptr, x->nmvjointcost, x->mvcost,
   1451                                        ref_mv);
   1452 
   1453     if (thissme < bestsme) {
   1454       bestsme = thissme;
   1455       dst_mv->as_int = best_mv.as_int;
   1456     }
   1457   }
   1458   return bestsme;
   1459 }
   1460 
   1461 int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
   1462                           int sad_per_bit, int distance,
   1463                           vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
   1464                           int *mvcost[2],
   1465                           int_mv *center_mv, int n) {
   1466   const MACROBLOCKD* const xd = &x->e_mbd;
   1467   uint8_t *what = x->plane[0].src.buf;
   1468   int what_stride = x->plane[0].src.stride;
   1469   uint8_t *in_what;
   1470   int in_what_stride = xd->plane[0].pre[0].stride;
   1471   int mv_stride = xd->plane[0].pre[0].stride;
   1472   uint8_t *bestaddress;
   1473   int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0];
   1474   int_mv this_mv;
   1475   int bestsad = INT_MAX;
   1476   int r, c;
   1477 
   1478   uint8_t *check_here;
   1479   int thissad;
   1480 
   1481   int ref_row = ref_mv->as_mv.row;
   1482   int ref_col = ref_mv->as_mv.col;
   1483 
   1484   int row_min = ref_row - distance;
   1485   int row_max = ref_row + distance;
   1486   int col_min = ref_col - distance;
   1487   int col_max = ref_col + distance;
   1488   int_mv fcenter_mv;
   1489 
   1490   int *mvjsadcost = x->nmvjointsadcost;
   1491   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   1492 
   1493   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1494   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1495 
   1496   // Work out the mid point for the search
   1497   in_what = xd->plane[0].pre[0].buf;
   1498   bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
   1499 
   1500   best_mv->as_mv.row = ref_row;
   1501   best_mv->as_mv.col = ref_col;
   1502 
   1503   // Baseline value at the centre
   1504   bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1505                         in_what_stride, 0x7fffffff)
   1506                            + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
   1507                                             mvjsadcost, mvsadcost, sad_per_bit);
   1508 
   1509   // Apply further limits to prevent us looking using vectors that stretch
   1510   // beyond the UMV border
   1511   col_min = MAX(col_min, x->mv_col_min);
   1512   col_max = MIN(col_max, x->mv_col_max);
   1513   row_min = MAX(row_min, x->mv_row_min);
   1514   row_max = MIN(row_max, x->mv_row_max);
   1515 
   1516   for (r = row_min; r < row_max; r++) {
   1517     this_mv.as_mv.row = r;
   1518     check_here = r * mv_stride + in_what + col_min;
   1519 
   1520     for (c = col_min; c < col_max; c++) {
   1521       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
   1522                             bestsad);
   1523 
   1524       this_mv.as_mv.col = c;
   1525       thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1526                                 mvjsadcost, mvsadcost, sad_per_bit);
   1527 
   1528       if (thissad < bestsad) {
   1529         bestsad = thissad;
   1530         best_mv->as_mv.row = r;
   1531         best_mv->as_mv.col = c;
   1532         bestaddress = check_here;
   1533       }
   1534 
   1535       check_here++;
   1536     }
   1537   }
   1538 
   1539   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1540   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1541 
   1542   if (bestsad < INT_MAX)
   1543     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
   1544                       (unsigned int *)(&thissad)) +
   1545                       mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
   1546                                   mvjcost, mvcost, x->errorperbit);
   1547   else
   1548     return INT_MAX;
   1549 }
   1550 
   1551 int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
   1552                           int sad_per_bit, int distance,
   1553                           vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
   1554                           int *mvcost[2], int_mv *center_mv, int n) {
   1555   const MACROBLOCKD* const xd = &x->e_mbd;
   1556   uint8_t *what = x->plane[0].src.buf;
   1557   int what_stride = x->plane[0].src.stride;
   1558   uint8_t *in_what;
   1559   int in_what_stride = xd->plane[0].pre[0].stride;
   1560   int mv_stride = xd->plane[0].pre[0].stride;
   1561   uint8_t *bestaddress;
   1562   int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0];
   1563   int_mv this_mv;
   1564   unsigned int bestsad = INT_MAX;
   1565   int r, c;
   1566 
   1567   uint8_t *check_here;
   1568   unsigned int thissad;
   1569 
   1570   int ref_row = ref_mv->as_mv.row;
   1571   int ref_col = ref_mv->as_mv.col;
   1572 
   1573   int row_min = ref_row - distance;
   1574   int row_max = ref_row + distance;
   1575   int col_min = ref_col - distance;
   1576   int col_max = ref_col + distance;
   1577 
   1578   unsigned int sad_array[3];
   1579   int_mv fcenter_mv;
   1580 
   1581   int *mvjsadcost = x->nmvjointsadcost;
   1582   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   1583 
   1584   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1585   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1586 
   1587   // Work out the mid point for the search
   1588   in_what = xd->plane[0].pre[0].buf;
   1589   bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
   1590 
   1591   best_mv->as_mv.row = ref_row;
   1592   best_mv->as_mv.col = ref_col;
   1593 
   1594   // Baseline value at the centre
   1595   bestsad = fn_ptr->sdf(what, what_stride,
   1596                         bestaddress, in_what_stride, 0x7fffffff)
   1597             + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
   1598                              mvjsadcost, mvsadcost, sad_per_bit);
   1599 
   1600   // Apply further limits to prevent us looking using vectors that stretch
   1601   // beyond the UMV border
   1602   col_min = MAX(col_min, x->mv_col_min);
   1603   col_max = MIN(col_max, x->mv_col_max);
   1604   row_min = MAX(row_min, x->mv_row_min);
   1605   row_max = MIN(row_max, x->mv_row_max);
   1606 
   1607   for (r = row_min; r < row_max; r++) {
   1608     this_mv.as_mv.row = r;
   1609     check_here = r * mv_stride + in_what + col_min;
   1610     c = col_min;
   1611 
   1612     while ((c + 2) < col_max) {
   1613       int i;
   1614 
   1615       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1616 
   1617       for (i = 0; i < 3; i++) {
   1618         thissad = sad_array[i];
   1619 
   1620         if (thissad < bestsad) {
   1621           this_mv.as_mv.col = c;
   1622           thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1623                                     mvjsadcost, mvsadcost, sad_per_bit);
   1624 
   1625           if (thissad < bestsad) {
   1626             bestsad = thissad;
   1627             best_mv->as_mv.row = r;
   1628             best_mv->as_mv.col = c;
   1629             bestaddress = check_here;
   1630           }
   1631         }
   1632 
   1633         check_here++;
   1634         c++;
   1635       }
   1636     }
   1637 
   1638     while (c < col_max) {
   1639       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
   1640                             bestsad);
   1641 
   1642       if (thissad < bestsad) {
   1643         this_mv.as_mv.col = c;
   1644         thissad  += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1645                                    mvjsadcost, mvsadcost, sad_per_bit);
   1646 
   1647         if (thissad < bestsad) {
   1648           bestsad = thissad;
   1649           best_mv->as_mv.row = r;
   1650           best_mv->as_mv.col = c;
   1651           bestaddress = check_here;
   1652         }
   1653       }
   1654 
   1655       check_here++;
   1656       c++;
   1657     }
   1658   }
   1659 
   1660   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1661   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1662 
   1663   if (bestsad < INT_MAX)
   1664     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
   1665                       (unsigned int *)(&thissad)) +
   1666                       mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
   1667                                   mvjcost, mvcost, x->errorperbit);
   1668   else
   1669     return INT_MAX;
   1670 }
   1671 
   1672 int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
   1673                           int sad_per_bit, int distance,
   1674                           vp9_variance_fn_ptr_t *fn_ptr,
   1675                           int *mvjcost, int *mvcost[2],
   1676                           int_mv *center_mv, int n) {
   1677   const MACROBLOCKD* const xd = &x->e_mbd;
   1678   uint8_t *what = x->plane[0].src.buf;
   1679   int what_stride = x->plane[0].src.stride;
   1680   uint8_t *in_what;
   1681   int in_what_stride = xd->plane[0].pre[0].stride;
   1682   int mv_stride = xd->plane[0].pre[0].stride;
   1683   uint8_t *bestaddress;
   1684   int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0];
   1685   int_mv this_mv;
   1686   unsigned int bestsad = INT_MAX;
   1687   int r, c;
   1688 
   1689   uint8_t *check_here;
   1690   unsigned int thissad;
   1691 
   1692   int ref_row = ref_mv->as_mv.row;
   1693   int ref_col = ref_mv->as_mv.col;
   1694 
   1695   int row_min = ref_row - distance;
   1696   int row_max = ref_row + distance;
   1697   int col_min = ref_col - distance;
   1698   int col_max = ref_col + distance;
   1699 
   1700   DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
   1701   unsigned int sad_array[3];
   1702   int_mv fcenter_mv;
   1703 
   1704   int *mvjsadcost = x->nmvjointsadcost;
   1705   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   1706 
   1707   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1708   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1709 
   1710   // Work out the mid point for the search
   1711   in_what = xd->plane[0].pre[0].buf;
   1712   bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
   1713 
   1714   best_mv->as_mv.row = ref_row;
   1715   best_mv->as_mv.col = ref_col;
   1716 
   1717   // Baseline value at the centre
   1718   bestsad = fn_ptr->sdf(what, what_stride,
   1719                         bestaddress, in_what_stride, 0x7fffffff)
   1720             + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
   1721                              mvjsadcost, mvsadcost, sad_per_bit);
   1722 
   1723   // Apply further limits to prevent us looking using vectors that stretch
   1724   // beyond the UMV border
   1725   col_min = MAX(col_min, x->mv_col_min);
   1726   col_max = MIN(col_max, x->mv_col_max);
   1727   row_min = MAX(row_min, x->mv_row_min);
   1728   row_max = MIN(row_max, x->mv_row_max);
   1729 
   1730   for (r = row_min; r < row_max; r++) {
   1731     this_mv.as_mv.row = r;
   1732     check_here = r * mv_stride + in_what + col_min;
   1733     c = col_min;
   1734 
   1735     while ((c + 7) < col_max) {
   1736       int i;
   1737 
   1738       fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
   1739 
   1740       for (i = 0; i < 8; i++) {
   1741         thissad = (unsigned int)sad_array8[i];
   1742 
   1743         if (thissad < bestsad) {
   1744           this_mv.as_mv.col = c;
   1745           thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1746                                     mvjsadcost, mvsadcost, sad_per_bit);
   1747 
   1748           if (thissad < bestsad) {
   1749             bestsad = thissad;
   1750             best_mv->as_mv.row = r;
   1751             best_mv->as_mv.col = c;
   1752             bestaddress = check_here;
   1753           }
   1754         }
   1755 
   1756         check_here++;
   1757         c++;
   1758       }
   1759     }
   1760 
   1761     while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
   1762       int i;
   1763 
   1764       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1765 
   1766       for (i = 0; i < 3; i++) {
   1767         thissad = sad_array[i];
   1768 
   1769         if (thissad < bestsad) {
   1770           this_mv.as_mv.col = c;
   1771           thissad  += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1772                                      mvjsadcost, mvsadcost, sad_per_bit);
   1773 
   1774           if (thissad < bestsad) {
   1775             bestsad = thissad;
   1776             best_mv->as_mv.row = r;
   1777             best_mv->as_mv.col = c;
   1778             bestaddress = check_here;
   1779           }
   1780         }
   1781 
   1782         check_here++;
   1783         c++;
   1784       }
   1785     }
   1786 
   1787     while (c < col_max) {
   1788       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
   1789                             bestsad);
   1790 
   1791       if (thissad < bestsad) {
   1792         this_mv.as_mv.col = c;
   1793         thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1794                                   mvjsadcost, mvsadcost, sad_per_bit);
   1795 
   1796         if (thissad < bestsad) {
   1797           bestsad = thissad;
   1798           best_mv->as_mv.row = r;
   1799           best_mv->as_mv.col = c;
   1800           bestaddress = check_here;
   1801         }
   1802       }
   1803 
   1804       check_here++;
   1805       c++;
   1806     }
   1807   }
   1808 
   1809   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1810   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1811 
   1812   if (bestsad < INT_MAX)
   1813     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
   1814                       (unsigned int *)(&thissad)) +
   1815                       mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
   1816                                   mvjcost, mvcost, x->errorperbit);
   1817   else
   1818     return INT_MAX;
   1819 }
   1820 int vp9_refining_search_sad_c(MACROBLOCK *x,
   1821                               int_mv *ref_mv, int error_per_bit,
   1822                               int search_range, vp9_variance_fn_ptr_t *fn_ptr,
   1823                               int *mvjcost, int *mvcost[2], int_mv *center_mv) {
   1824   const MACROBLOCKD* const xd = &x->e_mbd;
   1825   MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1826   int i, j;
   1827   int this_row_offset, this_col_offset;
   1828 
   1829   int what_stride = x->plane[0].src.stride;
   1830   int in_what_stride = xd->plane[0].pre[0].stride;
   1831   uint8_t *what = x->plane[0].src.buf;
   1832   uint8_t *best_address = xd->plane[0].pre[0].buf +
   1833                           (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
   1834                           ref_mv->as_mv.col;
   1835   uint8_t *check_here;
   1836   unsigned int thissad;
   1837   int_mv this_mv;
   1838   unsigned int bestsad = INT_MAX;
   1839   int_mv fcenter_mv;
   1840 
   1841   int *mvjsadcost = x->nmvjointsadcost;
   1842   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   1843 
   1844   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1845   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1846 
   1847   bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1848                         in_what_stride, 0x7fffffff) +
   1849                         mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
   1850                                        mvjsadcost, mvsadcost, error_per_bit);
   1851 
   1852   for (i = 0; i < search_range; i++) {
   1853     int best_site = -1;
   1854 
   1855     for (j = 0; j < 4; j++) {
   1856       this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1857       this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1858 
   1859       if ((this_col_offset > x->mv_col_min) &&
   1860           (this_col_offset < x->mv_col_max) &&
   1861           (this_row_offset > x->mv_row_min) &&
   1862           (this_row_offset < x->mv_row_max)) {
   1863         check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
   1864                      best_address;
   1865         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
   1866                               bestsad);
   1867 
   1868         if (thissad < bestsad) {
   1869           this_mv.as_mv.row = this_row_offset;
   1870           this_mv.as_mv.col = this_col_offset;
   1871           thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1872                                     mvjsadcost, mvsadcost, error_per_bit);
   1873 
   1874           if (thissad < bestsad) {
   1875             bestsad = thissad;
   1876             best_site = j;
   1877           }
   1878         }
   1879       }
   1880     }
   1881 
   1882     if (best_site == -1) {
   1883       break;
   1884     } else {
   1885       ref_mv->as_mv.row += neighbors[best_site].row;
   1886       ref_mv->as_mv.col += neighbors[best_site].col;
   1887       best_address += (neighbors[best_site].row) * in_what_stride +
   1888                       neighbors[best_site].col;
   1889     }
   1890   }
   1891 
   1892   this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   1893   this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   1894 
   1895   if (bestsad < INT_MAX)
   1896     return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
   1897                       (unsigned int *)(&thissad)) +
   1898                       mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
   1899                                   mvjcost, mvcost, x->errorperbit);
   1900   else
   1901     return INT_MAX;
   1902 }
   1903 
   1904 int vp9_refining_search_sadx4(MACROBLOCK *x,
   1905                               int_mv *ref_mv, int error_per_bit,
   1906                               int search_range, vp9_variance_fn_ptr_t *fn_ptr,
   1907                               int *mvjcost, int *mvcost[2], int_mv *center_mv) {
   1908   const MACROBLOCKD* const xd = &x->e_mbd;
   1909   MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1910   int i, j;
   1911   int this_row_offset, this_col_offset;
   1912 
   1913   int what_stride = x->plane[0].src.stride;
   1914   int in_what_stride = xd->plane[0].pre[0].stride;
   1915   uint8_t *what = x->plane[0].src.buf;
   1916   uint8_t *best_address = xd->plane[0].pre[0].buf +
   1917                           (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
   1918                           ref_mv->as_mv.col;
   1919   uint8_t *check_here;
   1920   unsigned int thissad;
   1921   int_mv this_mv;
   1922   unsigned int bestsad = INT_MAX;
   1923   int_mv fcenter_mv;
   1924 
   1925   int *mvjsadcost = x->nmvjointsadcost;
   1926   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   1927 
   1928   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1929   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1930 
   1931   bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1932                         in_what_stride, 0x7fffffff) +
   1933       mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
   1934                      mvjsadcost, mvsadcost, error_per_bit);
   1935 
   1936   for (i = 0; i < search_range; i++) {
   1937     int best_site = -1;
   1938     int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &
   1939                  ((ref_mv->as_mv.row + 1) < x->mv_row_max) &
   1940                  ((ref_mv->as_mv.col - 1) > x->mv_col_min) &
   1941                  ((ref_mv->as_mv.col + 1) < x->mv_col_max);
   1942 
   1943     if (all_in) {
   1944       unsigned int sad_array[4];
   1945       unsigned char const *block_offset[4];
   1946       block_offset[0] = best_address - in_what_stride;
   1947       block_offset[1] = best_address - 1;
   1948       block_offset[2] = best_address + 1;
   1949       block_offset[3] = best_address + in_what_stride;
   1950 
   1951       fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
   1952                      sad_array);
   1953 
   1954       for (j = 0; j < 4; j++) {
   1955         if (sad_array[j] < bestsad) {
   1956           this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
   1957           this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
   1958           sad_array[j] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1959                                          mvjsadcost, mvsadcost, error_per_bit);
   1960 
   1961           if (sad_array[j] < bestsad) {
   1962             bestsad = sad_array[j];
   1963             best_site = j;
   1964           }
   1965         }
   1966       }
   1967     } else {
   1968       for (j = 0; j < 4; j++) {
   1969         this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1970         this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1971 
   1972         if ((this_col_offset > x->mv_col_min) &&
   1973             (this_col_offset < x->mv_col_max) &&
   1974             (this_row_offset > x->mv_row_min) &&
   1975             (this_row_offset < x->mv_row_max)) {
   1976           check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
   1977                        best_address;
   1978           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
   1979                                 bestsad);
   1980 
   1981           if (thissad < bestsad) {
   1982             this_mv.as_mv.row = this_row_offset;
   1983             this_mv.as_mv.col = this_col_offset;
   1984             thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   1985                                       mvjsadcost, mvsadcost, error_per_bit);
   1986 
   1987             if (thissad < bestsad) {
   1988               bestsad = thissad;
   1989               best_site = j;
   1990             }
   1991           }
   1992         }
   1993       }
   1994     }
   1995 
   1996     if (best_site == -1) {
   1997       break;
   1998     } else {
   1999       ref_mv->as_mv.row += neighbors[best_site].row;
   2000       ref_mv->as_mv.col += neighbors[best_site].col;
   2001       best_address += (neighbors[best_site].row) * in_what_stride +
   2002                       neighbors[best_site].col;
   2003     }
   2004   }
   2005 
   2006   this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   2007   this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   2008 
   2009   if (bestsad < INT_MAX)
   2010     return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
   2011                       (unsigned int *)(&thissad)) +
   2012                       mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
   2013                                   mvjcost, mvcost, x->errorperbit);
   2014   else
   2015     return INT_MAX;
   2016 }
   2017 
   2018 /* This function is called when we do joint motion search in comp_inter_inter
   2019  * mode.
   2020  */
   2021 int vp9_refining_search_8p_c(MACROBLOCK *x,
   2022                              int_mv *ref_mv, int error_per_bit,
   2023                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
   2024                              int *mvjcost, int *mvcost[2], int_mv *center_mv,
   2025                              const uint8_t *second_pred, int w, int h) {
   2026   const MACROBLOCKD* const xd = &x->e_mbd;
   2027   MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
   2028       {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
   2029   int i, j;
   2030   int this_row_offset, this_col_offset;
   2031 
   2032   int what_stride = x->plane[0].src.stride;
   2033   int in_what_stride = xd->plane[0].pre[0].stride;
   2034   uint8_t *what = x->plane[0].src.buf;
   2035   uint8_t *best_address = xd->plane[0].pre[0].buf +
   2036                           (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
   2037                           ref_mv->as_mv.col;
   2038   uint8_t *check_here;
   2039   unsigned int thissad;
   2040   int_mv this_mv;
   2041   unsigned int bestsad = INT_MAX;
   2042   int_mv fcenter_mv;
   2043 
   2044   int *mvjsadcost = x->nmvjointsadcost;
   2045   int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   2046 
   2047   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   2048   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   2049 
   2050   /* Get compound pred by averaging two pred blocks. */
   2051   bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride,
   2052                          second_pred, 0x7fffffff) +
   2053       mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
   2054                      mvjsadcost, mvsadcost, error_per_bit);
   2055 
   2056   for (i = 0; i < search_range; i++) {
   2057     int best_site = -1;
   2058 
   2059     for (j = 0; j < 8; j++) {
   2060       this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   2061       this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   2062 
   2063       if ((this_col_offset > x->mv_col_min) &&
   2064           (this_col_offset < x->mv_col_max) &&
   2065           (this_row_offset > x->mv_row_min) &&
   2066           (this_row_offset < x->mv_row_max)) {
   2067         check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
   2068             best_address;
   2069 
   2070         /* Get compound block and use it to calculate SAD. */
   2071         thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride,
   2072                                second_pred, bestsad);
   2073 
   2074         if (thissad < bestsad) {
   2075           this_mv.as_mv.row = this_row_offset;
   2076           this_mv.as_mv.col = this_col_offset;
   2077           thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
   2078                                     mvjsadcost, mvsadcost, error_per_bit);
   2079           if (thissad < bestsad) {
   2080             bestsad = thissad;
   2081             best_site = j;
   2082           }
   2083         }
   2084       }
   2085     }
   2086 
   2087     if (best_site == -1) {
   2088       break;
   2089     } else {
   2090       ref_mv->as_mv.row += neighbors[best_site].row;
   2091       ref_mv->as_mv.col += neighbors[best_site].col;
   2092       best_address += (neighbors[best_site].row) * in_what_stride +
   2093           neighbors[best_site].col;
   2094     }
   2095   }
   2096 
   2097   this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   2098   this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   2099 
   2100   if (bestsad < INT_MAX) {
   2101     // FIXME(rbultje, yunqing): add full-pixel averaging variance functions
   2102     // so we don't have to use the subpixel with xoff=0,yoff=0 here.
   2103     return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride,
   2104                         (unsigned int *)(&thissad), second_pred) +
   2105                         mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
   2106                                     mvjcost, mvcost, x->errorperbit);
   2107   } else {
   2108     return INT_MAX;
   2109   }
   2110 }
   2111