      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "./vp8_rtcd.h"
     12 #include "./vpx_dsp_rtcd.h"
     13 #include "onyx_int.h"
     14 #include "mcomp.h"
     15 #include "vpx_mem/vpx_mem.h"
     16 #include "vpx_config.h"
     17 #include <stdio.h>
     18 #include <limits.h>
     19 #include <math.h>
     20 #include "vp8/common/findnearmv.h"
     21 #include "vp8/common/common.h"
     22 #include "vpx_dsp/vpx_dsp_common.h"
     23 
     24 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
      25   /* MV costing is based on the distribution of vectors in the previous
      26    * frame and as such will tend to overstate the cost of vectors. In
      27    * addition, coding a new vector can have a knock-on effect on the cost
      28    * of subsequent vectors and the quality of prediction from NEAR and
      29    * NEAREST for subsequent blocks. The "Weight" parameter allows these
      30    * factors to be taken into account, to a limited extent.
      31    */
     32   const int mv_idx_row =
     33       clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
     34   const int mv_idx_col =
     35       clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
     36   return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * Weight) >> 7;
     37 }
     38 
     39 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
     40                        int error_per_bit) {
     41   /* Ignore mv costing if mvcost is NULL */
     42   if (mvcost) {
     43     const int mv_idx_row =
     44         clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
     45     const int mv_idx_col =
     46         clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
     47     return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * error_per_bit +
     48             128) >>
     49            8;
     50   }
     51   return 0;
     52 }
     53 
     54 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
     55                           int error_per_bit) {
      56   /* Calculate the SAD error cost on a full-pixel basis. */
     57   /* Ignore mv costing if mvsadcost is NULL */
     58   if (mvsadcost) {
     59     return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
     60              mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
     61                 error_per_bit +
     62             128) >>
     63            8;
     64   }
     65   return 0;
     66 }
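/* Illustrative sketch only (not used by the encoder): how the fixed-point
 * scaling in the cost helpers above behaves. The table entries and
 * error_per_bit value below are made-up numbers, not real vp8 data.
 */
#if 0
static int example_mv_err_cost(void) {
  const int row_bits = 40, col_bits = 30; /* hypothetical mvcost[] entries */
  const int error_per_bit = 100;          /* hypothetical rate multiplier */
  /* (40 + 30) * 100 = 7000; adding 128 and shifting by 8 rounds to nearest,
   * giving 27, i.e. roughly bits * error_per_bit / 256. */
  return ((row_bits + col_bits) * error_per_bit + 128) >> 8;
}
#endif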
     67 
     68 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
     69   int Len;
     70   int search_site_count = 0;
     71 
     72   /* Generate offsets for 4 search sites per step. */
     73   Len = MAX_FIRST_STEP;
     74   x->ss[search_site_count].mv.col = 0;
     75   x->ss[search_site_count].mv.row = 0;
     76   x->ss[search_site_count].offset = 0;
     77   search_site_count++;
     78 
     79   while (Len > 0) {
     80     /* Compute offsets for search sites. */
     81     x->ss[search_site_count].mv.col = 0;
     82     x->ss[search_site_count].mv.row = -Len;
     83     x->ss[search_site_count].offset = -Len * stride;
     84     search_site_count++;
     85 
     86     /* Compute offsets for search sites. */
     87     x->ss[search_site_count].mv.col = 0;
     88     x->ss[search_site_count].mv.row = Len;
     89     x->ss[search_site_count].offset = Len * stride;
     90     search_site_count++;
     91 
     92     /* Compute offsets for search sites. */
     93     x->ss[search_site_count].mv.col = -Len;
     94     x->ss[search_site_count].mv.row = 0;
     95     x->ss[search_site_count].offset = -Len;
     96     search_site_count++;
     97 
     98     /* Compute offsets for search sites. */
     99     x->ss[search_site_count].mv.col = Len;
    100     x->ss[search_site_count].mv.row = 0;
    101     x->ss[search_site_count].offset = Len;
    102     search_site_count++;
    103 
    104     /* Contract. */
    105     Len /= 2;
    106   }
    107 
    108   x->ss_count = search_site_count;
    109   x->searches_per_step = 4;
    110 }
    111 
    112 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
    113   int Len;
    114   int search_site_count = 0;
    115 
    116   /* Generate offsets for 8 search sites per step. */
    117   Len = MAX_FIRST_STEP;
    118   x->ss[search_site_count].mv.col = 0;
    119   x->ss[search_site_count].mv.row = 0;
    120   x->ss[search_site_count].offset = 0;
    121   search_site_count++;
    122 
    123   while (Len > 0) {
    124     /* Compute offsets for search sites. */
    125     x->ss[search_site_count].mv.col = 0;
    126     x->ss[search_site_count].mv.row = -Len;
    127     x->ss[search_site_count].offset = -Len * stride;
    128     search_site_count++;
    129 
    130     /* Compute offsets for search sites. */
    131     x->ss[search_site_count].mv.col = 0;
    132     x->ss[search_site_count].mv.row = Len;
    133     x->ss[search_site_count].offset = Len * stride;
    134     search_site_count++;
    135 
    136     /* Compute offsets for search sites. */
    137     x->ss[search_site_count].mv.col = -Len;
    138     x->ss[search_site_count].mv.row = 0;
    139     x->ss[search_site_count].offset = -Len;
    140     search_site_count++;
    141 
    142     /* Compute offsets for search sites. */
    143     x->ss[search_site_count].mv.col = Len;
    144     x->ss[search_site_count].mv.row = 0;
    145     x->ss[search_site_count].offset = Len;
    146     search_site_count++;
    147 
    148     /* Compute offsets for search sites. */
    149     x->ss[search_site_count].mv.col = -Len;
    150     x->ss[search_site_count].mv.row = -Len;
    151     x->ss[search_site_count].offset = -Len * stride - Len;
    152     search_site_count++;
    153 
    154     /* Compute offsets for search sites. */
    155     x->ss[search_site_count].mv.col = Len;
    156     x->ss[search_site_count].mv.row = -Len;
    157     x->ss[search_site_count].offset = -Len * stride + Len;
    158     search_site_count++;
    159 
    160     /* Compute offsets for search sites. */
    161     x->ss[search_site_count].mv.col = -Len;
    162     x->ss[search_site_count].mv.row = Len;
    163     x->ss[search_site_count].offset = Len * stride - Len;
    164     search_site_count++;
    165 
    166     /* Compute offsets for search sites. */
    167     x->ss[search_site_count].mv.col = Len;
    168     x->ss[search_site_count].mv.row = Len;
    169     x->ss[search_site_count].offset = Len * stride + Len;
    170     search_site_count++;
    171 
    172     /* Contract. */
    173     Len /= 2;
    174   }
    175 
    176   x->ss_count = search_site_count;
    177   x->searches_per_step = 8;
    178 }
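/* Illustrative sketch only: the site pattern generated above, shown for a
 * hypothetical starting step of Len = 4 (the real starting step is
 * MAX_FIRST_STEP). Each pass emits the four axial points at the current
 * radius (plus the four diagonals for the 8-point table), then halves the
 * radius:
 *
 *   Len = 4: (row, col) = (-4, 0) (4, 0) (0, -4) (0, 4) [+ diagonals]
 *   Len = 2:              (-2, 0) (2, 0) (0, -2) (0, 2) [+ diagonals]
 *   Len = 1:              (-1, 0) (1, 0) (0, -1) (0, 1) [+ diagonals]
 *
 * Each ss[].offset stores the same displacement as a position delta within
 * the prediction buffer (row * stride + col), so the diamond search can step
 * a pointer instead of recomputing addresses.
 */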
    179 
     180 /*
     181  * To avoid the penalty of cache-line-crossing reads, preload the reference
     182  * area into a small buffer that is aligned so reads from it never cross a
     183  * cache line. This reduces the CPU cycles spent reading reference data in
     184  * the sub-pixel filter functions.
     185  * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
     186  * 22-row x 32-col area, which is enough for a 16x16 macroblock. Later, for
     187  * SPLITMV, we could reduce the area.
     188  */
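/* Where the 22 x 32 figure comes from (derived from the comment above and the
 * y_stride = 32 used below): the sub-pixel search reaches at most 3 full pels
 * in each direction around a 16x16 macroblock, so 3 + 16 + 3 = 22 rows are
 * required; the 22 columns required are padded out to the 32-column stride of
 * the aligned intermediate buffer.
 */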
    189 
    190 /* estimated cost of a motion vector (r,c) */
    191 #define MVC(r, c)                                                             \
    192   (mvcost                                                                     \
    193        ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
    194        : 0)
     195 /* pointer to the predictor base of a motion vector */
    196 #define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
    197 /* convert motion vector component to offset for svf calc */
    198 #define SP(x) (((x)&3) << 1)
     199 /* returns the sub-pixel variance error. */
    200 #define DIST(r, c) \
    201   vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
    202 #define IFMVCV(r, c, s, e) \
    203   if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
    204 /* returns distortion + motion vector cost */
    205 #define ERR(r, c) (MVC(r, c) + DIST(r, c))
    206 /* checks if (r,c) has better score than previous best */
    207 #define CHECK_BETTER(v, r, c)                           \
    208   IFMVCV(r, c,                                          \
    209          {                                              \
    210            thismse = DIST(r, c);                        \
    211            if ((v = (MVC(r, c) + thismse)) < besterr) { \
    212              besterr = v;                               \
    213              br = r;                                    \
    214              bc = c;                                    \
    215              *distortion = thismse;                     \
    216              *sse1 = sse;                               \
    217            }                                            \
    218          },                                             \
    219          v = UINT_MAX;)
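/* Illustrative note on the coordinate split used above: (r, c) are in
 * quarter-pel units. PRE() keeps only the integer-pel part (r >> 2, c >> 2)
 * to locate the predictor, while SP() turns the fractional part into the
 * eighth-pel offset expected by the sub-pixel variance function, e.g.:
 *
 *   c = 13 quarter-pels  ->  integer col = 13 >> 2 = 3,
 *                            SP(13) = (13 & 3) << 1 = 2 eighth-pels.
 */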
    220 
    221 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    222                                              int_mv *bestmv, int_mv *ref_mv,
    223                                              int error_per_bit,
    224                                              const vp8_variance_fn_ptr_t *vfp,
    225                                              int *mvcost[2], int *distortion,
    226                                              unsigned int *sse1) {
    227   unsigned char *z = (*(b->base_src) + b->src);
    228 
    229   int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
    230   int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
    231   int tr = br, tc = bc;
    232   unsigned int besterr;
    233   unsigned int left, right, up, down, diag;
    234   unsigned int sse;
    235   unsigned int whichdir;
    236   unsigned int halfiters = 4;
    237   unsigned int quarteriters = 4;
    238   int thismse;
    239 
    240   int minc = VPXMAX(x->mv_col_min * 4,
    241                     (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
    242   int maxc = VPXMIN(x->mv_col_max * 4,
    243                     (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
    244   int minr = VPXMAX(x->mv_row_min * 4,
    245                     (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
    246   int maxr = VPXMIN(x->mv_row_max * 4,
    247                     (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
    248 
    249   int y_stride;
    250   int offset;
    251   int pre_stride = x->e_mbd.pre.y_stride;
    252   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    253 
    254 #if ARCH_X86 || ARCH_X86_64
    255   MACROBLOCKD *xd = &x->e_mbd;
    256   unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
    257                        bestmv->as_mv.col;
    258   unsigned char *y;
    259   int buf_r1, buf_r2, buf_c1;
    260 
    261   /* Clamping to avoid out-of-range data access */
    262   buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
    263                ? (bestmv->as_mv.row - x->mv_row_min)
    264                : 3;
    265   buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
    266                ? (x->mv_row_max - bestmv->as_mv.row)
    267                : 3;
    268   buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
    269                ? (bestmv->as_mv.col - x->mv_col_min)
    270                : 3;
    271   y_stride = 32;
    272 
    273   /* Copy to intermediate buffer before searching. */
    274   vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
    275                y_stride, 16 + buf_r1 + buf_r2);
    276   y = xd->y_buf + y_stride * buf_r1 + buf_c1;
    277 #else
    278   unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
    279                      bestmv->as_mv.col;
    280   y_stride = pre_stride;
    281 #endif
    282 
    283   offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
    284 
    285   /* central mv */
    286   bestmv->as_mv.row *= 8;
    287   bestmv->as_mv.col *= 8;
    288 
    289   /* calculate central point error */
    290   besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    291   *distortion = besterr;
    292   besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    293 
     294   /* TODO: Each subsequent iteration checks at least one point in common
     295    * with the last iteration, and could check two (if the diagonal was selected).
     296    */
    297   while (--halfiters) {
    298     /* 1/2 pel */
    299     CHECK_BETTER(left, tr, tc - 2);
    300     CHECK_BETTER(right, tr, tc + 2);
    301     CHECK_BETTER(up, tr - 2, tc);
    302     CHECK_BETTER(down, tr + 2, tc);
    303 
    304     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    305 
    306     switch (whichdir) {
    307       case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
    308       case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
    309       case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
    310       case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
    311     }
    312 
    313     /* no reason to check the same one again. */
    314     if (tr == br && tc == bc) break;
    315 
    316     tr = br;
    317     tc = bc;
    318   }
    319 
     320   /* TODO: Each subsequent iteration checks at least one point in common
     321    * with the last iteration, and could check two (if the diagonal was selected).
     322    */
    323 
    324   /* 1/4 pel */
    325   while (--quarteriters) {
    326     CHECK_BETTER(left, tr, tc - 1);
    327     CHECK_BETTER(right, tr, tc + 1);
    328     CHECK_BETTER(up, tr - 1, tc);
    329     CHECK_BETTER(down, tr + 1, tc);
    330 
    331     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    332 
    333     switch (whichdir) {
    334       case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
    335       case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
    336       case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
    337       case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
    338     }
    339 
    340     /* no reason to check the same one again. */
    341     if (tr == br && tc == bc) break;
    342 
    343     tr = br;
    344     tc = bc;
    345   }
    346 
    347   bestmv->as_mv.row = br * 2;
    348   bestmv->as_mv.col = bc * 2;
    349 
    350   if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
    351       (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    352     return INT_MAX;
    353   }
    354 
    355   return besterr;
    356 }
    357 #undef MVC
    358 #undef PRE
    359 #undef SP
    360 #undef DIST
    361 #undef IFMVCV
    362 #undef ERR
    363 #undef CHECK_BETTER
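/* Unit bookkeeping for the iterative search above (derived from the code):
 * bestmv arrives in full-pel units, (br, bc) work in quarter-pel units
 * (full-pel * 4), and the result is written back in the encoder's eighth-pel
 * representation (br * 2, bc * 2). The reference-relative values rr/rc are
 * ref_mv >> 1, i.e. also quarter-pel, so MVC() indexes the cost tables
 * consistently.
 */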
    364 
    365 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    366                                  int_mv *bestmv, int_mv *ref_mv,
    367                                  int error_per_bit,
    368                                  const vp8_variance_fn_ptr_t *vfp,
    369                                  int *mvcost[2], int *distortion,
    370                                  unsigned int *sse1) {
    371   int bestmse = INT_MAX;
    372   int_mv startmv;
    373   int_mv this_mv;
    374   unsigned char *z = (*(b->base_src) + b->src);
    375   int left, right, up, down, diag;
    376   unsigned int sse;
    377   int whichdir;
    378   int thismse;
    379   int y_stride;
    380   int pre_stride = x->e_mbd.pre.y_stride;
    381   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    382 
    383 #if ARCH_X86 || ARCH_X86_64
    384   MACROBLOCKD *xd = &x->e_mbd;
    385   unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
    386                        bestmv->as_mv.col;
    387   unsigned char *y;
    388 
    389   y_stride = 32;
    390   /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    391   vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    392   y = xd->y_buf + y_stride + 1;
    393 #else
    394   unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
    395                      bestmv->as_mv.col;
    396   y_stride = pre_stride;
    397 #endif
    398 
    399   /* central mv */
    400   bestmv->as_mv.row *= 8;
    401   bestmv->as_mv.col *= 8;
    402   startmv = *bestmv;
    403 
    404   /* calculate central point error */
    405   bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    406   *distortion = bestmse;
    407   bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    408 
    409   /* go left then right and check error */
    410   this_mv.as_mv.row = startmv.as_mv.row;
    411   this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    412   /* "halfpix" horizontal variance */
    413   thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
    414   left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    415 
    416   if (left < bestmse) {
    417     *bestmv = this_mv;
    418     bestmse = left;
    419     *distortion = thismse;
    420     *sse1 = sse;
    421   }
    422 
    423   this_mv.as_mv.col += 8;
    424   /* "halfpix" horizontal variance */
    425   thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
    426   right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    427 
    428   if (right < bestmse) {
    429     *bestmv = this_mv;
    430     bestmse = right;
    431     *distortion = thismse;
    432     *sse1 = sse;
    433   }
    434 
    435   /* go up then down and check error */
    436   this_mv.as_mv.col = startmv.as_mv.col;
    437   this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    438   /* "halfpix" vertical variance */
    439   thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
    440   up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    441 
    442   if (up < bestmse) {
    443     *bestmv = this_mv;
    444     bestmse = up;
    445     *distortion = thismse;
    446     *sse1 = sse;
    447   }
    448 
    449   this_mv.as_mv.row += 8;
    450   /* "halfpix" vertical variance */
    451   thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
    452   down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    453 
    454   if (down < bestmse) {
    455     *bestmv = this_mv;
    456     bestmse = down;
    457     *distortion = thismse;
    458     *sse1 = sse;
    459   }
    460 
    461   /* now check 1 more diagonal */
    462   whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    463   this_mv = startmv;
    464 
    465   switch (whichdir) {
    466     case 0:
    467       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    468       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    469       /* "halfpix" horizontal/vertical variance */
    470       thismse =
    471           vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
    472       break;
    473     case 1:
    474       this_mv.as_mv.col += 4;
    475       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    476       /* "halfpix" horizontal/vertical variance */
    477       thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
    478       break;
    479     case 2:
    480       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    481       this_mv.as_mv.row += 4;
    482       /* "halfpix" horizontal/vertical variance */
    483       thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
    484       break;
    485     case 3:
    486     default:
    487       this_mv.as_mv.col += 4;
    488       this_mv.as_mv.row += 4;
    489       /* "halfpix" horizontal/vertical variance */
    490       thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
    491       break;
    492   }
    493 
    494   diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    495 
    496   if (diag < bestmse) {
    497     *bestmv = this_mv;
    498     bestmse = diag;
    499     *distortion = thismse;
    500     *sse1 = sse;
    501   }
    502 
    503   /* time to check quarter pels. */
    504   if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;
    505 
    506   if (bestmv->as_mv.col < startmv.as_mv.col) y--;
    507 
    508   startmv = *bestmv;
    509 
    510   /* go left then right and check error */
    511   this_mv.as_mv.row = startmv.as_mv.row;
    512 
    513   if (startmv.as_mv.col & 7) {
    514     this_mv.as_mv.col = startmv.as_mv.col - 2;
    515     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
    516                        this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    517   } else {
    518     this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    519     thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
    520                        b->src_stride, &sse);
    521   }
    522 
    523   left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    524 
    525   if (left < bestmse) {
    526     *bestmv = this_mv;
    527     bestmse = left;
    528     *distortion = thismse;
    529     *sse1 = sse;
    530   }
    531 
    532   this_mv.as_mv.col += 4;
    533   thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
    534                      z, b->src_stride, &sse);
    535   right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    536 
    537   if (right < bestmse) {
    538     *bestmv = this_mv;
    539     bestmse = right;
    540     *distortion = thismse;
    541     *sse1 = sse;
    542   }
    543 
    544   /* go up then down and check error */
    545   this_mv.as_mv.col = startmv.as_mv.col;
    546 
    547   if (startmv.as_mv.row & 7) {
    548     this_mv.as_mv.row = startmv.as_mv.row - 2;
    549     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
    550                        this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    551   } else {
    552     this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    553     thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
    554                        b->src_stride, &sse);
    555   }
    556 
    557   up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    558 
    559   if (up < bestmse) {
    560     *bestmv = this_mv;
    561     bestmse = up;
    562     *distortion = thismse;
    563     *sse1 = sse;
    564   }
    565 
    566   this_mv.as_mv.row += 4;
    567   thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
    568                      z, b->src_stride, &sse);
    569   down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    570 
    571   if (down < bestmse) {
    572     *bestmv = this_mv;
    573     bestmse = down;
    574     *distortion = thismse;
    575     *sse1 = sse;
    576   }
    577 
    578   /* now check 1 more diagonal */
    579   whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    580 
    581   this_mv = startmv;
    582 
    583   switch (whichdir) {
    584     case 0:
    585 
    586       if (startmv.as_mv.row & 7) {
    587         this_mv.as_mv.row -= 2;
    588 
    589         if (startmv.as_mv.col & 7) {
    590           this_mv.as_mv.col -= 2;
    591           thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
    592                              this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    593         } else {
    594           this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    595           thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
    596                              b->src_stride, &sse);
    597         }
    598       } else {
    599         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    600 
    601         if (startmv.as_mv.col & 7) {
    602           this_mv.as_mv.col -= 2;
    603           thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
    604                              z, b->src_stride, &sse);
    605         } else {
    606           this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    607           thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
    608                              &sse);
    609         }
    610       }
    611 
    612       break;
    613     case 1:
    614       this_mv.as_mv.col += 2;
    615 
    616       if (startmv.as_mv.row & 7) {
    617         this_mv.as_mv.row -= 2;
    618         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
    619                            this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    620       } else {
    621         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    622         thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
    623                            b->src_stride, &sse);
    624       }
    625 
    626       break;
    627     case 2:
    628       this_mv.as_mv.row += 2;
    629 
    630       if (startmv.as_mv.col & 7) {
    631         this_mv.as_mv.col -= 2;
    632         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
    633                            this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    634       } else {
    635         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    636         thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
    637                            b->src_stride, &sse);
    638       }
    639 
    640       break;
    641     case 3:
    642       this_mv.as_mv.col += 2;
    643       this_mv.as_mv.row += 2;
    644       thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
    645                          this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    646       break;
    647   }
    648 
    649   diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    650 
    651   if (diag < bestmse) {
    652     *bestmv = this_mv;
    653     bestmse = diag;
    654     *distortion = thismse;
    655     *sse1 = sse;
    656   }
    657 
    658   return bestmse;
    659 }
    660 
    661 int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    662                                   int_mv *bestmv, int_mv *ref_mv,
    663                                   int error_per_bit,
    664                                   const vp8_variance_fn_ptr_t *vfp,
    665                                   int *mvcost[2], int *distortion,
    666                                   unsigned int *sse1) {
    667   int bestmse = INT_MAX;
    668   int_mv startmv;
    669   int_mv this_mv;
    670   unsigned char *z = (*(b->base_src) + b->src);
    671   int left, right, up, down, diag;
    672   unsigned int sse;
    673   int whichdir;
    674   int thismse;
    675   int y_stride;
    676   int pre_stride = x->e_mbd.pre.y_stride;
    677   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    678 
    679 #if ARCH_X86 || ARCH_X86_64
    680   MACROBLOCKD *xd = &x->e_mbd;
    681   unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
    682                        bestmv->as_mv.col;
    683   unsigned char *y;
    684 
    685   y_stride = 32;
    686   /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    687   vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    688   y = xd->y_buf + y_stride + 1;
    689 #else
    690   unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
    691                      bestmv->as_mv.col;
    692   y_stride = pre_stride;
    693 #endif
    694 
    695   /* central mv */
    696   bestmv->as_mv.row *= 8;
    697   bestmv->as_mv.col *= 8;
    698   startmv = *bestmv;
    699 
    700   /* calculate central point error */
    701   bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    702   *distortion = bestmse;
    703   bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    704 
    705   /* go left then right and check error */
    706   this_mv.as_mv.row = startmv.as_mv.row;
    707   this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    708   /* "halfpix" horizontal variance */
    709   thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
    710   left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    711 
    712   if (left < bestmse) {
    713     *bestmv = this_mv;
    714     bestmse = left;
    715     *distortion = thismse;
    716     *sse1 = sse;
    717   }
    718 
    719   this_mv.as_mv.col += 8;
    720   /* "halfpix" horizontal variance */
    721   thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
    722   right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    723 
    724   if (right < bestmse) {
    725     *bestmv = this_mv;
    726     bestmse = right;
    727     *distortion = thismse;
    728     *sse1 = sse;
    729   }
    730 
    731   /* go up then down and check error */
    732   this_mv.as_mv.col = startmv.as_mv.col;
    733   this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    734   /* "halfpix" vertical variance */
    735   thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
    736   up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    737 
    738   if (up < bestmse) {
    739     *bestmv = this_mv;
    740     bestmse = up;
    741     *distortion = thismse;
    742     *sse1 = sse;
    743   }
    744 
    745   this_mv.as_mv.row += 8;
    746   /* "halfpix" vertical variance */
    747   thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
    748   down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    749 
    750   if (down < bestmse) {
    751     *bestmv = this_mv;
    752     bestmse = down;
    753     *distortion = thismse;
    754     *sse1 = sse;
    755   }
    756 
     757   /* now check 1 more diagonal */
    758   whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    759   this_mv = startmv;
    760 
    761   switch (whichdir) {
    762     case 0:
    763       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    764       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    765       /* "halfpix" horizontal/vertical variance */
    766       thismse =
    767           vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
    768       break;
    769     case 1:
    770       this_mv.as_mv.col += 4;
    771       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    772       /* "halfpix" horizontal/vertical variance */
    773       thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
    774       break;
    775     case 2:
    776       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    777       this_mv.as_mv.row += 4;
    778       /* "halfpix" horizontal/vertical variance */
    779       thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
    780       break;
    781     case 3:
    782     default:
    783       this_mv.as_mv.col += 4;
    784       this_mv.as_mv.row += 4;
    785       /* "halfpix" horizontal/vertical variance */
    786       thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
    787       break;
    788   }
    789 
    790   diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    791 
    792   if (diag < bestmse) {
    793     *bestmv = this_mv;
    794     bestmse = diag;
    795     *distortion = thismse;
    796     *sse1 = sse;
    797   }
    798 
    799   return bestmse;
    800 }
    801 
    802 #define CHECK_BOUNDS(range)                    \
    803   {                                            \
    804     all_in = 1;                                \
    805     all_in &= ((br - range) >= x->mv_row_min); \
    806     all_in &= ((br + range) <= x->mv_row_max); \
    807     all_in &= ((bc - range) >= x->mv_col_min); \
    808     all_in &= ((bc + range) <= x->mv_col_max); \
    809   }
    810 
    811 #define CHECK_POINT                                  \
    812   {                                                  \
    813     if (this_mv.as_mv.col < x->mv_col_min) continue; \
    814     if (this_mv.as_mv.col > x->mv_col_max) continue; \
    815     if (this_mv.as_mv.row < x->mv_row_min) continue; \
    816     if (this_mv.as_mv.row > x->mv_row_max) continue; \
    817   }
    818 
    819 #define CHECK_BETTER                                                     \
    820   {                                                                      \
    821     if (thissad < bestsad) {                                             \
    822       thissad +=                                                         \
    823           mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
    824       if (thissad < bestsad) {                                           \
    825         bestsad = thissad;                                               \
    826         best_site = i;                                                   \
    827       }                                                                  \
    828     }                                                                    \
    829   }
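/* Illustrative note on the CHECK_BETTER pattern above: the raw SAD is
 * compared against bestsad first, and the motion-vector rate cost is only
 * added (and the comparison repeated) when that cheap test passes, so the
 * cost-table lookup is skipped for candidates that are already clearly
 * worse.
 */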
    830 
    831 static const MV next_chkpts[6][3] = {
    832   { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
    833   { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
    834   { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
    835 };
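/* Illustrative note (derived from the table above): next_chkpts[k] lists, for
 * each direction k in which the previous hexagon step moved, the three vertex
 * offsets that have not already been examined, so each refinement step costs
 * three SADs instead of six. After a move the code rotates k with
 * "k += 5 + best_site" followed by a wrap back into 0..5, which points the
 * next lookup at the correct row of this table.
 */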
    836 
    837 int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
    838                    int_mv *best_mv, int search_param, int sad_per_bit,
    839                    const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
    840                    int *mvcost[2], int_mv *center_mv) {
    841   MV hex[6] = {
    842     { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
    843   };
    844   MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
    845   int i, j;
    846 
    847   unsigned char *what = (*(b->base_src) + b->src);
    848   int what_stride = b->src_stride;
    849   int pre_stride = x->e_mbd.pre.y_stride;
    850   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    851 
    852   int in_what_stride = pre_stride;
    853   int br, bc;
    854   int_mv this_mv;
    855   unsigned int bestsad;
    856   unsigned int thissad;
    857   unsigned char *base_offset;
    858   unsigned char *this_offset;
    859   int k = -1;
    860   int all_in;
    861   int best_site = -1;
    862   int hex_range = 127;
    863   int dia_range = 8;
    864 
    865   int_mv fcenter_mv;
    866   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    867   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    868 
    869   (void)mvcost;
    870 
    871   /* adjust ref_mv to make sure it is within MV range */
    872   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
    873                x->mv_row_max);
    874   br = ref_mv->as_mv.row;
    875   bc = ref_mv->as_mv.col;
    876 
    877   /* Work out the start point for the search */
    878   base_offset = (unsigned char *)(base_pre + d->offset);
    879   this_offset = base_offset + (br * (pre_stride)) + bc;
    880   this_mv.as_mv.row = br;
    881   this_mv.as_mv.col = bc;
    882   bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
    883             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    884 
    885 #if CONFIG_MULTI_RES_ENCODING
    886   /* Lower search range based on prediction info */
    887   if (search_param >= 6)
    888     goto cal_neighbors;
    889   else if (search_param >= 5)
    890     hex_range = 4;
    891   else if (search_param >= 4)
    892     hex_range = 6;
    893   else if (search_param >= 3)
    894     hex_range = 15;
    895   else if (search_param >= 2)
    896     hex_range = 31;
    897   else if (search_param >= 1)
    898     hex_range = 63;
    899 
    900   dia_range = 8;
    901 #else
    902   (void)search_param;
    903 #endif
    904 
    905   /* hex search */
    906   CHECK_BOUNDS(2)
    907 
    908   if (all_in) {
    909     for (i = 0; i < 6; ++i) {
    910       this_mv.as_mv.row = br + hex[i].row;
    911       this_mv.as_mv.col = bc + hex[i].col;
    912       this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
    913                     this_mv.as_mv.col;
    914       thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    915       CHECK_BETTER
    916     }
    917   } else {
    918     for (i = 0; i < 6; ++i) {
    919       this_mv.as_mv.row = br + hex[i].row;
    920       this_mv.as_mv.col = bc + hex[i].col;
    921       CHECK_POINT
    922       this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
    923                     this_mv.as_mv.col;
    924       thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    925       CHECK_BETTER
    926     }
    927   }
    928 
    929   if (best_site == -1) {
    930     goto cal_neighbors;
    931   } else {
    932     br += hex[best_site].row;
    933     bc += hex[best_site].col;
    934     k = best_site;
    935   }
    936 
    937   for (j = 1; j < hex_range; ++j) {
    938     best_site = -1;
    939     CHECK_BOUNDS(2)
    940 
    941     if (all_in) {
    942       for (i = 0; i < 3; ++i) {
    943         this_mv.as_mv.row = br + next_chkpts[k][i].row;
    944         this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    945         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
    946                       this_mv.as_mv.col;
    947         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    948         CHECK_BETTER
    949       }
    950     } else {
    951       for (i = 0; i < 3; ++i) {
    952         this_mv.as_mv.row = br + next_chkpts[k][i].row;
    953         this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    954         CHECK_POINT
    955         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
    956                       this_mv.as_mv.col;
    957         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    958         CHECK_BETTER
    959       }
    960     }
    961 
    962     if (best_site == -1) {
    963       break;
    964     } else {
    965       br += next_chkpts[k][best_site].row;
    966       bc += next_chkpts[k][best_site].col;
    967       k += 5 + best_site;
    968       if (k >= 12) {
    969         k -= 12;
    970       } else if (k >= 6) {
    971         k -= 6;
    972       }
    973     }
    974   }
    975 
    976 /* check 4 1-away neighbors */
    977 cal_neighbors:
    978   for (j = 0; j < dia_range; ++j) {
    979     best_site = -1;
    980     CHECK_BOUNDS(1)
    981 
    982     if (all_in) {
    983       for (i = 0; i < 4; ++i) {
    984         this_mv.as_mv.row = br + neighbors[i].row;
    985         this_mv.as_mv.col = bc + neighbors[i].col;
    986         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
    987                       this_mv.as_mv.col;
    988         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    989         CHECK_BETTER
    990       }
    991     } else {
    992       for (i = 0; i < 4; ++i) {
    993         this_mv.as_mv.row = br + neighbors[i].row;
    994         this_mv.as_mv.col = bc + neighbors[i].col;
    995         CHECK_POINT
    996         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
    997                       this_mv.as_mv.col;
    998         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    999         CHECK_BETTER
   1000       }
   1001     }
   1002 
   1003     if (best_site == -1) {
   1004       break;
   1005     } else {
   1006       br += neighbors[best_site].row;
   1007       bc += neighbors[best_site].col;
   1008     }
   1009   }
   1010 
   1011   best_mv->as_mv.row = br;
   1012   best_mv->as_mv.col = bc;
   1013 
   1014   return bestsad;
   1015 }
   1016 #undef CHECK_BOUNDS
   1017 #undef CHECK_POINT
   1018 #undef CHECK_BETTER
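/* Summary sketch of the routine above (derived from the code): starting from
 * the clamped ref_mv, the search first walks the six-point hexagon,
 * re-centering on the best vertex and re-testing only the three new points
 * each time, for up to hex_range passes; it then polishes the result with up
 * to dia_range passes over the four 1-away neighbors. The returned value is
 * the best SAD plus its motion-vector cost.
 */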
   1019 
   1020 int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1021                              int_mv *best_mv, int search_param, int sad_per_bit,
   1022                              int *num00, vp8_variance_fn_ptr_t *fn_ptr,
   1023                              int *mvcost[2], int_mv *center_mv) {
   1024   int i, j, step;
   1025 
   1026   unsigned char *what = (*(b->base_src) + b->src);
   1027   int what_stride = b->src_stride;
   1028   unsigned char *in_what;
   1029   int pre_stride = x->e_mbd.pre.y_stride;
   1030   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1031   int in_what_stride = pre_stride;
   1032   unsigned char *best_address;
   1033 
   1034   int tot_steps;
   1035   int_mv this_mv;
   1036 
   1037   unsigned int bestsad;
   1038   unsigned int thissad;
   1039   int best_site = 0;
   1040   int last_site = 0;
   1041 
   1042   int ref_row;
   1043   int ref_col;
   1044   int this_row_offset;
   1045   int this_col_offset;
   1046   search_site *ss;
   1047 
   1048   unsigned char *check_here;
   1049 
   1050   int *mvsadcost[2];
   1051   int_mv fcenter_mv;
   1052 
   1053   mvsadcost[0] = x->mvsadcost[0];
   1054   mvsadcost[1] = x->mvsadcost[1];
   1055   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1056   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1057 
   1058   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
   1059                x->mv_row_max);
   1060   ref_row = ref_mv->as_mv.row;
   1061   ref_col = ref_mv->as_mv.col;
   1062   *num00 = 0;
   1063   best_mv->as_mv.row = ref_row;
   1064   best_mv->as_mv.col = ref_col;
   1065 
   1066   /* Work out the start point for the search */
   1067   in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
   1068                               ref_col);
   1069   best_address = in_what;
   1070 
   1071   /* Check the starting position */
   1072   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
   1073             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1074 
    1075   /* search_param determines the length of the initial step and hence
    1076    * the number of iterations: 0 = initial step (MAX_FIRST_STEP) pel,
    1077    * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel, etc.
    1078    */
   1079   ss = &x->ss[search_param * x->searches_per_step];
   1080   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
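  /* Illustrative example (made-up numbers): with 4 searches per step and a
   * site table of ss_count = 1 + 4 * N entries, ss_count / searches_per_step
   * is N, so tot_steps = N - search_param; a larger search_param simply
   * skips the coarsest rings of the diamond.
   */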
   1081 
   1082   i = 1;
   1083 
   1084   for (step = 0; step < tot_steps; ++step) {
   1085     for (j = 0; j < x->searches_per_step; ++j) {
   1086       /* Trap illegal vectors */
   1087       this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1088       this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1089 
   1090       if ((this_col_offset > x->mv_col_min) &&
   1091           (this_col_offset < x->mv_col_max) &&
   1092           (this_row_offset > x->mv_row_min) &&
   1093           (this_row_offset < x->mv_row_max))
   1094 
   1095       {
   1096         check_here = ss[i].offset + best_address;
   1097         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1098 
   1099         if (thissad < bestsad) {
   1100           this_mv.as_mv.row = this_row_offset;
   1101           this_mv.as_mv.col = this_col_offset;
   1102           thissad +=
   1103               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1104 
   1105           if (thissad < bestsad) {
   1106             bestsad = thissad;
   1107             best_site = i;
   1108           }
   1109         }
   1110       }
   1111 
   1112       i++;
   1113     }
   1114 
   1115     if (best_site != last_site) {
   1116       best_mv->as_mv.row += ss[best_site].mv.row;
   1117       best_mv->as_mv.col += ss[best_site].mv.col;
   1118       best_address += ss[best_site].offset;
   1119       last_site = best_site;
   1120     } else if (best_address == in_what) {
   1121       (*num00)++;
   1122     }
   1123   }
   1124 
   1125   this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1126   this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1127 
   1128   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
   1129          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1130 }
   1131 
   1132 #if HAVE_SSE2 || HAVE_MSA
   1133 int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1134                              int_mv *best_mv, int search_param, int sad_per_bit,
   1135                              int *num00, vp8_variance_fn_ptr_t *fn_ptr,
   1136                              int *mvcost[2], int_mv *center_mv) {
   1137   int i, j, step;
   1138 
   1139   unsigned char *what = (*(b->base_src) + b->src);
   1140   int what_stride = b->src_stride;
   1141   unsigned char *in_what;
   1142   int pre_stride = x->e_mbd.pre.y_stride;
   1143   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1144   int in_what_stride = pre_stride;
   1145   unsigned char *best_address;
   1146 
   1147   int tot_steps;
   1148   int_mv this_mv;
   1149 
   1150   unsigned int bestsad;
   1151   unsigned int thissad;
   1152   int best_site = 0;
   1153   int last_site = 0;
   1154 
   1155   int ref_row;
   1156   int ref_col;
   1157   int this_row_offset;
   1158   int this_col_offset;
   1159   search_site *ss;
   1160 
   1161   unsigned char *check_here;
   1162 
   1163   int *mvsadcost[2];
   1164   int_mv fcenter_mv;
   1165 
   1166   mvsadcost[0] = x->mvsadcost[0];
   1167   mvsadcost[1] = x->mvsadcost[1];
   1168   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1169   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1170 
   1171   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
   1172                x->mv_row_max);
   1173   ref_row = ref_mv->as_mv.row;
   1174   ref_col = ref_mv->as_mv.col;
   1175   *num00 = 0;
   1176   best_mv->as_mv.row = ref_row;
   1177   best_mv->as_mv.col = ref_col;
   1178 
   1179   /* Work out the start point for the search */
   1180   in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
   1181                               ref_col);
   1182   best_address = in_what;
   1183 
   1184   /* Check the starting position */
   1185   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
   1186             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1187 
    1188   /* search_param determines the length of the initial step and hence the
    1189    * number of iterations: 0 = initial step (MAX_FIRST_STEP) pel, 1 =
    1190    * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel, etc.
    1191    */
   1192   ss = &x->ss[search_param * x->searches_per_step];
   1193   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1194 
   1195   i = 1;
   1196 
   1197   for (step = 0; step < tot_steps; ++step) {
   1198     int all_in = 1, t;
   1199 
    1200     /* To know whether all neighbor points are within the bounds, four
    1201      * bounds checks are enough, instead of checking four bounds for
    1202      * each point.
    1203      */
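    /* Illustrative note: in the 8-point pattern (as generated by
     * vp8_init3smotion_compensation) the first four sites of each step are
     * the pure up/down/left/right moves at the full step length, so
     * ss[i].mv.row / ss[i + 1].mv.row are the extreme row offsets and
     * ss[i + 2].mv.col / ss[i + 3].mv.col the extreme column offsets; if
     * those four stay inside the clamp range, the diagonals (which reuse the
     * same magnitudes) must as well.
     */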
   1204     all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
   1205     all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
   1206     all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
   1207     all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
   1208 
   1209     if (all_in) {
   1210       unsigned int sad_array[4];
   1211 
   1212       for (j = 0; j < x->searches_per_step; j += 4) {
   1213         const unsigned char *block_offset[4];
   1214 
   1215         for (t = 0; t < 4; ++t) {
   1216           block_offset[t] = ss[i + t].offset + best_address;
   1217         }
   1218 
   1219         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
   1220                        sad_array);
   1221 
   1222         for (t = 0; t < 4; t++, i++) {
   1223           if (sad_array[t] < bestsad) {
   1224             this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
   1225             this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
   1226             sad_array[t] +=
   1227                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1228 
   1229             if (sad_array[t] < bestsad) {
   1230               bestsad = sad_array[t];
   1231               best_site = i;
   1232             }
   1233           }
   1234         }
   1235       }
   1236     } else {
   1237       for (j = 0; j < x->searches_per_step; ++j) {
   1238         /* Trap illegal vectors */
   1239         this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1240         this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1241 
   1242         if ((this_col_offset > x->mv_col_min) &&
   1243             (this_col_offset < x->mv_col_max) &&
   1244             (this_row_offset > x->mv_row_min) &&
   1245             (this_row_offset < x->mv_row_max)) {
   1246           check_here = ss[i].offset + best_address;
   1247           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1248 
   1249           if (thissad < bestsad) {
   1250             this_mv.as_mv.row = this_row_offset;
   1251             this_mv.as_mv.col = this_col_offset;
   1252             thissad +=
   1253                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1254 
   1255             if (thissad < bestsad) {
   1256               bestsad = thissad;
   1257               best_site = i;
   1258             }
   1259           }
   1260         }
   1261         i++;
   1262       }
   1263     }
   1264 
   1265     if (best_site != last_site) {
   1266       best_mv->as_mv.row += ss[best_site].mv.row;
   1267       best_mv->as_mv.col += ss[best_site].mv.col;
   1268       best_address += ss[best_site].offset;
   1269       last_site = best_site;
   1270     } else if (best_address == in_what) {
   1271       (*num00)++;
   1272     }
   1273   }
   1274 
   1275   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1276   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1277 
   1278   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
   1279          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1280 }
   1281 #endif  // HAVE_SSE2 || HAVE_MSA
   1282 
   1283 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1284                           int sad_per_bit, int distance,
   1285                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1286                           int_mv *center_mv) {
   1287   unsigned char *what = (*(b->base_src) + b->src);
   1288   int what_stride = b->src_stride;
   1289   unsigned char *in_what;
   1290   int pre_stride = x->e_mbd.pre.y_stride;
   1291   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1292   int in_what_stride = pre_stride;
   1293   int mv_stride = pre_stride;
   1294   unsigned char *bestaddress;
   1295   int_mv *best_mv = &d->bmi.mv;
   1296   int_mv this_mv;
   1297   unsigned int bestsad;
   1298   unsigned int thissad;
   1299   int r, c;
   1300 
   1301   unsigned char *check_here;
   1302 
   1303   int ref_row = ref_mv->as_mv.row;
   1304   int ref_col = ref_mv->as_mv.col;
   1305 
   1306   int row_min = ref_row - distance;
   1307   int row_max = ref_row + distance;
   1308   int col_min = ref_col - distance;
   1309   int col_max = ref_col + distance;
   1310 
   1311   int *mvsadcost[2];
   1312   int_mv fcenter_mv;
   1313 
   1314   mvsadcost[0] = x->mvsadcost[0];
   1315   mvsadcost[1] = x->mvsadcost[1];
   1316   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1317   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1318 
   1319   /* Work out the mid point for the search */
   1320   in_what = base_pre + d->offset;
   1321   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1322 
   1323   best_mv->as_mv.row = ref_row;
   1324   best_mv->as_mv.col = ref_col;
   1325 
   1326   /* Baseline value at the centre */
   1327   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
   1328             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1329 
    1330   /* Apply further limits to prevent us from searching with vectors that
    1331    * stretch beyond the UMV border.
    1332    */
   1333   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
   1334 
   1335   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
   1336 
   1337   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
   1338 
   1339   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
   1340 
   1341   for (r = row_min; r < row_max; ++r) {
   1342     this_mv.as_mv.row = r;
   1343     check_here = r * mv_stride + in_what + col_min;
   1344 
   1345     for (c = col_min; c < col_max; ++c) {
   1346       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1347 
   1348       this_mv.as_mv.col = c;
   1349       thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1350 
   1351       if (thissad < bestsad) {
   1352         bestsad = thissad;
   1353         best_mv->as_mv.row = r;
   1354         best_mv->as_mv.col = c;
   1355         bestaddress = check_here;
   1356       }
   1357 
   1358       check_here++;
   1359     }
   1360   }
   1361 
   1362   this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1363   this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1364 
   1365   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
   1366          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1367 }
   1368 
   1369 #if HAVE_SSSE3
   1370 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1371                           int sad_per_bit, int distance,
   1372                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1373                           int_mv *center_mv) {
   1374   unsigned char *what = (*(b->base_src) + b->src);
   1375   int what_stride = b->src_stride;
   1376   unsigned char *in_what;
   1377   int pre_stride = x->e_mbd.pre.y_stride;
   1378   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1379   int in_what_stride = pre_stride;
   1380   int mv_stride = pre_stride;
   1381   unsigned char *bestaddress;
   1382   int_mv *best_mv = &d->bmi.mv;
   1383   int_mv this_mv;
   1384   unsigned int bestsad;
   1385   unsigned int thissad;
   1386   int r, c;
   1387 
   1388   unsigned char *check_here;
   1389 
   1390   int ref_row = ref_mv->as_mv.row;
   1391   int ref_col = ref_mv->as_mv.col;
   1392 
   1393   int row_min = ref_row - distance;
   1394   int row_max = ref_row + distance;
   1395   int col_min = ref_col - distance;
   1396   int col_max = ref_col + distance;
   1397 
   1398   unsigned int sad_array[3];
   1399 
   1400   int *mvsadcost[2];
   1401   int_mv fcenter_mv;
   1402 
   1403   mvsadcost[0] = x->mvsadcost[0];
   1404   mvsadcost[1] = x->mvsadcost[1];
   1405   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1406   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1407 
   1408   /* Work out the mid point for the search */
   1409   in_what = base_pre + d->offset;
   1410   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1411 
   1412   best_mv->as_mv.row = ref_row;
   1413   best_mv->as_mv.col = ref_col;
   1414 
   1415   /* Baseline value at the centre */
   1416   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
   1417             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1418 
   1419   /* Apply further limits to prevent us from using vectors that stretch
   1420    * beyond the UMV border
   1421    */
   1422   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
   1423 
   1424   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
   1425 
   1426   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
   1427 
   1428   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
   1429 
   1430   for (r = row_min; r < row_max; ++r) {
   1431     this_mv.as_mv.row = r;
   1432     check_here = r * mv_stride + in_what + col_min;
   1433     c = col_min;
   1434 
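          /* The 3-wide SAD kernel (sdx3f) evaluates three consecutive column
           * positions per call; leftover columns are handled one at a time below.
           */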
   1435     while ((c + 2) < col_max) {
   1436       int i;
   1437 
   1438       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1439 
   1440       for (i = 0; i < 3; ++i) {
   1441         thissad = sad_array[i];
   1442 
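                /* Only pay for the MV cost lookup when the raw SAD already beats
                 * the current best; then re-test with the cost included.
                 */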
   1443         if (thissad < bestsad) {
   1444           this_mv.as_mv.col = c;
   1445           thissad +=
   1446               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1447 
   1448           if (thissad < bestsad) {
   1449             bestsad = thissad;
   1450             best_mv->as_mv.row = r;
   1451             best_mv->as_mv.col = c;
   1452             bestaddress = check_here;
   1453           }
   1454         }
   1455 
   1456         check_here++;
   1457         c++;
   1458       }
   1459     }
   1460 
   1461     while (c < col_max) {
   1462       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1463 
   1464       if (thissad < bestsad) {
   1465         this_mv.as_mv.col = c;
   1466         thissad +=
   1467             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1468 
   1469         if (thissad < bestsad) {
   1470           bestsad = thissad;
   1471           best_mv->as_mv.row = r;
   1472           best_mv->as_mv.col = c;
   1473           bestaddress = check_here;
   1474         }
   1475       }
   1476 
   1477       check_here++;
   1478       c++;
   1479     }
   1480   }
   1481 
   1482   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1483   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1484 
   1485   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
   1486          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1487 }
   1488 #endif  // HAVE_SSSE3
   1489 
   1490 #if HAVE_SSE4_1
   1491 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1492                           int sad_per_bit, int distance,
   1493                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1494                           int_mv *center_mv) {
   1495   unsigned char *what = (*(b->base_src) + b->src);
   1496   int what_stride = b->src_stride;
   1497   int pre_stride = x->e_mbd.pre.y_stride;
   1498   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1499   unsigned char *in_what;
   1500   int in_what_stride = pre_stride;
   1501   int mv_stride = pre_stride;
   1502   unsigned char *bestaddress;
   1503   int_mv *best_mv = &d->bmi.mv;
   1504   int_mv this_mv;
   1505   unsigned int bestsad;
   1506   unsigned int thissad;
   1507   int r, c;
   1508 
   1509   unsigned char *check_here;
   1510 
   1511   int ref_row = ref_mv->as_mv.row;
   1512   int ref_col = ref_mv->as_mv.col;
   1513 
   1514   int row_min = ref_row - distance;
   1515   int row_max = ref_row + distance;
   1516   int col_min = ref_col - distance;
   1517   int col_max = ref_col + distance;
   1518 
   1519   DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
   1520   unsigned int sad_array[3];
   1521 
   1522   int *mvsadcost[2];
   1523   int_mv fcenter_mv;
   1524 
   1525   mvsadcost[0] = x->mvsadcost[0];
   1526   mvsadcost[1] = x->mvsadcost[1];
   1527   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1528   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1529 
   1530   /* Work out the mid point for the search */
   1531   in_what = base_pre + d->offset;
   1532   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1533 
   1534   best_mv->as_mv.row = ref_row;
   1535   best_mv->as_mv.col = ref_col;
   1536 
   1537   /* Baseline value at the centre */
   1538   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
   1539             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1540 
   1541   /* Apply further limits to prevent us from using vectors that stretch
   1542    * beyond the UMV border
   1543    */
   1544   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
   1545 
   1546   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
   1547 
   1548   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
   1549 
   1550   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
   1551 
   1552   for (r = row_min; r < row_max; ++r) {
   1553     this_mv.as_mv.row = r;
   1554     check_here = r * mv_stride + in_what + col_min;
   1555     c = col_min;
   1556 
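          /* Sweep the row with progressively narrower SAD kernels: 8 columns per
           * sdx8f call, then 3 per sdx3f call, then single positions with sdf.
           */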
   1557     while ((c + 7) < col_max) {
   1558       int i;
   1559 
   1560       fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
   1561 
   1562       for (i = 0; i < 8; ++i) {
   1563         thissad = sad_array8[i];
   1564 
   1565         if (thissad < bestsad) {
   1566           this_mv.as_mv.col = c;
   1567           thissad +=
   1568               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1569 
   1570           if (thissad < bestsad) {
   1571             bestsad = thissad;
   1572             best_mv->as_mv.row = r;
   1573             best_mv->as_mv.col = c;
   1574             bestaddress = check_here;
   1575           }
   1576         }
   1577 
   1578         check_here++;
   1579         c++;
   1580       }
   1581     }
   1582 
   1583     while ((c + 2) < col_max) {
   1584       int i;
   1585 
   1586       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1587 
   1588       for (i = 0; i < 3; ++i) {
   1589         thissad = sad_array[i];
   1590 
   1591         if (thissad < bestsad) {
   1592           this_mv.as_mv.col = c;
   1593           thissad +=
   1594               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1595 
   1596           if (thissad < bestsad) {
   1597             bestsad = thissad;
   1598             best_mv->as_mv.row = r;
   1599             best_mv->as_mv.col = c;
   1600             bestaddress = check_here;
   1601           }
   1602         }
   1603 
   1604         check_here++;
   1605         c++;
   1606       }
   1607     }
   1608 
   1609     while (c < col_max) {
   1610       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1611 
   1612       if (thissad < bestsad) {
   1613         this_mv.as_mv.col = c;
   1614         thissad +=
   1615             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1616 
   1617         if (thissad < bestsad) {
   1618           bestsad = thissad;
   1619           best_mv->as_mv.row = r;
   1620           best_mv->as_mv.col = c;
   1621           bestaddress = check_here;
   1622         }
   1623       }
   1624 
   1625       check_here++;
   1626       c++;
   1627     }
   1628   }
   1629 
   1630   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1631   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1632 
   1633   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
   1634          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1635 }
   1636 #endif  // HAVE_SSE4_1
   1637 
   1638 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1639                               int_mv *ref_mv, int error_per_bit,
   1640                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1641                               int *mvcost[2], int_mv *center_mv) {
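        /* Candidate offsets: above, left, right and below the current MV. */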
   1642   MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
   1643   int i, j;
   1644   short this_row_offset, this_col_offset;
   1645 
   1646   int what_stride = b->src_stride;
   1647   int pre_stride = x->e_mbd.pre.y_stride;
   1648   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1649   int in_what_stride = pre_stride;
   1650   unsigned char *what = (*(b->base_src) + b->src);
   1651   unsigned char *best_address =
   1652       (unsigned char *)(base_pre + d->offset +
   1653                         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1654   unsigned char *check_here;
   1655   int_mv this_mv;
   1656   unsigned int bestsad;
   1657   unsigned int thissad;
   1658 
   1659   int *mvsadcost[2];
   1660   int_mv fcenter_mv;
   1661 
   1662   mvsadcost[0] = x->mvsadcost[0];
   1663   mvsadcost[1] = x->mvsadcost[1];
   1664   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1665   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1666 
   1667   bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
   1668             mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1669 
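        /* Iterative refinement: each pass tests the four cardinal full-pel
         * neighbours of the current best MV and steps to the best improving one,
         * stopping early when no neighbour reduces the cost.
         */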
   1670   for (i = 0; i < search_range; ++i) {
   1671     int best_site = -1;
   1672 
   1673     for (j = 0; j < 4; ++j) {
   1674       this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1675       this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1676 
   1677       if ((this_col_offset > x->mv_col_min) &&
   1678           (this_col_offset < x->mv_col_max) &&
   1679           (this_row_offset > x->mv_row_min) &&
   1680           (this_row_offset < x->mv_row_max)) {
   1681         check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
   1682                      best_address;
   1683         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1684 
   1685         if (thissad < bestsad) {
   1686           this_mv.as_mv.row = this_row_offset;
   1687           this_mv.as_mv.col = this_col_offset;
   1688           thissad +=
   1689               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1690 
   1691           if (thissad < bestsad) {
   1692             bestsad = thissad;
   1693             best_site = j;
   1694           }
   1695         }
   1696       }
   1697     }
   1698 
   1699     if (best_site == -1) {
   1700       break;
   1701     } else {
   1702       ref_mv->as_mv.row += neighbors[best_site].row;
   1703       ref_mv->as_mv.col += neighbors[best_site].col;
   1704       best_address += (neighbors[best_site].row) * in_what_stride +
   1705                       neighbors[best_site].col;
   1706     }
   1707   }
   1708 
   1709   this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   1710   this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   1711 
   1712   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
   1713          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1714 }
   1715 
   1716 #if HAVE_SSE2 || HAVE_MSA
   1717 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1718                               int_mv *ref_mv, int error_per_bit,
   1719                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1720                               int *mvcost[2], int_mv *center_mv) {
   1721   MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
   1722   int i, j;
   1723   short this_row_offset, this_col_offset;
   1724 
   1725   int what_stride = b->src_stride;
   1726   int pre_stride = x->e_mbd.pre.y_stride;
   1727   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1728   int in_what_stride = pre_stride;
   1729   unsigned char *what = (*(b->base_src) + b->src);
   1730   unsigned char *best_address =
   1731       (unsigned char *)(base_pre + d->offset +
   1732                         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1733   unsigned char *check_here;
   1734   int_mv this_mv;
   1735   unsigned int bestsad;
   1736   unsigned int thissad;
   1737 
   1738   int *mvsadcost[2];
   1739   int_mv fcenter_mv;
   1740 
   1741   mvsadcost[0] = x->mvsadcost[0];
   1742   mvsadcost[1] = x->mvsadcost[1];
   1743   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1744   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1745 
   1746   bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
   1747             mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1748 
   1749   for (i = 0; i < search_range; ++i) {
   1750     int best_site = -1;
   1751     int all_in = 1;
   1752 
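          /* all_in is true when every neighbour lies strictly inside the MV
           * limits, so the 4-way SAD kernel can be used without per-point checks.
           */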
   1753     all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
   1754     all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
   1755     all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
   1756     all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
   1757 
   1758     if (all_in) {
   1759       unsigned int sad_array[4];
   1760       const unsigned char *block_offset[4];
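            /* Addresses of the above, left, right and below candidates, in the
             * same order as neighbors[].
             */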
   1761       block_offset[0] = best_address - in_what_stride;
   1762       block_offset[1] = best_address - 1;
   1763       block_offset[2] = best_address + 1;
   1764       block_offset[3] = best_address + in_what_stride;
   1765 
   1766       fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
   1767                      sad_array);
   1768 
   1769       for (j = 0; j < 4; ++j) {
   1770         if (sad_array[j] < bestsad) {
   1771           this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
   1772           this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
   1773           sad_array[j] +=
   1774               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1775 
   1776           if (sad_array[j] < bestsad) {
   1777             bestsad = sad_array[j];
   1778             best_site = j;
   1779           }
   1780         }
   1781       }
   1782     } else {
   1783       for (j = 0; j < 4; ++j) {
   1784         this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1785         this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1786 
   1787         if ((this_col_offset > x->mv_col_min) &&
   1788             (this_col_offset < x->mv_col_max) &&
   1789             (this_row_offset > x->mv_row_min) &&
   1790             (this_row_offset < x->mv_row_max)) {
   1791           check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
   1792                        best_address;
   1793           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1794 
   1795           if (thissad < bestsad) {
   1796             this_mv.as_mv.row = this_row_offset;
   1797             this_mv.as_mv.col = this_col_offset;
   1798             thissad +=
   1799                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1800 
   1801             if (thissad < bestsad) {
   1802               bestsad = thissad;
   1803               best_site = j;
   1804             }
   1805           }
   1806         }
   1807       }
   1808     }
   1809 
   1810     if (best_site == -1) {
   1811       break;
   1812     } else {
   1813       ref_mv->as_mv.row += neighbors[best_site].row;
   1814       ref_mv->as_mv.col += neighbors[best_site].col;
   1815       best_address += (neighbors[best_site].row) * in_what_stride +
   1816                       neighbors[best_site].col;
   1817     }
   1818   }
   1819 
   1820   this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   1821   this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   1822 
   1823   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
   1824          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1825 }
   1826 #endif  // HAVE_SSE2 || HAVE_MSA
   1827