Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "./vp8_rtcd.h"
     12 #include "./vpx_dsp_rtcd.h"
     13 #include "onyx_int.h"
     14 #include "mcomp.h"
     15 #include "vpx_mem/vpx_mem.h"
     16 #include "vpx_config.h"
     17 #include <stdio.h>
     18 #include <limits.h>
     19 #include <math.h>
     20 #include "vp8/common/findnearmv.h"
     21 #include "vp8/common/common.h"
     22 #include "vpx_dsp/vpx_dsp_common.h"
     23 
/* Statistics counters that exist only when VP8_ENTROPY_STATS is defined.
 * NOTE(review): neither array is referenced in the part of the file visible
 * here; confirm their users before changing the dimensions.
 */
#ifdef VP8_ENTROPY_STATS
static int mv_ref_ct[31][4][2];
static int mv_mode_cts[4][2];
#endif
     28 
     29 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
     30   /* MV costing is based on the distribution of vectors in the previous
     31    * frame and as such will tend to over state the cost of vectors. In
     32    * addition coding a new vector can have a knock on effect on the cost
     33    * of subsequent vectors and the quality of prediction from NEAR and
     34    * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
     35    * limited extent, for some account to be taken of these factors.
     36    */
     37   return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
     38            mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
     39           Weight) >>
     40          7;
     41 }
     42 
     43 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
     44                        int error_per_bit) {
     45   /* Ignore mv costing if mvcost is NULL */
     46   if (mvcost) {
     47     return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
     48              mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
     49                 error_per_bit +
     50             128) >>
     51            8;
     52   }
     53   return 0;
     54 }
     55 
     56 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
     57                           int error_per_bit) {
     58   /* Calculate sad error cost on full pixel basis. */
     59   /* Ignore mv costing if mvsadcost is NULL */
     60   if (mvsadcost) {
     61     return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
     62              mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
     63                 error_per_bit +
     64             128) >>
     65            8;
     66   }
     67   return 0;
     68 }
     69 
     70 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
     71   int Len;
     72   int search_site_count = 0;
     73 
     74   /* Generate offsets for 4 search sites per step. */
     75   Len = MAX_FIRST_STEP;
     76   x->ss[search_site_count].mv.col = 0;
     77   x->ss[search_site_count].mv.row = 0;
     78   x->ss[search_site_count].offset = 0;
     79   search_site_count++;
     80 
     81   while (Len > 0) {
     82     /* Compute offsets for search sites. */
     83     x->ss[search_site_count].mv.col = 0;
     84     x->ss[search_site_count].mv.row = -Len;
     85     x->ss[search_site_count].offset = -Len * stride;
     86     search_site_count++;
     87 
     88     /* Compute offsets for search sites. */
     89     x->ss[search_site_count].mv.col = 0;
     90     x->ss[search_site_count].mv.row = Len;
     91     x->ss[search_site_count].offset = Len * stride;
     92     search_site_count++;
     93 
     94     /* Compute offsets for search sites. */
     95     x->ss[search_site_count].mv.col = -Len;
     96     x->ss[search_site_count].mv.row = 0;
     97     x->ss[search_site_count].offset = -Len;
     98     search_site_count++;
     99 
    100     /* Compute offsets for search sites. */
    101     x->ss[search_site_count].mv.col = Len;
    102     x->ss[search_site_count].mv.row = 0;
    103     x->ss[search_site_count].offset = Len;
    104     search_site_count++;
    105 
    106     /* Contract. */
    107     Len /= 2;
    108   }
    109 
    110   x->ss_count = search_site_count;
    111   x->searches_per_step = 4;
    112 }
    113 
    114 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
    115   int Len;
    116   int search_site_count = 0;
    117 
    118   /* Generate offsets for 8 search sites per step. */
    119   Len = MAX_FIRST_STEP;
    120   x->ss[search_site_count].mv.col = 0;
    121   x->ss[search_site_count].mv.row = 0;
    122   x->ss[search_site_count].offset = 0;
    123   search_site_count++;
    124 
    125   while (Len > 0) {
    126     /* Compute offsets for search sites. */
    127     x->ss[search_site_count].mv.col = 0;
    128     x->ss[search_site_count].mv.row = -Len;
    129     x->ss[search_site_count].offset = -Len * stride;
    130     search_site_count++;
    131 
    132     /* Compute offsets for search sites. */
    133     x->ss[search_site_count].mv.col = 0;
    134     x->ss[search_site_count].mv.row = Len;
    135     x->ss[search_site_count].offset = Len * stride;
    136     search_site_count++;
    137 
    138     /* Compute offsets for search sites. */
    139     x->ss[search_site_count].mv.col = -Len;
    140     x->ss[search_site_count].mv.row = 0;
    141     x->ss[search_site_count].offset = -Len;
    142     search_site_count++;
    143 
    144     /* Compute offsets for search sites. */
    145     x->ss[search_site_count].mv.col = Len;
    146     x->ss[search_site_count].mv.row = 0;
    147     x->ss[search_site_count].offset = Len;
    148     search_site_count++;
    149 
    150     /* Compute offsets for search sites. */
    151     x->ss[search_site_count].mv.col = -Len;
    152     x->ss[search_site_count].mv.row = -Len;
    153     x->ss[search_site_count].offset = -Len * stride - Len;
    154     search_site_count++;
    155 
    156     /* Compute offsets for search sites. */
    157     x->ss[search_site_count].mv.col = Len;
    158     x->ss[search_site_count].mv.row = -Len;
    159     x->ss[search_site_count].offset = -Len * stride + Len;
    160     search_site_count++;
    161 
    162     /* Compute offsets for search sites. */
    163     x->ss[search_site_count].mv.col = -Len;
    164     x->ss[search_site_count].mv.row = Len;
    165     x->ss[search_site_count].offset = Len * stride - Len;
    166     search_site_count++;
    167 
    168     /* Compute offsets for search sites. */
    169     x->ss[search_site_count].mv.col = Len;
    170     x->ss[search_site_count].mv.row = Len;
    171     x->ss[search_site_count].offset = Len * stride + Len;
    172     search_site_count++;
    173 
    174     /* Contract. */
    175     Len /= 2;
    176   }
    177 
    178   x->ss_count = search_site_count;
    179   x->searches_per_step = 8;
    180 }
    181 
/*
 * To avoid the penalty of reads that cross a cache line, the reference area is
 * preloaded into a small buffer that is aligned so that reads from it never
 * straddle a cache line. This reduces the CPU cycles spent reading reference
 * data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a 22 rows
 * x 32 cols area is copied, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, the area could be reduced.
 */
    191 
/*
 * Helper macros for vp8_find_best_sub_pixel_step_iteratively(). They are not
 * self-contained: each one reads (and CHECK_BETTER also writes) local
 * variables of the function that uses them, namely mvcost, rr, rc,
 * error_per_bit, y, y_stride, offset, vfp, z, b, sse, minc, maxc, minr, maxr,
 * thismse, besterr, br, bc, distortion and sse1.
 *
 * The (r, c) arguments are on a grid where 4 steps correspond to one sample
 * of the reference buffer: PRE() uses (r >> 2, c >> 2) as the sample position
 * and SP() turns the low two bits into the offset handed to vfp->svf.
 */

/* estimated cost of a motion vector (r,c); 0 when mvcost is NULL */
#define MVC(r, c)                                                             \
  (mvcost                                                                     \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. Writes the local "sse". */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
/* Runs statement "s" when (r, c) lies inside [minr, maxr] x [minc, maxc],
 * otherwise runs "e". "s" is pasted directly after an unbraced if, so callers
 * must pass a single statement or a braced block.
 */
#define IFMVCV(r, c, s, e) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost
 * (not used by the function that follows in the visible source)
 */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best.
 * In range: v receives cost + distortion, and when that beats besterr the
 * best position (br, bc), besterr, *distortion and *sse1 are all updated.
 * Out of range: v is set to UINT_MAX so the point can never win a comparison.
 */
#define CHECK_BETTER(v, r, c)                           \
  IFMVCV(r, c,                                          \
         {                                              \
           thismse = DIST(r, c);                        \
           if ((v = (MVC(r, c) + thismse)) < besterr) { \
             besterr = v;                               \
             br = r;                                    \
             bc = c;                                    \
             *distortion = thismse;                     \
             *sse1 = sse;                               \
           }                                            \
         },                                             \
         v = UINT_MAX;)
    222 
/*
 * Iteratively refines the vector in "bestmv" to sub-pixel precision.
 *
 * x              supplies the vector limits (mv_row/col_min/max) and the
 *                reference frame buffer (e_mbd.pre).
 * b              source block; pixels are read from *b->base_src + b->src
 *                with stride b->src_stride.
 * d              d->offset locates the block inside the reference buffer.
 * bestmv         in/out. On entry its components are used directly as a
 *                row/column sample offset into the reference buffer. On exit
 *                it holds the refined vector (best grid position * 2).
 * ref_mv         vector the cost is measured against.
 * error_per_bit  multiplier for the vector cost term.
 * vfp            function table; vf, svf and (x86 only) copymem are called.
 * mvcost         cost tables, or NULL to ignore vector cost.
 * distortion     out: distortion of the best point found.
 * sse1           out: sse reported for the best point found.
 *
 * Returns the best cost + distortion found, or INT_MAX when the refined
 * vector differs from ref_mv by more than (MAX_FULL_PEL_VAL << 3) in either
 * component.
 *
 * The search relies on the MVC/PRE/SP/DIST/IFMVCV/CHECK_BETTER macros defined
 * just before this function; they read and write the locals declared below.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1) {
  unsigned char *z = (*(b->base_src) + b->src);

  /* rr/rc: reference vector halved, i.e. on the same grid as br/bc. */
  int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
  /* br/bc: best position so far; tr/tc: centre of the current iteration. */
  int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
  int tr = br, tc = bc;
  unsigned int besterr;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  unsigned int whichdir;
  /* Pre-decremented in the loop conditions, so each loop runs at most 3
   * times.
   */
  unsigned int halfiters = 4;
  unsigned int quarteriters = 4;
  int thismse;

  /* Search window: the frame limits scaled by 4, intersected with a window of
   * +/- ((1 << mvlong_width) - 1) around the halved reference vector.
   */
  int minc = VPXMAX(x->mv_col_min * 4,
                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
  int maxc = VPXMIN(x->mv_col_max * 4,
                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
  int minr = VPXMAX(x->mv_row_min * 4,
                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
  int maxr = VPXMIN(x->mv_row_max * 4,
                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

  int y_stride;
  int offset;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;
  int buf_r1, buf_r2, buf_c1;

  /* Clamping to avoid out-of-range data access */
  /* Margins of up to 3 rows above/below and 3 columns to the left, reduced
   * when the start vector is closer than that to the corresponding limit.
   */
  buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
               ? (bestmv->as_mv.row - x->mv_row_min)
               : 3;
  buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
               ? (x->mv_row_max - bestmv->as_mv.row)
               : 3;
  buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
               ? (bestmv->as_mv.col - x->mv_col_min)
               : 3;
  y_stride = 32;

  /* Copy to intermediate buffer before searching. */
  vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
               y_stride, 16 + buf_r1 + buf_r2);
  /* y points at the start position inside the copied area. */
  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
#else
  /* No intermediate copy: search directly in the reference frame. */
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* PRE() subtracts this so that the start position maps back onto y. */
  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  /* central mv */
  /* Scaled by 8 so mv_err_cost() sees it on the same scale as ref_mv. */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;

  /* calculate central point error */
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */
  while (--halfiters) {
    /* 1/2 pel */
    /* Four neighbours at distance 2 on the br/bc grid. */
    CHECK_BETTER(left, tr, tc - 2);
    CHECK_BETTER(right, tr, tc + 2);
    CHECK_BETTER(up, tr - 2, tc);
    CHECK_BETTER(down, tr + 2, tc);

    /* Bit 0: right is at least as good as left; bit 1: down is at least as
     * good as up. Selects the single diagonal to test.
     */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
      case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
      case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
      case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    /* Re-centre on the improved position for the next pass. */
    tr = br;
    tc = bc;
  }

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */

  /* 1/4 pel */
  /* Same pattern as above with a step of 1. */
  while (--quarteriters) {
    CHECK_BETTER(left, tr, tc - 1);
    CHECK_BETTER(right, tr, tc + 1);
    CHECK_BETTER(up, tr - 1, tc);
    CHECK_BETTER(down, tr + 1, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
      case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
      case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
      case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* Convert the best grid position back to the scale of bestmv / ref_mv. */
  bestmv->as_mv.row = br * 2;
  bestmv->as_mv.col = bc * 2;

  /* Reject results that ended up too far from the reference vector. */
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    return INT_MAX;
  }

  return besterr;
}
/* The helper macros are local to the function above. */
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER
    366 
/*
 * Single-pass sub-pixel refinement: one half-pel round followed by one
 * quarter-pel round around the vector passed in "bestmv".
 *
 * x              supplies the reference frame buffer (e_mbd.pre).
 * b              source block; pixels are read from *b->base_src + b->src
 *                with stride b->src_stride.
 * d              d->offset locates the block inside the reference buffer.
 * bestmv         in/out. On entry its components are used directly as a
 *                row/column sample offset into the reference buffer; they are
 *                multiplied by 8 in place before searching. On exit it holds
 *                the best candidate found.
 * ref_mv         vector the cost is measured against.
 * error_per_bit  multiplier for the vector cost term.
 * vfp            function table; vf, svf and (x86 only) copymem are called.
 * mvcost         cost tables, or NULL to ignore vector cost.
 * distortion     out: distortion of the best candidate.
 * sse1           out: sse reported for the best candidate.
 *
 * Returns the best distortion + vector cost found.
 *
 * Each round tests left, right, up and down, then exactly one diagonal chosen
 * from the cheaper horizontal and the cheaper vertical neighbour.
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  /* The copy starts one row above and one column left of the start position,
   * so y below points at the start position inside the buffer.
   */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  /* No intermediate copy: search directly in the reference frame. */
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  /* After this scaling both components are multiples of 8. */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  /* col is a multiple of 8 here, so (col - 8) | 4 equals col - 4. */
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* From col - 4 to col + 4. */
  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  /* row is a multiple of 8 here, so (row - 8) | 4 equals row - 4. */
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* From row - 4 to row + 4. */
  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  /* Bit 0 set: right is at least as good as left. Bit 1 set: down is at
   * least as good as up.
   */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      /* up-left */
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      /* up-right */
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      /* down-left */
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      /* down-right */
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* time to check quarter pels. */
  /* If the half-pel winner lies above / left of the original centre, step the
   * base pointer back by one row / column. The calls below pass (component
   * & 7) as the sub-pixel offset relative to y.
   */
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;

  if (bestmv->as_mv.col < startmv.as_mv.col) y--;

  /* The quarter-pel round is centred on the half-pel winner. */
  startmv = *bestmv;

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;

  if (startmv.as_mv.col & 7) {
    /* Centre has a fractional column: col - 2 uses the same base sample. */
    this_mv.as_mv.col = startmv.as_mv.col - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    /* Centre column is a multiple of 8: (col - 8) | 6 equals col - 2, which
     * is offset 6 from the sample one column to the left.
     */
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                       b->src_stride, &sse);
  }

  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* From col - 2 to col + 2. */
  this_mv.as_mv.col += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;

  if (startmv.as_mv.row & 7) {
    /* Centre has a fractional row: row - 2 uses the same base sample. */
    this_mv.as_mv.row = startmv.as_mv.row - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    /* Centre row is a multiple of 8: (row - 8) | 6 equals row - 2, which is
     * offset 6 from the sample one row up.
     */
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                       b->src_stride, &sse);
  }

  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* From row - 2 to row + 2. */
  this_mv.as_mv.row += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

  this_mv = startmv;

  /* whichdir is always 0..3, so one case below always assigns thismse. */
  switch (whichdir) {
    case 0:
      /* up-left: either component may need the previous sample, as above. */

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                             this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                             b->src_stride, &sse);
        }
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
                             z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
                             &sse);
        }
      }

      break;
    case 1:
      /* up-right: only the row may need the previous sample. */
      this_mv.as_mv.col += 2;

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                           b->src_stride, &sse);
      }

      break;
    case 2:
      /* down-left: only the column may need the previous sample. */
      this_mv.as_mv.row += 2;

      if (startmv.as_mv.col & 7) {
        this_mv.as_mv.col -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                           b->src_stride, &sse);
      }

      break;
    case 3:
      /* down-right */
      this_mv.as_mv.col += 2;
      this_mv.as_mv.row += 2;
      thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                         this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}
    662 
    663 int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    664                                   int_mv *bestmv, int_mv *ref_mv,
    665                                   int error_per_bit,
    666                                   const vp8_variance_fn_ptr_t *vfp,
    667                                   int *mvcost[2], int *distortion,
    668                                   unsigned int *sse1) {
    669   int bestmse = INT_MAX;
    670   int_mv startmv;
    671   int_mv this_mv;
    672   unsigned char *z = (*(b->base_src) + b->src);
    673   int left, right, up, down, diag;
    674   unsigned int sse;
    675   int whichdir;
    676   int thismse;
    677   int y_stride;
    678   int pre_stride = x->e_mbd.pre.y_stride;
    679   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    680 
    681 #if ARCH_X86 || ARCH_X86_64
    682   MACROBLOCKD *xd = &x->e_mbd;
    683   unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
    684                        bestmv->as_mv.col;
    685   unsigned char *y;
    686 
    687   y_stride = 32;
    688   /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    689   vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    690   y = xd->y_buf + y_stride + 1;
    691 #else
    692   unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
    693                      bestmv->as_mv.col;
    694   y_stride = pre_stride;
    695 #endif
    696 
    697   /* central mv */
    698   bestmv->as_mv.row *= 8;
    699   bestmv->as_mv.col *= 8;
    700   startmv = *bestmv;
    701 
    702   /* calculate central point error */
    703   bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    704   *distortion = bestmse;
    705   bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    706 
    707   /* go left then right and check error */
    708   this_mv.as_mv.row = startmv.as_mv.row;
    709   this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    710   /* "halfpix" horizontal variance */
    711   thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
    712   left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    713 
    714   if (left < bestmse) {
    715     *bestmv = this_mv;
    716     bestmse = left;
    717     *distortion = thismse;
    718     *sse1 = sse;
    719   }
    720 
    721   this_mv.as_mv.col += 8;
    722   /* "halfpix" horizontal variance */
    723   thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
    724   right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    725 
    726   if (right < bestmse) {
    727     *bestmv = this_mv;
    728     bestmse = right;
    729     *distortion = thismse;
    730     *sse1 = sse;
    731   }
    732 
    733   /* go up then down and check error */
    734   this_mv.as_mv.col = startmv.as_mv.col;
    735   this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    736   /* "halfpix" vertical variance */
    737   thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
    738   up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    739 
    740   if (up < bestmse) {
    741     *bestmv = this_mv;
    742     bestmse = up;
    743     *distortion = thismse;
    744     *sse1 = sse;
    745   }
    746 
    747   this_mv.as_mv.row += 8;
    748   /* "halfpix" vertical variance */
    749   thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
    750   down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    751 
    752   if (down < bestmse) {
    753     *bestmv = this_mv;
    754     bestmse = down;
    755     *distortion = thismse;
    756     *sse1 = sse;
    757   }
    758 
    759   /* now check 1 more diagonal - */
    760   whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    761   this_mv = startmv;
    762 
    763   switch (whichdir) {
    764     case 0:
    765       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    766       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    767       /* "halfpix" horizontal/vertical variance */
    768       thismse =
    769           vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
    770       break;
    771     case 1:
    772       this_mv.as_mv.col += 4;
    773       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    774       /* "halfpix" horizontal/vertical variance */
    775       thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
    776       break;
    777     case 2:
    778       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    779       this_mv.as_mv.row += 4;
    780       /* "halfpix" horizontal/vertical variance */
    781       thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
    782       break;
    783     case 3:
    784     default:
    785       this_mv.as_mv.col += 4;
    786       this_mv.as_mv.row += 4;
    787       /* "halfpix" horizontal/vertical variance */
    788       thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
    789       break;
    790   }
    791 
    792   diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    793 
    794   if (diag < bestmse) {
    795     *bestmv = this_mv;
    796     bestmse = diag;
    797     *distortion = thismse;
    798     *sse1 = sse;
    799   }
    800 
    801   return bestmse;
    802 }
    803 
    804 #define CHECK_BOUNDS(range)                    \
    805   {                                            \
    806     all_in = 1;                                \
    807     all_in &= ((br - range) >= x->mv_row_min); \
    808     all_in &= ((br + range) <= x->mv_row_max); \
    809     all_in &= ((bc - range) >= x->mv_col_min); \
    810     all_in &= ((bc + range) <= x->mv_col_max); \
    811   }
    812 
    813 #define CHECK_POINT                                  \
    814   {                                                  \
    815     if (this_mv.as_mv.col < x->mv_col_min) continue; \
    816     if (this_mv.as_mv.col > x->mv_col_max) continue; \
    817     if (this_mv.as_mv.row < x->mv_row_min) continue; \
    818     if (this_mv.as_mv.row > x->mv_row_max) continue; \
    819   }
    820 
    821 #define CHECK_BETTER                                                     \
    822   {                                                                      \
    823     if (thissad < bestsad) {                                             \
    824       thissad +=                                                         \
    825           mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
    826       if (thissad < bestsad) {                                           \
    827         bestsad = thissad;                                               \
    828         best_site = i;                                                   \
    829       }                                                                  \
    830     }                                                                    \
    831   }
    832 
    833 static const MV next_chkpts[6][3] = {
    834   { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
    835   { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
    836   { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
    837 };
    838 
    839 int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
    840                    int_mv *best_mv, int search_param, int sad_per_bit,
    841                    const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
    842                    int *mvcost[2], int_mv *center_mv) {
    843   MV hex[6] = {
    844     { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
    845   };
    846   MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
    847   int i, j;
    848 
    849   unsigned char *what = (*(b->base_src) + b->src);
    850   int what_stride = b->src_stride;
    851   int pre_stride = x->e_mbd.pre.y_stride;
    852   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    853 
    854   int in_what_stride = pre_stride;
    855   int br, bc;
    856   int_mv this_mv;
    857   unsigned int bestsad;
    858   unsigned int thissad;
    859   unsigned char *base_offset;
    860   unsigned char *this_offset;
    861   int k = -1;
    862   int all_in;
    863   int best_site = -1;
    864   int hex_range = 127;
    865   int dia_range = 8;
    866 
    867   int_mv fcenter_mv;
    868   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    869   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    870 
    871   (void)mvcost;
    872 
    873   /* adjust ref_mv to make sure it is within MV range */
    874   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
    875                x->mv_row_max);
    876   br = ref_mv->as_mv.row;
    877   bc = ref_mv->as_mv.col;
    878 
    879   /* Work out the start point for the search */
    880   base_offset = (unsigned char *)(base_pre + d->offset);
    881   this_offset = base_offset + (br * (pre_stride)) + bc;
    882   this_mv.as_mv.row = br;
    883   this_mv.as_mv.col = bc;
    884   bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
    885             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    886 
    887 #if CONFIG_MULTI_RES_ENCODING
    888   /* Lower search range based on prediction info */
    889   if (search_param >= 6)
    890     goto cal_neighbors;
    891   else if (search_param >= 5)
    892     hex_range = 4;
    893   else if (search_param >= 4)
    894     hex_range = 6;
    895   else if (search_param >= 3)
    896     hex_range = 15;
    897   else if (search_param >= 2)
    898     hex_range = 31;
    899   else if (search_param >= 1)
    900     hex_range = 63;
    901 
    902   dia_range = 8;
    903 #else
    904   (void)search_param;
    905 #endif
    906 
    907   /* hex search */
    908   CHECK_BOUNDS(2)
    909 
    910   if (all_in) {
    911     for (i = 0; i < 6; ++i) {
    912       this_mv.as_mv.row = br + hex[i].row;
    913       this_mv.as_mv.col = bc + hex[i].col;
    914       this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
    915                     this_mv.as_mv.col;
    916       thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    917       CHECK_BETTER
    918     }
    919   } else {
    920     for (i = 0; i < 6; ++i) {
    921       this_mv.as_mv.row = br + hex[i].row;
    922       this_mv.as_mv.col = bc + hex[i].col;
    923       CHECK_POINT
    924       this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
    925                     this_mv.as_mv.col;
    926       thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    927       CHECK_BETTER
    928     }
    929   }
    930 
    931   if (best_site == -1) {
    932     goto cal_neighbors;
    933   } else {
    934     br += hex[best_site].row;
    935     bc += hex[best_site].col;
    936     k = best_site;
    937   }
    938 
    939   for (j = 1; j < hex_range; ++j) {
    940     best_site = -1;
    941     CHECK_BOUNDS(2)
    942 
    943     if (all_in) {
    944       for (i = 0; i < 3; ++i) {
    945         this_mv.as_mv.row = br + next_chkpts[k][i].row;
    946         this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    947         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
    948                       this_mv.as_mv.col;
    949         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    950         CHECK_BETTER
    951       }
    952     } else {
    953       for (i = 0; i < 3; ++i) {
    954         this_mv.as_mv.row = br + next_chkpts[k][i].row;
    955         this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    956         CHECK_POINT
    957         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
    958                       this_mv.as_mv.col;
    959         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    960         CHECK_BETTER
    961       }
    962     }
    963 
    964     if (best_site == -1) {
    965       break;
    966     } else {
    967       br += next_chkpts[k][best_site].row;
    968       bc += next_chkpts[k][best_site].col;
    969       k += 5 + best_site;
    970       if (k >= 12) {
    971         k -= 12;
    972       } else if (k >= 6) {
    973         k -= 6;
    974       }
    975     }
    976   }
    977 
    978 /* check 4 1-away neighbors */
    979 cal_neighbors:
    980   for (j = 0; j < dia_range; ++j) {
    981     best_site = -1;
    982     CHECK_BOUNDS(1)
    983 
    984     if (all_in) {
    985       for (i = 0; i < 4; ++i) {
    986         this_mv.as_mv.row = br + neighbors[i].row;
    987         this_mv.as_mv.col = bc + neighbors[i].col;
    988         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
    989                       this_mv.as_mv.col;
    990         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    991         CHECK_BETTER
    992       }
    993     } else {
    994       for (i = 0; i < 4; ++i) {
    995         this_mv.as_mv.row = br + neighbors[i].row;
    996         this_mv.as_mv.col = bc + neighbors[i].col;
    997         CHECK_POINT
    998         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
    999                       this_mv.as_mv.col;
   1000         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
   1001         CHECK_BETTER
   1002       }
   1003     }
   1004 
   1005     if (best_site == -1) {
   1006       break;
   1007     } else {
   1008       br += neighbors[best_site].row;
   1009       bc += neighbors[best_site].col;
   1010     }
   1011   }
   1012 
   1013   best_mv->as_mv.row = br;
   1014   best_mv->as_mv.col = bc;
   1015 
   1016   return bestsad;
   1017 }
   1018 #undef CHECK_BOUNDS
   1019 #undef CHECK_POINT
   1020 #undef CHECK_BETTER
   1021 
   1022 int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1023                              int_mv *best_mv, int search_param, int sad_per_bit,
   1024                              int *num00, vp8_variance_fn_ptr_t *fn_ptr,
   1025                              int *mvcost[2], int_mv *center_mv) {
   1026   int i, j, step;
   1027 
   1028   unsigned char *what = (*(b->base_src) + b->src);
   1029   int what_stride = b->src_stride;
   1030   unsigned char *in_what;
   1031   int pre_stride = x->e_mbd.pre.y_stride;
   1032   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1033   int in_what_stride = pre_stride;
   1034   unsigned char *best_address;
   1035 
   1036   int tot_steps;
   1037   int_mv this_mv;
   1038 
   1039   unsigned int bestsad;
   1040   unsigned int thissad;
   1041   int best_site = 0;
   1042   int last_site = 0;
   1043 
   1044   int ref_row;
   1045   int ref_col;
   1046   int this_row_offset;
   1047   int this_col_offset;
   1048   search_site *ss;
   1049 
   1050   unsigned char *check_here;
   1051 
   1052   int *mvsadcost[2];
   1053   int_mv fcenter_mv;
   1054 
   1055   mvsadcost[0] = x->mvsadcost[0];
   1056   mvsadcost[1] = x->mvsadcost[1];
   1057   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1058   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1059 
   1060   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
   1061                x->mv_row_max);
   1062   ref_row = ref_mv->as_mv.row;
   1063   ref_col = ref_mv->as_mv.col;
   1064   *num00 = 0;
   1065   best_mv->as_mv.row = ref_row;
   1066   best_mv->as_mv.col = ref_col;
   1067 
   1068   /* Work out the start point for the search */
   1069   in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
   1070                               ref_col);
   1071   best_address = in_what;
   1072 
   1073   /* Check the starting position */
   1074   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
   1075             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1076 
   1077   /* search_param determines the length of the initial step and hence
   1078    * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   1079    * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1080    */
   1081   ss = &x->ss[search_param * x->searches_per_step];
   1082   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1083 
   1084   i = 1;
   1085 
   1086   for (step = 0; step < tot_steps; ++step) {
   1087     for (j = 0; j < x->searches_per_step; ++j) {
   1088       /* Trap illegal vectors */
   1089       this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1090       this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1091 
   1092       if ((this_col_offset > x->mv_col_min) &&
   1093           (this_col_offset < x->mv_col_max) &&
   1094           (this_row_offset > x->mv_row_min) &&
   1095           (this_row_offset < x->mv_row_max))
   1096 
   1097       {
   1098         check_here = ss[i].offset + best_address;
   1099         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1100 
   1101         if (thissad < bestsad) {
   1102           this_mv.as_mv.row = this_row_offset;
   1103           this_mv.as_mv.col = this_col_offset;
   1104           thissad +=
   1105               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1106 
   1107           if (thissad < bestsad) {
   1108             bestsad = thissad;
   1109             best_site = i;
   1110           }
   1111         }
   1112       }
   1113 
   1114       i++;
   1115     }
   1116 
   1117     if (best_site != last_site) {
   1118       best_mv->as_mv.row += ss[best_site].mv.row;
   1119       best_mv->as_mv.col += ss[best_site].mv.col;
   1120       best_address += ss[best_site].offset;
   1121       last_site = best_site;
   1122     } else if (best_address == in_what) {
   1123       (*num00)++;
   1124     }
   1125   }
   1126 
   1127   this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1128   this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1129 
   1130   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
   1131          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1132 }
   1133 
   1134 int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1135                              int_mv *best_mv, int search_param, int sad_per_bit,
   1136                              int *num00, vp8_variance_fn_ptr_t *fn_ptr,
   1137                              int *mvcost[2], int_mv *center_mv) {
   1138   int i, j, step;
   1139 
   1140   unsigned char *what = (*(b->base_src) + b->src);
   1141   int what_stride = b->src_stride;
   1142   unsigned char *in_what;
   1143   int pre_stride = x->e_mbd.pre.y_stride;
   1144   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1145   int in_what_stride = pre_stride;
   1146   unsigned char *best_address;
   1147 
   1148   int tot_steps;
   1149   int_mv this_mv;
   1150 
   1151   unsigned int bestsad;
   1152   unsigned int thissad;
   1153   int best_site = 0;
   1154   int last_site = 0;
   1155 
   1156   int ref_row;
   1157   int ref_col;
   1158   int this_row_offset;
   1159   int this_col_offset;
   1160   search_site *ss;
   1161 
   1162   unsigned char *check_here;
   1163 
   1164   int *mvsadcost[2];
   1165   int_mv fcenter_mv;
   1166 
   1167   mvsadcost[0] = x->mvsadcost[0];
   1168   mvsadcost[1] = x->mvsadcost[1];
   1169   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1170   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1171 
   1172   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
   1173                x->mv_row_max);
   1174   ref_row = ref_mv->as_mv.row;
   1175   ref_col = ref_mv->as_mv.col;
   1176   *num00 = 0;
   1177   best_mv->as_mv.row = ref_row;
   1178   best_mv->as_mv.col = ref_col;
   1179 
   1180   /* Work out the start point for the search */
   1181   in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
   1182                               ref_col);
   1183   best_address = in_what;
   1184 
   1185   /* Check the starting position */
   1186   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
   1187             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1188 
   1189   /* search_param determines the length of the initial step and hence the
   1190    * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   1191    * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1192    */
   1193   ss = &x->ss[search_param * x->searches_per_step];
   1194   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1195 
   1196   i = 1;
   1197 
   1198   for (step = 0; step < tot_steps; ++step) {
   1199     int all_in = 1, t;
   1200 
   1201     /* To know if all neighbor points are within the bounds, 4 bounds
   1202      * checking are enough instead of checking 4 bounds for each
   1203      * points.
   1204      */
   1205     all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
   1206     all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
   1207     all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
   1208     all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
   1209 
   1210     if (all_in) {
   1211       unsigned int sad_array[4];
   1212 
   1213       for (j = 0; j < x->searches_per_step; j += 4) {
   1214         const unsigned char *block_offset[4];
   1215 
   1216         for (t = 0; t < 4; ++t) {
   1217           block_offset[t] = ss[i + t].offset + best_address;
   1218         }
   1219 
   1220         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
   1221                        sad_array);
   1222 
   1223         for (t = 0; t < 4; t++, i++) {
   1224           if (sad_array[t] < bestsad) {
   1225             this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
   1226             this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
   1227             sad_array[t] +=
   1228                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1229 
   1230             if (sad_array[t] < bestsad) {
   1231               bestsad = sad_array[t];
   1232               best_site = i;
   1233             }
   1234           }
   1235         }
   1236       }
   1237     } else {
   1238       for (j = 0; j < x->searches_per_step; ++j) {
   1239         /* Trap illegal vectors */
   1240         this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1241         this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1242 
   1243         if ((this_col_offset > x->mv_col_min) &&
   1244             (this_col_offset < x->mv_col_max) &&
   1245             (this_row_offset > x->mv_row_min) &&
   1246             (this_row_offset < x->mv_row_max)) {
   1247           check_here = ss[i].offset + best_address;
   1248           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1249 
   1250           if (thissad < bestsad) {
   1251             this_mv.as_mv.row = this_row_offset;
   1252             this_mv.as_mv.col = this_col_offset;
   1253             thissad +=
   1254                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1255 
   1256             if (thissad < bestsad) {
   1257               bestsad = thissad;
   1258               best_site = i;
   1259             }
   1260           }
   1261         }
   1262         i++;
   1263       }
   1264     }
   1265 
   1266     if (best_site != last_site) {
   1267       best_mv->as_mv.row += ss[best_site].mv.row;
   1268       best_mv->as_mv.col += ss[best_site].mv.col;
   1269       best_address += ss[best_site].offset;
   1270       last_site = best_site;
   1271     } else if (best_address == in_what) {
   1272       (*num00)++;
   1273     }
   1274   }
   1275 
   1276   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1277   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1278 
   1279   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
   1280          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1281 }
   1282 
   1283 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1284                           int sad_per_bit, int distance,
   1285                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1286                           int_mv *center_mv) {
   1287   unsigned char *what = (*(b->base_src) + b->src);
   1288   int what_stride = b->src_stride;
   1289   unsigned char *in_what;
   1290   int pre_stride = x->e_mbd.pre.y_stride;
   1291   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1292   int in_what_stride = pre_stride;
   1293   int mv_stride = pre_stride;
   1294   unsigned char *bestaddress;
   1295   int_mv *best_mv = &d->bmi.mv;
   1296   int_mv this_mv;
   1297   unsigned int bestsad;
   1298   unsigned int thissad;
   1299   int r, c;
   1300 
   1301   unsigned char *check_here;
   1302 
   1303   int ref_row = ref_mv->as_mv.row;
   1304   int ref_col = ref_mv->as_mv.col;
   1305 
   1306   int row_min = ref_row - distance;
   1307   int row_max = ref_row + distance;
   1308   int col_min = ref_col - distance;
   1309   int col_max = ref_col + distance;
   1310 
   1311   int *mvsadcost[2];
   1312   int_mv fcenter_mv;
   1313 
   1314   mvsadcost[0] = x->mvsadcost[0];
   1315   mvsadcost[1] = x->mvsadcost[1];
   1316   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1317   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1318 
   1319   /* Work out the mid point for the search */
   1320   in_what = base_pre + d->offset;
   1321   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1322 
   1323   best_mv->as_mv.row = ref_row;
   1324   best_mv->as_mv.col = ref_col;
   1325 
   1326   /* Baseline value at the centre */
   1327   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
   1328             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1329 
   1330   /* Apply further limits to prevent us looking using vectors that
   1331    * stretch beyiond the UMV border
   1332    */
   1333   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
   1334 
   1335   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
   1336 
   1337   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
   1338 
   1339   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
   1340 
   1341   for (r = row_min; r < row_max; ++r) {
   1342     this_mv.as_mv.row = r;
   1343     check_here = r * mv_stride + in_what + col_min;
   1344 
   1345     for (c = col_min; c < col_max; ++c) {
   1346       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1347 
   1348       this_mv.as_mv.col = c;
   1349       thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1350 
   1351       if (thissad < bestsad) {
   1352         bestsad = thissad;
   1353         best_mv->as_mv.row = r;
   1354         best_mv->as_mv.col = c;
   1355         bestaddress = check_here;
   1356       }
   1357 
   1358       check_here++;
   1359     }
   1360   }
   1361 
   1362   this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1363   this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1364 
   1365   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
   1366          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1367 }
   1368 
   1369 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1370                           int sad_per_bit, int distance,
   1371                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1372                           int_mv *center_mv) {
   1373   unsigned char *what = (*(b->base_src) + b->src);
   1374   int what_stride = b->src_stride;
   1375   unsigned char *in_what;
   1376   int pre_stride = x->e_mbd.pre.y_stride;
   1377   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1378   int in_what_stride = pre_stride;
   1379   int mv_stride = pre_stride;
   1380   unsigned char *bestaddress;
   1381   int_mv *best_mv = &d->bmi.mv;
   1382   int_mv this_mv;
   1383   unsigned int bestsad;
   1384   unsigned int thissad;
   1385   int r, c;
   1386 
   1387   unsigned char *check_here;
   1388 
   1389   int ref_row = ref_mv->as_mv.row;
   1390   int ref_col = ref_mv->as_mv.col;
   1391 
   1392   int row_min = ref_row - distance;
   1393   int row_max = ref_row + distance;
   1394   int col_min = ref_col - distance;
   1395   int col_max = ref_col + distance;
   1396 
   1397   unsigned int sad_array[3];
   1398 
   1399   int *mvsadcost[2];
   1400   int_mv fcenter_mv;
   1401 
   1402   mvsadcost[0] = x->mvsadcost[0];
   1403   mvsadcost[1] = x->mvsadcost[1];
   1404   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1405   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1406 
   1407   /* Work out the mid point for the search */
   1408   in_what = base_pre + d->offset;
   1409   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1410 
   1411   best_mv->as_mv.row = ref_row;
   1412   best_mv->as_mv.col = ref_col;
   1413 
   1414   /* Baseline value at the centre */
   1415   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
   1416             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1417 
   1418   /* Apply further limits to prevent us looking using vectors that stretch
   1419    * beyond the UMV border
   1420    */
   1421   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
   1422 
   1423   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
   1424 
   1425   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
   1426 
   1427   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
   1428 
   1429   for (r = row_min; r < row_max; ++r) {
   1430     this_mv.as_mv.row = r;
   1431     check_here = r * mv_stride + in_what + col_min;
   1432     c = col_min;
   1433 
   1434     while ((c + 2) < col_max) {
   1435       int i;
   1436 
   1437       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1438 
   1439       for (i = 0; i < 3; ++i) {
   1440         thissad = sad_array[i];
   1441 
   1442         if (thissad < bestsad) {
   1443           this_mv.as_mv.col = c;
   1444           thissad +=
   1445               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1446 
   1447           if (thissad < bestsad) {
   1448             bestsad = thissad;
   1449             best_mv->as_mv.row = r;
   1450             best_mv->as_mv.col = c;
   1451             bestaddress = check_here;
   1452           }
   1453         }
   1454 
   1455         check_here++;
   1456         c++;
   1457       }
   1458     }
   1459 
   1460     while (c < col_max) {
   1461       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1462 
   1463       if (thissad < bestsad) {
   1464         this_mv.as_mv.col = c;
   1465         thissad +=
   1466             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1467 
   1468         if (thissad < bestsad) {
   1469           bestsad = thissad;
   1470           best_mv->as_mv.row = r;
   1471           best_mv->as_mv.col = c;
   1472           bestaddress = check_here;
   1473         }
   1474       }
   1475 
   1476       check_here++;
   1477       c++;
   1478     }
   1479   }
   1480 
   1481   this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1482   this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1483 
   1484   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
   1485          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1486 }
   1487 
   1488 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1489                           int sad_per_bit, int distance,
   1490                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1491                           int_mv *center_mv) {
   1492   unsigned char *what = (*(b->base_src) + b->src);
   1493   int what_stride = b->src_stride;
   1494   int pre_stride = x->e_mbd.pre.y_stride;
   1495   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1496   unsigned char *in_what;
   1497   int in_what_stride = pre_stride;
   1498   int mv_stride = pre_stride;
   1499   unsigned char *bestaddress;
   1500   int_mv *best_mv = &d->bmi.mv;
   1501   int_mv this_mv;
   1502   unsigned int bestsad;
   1503   unsigned int thissad;
   1504   int r, c;
   1505 
   1506   unsigned char *check_here;
   1507 
   1508   int ref_row = ref_mv->as_mv.row;
   1509   int ref_col = ref_mv->as_mv.col;
   1510 
   1511   int row_min = ref_row - distance;
   1512   int row_max = ref_row + distance;
   1513   int col_min = ref_col - distance;
   1514   int col_max = ref_col + distance;
   1515 
   1516   DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
   1517   unsigned int sad_array[3];
   1518 
   1519   int *mvsadcost[2];
   1520   int_mv fcenter_mv;
   1521 
   1522   mvsadcost[0] = x->mvsadcost[0];
   1523   mvsadcost[1] = x->mvsadcost[1];
   1524   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1525   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1526 
   1527   /* Work out the mid point for the search */
   1528   in_what = base_pre + d->offset;
   1529   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1530 
   1531   best_mv->as_mv.row = ref_row;
   1532   best_mv->as_mv.col = ref_col;
   1533 
   1534   /* Baseline value at the centre */
   1535   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
   1536             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1537 
   1538   /* Apply further limits to prevent us looking using vectors that stretch
   1539    * beyond the UMV border
   1540    */
   1541   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
   1542 
   1543   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
   1544 
   1545   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
   1546 
   1547   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
   1548 
   1549   for (r = row_min; r < row_max; ++r) {
   1550     this_mv.as_mv.row = r;
   1551     check_here = r * mv_stride + in_what + col_min;
   1552     c = col_min;
   1553 
   1554     while ((c + 7) < col_max) {
   1555       int i;
   1556 
   1557       fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
   1558 
   1559       for (i = 0; i < 8; ++i) {
   1560         thissad = sad_array8[i];
   1561 
   1562         if (thissad < bestsad) {
   1563           this_mv.as_mv.col = c;
   1564           thissad +=
   1565               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1566 
   1567           if (thissad < bestsad) {
   1568             bestsad = thissad;
   1569             best_mv->as_mv.row = r;
   1570             best_mv->as_mv.col = c;
   1571             bestaddress = check_here;
   1572           }
   1573         }
   1574 
   1575         check_here++;
   1576         c++;
   1577       }
   1578     }
   1579 
   1580     while ((c + 2) < col_max) {
   1581       int i;
   1582 
   1583       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1584 
   1585       for (i = 0; i < 3; ++i) {
   1586         thissad = sad_array[i];
   1587 
   1588         if (thissad < bestsad) {
   1589           this_mv.as_mv.col = c;
   1590           thissad +=
   1591               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1592 
   1593           if (thissad < bestsad) {
   1594             bestsad = thissad;
   1595             best_mv->as_mv.row = r;
   1596             best_mv->as_mv.col = c;
   1597             bestaddress = check_here;
   1598           }
   1599         }
   1600 
   1601         check_here++;
   1602         c++;
   1603       }
   1604     }
   1605 
   1606     while (c < col_max) {
   1607       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1608 
   1609       if (thissad < bestsad) {
   1610         this_mv.as_mv.col = c;
   1611         thissad +=
   1612             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1613 
   1614         if (thissad < bestsad) {
   1615           bestsad = thissad;
   1616           best_mv->as_mv.row = r;
   1617           best_mv->as_mv.col = c;
   1618           bestaddress = check_here;
   1619         }
   1620       }
   1621 
   1622       check_here++;
   1623       c++;
   1624     }
   1625   }
   1626 
   1627   this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1628   this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1629 
   1630   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
   1631          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1632 }
   1633 
   1634 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1635                               int_mv *ref_mv, int error_per_bit,
   1636                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1637                               int *mvcost[2], int_mv *center_mv) {
   1638   MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
   1639   int i, j;
   1640   short this_row_offset, this_col_offset;
   1641 
   1642   int what_stride = b->src_stride;
   1643   int pre_stride = x->e_mbd.pre.y_stride;
   1644   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1645   int in_what_stride = pre_stride;
   1646   unsigned char *what = (*(b->base_src) + b->src);
   1647   unsigned char *best_address =
   1648       (unsigned char *)(base_pre + d->offset +
   1649                         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1650   unsigned char *check_here;
   1651   int_mv this_mv;
   1652   unsigned int bestsad;
   1653   unsigned int thissad;
   1654 
   1655   int *mvsadcost[2];
   1656   int_mv fcenter_mv;
   1657 
   1658   mvsadcost[0] = x->mvsadcost[0];
   1659   mvsadcost[1] = x->mvsadcost[1];
   1660   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1661   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1662 
   1663   bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
   1664             mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1665 
   1666   for (i = 0; i < search_range; ++i) {
   1667     int best_site = -1;
   1668 
   1669     for (j = 0; j < 4; ++j) {
   1670       this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1671       this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1672 
   1673       if ((this_col_offset > x->mv_col_min) &&
   1674           (this_col_offset < x->mv_col_max) &&
   1675           (this_row_offset > x->mv_row_min) &&
   1676           (this_row_offset < x->mv_row_max)) {
   1677         check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
   1678                      best_address;
   1679         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1680 
   1681         if (thissad < bestsad) {
   1682           this_mv.as_mv.row = this_row_offset;
   1683           this_mv.as_mv.col = this_col_offset;
   1684           thissad +=
   1685               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1686 
   1687           if (thissad < bestsad) {
   1688             bestsad = thissad;
   1689             best_site = j;
   1690           }
   1691         }
   1692       }
   1693     }
   1694 
   1695     if (best_site == -1) {
   1696       break;
   1697     } else {
   1698       ref_mv->as_mv.row += neighbors[best_site].row;
   1699       ref_mv->as_mv.col += neighbors[best_site].col;
   1700       best_address += (neighbors[best_site].row) * in_what_stride +
   1701                       neighbors[best_site].col;
   1702     }
   1703   }
   1704 
   1705   this_mv.as_mv.row = ref_mv->as_mv.row << 3;
   1706   this_mv.as_mv.col = ref_mv->as_mv.col << 3;
   1707 
   1708   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
   1709          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1710 }
   1711 
   1712 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1713                               int_mv *ref_mv, int error_per_bit,
   1714                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1715                               int *mvcost[2], int_mv *center_mv) {
   1716   MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
   1717   int i, j;
   1718   short this_row_offset, this_col_offset;
   1719 
   1720   int what_stride = b->src_stride;
   1721   int pre_stride = x->e_mbd.pre.y_stride;
   1722   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1723   int in_what_stride = pre_stride;
   1724   unsigned char *what = (*(b->base_src) + b->src);
   1725   unsigned char *best_address =
   1726       (unsigned char *)(base_pre + d->offset +
   1727                         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1728   unsigned char *check_here;
   1729   int_mv this_mv;
   1730   unsigned int bestsad;
   1731   unsigned int thissad;
   1732 
   1733   int *mvsadcost[2];
   1734   int_mv fcenter_mv;
   1735 
   1736   mvsadcost[0] = x->mvsadcost[0];
   1737   mvsadcost[1] = x->mvsadcost[1];
   1738   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1739   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1740 
   1741   bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
   1742             mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1743 
   1744   for (i = 0; i < search_range; ++i) {
   1745     int best_site = -1;
   1746     int all_in = 1;
   1747 
   1748     all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
   1749     all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
   1750     all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
   1751     all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
   1752 
   1753     if (all_in) {
   1754       unsigned int sad_array[4];
   1755       const unsigned char *block_offset[4];
   1756       block_offset[0] = best_address - in_what_stride;
   1757       block_offset[1] = best_address - 1;
   1758       block_offset[2] = best_address + 1;
   1759       block_offset[3] = best_address + in_what_stride;
   1760 
   1761       fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
   1762                      sad_array);
   1763 
   1764       for (j = 0; j < 4; ++j) {
   1765         if (sad_array[j] < bestsad) {
   1766           this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
   1767           this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
   1768           sad_array[j] +=
   1769               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1770 
   1771           if (sad_array[j] < bestsad) {
   1772             bestsad = sad_array[j];
   1773             best_site = j;
   1774           }
   1775         }
   1776       }
   1777     } else {
   1778       for (j = 0; j < 4; ++j) {
   1779         this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1780         this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1781 
   1782         if ((this_col_offset > x->mv_col_min) &&
   1783             (this_col_offset < x->mv_col_max) &&
   1784             (this_row_offset > x->mv_row_min) &&
   1785             (this_row_offset < x->mv_row_max)) {
   1786           check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
   1787                        best_address;
   1788           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1789 
   1790           if (thissad < bestsad) {
   1791             this_mv.as_mv.row = this_row_offset;
   1792             this_mv.as_mv.col = this_col_offset;
   1793             thissad +=
   1794                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1795 
   1796             if (thissad < bestsad) {
   1797               bestsad = thissad;
   1798               best_site = j;
   1799             }
   1800           }
   1801         }
   1802       }
   1803     }
   1804 
   1805     if (best_site == -1) {
   1806       break;
   1807     } else {
   1808       ref_mv->as_mv.row += neighbors[best_site].row;
   1809       ref_mv->as_mv.col += neighbors[best_site].col;
   1810       best_address += (neighbors[best_site].row) * in_what_stride +
   1811                       neighbors[best_site].col;
   1812     }
   1813   }
   1814 
   1815   this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   1816   this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   1817 
   1818   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
   1819          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1820 }
   1821 
   1822 #ifdef VP8_ENTROPY_STATS
   1823 void print_mode_context(void) {
   1824   FILE *f = fopen("modecont.c", "w");
   1825   int i, j;
   1826 
   1827   fprintf(f, "#include \"entropy.h\"\n");
   1828   fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1829   fprintf(f, "{\n");
   1830 
   1831   for (j = 0; j < 6; ++j) {
   1832     fprintf(f, "  { /* %d */\n", j);
   1833     fprintf(f, "    ");
   1834 
   1835     for (i = 0; i < 4; ++i) {
   1836       int overal_prob;
   1837       int this_prob;
   1838       int count;
   1839 
   1840       /* Overall probs */
   1841       count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1842 
   1843       if (count)
   1844         overal_prob = 256 * mv_mode_cts[i][0] / count;
   1845       else
   1846         overal_prob = 128;
   1847 
   1848       if (overal_prob == 0) overal_prob = 1;
   1849 
   1850       /* context probs */
   1851       count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1852 
   1853       if (count)
   1854         this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1855       else
   1856         this_prob = 128;
   1857 
   1858       if (this_prob == 0) this_prob = 1;
   1859 
   1860       fprintf(f, "%5d, ", this_prob);
   1861     }
   1862 
   1863     fprintf(f, "  },\n");
   1864   }
   1865 
   1866   fprintf(f, "};\n");
   1867   fclose(f);
   1868 }
   1869 
   1870 /* MV ref count VP8_ENTROPY_STATS stats code */
   1871 #ifdef VP8_ENTROPY_STATS
   1872 void init_mv_ref_counts() {
   1873   memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1874   memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1875 }
   1876 
   1877 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
   1878   if (m == ZEROMV) {
   1879     ++mv_ref_ct[ct[0]][0][0];
   1880     ++mv_mode_cts[0][0];
   1881   } else {
   1882     ++mv_ref_ct[ct[0]][0][1];
   1883     ++mv_mode_cts[0][1];
   1884 
   1885     if (m == NEARESTMV) {
   1886       ++mv_ref_ct[ct[1]][1][0];
   1887       ++mv_mode_cts[1][0];
   1888     } else {
   1889       ++mv_ref_ct[ct[1]][1][1];
   1890       ++mv_mode_cts[1][1];
   1891 
   1892       if (m == NEARMV) {
   1893         ++mv_ref_ct[ct[2]][2][0];
   1894         ++mv_mode_cts[2][0];
   1895       } else {
   1896         ++mv_ref_ct[ct[2]][2][1];
   1897         ++mv_mode_cts[2][1];
   1898 
   1899         if (m == NEWMV) {
   1900           ++mv_ref_ct[ct[3]][3][0];
   1901           ++mv_mode_cts[3][0];
   1902         } else {
   1903           ++mv_ref_ct[ct[3]][3][1];
   1904           ++mv_mode_cts[3][1];
   1905         }
   1906       }
   1907     }
   1908   }
   1909 }
   1910 
   1911 #endif /* END MV ref count VP8_ENTROPY_STATS stats code */
   1912 
   1913 #endif
   1914