Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "onyx_int.h"
     13 #include "mcomp.h"
     14 #include "vpx_mem/vpx_mem.h"
     15 #include "vpx_config.h"
     16 #include <stdio.h>
     17 #include <limits.h>
     18 #include <math.h>
     19 #include "vp8/common/findnearmv.h"
     20 #include "vp8/common/common.h"
     21 
/* File-local tables that exist only in builds defining VP8_ENTROPY_STATS.
 * NOTE(review): presumably event counters for MV reference / MV mode
 * statistics; nothing in the visible part of this file reads or writes
 * them - confirm against the rest of the file.
 */
#ifdef VP8_ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif
     26 
     27 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
     28 {
     29     /* MV costing is based on the distribution of vectors in the previous
     30      * frame and as such will tend to over state the cost of vectors. In
     31      * addition coding a new vector can have a knock on effect on the cost
     32      * of subsequent vectors and the quality of prediction from NEAR and
     33      * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
     34      * limited extent, for some account to be taken of these factors.
     35      */
     36     return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
     37 }
     38 
     39 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
     40 {
     41     /* Ignore mv costing if mvcost is NULL */
     42     if (mvcost)
     43         return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
     44                  mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
     45                  * error_per_bit + 128) >> 8;
     46     return 0;
     47 }
     48 
     49 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
     50 {
     51     /* Calculate sad error cost on full pixel basis. */
     52     /* Ignore mv costing if mvsadcost is NULL */
     53     if (mvsadcost)
     54         return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
     55                  mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
     56                 * error_per_bit + 128) >> 8;
     57     return 0;
     58 }
     59 
     60 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
     61 {
     62     int Len;
     63     int search_site_count = 0;
     64 
     65 
     66     /* Generate offsets for 4 search sites per step. */
     67     Len = MAX_FIRST_STEP;
     68     x->ss[search_site_count].mv.col = 0;
     69     x->ss[search_site_count].mv.row = 0;
     70     x->ss[search_site_count].offset = 0;
     71     search_site_count++;
     72 
     73     while (Len > 0)
     74     {
     75 
     76         /* Compute offsets for search sites. */
     77         x->ss[search_site_count].mv.col = 0;
     78         x->ss[search_site_count].mv.row = -Len;
     79         x->ss[search_site_count].offset = -Len * stride;
     80         search_site_count++;
     81 
     82         /* Compute offsets for search sites. */
     83         x->ss[search_site_count].mv.col = 0;
     84         x->ss[search_site_count].mv.row = Len;
     85         x->ss[search_site_count].offset = Len * stride;
     86         search_site_count++;
     87 
     88         /* Compute offsets for search sites. */
     89         x->ss[search_site_count].mv.col = -Len;
     90         x->ss[search_site_count].mv.row = 0;
     91         x->ss[search_site_count].offset = -Len;
     92         search_site_count++;
     93 
     94         /* Compute offsets for search sites. */
     95         x->ss[search_site_count].mv.col = Len;
     96         x->ss[search_site_count].mv.row = 0;
     97         x->ss[search_site_count].offset = Len;
     98         search_site_count++;
     99 
    100         /* Contract. */
    101         Len /= 2;
    102     }
    103 
    104     x->ss_count = search_site_count;
    105     x->searches_per_step = 4;
    106 }
    107 
    108 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
    109 {
    110     int Len;
    111     int search_site_count = 0;
    112 
    113     /* Generate offsets for 8 search sites per step. */
    114     Len = MAX_FIRST_STEP;
    115     x->ss[search_site_count].mv.col = 0;
    116     x->ss[search_site_count].mv.row = 0;
    117     x->ss[search_site_count].offset = 0;
    118     search_site_count++;
    119 
    120     while (Len > 0)
    121     {
    122 
    123         /* Compute offsets for search sites. */
    124         x->ss[search_site_count].mv.col = 0;
    125         x->ss[search_site_count].mv.row = -Len;
    126         x->ss[search_site_count].offset = -Len * stride;
    127         search_site_count++;
    128 
    129         /* Compute offsets for search sites. */
    130         x->ss[search_site_count].mv.col = 0;
    131         x->ss[search_site_count].mv.row = Len;
    132         x->ss[search_site_count].offset = Len * stride;
    133         search_site_count++;
    134 
    135         /* Compute offsets for search sites. */
    136         x->ss[search_site_count].mv.col = -Len;
    137         x->ss[search_site_count].mv.row = 0;
    138         x->ss[search_site_count].offset = -Len;
    139         search_site_count++;
    140 
    141         /* Compute offsets for search sites. */
    142         x->ss[search_site_count].mv.col = Len;
    143         x->ss[search_site_count].mv.row = 0;
    144         x->ss[search_site_count].offset = Len;
    145         search_site_count++;
    146 
    147         /* Compute offsets for search sites. */
    148         x->ss[search_site_count].mv.col = -Len;
    149         x->ss[search_site_count].mv.row = -Len;
    150         x->ss[search_site_count].offset = -Len * stride - Len;
    151         search_site_count++;
    152 
    153         /* Compute offsets for search sites. */
    154         x->ss[search_site_count].mv.col = Len;
    155         x->ss[search_site_count].mv.row = -Len;
    156         x->ss[search_site_count].offset = -Len * stride + Len;
    157         search_site_count++;
    158 
    159         /* Compute offsets for search sites. */
    160         x->ss[search_site_count].mv.col = -Len;
    161         x->ss[search_site_count].mv.row = Len;
    162         x->ss[search_site_count].offset = Len * stride - Len;
    163         search_site_count++;
    164 
    165         /* Compute offsets for search sites. */
    166         x->ss[search_site_count].mv.col = Len;
    167         x->ss[search_site_count].mv.row = Len;
    168         x->ss[search_site_count].offset = Len * stride + Len;
    169         search_site_count++;
    170 
    171 
    172         /* Contract. */
    173         Len /= 2;
    174     }
    175 
    176     x->ss_count = search_site_count;
    177     x->searches_per_step = 8;
    178 }
    179 
    180 /*
    181  * To avoid the penalty for crossing cache-line read, preload the reference
    182  * area in a small buffer, which is aligned to make sure there won't be crossing
    183  * cache-line read while reading from this buffer. This reduced the cpu
    184  * cycles spent on reading ref data in sub-pixel filter functions.
    185  * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
    186  * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
    187  * could reduce the area.
    188  */
    189 
/* Helper macros for vp8_find_best_sub_pixel_step_iteratively(). They are
 * expanded inside that function and rely on its locals and parameters:
 * mvcost, rr, rc, error_per_bit, y, y_stride, offset, vfp, z, b, sse,
 * minc/maxc/minr/maxr, besterr, br, bc, thismse, distortion and sse1.
 * All of them are #undef'd right after the function.
 */

/* estimated cost of a motion vector (r,c): table entries indexed by the
 * distance from (rr,rc), times error_per_bit, rounded and divided by 256;
 * 0 when mvcost is NULL */
#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
/* pointer to predictor base of a motionvector: r and c are divided by 4
 * (>>2) to get the whole-pixel part, and `offset` is subtracted again */
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
/* convert motion vector component to offset for svf calc: the low two bits
 * of the component, doubled */
#define SP(x) (((x)&3)<<1)
/* returns subpixel variance error function; also writes `sse` */
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
/* run statement(s) `s` when (r,c) lies inside [minr,maxr] x [minc,maxc],
 * otherwise run `e` */
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r,c) (MVC(r,c)+DIST(r,c))
/* checks if (r,c) has better score than previous best: stores the score in
 * `v` and, when it beats besterr, records it in besterr/br/bc/*distortion/
 * *sse1. Out-of-range positions get v = UINT_MAX. */
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
    203 
/*
 * Iterative sub-pixel refinement of a full-pel motion vector.
 *
 * On entry *bestmv holds the full-pel vector. Candidates are tracked as
 * (br, bc) = bestmv * 4; steps of 2 are the "1/2 pel" pass and steps of 1
 * the "1/4 pel" pass. Each pass runs at most three rounds (the counters
 * start at 4 and are pre-decremented) and stops early once a round leaves
 * the best position unchanged. In every round the four axis neighbours and
 * one diagonal (chosen from the better horizontal and vertical neighbour)
 * are scored through CHECK_BETTER.
 *
 * On exit *bestmv = (br * 2, bc * 2), *distortion holds the variance-callback
 * result of the best candidate and *sse1 its sse output.
 *
 * Returns besterr (distortion plus weighted vector cost), or INT_MAX when
 * either component of the result differs from ref_mv by more than
 * MAX_FULL_PEL_VAL << 3.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1)
{
    /* source block being matched */
    unsigned char *z = (*(b->base_src) + b->src);

    /* ref_mv halved, so it is in the same units as br/bc (used by MVC) */
    int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
    /* best position so far, and the centre of the current round */
    int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
    int tr = br, tc = bc;
    unsigned int besterr;
    unsigned int left, right, up, down, diag;
    unsigned int sse;
    unsigned int whichdir;
    unsigned int halfiters = 4;
    unsigned int quarteriters = 4;
    int thismse;

    /* Candidate window: the frame limits (x->mv_*_min/max scaled by 4)
     * intersected with ref_mv +/- ((1 << mvlong_width) - 1).
     */
    int minc = MAX(x->mv_col_min * 4,
                   (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
    int maxc = MIN(x->mv_col_max * 4,
                   (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
    int minr = MAX(x->mv_row_min * 4,
                   (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
    int maxr = MIN(x->mv_row_max * 4,
                   (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

    int y_stride;
    int offset;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;


#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    /* reference pixels at the full-pel position */
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;
    int buf_r1, buf_r2, buf_c1;

    /* Clamping to avoid out-of-range data access: use a margin of 3 rows
     * above/below and 3 columns to the left unless that would cross the
     * mv limits, in which case only the distance to the limit is used.
     */
    buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
    buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
    buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
    y_stride = 32;

    /* Copy to intermediate buffer before searching. */
    vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    /* y addresses the same pixel as y_0, but inside the copied buffer */
    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* PRE() subtracts this so that positions are taken relative to y */
    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

    /* central mv */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;

    /* calculate central point error */
    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = besterr;
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* TODO: Each subsequent iteration checks at least one point in common
     * with the last iteration could be 2 ( if diag selected)
     */
    while (--halfiters)
    {
        /* 1/2 pel */
        CHECK_BETTER(left, tr, tc - 2);
        CHECK_BETTER(right, tr, tc + 2);
        CHECK_BETTER(up, tr - 2, tc);
        CHECK_BETTER(down, tr + 2, tc);

        /* bit 0: right scored no worse than left; bit 1: down no worse than up */
        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        /* try the single diagonal between the two better axis neighbours */
        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 2, tc - 2);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 2, tc + 2);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 2, tc - 2);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 2, tc + 2);
            break;
        }

        /* no reason to check the same one again. */
        if (tr == br && tc == bc)
            break;

        /* re-centre on the new best position */
        tr = br;
        tc = bc;
    }

    /* TODO: Each subsequent iteration checks at least one point in common
     * with the last iteration could be 2 ( if diag selected)
     */

    /* 1/4 pel */
    while (--quarteriters)
    {
        CHECK_BETTER(left, tr, tc - 1);
        CHECK_BETTER(right, tr, tc + 1);
        CHECK_BETTER(up, tr - 1, tc);
        CHECK_BETTER(down, tr + 1, tc);

        /* same diagonal selection as in the half-pel pass */
        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 1, tc - 1);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 1, tc + 1);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 1, tc - 1);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 1, tc + 1);
            break;
        }

        /* no reason to check the same one again. */
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    /* write the result back, doubling br/bc into the units of bestmv */
    bestmv->as_mv.row = br * 2;
    bestmv->as_mv.col = bc * 2;

    /* reject results that ended up too far from the reference vector */
    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
        return INT_MAX;

    return besterr;
}
    354 #undef MVC
    355 #undef PRE
    356 #undef SP
    357 #undef DIST
    358 #undef IFMVCV
    359 #undef ERR
    360 #undef CHECK_BETTER
    361 
    362 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    363                                  int_mv *bestmv, int_mv *ref_mv,
    364                                  int error_per_bit,
    365                                  const vp8_variance_fn_ptr_t *vfp,
    366                                  int *mvcost[2], int *distortion,
    367                                  unsigned int *sse1)
    368 {
    369     int bestmse = INT_MAX;
    370     int_mv startmv;
    371     int_mv this_mv;
    372     unsigned char *z = (*(b->base_src) + b->src);
    373     int left, right, up, down, diag;
    374     unsigned int sse;
    375     int whichdir ;
    376     int thismse;
    377     int y_stride;
    378     int pre_stride = x->e_mbd.pre.y_stride;
    379     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    380 
    381 #if ARCH_X86 || ARCH_X86_64
    382     MACROBLOCKD *xd = &x->e_mbd;
    383     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    384     unsigned char *y;
    385 
    386     y_stride = 32;
    387     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    388      vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    389      y = xd->y_buf + y_stride + 1;
    390 #else
    391      unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    392      y_stride = pre_stride;
    393 #endif
    394 
    395     /* central mv */
    396     bestmv->as_mv.row <<= 3;
    397     bestmv->as_mv.col <<= 3;
    398     startmv = *bestmv;
    399 
    400     /* calculate central point error */
    401     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    402     *distortion = bestmse;
    403     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    404 
    405     /* go left then right and check error */
    406     this_mv.as_mv.row = startmv.as_mv.row;
    407     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    408     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    409     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    410 
    411     if (left < bestmse)
    412     {
    413         *bestmv = this_mv;
    414         bestmse = left;
    415         *distortion = thismse;
    416         *sse1 = sse;
    417     }
    418 
    419     this_mv.as_mv.col += 8;
    420     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    421     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    422 
    423     if (right < bestmse)
    424     {
    425         *bestmv = this_mv;
    426         bestmse = right;
    427         *distortion = thismse;
    428         *sse1 = sse;
    429     }
    430 
    431     /* go up then down and check error */
    432     this_mv.as_mv.col = startmv.as_mv.col;
    433     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    434     thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    435     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    436 
    437     if (up < bestmse)
    438     {
    439         *bestmv = this_mv;
    440         bestmse = up;
    441         *distortion = thismse;
    442         *sse1 = sse;
    443     }
    444 
    445     this_mv.as_mv.row += 8;
    446     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    447     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    448 
    449     if (down < bestmse)
    450     {
    451         *bestmv = this_mv;
    452         bestmse = down;
    453         *distortion = thismse;
    454         *sse1 = sse;
    455     }
    456 
    457 
    458     /* now check 1 more diagonal */
    459     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    460     this_mv = startmv;
    461 
    462     switch (whichdir)
    463     {
    464     case 0:
    465         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    466         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    467         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    468         break;
    469     case 1:
    470         this_mv.as_mv.col += 4;
    471         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    472         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    473         break;
    474     case 2:
    475         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    476         this_mv.as_mv.row += 4;
    477         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    478         break;
    479     case 3:
    480     default:
    481         this_mv.as_mv.col += 4;
    482         this_mv.as_mv.row += 4;
    483         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    484         break;
    485     }
    486 
    487     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    488 
    489     if (diag < bestmse)
    490     {
    491         *bestmv = this_mv;
    492         bestmse = diag;
    493         *distortion = thismse;
    494         *sse1 = sse;
    495     }
    496 
    497 
    498     /* time to check quarter pels. */
    499     if (bestmv->as_mv.row < startmv.as_mv.row)
    500         y -= y_stride;
    501 
    502     if (bestmv->as_mv.col < startmv.as_mv.col)
    503         y--;
    504 
    505     startmv = *bestmv;
    506 
    507 
    508 
    509     /* go left then right and check error */
    510     this_mv.as_mv.row = startmv.as_mv.row;
    511 
    512     if (startmv.as_mv.col & 7)
    513     {
    514         this_mv.as_mv.col = startmv.as_mv.col - 2;
    515         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    516     }
    517     else
    518     {
    519         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    520         thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    521     }
    522 
    523     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    524 
    525     if (left < bestmse)
    526     {
    527         *bestmv = this_mv;
    528         bestmse = left;
    529         *distortion = thismse;
    530         *sse1 = sse;
    531     }
    532 
    533     this_mv.as_mv.col += 4;
    534     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    535     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    536 
    537     if (right < bestmse)
    538     {
    539         *bestmv = this_mv;
    540         bestmse = right;
    541         *distortion = thismse;
    542         *sse1 = sse;
    543     }
    544 
    545     /* go up then down and check error */
    546     this_mv.as_mv.col = startmv.as_mv.col;
    547 
    548     if (startmv.as_mv.row & 7)
    549     {
    550         this_mv.as_mv.row = startmv.as_mv.row - 2;
    551         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    552     }
    553     else
    554     {
    555         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    556         thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    557     }
    558 
    559     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    560 
    561     if (up < bestmse)
    562     {
    563         *bestmv = this_mv;
    564         bestmse = up;
    565         *distortion = thismse;
    566         *sse1 = sse;
    567     }
    568 
    569     this_mv.as_mv.row += 4;
    570     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    571     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    572 
    573     if (down < bestmse)
    574     {
    575         *bestmv = this_mv;
    576         bestmse = down;
    577         *distortion = thismse;
    578         *sse1 = sse;
    579     }
    580 
    581 
    582     /* now check 1 more diagonal */
    583     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    584 
    585     this_mv = startmv;
    586 
    587     switch (whichdir)
    588     {
    589     case 0:
    590 
    591         if (startmv.as_mv.row & 7)
    592         {
    593             this_mv.as_mv.row -= 2;
    594 
    595             if (startmv.as_mv.col & 7)
    596             {
    597                 this_mv.as_mv.col -= 2;
    598                 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    599             }
    600             else
    601             {
    602                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    603                 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
    604             }
    605         }
    606         else
    607         {
    608             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    609 
    610             if (startmv.as_mv.col & 7)
    611             {
    612                 this_mv.as_mv.col -= 2;
    613                 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    614             }
    615             else
    616             {
    617                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    618                 thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
    619             }
    620         }
    621 
    622         break;
    623     case 1:
    624         this_mv.as_mv.col += 2;
    625 
    626         if (startmv.as_mv.row & 7)
    627         {
    628             this_mv.as_mv.row -= 2;
    629             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    630         }
    631         else
    632         {
    633             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    634             thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    635         }
    636 
    637         break;
    638     case 2:
    639         this_mv.as_mv.row += 2;
    640 
    641         if (startmv.as_mv.col & 7)
    642         {
    643             this_mv.as_mv.col -= 2;
    644             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    645         }
    646         else
    647         {
    648             this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    649             thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    650         }
    651 
    652         break;
    653     case 3:
    654         this_mv.as_mv.col += 2;
    655         this_mv.as_mv.row += 2;
    656         thismse = vfp->svf(y, y_stride,  this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    657         break;
    658     }
    659 
    660     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    661 
    662     if (diag < bestmse)
    663     {
    664         *bestmv = this_mv;
    665         bestmse = diag;
    666         *distortion = thismse;
    667         *sse1 = sse;
    668     }
    669 
    670     return bestmse;
    671 }
    672 
    673 int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    674                                   int_mv *bestmv, int_mv *ref_mv,
    675                                   int error_per_bit,
    676                                   const vp8_variance_fn_ptr_t *vfp,
    677                                   int *mvcost[2], int *distortion,
    678                                   unsigned int *sse1)
    679 {
    680     int bestmse = INT_MAX;
    681     int_mv startmv;
    682     int_mv this_mv;
    683     unsigned char *z = (*(b->base_src) + b->src);
    684     int left, right, up, down, diag;
    685     unsigned int sse;
    686     int whichdir ;
    687     int thismse;
    688     int y_stride;
    689     int pre_stride = x->e_mbd.pre.y_stride;
    690     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    691 
    692 #if ARCH_X86 || ARCH_X86_64
    693     MACROBLOCKD *xd = &x->e_mbd;
    694     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    695     unsigned char *y;
    696 
    697     y_stride = 32;
    698     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    699     vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    700     y = xd->y_buf + y_stride + 1;
    701 #else
    702     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    703     y_stride = pre_stride;
    704 #endif
    705 
    706     /* central mv */
    707     bestmv->as_mv.row *= 8;
    708     bestmv->as_mv.col *= 8;
    709     startmv = *bestmv;
    710 
    711     /* calculate central point error */
    712     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    713     *distortion = bestmse;
    714     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    715 
    716     /* go left then right and check error */
    717     this_mv.as_mv.row = startmv.as_mv.row;
    718     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    719     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    720     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    721 
    722     if (left < bestmse)
    723     {
    724         *bestmv = this_mv;
    725         bestmse = left;
    726         *distortion = thismse;
    727         *sse1 = sse;
    728     }
    729 
    730     this_mv.as_mv.col += 8;
    731     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    732     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    733 
    734     if (right < bestmse)
    735     {
    736         *bestmv = this_mv;
    737         bestmse = right;
    738         *distortion = thismse;
    739         *sse1 = sse;
    740     }
    741 
    742     /* go up then down and check error */
    743     this_mv.as_mv.col = startmv.as_mv.col;
    744     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    745     thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    746     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    747 
    748     if (up < bestmse)
    749     {
    750         *bestmv = this_mv;
    751         bestmse = up;
    752         *distortion = thismse;
    753         *sse1 = sse;
    754     }
    755 
    756     this_mv.as_mv.row += 8;
    757     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    758     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    759 
    760     if (down < bestmse)
    761     {
    762         *bestmv = this_mv;
    763         bestmse = down;
    764         *distortion = thismse;
    765         *sse1 = sse;
    766     }
    767 
    768     /* now check 1 more diagonal - */
    769     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    770     this_mv = startmv;
    771 
    772     switch (whichdir)
    773     {
    774     case 0:
    775         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    776         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    777         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    778         break;
    779     case 1:
    780         this_mv.as_mv.col += 4;
    781         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    782         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    783         break;
    784     case 2:
    785         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    786         this_mv.as_mv.row += 4;
    787         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    788         break;
    789     case 3:
    790     default:
    791         this_mv.as_mv.col += 4;
    792         this_mv.as_mv.row += 4;
    793         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    794         break;
    795     }
    796 
    797     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    798 
    799     if (diag < bestmse)
    800     {
    801         *bestmv = this_mv;
    802         bestmse = diag;
    803         *distortion = thismse;
    804         *sse1 = sse;
    805     }
    806 
    807     return bestmse;
    808 }
    809 
    810 #define CHECK_BOUNDS(range) \
    811 {\
    812     all_in = 1;\
    813     all_in &= ((br-range) >= x->mv_row_min);\
    814     all_in &= ((br+range) <= x->mv_row_max);\
    815     all_in &= ((bc-range) >= x->mv_col_min);\
    816     all_in &= ((bc+range) <= x->mv_col_max);\
    817 }
    818 
    819 #define CHECK_POINT \
    820 {\
    821     if (this_mv.as_mv.col < x->mv_col_min) continue;\
    822     if (this_mv.as_mv.col > x->mv_col_max) continue;\
    823     if (this_mv.as_mv.row < x->mv_row_min) continue;\
    824     if (this_mv.as_mv.row > x->mv_row_max) continue;\
    825 }
    826 
    827 #define CHECK_BETTER \
    828 {\
    829     if (thissad < bestsad)\
    830     {\
    831         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
    832         if (thissad < bestsad)\
    833         {\
    834             bestsad = thissad;\
    835             best_site = i;\
    836         }\
    837     }\
    838 }
    839 
    840 static const MV next_chkpts[6][3] =
    841 {
    842     {{ -2, 0}, { -1, -2}, {1, -2}},
    843     {{ -1, -2}, {1, -2}, {2, 0}},
    844     {{1, -2}, {2, 0}, {1, 2}},
    845     {{2, 0}, {1, 2}, { -1, 2}},
    846     {{1, 2}, { -1, 2}, { -2, 0}},
    847     {{ -1, 2}, { -2, 0}, { -1, -2}}
    848 };
    849 
    850 int vp8_hex_search
    851 (
    852     MACROBLOCK *x,
    853     BLOCK *b,
    854     BLOCKD *d,
    855     int_mv *ref_mv,
    856     int_mv *best_mv,
    857     int search_param,
    858     int sad_per_bit,
    859     const vp8_variance_fn_ptr_t *vfp,
    860     int *mvsadcost[2],
    861     int *mvcost[2],
    862     int_mv *center_mv
    863 )
    864 {
    865     MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    866     MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
    867     int i, j;
    868 
    869     unsigned char *what = (*(b->base_src) + b->src);
    870     int what_stride = b->src_stride;
    871     int pre_stride = x->e_mbd.pre.y_stride;
    872     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    873 
    874     int in_what_stride = pre_stride;
    875     int br, bc;
    876     int_mv this_mv;
    877     unsigned int bestsad;
    878     unsigned int thissad;
    879     unsigned char *base_offset;
    880     unsigned char *this_offset;
    881     int k = -1;
    882     int all_in;
    883     int best_site = -1;
    884     int hex_range = 127;
    885     int dia_range = 8;
    886 
    887     int_mv fcenter_mv;
    888     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    889     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    890 
    891     /* adjust ref_mv to make sure it is within MV range */
    892     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    893     br = ref_mv->as_mv.row;
    894     bc = ref_mv->as_mv.col;
    895 
    896     /* Work out the start point for the search */
    897     base_offset = (unsigned char *)(base_pre + d->offset);
    898     this_offset = base_offset + (br * (pre_stride)) + bc;
    899     this_mv.as_mv.row = br;
    900     this_mv.as_mv.col = bc;
    901     bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
    902             + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    903 
    904 #if CONFIG_MULTI_RES_ENCODING
    905     /* Lower search range based on prediction info */
    906     if (search_param >= 6) goto cal_neighbors;
    907     else if (search_param >= 5) hex_range = 4;
    908     else if (search_param >= 4) hex_range = 6;
    909     else if (search_param >= 3) hex_range = 15;
    910     else if (search_param >= 2) hex_range = 31;
    911     else if (search_param >= 1) hex_range = 63;
    912 
    913     dia_range = 8;
    914 #endif
    915 
    916     /* hex search */
    917     CHECK_BOUNDS(2)
    918 
    919     if(all_in)
    920     {
    921         for (i = 0; i < 6; i++)
    922         {
    923             this_mv.as_mv.row = br + hex[i].row;
    924             this_mv.as_mv.col = bc + hex[i].col;
    925             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    926             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    927             CHECK_BETTER
    928         }
    929     }else
    930     {
    931         for (i = 0; i < 6; i++)
    932         {
    933             this_mv.as_mv.row = br + hex[i].row;
    934             this_mv.as_mv.col = bc + hex[i].col;
    935             CHECK_POINT
    936             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    937             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    938             CHECK_BETTER
    939         }
    940     }
    941 
    942     if (best_site == -1)
    943         goto cal_neighbors;
    944     else
    945     {
    946         br += hex[best_site].row;
    947         bc += hex[best_site].col;
    948         k = best_site;
    949     }
    950 
    951     for (j = 1; j < hex_range; j++)
    952     {
    953         best_site = -1;
    954         CHECK_BOUNDS(2)
    955 
    956         if(all_in)
    957         {
    958             for (i = 0; i < 3; i++)
    959             {
    960                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    961                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    962                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    963                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    964                 CHECK_BETTER
    965             }
    966         }else
    967         {
    968             for (i = 0; i < 3; i++)
    969             {
    970                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    971                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    972                 CHECK_POINT
    973                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    974                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    975                 CHECK_BETTER
    976             }
    977         }
    978 
    979         if (best_site == -1)
    980             break;
    981         else
    982         {
    983             br += next_chkpts[k][best_site].row;
    984             bc += next_chkpts[k][best_site].col;
    985             k += 5 + best_site;
    986             if (k >= 12) k -= 12;
    987             else if (k >= 6) k -= 6;
    988         }
    989     }
    990 
    991     /* check 4 1-away neighbors */
    992 cal_neighbors:
    993     for (j = 0; j < dia_range; j++)
    994     {
    995         best_site = -1;
    996         CHECK_BOUNDS(1)
    997 
    998         if(all_in)
    999         {
   1000             for (i = 0; i < 4; i++)
   1001             {
   1002                 this_mv.as_mv.row = br + neighbors[i].row;
   1003                 this_mv.as_mv.col = bc + neighbors[i].col;
   1004                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1005                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1006                 CHECK_BETTER
   1007             }
   1008         }else
   1009         {
   1010             for (i = 0; i < 4; i++)
   1011             {
   1012                 this_mv.as_mv.row = br + neighbors[i].row;
   1013                 this_mv.as_mv.col = bc + neighbors[i].col;
   1014                 CHECK_POINT
   1015                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1016                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1017                 CHECK_BETTER
   1018             }
   1019         }
   1020 
   1021         if (best_site == -1)
   1022             break;
   1023         else
   1024         {
   1025             br += neighbors[best_site].row;
   1026             bc += neighbors[best_site].col;
   1027         }
   1028     }
   1029 
   1030     best_mv->as_mv.row = br;
   1031     best_mv->as_mv.col = bc;
   1032 
   1033     return bestsad;
   1034 }
   1035 #undef CHECK_BOUNDS
   1036 #undef CHECK_POINT
   1037 #undef CHECK_BETTER
   1038 
   1039 int vp8_diamond_search_sad_c
   1040 (
   1041     MACROBLOCK *x,
   1042     BLOCK *b,
   1043     BLOCKD *d,
   1044     int_mv *ref_mv,
   1045     int_mv *best_mv,
   1046     int search_param,
   1047     int sad_per_bit,
   1048     int *num00,
   1049     vp8_variance_fn_ptr_t *fn_ptr,
   1050     int *mvcost[2],
   1051     int_mv *center_mv
   1052 )
   1053 {
   1054     int i, j, step;
   1055 
   1056     unsigned char *what = (*(b->base_src) + b->src);
   1057     int what_stride = b->src_stride;
   1058     unsigned char *in_what;
   1059     int pre_stride = x->e_mbd.pre.y_stride;
   1060     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1061     int in_what_stride = pre_stride;
   1062     unsigned char *best_address;
   1063 
   1064     int tot_steps;
   1065     int_mv this_mv;
   1066 
   1067     unsigned int bestsad;
   1068     unsigned int thissad;
   1069     int best_site = 0;
   1070     int last_site = 0;
   1071 
   1072     int ref_row;
   1073     int ref_col;
   1074     int this_row_offset;
   1075     int this_col_offset;
   1076     search_site *ss;
   1077 
   1078     unsigned char *check_here;
   1079 
   1080     int *mvsadcost[2];
   1081     int_mv fcenter_mv;
   1082 
   1083     mvsadcost[0] = x->mvsadcost[0];
   1084     mvsadcost[1] = x->mvsadcost[1];
   1085     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1086     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1087 
   1088     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1089     ref_row = ref_mv->as_mv.row;
   1090     ref_col = ref_mv->as_mv.col;
   1091     *num00 = 0;
   1092     best_mv->as_mv.row = ref_row;
   1093     best_mv->as_mv.col = ref_col;
   1094 
   1095     /* Work out the start point for the search */
   1096     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1097     best_address = in_what;
   1098 
   1099     /* Check the starting position */
   1100     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1101             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1102 
   1103     /* search_param determines the length of the initial step and hence
   1104      * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   1105      * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1106      */
   1107     ss = &x->ss[search_param * x->searches_per_step];
   1108     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1109 
   1110     i = 1;
   1111 
   1112     for (step = 0; step < tot_steps ; step++)
   1113     {
   1114         for (j = 0 ; j < x->searches_per_step ; j++)
   1115         {
   1116             /* Trap illegal vectors */
   1117             this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1118             this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1119 
   1120             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1121             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1122 
   1123             {
   1124                 check_here = ss[i].offset + best_address;
   1125                 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1126 
   1127                 if (thissad < bestsad)
   1128                 {
   1129                     this_mv.as_mv.row = this_row_offset;
   1130                     this_mv.as_mv.col = this_col_offset;
   1131                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1132                                               mvsadcost, sad_per_bit);
   1133 
   1134                     if (thissad < bestsad)
   1135                     {
   1136                         bestsad = thissad;
   1137                         best_site = i;
   1138                     }
   1139                 }
   1140             }
   1141 
   1142             i++;
   1143         }
   1144 
   1145         if (best_site != last_site)
   1146         {
   1147             best_mv->as_mv.row += ss[best_site].mv.row;
   1148             best_mv->as_mv.col += ss[best_site].mv.col;
   1149             best_address += ss[best_site].offset;
   1150             last_site = best_site;
   1151         }
   1152         else if (best_address == in_what)
   1153             (*num00)++;
   1154     }
   1155 
   1156     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1157     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1158 
   1159     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1160            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1161 }
   1162 
   1163 int vp8_diamond_search_sadx4
   1164 (
   1165     MACROBLOCK *x,
   1166     BLOCK *b,
   1167     BLOCKD *d,
   1168     int_mv *ref_mv,
   1169     int_mv *best_mv,
   1170     int search_param,
   1171     int sad_per_bit,
   1172     int *num00,
   1173     vp8_variance_fn_ptr_t *fn_ptr,
   1174     int *mvcost[2],
   1175     int_mv *center_mv
   1176 )
   1177 {
   1178     int i, j, step;
   1179 
   1180     unsigned char *what = (*(b->base_src) + b->src);
   1181     int what_stride = b->src_stride;
   1182     unsigned char *in_what;
   1183     int pre_stride = x->e_mbd.pre.y_stride;
   1184     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1185     int in_what_stride = pre_stride;
   1186     unsigned char *best_address;
   1187 
   1188     int tot_steps;
   1189     int_mv this_mv;
   1190 
   1191     unsigned int bestsad;
   1192     unsigned int thissad;
   1193     int best_site = 0;
   1194     int last_site = 0;
   1195 
   1196     int ref_row;
   1197     int ref_col;
   1198     int this_row_offset;
   1199     int this_col_offset;
   1200     search_site *ss;
   1201 
   1202     unsigned char *check_here;
   1203 
   1204     int *mvsadcost[2];
   1205     int_mv fcenter_mv;
   1206 
   1207     mvsadcost[0] = x->mvsadcost[0];
   1208     mvsadcost[1] = x->mvsadcost[1];
   1209     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1210     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1211 
   1212     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1213     ref_row = ref_mv->as_mv.row;
   1214     ref_col = ref_mv->as_mv.col;
   1215     *num00 = 0;
   1216     best_mv->as_mv.row = ref_row;
   1217     best_mv->as_mv.col = ref_col;
   1218 
   1219     /* Work out the start point for the search */
   1220     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1221     best_address = in_what;
   1222 
   1223     /* Check the starting position */
   1224     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1225             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1226 
   1227     /* search_param determines the length of the initial step and hence the
   1228      * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   1229      * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1230      */
   1231     ss = &x->ss[search_param * x->searches_per_step];
   1232     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1233 
   1234     i = 1;
   1235 
   1236     for (step = 0; step < tot_steps ; step++)
   1237     {
   1238         int all_in = 1, t;
   1239 
   1240         /* To know if all neighbor points are within the bounds, 4 bounds
   1241          * checking are enough instead of checking 4 bounds for each
   1242          * points.
   1243          */
   1244         all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
   1245         all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
   1246         all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
   1247         all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
   1248 
   1249         if (all_in)
   1250         {
   1251             unsigned int sad_array[4];
   1252 
   1253             for (j = 0 ; j < x->searches_per_step ; j += 4)
   1254             {
   1255                 const unsigned char *block_offset[4];
   1256 
   1257                 for (t = 0; t < 4; t++)
   1258                     block_offset[t] = ss[i+t].offset + best_address;
   1259 
   1260                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1261 
   1262                 for (t = 0; t < 4; t++, i++)
   1263                 {
   1264                     if (sad_array[t] < bestsad)
   1265                     {
   1266                         this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
   1267                         this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
   1268                         sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
   1269                                                        mvsadcost, sad_per_bit);
   1270 
   1271                         if (sad_array[t] < bestsad)
   1272                         {
   1273                             bestsad = sad_array[t];
   1274                             best_site = i;
   1275                         }
   1276                     }
   1277                 }
   1278             }
   1279         }
   1280         else
   1281         {
   1282             for (j = 0 ; j < x->searches_per_step ; j++)
   1283             {
   1284                 /* Trap illegal vectors */
   1285                 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1286                 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1287 
   1288                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1289                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1290                 {
   1291                     check_here = ss[i].offset + best_address;
   1292                     thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1293 
   1294                     if (thissad < bestsad)
   1295                     {
   1296                         this_mv.as_mv.row = this_row_offset;
   1297                         this_mv.as_mv.col = this_col_offset;
   1298                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1299                                                   mvsadcost, sad_per_bit);
   1300 
   1301                         if (thissad < bestsad)
   1302                         {
   1303                             bestsad = thissad;
   1304                             best_site = i;
   1305                         }
   1306                     }
   1307                 }
   1308                 i++;
   1309             }
   1310         }
   1311 
   1312         if (best_site != last_site)
   1313         {
   1314             best_mv->as_mv.row += ss[best_site].mv.row;
   1315             best_mv->as_mv.col += ss[best_site].mv.col;
   1316             best_address += ss[best_site].offset;
   1317             last_site = best_site;
   1318         }
   1319         else if (best_address == in_what)
   1320             (*num00)++;
   1321     }
   1322 
   1323     this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1324     this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1325 
   1326     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1327            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1328 }
   1329 
   1330 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1331                         int sad_per_bit, int distance,
   1332                         vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1333                         int_mv *center_mv)
   1334 {
   1335     unsigned char *what = (*(b->base_src) + b->src);
   1336     int what_stride = b->src_stride;
   1337     unsigned char *in_what;
   1338     int pre_stride = x->e_mbd.pre.y_stride;
   1339     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1340     int in_what_stride = pre_stride;
   1341     int mv_stride = pre_stride;
   1342     unsigned char *bestaddress;
   1343     int_mv *best_mv = &d->bmi.mv;
   1344     int_mv this_mv;
   1345     unsigned int bestsad;
   1346     unsigned int thissad;
   1347     int r, c;
   1348 
   1349     unsigned char *check_here;
   1350 
   1351     int ref_row = ref_mv->as_mv.row;
   1352     int ref_col = ref_mv->as_mv.col;
   1353 
   1354     int row_min = ref_row - distance;
   1355     int row_max = ref_row + distance;
   1356     int col_min = ref_col - distance;
   1357     int col_max = ref_col + distance;
   1358 
   1359     int *mvsadcost[2];
   1360     int_mv fcenter_mv;
   1361 
   1362     mvsadcost[0] = x->mvsadcost[0];
   1363     mvsadcost[1] = x->mvsadcost[1];
   1364     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1365     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1366 
   1367     /* Work out the mid point for the search */
   1368     in_what = base_pre + d->offset;
   1369     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1370 
   1371     best_mv->as_mv.row = ref_row;
   1372     best_mv->as_mv.col = ref_col;
   1373 
   1374     /* Baseline value at the centre */
   1375     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1376                           in_what_stride, UINT_MAX)
   1377             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1378 
   1379     /* Apply further limits to prevent us looking using vectors that
   1380      * stretch beyiond the UMV border
   1381      */
   1382     if (col_min < x->mv_col_min)
   1383         col_min = x->mv_col_min;
   1384 
   1385     if (col_max > x->mv_col_max)
   1386         col_max = x->mv_col_max;
   1387 
   1388     if (row_min < x->mv_row_min)
   1389         row_min = x->mv_row_min;
   1390 
   1391     if (row_max > x->mv_row_max)
   1392         row_max = x->mv_row_max;
   1393 
   1394     for (r = row_min; r < row_max ; r++)
   1395     {
   1396         this_mv.as_mv.row = r;
   1397         check_here = r * mv_stride + in_what + col_min;
   1398 
   1399         for (c = col_min; c < col_max; c++)
   1400         {
   1401             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1402 
   1403             this_mv.as_mv.col = c;
   1404             thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1405                                       mvsadcost, sad_per_bit);
   1406 
   1407             if (thissad < bestsad)
   1408             {
   1409                 bestsad = thissad;
   1410                 best_mv->as_mv.row = r;
   1411                 best_mv->as_mv.col = c;
   1412                 bestaddress = check_here;
   1413             }
   1414 
   1415             check_here++;
   1416         }
   1417     }
   1418 
   1419     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1420     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1421 
   1422     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1423            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1424 }
   1425 
   1426 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1427                           int sad_per_bit, int distance,
   1428                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1429                           int_mv *center_mv)
   1430 {
   1431     unsigned char *what = (*(b->base_src) + b->src);
   1432     int what_stride = b->src_stride;
   1433     unsigned char *in_what;
   1434     int pre_stride = x->e_mbd.pre.y_stride;
   1435     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1436     int in_what_stride = pre_stride;
   1437     int mv_stride = pre_stride;
   1438     unsigned char *bestaddress;
   1439     int_mv *best_mv = &d->bmi.mv;
   1440     int_mv this_mv;
   1441     unsigned int bestsad;
   1442     unsigned int thissad;
   1443     int r, c;
   1444 
   1445     unsigned char *check_here;
   1446 
   1447     int ref_row = ref_mv->as_mv.row;
   1448     int ref_col = ref_mv->as_mv.col;
   1449 
   1450     int row_min = ref_row - distance;
   1451     int row_max = ref_row + distance;
   1452     int col_min = ref_col - distance;
   1453     int col_max = ref_col + distance;
   1454 
   1455     unsigned int sad_array[3];
   1456 
   1457     int *mvsadcost[2];
   1458     int_mv fcenter_mv;
   1459 
   1460     mvsadcost[0] = x->mvsadcost[0];
   1461     mvsadcost[1] = x->mvsadcost[1];
   1462     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1463     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1464 
   1465     /* Work out the mid point for the search */
   1466     in_what = base_pre + d->offset;
   1467     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1468 
   1469     best_mv->as_mv.row = ref_row;
   1470     best_mv->as_mv.col = ref_col;
   1471 
   1472     /* Baseline value at the centre */
   1473     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1474                           in_what_stride, UINT_MAX)
   1475             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1476 
   1477     /* Apply further limits to prevent us looking using vectors that stretch
   1478      * beyond the UMV border
   1479      */
   1480     if (col_min < x->mv_col_min)
   1481         col_min = x->mv_col_min;
   1482 
   1483     if (col_max > x->mv_col_max)
   1484         col_max = x->mv_col_max;
   1485 
   1486     if (row_min < x->mv_row_min)
   1487         row_min = x->mv_row_min;
   1488 
   1489     if (row_max > x->mv_row_max)
   1490         row_max = x->mv_row_max;
   1491 
   1492     for (r = row_min; r < row_max ; r++)
   1493     {
   1494         this_mv.as_mv.row = r;
   1495         check_here = r * mv_stride + in_what + col_min;
   1496         c = col_min;
   1497 
   1498         while ((c + 2) < col_max)
   1499         {
   1500             int i;
   1501 
   1502             fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1503 
   1504             for (i = 0; i < 3; i++)
   1505             {
   1506                 thissad = sad_array[i];
   1507 
   1508                 if (thissad < bestsad)
   1509                 {
   1510                     this_mv.as_mv.col = c;
   1511                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1512                                               mvsadcost, sad_per_bit);
   1513 
   1514                     if (thissad < bestsad)
   1515                     {
   1516                         bestsad = thissad;
   1517                         best_mv->as_mv.row = r;
   1518                         best_mv->as_mv.col = c;
   1519                         bestaddress = check_here;
   1520                     }
   1521                 }
   1522 
   1523                 check_here++;
   1524                 c++;
   1525             }
   1526         }
   1527 
   1528         while (c < col_max)
   1529         {
   1530             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1531 
   1532             if (thissad < bestsad)
   1533             {
   1534                 this_mv.as_mv.col = c;
   1535                 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1536                                           mvsadcost, sad_per_bit);
   1537 
   1538                 if (thissad < bestsad)
   1539                 {
   1540                     bestsad = thissad;
   1541                     best_mv->as_mv.row = r;
   1542                     best_mv->as_mv.col = c;
   1543                     bestaddress = check_here;
   1544                 }
   1545             }
   1546 
   1547             check_here ++;
   1548             c ++;
   1549         }
   1550 
   1551     }
   1552 
   1553     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1554     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1555 
   1556     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1557            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1558 }
   1559 
   1560 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1561                           int sad_per_bit, int distance,
   1562                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1563                           int_mv *center_mv)
   1564 {
   1565     unsigned char *what = (*(b->base_src) + b->src);
   1566     int what_stride = b->src_stride;
   1567     int pre_stride = x->e_mbd.pre.y_stride;
   1568     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1569     unsigned char *in_what;
   1570     int in_what_stride = pre_stride;
   1571     int mv_stride = pre_stride;
   1572     unsigned char *bestaddress;
   1573     int_mv *best_mv = &d->bmi.mv;
   1574     int_mv this_mv;
   1575     unsigned int bestsad;
   1576     unsigned int thissad;
   1577     int r, c;
   1578 
   1579     unsigned char *check_here;
   1580 
   1581     int ref_row = ref_mv->as_mv.row;
   1582     int ref_col = ref_mv->as_mv.col;
   1583 
   1584     int row_min = ref_row - distance;
   1585     int row_max = ref_row + distance;
   1586     int col_min = ref_col - distance;
   1587     int col_max = ref_col + distance;
   1588 
   1589     DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
   1590     unsigned int sad_array[3];
   1591 
   1592     int *mvsadcost[2];
   1593     int_mv fcenter_mv;
   1594 
   1595     mvsadcost[0] = x->mvsadcost[0];
   1596     mvsadcost[1] = x->mvsadcost[1];
   1597     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1598     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1599 
   1600     /* Work out the mid point for the search */
   1601     in_what = base_pre + d->offset;
   1602     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1603 
   1604     best_mv->as_mv.row = ref_row;
   1605     best_mv->as_mv.col = ref_col;
   1606 
   1607     /* Baseline value at the centre */
   1608     bestsad = fn_ptr->sdf(what, what_stride,
   1609                           bestaddress, in_what_stride, UINT_MAX)
   1610             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1611 
   1612     /* Apply further limits to prevent us looking using vectors that stretch
   1613      * beyond the UMV border
   1614      */
   1615     if (col_min < x->mv_col_min)
   1616         col_min = x->mv_col_min;
   1617 
   1618     if (col_max > x->mv_col_max)
   1619         col_max = x->mv_col_max;
   1620 
   1621     if (row_min < x->mv_row_min)
   1622         row_min = x->mv_row_min;
   1623 
   1624     if (row_max > x->mv_row_max)
   1625         row_max = x->mv_row_max;
   1626 
   1627     for (r = row_min; r < row_max ; r++)
   1628     {
   1629         this_mv.as_mv.row = r;
   1630         check_here = r * mv_stride + in_what + col_min;
   1631         c = col_min;
   1632 
   1633         while ((c + 7) < col_max)
   1634         {
   1635             int i;
   1636 
   1637             fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
   1638 
   1639             for (i = 0; i < 8; i++)
   1640             {
   1641                 thissad = sad_array8[i];
   1642 
   1643                 if (thissad < bestsad)
   1644                 {
   1645                     this_mv.as_mv.col = c;
   1646                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1647                                               mvsadcost, sad_per_bit);
   1648 
   1649                     if (thissad < bestsad)
   1650                     {
   1651                         bestsad = thissad;
   1652                         best_mv->as_mv.row = r;
   1653                         best_mv->as_mv.col = c;
   1654                         bestaddress = check_here;
   1655                     }
   1656                 }
   1657 
   1658                 check_here++;
   1659                 c++;
   1660             }
   1661         }
   1662 
   1663         while ((c + 2) < col_max)
   1664         {
   1665             int i;
   1666 
   1667             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1668 
   1669             for (i = 0; i < 3; i++)
   1670             {
   1671                 thissad = sad_array[i];
   1672 
   1673                 if (thissad < bestsad)
   1674                 {
   1675                     this_mv.as_mv.col = c;
   1676                     thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1677                         mvsadcost, sad_per_bit);
   1678 
   1679                     if (thissad < bestsad)
   1680                     {
   1681                         bestsad = thissad;
   1682                         best_mv->as_mv.row = r;
   1683                         best_mv->as_mv.col = c;
   1684                         bestaddress = check_here;
   1685                     }
   1686                 }
   1687 
   1688                 check_here++;
   1689                 c++;
   1690             }
   1691         }
   1692 
   1693         while (c < col_max)
   1694         {
   1695             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1696 
   1697             if (thissad < bestsad)
   1698             {
   1699                 this_mv.as_mv.col = c;
   1700                 thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1701                     mvsadcost, sad_per_bit);
   1702 
   1703                 if (thissad < bestsad)
   1704                 {
   1705                     bestsad = thissad;
   1706                     best_mv->as_mv.row = r;
   1707                     best_mv->as_mv.col = c;
   1708                     bestaddress = check_here;
   1709                 }
   1710             }
   1711 
   1712             check_here ++;
   1713             c ++;
   1714         }
   1715     }
   1716 
   1717     this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1718     this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1719 
   1720     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1721            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1722 }
   1723 
   1724 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1725                             int error_per_bit, int search_range,
   1726                             vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1727                             int_mv *center_mv)
   1728 {
   1729     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1730     int i, j;
   1731     short this_row_offset, this_col_offset;
   1732 
   1733     int what_stride = b->src_stride;
   1734     int pre_stride = x->e_mbd.pre.y_stride;
   1735     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1736     int in_what_stride = pre_stride;
   1737     unsigned char *what = (*(b->base_src) + b->src);
   1738     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1739         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1740     unsigned char *check_here;
   1741     int_mv this_mv;
   1742     unsigned int bestsad;
   1743     unsigned int thissad;
   1744 
   1745     int *mvsadcost[2];
   1746     int_mv fcenter_mv;
   1747 
   1748     mvsadcost[0] = x->mvsadcost[0];
   1749     mvsadcost[1] = x->mvsadcost[1];
   1750     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1751     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1752 
   1753     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1754                           in_what_stride, UINT_MAX)
   1755             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1756 
   1757     for (i=0; i<search_range; i++)
   1758     {
   1759         int best_site = -1;
   1760 
   1761         for (j = 0 ; j < 4 ; j++)
   1762         {
   1763             this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1764             this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1765 
   1766             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1767             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1768             {
   1769                 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1770                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1771 
   1772                 if (thissad < bestsad)
   1773                 {
   1774                     this_mv.as_mv.row = this_row_offset;
   1775                     this_mv.as_mv.col = this_col_offset;
   1776                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1777 
   1778                     if (thissad < bestsad)
   1779                     {
   1780                         bestsad = thissad;
   1781                         best_site = j;
   1782                     }
   1783                 }
   1784             }
   1785         }
   1786 
   1787         if (best_site == -1)
   1788             break;
   1789         else
   1790         {
   1791             ref_mv->as_mv.row += neighbors[best_site].row;
   1792             ref_mv->as_mv.col += neighbors[best_site].col;
   1793             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1794         }
   1795     }
   1796 
   1797     this_mv.as_mv.row = ref_mv->as_mv.row << 3;
   1798     this_mv.as_mv.col = ref_mv->as_mv.col << 3;
   1799 
   1800     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1801            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1802 }
   1803 
   1804 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1805                               int_mv *ref_mv, int error_per_bit,
   1806                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1807                               int *mvcost[2], int_mv *center_mv)
   1808 {
   1809     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1810     int i, j;
   1811     short this_row_offset, this_col_offset;
   1812 
   1813     int what_stride = b->src_stride;
   1814     int pre_stride = x->e_mbd.pre.y_stride;
   1815     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1816     int in_what_stride = pre_stride;
   1817     unsigned char *what = (*(b->base_src) + b->src);
   1818     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1819         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1820     unsigned char *check_here;
   1821     int_mv this_mv;
   1822     unsigned int bestsad;
   1823     unsigned int thissad;
   1824 
   1825     int *mvsadcost[2];
   1826     int_mv fcenter_mv;
   1827 
   1828     mvsadcost[0] = x->mvsadcost[0];
   1829     mvsadcost[1] = x->mvsadcost[1];
   1830     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1831     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1832 
   1833     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1834                           in_what_stride, UINT_MAX)
   1835             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1836 
   1837     for (i=0; i<search_range; i++)
   1838     {
   1839         int best_site = -1;
   1840         int all_in = 1;
   1841 
   1842         all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
   1843         all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
   1844         all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
   1845         all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
   1846 
   1847         if(all_in)
   1848         {
   1849             unsigned int sad_array[4];
   1850             const unsigned char *block_offset[4];
   1851             block_offset[0] = best_address - in_what_stride;
   1852             block_offset[1] = best_address - 1;
   1853             block_offset[2] = best_address + 1;
   1854             block_offset[3] = best_address + in_what_stride;
   1855 
   1856             fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1857 
   1858             for (j = 0; j < 4; j++)
   1859             {
   1860                 if (sad_array[j] < bestsad)
   1861                 {
   1862                     this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
   1863                     this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
   1864                     sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1865 
   1866                     if (sad_array[j] < bestsad)
   1867                     {
   1868                         bestsad = sad_array[j];
   1869                         best_site = j;
   1870                     }
   1871                 }
   1872             }
   1873         }
   1874         else
   1875         {
   1876             for (j = 0 ; j < 4 ; j++)
   1877             {
   1878                 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1879                 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1880 
   1881                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1882                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1883                 {
   1884                     check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1885                     thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1886 
   1887                     if (thissad < bestsad)
   1888                     {
   1889                         this_mv.as_mv.row = this_row_offset;
   1890                         this_mv.as_mv.col = this_col_offset;
   1891                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1892 
   1893                         if (thissad < bestsad)
   1894                         {
   1895                             bestsad = thissad;
   1896                             best_site = j;
   1897                         }
   1898                     }
   1899                 }
   1900             }
   1901         }
   1902 
   1903         if (best_site == -1)
   1904             break;
   1905         else
   1906         {
   1907             ref_mv->as_mv.row += neighbors[best_site].row;
   1908             ref_mv->as_mv.col += neighbors[best_site].col;
   1909             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1910         }
   1911     }
   1912 
   1913     this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   1914     this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   1915 
   1916     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1917            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1918 }
   1919 
   1920 #ifdef VP8_ENTROPY_STATS
   1921 void print_mode_context(void)
   1922 {
   1923     FILE *f = fopen("modecont.c", "w");
   1924     int i, j;
   1925 
   1926     fprintf(f, "#include \"entropy.h\"\n");
   1927     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1928     fprintf(f, "{\n");
   1929 
   1930     for (j = 0; j < 6; j++)
   1931     {
   1932         fprintf(f, "  { /* %d */\n", j);
   1933         fprintf(f, "    ");
   1934 
   1935         for (i = 0; i < 4; i++)
   1936         {
   1937             int overal_prob;
   1938             int this_prob;
   1939             int count;
   1940 
   1941             /* Overall probs */
   1942             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1943 
   1944             if (count)
   1945                 overal_prob = 256 * mv_mode_cts[i][0] / count;
   1946             else
   1947                 overal_prob = 128;
   1948 
   1949             if (overal_prob == 0)
   1950                 overal_prob = 1;
   1951 
   1952             /* context probs */
   1953             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1954 
   1955             if (count)
   1956                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1957             else
   1958                 this_prob = 128;
   1959 
   1960             if (this_prob == 0)
   1961                 this_prob = 1;
   1962 
   1963             fprintf(f, "%5d, ", this_prob);
   1964         }
   1965 
   1966         fprintf(f, "  },\n");
   1967     }
   1968 
   1969     fprintf(f, "};\n");
   1970     fclose(f);
   1971 }
   1972 
   1973 /* MV ref count VP8_ENTROPY_STATS stats code */
   1974 #ifdef VP8_ENTROPY_STATS
   1975 void init_mv_ref_counts()
   1976 {
   1977     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1978     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1979 }
   1980 
   1981 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
   1982 {
   1983     if (m == ZEROMV)
   1984     {
   1985         ++mv_ref_ct [ct[0]] [0] [0];
   1986         ++mv_mode_cts[0][0];
   1987     }
   1988     else
   1989     {
   1990         ++mv_ref_ct [ct[0]] [0] [1];
   1991         ++mv_mode_cts[0][1];
   1992 
   1993         if (m == NEARESTMV)
   1994         {
   1995             ++mv_ref_ct [ct[1]] [1] [0];
   1996             ++mv_mode_cts[1][0];
   1997         }
   1998         else
   1999         {
   2000             ++mv_ref_ct [ct[1]] [1] [1];
   2001             ++mv_mode_cts[1][1];
   2002 
   2003             if (m == NEARMV)
   2004             {
   2005                 ++mv_ref_ct [ct[2]] [2] [0];
   2006                 ++mv_mode_cts[2][0];
   2007             }
   2008             else
   2009             {
   2010                 ++mv_ref_ct [ct[2]] [2] [1];
   2011                 ++mv_mode_cts[2][1];
   2012 
   2013                 if (m == NEWMV)
   2014                 {
   2015                     ++mv_ref_ct [ct[3]] [3] [0];
   2016                     ++mv_mode_cts[3][0];
   2017                 }
   2018                 else
   2019                 {
   2020                     ++mv_ref_ct [ct[3]] [3] [1];
   2021                     ++mv_mode_cts[3][1];
   2022                 }
   2023             }
   2024         }
   2025     }
   2026 }
   2027 
   2028 #endif/* END MV ref count VP8_ENTROPY_STATS stats code */
   2029 
   2030 #endif
   2031