Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "onyx_int.h"
     13 #include "mcomp.h"
     14 #include "vpx_mem/vpx_mem.h"
     15 #include "vpx_config.h"
     16 #include <stdio.h>
     17 #include <limits.h>
     18 #include <math.h>
     19 #include "vp8/common/findnearmv.h"
     20 
#ifdef ENTROPY_STATS
/* Counter tables that exist only in builds with ENTROPY_STATS defined.
 * NOTE(review): nothing in the visible part of this file reads or updates
 * them; presumably they tally motion-vector reference/mode decisions for
 * offline probability tuning -- confirm against the code that fills them.
 */
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif
     25 
     26 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
     27 {
     28     /* MV costing is based on the distribution of vectors in the previous
     29      * frame and as such will tend to over state the cost of vectors. In
     30      * addition coding a new vector can have a knock on effect on the cost
     31      * of subsequent vectors and the quality of prediction from NEAR and
     32      * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
     33      * limited extent, for some account to be taken of these factors.
     34      */
     35     return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
     36 }
     37 
     38 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
     39 {
     40     /* Ignore mv costing if mvcost is NULL */
     41     if (mvcost)
     42         return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
     43                  mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
     44                  * error_per_bit + 128) >> 8;
     45     return 0;
     46 }
     47 
     48 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
     49 {
     50     /* Calculate sad error cost on full pixel basis. */
     51     /* Ignore mv costing if mvsadcost is NULL */
     52     if (mvsadcost)
     53         return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
     54                  mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
     55                 * error_per_bit + 128) >> 8;
     56     return 0;
     57 }
     58 
     59 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
     60 {
     61     int Len;
     62     int search_site_count = 0;
     63 
     64 
     65     /* Generate offsets for 4 search sites per step. */
     66     Len = MAX_FIRST_STEP;
     67     x->ss[search_site_count].mv.col = 0;
     68     x->ss[search_site_count].mv.row = 0;
     69     x->ss[search_site_count].offset = 0;
     70     search_site_count++;
     71 
     72     while (Len > 0)
     73     {
     74 
     75         /* Compute offsets for search sites. */
     76         x->ss[search_site_count].mv.col = 0;
     77         x->ss[search_site_count].mv.row = -Len;
     78         x->ss[search_site_count].offset = -Len * stride;
     79         search_site_count++;
     80 
     81         /* Compute offsets for search sites. */
     82         x->ss[search_site_count].mv.col = 0;
     83         x->ss[search_site_count].mv.row = Len;
     84         x->ss[search_site_count].offset = Len * stride;
     85         search_site_count++;
     86 
     87         /* Compute offsets for search sites. */
     88         x->ss[search_site_count].mv.col = -Len;
     89         x->ss[search_site_count].mv.row = 0;
     90         x->ss[search_site_count].offset = -Len;
     91         search_site_count++;
     92 
     93         /* Compute offsets for search sites. */
     94         x->ss[search_site_count].mv.col = Len;
     95         x->ss[search_site_count].mv.row = 0;
     96         x->ss[search_site_count].offset = Len;
     97         search_site_count++;
     98 
     99         /* Contract. */
    100         Len /= 2;
    101     }
    102 
    103     x->ss_count = search_site_count;
    104     x->searches_per_step = 4;
    105 }
    106 
    107 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
    108 {
    109     int Len;
    110     int search_site_count = 0;
    111 
    112     /* Generate offsets for 8 search sites per step. */
    113     Len = MAX_FIRST_STEP;
    114     x->ss[search_site_count].mv.col = 0;
    115     x->ss[search_site_count].mv.row = 0;
    116     x->ss[search_site_count].offset = 0;
    117     search_site_count++;
    118 
    119     while (Len > 0)
    120     {
    121 
    122         /* Compute offsets for search sites. */
    123         x->ss[search_site_count].mv.col = 0;
    124         x->ss[search_site_count].mv.row = -Len;
    125         x->ss[search_site_count].offset = -Len * stride;
    126         search_site_count++;
    127 
    128         /* Compute offsets for search sites. */
    129         x->ss[search_site_count].mv.col = 0;
    130         x->ss[search_site_count].mv.row = Len;
    131         x->ss[search_site_count].offset = Len * stride;
    132         search_site_count++;
    133 
    134         /* Compute offsets for search sites. */
    135         x->ss[search_site_count].mv.col = -Len;
    136         x->ss[search_site_count].mv.row = 0;
    137         x->ss[search_site_count].offset = -Len;
    138         search_site_count++;
    139 
    140         /* Compute offsets for search sites. */
    141         x->ss[search_site_count].mv.col = Len;
    142         x->ss[search_site_count].mv.row = 0;
    143         x->ss[search_site_count].offset = Len;
    144         search_site_count++;
    145 
    146         /* Compute offsets for search sites. */
    147         x->ss[search_site_count].mv.col = -Len;
    148         x->ss[search_site_count].mv.row = -Len;
    149         x->ss[search_site_count].offset = -Len * stride - Len;
    150         search_site_count++;
    151 
    152         /* Compute offsets for search sites. */
    153         x->ss[search_site_count].mv.col = Len;
    154         x->ss[search_site_count].mv.row = -Len;
    155         x->ss[search_site_count].offset = -Len * stride + Len;
    156         search_site_count++;
    157 
    158         /* Compute offsets for search sites. */
    159         x->ss[search_site_count].mv.col = -Len;
    160         x->ss[search_site_count].mv.row = Len;
    161         x->ss[search_site_count].offset = Len * stride - Len;
    162         search_site_count++;
    163 
    164         /* Compute offsets for search sites. */
    165         x->ss[search_site_count].mv.col = Len;
    166         x->ss[search_site_count].mv.row = Len;
    167         x->ss[search_site_count].offset = Len * stride + Len;
    168         search_site_count++;
    169 
    170 
    171         /* Contract. */
    172         Len /= 2;
    173     }
    174 
    175     x->ss_count = search_site_count;
    176     x->searches_per_step = 8;
    177 }
    178 
/*
 * To avoid the penalty of reads that cross a cache line, preload the
 * reference area into a small aligned buffer, so that no read from this
 * buffer crosses a cache line. This reduces the CPU cycles spent on
 * reading reference data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a
 * 22 rows x 32 cols area is copied, which is enough for a 16x16
 * macroblock. Later, for SPLITMV, we could reduce the area.
 */
    188 
    189 /* estimated cost of a motion vector (r,c) */
    190 #define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
    191 /* pointer to predictor base of a motionvector */
    192 #define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
    193 /* convert motion vector component to offset for svf calc */
    194 #define SP(x) (((x)&3)<<1)
    195 /* returns subpixel variance error function. */
    196 #define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
    197 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
    198 /* returns distortion + motion vector cost */
    199 #define ERR(r,c) (MVC(r,c)+DIST(r,c))
    200 /* checks if (r,c) has better score than previous best */
    201 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
    202 
    203 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    204                                              int_mv *bestmv, int_mv *ref_mv,
    205                                              int error_per_bit,
    206                                              const vp8_variance_fn_ptr_t *vfp,
    207                                              int *mvcost[2], int *distortion,
    208                                              unsigned int *sse1)
    209 {
    210     unsigned char *z = (*(b->base_src) + b->src);
    211 
    212     int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
    213     int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
    214     int tr = br, tc = bc;
    215     unsigned int besterr;
    216     unsigned int left, right, up, down, diag;
    217     unsigned int sse;
    218     unsigned int whichdir;
    219     unsigned int halfiters = 4;
    220     unsigned int quarteriters = 4;
    221     int thismse;
    222 
    223     int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
    224     int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
    225     int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
    226     int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
    227 
    228     int y_stride;
    229     int offset;
    230     int pre_stride = x->e_mbd.pre.y_stride;
    231     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    232 
    233 
    234 #if ARCH_X86 || ARCH_X86_64
    235     MACROBLOCKD *xd = &x->e_mbd;
    236     unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    237     unsigned char *y;
    238     int buf_r1, buf_r2, buf_c1, buf_c2;
    239 
    240     /* Clamping to avoid out-of-range data access */
    241     buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
    242     buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
    243     buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
    244     buf_c2 = ((bestmv->as_mv.col + 3) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):3;
    245     y_stride = 32;
    246 
    247     /* Copy to intermediate buffer before searching. */
    248     vfp->copymem(y0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    249     y = xd->y_buf + y_stride*buf_r1 +buf_c1;
    250 #else
    251     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    252     y_stride = pre_stride;
    253 #endif
    254 
    255     offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
    256 
    257     /* central mv */
    258     bestmv->as_mv.row <<= 3;
    259     bestmv->as_mv.col <<= 3;
    260 
    261     /* calculate central point error */
    262     besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    263     *distortion = besterr;
    264     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    265 
    266     /* TODO: Each subsequent iteration checks at least one point in common
    267      * with the last iteration could be 2 ( if diag selected)
    268      */
    269     while (--halfiters)
    270     {
    271         /* 1/2 pel */
    272         CHECK_BETTER(left, tr, tc - 2);
    273         CHECK_BETTER(right, tr, tc + 2);
    274         CHECK_BETTER(up, tr - 2, tc);
    275         CHECK_BETTER(down, tr + 2, tc);
    276 
    277         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    278 
    279         switch (whichdir)
    280         {
    281         case 0:
    282             CHECK_BETTER(diag, tr - 2, tc - 2);
    283             break;
    284         case 1:
    285             CHECK_BETTER(diag, tr - 2, tc + 2);
    286             break;
    287         case 2:
    288             CHECK_BETTER(diag, tr + 2, tc - 2);
    289             break;
    290         case 3:
    291             CHECK_BETTER(diag, tr + 2, tc + 2);
    292             break;
    293         }
    294 
    295         /* no reason to check the same one again. */
    296         if (tr == br && tc == bc)
    297             break;
    298 
    299         tr = br;
    300         tc = bc;
    301     }
    302 
    303     /* TODO: Each subsequent iteration checks at least one point in common
    304      * with the last iteration could be 2 ( if diag selected)
    305      */
    306 
    307     /* 1/4 pel */
    308     while (--quarteriters)
    309     {
    310         CHECK_BETTER(left, tr, tc - 1);
    311         CHECK_BETTER(right, tr, tc + 1);
    312         CHECK_BETTER(up, tr - 1, tc);
    313         CHECK_BETTER(down, tr + 1, tc);
    314 
    315         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    316 
    317         switch (whichdir)
    318         {
    319         case 0:
    320             CHECK_BETTER(diag, tr - 1, tc - 1);
    321             break;
    322         case 1:
    323             CHECK_BETTER(diag, tr - 1, tc + 1);
    324             break;
    325         case 2:
    326             CHECK_BETTER(diag, tr + 1, tc - 1);
    327             break;
    328         case 3:
    329             CHECK_BETTER(diag, tr + 1, tc + 1);
    330             break;
    331         }
    332 
    333         /* no reason to check the same one again. */
    334         if (tr == br && tc == bc)
    335             break;
    336 
    337         tr = br;
    338         tc = bc;
    339     }
    340 
    341     bestmv->as_mv.row = br << 1;
    342     bestmv->as_mv.col = bc << 1;
    343 
    344     if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
    345         (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
    346         return INT_MAX;
    347 
    348     return besterr;
    349 }
    350 #undef MVC
    351 #undef PRE
    352 #undef SP
    353 #undef DIST
    354 #undef IFMVCV
    355 #undef ERR
    356 #undef CHECK_BETTER
    357 
    358 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    359                                  int_mv *bestmv, int_mv *ref_mv,
    360                                  int error_per_bit,
    361                                  const vp8_variance_fn_ptr_t *vfp,
    362                                  int *mvcost[2], int *distortion,
    363                                  unsigned int *sse1)
    364 {
    365     int bestmse = INT_MAX;
    366     int_mv startmv;
    367     int_mv this_mv;
    368     unsigned char *z = (*(b->base_src) + b->src);
    369     int left, right, up, down, diag;
    370     unsigned int sse;
    371     int whichdir ;
    372     int thismse;
    373     int y_stride;
    374     int pre_stride = x->e_mbd.pre.y_stride;
    375     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    376 
    377 #if ARCH_X86 || ARCH_X86_64
    378     MACROBLOCKD *xd = &x->e_mbd;
    379     unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    380     unsigned char *y;
    381 
    382     y_stride = 32;
    383     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    384      vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    385      y = xd->y_buf + y_stride + 1;
    386 #else
    387      unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    388      y_stride = pre_stride;
    389 #endif
    390 
    391     /* central mv */
    392     bestmv->as_mv.row <<= 3;
    393     bestmv->as_mv.col <<= 3;
    394     startmv = *bestmv;
    395 
    396     /* calculate central point error */
    397     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    398     *distortion = bestmse;
    399     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    400 
    401     /* go left then right and check error */
    402     this_mv.as_mv.row = startmv.as_mv.row;
    403     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    404     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    405     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    406 
    407     if (left < bestmse)
    408     {
    409         *bestmv = this_mv;
    410         bestmse = left;
    411         *distortion = thismse;
    412         *sse1 = sse;
    413     }
    414 
    415     this_mv.as_mv.col += 8;
    416     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    417     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    418 
    419     if (right < bestmse)
    420     {
    421         *bestmv = this_mv;
    422         bestmse = right;
    423         *distortion = thismse;
    424         *sse1 = sse;
    425     }
    426 
    427     /* go up then down and check error */
    428     this_mv.as_mv.col = startmv.as_mv.col;
    429     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    430     thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    431     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    432 
    433     if (up < bestmse)
    434     {
    435         *bestmv = this_mv;
    436         bestmse = up;
    437         *distortion = thismse;
    438         *sse1 = sse;
    439     }
    440 
    441     this_mv.as_mv.row += 8;
    442     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    443     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    444 
    445     if (down < bestmse)
    446     {
    447         *bestmv = this_mv;
    448         bestmse = down;
    449         *distortion = thismse;
    450         *sse1 = sse;
    451     }
    452 
    453 
    454     /* now check 1 more diagonal */
    455     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    456     this_mv = startmv;
    457 
    458     switch (whichdir)
    459     {
    460     case 0:
    461         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    462         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    463         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    464         break;
    465     case 1:
    466         this_mv.as_mv.col += 4;
    467         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    468         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    469         break;
    470     case 2:
    471         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    472         this_mv.as_mv.row += 4;
    473         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    474         break;
    475     case 3:
    476     default:
    477         this_mv.as_mv.col += 4;
    478         this_mv.as_mv.row += 4;
    479         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    480         break;
    481     }
    482 
    483     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    484 
    485     if (diag < bestmse)
    486     {
    487         *bestmv = this_mv;
    488         bestmse = diag;
    489         *distortion = thismse;
    490         *sse1 = sse;
    491     }
    492 
    493 
    494     /* time to check quarter pels. */
    495     if (bestmv->as_mv.row < startmv.as_mv.row)
    496         y -= y_stride;
    497 
    498     if (bestmv->as_mv.col < startmv.as_mv.col)
    499         y--;
    500 
    501     startmv = *bestmv;
    502 
    503 
    504 
    505     /* go left then right and check error */
    506     this_mv.as_mv.row = startmv.as_mv.row;
    507 
    508     if (startmv.as_mv.col & 7)
    509     {
    510         this_mv.as_mv.col = startmv.as_mv.col - 2;
    511         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    512     }
    513     else
    514     {
    515         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    516         thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    517     }
    518 
    519     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    520 
    521     if (left < bestmse)
    522     {
    523         *bestmv = this_mv;
    524         bestmse = left;
    525         *distortion = thismse;
    526         *sse1 = sse;
    527     }
    528 
    529     this_mv.as_mv.col += 4;
    530     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    531     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    532 
    533     if (right < bestmse)
    534     {
    535         *bestmv = this_mv;
    536         bestmse = right;
    537         *distortion = thismse;
    538         *sse1 = sse;
    539     }
    540 
    541     /* go up then down and check error */
    542     this_mv.as_mv.col = startmv.as_mv.col;
    543 
    544     if (startmv.as_mv.row & 7)
    545     {
    546         this_mv.as_mv.row = startmv.as_mv.row - 2;
    547         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    548     }
    549     else
    550     {
    551         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    552         thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    553     }
    554 
    555     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    556 
    557     if (up < bestmse)
    558     {
    559         *bestmv = this_mv;
    560         bestmse = up;
    561         *distortion = thismse;
    562         *sse1 = sse;
    563     }
    564 
    565     this_mv.as_mv.row += 4;
    566     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    567     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    568 
    569     if (down < bestmse)
    570     {
    571         *bestmv = this_mv;
    572         bestmse = down;
    573         *distortion = thismse;
    574         *sse1 = sse;
    575     }
    576 
    577 
    578     /* now check 1 more diagonal */
    579     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    580 
    581     this_mv = startmv;
    582 
    583     switch (whichdir)
    584     {
    585     case 0:
    586 
    587         if (startmv.as_mv.row & 7)
    588         {
    589             this_mv.as_mv.row -= 2;
    590 
    591             if (startmv.as_mv.col & 7)
    592             {
    593                 this_mv.as_mv.col -= 2;
    594                 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    595             }
    596             else
    597             {
    598                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    599                 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
    600             }
    601         }
    602         else
    603         {
    604             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    605 
    606             if (startmv.as_mv.col & 7)
    607             {
    608                 this_mv.as_mv.col -= 2;
    609                 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    610             }
    611             else
    612             {
    613                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    614                 thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
    615             }
    616         }
    617 
    618         break;
    619     case 1:
    620         this_mv.as_mv.col += 2;
    621 
    622         if (startmv.as_mv.row & 7)
    623         {
    624             this_mv.as_mv.row -= 2;
    625             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    626         }
    627         else
    628         {
    629             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    630             thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    631         }
    632 
    633         break;
    634     case 2:
    635         this_mv.as_mv.row += 2;
    636 
    637         if (startmv.as_mv.col & 7)
    638         {
    639             this_mv.as_mv.col -= 2;
    640             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    641         }
    642         else
    643         {
    644             this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    645             thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    646         }
    647 
    648         break;
    649     case 3:
    650         this_mv.as_mv.col += 2;
    651         this_mv.as_mv.row += 2;
    652         thismse = vfp->svf(y, y_stride,  this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    653         break;
    654     }
    655 
    656     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    657 
    658     if (diag < bestmse)
    659     {
    660         *bestmv = this_mv;
    661         bestmse = diag;
    662         *distortion = thismse;
    663         *sse1 = sse;
    664     }
    665 
    666     return bestmse;
    667 }
    668 
    669 int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    670                                   int_mv *bestmv, int_mv *ref_mv,
    671                                   int error_per_bit,
    672                                   const vp8_variance_fn_ptr_t *vfp,
    673                                   int *mvcost[2], int *distortion,
    674                                   unsigned int *sse1)
    675 {
    676     int bestmse = INT_MAX;
    677     int_mv startmv;
    678     int_mv this_mv;
    679     unsigned char *z = (*(b->base_src) + b->src);
    680     int left, right, up, down, diag;
    681     unsigned int sse;
    682     int whichdir ;
    683     int thismse;
    684     int y_stride;
    685     int pre_stride = x->e_mbd.pre.y_stride;
    686     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    687 
    688 #if ARCH_X86 || ARCH_X86_64
    689     MACROBLOCKD *xd = &x->e_mbd;
    690     unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    691     unsigned char *y;
    692 
    693     y_stride = 32;
    694     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    695     vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    696     y = xd->y_buf + y_stride + 1;
    697 #else
    698     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    699     y_stride = pre_stride;
    700 #endif
    701 
    702     /* central mv */
    703     bestmv->as_mv.row <<= 3;
    704     bestmv->as_mv.col <<= 3;
    705     startmv = *bestmv;
    706 
    707     /* calculate central point error */
    708     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    709     *distortion = bestmse;
    710     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    711 
    712     /* go left then right and check error */
    713     this_mv.as_mv.row = startmv.as_mv.row;
    714     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    715     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    716     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    717 
    718     if (left < bestmse)
    719     {
    720         *bestmv = this_mv;
    721         bestmse = left;
    722         *distortion = thismse;
    723         *sse1 = sse;
    724     }
    725 
    726     this_mv.as_mv.col += 8;
    727     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    728     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    729 
    730     if (right < bestmse)
    731     {
    732         *bestmv = this_mv;
    733         bestmse = right;
    734         *distortion = thismse;
    735         *sse1 = sse;
    736     }
    737 
    738     /* go up then down and check error */
    739     this_mv.as_mv.col = startmv.as_mv.col;
    740     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    741     thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    742     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    743 
    744     if (up < bestmse)
    745     {
    746         *bestmv = this_mv;
    747         bestmse = up;
    748         *distortion = thismse;
    749         *sse1 = sse;
    750     }
    751 
    752     this_mv.as_mv.row += 8;
    753     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    754     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    755 
    756     if (down < bestmse)
    757     {
    758         *bestmv = this_mv;
    759         bestmse = down;
    760         *distortion = thismse;
    761         *sse1 = sse;
    762     }
    763 
    764     /* now check 1 more diagonal - */
    765     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    766     this_mv = startmv;
    767 
    768     switch (whichdir)
    769     {
    770     case 0:
    771         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    772         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    773         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    774         break;
    775     case 1:
    776         this_mv.as_mv.col += 4;
    777         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    778         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    779         break;
    780     case 2:
    781         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    782         this_mv.as_mv.row += 4;
    783         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    784         break;
    785     case 3:
    786     default:
    787         this_mv.as_mv.col += 4;
    788         this_mv.as_mv.row += 4;
    789         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    790         break;
    791     }
    792 
    793     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    794 
    795     if (diag < bestmse)
    796     {
    797         *bestmv = this_mv;
    798         bestmse = diag;
    799         *distortion = thismse;
    800         *sse1 = sse;
    801     }
    802 
    803     return bestmse;
    804 }
    805 
    /*
     * Helper macros for vp8_hex_search(); they are #undef'd right after it.
     *
     * Each one expands to a bare compound statement rather than the usual
     * do { } while (0) wrapper.  That is deliberate: the call sites omit the
     * trailing semicolon, and CHECK_POINT relies on `continue` reaching the
     * loop that encloses the expansion.
     *
     * The macros read and write variables of the expanding function by name:
     * x, br, bc, all_in, this_mv, thissad, bestsad, best_site, i, fcenter_mv,
     * mvsadcost and sad_per_bit.
     */

    /* Set all_in to 1 when (br +/- range, bc +/- range) all lie inside the
     * inclusive row/column limits stored in x, and to 0 otherwise.  The
     * argument is parenthesised so that an expression argument is treated as
     * a single value.
     */
    #define CHECK_BOUNDS(range) \
    {\
        all_in = ((br - (range)) >= x->mv_row_min) &&\
                 ((br + (range)) <= x->mv_row_max) &&\
                 ((bc - (range)) >= x->mv_col_min) &&\
                 ((bc + (range)) <= x->mv_col_max);\
    }

    /* Skip the current loop iteration when this_mv falls outside the
     * inclusive row/column limits stored in x.
     */
    #define CHECK_POINT \
    {\
        if (this_mv.as_mv.col < x->mv_col_min ||\
            this_mv.as_mv.col > x->mv_col_max ||\
            this_mv.as_mv.row < x->mv_row_min ||\
            this_mv.as_mv.row > x->mv_row_max)\
            continue;\
    }

    /* If thissad beats bestsad on its own, add the motion vector cost and
     * compare again; on success record the new best value and remember loop
     * index i in best_site.
     */
    #define CHECK_BETTER \
    {\
        if (thissad < bestsad)\
        {\
            thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
            if (thissad < bestsad)\
            {\
                bestsad = thissad;\
                best_site = i;\
            }\
        }\
    }
    835 
    836 static const MV next_chkpts[6][3] =
    837 {
    838     {{ -2, 0}, { -1, -2}, {1, -2}},
    839     {{ -1, -2}, {1, -2}, {2, 0}},
    840     {{1, -2}, {2, 0}, {1, 2}},
    841     {{2, 0}, {1, 2}, { -1, 2}},
    842     {{1, 2}, { -1, 2}, { -2, 0}},
    843     {{ -1, 2}, { -2, 0}, { -1, -2}}
    844 };
    845 
    846 int vp8_hex_search
    847 (
    848     MACROBLOCK *x,
    849     BLOCK *b,
    850     BLOCKD *d,
    851     int_mv *ref_mv,
    852     int_mv *best_mv,
    853     int search_param,
    854     int sad_per_bit,
    855     const vp8_variance_fn_ptr_t *vfp,
    856     int *mvsadcost[2],
    857     int *mvcost[2],
    858     int_mv *center_mv
    859 )
    860 {
    861     MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    862     MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
    863     int i, j;
    864 
    865     unsigned char *what = (*(b->base_src) + b->src);
    866     int what_stride = b->src_stride;
    867     int pre_stride = x->e_mbd.pre.y_stride;
    868     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    869 
    870     int in_what_stride = pre_stride;
    871     int br, bc;
    872     int_mv this_mv;
    873     unsigned int bestsad;
    874     unsigned int thissad;
    875     unsigned char *base_offset;
    876     unsigned char *this_offset;
    877     int k = -1;
    878     int all_in;
    879     int best_site = -1;
    880     int hex_range = 127;
    881     int dia_range = 8;
    882 
    883     int_mv fcenter_mv;
    884     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    885     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    886 
    887     /* adjust ref_mv to make sure it is within MV range */
    888     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    889     br = ref_mv->as_mv.row;
    890     bc = ref_mv->as_mv.col;
    891 
    892     /* Work out the start point for the search */
    893     base_offset = (unsigned char *)(base_pre + d->offset);
    894     this_offset = base_offset + (br * (pre_stride)) + bc;
    895     this_mv.as_mv.row = br;
    896     this_mv.as_mv.col = bc;
    897     bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
    898             + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    899 
    900 #if CONFIG_MULTI_RES_ENCODING
    901     /* Lower search range based on prediction info */
    902     if (search_param >= 6) goto cal_neighbors;
    903     else if (search_param >= 5) hex_range = 4;
    904     else if (search_param >= 4) hex_range = 6;
    905     else if (search_param >= 3) hex_range = 15;
    906     else if (search_param >= 2) hex_range = 31;
    907     else if (search_param >= 1) hex_range = 63;
    908 
    909     dia_range = 8;
    910 #endif
    911 
    912     /* hex search */
    913     CHECK_BOUNDS(2)
    914 
    915     if(all_in)
    916     {
    917         for (i = 0; i < 6; i++)
    918         {
    919             this_mv.as_mv.row = br + hex[i].row;
    920             this_mv.as_mv.col = bc + hex[i].col;
    921             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    922             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    923             CHECK_BETTER
    924         }
    925     }else
    926     {
    927         for (i = 0; i < 6; i++)
    928         {
    929             this_mv.as_mv.row = br + hex[i].row;
    930             this_mv.as_mv.col = bc + hex[i].col;
    931             CHECK_POINT
    932             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    933             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    934             CHECK_BETTER
    935         }
    936     }
    937 
    938     if (best_site == -1)
    939         goto cal_neighbors;
    940     else
    941     {
    942         br += hex[best_site].row;
    943         bc += hex[best_site].col;
    944         k = best_site;
    945     }
    946 
    947     for (j = 1; j < hex_range; j++)
    948     {
    949         best_site = -1;
    950         CHECK_BOUNDS(2)
    951 
    952         if(all_in)
    953         {
    954             for (i = 0; i < 3; i++)
    955             {
    956                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    957                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    958                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    959                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    960                 CHECK_BETTER
    961             }
    962         }else
    963         {
    964             for (i = 0; i < 3; i++)
    965             {
    966                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    967                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    968                 CHECK_POINT
    969                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    970                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    971                 CHECK_BETTER
    972             }
    973         }
    974 
    975         if (best_site == -1)
    976             break;
    977         else
    978         {
    979             br += next_chkpts[k][best_site].row;
    980             bc += next_chkpts[k][best_site].col;
    981             k += 5 + best_site;
    982             if (k >= 12) k -= 12;
    983             else if (k >= 6) k -= 6;
    984         }
    985     }
    986 
    987     /* check 4 1-away neighbors */
    988 cal_neighbors:
    989     for (j = 0; j < dia_range; j++)
    990     {
    991         best_site = -1;
    992         CHECK_BOUNDS(1)
    993 
    994         if(all_in)
    995         {
    996             for (i = 0; i < 4; i++)
    997             {
    998                 this_mv.as_mv.row = br + neighbors[i].row;
    999                 this_mv.as_mv.col = bc + neighbors[i].col;
   1000                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1001                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1002                 CHECK_BETTER
   1003             }
   1004         }else
   1005         {
   1006             for (i = 0; i < 4; i++)
   1007             {
   1008                 this_mv.as_mv.row = br + neighbors[i].row;
   1009                 this_mv.as_mv.col = bc + neighbors[i].col;
   1010                 CHECK_POINT
   1011                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1012                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1013                 CHECK_BETTER
   1014             }
   1015         }
   1016 
   1017         if (best_site == -1)
   1018             break;
   1019         else
   1020         {
   1021             br += neighbors[best_site].row;
   1022             bc += neighbors[best_site].col;
   1023         }
   1024     }
   1025 
   1026     best_mv->as_mv.row = br;
   1027     best_mv->as_mv.col = bc;
   1028 
   1029     return bestsad;
   1030 }
   1031 #undef CHECK_BOUNDS
   1032 #undef CHECK_POINT
   1033 #undef CHECK_BETTER
   1034 
   1035 int vp8_diamond_search_sad_c
   1036 (
   1037     MACROBLOCK *x,
   1038     BLOCK *b,
   1039     BLOCKD *d,
   1040     int_mv *ref_mv,
   1041     int_mv *best_mv,
   1042     int search_param,
   1043     int sad_per_bit,
   1044     int *num00,
   1045     vp8_variance_fn_ptr_t *fn_ptr,
   1046     int *mvcost[2],
   1047     int_mv *center_mv
   1048 )
   1049 {
   1050     int i, j, step;
   1051 
   1052     unsigned char *what = (*(b->base_src) + b->src);
   1053     int what_stride = b->src_stride;
   1054     unsigned char *in_what;
   1055     int pre_stride = x->e_mbd.pre.y_stride;
   1056     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1057     int in_what_stride = pre_stride;
   1058     unsigned char *best_address;
   1059 
   1060     int tot_steps;
   1061     int_mv this_mv;
   1062 
   1063     unsigned int bestsad;
   1064     unsigned int thissad;
   1065     int best_site = 0;
   1066     int last_site = 0;
   1067 
   1068     int ref_row;
   1069     int ref_col;
   1070     int this_row_offset;
   1071     int this_col_offset;
   1072     search_site *ss;
   1073 
   1074     unsigned char *check_here;
   1075 
   1076     int *mvsadcost[2];
   1077     int_mv fcenter_mv;
   1078 
   1079     mvsadcost[0] = x->mvsadcost[0];
   1080     mvsadcost[1] = x->mvsadcost[1];
   1081     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1082     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1083 
   1084     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1085     ref_row = ref_mv->as_mv.row;
   1086     ref_col = ref_mv->as_mv.col;
   1087     *num00 = 0;
   1088     best_mv->as_mv.row = ref_row;
   1089     best_mv->as_mv.col = ref_col;
   1090 
   1091     /* Work out the start point for the search */
   1092     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1093     best_address = in_what;
   1094 
   1095     /* Check the starting position */
   1096     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1097             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1098 
   1099     /* search_param determines the length of the initial step and hence
   1100      * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   1101      * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1102      */
   1103     ss = &x->ss[search_param * x->searches_per_step];
   1104     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1105 
   1106     i = 1;
   1107 
   1108     for (step = 0; step < tot_steps ; step++)
   1109     {
   1110         for (j = 0 ; j < x->searches_per_step ; j++)
   1111         {
   1112             /* Trap illegal vectors */
   1113             this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1114             this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1115 
   1116             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1117             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1118 
   1119             {
   1120                 check_here = ss[i].offset + best_address;
   1121                 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1122 
   1123                 if (thissad < bestsad)
   1124                 {
   1125                     this_mv.as_mv.row = this_row_offset;
   1126                     this_mv.as_mv.col = this_col_offset;
   1127                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1128                                               mvsadcost, sad_per_bit);
   1129 
   1130                     if (thissad < bestsad)
   1131                     {
   1132                         bestsad = thissad;
   1133                         best_site = i;
   1134                     }
   1135                 }
   1136             }
   1137 
   1138             i++;
   1139         }
   1140 
   1141         if (best_site != last_site)
   1142         {
   1143             best_mv->as_mv.row += ss[best_site].mv.row;
   1144             best_mv->as_mv.col += ss[best_site].mv.col;
   1145             best_address += ss[best_site].offset;
   1146             last_site = best_site;
   1147         }
   1148         else if (best_address == in_what)
   1149             (*num00)++;
   1150     }
   1151 
   1152     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1153     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1154 
   1155     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1156            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1157 }
   1158 
   1159 int vp8_diamond_search_sadx4
   1160 (
   1161     MACROBLOCK *x,
   1162     BLOCK *b,
   1163     BLOCKD *d,
   1164     int_mv *ref_mv,
   1165     int_mv *best_mv,
   1166     int search_param,
   1167     int sad_per_bit,
   1168     int *num00,
   1169     vp8_variance_fn_ptr_t *fn_ptr,
   1170     int *mvcost[2],
   1171     int_mv *center_mv
   1172 )
   1173 {
   1174     int i, j, step;
   1175 
   1176     unsigned char *what = (*(b->base_src) + b->src);
   1177     int what_stride = b->src_stride;
   1178     unsigned char *in_what;
   1179     int pre_stride = x->e_mbd.pre.y_stride;
   1180     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1181     int in_what_stride = pre_stride;
   1182     unsigned char *best_address;
   1183 
   1184     int tot_steps;
   1185     int_mv this_mv;
   1186 
   1187     unsigned int bestsad;
   1188     unsigned int thissad;
   1189     int best_site = 0;
   1190     int last_site = 0;
   1191 
   1192     int ref_row;
   1193     int ref_col;
   1194     int this_row_offset;
   1195     int this_col_offset;
   1196     search_site *ss;
   1197 
   1198     unsigned char *check_here;
   1199 
   1200     int *mvsadcost[2];
   1201     int_mv fcenter_mv;
   1202 
   1203     mvsadcost[0] = x->mvsadcost[0];
   1204     mvsadcost[1] = x->mvsadcost[1];
   1205     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1206     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1207 
   1208     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1209     ref_row = ref_mv->as_mv.row;
   1210     ref_col = ref_mv->as_mv.col;
   1211     *num00 = 0;
   1212     best_mv->as_mv.row = ref_row;
   1213     best_mv->as_mv.col = ref_col;
   1214 
   1215     /* Work out the start point for the search */
   1216     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1217     best_address = in_what;
   1218 
   1219     /* Check the starting position */
   1220     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1221             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1222 
   1223     /* search_param determines the length of the initial step and hence the
   1224      * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   1225      * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1226      */
   1227     ss = &x->ss[search_param * x->searches_per_step];
   1228     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1229 
   1230     i = 1;
   1231 
   1232     for (step = 0; step < tot_steps ; step++)
   1233     {
   1234         int all_in = 1, t;
   1235 
   1236         /* To know if all neighbor points are within the bounds, 4 bounds
   1237          * checking are enough instead of checking 4 bounds for each
   1238          * points.
   1239          */
   1240         all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
   1241         all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
   1242         all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
   1243         all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
   1244 
   1245         if (all_in)
   1246         {
   1247             unsigned int sad_array[4];
   1248 
   1249             for (j = 0 ; j < x->searches_per_step ; j += 4)
   1250             {
   1251                 const unsigned char *block_offset[4];
   1252 
   1253                 for (t = 0; t < 4; t++)
   1254                     block_offset[t] = ss[i+t].offset + best_address;
   1255 
   1256                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1257 
   1258                 for (t = 0; t < 4; t++, i++)
   1259                 {
   1260                     if (sad_array[t] < bestsad)
   1261                     {
   1262                         this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
   1263                         this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
   1264                         sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
   1265                                                        mvsadcost, sad_per_bit);
   1266 
   1267                         if (sad_array[t] < bestsad)
   1268                         {
   1269                             bestsad = sad_array[t];
   1270                             best_site = i;
   1271                         }
   1272                     }
   1273                 }
   1274             }
   1275         }
   1276         else
   1277         {
   1278             for (j = 0 ; j < x->searches_per_step ; j++)
   1279             {
   1280                 /* Trap illegal vectors */
   1281                 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1282                 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1283 
   1284                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1285                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1286                 {
   1287                     check_here = ss[i].offset + best_address;
   1288                     thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1289 
   1290                     if (thissad < bestsad)
   1291                     {
   1292                         this_mv.as_mv.row = this_row_offset;
   1293                         this_mv.as_mv.col = this_col_offset;
   1294                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1295                                                   mvsadcost, sad_per_bit);
   1296 
   1297                         if (thissad < bestsad)
   1298                         {
   1299                             bestsad = thissad;
   1300                             best_site = i;
   1301                         }
   1302                     }
   1303                 }
   1304                 i++;
   1305             }
   1306         }
   1307 
   1308         if (best_site != last_site)
   1309         {
   1310             best_mv->as_mv.row += ss[best_site].mv.row;
   1311             best_mv->as_mv.col += ss[best_site].mv.col;
   1312             best_address += ss[best_site].offset;
   1313             last_site = best_site;
   1314         }
   1315         else if (best_address == in_what)
   1316             (*num00)++;
   1317     }
   1318 
   1319     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1320     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1321 
   1322     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1323            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1324 }
   1325 
   1326 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1327                         int sad_per_bit, int distance,
   1328                         vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1329                         int_mv *center_mv)
   1330 {
   1331     unsigned char *what = (*(b->base_src) + b->src);
   1332     int what_stride = b->src_stride;
   1333     unsigned char *in_what;
   1334     int pre_stride = x->e_mbd.pre.y_stride;
   1335     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1336     int in_what_stride = pre_stride;
   1337     int mv_stride = pre_stride;
   1338     unsigned char *bestaddress;
   1339     int_mv *best_mv = &d->bmi.mv;
   1340     int_mv this_mv;
   1341     unsigned int bestsad;
   1342     unsigned int thissad;
   1343     int r, c;
   1344 
   1345     unsigned char *check_here;
   1346 
   1347     int ref_row = ref_mv->as_mv.row;
   1348     int ref_col = ref_mv->as_mv.col;
   1349 
   1350     int row_min = ref_row - distance;
   1351     int row_max = ref_row + distance;
   1352     int col_min = ref_col - distance;
   1353     int col_max = ref_col + distance;
   1354 
   1355     int *mvsadcost[2];
   1356     int_mv fcenter_mv;
   1357 
   1358     mvsadcost[0] = x->mvsadcost[0];
   1359     mvsadcost[1] = x->mvsadcost[1];
   1360     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1361     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1362 
   1363     /* Work out the mid point for the search */
   1364     in_what = base_pre + d->offset;
   1365     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1366 
   1367     best_mv->as_mv.row = ref_row;
   1368     best_mv->as_mv.col = ref_col;
   1369 
   1370     /* Baseline value at the centre */
   1371     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1372                           in_what_stride, UINT_MAX)
   1373             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1374 
   1375     /* Apply further limits to prevent us looking using vectors that
   1376      * stretch beyiond the UMV border
   1377      */
   1378     if (col_min < x->mv_col_min)
   1379         col_min = x->mv_col_min;
   1380 
   1381     if (col_max > x->mv_col_max)
   1382         col_max = x->mv_col_max;
   1383 
   1384     if (row_min < x->mv_row_min)
   1385         row_min = x->mv_row_min;
   1386 
   1387     if (row_max > x->mv_row_max)
   1388         row_max = x->mv_row_max;
   1389 
   1390     for (r = row_min; r < row_max ; r++)
   1391     {
   1392         this_mv.as_mv.row = r;
   1393         check_here = r * mv_stride + in_what + col_min;
   1394 
   1395         for (c = col_min; c < col_max; c++)
   1396         {
   1397             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1398 
   1399             this_mv.as_mv.col = c;
   1400             thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1401                                       mvsadcost, sad_per_bit);
   1402 
   1403             if (thissad < bestsad)
   1404             {
   1405                 bestsad = thissad;
   1406                 best_mv->as_mv.row = r;
   1407                 best_mv->as_mv.col = c;
   1408                 bestaddress = check_here;
   1409             }
   1410 
   1411             check_here++;
   1412         }
   1413     }
   1414 
   1415     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1416     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1417 
   1418     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1419            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1420 }
   1421 
   1422 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1423                           int sad_per_bit, int distance,
   1424                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1425                           int_mv *center_mv)
   1426 {
   1427     unsigned char *what = (*(b->base_src) + b->src);
   1428     int what_stride = b->src_stride;
   1429     unsigned char *in_what;
   1430     int pre_stride = x->e_mbd.pre.y_stride;
   1431     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1432     int in_what_stride = pre_stride;
   1433     int mv_stride = pre_stride;
   1434     unsigned char *bestaddress;
   1435     int_mv *best_mv = &d->bmi.mv;
   1436     int_mv this_mv;
   1437     unsigned int bestsad;
   1438     unsigned int thissad;
   1439     int r, c;
   1440 
   1441     unsigned char *check_here;
   1442 
   1443     int ref_row = ref_mv->as_mv.row;
   1444     int ref_col = ref_mv->as_mv.col;
   1445 
   1446     int row_min = ref_row - distance;
   1447     int row_max = ref_row + distance;
   1448     int col_min = ref_col - distance;
   1449     int col_max = ref_col + distance;
   1450 
   1451     unsigned int sad_array[3];
   1452 
   1453     int *mvsadcost[2];
   1454     int_mv fcenter_mv;
   1455 
   1456     mvsadcost[0] = x->mvsadcost[0];
   1457     mvsadcost[1] = x->mvsadcost[1];
   1458     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1459     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1460 
   1461     /* Work out the mid point for the search */
   1462     in_what = base_pre + d->offset;
   1463     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1464 
   1465     best_mv->as_mv.row = ref_row;
   1466     best_mv->as_mv.col = ref_col;
   1467 
   1468     /* Baseline value at the centre */
   1469     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1470                           in_what_stride, UINT_MAX)
   1471             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1472 
   1473     /* Apply further limits to prevent us looking using vectors that stretch
   1474      * beyond the UMV border
   1475      */
   1476     if (col_min < x->mv_col_min)
   1477         col_min = x->mv_col_min;
   1478 
   1479     if (col_max > x->mv_col_max)
   1480         col_max = x->mv_col_max;
   1481 
   1482     if (row_min < x->mv_row_min)
   1483         row_min = x->mv_row_min;
   1484 
   1485     if (row_max > x->mv_row_max)
   1486         row_max = x->mv_row_max;
   1487 
   1488     for (r = row_min; r < row_max ; r++)
   1489     {
   1490         this_mv.as_mv.row = r;
   1491         check_here = r * mv_stride + in_what + col_min;
   1492         c = col_min;
   1493 
   1494         while ((c + 2) < col_max)
   1495         {
   1496             int i;
   1497 
   1498             fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1499 
   1500             for (i = 0; i < 3; i++)
   1501             {
   1502                 thissad = sad_array[i];
   1503 
   1504                 if (thissad < bestsad)
   1505                 {
   1506                     this_mv.as_mv.col = c;
   1507                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1508                                               mvsadcost, sad_per_bit);
   1509 
   1510                     if (thissad < bestsad)
   1511                     {
   1512                         bestsad = thissad;
   1513                         best_mv->as_mv.row = r;
   1514                         best_mv->as_mv.col = c;
   1515                         bestaddress = check_here;
   1516                     }
   1517                 }
   1518 
   1519                 check_here++;
   1520                 c++;
   1521             }
   1522         }
   1523 
   1524         while (c < col_max)
   1525         {
   1526             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1527 
   1528             if (thissad < bestsad)
   1529             {
   1530                 this_mv.as_mv.col = c;
   1531                 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1532                                           mvsadcost, sad_per_bit);
   1533 
   1534                 if (thissad < bestsad)
   1535                 {
   1536                     bestsad = thissad;
   1537                     best_mv->as_mv.row = r;
   1538                     best_mv->as_mv.col = c;
   1539                     bestaddress = check_here;
   1540                 }
   1541             }
   1542 
   1543             check_here ++;
   1544             c ++;
   1545         }
   1546 
   1547     }
   1548 
   1549     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1550     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1551 
   1552     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1553            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1554 }
   1555 
   1556 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1557                           int sad_per_bit, int distance,
   1558                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1559                           int_mv *center_mv)
   1560 {
   1561     unsigned char *what = (*(b->base_src) + b->src);
   1562     int what_stride = b->src_stride;
   1563     int pre_stride = x->e_mbd.pre.y_stride;
   1564     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1565     unsigned char *in_what;
   1566     int in_what_stride = pre_stride;
   1567     int mv_stride = pre_stride;
   1568     unsigned char *bestaddress;
   1569     int_mv *best_mv = &d->bmi.mv;
   1570     int_mv this_mv;
   1571     unsigned int bestsad;
   1572     unsigned int thissad;
   1573     int r, c;
   1574 
   1575     unsigned char *check_here;
   1576 
   1577     int ref_row = ref_mv->as_mv.row;
   1578     int ref_col = ref_mv->as_mv.col;
   1579 
   1580     int row_min = ref_row - distance;
   1581     int row_max = ref_row + distance;
   1582     int col_min = ref_col - distance;
   1583     int col_max = ref_col + distance;
   1584 
   1585     DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
   1586     unsigned int sad_array[3];
   1587 
   1588     int *mvsadcost[2];
   1589     int_mv fcenter_mv;
   1590 
   1591     mvsadcost[0] = x->mvsadcost[0];
   1592     mvsadcost[1] = x->mvsadcost[1];
   1593     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1594     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1595 
   1596     /* Work out the mid point for the search */
   1597     in_what = base_pre + d->offset;
   1598     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1599 
   1600     best_mv->as_mv.row = ref_row;
   1601     best_mv->as_mv.col = ref_col;
   1602 
   1603     /* Baseline value at the centre */
   1604     bestsad = fn_ptr->sdf(what, what_stride,
   1605                           bestaddress, in_what_stride, UINT_MAX)
   1606             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1607 
   1608     /* Apply further limits to prevent us looking using vectors that stretch
   1609      * beyond the UMV border
   1610      */
   1611     if (col_min < x->mv_col_min)
   1612         col_min = x->mv_col_min;
   1613 
   1614     if (col_max > x->mv_col_max)
   1615         col_max = x->mv_col_max;
   1616 
   1617     if (row_min < x->mv_row_min)
   1618         row_min = x->mv_row_min;
   1619 
   1620     if (row_max > x->mv_row_max)
   1621         row_max = x->mv_row_max;
   1622 
   1623     for (r = row_min; r < row_max ; r++)
   1624     {
   1625         this_mv.as_mv.row = r;
   1626         check_here = r * mv_stride + in_what + col_min;
   1627         c = col_min;
   1628 
   1629         while ((c + 7) < col_max)
   1630         {
   1631             int i;
   1632 
   1633             fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
   1634 
   1635             for (i = 0; i < 8; i++)
   1636             {
   1637                 thissad = sad_array8[i];
   1638 
   1639                 if (thissad < bestsad)
   1640                 {
   1641                     this_mv.as_mv.col = c;
   1642                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1643                                               mvsadcost, sad_per_bit);
   1644 
   1645                     if (thissad < bestsad)
   1646                     {
   1647                         bestsad = thissad;
   1648                         best_mv->as_mv.row = r;
   1649                         best_mv->as_mv.col = c;
   1650                         bestaddress = check_here;
   1651                     }
   1652                 }
   1653 
   1654                 check_here++;
   1655                 c++;
   1656             }
   1657         }
   1658 
   1659         while ((c + 2) < col_max)
   1660         {
   1661             int i;
   1662 
   1663             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1664 
   1665             for (i = 0; i < 3; i++)
   1666             {
   1667                 thissad = sad_array[i];
   1668 
   1669                 if (thissad < bestsad)
   1670                 {
   1671                     this_mv.as_mv.col = c;
   1672                     thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1673                         mvsadcost, sad_per_bit);
   1674 
   1675                     if (thissad < bestsad)
   1676                     {
   1677                         bestsad = thissad;
   1678                         best_mv->as_mv.row = r;
   1679                         best_mv->as_mv.col = c;
   1680                         bestaddress = check_here;
   1681                     }
   1682                 }
   1683 
   1684                 check_here++;
   1685                 c++;
   1686             }
   1687         }
   1688 
   1689         while (c < col_max)
   1690         {
   1691             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1692 
   1693             if (thissad < bestsad)
   1694             {
   1695                 this_mv.as_mv.col = c;
   1696                 thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1697                     mvsadcost, sad_per_bit);
   1698 
   1699                 if (thissad < bestsad)
   1700                 {
   1701                     bestsad = thissad;
   1702                     best_mv->as_mv.row = r;
   1703                     best_mv->as_mv.col = c;
   1704                     bestaddress = check_here;
   1705                 }
   1706             }
   1707 
   1708             check_here ++;
   1709             c ++;
   1710         }
   1711     }
   1712 
   1713     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1714     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1715 
   1716     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1717            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1718 }
   1719 
   1720 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1721                             int error_per_bit, int search_range,
   1722                             vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1723                             int_mv *center_mv)
   1724 {
   1725     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1726     int i, j;
   1727     short this_row_offset, this_col_offset;
   1728 
   1729     int what_stride = b->src_stride;
   1730     int pre_stride = x->e_mbd.pre.y_stride;
   1731     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1732     int in_what_stride = pre_stride;
   1733     unsigned char *what = (*(b->base_src) + b->src);
   1734     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1735         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1736     unsigned char *check_here;
   1737     int_mv this_mv;
   1738     unsigned int bestsad;
   1739     unsigned int thissad;
   1740 
   1741     int *mvsadcost[2];
   1742     int_mv fcenter_mv;
   1743 
   1744     mvsadcost[0] = x->mvsadcost[0];
   1745     mvsadcost[1] = x->mvsadcost[1];
   1746     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1747     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1748 
   1749     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1750                           in_what_stride, UINT_MAX)
   1751             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1752 
   1753     for (i=0; i<search_range; i++)
   1754     {
   1755         int best_site = -1;
   1756 
   1757         for (j = 0 ; j < 4 ; j++)
   1758         {
   1759             this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1760             this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1761 
   1762             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1763             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1764             {
   1765                 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1766                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1767 
   1768                 if (thissad < bestsad)
   1769                 {
   1770                     this_mv.as_mv.row = this_row_offset;
   1771                     this_mv.as_mv.col = this_col_offset;
   1772                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1773 
   1774                     if (thissad < bestsad)
   1775                     {
   1776                         bestsad = thissad;
   1777                         best_site = j;
   1778                     }
   1779                 }
   1780             }
   1781         }
   1782 
   1783         if (best_site == -1)
   1784             break;
   1785         else
   1786         {
   1787             ref_mv->as_mv.row += neighbors[best_site].row;
   1788             ref_mv->as_mv.col += neighbors[best_site].col;
   1789             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1790         }
   1791     }
   1792 
   1793     this_mv.as_mv.row = ref_mv->as_mv.row << 3;
   1794     this_mv.as_mv.col = ref_mv->as_mv.col << 3;
   1795 
   1796     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1797            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1798 }
   1799 
   1800 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1801                               int_mv *ref_mv, int error_per_bit,
   1802                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1803                               int *mvcost[2], int_mv *center_mv)
   1804 {
   1805     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1806     int i, j;
   1807     short this_row_offset, this_col_offset;
   1808 
   1809     int what_stride = b->src_stride;
   1810     int pre_stride = x->e_mbd.pre.y_stride;
   1811     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1812     int in_what_stride = pre_stride;
   1813     unsigned char *what = (*(b->base_src) + b->src);
   1814     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1815         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1816     unsigned char *check_here;
   1817     int_mv this_mv;
   1818     unsigned int bestsad;
   1819     unsigned int thissad;
   1820 
   1821     int *mvsadcost[2];
   1822     int_mv fcenter_mv;
   1823 
   1824     mvsadcost[0] = x->mvsadcost[0];
   1825     mvsadcost[1] = x->mvsadcost[1];
   1826     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1827     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1828 
   1829     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1830                           in_what_stride, UINT_MAX)
   1831             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1832 
   1833     for (i=0; i<search_range; i++)
   1834     {
   1835         int best_site = -1;
   1836         int all_in = 1;
   1837 
   1838         all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
   1839         all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
   1840         all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
   1841         all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
   1842 
   1843         if(all_in)
   1844         {
   1845             unsigned int sad_array[4];
   1846             const unsigned char *block_offset[4];
   1847             block_offset[0] = best_address - in_what_stride;
   1848             block_offset[1] = best_address - 1;
   1849             block_offset[2] = best_address + 1;
   1850             block_offset[3] = best_address + in_what_stride;
   1851 
   1852             fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1853 
   1854             for (j = 0; j < 4; j++)
   1855             {
   1856                 if (sad_array[j] < bestsad)
   1857                 {
   1858                     this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
   1859                     this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
   1860                     sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1861 
   1862                     if (sad_array[j] < bestsad)
   1863                     {
   1864                         bestsad = sad_array[j];
   1865                         best_site = j;
   1866                     }
   1867                 }
   1868             }
   1869         }
   1870         else
   1871         {
   1872             for (j = 0 ; j < 4 ; j++)
   1873             {
   1874                 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1875                 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1876 
   1877                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1878                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1879                 {
   1880                     check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1881                     thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1882 
   1883                     if (thissad < bestsad)
   1884                     {
   1885                         this_mv.as_mv.row = this_row_offset;
   1886                         this_mv.as_mv.col = this_col_offset;
   1887                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1888 
   1889                         if (thissad < bestsad)
   1890                         {
   1891                             bestsad = thissad;
   1892                             best_site = j;
   1893                         }
   1894                     }
   1895                 }
   1896             }
   1897         }
   1898 
   1899         if (best_site == -1)
   1900             break;
   1901         else
   1902         {
   1903             ref_mv->as_mv.row += neighbors[best_site].row;
   1904             ref_mv->as_mv.col += neighbors[best_site].col;
   1905             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1906         }
   1907     }
   1908 
   1909     this_mv.as_mv.row = ref_mv->as_mv.row << 3;
   1910     this_mv.as_mv.col = ref_mv->as_mv.col << 3;
   1911 
   1912     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1913            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1914 }
   1915 
   1916 #ifdef ENTROPY_STATS
   1917 void print_mode_context(void)
   1918 {
   1919     FILE *f = fopen("modecont.c", "w");
   1920     int i, j;
   1921 
   1922     fprintf(f, "#include \"entropy.h\"\n");
   1923     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1924     fprintf(f, "{\n");
   1925 
   1926     for (j = 0; j < 6; j++)
   1927     {
   1928         fprintf(f, "  { /* %d */\n", j);
   1929         fprintf(f, "    ");
   1930 
   1931         for (i = 0; i < 4; i++)
   1932         {
   1933             int overal_prob;
   1934             int this_prob;
   1935             int count;
   1936 
   1937             /* Overall probs */
   1938             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1939 
   1940             if (count)
   1941                 overal_prob = 256 * mv_mode_cts[i][0] / count;
   1942             else
   1943                 overal_prob = 128;
   1944 
   1945             if (overal_prob == 0)
   1946                 overal_prob = 1;
   1947 
   1948             /* context probs */
   1949             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1950 
   1951             if (count)
   1952                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1953             else
   1954                 this_prob = 128;
   1955 
   1956             if (this_prob == 0)
   1957                 this_prob = 1;
   1958 
   1959             fprintf(f, "%5d, ", this_prob);
   1960         }
   1961 
   1962         fprintf(f, "  },\n");
   1963     }
   1964 
   1965     fprintf(f, "};\n");
   1966     fclose(f);
   1967 }
   1968 
   1969 /* MV ref count ENTROPY_STATS stats code */
   1970 #ifdef ENTROPY_STATS
   1971 void init_mv_ref_counts()
   1972 {
   1973     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1974     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1975 }
   1976 
   1977 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
   1978 {
   1979     if (m == ZEROMV)
   1980     {
   1981         ++mv_ref_ct [ct[0]] [0] [0];
   1982         ++mv_mode_cts[0][0];
   1983     }
   1984     else
   1985     {
   1986         ++mv_ref_ct [ct[0]] [0] [1];
   1987         ++mv_mode_cts[0][1];
   1988 
   1989         if (m == NEARESTMV)
   1990         {
   1991             ++mv_ref_ct [ct[1]] [1] [0];
   1992             ++mv_mode_cts[1][0];
   1993         }
   1994         else
   1995         {
   1996             ++mv_ref_ct [ct[1]] [1] [1];
   1997             ++mv_mode_cts[1][1];
   1998 
   1999             if (m == NEARMV)
   2000             {
   2001                 ++mv_ref_ct [ct[2]] [2] [0];
   2002                 ++mv_mode_cts[2][0];
   2003             }
   2004             else
   2005             {
   2006                 ++mv_ref_ct [ct[2]] [2] [1];
   2007                 ++mv_mode_cts[2][1];
   2008 
   2009                 if (m == NEWMV)
   2010                 {
   2011                     ++mv_ref_ct [ct[3]] [3] [0];
   2012                     ++mv_mode_cts[3][0];
   2013                 }
   2014                 else
   2015                 {
   2016                     ++mv_ref_ct [ct[3]] [3] [1];
   2017                     ++mv_mode_cts[3][1];
   2018                 }
   2019             }
   2020         }
   2021     }
   2022 }
   2023 
   2024 #endif/* END MV ref count ENTROPY_STATS stats code */
   2025 
   2026 #endif
   2027