Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "onyx_int.h"
     13 #include "mcomp.h"
     14 #include "vpx_mem/vpx_mem.h"
     15 #include "vpx_config.h"
     16 #include <stdio.h>
     17 #include <limits.h>
     18 #include <math.h>
     19 #include "vp8/common/findnearmv.h"
     20 
     21 #ifdef VP8_ENTROPY_STATS
     22 static int mv_ref_ct [31] [4] [2];
     23 static int mv_mode_cts [4] [2];
     24 #endif
     25 
     26 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
     27 {
     28     /* MV costing is based on the distribution of vectors in the previous
     29      * frame and as such will tend to over state the cost of vectors. In
     30      * addition coding a new vector can have a knock on effect on the cost
     31      * of subsequent vectors and the quality of prediction from NEAR and
     32      * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
     33      * limited extent, for some account to be taken of these factors.
     34      */
     35     return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
     36 }
     37 
     38 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
     39 {
     40     /* Ignore mv costing if mvcost is NULL */
     41     if (mvcost)
     42         return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
     43                  mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
     44                  * error_per_bit + 128) >> 8;
     45     return 0;
     46 }
     47 
     48 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
     49 {
     50     /* Calculate sad error cost on full pixel basis. */
     51     /* Ignore mv costing if mvsadcost is NULL */
     52     if (mvsadcost)
     53         return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
     54                  mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
     55                 * error_per_bit + 128) >> 8;
     56     return 0;
     57 }
     58 
     59 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
     60 {
     61     int Len;
     62     int search_site_count = 0;
     63 
     64 
     65     /* Generate offsets for 4 search sites per step. */
     66     Len = MAX_FIRST_STEP;
     67     x->ss[search_site_count].mv.col = 0;
     68     x->ss[search_site_count].mv.row = 0;
     69     x->ss[search_site_count].offset = 0;
     70     search_site_count++;
     71 
     72     while (Len > 0)
     73     {
     74 
     75         /* Compute offsets for search sites. */
     76         x->ss[search_site_count].mv.col = 0;
     77         x->ss[search_site_count].mv.row = -Len;
     78         x->ss[search_site_count].offset = -Len * stride;
     79         search_site_count++;
     80 
     81         /* Compute offsets for search sites. */
     82         x->ss[search_site_count].mv.col = 0;
     83         x->ss[search_site_count].mv.row = Len;
     84         x->ss[search_site_count].offset = Len * stride;
     85         search_site_count++;
     86 
     87         /* Compute offsets for search sites. */
     88         x->ss[search_site_count].mv.col = -Len;
     89         x->ss[search_site_count].mv.row = 0;
     90         x->ss[search_site_count].offset = -Len;
     91         search_site_count++;
     92 
     93         /* Compute offsets for search sites. */
     94         x->ss[search_site_count].mv.col = Len;
     95         x->ss[search_site_count].mv.row = 0;
     96         x->ss[search_site_count].offset = Len;
     97         search_site_count++;
     98 
     99         /* Contract. */
    100         Len /= 2;
    101     }
    102 
    103     x->ss_count = search_site_count;
    104     x->searches_per_step = 4;
    105 }
    106 
    107 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
    108 {
    109     int Len;
    110     int search_site_count = 0;
    111 
    112     /* Generate offsets for 8 search sites per step. */
    113     Len = MAX_FIRST_STEP;
    114     x->ss[search_site_count].mv.col = 0;
    115     x->ss[search_site_count].mv.row = 0;
    116     x->ss[search_site_count].offset = 0;
    117     search_site_count++;
    118 
    119     while (Len > 0)
    120     {
    121 
    122         /* Compute offsets for search sites. */
    123         x->ss[search_site_count].mv.col = 0;
    124         x->ss[search_site_count].mv.row = -Len;
    125         x->ss[search_site_count].offset = -Len * stride;
    126         search_site_count++;
    127 
    128         /* Compute offsets for search sites. */
    129         x->ss[search_site_count].mv.col = 0;
    130         x->ss[search_site_count].mv.row = Len;
    131         x->ss[search_site_count].offset = Len * stride;
    132         search_site_count++;
    133 
    134         /* Compute offsets for search sites. */
    135         x->ss[search_site_count].mv.col = -Len;
    136         x->ss[search_site_count].mv.row = 0;
    137         x->ss[search_site_count].offset = -Len;
    138         search_site_count++;
    139 
    140         /* Compute offsets for search sites. */
    141         x->ss[search_site_count].mv.col = Len;
    142         x->ss[search_site_count].mv.row = 0;
    143         x->ss[search_site_count].offset = Len;
    144         search_site_count++;
    145 
    146         /* Compute offsets for search sites. */
    147         x->ss[search_site_count].mv.col = -Len;
    148         x->ss[search_site_count].mv.row = -Len;
    149         x->ss[search_site_count].offset = -Len * stride - Len;
    150         search_site_count++;
    151 
    152         /* Compute offsets for search sites. */
    153         x->ss[search_site_count].mv.col = Len;
    154         x->ss[search_site_count].mv.row = -Len;
    155         x->ss[search_site_count].offset = -Len * stride + Len;
    156         search_site_count++;
    157 
    158         /* Compute offsets for search sites. */
    159         x->ss[search_site_count].mv.col = -Len;
    160         x->ss[search_site_count].mv.row = Len;
    161         x->ss[search_site_count].offset = Len * stride - Len;
    162         search_site_count++;
    163 
    164         /* Compute offsets for search sites. */
    165         x->ss[search_site_count].mv.col = Len;
    166         x->ss[search_site_count].mv.row = Len;
    167         x->ss[search_site_count].offset = Len * stride + Len;
    168         search_site_count++;
    169 
    170 
    171         /* Contract. */
    172         Len /= 2;
    173     }
    174 
    175     x->ss_count = search_site_count;
    176     x->searches_per_step = 8;
    177 }
    178 
/*
 * To avoid the penalty of reads that cross a cache line, the reference area
 * is first copied into a small aligned buffer, so that reads from that
 * buffer never straddle a cache line. This reduces the CPU cycles spent
 * reading reference data in the sub-pixel filter functions.
 * TODO: The sub-pixel search range here is -3 ~ 3, so a 22 rows x 32 cols
 * area is copied, which is enough for a 16x16 macroblock. For SPLITMV the
 * copied area could be reduced later.
 */
    188 
    189 /* estimated cost of a motion vector (r,c) */
    190 #define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
    191 /* pointer to predictor base of a motionvector */
    192 #define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
    193 /* convert motion vector component to offset for svf calc */
    194 #define SP(x) (((x)&3)<<1)
    195 /* returns subpixel variance error function. */
    196 #define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
    197 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
    198 /* returns distortion + motion vector cost */
    199 #define ERR(r,c) (MVC(r,c)+DIST(r,c))
    200 /* checks if (r,c) has better score than previous best */
    201 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
    202 
    203 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    204                                              int_mv *bestmv, int_mv *ref_mv,
    205                                              int error_per_bit,
    206                                              const vp8_variance_fn_ptr_t *vfp,
    207                                              int *mvcost[2], int *distortion,
    208                                              unsigned int *sse1)
    209 {
    210     unsigned char *z = (*(b->base_src) + b->src);
    211 
    212     int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
    213     int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
    214     int tr = br, tc = bc;
    215     unsigned int besterr;
    216     unsigned int left, right, up, down, diag;
    217     unsigned int sse;
    218     unsigned int whichdir;
    219     unsigned int halfiters = 4;
    220     unsigned int quarteriters = 4;
    221     int thismse;
    222 
    223     int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
    224     int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
    225     int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
    226     int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
    227 
    228     int y_stride;
    229     int offset;
    230     int pre_stride = x->e_mbd.pre.y_stride;
    231     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    232 
    233 
    234 #if ARCH_X86 || ARCH_X86_64
    235     MACROBLOCKD *xd = &x->e_mbd;
    236     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    237     unsigned char *y;
    238     int buf_r1, buf_r2, buf_c1;
    239 
    240     /* Clamping to avoid out-of-range data access */
    241     buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
    242     buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
    243     buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
    244     y_stride = 32;
    245 
    246     /* Copy to intermediate buffer before searching. */
    247     vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    248     y = xd->y_buf + y_stride*buf_r1 +buf_c1;
    249 #else
    250     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    251     y_stride = pre_stride;
    252 #endif
    253 
    254     offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
    255 
    256     /* central mv */
    257     bestmv->as_mv.row <<= 3;
    258     bestmv->as_mv.col <<= 3;
    259 
    260     /* calculate central point error */
    261     besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    262     *distortion = besterr;
    263     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    264 
    265     /* TODO: Each subsequent iteration checks at least one point in common
    266      * with the last iteration could be 2 ( if diag selected)
    267      */
    268     while (--halfiters)
    269     {
    270         /* 1/2 pel */
    271         CHECK_BETTER(left, tr, tc - 2);
    272         CHECK_BETTER(right, tr, tc + 2);
    273         CHECK_BETTER(up, tr - 2, tc);
    274         CHECK_BETTER(down, tr + 2, tc);
    275 
    276         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    277 
    278         switch (whichdir)
    279         {
    280         case 0:
    281             CHECK_BETTER(diag, tr - 2, tc - 2);
    282             break;
    283         case 1:
    284             CHECK_BETTER(diag, tr - 2, tc + 2);
    285             break;
    286         case 2:
    287             CHECK_BETTER(diag, tr + 2, tc - 2);
    288             break;
    289         case 3:
    290             CHECK_BETTER(diag, tr + 2, tc + 2);
    291             break;
    292         }
    293 
    294         /* no reason to check the same one again. */
    295         if (tr == br && tc == bc)
    296             break;
    297 
    298         tr = br;
    299         tc = bc;
    300     }
    301 
    302     /* TODO: Each subsequent iteration checks at least one point in common
    303      * with the last iteration could be 2 ( if diag selected)
    304      */
    305 
    306     /* 1/4 pel */
    307     while (--quarteriters)
    308     {
    309         CHECK_BETTER(left, tr, tc - 1);
    310         CHECK_BETTER(right, tr, tc + 1);
    311         CHECK_BETTER(up, tr - 1, tc);
    312         CHECK_BETTER(down, tr + 1, tc);
    313 
    314         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    315 
    316         switch (whichdir)
    317         {
    318         case 0:
    319             CHECK_BETTER(diag, tr - 1, tc - 1);
    320             break;
    321         case 1:
    322             CHECK_BETTER(diag, tr - 1, tc + 1);
    323             break;
    324         case 2:
    325             CHECK_BETTER(diag, tr + 1, tc - 1);
    326             break;
    327         case 3:
    328             CHECK_BETTER(diag, tr + 1, tc + 1);
    329             break;
    330         }
    331 
    332         /* no reason to check the same one again. */
    333         if (tr == br && tc == bc)
    334             break;
    335 
    336         tr = br;
    337         tc = bc;
    338     }
    339 
    340     bestmv->as_mv.row = br << 1;
    341     bestmv->as_mv.col = bc << 1;
    342 
    343     if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
    344         (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
    345         return INT_MAX;
    346 
    347     return besterr;
    348 }
    349 #undef MVC
    350 #undef PRE
    351 #undef SP
    352 #undef DIST
    353 #undef IFMVCV
    354 #undef ERR
    355 #undef CHECK_BETTER
    356 
    357 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    358                                  int_mv *bestmv, int_mv *ref_mv,
    359                                  int error_per_bit,
    360                                  const vp8_variance_fn_ptr_t *vfp,
    361                                  int *mvcost[2], int *distortion,
    362                                  unsigned int *sse1)
    363 {
    364     int bestmse = INT_MAX;
    365     int_mv startmv;
    366     int_mv this_mv;
    367     unsigned char *z = (*(b->base_src) + b->src);
    368     int left, right, up, down, diag;
    369     unsigned int sse;
    370     int whichdir ;
    371     int thismse;
    372     int y_stride;
    373     int pre_stride = x->e_mbd.pre.y_stride;
    374     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    375 
    376 #if ARCH_X86 || ARCH_X86_64
    377     MACROBLOCKD *xd = &x->e_mbd;
    378     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    379     unsigned char *y;
    380 
    381     y_stride = 32;
    382     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    383      vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    384      y = xd->y_buf + y_stride + 1;
    385 #else
    386      unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    387      y_stride = pre_stride;
    388 #endif
    389 
    390     /* central mv */
    391     bestmv->as_mv.row <<= 3;
    392     bestmv->as_mv.col <<= 3;
    393     startmv = *bestmv;
    394 
    395     /* calculate central point error */
    396     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    397     *distortion = bestmse;
    398     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    399 
    400     /* go left then right and check error */
    401     this_mv.as_mv.row = startmv.as_mv.row;
    402     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    403     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    404     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    405 
    406     if (left < bestmse)
    407     {
    408         *bestmv = this_mv;
    409         bestmse = left;
    410         *distortion = thismse;
    411         *sse1 = sse;
    412     }
    413 
    414     this_mv.as_mv.col += 8;
    415     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    416     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    417 
    418     if (right < bestmse)
    419     {
    420         *bestmv = this_mv;
    421         bestmse = right;
    422         *distortion = thismse;
    423         *sse1 = sse;
    424     }
    425 
    426     /* go up then down and check error */
    427     this_mv.as_mv.col = startmv.as_mv.col;
    428     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    429     thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    430     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    431 
    432     if (up < bestmse)
    433     {
    434         *bestmv = this_mv;
    435         bestmse = up;
    436         *distortion = thismse;
    437         *sse1 = sse;
    438     }
    439 
    440     this_mv.as_mv.row += 8;
    441     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    442     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    443 
    444     if (down < bestmse)
    445     {
    446         *bestmv = this_mv;
    447         bestmse = down;
    448         *distortion = thismse;
    449         *sse1 = sse;
    450     }
    451 
    452 
    453     /* now check 1 more diagonal */
    454     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    455     this_mv = startmv;
    456 
    457     switch (whichdir)
    458     {
    459     case 0:
    460         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    461         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    462         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    463         break;
    464     case 1:
    465         this_mv.as_mv.col += 4;
    466         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    467         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    468         break;
    469     case 2:
    470         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    471         this_mv.as_mv.row += 4;
    472         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    473         break;
    474     case 3:
    475     default:
    476         this_mv.as_mv.col += 4;
    477         this_mv.as_mv.row += 4;
    478         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    479         break;
    480     }
    481 
    482     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    483 
    484     if (diag < bestmse)
    485     {
    486         *bestmv = this_mv;
    487         bestmse = diag;
    488         *distortion = thismse;
    489         *sse1 = sse;
    490     }
    491 
    492 
    493     /* time to check quarter pels. */
    494     if (bestmv->as_mv.row < startmv.as_mv.row)
    495         y -= y_stride;
    496 
    497     if (bestmv->as_mv.col < startmv.as_mv.col)
    498         y--;
    499 
    500     startmv = *bestmv;
    501 
    502 
    503 
    504     /* go left then right and check error */
    505     this_mv.as_mv.row = startmv.as_mv.row;
    506 
    507     if (startmv.as_mv.col & 7)
    508     {
    509         this_mv.as_mv.col = startmv.as_mv.col - 2;
    510         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    511     }
    512     else
    513     {
    514         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    515         thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    516     }
    517 
    518     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    519 
    520     if (left < bestmse)
    521     {
    522         *bestmv = this_mv;
    523         bestmse = left;
    524         *distortion = thismse;
    525         *sse1 = sse;
    526     }
    527 
    528     this_mv.as_mv.col += 4;
    529     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    530     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    531 
    532     if (right < bestmse)
    533     {
    534         *bestmv = this_mv;
    535         bestmse = right;
    536         *distortion = thismse;
    537         *sse1 = sse;
    538     }
    539 
    540     /* go up then down and check error */
    541     this_mv.as_mv.col = startmv.as_mv.col;
    542 
    543     if (startmv.as_mv.row & 7)
    544     {
    545         this_mv.as_mv.row = startmv.as_mv.row - 2;
    546         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    547     }
    548     else
    549     {
    550         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    551         thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    552     }
    553 
    554     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    555 
    556     if (up < bestmse)
    557     {
    558         *bestmv = this_mv;
    559         bestmse = up;
    560         *distortion = thismse;
    561         *sse1 = sse;
    562     }
    563 
    564     this_mv.as_mv.row += 4;
    565     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    566     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    567 
    568     if (down < bestmse)
    569     {
    570         *bestmv = this_mv;
    571         bestmse = down;
    572         *distortion = thismse;
    573         *sse1 = sse;
    574     }
    575 
    576 
    577     /* now check 1 more diagonal */
    578     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    579 
    580     this_mv = startmv;
    581 
    582     switch (whichdir)
    583     {
    584     case 0:
    585 
    586         if (startmv.as_mv.row & 7)
    587         {
    588             this_mv.as_mv.row -= 2;
    589 
    590             if (startmv.as_mv.col & 7)
    591             {
    592                 this_mv.as_mv.col -= 2;
    593                 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    594             }
    595             else
    596             {
    597                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    598                 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
    599             }
    600         }
    601         else
    602         {
    603             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    604 
    605             if (startmv.as_mv.col & 7)
    606             {
    607                 this_mv.as_mv.col -= 2;
    608                 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    609             }
    610             else
    611             {
    612                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    613                 thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
    614             }
    615         }
    616 
    617         break;
    618     case 1:
    619         this_mv.as_mv.col += 2;
    620 
    621         if (startmv.as_mv.row & 7)
    622         {
    623             this_mv.as_mv.row -= 2;
    624             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    625         }
    626         else
    627         {
    628             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    629             thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    630         }
    631 
    632         break;
    633     case 2:
    634         this_mv.as_mv.row += 2;
    635 
    636         if (startmv.as_mv.col & 7)
    637         {
    638             this_mv.as_mv.col -= 2;
    639             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    640         }
    641         else
    642         {
    643             this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    644             thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    645         }
    646 
    647         break;
    648     case 3:
    649         this_mv.as_mv.col += 2;
    650         this_mv.as_mv.row += 2;
    651         thismse = vfp->svf(y, y_stride,  this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    652         break;
    653     }
    654 
    655     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    656 
    657     if (diag < bestmse)
    658     {
    659         *bestmv = this_mv;
    660         bestmse = diag;
    661         *distortion = thismse;
    662         *sse1 = sse;
    663     }
    664 
    665     return bestmse;
    666 }
    667 
    668 int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    669                                   int_mv *bestmv, int_mv *ref_mv,
    670                                   int error_per_bit,
    671                                   const vp8_variance_fn_ptr_t *vfp,
    672                                   int *mvcost[2], int *distortion,
    673                                   unsigned int *sse1)
    674 {
    675     int bestmse = INT_MAX;
    676     int_mv startmv;
    677     int_mv this_mv;
    678     unsigned char *z = (*(b->base_src) + b->src);
    679     int left, right, up, down, diag;
    680     unsigned int sse;
    681     int whichdir ;
    682     int thismse;
    683     int y_stride;
    684     int pre_stride = x->e_mbd.pre.y_stride;
    685     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    686 
    687 #if ARCH_X86 || ARCH_X86_64
    688     MACROBLOCKD *xd = &x->e_mbd;
    689     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    690     unsigned char *y;
    691 
    692     y_stride = 32;
    693     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    694     vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    695     y = xd->y_buf + y_stride + 1;
    696 #else
    697     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    698     y_stride = pre_stride;
    699 #endif
    700 
    701     /* central mv */
    702     bestmv->as_mv.row <<= 3;
    703     bestmv->as_mv.col <<= 3;
    704     startmv = *bestmv;
    705 
    706     /* calculate central point error */
    707     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    708     *distortion = bestmse;
    709     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    710 
    711     /* go left then right and check error */
    712     this_mv.as_mv.row = startmv.as_mv.row;
    713     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    714     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    715     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    716 
    717     if (left < bestmse)
    718     {
    719         *bestmv = this_mv;
    720         bestmse = left;
    721         *distortion = thismse;
    722         *sse1 = sse;
    723     }
    724 
    725     this_mv.as_mv.col += 8;
    726     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    727     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    728 
    729     if (right < bestmse)
    730     {
    731         *bestmv = this_mv;
    732         bestmse = right;
    733         *distortion = thismse;
    734         *sse1 = sse;
    735     }
    736 
    737     /* go up then down and check error */
    738     this_mv.as_mv.col = startmv.as_mv.col;
    739     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    740     thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    741     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    742 
    743     if (up < bestmse)
    744     {
    745         *bestmv = this_mv;
    746         bestmse = up;
    747         *distortion = thismse;
    748         *sse1 = sse;
    749     }
    750 
    751     this_mv.as_mv.row += 8;
    752     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    753     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    754 
    755     if (down < bestmse)
    756     {
    757         *bestmv = this_mv;
    758         bestmse = down;
    759         *distortion = thismse;
    760         *sse1 = sse;
    761     }
    762 
    763     /* now check 1 more diagonal - */
    764     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    765     this_mv = startmv;
    766 
    767     switch (whichdir)
    768     {
    769     case 0:
    770         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    771         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    772         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    773         break;
    774     case 1:
    775         this_mv.as_mv.col += 4;
    776         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    777         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    778         break;
    779     case 2:
    780         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    781         this_mv.as_mv.row += 4;
    782         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    783         break;
    784     case 3:
    785     default:
    786         this_mv.as_mv.col += 4;
    787         this_mv.as_mv.row += 4;
    788         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    789         break;
    790     }
    791 
    792     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    793 
    794     if (diag < bestmse)
    795     {
    796         *bestmv = this_mv;
    797         bestmse = diag;
    798         *distortion = thismse;
    799         *sse1 = sse;
    800     }
    801 
    802     return bestmse;
    803 }
    804 
    805 #define CHECK_BOUNDS(range) \
    806 {\
    807     all_in = 1;\
    808     all_in &= ((br-range) >= x->mv_row_min);\
    809     all_in &= ((br+range) <= x->mv_row_max);\
    810     all_in &= ((bc-range) >= x->mv_col_min);\
    811     all_in &= ((bc+range) <= x->mv_col_max);\
    812 }
    813 
    814 #define CHECK_POINT \
    815 {\
    816     if (this_mv.as_mv.col < x->mv_col_min) continue;\
    817     if (this_mv.as_mv.col > x->mv_col_max) continue;\
    818     if (this_mv.as_mv.row < x->mv_row_min) continue;\
    819     if (this_mv.as_mv.row > x->mv_row_max) continue;\
    820 }
    821 
    822 #define CHECK_BETTER \
    823 {\
    824     if (thissad < bestsad)\
    825     {\
    826         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
    827         if (thissad < bestsad)\
    828         {\
    829             bestsad = thissad;\
    830             best_site = i;\
    831         }\
    832     }\
    833 }
    834 
    835 static const MV next_chkpts[6][3] =
    836 {
    837     {{ -2, 0}, { -1, -2}, {1, -2}},
    838     {{ -1, -2}, {1, -2}, {2, 0}},
    839     {{1, -2}, {2, 0}, {1, 2}},
    840     {{2, 0}, {1, 2}, { -1, 2}},
    841     {{1, 2}, { -1, 2}, { -2, 0}},
    842     {{ -1, 2}, { -2, 0}, { -1, -2}}
    843 };
    844 
    845 int vp8_hex_search
    846 (
    847     MACROBLOCK *x,
    848     BLOCK *b,
    849     BLOCKD *d,
    850     int_mv *ref_mv,
    851     int_mv *best_mv,
    852     int search_param,
    853     int sad_per_bit,
    854     const vp8_variance_fn_ptr_t *vfp,
    855     int *mvsadcost[2],
    856     int *mvcost[2],
    857     int_mv *center_mv
    858 )
    859 {
    860     MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    861     MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
    862     int i, j;
    863 
    864     unsigned char *what = (*(b->base_src) + b->src);
    865     int what_stride = b->src_stride;
    866     int pre_stride = x->e_mbd.pre.y_stride;
    867     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    868 
    869     int in_what_stride = pre_stride;
    870     int br, bc;
    871     int_mv this_mv;
    872     unsigned int bestsad;
    873     unsigned int thissad;
    874     unsigned char *base_offset;
    875     unsigned char *this_offset;
    876     int k = -1;
    877     int all_in;
    878     int best_site = -1;
    879     int hex_range = 127;
    880     int dia_range = 8;
    881 
    882     int_mv fcenter_mv;
    883     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    884     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    885 
    886     /* adjust ref_mv to make sure it is within MV range */
    887     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    888     br = ref_mv->as_mv.row;
    889     bc = ref_mv->as_mv.col;
    890 
    891     /* Work out the start point for the search */
    892     base_offset = (unsigned char *)(base_pre + d->offset);
    893     this_offset = base_offset + (br * (pre_stride)) + bc;
    894     this_mv.as_mv.row = br;
    895     this_mv.as_mv.col = bc;
    896     bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
    897             + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    898 
    899 #if CONFIG_MULTI_RES_ENCODING
    900     /* Lower search range based on prediction info */
    901     if (search_param >= 6) goto cal_neighbors;
    902     else if (search_param >= 5) hex_range = 4;
    903     else if (search_param >= 4) hex_range = 6;
    904     else if (search_param >= 3) hex_range = 15;
    905     else if (search_param >= 2) hex_range = 31;
    906     else if (search_param >= 1) hex_range = 63;
    907 
    908     dia_range = 8;
    909 #endif
    910 
    911     /* hex search */
    912     CHECK_BOUNDS(2)
    913 
    914     if(all_in)
    915     {
    916         for (i = 0; i < 6; i++)
    917         {
    918             this_mv.as_mv.row = br + hex[i].row;
    919             this_mv.as_mv.col = bc + hex[i].col;
    920             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    921             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    922             CHECK_BETTER
    923         }
    924     }else
    925     {
    926         for (i = 0; i < 6; i++)
    927         {
    928             this_mv.as_mv.row = br + hex[i].row;
    929             this_mv.as_mv.col = bc + hex[i].col;
    930             CHECK_POINT
    931             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    932             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    933             CHECK_BETTER
    934         }
    935     }
    936 
    937     if (best_site == -1)
    938         goto cal_neighbors;
    939     else
    940     {
    941         br += hex[best_site].row;
    942         bc += hex[best_site].col;
    943         k = best_site;
    944     }
    945 
    946     for (j = 1; j < hex_range; j++)
    947     {
    948         best_site = -1;
    949         CHECK_BOUNDS(2)
    950 
    951         if(all_in)
    952         {
    953             for (i = 0; i < 3; i++)
    954             {
    955                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    956                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    957                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    958                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    959                 CHECK_BETTER
    960             }
    961         }else
    962         {
    963             for (i = 0; i < 3; i++)
    964             {
    965                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    966                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    967                 CHECK_POINT
    968                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    969                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    970                 CHECK_BETTER
    971             }
    972         }
    973 
    974         if (best_site == -1)
    975             break;
    976         else
    977         {
    978             br += next_chkpts[k][best_site].row;
    979             bc += next_chkpts[k][best_site].col;
    980             k += 5 + best_site;
    981             if (k >= 12) k -= 12;
    982             else if (k >= 6) k -= 6;
    983         }
    984     }
    985 
    986     /* check 4 1-away neighbors */
    987 cal_neighbors:
    988     for (j = 0; j < dia_range; j++)
    989     {
    990         best_site = -1;
    991         CHECK_BOUNDS(1)
    992 
    993         if(all_in)
    994         {
    995             for (i = 0; i < 4; i++)
    996             {
    997                 this_mv.as_mv.row = br + neighbors[i].row;
    998                 this_mv.as_mv.col = bc + neighbors[i].col;
    999                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1000                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1001                 CHECK_BETTER
   1002             }
   1003         }else
   1004         {
   1005             for (i = 0; i < 4; i++)
   1006             {
   1007                 this_mv.as_mv.row = br + neighbors[i].row;
   1008                 this_mv.as_mv.col = bc + neighbors[i].col;
   1009                 CHECK_POINT
   1010                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1011                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1012                 CHECK_BETTER
   1013             }
   1014         }
   1015 
   1016         if (best_site == -1)
   1017             break;
   1018         else
   1019         {
   1020             br += neighbors[best_site].row;
   1021             bc += neighbors[best_site].col;
   1022         }
   1023     }
   1024 
   1025     best_mv->as_mv.row = br;
   1026     best_mv->as_mv.col = bc;
   1027 
   1028     return bestsad;
   1029 }
   1030 #undef CHECK_BOUNDS
   1031 #undef CHECK_POINT
   1032 #undef CHECK_BETTER
   1033 
   1034 int vp8_diamond_search_sad_c
   1035 (
   1036     MACROBLOCK *x,
   1037     BLOCK *b,
   1038     BLOCKD *d,
   1039     int_mv *ref_mv,
   1040     int_mv *best_mv,
   1041     int search_param,
   1042     int sad_per_bit,
   1043     int *num00,
   1044     vp8_variance_fn_ptr_t *fn_ptr,
   1045     int *mvcost[2],
   1046     int_mv *center_mv
   1047 )
   1048 {
   1049     int i, j, step;
   1050 
   1051     unsigned char *what = (*(b->base_src) + b->src);
   1052     int what_stride = b->src_stride;
   1053     unsigned char *in_what;
   1054     int pre_stride = x->e_mbd.pre.y_stride;
   1055     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1056     int in_what_stride = pre_stride;
   1057     unsigned char *best_address;
   1058 
   1059     int tot_steps;
   1060     int_mv this_mv;
   1061 
   1062     unsigned int bestsad;
   1063     unsigned int thissad;
   1064     int best_site = 0;
   1065     int last_site = 0;
   1066 
   1067     int ref_row;
   1068     int ref_col;
   1069     int this_row_offset;
   1070     int this_col_offset;
   1071     search_site *ss;
   1072 
   1073     unsigned char *check_here;
   1074 
   1075     int *mvsadcost[2];
   1076     int_mv fcenter_mv;
   1077 
   1078     mvsadcost[0] = x->mvsadcost[0];
   1079     mvsadcost[1] = x->mvsadcost[1];
   1080     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1081     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1082 
   1083     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1084     ref_row = ref_mv->as_mv.row;
   1085     ref_col = ref_mv->as_mv.col;
   1086     *num00 = 0;
   1087     best_mv->as_mv.row = ref_row;
   1088     best_mv->as_mv.col = ref_col;
   1089 
   1090     /* Work out the start point for the search */
   1091     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1092     best_address = in_what;
   1093 
   1094     /* Check the starting position */
   1095     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1096             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1097 
   1098     /* search_param determines the length of the initial step and hence
   1099      * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   1100      * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1101      */
   1102     ss = &x->ss[search_param * x->searches_per_step];
   1103     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1104 
   1105     i = 1;
   1106 
   1107     for (step = 0; step < tot_steps ; step++)
   1108     {
   1109         for (j = 0 ; j < x->searches_per_step ; j++)
   1110         {
   1111             /* Trap illegal vectors */
   1112             this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1113             this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1114 
   1115             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1116             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1117 
   1118             {
   1119                 check_here = ss[i].offset + best_address;
   1120                 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1121 
   1122                 if (thissad < bestsad)
   1123                 {
   1124                     this_mv.as_mv.row = this_row_offset;
   1125                     this_mv.as_mv.col = this_col_offset;
   1126                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1127                                               mvsadcost, sad_per_bit);
   1128 
   1129                     if (thissad < bestsad)
   1130                     {
   1131                         bestsad = thissad;
   1132                         best_site = i;
   1133                     }
   1134                 }
   1135             }
   1136 
   1137             i++;
   1138         }
   1139 
   1140         if (best_site != last_site)
   1141         {
   1142             best_mv->as_mv.row += ss[best_site].mv.row;
   1143             best_mv->as_mv.col += ss[best_site].mv.col;
   1144             best_address += ss[best_site].offset;
   1145             last_site = best_site;
   1146         }
   1147         else if (best_address == in_what)
   1148             (*num00)++;
   1149     }
   1150 
   1151     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1152     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1153 
   1154     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1155            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1156 }
   1157 
   1158 int vp8_diamond_search_sadx4
   1159 (
   1160     MACROBLOCK *x,
   1161     BLOCK *b,
   1162     BLOCKD *d,
   1163     int_mv *ref_mv,
   1164     int_mv *best_mv,
   1165     int search_param,
   1166     int sad_per_bit,
   1167     int *num00,
   1168     vp8_variance_fn_ptr_t *fn_ptr,
   1169     int *mvcost[2],
   1170     int_mv *center_mv
   1171 )
   1172 {
   1173     int i, j, step;
   1174 
   1175     unsigned char *what = (*(b->base_src) + b->src);
   1176     int what_stride = b->src_stride;
   1177     unsigned char *in_what;
   1178     int pre_stride = x->e_mbd.pre.y_stride;
   1179     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1180     int in_what_stride = pre_stride;
   1181     unsigned char *best_address;
   1182 
   1183     int tot_steps;
   1184     int_mv this_mv;
   1185 
   1186     unsigned int bestsad;
   1187     unsigned int thissad;
   1188     int best_site = 0;
   1189     int last_site = 0;
   1190 
   1191     int ref_row;
   1192     int ref_col;
   1193     int this_row_offset;
   1194     int this_col_offset;
   1195     search_site *ss;
   1196 
   1197     unsigned char *check_here;
   1198 
   1199     int *mvsadcost[2];
   1200     int_mv fcenter_mv;
   1201 
   1202     mvsadcost[0] = x->mvsadcost[0];
   1203     mvsadcost[1] = x->mvsadcost[1];
   1204     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1205     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1206 
   1207     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1208     ref_row = ref_mv->as_mv.row;
   1209     ref_col = ref_mv->as_mv.col;
   1210     *num00 = 0;
   1211     best_mv->as_mv.row = ref_row;
   1212     best_mv->as_mv.col = ref_col;
   1213 
   1214     /* Work out the start point for the search */
   1215     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1216     best_address = in_what;
   1217 
   1218     /* Check the starting position */
   1219     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1220             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1221 
   1222     /* search_param determines the length of the initial step and hence the
   1223      * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   1224      * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1225      */
   1226     ss = &x->ss[search_param * x->searches_per_step];
   1227     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1228 
   1229     i = 1;
   1230 
   1231     for (step = 0; step < tot_steps ; step++)
   1232     {
   1233         int all_in = 1, t;
   1234 
   1235         /* To know if all neighbor points are within the bounds, 4 bounds
   1236          * checking are enough instead of checking 4 bounds for each
   1237          * points.
   1238          */
   1239         all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
   1240         all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
   1241         all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
   1242         all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
   1243 
   1244         if (all_in)
   1245         {
   1246             unsigned int sad_array[4];
   1247 
   1248             for (j = 0 ; j < x->searches_per_step ; j += 4)
   1249             {
   1250                 const unsigned char *block_offset[4];
   1251 
   1252                 for (t = 0; t < 4; t++)
   1253                     block_offset[t] = ss[i+t].offset + best_address;
   1254 
   1255                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1256 
   1257                 for (t = 0; t < 4; t++, i++)
   1258                 {
   1259                     if (sad_array[t] < bestsad)
   1260                     {
   1261                         this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
   1262                         this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
   1263                         sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
   1264                                                        mvsadcost, sad_per_bit);
   1265 
   1266                         if (sad_array[t] < bestsad)
   1267                         {
   1268                             bestsad = sad_array[t];
   1269                             best_site = i;
   1270                         }
   1271                     }
   1272                 }
   1273             }
   1274         }
   1275         else
   1276         {
   1277             for (j = 0 ; j < x->searches_per_step ; j++)
   1278             {
   1279                 /* Trap illegal vectors */
   1280                 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1281                 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1282 
   1283                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1284                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1285                 {
   1286                     check_here = ss[i].offset + best_address;
   1287                     thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1288 
   1289                     if (thissad < bestsad)
   1290                     {
   1291                         this_mv.as_mv.row = this_row_offset;
   1292                         this_mv.as_mv.col = this_col_offset;
   1293                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1294                                                   mvsadcost, sad_per_bit);
   1295 
   1296                         if (thissad < bestsad)
   1297                         {
   1298                             bestsad = thissad;
   1299                             best_site = i;
   1300                         }
   1301                     }
   1302                 }
   1303                 i++;
   1304             }
   1305         }
   1306 
   1307         if (best_site != last_site)
   1308         {
   1309             best_mv->as_mv.row += ss[best_site].mv.row;
   1310             best_mv->as_mv.col += ss[best_site].mv.col;
   1311             best_address += ss[best_site].offset;
   1312             last_site = best_site;
   1313         }
   1314         else if (best_address == in_what)
   1315             (*num00)++;
   1316     }
   1317 
   1318     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1319     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1320 
   1321     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1322            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1323 }
   1324 
   1325 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1326                         int sad_per_bit, int distance,
   1327                         vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1328                         int_mv *center_mv)
   1329 {
   1330     unsigned char *what = (*(b->base_src) + b->src);
   1331     int what_stride = b->src_stride;
   1332     unsigned char *in_what;
   1333     int pre_stride = x->e_mbd.pre.y_stride;
   1334     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1335     int in_what_stride = pre_stride;
   1336     int mv_stride = pre_stride;
   1337     unsigned char *bestaddress;
   1338     int_mv *best_mv = &d->bmi.mv;
   1339     int_mv this_mv;
   1340     unsigned int bestsad;
   1341     unsigned int thissad;
   1342     int r, c;
   1343 
   1344     unsigned char *check_here;
   1345 
   1346     int ref_row = ref_mv->as_mv.row;
   1347     int ref_col = ref_mv->as_mv.col;
   1348 
   1349     int row_min = ref_row - distance;
   1350     int row_max = ref_row + distance;
   1351     int col_min = ref_col - distance;
   1352     int col_max = ref_col + distance;
   1353 
   1354     int *mvsadcost[2];
   1355     int_mv fcenter_mv;
   1356 
   1357     mvsadcost[0] = x->mvsadcost[0];
   1358     mvsadcost[1] = x->mvsadcost[1];
   1359     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1360     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1361 
   1362     /* Work out the mid point for the search */
   1363     in_what = base_pre + d->offset;
   1364     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1365 
   1366     best_mv->as_mv.row = ref_row;
   1367     best_mv->as_mv.col = ref_col;
   1368 
   1369     /* Baseline value at the centre */
   1370     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1371                           in_what_stride, UINT_MAX)
   1372             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1373 
   1374     /* Apply further limits to prevent us looking using vectors that
   1375      * stretch beyiond the UMV border
   1376      */
   1377     if (col_min < x->mv_col_min)
   1378         col_min = x->mv_col_min;
   1379 
   1380     if (col_max > x->mv_col_max)
   1381         col_max = x->mv_col_max;
   1382 
   1383     if (row_min < x->mv_row_min)
   1384         row_min = x->mv_row_min;
   1385 
   1386     if (row_max > x->mv_row_max)
   1387         row_max = x->mv_row_max;
   1388 
   1389     for (r = row_min; r < row_max ; r++)
   1390     {
   1391         this_mv.as_mv.row = r;
   1392         check_here = r * mv_stride + in_what + col_min;
   1393 
   1394         for (c = col_min; c < col_max; c++)
   1395         {
   1396             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1397 
   1398             this_mv.as_mv.col = c;
   1399             thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1400                                       mvsadcost, sad_per_bit);
   1401 
   1402             if (thissad < bestsad)
   1403             {
   1404                 bestsad = thissad;
   1405                 best_mv->as_mv.row = r;
   1406                 best_mv->as_mv.col = c;
   1407                 bestaddress = check_here;
   1408             }
   1409 
   1410             check_here++;
   1411         }
   1412     }
   1413 
   1414     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1415     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1416 
   1417     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1418            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1419 }
   1420 
   1421 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1422                           int sad_per_bit, int distance,
   1423                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1424                           int_mv *center_mv)
   1425 {
   1426     unsigned char *what = (*(b->base_src) + b->src);
   1427     int what_stride = b->src_stride;
   1428     unsigned char *in_what;
   1429     int pre_stride = x->e_mbd.pre.y_stride;
   1430     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1431     int in_what_stride = pre_stride;
   1432     int mv_stride = pre_stride;
   1433     unsigned char *bestaddress;
   1434     int_mv *best_mv = &d->bmi.mv;
   1435     int_mv this_mv;
   1436     unsigned int bestsad;
   1437     unsigned int thissad;
   1438     int r, c;
   1439 
   1440     unsigned char *check_here;
   1441 
   1442     int ref_row = ref_mv->as_mv.row;
   1443     int ref_col = ref_mv->as_mv.col;
   1444 
   1445     int row_min = ref_row - distance;
   1446     int row_max = ref_row + distance;
   1447     int col_min = ref_col - distance;
   1448     int col_max = ref_col + distance;
   1449 
   1450     unsigned int sad_array[3];
   1451 
   1452     int *mvsadcost[2];
   1453     int_mv fcenter_mv;
   1454 
   1455     mvsadcost[0] = x->mvsadcost[0];
   1456     mvsadcost[1] = x->mvsadcost[1];
   1457     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1458     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1459 
   1460     /* Work out the mid point for the search */
   1461     in_what = base_pre + d->offset;
   1462     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1463 
   1464     best_mv->as_mv.row = ref_row;
   1465     best_mv->as_mv.col = ref_col;
   1466 
   1467     /* Baseline value at the centre */
   1468     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1469                           in_what_stride, UINT_MAX)
   1470             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1471 
   1472     /* Apply further limits to prevent us looking using vectors that stretch
   1473      * beyond the UMV border
   1474      */
   1475     if (col_min < x->mv_col_min)
   1476         col_min = x->mv_col_min;
   1477 
   1478     if (col_max > x->mv_col_max)
   1479         col_max = x->mv_col_max;
   1480 
   1481     if (row_min < x->mv_row_min)
   1482         row_min = x->mv_row_min;
   1483 
   1484     if (row_max > x->mv_row_max)
   1485         row_max = x->mv_row_max;
   1486 
   1487     for (r = row_min; r < row_max ; r++)
   1488     {
   1489         this_mv.as_mv.row = r;
   1490         check_here = r * mv_stride + in_what + col_min;
   1491         c = col_min;
   1492 
   1493         while ((c + 2) < col_max)
   1494         {
   1495             int i;
   1496 
   1497             fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1498 
   1499             for (i = 0; i < 3; i++)
   1500             {
   1501                 thissad = sad_array[i];
   1502 
   1503                 if (thissad < bestsad)
   1504                 {
   1505                     this_mv.as_mv.col = c;
   1506                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1507                                               mvsadcost, sad_per_bit);
   1508 
   1509                     if (thissad < bestsad)
   1510                     {
   1511                         bestsad = thissad;
   1512                         best_mv->as_mv.row = r;
   1513                         best_mv->as_mv.col = c;
   1514                         bestaddress = check_here;
   1515                     }
   1516                 }
   1517 
   1518                 check_here++;
   1519                 c++;
   1520             }
   1521         }
   1522 
   1523         while (c < col_max)
   1524         {
   1525             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1526 
   1527             if (thissad < bestsad)
   1528             {
   1529                 this_mv.as_mv.col = c;
   1530                 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1531                                           mvsadcost, sad_per_bit);
   1532 
   1533                 if (thissad < bestsad)
   1534                 {
   1535                     bestsad = thissad;
   1536                     best_mv->as_mv.row = r;
   1537                     best_mv->as_mv.col = c;
   1538                     bestaddress = check_here;
   1539                 }
   1540             }
   1541 
   1542             check_here ++;
   1543             c ++;
   1544         }
   1545 
   1546     }
   1547 
   1548     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1549     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1550 
   1551     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1552            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1553 }
   1554 
   1555 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1556                           int sad_per_bit, int distance,
   1557                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1558                           int_mv *center_mv)
   1559 {
   1560     unsigned char *what = (*(b->base_src) + b->src);
   1561     int what_stride = b->src_stride;
   1562     int pre_stride = x->e_mbd.pre.y_stride;
   1563     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1564     unsigned char *in_what;
   1565     int in_what_stride = pre_stride;
   1566     int mv_stride = pre_stride;
   1567     unsigned char *bestaddress;
   1568     int_mv *best_mv = &d->bmi.mv;
   1569     int_mv this_mv;
   1570     unsigned int bestsad;
   1571     unsigned int thissad;
   1572     int r, c;
   1573 
   1574     unsigned char *check_here;
   1575 
   1576     int ref_row = ref_mv->as_mv.row;
   1577     int ref_col = ref_mv->as_mv.col;
   1578 
   1579     int row_min = ref_row - distance;
   1580     int row_max = ref_row + distance;
   1581     int col_min = ref_col - distance;
   1582     int col_max = ref_col + distance;
   1583 
   1584     DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
   1585     unsigned int sad_array[3];
   1586 
   1587     int *mvsadcost[2];
   1588     int_mv fcenter_mv;
   1589 
   1590     mvsadcost[0] = x->mvsadcost[0];
   1591     mvsadcost[1] = x->mvsadcost[1];
   1592     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1593     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1594 
   1595     /* Work out the mid point for the search */
   1596     in_what = base_pre + d->offset;
   1597     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1598 
   1599     best_mv->as_mv.row = ref_row;
   1600     best_mv->as_mv.col = ref_col;
   1601 
   1602     /* Baseline value at the centre */
   1603     bestsad = fn_ptr->sdf(what, what_stride,
   1604                           bestaddress, in_what_stride, UINT_MAX)
   1605             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1606 
   1607     /* Apply further limits to prevent us looking using vectors that stretch
   1608      * beyond the UMV border
   1609      */
   1610     if (col_min < x->mv_col_min)
   1611         col_min = x->mv_col_min;
   1612 
   1613     if (col_max > x->mv_col_max)
   1614         col_max = x->mv_col_max;
   1615 
   1616     if (row_min < x->mv_row_min)
   1617         row_min = x->mv_row_min;
   1618 
   1619     if (row_max > x->mv_row_max)
   1620         row_max = x->mv_row_max;
   1621 
   1622     for (r = row_min; r < row_max ; r++)
   1623     {
   1624         this_mv.as_mv.row = r;
   1625         check_here = r * mv_stride + in_what + col_min;
   1626         c = col_min;
   1627 
   1628         while ((c + 7) < col_max)
   1629         {
   1630             int i;
   1631 
   1632             fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
   1633 
   1634             for (i = 0; i < 8; i++)
   1635             {
   1636                 thissad = sad_array8[i];
   1637 
   1638                 if (thissad < bestsad)
   1639                 {
   1640                     this_mv.as_mv.col = c;
   1641                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1642                                               mvsadcost, sad_per_bit);
   1643 
   1644                     if (thissad < bestsad)
   1645                     {
   1646                         bestsad = thissad;
   1647                         best_mv->as_mv.row = r;
   1648                         best_mv->as_mv.col = c;
   1649                         bestaddress = check_here;
   1650                     }
   1651                 }
   1652 
   1653                 check_here++;
   1654                 c++;
   1655             }
   1656         }
   1657 
   1658         while ((c + 2) < col_max)
   1659         {
   1660             int i;
   1661 
   1662             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1663 
   1664             for (i = 0; i < 3; i++)
   1665             {
   1666                 thissad = sad_array[i];
   1667 
   1668                 if (thissad < bestsad)
   1669                 {
   1670                     this_mv.as_mv.col = c;
   1671                     thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1672                         mvsadcost, sad_per_bit);
   1673 
   1674                     if (thissad < bestsad)
   1675                     {
   1676                         bestsad = thissad;
   1677                         best_mv->as_mv.row = r;
   1678                         best_mv->as_mv.col = c;
   1679                         bestaddress = check_here;
   1680                     }
   1681                 }
   1682 
   1683                 check_here++;
   1684                 c++;
   1685             }
   1686         }
   1687 
   1688         while (c < col_max)
   1689         {
   1690             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1691 
   1692             if (thissad < bestsad)
   1693             {
   1694                 this_mv.as_mv.col = c;
   1695                 thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1696                     mvsadcost, sad_per_bit);
   1697 
   1698                 if (thissad < bestsad)
   1699                 {
   1700                     bestsad = thissad;
   1701                     best_mv->as_mv.row = r;
   1702                     best_mv->as_mv.col = c;
   1703                     bestaddress = check_here;
   1704                 }
   1705             }
   1706 
   1707             check_here ++;
   1708             c ++;
   1709         }
   1710     }
   1711 
   1712     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1713     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1714 
   1715     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1716            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1717 }
   1718 
   1719 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1720                             int error_per_bit, int search_range,
   1721                             vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1722                             int_mv *center_mv)
   1723 {
   1724     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1725     int i, j;
   1726     short this_row_offset, this_col_offset;
   1727 
   1728     int what_stride = b->src_stride;
   1729     int pre_stride = x->e_mbd.pre.y_stride;
   1730     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1731     int in_what_stride = pre_stride;
   1732     unsigned char *what = (*(b->base_src) + b->src);
   1733     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1734         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1735     unsigned char *check_here;
   1736     int_mv this_mv;
   1737     unsigned int bestsad;
   1738     unsigned int thissad;
   1739 
   1740     int *mvsadcost[2];
   1741     int_mv fcenter_mv;
   1742 
   1743     mvsadcost[0] = x->mvsadcost[0];
   1744     mvsadcost[1] = x->mvsadcost[1];
   1745     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1746     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1747 
   1748     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1749                           in_what_stride, UINT_MAX)
   1750             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1751 
   1752     for (i=0; i<search_range; i++)
   1753     {
   1754         int best_site = -1;
   1755 
   1756         for (j = 0 ; j < 4 ; j++)
   1757         {
   1758             this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1759             this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1760 
   1761             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1762             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1763             {
   1764                 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1765                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1766 
   1767                 if (thissad < bestsad)
   1768                 {
   1769                     this_mv.as_mv.row = this_row_offset;
   1770                     this_mv.as_mv.col = this_col_offset;
   1771                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1772 
   1773                     if (thissad < bestsad)
   1774                     {
   1775                         bestsad = thissad;
   1776                         best_site = j;
   1777                     }
   1778                 }
   1779             }
   1780         }
   1781 
   1782         if (best_site == -1)
   1783             break;
   1784         else
   1785         {
   1786             ref_mv->as_mv.row += neighbors[best_site].row;
   1787             ref_mv->as_mv.col += neighbors[best_site].col;
   1788             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1789         }
   1790     }
   1791 
   1792     this_mv.as_mv.row = ref_mv->as_mv.row << 3;
   1793     this_mv.as_mv.col = ref_mv->as_mv.col << 3;
   1794 
   1795     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1796            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1797 }
   1798 
   1799 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1800                               int_mv *ref_mv, int error_per_bit,
   1801                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1802                               int *mvcost[2], int_mv *center_mv)
   1803 {
   1804     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1805     int i, j;
   1806     short this_row_offset, this_col_offset;
   1807 
   1808     int what_stride = b->src_stride;
   1809     int pre_stride = x->e_mbd.pre.y_stride;
   1810     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1811     int in_what_stride = pre_stride;
   1812     unsigned char *what = (*(b->base_src) + b->src);
   1813     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1814         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1815     unsigned char *check_here;
   1816     int_mv this_mv;
   1817     unsigned int bestsad;
   1818     unsigned int thissad;
   1819 
   1820     int *mvsadcost[2];
   1821     int_mv fcenter_mv;
   1822 
   1823     mvsadcost[0] = x->mvsadcost[0];
   1824     mvsadcost[1] = x->mvsadcost[1];
   1825     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1826     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1827 
   1828     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1829                           in_what_stride, UINT_MAX)
   1830             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1831 
   1832     for (i=0; i<search_range; i++)
   1833     {
   1834         int best_site = -1;
   1835         int all_in = 1;
   1836 
   1837         all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
   1838         all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
   1839         all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
   1840         all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
   1841 
   1842         if(all_in)
   1843         {
   1844             unsigned int sad_array[4];
   1845             const unsigned char *block_offset[4];
   1846             block_offset[0] = best_address - in_what_stride;
   1847             block_offset[1] = best_address - 1;
   1848             block_offset[2] = best_address + 1;
   1849             block_offset[3] = best_address + in_what_stride;
   1850 
   1851             fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1852 
   1853             for (j = 0; j < 4; j++)
   1854             {
   1855                 if (sad_array[j] < bestsad)
   1856                 {
   1857                     this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
   1858                     this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
   1859                     sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1860 
   1861                     if (sad_array[j] < bestsad)
   1862                     {
   1863                         bestsad = sad_array[j];
   1864                         best_site = j;
   1865                     }
   1866                 }
   1867             }
   1868         }
   1869         else
   1870         {
   1871             for (j = 0 ; j < 4 ; j++)
   1872             {
   1873                 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1874                 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1875 
   1876                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1877                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1878                 {
   1879                     check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1880                     thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1881 
   1882                     if (thissad < bestsad)
   1883                     {
   1884                         this_mv.as_mv.row = this_row_offset;
   1885                         this_mv.as_mv.col = this_col_offset;
   1886                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1887 
   1888                         if (thissad < bestsad)
   1889                         {
   1890                             bestsad = thissad;
   1891                             best_site = j;
   1892                         }
   1893                     }
   1894                 }
   1895             }
   1896         }
   1897 
   1898         if (best_site == -1)
   1899             break;
   1900         else
   1901         {
   1902             ref_mv->as_mv.row += neighbors[best_site].row;
   1903             ref_mv->as_mv.col += neighbors[best_site].col;
   1904             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1905         }
   1906     }
   1907 
   1908     this_mv.as_mv.row = ref_mv->as_mv.row << 3;
   1909     this_mv.as_mv.col = ref_mv->as_mv.col << 3;
   1910 
   1911     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1912            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1913 }
   1914 
   1915 #ifdef VP8_ENTROPY_STATS
   1916 void print_mode_context(void)
   1917 {
   1918     FILE *f = fopen("modecont.c", "w");
   1919     int i, j;
   1920 
   1921     fprintf(f, "#include \"entropy.h\"\n");
   1922     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1923     fprintf(f, "{\n");
   1924 
   1925     for (j = 0; j < 6; j++)
   1926     {
   1927         fprintf(f, "  { /* %d */\n", j);
   1928         fprintf(f, "    ");
   1929 
   1930         for (i = 0; i < 4; i++)
   1931         {
   1932             int overal_prob;
   1933             int this_prob;
   1934             int count;
   1935 
   1936             /* Overall probs */
   1937             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1938 
   1939             if (count)
   1940                 overal_prob = 256 * mv_mode_cts[i][0] / count;
   1941             else
   1942                 overal_prob = 128;
   1943 
   1944             if (overal_prob == 0)
   1945                 overal_prob = 1;
   1946 
   1947             /* context probs */
   1948             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1949 
   1950             if (count)
   1951                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1952             else
   1953                 this_prob = 128;
   1954 
   1955             if (this_prob == 0)
   1956                 this_prob = 1;
   1957 
   1958             fprintf(f, "%5d, ", this_prob);
   1959         }
   1960 
   1961         fprintf(f, "  },\n");
   1962     }
   1963 
   1964     fprintf(f, "};\n");
   1965     fclose(f);
   1966 }
   1967 
   1968 /* MV ref count VP8_ENTROPY_STATS stats code */
   1969 #ifdef VP8_ENTROPY_STATS
   1970 void init_mv_ref_counts()
   1971 {
   1972     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1973     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1974 }
   1975 
   1976 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
   1977 {
   1978     if (m == ZEROMV)
   1979     {
   1980         ++mv_ref_ct [ct[0]] [0] [0];
   1981         ++mv_mode_cts[0][0];
   1982     }
   1983     else
   1984     {
   1985         ++mv_ref_ct [ct[0]] [0] [1];
   1986         ++mv_mode_cts[0][1];
   1987 
   1988         if (m == NEARESTMV)
   1989         {
   1990             ++mv_ref_ct [ct[1]] [1] [0];
   1991             ++mv_mode_cts[1][0];
   1992         }
   1993         else
   1994         {
   1995             ++mv_ref_ct [ct[1]] [1] [1];
   1996             ++mv_mode_cts[1][1];
   1997 
   1998             if (m == NEARMV)
   1999             {
   2000                 ++mv_ref_ct [ct[2]] [2] [0];
   2001                 ++mv_mode_cts[2][0];
   2002             }
   2003             else
   2004             {
   2005                 ++mv_ref_ct [ct[2]] [2] [1];
   2006                 ++mv_mode_cts[2][1];
   2007 
   2008                 if (m == NEWMV)
   2009                 {
   2010                     ++mv_ref_ct [ct[3]] [3] [0];
   2011                     ++mv_mode_cts[3][0];
   2012                 }
   2013                 else
   2014                 {
   2015                     ++mv_ref_ct [ct[3]] [3] [1];
   2016                     ++mv_mode_cts[3][1];
   2017                 }
   2018             }
   2019         }
   2020     }
   2021 }
   2022 
   2023 #endif/* END MV ref count VP8_ENTROPY_STATS stats code */
   2024 
   2025 #endif
   2026