Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "onyx_int.h"
     13 #include "mcomp.h"
     14 #include "vpx_mem/vpx_mem.h"
     15 #include "vpx_config.h"
     16 #include <stdio.h>
     17 #include <limits.h>
     18 #include <math.h>
     19 #include "vp8/common/findnearmv.h"
     20 
/* Counter arrays that exist only in builds that define VP8_ENTROPY_STATS.
 * Nothing in the part of this file visible here reads or writes them.
 */
#ifdef VP8_ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif
     25 
     26 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
     27 {
     28     /* MV costing is based on the distribution of vectors in the previous
     29      * frame and as such will tend to over state the cost of vectors. In
     30      * addition coding a new vector can have a knock on effect on the cost
     31      * of subsequent vectors and the quality of prediction from NEAR and
     32      * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
     33      * limited extent, for some account to be taken of these factors.
     34      */
     35     return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
     36 }
     37 
     38 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
     39 {
     40     /* Ignore mv costing if mvcost is NULL */
     41     if (mvcost)
     42         return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
     43                  mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
     44                  * error_per_bit + 128) >> 8;
     45     return 0;
     46 }
     47 
     48 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
     49 {
     50     /* Calculate sad error cost on full pixel basis. */
     51     /* Ignore mv costing if mvsadcost is NULL */
     52     if (mvsadcost)
     53         return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
     54                  mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
     55                 * error_per_bit + 128) >> 8;
     56     return 0;
     57 }
     58 
     59 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
     60 {
     61     int Len;
     62     int search_site_count = 0;
     63 
     64 
     65     /* Generate offsets for 4 search sites per step. */
     66     Len = MAX_FIRST_STEP;
     67     x->ss[search_site_count].mv.col = 0;
     68     x->ss[search_site_count].mv.row = 0;
     69     x->ss[search_site_count].offset = 0;
     70     search_site_count++;
     71 
     72     while (Len > 0)
     73     {
     74 
     75         /* Compute offsets for search sites. */
     76         x->ss[search_site_count].mv.col = 0;
     77         x->ss[search_site_count].mv.row = -Len;
     78         x->ss[search_site_count].offset = -Len * stride;
     79         search_site_count++;
     80 
     81         /* Compute offsets for search sites. */
     82         x->ss[search_site_count].mv.col = 0;
     83         x->ss[search_site_count].mv.row = Len;
     84         x->ss[search_site_count].offset = Len * stride;
     85         search_site_count++;
     86 
     87         /* Compute offsets for search sites. */
     88         x->ss[search_site_count].mv.col = -Len;
     89         x->ss[search_site_count].mv.row = 0;
     90         x->ss[search_site_count].offset = -Len;
     91         search_site_count++;
     92 
     93         /* Compute offsets for search sites. */
     94         x->ss[search_site_count].mv.col = Len;
     95         x->ss[search_site_count].mv.row = 0;
     96         x->ss[search_site_count].offset = Len;
     97         search_site_count++;
     98 
     99         /* Contract. */
    100         Len /= 2;
    101     }
    102 
    103     x->ss_count = search_site_count;
    104     x->searches_per_step = 4;
    105 }
    106 
    107 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
    108 {
    109     int Len;
    110     int search_site_count = 0;
    111 
    112     /* Generate offsets for 8 search sites per step. */
    113     Len = MAX_FIRST_STEP;
    114     x->ss[search_site_count].mv.col = 0;
    115     x->ss[search_site_count].mv.row = 0;
    116     x->ss[search_site_count].offset = 0;
    117     search_site_count++;
    118 
    119     while (Len > 0)
    120     {
    121 
    122         /* Compute offsets for search sites. */
    123         x->ss[search_site_count].mv.col = 0;
    124         x->ss[search_site_count].mv.row = -Len;
    125         x->ss[search_site_count].offset = -Len * stride;
    126         search_site_count++;
    127 
    128         /* Compute offsets for search sites. */
    129         x->ss[search_site_count].mv.col = 0;
    130         x->ss[search_site_count].mv.row = Len;
    131         x->ss[search_site_count].offset = Len * stride;
    132         search_site_count++;
    133 
    134         /* Compute offsets for search sites. */
    135         x->ss[search_site_count].mv.col = -Len;
    136         x->ss[search_site_count].mv.row = 0;
    137         x->ss[search_site_count].offset = -Len;
    138         search_site_count++;
    139 
    140         /* Compute offsets for search sites. */
    141         x->ss[search_site_count].mv.col = Len;
    142         x->ss[search_site_count].mv.row = 0;
    143         x->ss[search_site_count].offset = Len;
    144         search_site_count++;
    145 
    146         /* Compute offsets for search sites. */
    147         x->ss[search_site_count].mv.col = -Len;
    148         x->ss[search_site_count].mv.row = -Len;
    149         x->ss[search_site_count].offset = -Len * stride - Len;
    150         search_site_count++;
    151 
    152         /* Compute offsets for search sites. */
    153         x->ss[search_site_count].mv.col = Len;
    154         x->ss[search_site_count].mv.row = -Len;
    155         x->ss[search_site_count].offset = -Len * stride + Len;
    156         search_site_count++;
    157 
    158         /* Compute offsets for search sites. */
    159         x->ss[search_site_count].mv.col = -Len;
    160         x->ss[search_site_count].mv.row = Len;
    161         x->ss[search_site_count].offset = Len * stride - Len;
    162         search_site_count++;
    163 
    164         /* Compute offsets for search sites. */
    165         x->ss[search_site_count].mv.col = Len;
    166         x->ss[search_site_count].mv.row = Len;
    167         x->ss[search_site_count].offset = Len * stride + Len;
    168         search_site_count++;
    169 
    170 
    171         /* Contract. */
    172         Len /= 2;
    173     }
    174 
    175     x->ss_count = search_site_count;
    176     x->searches_per_step = 8;
    177 }
    178 
/*
 * To avoid the penalty of reads that cross a cache line, the reference area
 * is preloaded into a small buffer that is aligned so that no read from it
 * crosses a cache line. This reduces the CPU cycles spent reading reference
 * data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a 22 rows x
 * 32 cols area is copied, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
    188 
/* The helper macros below are private to
 * vp8_find_best_sub_pixel_step_iteratively() and are #undef'd right after it.
 * They read and write that function's locals directly (mvcost, rr, rc,
 * error_per_bit, y, y_stride, offset, vfp, z, b, sse, minc/maxc/minr/maxr,
 * thismse, besterr, br, bc, distortion, sse1). The (r,c) arguments use the
 * same units as br/bc, i.e. the full-pel vector multiplied by 4.
 */

/* estimated cost of a motion vector (r,c); evaluates to 0 when mvcost is NULL */
#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
/* pointer to predictor base of a motionvector: the whole-pixel part of (r,c)
 * relative to y. "offset" is the whole-pixel start position, which y already
 * points at, so it is subtracted back out.
 */
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
/* convert motion vector component to offset for svf calc: keeps the low two
 * bits of the component and doubles them
 */
#define SP(x) (((x)&3)<<1)
/* returns subpixel variance error function. */
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
/* runs statement s when (r,c) lies within [minr,maxr] x [minc,maxc], else e */
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost (not used in the code visible here) */
#define ERR(r,c) (MVC(r,c)+DIST(r,c))
/* checks if (r,c) has better score than previous best; v receives the score of
 * (r,c), or UINT_MAX when (r,c) is out of range. On improvement besterr, br,
 * bc, *distortion and *sse1 are updated.
 */
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)

/* Iterative sub-pixel refinement around a full-pel motion vector.
 *
 * Starting from *bestmv, the function repeatedly tests the left/right/up/down
 * neighbours plus one diagonal, first with a step of 2 (labelled 1/2 pel
 * below) and then with a step of 1 (labelled 1/4 pel), moving the centre to
 * the best point found after each round.
 *
 * x, b, d        supply the search limits, source block and reference offset.
 * bestmv         in: starting vector; out: refined vector. It is multiplied by
 *                8 before the centre cost is computed and finally set to the
 *                best (row, col) found multiplied by 2.
 * ref_mv         reference vector used for costing and for the range limits.
 * error_per_bit  multiplier applied to the mv cost table entries.
 * vfp            variance function table (vf, svf and, on x86, copymem are
 *                used).
 * mvcost         cost tables; may be NULL, in which case the mv cost is 0.
 * distortion     out: variance term of the best point.
 * sse1           out: sse reported for the best point.
 *
 * Returns the best score (variance + mv cost), or INT_MAX when the refined
 * vector differs from ref_mv by more than MAX_FULL_PEL_VAL << 3 in either
 * component.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1)
{
    unsigned char *z = (*(b->base_src) + b->src);

    int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
    int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
    int tr = br, tc = bc;
    unsigned int besterr;
    unsigned int left, right, up, down, diag;
    unsigned int sse;
    unsigned int whichdir;
    /* Both counters are pre-decremented in their loop conditions, so each
     * refinement loop runs at most 3 times.
     */
    unsigned int halfiters = 4;
    unsigned int quarteriters = 4;
    int thismse;

    /* Candidate limits in the same units as br/bc: the tighter of the
     * x->mv_*_min/max limits (scaled by 4) and a window of
     * (1 << mvlong_width) - 1 around the halved reference vector.
     */
    int minc = MAX(x->mv_col_min * 4,
                   (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
    int maxc = MIN(x->mv_col_max * 4,
                   (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
    int minr = MAX(x->mv_row_min * 4,
                   (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
    int maxr = MIN(x->mv_row_max * 4,
                   (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

    int y_stride;
    int offset;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;


#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;
    int buf_r1, buf_r2, buf_c1;

    /* Clamping to avoid out-of-range data access */
    buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
    buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
    buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
    y_stride = 32;

    /* Copy to intermediate buffer before searching. The copy starts buf_r1
     * rows above and buf_c1 columns left of the start position and spans
     * 16 + buf_r1 + buf_r2 rows; y then points at the start position inside
     * xd->y_buf.
     */
    vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
#else
    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    y_stride = pre_stride;
#endif

    /* Whole-pixel start position expressed in y_stride units; PRE() subtracts
     * it. It must be taken before bestmv is rescaled below.
     */
    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

    /* central mv */
    bestmv->as_mv.row *= 8;
    bestmv->as_mv.col *= 8;

    /* calculate central point error */
    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = besterr;
    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* TODO: Each subsequent iteration checks at least one point in common
     * with the last iteration could be 2 ( if diag selected)
     */
    while (--halfiters)
    {
        /* 1/2 pel */
        CHECK_BETTER(left, tr, tc - 2);
        CHECK_BETTER(right, tr, tc + 2);
        CHECK_BETTER(up, tr - 2, tc);
        CHECK_BETTER(down, tr + 2, tc);

        /* Pick the diagonal lying between the cheaper horizontal and the
         * cheaper vertical neighbour.
         */
        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 2, tc - 2);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 2, tc + 2);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 2, tc - 2);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 2, tc + 2);
            break;
        }

        /* no reason to check the same one again. */
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    /* TODO: Each subsequent iteration checks at least one point in common
     * with the last iteration could be 2 ( if diag selected)
     */

    /* 1/4 pel */
    while (--quarteriters)
    {
        CHECK_BETTER(left, tr, tc - 1);
        CHECK_BETTER(right, tr, tc + 1);
        CHECK_BETTER(up, tr - 1, tc);
        CHECK_BETTER(down, tr + 1, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 1, tc - 1);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 1, tc + 1);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 1, tc - 1);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 1, tc + 1);
            break;
        }

        /* no reason to check the same one again. */
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    /* br/bc are 4x the full-pel scale while bestmv is 8x, hence the factor 2. */
    bestmv->as_mv.row = br * 2;
    bestmv->as_mv.col = bc * 2;

    /* Reject a result that lies too far from the reference vector. */
    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
        return INT_MAX;

    return besterr;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER
    360 
/* Two-pass sub-pixel refinement around a full-pel motion vector.
 *
 * Pass 1 evaluates the four half-pel neighbours (left, right, up, down) of
 * the start position plus one diagonal chosen from those results. Pass 2
 * does the same around the pass-1 winner with a step of 2 (labelled quarter
 * pel below). Each candidate is scored as variance + mv_err_cost().
 *
 * x, b, d        supply the reference buffer, source block and reference
 *                offset.
 * bestmv         in: starting vector; out: the best vector found. It is
 *                shifted left by 3 before the search, so candidates and the
 *                result are in units of 1/8 of the input unit.
 * ref_mv         reference vector used for costing.
 * error_per_bit  multiplier applied to the mv cost table entries.
 * vfp            variance function table (vf, svf, svf_halfpix_h/v/hv and, on
 *                x86, copymem are used).
 * mvcost         cost tables; mv_err_cost() returns 0 when this is NULL.
 * distortion     out: variance term of the best candidate.
 * sse1           out: sse reported for the best candidate.
 *
 * Returns the best score found. Unlike the iterative variant, no range check
 * against ref_mv is made on the result.
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1)
{
    int bestmse = INT_MAX;
    int_mv startmv;
    int_mv this_mv;
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir ;
    int thismse;
    int y_stride;
    int pre_stride = x->e_mbd.pre.y_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    unsigned char *y;

    y_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching.
     * The copy starts one row above and one column left of the start
     * position, so y (one row and one column into the buffer) points at it.
     */
     vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
     y = xd->y_buf + y_stride + 1;
#else
     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
     y_stride = pre_stride;
#endif

    /* central mv */
    bestmv->as_mv.row <<= 3;
    bestmv->as_mv.col <<= 3;
    startmv = *bestmv;

    /* calculate central point error */
    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    *distortion = bestmse;
    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    /* go left then right and check error */
    /* startmv is a multiple of 8 here, so (v - 8) | 4 equals v - 4: the
     * half-step position just before the start.
     */
    this_mv.as_mv.row = startmv.as_mv.row;
    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.col += 8;
    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;
    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 8;
    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* now check 1 more diagonal */
    /* whichdir: bit 0 is clear when left beat right, bit 1 is clear when up
     * beat down; the diagonal between the two cheaper sides is tested.
     */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
        break;
    case 3:
    default:
        this_mv.as_mv.col += 4;
        this_mv.as_mv.row += 4;
        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* time to check quarter pels. */
    /* If the half-step winner lies above / left of the start position, move
     * y back by one row / column so that it is the base for the winner's
     * fractional position.
     */
    if (bestmv->as_mv.row < startmv.as_mv.row)
        y -= y_stride;

    if (bestmv->as_mv.col < startmv.as_mv.col)
        y--;

    startmv = *bestmv;



    /* go left then right and check error */
    this_mv.as_mv.row = startmv.as_mv.row;

    /* With a non-zero fractional part, stepping back by 2 keeps the same base
     * position. With a zero fractional part the candidate has fraction 6
     * measured from the previous position (y - 1).
     */
    if (startmv.as_mv.col & 7)
    {
        this_mv.as_mv.col = startmv.as_mv.col - 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }

    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* The right candidate sits 4 past the left one, i.e. 2 past startmv. */
    this_mv.as_mv.col += 4;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
        *distortion = thismse;
        *sse1 = sse;
    }

    /* go up then down and check error */
    this_mv.as_mv.col = startmv.as_mv.col;

    if (startmv.as_mv.row & 7)
    {
        this_mv.as_mv.row = startmv.as_mv.row - 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    }

    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
        *distortion = thismse;
        *sse1 = sse;
    }

    this_mv.as_mv.row += 4;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
        *distortion = thismse;
        *sse1 = sse;
    }


    /* now check 1 more diagonal */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    this_mv = startmv;

    /* whichdir is always 0..3, so one of the cases below always runs. */
    switch (whichdir)
    {
    case 0:

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
            }
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

            if (startmv.as_mv.col & 7)
            {
                this_mv.as_mv.col -= 2;
                thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
                thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.as_mv.col += 2;

        if (startmv.as_mv.row & 7)
        {
            this_mv.as_mv.row -= 2;
            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
            thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.as_mv.row += 2;

        if (startmv.as_mv.col & 7)
        {
            this_mv.as_mv.col -= 2;
            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
            thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        }

        break;
    case 3:
        this_mv.as_mv.col += 2;
        this_mv.as_mv.row += 2;
        thismse = vfp->svf(y, y_stride,  this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        break;
    }

    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
        *distortion = thismse;
        *sse1 = sse;
    }

    return bestmse;
}
    671 
    672 int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    673                                   int_mv *bestmv, int_mv *ref_mv,
    674                                   int error_per_bit,
    675                                   const vp8_variance_fn_ptr_t *vfp,
    676                                   int *mvcost[2], int *distortion,
    677                                   unsigned int *sse1)
    678 {
    679     int bestmse = INT_MAX;
    680     int_mv startmv;
    681     int_mv this_mv;
    682     unsigned char *z = (*(b->base_src) + b->src);
    683     int left, right, up, down, diag;
    684     unsigned int sse;
    685     int whichdir ;
    686     int thismse;
    687     int y_stride;
    688     int pre_stride = x->e_mbd.pre.y_stride;
    689     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    690 
    691 #if ARCH_X86 || ARCH_X86_64
    692     MACROBLOCKD *xd = &x->e_mbd;
    693     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    694     unsigned char *y;
    695 
    696     y_stride = 32;
    697     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    698     vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    699     y = xd->y_buf + y_stride + 1;
    700 #else
    701     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    702     y_stride = pre_stride;
    703 #endif
    704 
    705     /* central mv */
    706     bestmv->as_mv.row *= 8;
    707     bestmv->as_mv.col *= 8;
    708     startmv = *bestmv;
    709 
    710     /* calculate central point error */
    711     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    712     *distortion = bestmse;
    713     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    714 
    715     /* go left then right and check error */
    716     this_mv.as_mv.row = startmv.as_mv.row;
    717     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    718     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    719     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    720 
    721     if (left < bestmse)
    722     {
    723         *bestmv = this_mv;
    724         bestmse = left;
    725         *distortion = thismse;
    726         *sse1 = sse;
    727     }
    728 
    729     this_mv.as_mv.col += 8;
    730     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    731     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    732 
    733     if (right < bestmse)
    734     {
    735         *bestmv = this_mv;
    736         bestmse = right;
    737         *distortion = thismse;
    738         *sse1 = sse;
    739     }
    740 
    741     /* go up then down and check error */
    742     this_mv.as_mv.col = startmv.as_mv.col;
    743     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    744     thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    745     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    746 
    747     if (up < bestmse)
    748     {
    749         *bestmv = this_mv;
    750         bestmse = up;
    751         *distortion = thismse;
    752         *sse1 = sse;
    753     }
    754 
    755     this_mv.as_mv.row += 8;
    756     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    757     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    758 
    759     if (down < bestmse)
    760     {
    761         *bestmv = this_mv;
    762         bestmse = down;
    763         *distortion = thismse;
    764         *sse1 = sse;
    765     }
    766 
    767     /* now check 1 more diagonal - */
    768     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    769     this_mv = startmv;
    770 
    771     switch (whichdir)
    772     {
    773     case 0:
    774         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    775         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    776         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    777         break;
    778     case 1:
    779         this_mv.as_mv.col += 4;
    780         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    781         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    782         break;
    783     case 2:
    784         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    785         this_mv.as_mv.row += 4;
    786         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    787         break;
    788     case 3:
    789     default:
    790         this_mv.as_mv.col += 4;
    791         this_mv.as_mv.row += 4;
    792         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    793         break;
    794     }
    795 
    796     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    797 
    798     if (diag < bestmse)
    799     {
    800         *bestmv = this_mv;
    801         bestmse = diag;
    802         *distortion = thismse;
    803         *sse1 = sse;
    804     }
    805 
    806     return bestmse;
    807 }
    808 
    809 #define CHECK_BOUNDS(range) \
    810 {\
    811     all_in = 1;\
    812     all_in &= ((br-range) >= x->mv_row_min);\
    813     all_in &= ((br+range) <= x->mv_row_max);\
    814     all_in &= ((bc-range) >= x->mv_col_min);\
    815     all_in &= ((bc+range) <= x->mv_col_max);\
    816 }
    817 
    818 #define CHECK_POINT \
    819 {\
    820     if (this_mv.as_mv.col < x->mv_col_min) continue;\
    821     if (this_mv.as_mv.col > x->mv_col_max) continue;\
    822     if (this_mv.as_mv.row < x->mv_row_min) continue;\
    823     if (this_mv.as_mv.row > x->mv_row_max) continue;\
    824 }
    825 
    826 #define CHECK_BETTER \
    827 {\
    828     if (thissad < bestsad)\
    829     {\
    830         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
    831         if (thissad < bestsad)\
    832         {\
    833             bestsad = thissad;\
    834             best_site = i;\
    835         }\
    836     }\
    837 }
    838 
    839 static const MV next_chkpts[6][3] =
    840 {
    841     {{ -2, 0}, { -1, -2}, {1, -2}},
    842     {{ -1, -2}, {1, -2}, {2, 0}},
    843     {{1, -2}, {2, 0}, {1, 2}},
    844     {{2, 0}, {1, 2}, { -1, 2}},
    845     {{1, 2}, { -1, 2}, { -2, 0}},
    846     {{ -1, 2}, { -2, 0}, { -1, -2}}
    847 };
    848 
    849 int vp8_hex_search
    850 (
    851     MACROBLOCK *x,
    852     BLOCK *b,
    853     BLOCKD *d,
    854     int_mv *ref_mv,
    855     int_mv *best_mv,
    856     int search_param,
    857     int sad_per_bit,
    858     const vp8_variance_fn_ptr_t *vfp,
    859     int *mvsadcost[2],
    860     int *mvcost[2],
    861     int_mv *center_mv
    862 )
    863 {
    864     MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    865     MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
    866     int i, j;
    867 
    868     unsigned char *what = (*(b->base_src) + b->src);
    869     int what_stride = b->src_stride;
    870     int pre_stride = x->e_mbd.pre.y_stride;
    871     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    872 
    873     int in_what_stride = pre_stride;
    874     int br, bc;
    875     int_mv this_mv;
    876     unsigned int bestsad;
    877     unsigned int thissad;
    878     unsigned char *base_offset;
    879     unsigned char *this_offset;
    880     int k = -1;
    881     int all_in;
    882     int best_site = -1;
    883     int hex_range = 127;
    884     int dia_range = 8;
    885 
    886     int_mv fcenter_mv;
    887     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    888     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    889 
    890     /* adjust ref_mv to make sure it is within MV range */
    891     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    892     br = ref_mv->as_mv.row;
    893     bc = ref_mv->as_mv.col;
    894 
    895     /* Work out the start point for the search */
    896     base_offset = (unsigned char *)(base_pre + d->offset);
    897     this_offset = base_offset + (br * (pre_stride)) + bc;
    898     this_mv.as_mv.row = br;
    899     this_mv.as_mv.col = bc;
    900     bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
    901             + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    902 
    903 #if CONFIG_MULTI_RES_ENCODING
    904     /* Lower search range based on prediction info */
    905     if (search_param >= 6) goto cal_neighbors;
    906     else if (search_param >= 5) hex_range = 4;
    907     else if (search_param >= 4) hex_range = 6;
    908     else if (search_param >= 3) hex_range = 15;
    909     else if (search_param >= 2) hex_range = 31;
    910     else if (search_param >= 1) hex_range = 63;
    911 
    912     dia_range = 8;
    913 #endif
    914 
    915     /* hex search */
    916     CHECK_BOUNDS(2)
    917 
    918     if(all_in)
    919     {
    920         for (i = 0; i < 6; i++)
    921         {
    922             this_mv.as_mv.row = br + hex[i].row;
    923             this_mv.as_mv.col = bc + hex[i].col;
    924             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    925             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    926             CHECK_BETTER
    927         }
    928     }else
    929     {
    930         for (i = 0; i < 6; i++)
    931         {
    932             this_mv.as_mv.row = br + hex[i].row;
    933             this_mv.as_mv.col = bc + hex[i].col;
    934             CHECK_POINT
    935             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    936             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    937             CHECK_BETTER
    938         }
    939     }
    940 
    941     if (best_site == -1)
    942         goto cal_neighbors;
    943     else
    944     {
    945         br += hex[best_site].row;
    946         bc += hex[best_site].col;
    947         k = best_site;
    948     }
    949 
    950     for (j = 1; j < hex_range; j++)
    951     {
    952         best_site = -1;
    953         CHECK_BOUNDS(2)
    954 
    955         if(all_in)
    956         {
    957             for (i = 0; i < 3; i++)
    958             {
    959                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    960                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    961                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    962                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    963                 CHECK_BETTER
    964             }
    965         }else
    966         {
    967             for (i = 0; i < 3; i++)
    968             {
    969                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    970                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    971                 CHECK_POINT
    972                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    973                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    974                 CHECK_BETTER
    975             }
    976         }
    977 
    978         if (best_site == -1)
    979             break;
    980         else
    981         {
    982             br += next_chkpts[k][best_site].row;
    983             bc += next_chkpts[k][best_site].col;
    984             k += 5 + best_site;
    985             if (k >= 12) k -= 12;
    986             else if (k >= 6) k -= 6;
    987         }
    988     }
    989 
    990     /* check 4 1-away neighbors */
    991 cal_neighbors:
    992     for (j = 0; j < dia_range; j++)
    993     {
    994         best_site = -1;
    995         CHECK_BOUNDS(1)
    996 
    997         if(all_in)
    998         {
    999             for (i = 0; i < 4; i++)
   1000             {
   1001                 this_mv.as_mv.row = br + neighbors[i].row;
   1002                 this_mv.as_mv.col = bc + neighbors[i].col;
   1003                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1004                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1005                 CHECK_BETTER
   1006             }
   1007         }else
   1008         {
   1009             for (i = 0; i < 4; i++)
   1010             {
   1011                 this_mv.as_mv.row = br + neighbors[i].row;
   1012                 this_mv.as_mv.col = bc + neighbors[i].col;
   1013                 CHECK_POINT
   1014                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1015                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1016                 CHECK_BETTER
   1017             }
   1018         }
   1019 
   1020         if (best_site == -1)
   1021             break;
   1022         else
   1023         {
   1024             br += neighbors[best_site].row;
   1025             bc += neighbors[best_site].col;
   1026         }
   1027     }
   1028 
   1029     best_mv->as_mv.row = br;
   1030     best_mv->as_mv.col = bc;
   1031 
   1032     return bestsad;
   1033 }
   1034 #undef CHECK_BOUNDS
   1035 #undef CHECK_POINT
   1036 #undef CHECK_BETTER
   1037 
   1038 int vp8_diamond_search_sad_c
   1039 (
   1040     MACROBLOCK *x,
   1041     BLOCK *b,
   1042     BLOCKD *d,
   1043     int_mv *ref_mv,
   1044     int_mv *best_mv,
   1045     int search_param,
   1046     int sad_per_bit,
   1047     int *num00,
   1048     vp8_variance_fn_ptr_t *fn_ptr,
   1049     int *mvcost[2],
   1050     int_mv *center_mv
   1051 )
   1052 {
   1053     int i, j, step;
   1054 
   1055     unsigned char *what = (*(b->base_src) + b->src);
   1056     int what_stride = b->src_stride;
   1057     unsigned char *in_what;
   1058     int pre_stride = x->e_mbd.pre.y_stride;
   1059     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1060     int in_what_stride = pre_stride;
   1061     unsigned char *best_address;
   1062 
   1063     int tot_steps;
   1064     int_mv this_mv;
   1065 
   1066     unsigned int bestsad;
   1067     unsigned int thissad;
   1068     int best_site = 0;
   1069     int last_site = 0;
   1070 
   1071     int ref_row;
   1072     int ref_col;
   1073     int this_row_offset;
   1074     int this_col_offset;
   1075     search_site *ss;
   1076 
   1077     unsigned char *check_here;
   1078 
   1079     int *mvsadcost[2];
   1080     int_mv fcenter_mv;
   1081 
   1082     mvsadcost[0] = x->mvsadcost[0];
   1083     mvsadcost[1] = x->mvsadcost[1];
   1084     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1085     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1086 
   1087     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1088     ref_row = ref_mv->as_mv.row;
   1089     ref_col = ref_mv->as_mv.col;
   1090     *num00 = 0;
   1091     best_mv->as_mv.row = ref_row;
   1092     best_mv->as_mv.col = ref_col;
   1093 
   1094     /* Work out the start point for the search */
   1095     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1096     best_address = in_what;
   1097 
   1098     /* Check the starting position */
   1099     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1100             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1101 
   1102     /* search_param determines the length of the initial step and hence
   1103      * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   1104      * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1105      */
   1106     ss = &x->ss[search_param * x->searches_per_step];
   1107     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1108 
   1109     i = 1;
   1110 
   1111     for (step = 0; step < tot_steps ; step++)
   1112     {
   1113         for (j = 0 ; j < x->searches_per_step ; j++)
   1114         {
   1115             /* Trap illegal vectors */
   1116             this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1117             this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1118 
   1119             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1120             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1121 
   1122             {
   1123                 check_here = ss[i].offset + best_address;
   1124                 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1125 
   1126                 if (thissad < bestsad)
   1127                 {
   1128                     this_mv.as_mv.row = this_row_offset;
   1129                     this_mv.as_mv.col = this_col_offset;
   1130                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1131                                               mvsadcost, sad_per_bit);
   1132 
   1133                     if (thissad < bestsad)
   1134                     {
   1135                         bestsad = thissad;
   1136                         best_site = i;
   1137                     }
   1138                 }
   1139             }
   1140 
   1141             i++;
   1142         }
   1143 
   1144         if (best_site != last_site)
   1145         {
   1146             best_mv->as_mv.row += ss[best_site].mv.row;
   1147             best_mv->as_mv.col += ss[best_site].mv.col;
   1148             best_address += ss[best_site].offset;
   1149             last_site = best_site;
   1150         }
   1151         else if (best_address == in_what)
   1152             (*num00)++;
   1153     }
   1154 
   1155     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1156     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1157 
   1158     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1159            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1160 }
   1161 
   1162 int vp8_diamond_search_sadx4
   1163 (
   1164     MACROBLOCK *x,
   1165     BLOCK *b,
   1166     BLOCKD *d,
   1167     int_mv *ref_mv,
   1168     int_mv *best_mv,
   1169     int search_param,
   1170     int sad_per_bit,
   1171     int *num00,
   1172     vp8_variance_fn_ptr_t *fn_ptr,
   1173     int *mvcost[2],
   1174     int_mv *center_mv
   1175 )
   1176 {
   1177     int i, j, step;
   1178 
   1179     unsigned char *what = (*(b->base_src) + b->src);
   1180     int what_stride = b->src_stride;
   1181     unsigned char *in_what;
   1182     int pre_stride = x->e_mbd.pre.y_stride;
   1183     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1184     int in_what_stride = pre_stride;
   1185     unsigned char *best_address;
   1186 
   1187     int tot_steps;
   1188     int_mv this_mv;
   1189 
   1190     unsigned int bestsad;
   1191     unsigned int thissad;
   1192     int best_site = 0;
   1193     int last_site = 0;
   1194 
   1195     int ref_row;
   1196     int ref_col;
   1197     int this_row_offset;
   1198     int this_col_offset;
   1199     search_site *ss;
   1200 
   1201     unsigned char *check_here;
   1202 
   1203     int *mvsadcost[2];
   1204     int_mv fcenter_mv;
   1205 
   1206     mvsadcost[0] = x->mvsadcost[0];
   1207     mvsadcost[1] = x->mvsadcost[1];
   1208     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1209     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1210 
   1211     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1212     ref_row = ref_mv->as_mv.row;
   1213     ref_col = ref_mv->as_mv.col;
   1214     *num00 = 0;
   1215     best_mv->as_mv.row = ref_row;
   1216     best_mv->as_mv.col = ref_col;
   1217 
   1218     /* Work out the start point for the search */
   1219     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1220     best_address = in_what;
   1221 
   1222     /* Check the starting position */
   1223     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1224             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1225 
   1226     /* search_param determines the length of the initial step and hence the
   1227      * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   1228      * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1229      */
   1230     ss = &x->ss[search_param * x->searches_per_step];
   1231     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1232 
   1233     i = 1;
   1234 
   1235     for (step = 0; step < tot_steps ; step++)
   1236     {
   1237         int all_in = 1, t;
   1238 
   1239         /* To know if all neighbor points are within the bounds, 4 bounds
   1240          * checking are enough instead of checking 4 bounds for each
   1241          * points.
   1242          */
   1243         all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
   1244         all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
   1245         all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
   1246         all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
   1247 
   1248         if (all_in)
   1249         {
   1250             unsigned int sad_array[4];
   1251 
   1252             for (j = 0 ; j < x->searches_per_step ; j += 4)
   1253             {
   1254                 const unsigned char *block_offset[4];
   1255 
   1256                 for (t = 0; t < 4; t++)
   1257                     block_offset[t] = ss[i+t].offset + best_address;
   1258 
   1259                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1260 
   1261                 for (t = 0; t < 4; t++, i++)
   1262                 {
   1263                     if (sad_array[t] < bestsad)
   1264                     {
   1265                         this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
   1266                         this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
   1267                         sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
   1268                                                        mvsadcost, sad_per_bit);
   1269 
   1270                         if (sad_array[t] < bestsad)
   1271                         {
   1272                             bestsad = sad_array[t];
   1273                             best_site = i;
   1274                         }
   1275                     }
   1276                 }
   1277             }
   1278         }
   1279         else
   1280         {
   1281             for (j = 0 ; j < x->searches_per_step ; j++)
   1282             {
   1283                 /* Trap illegal vectors */
   1284                 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1285                 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1286 
   1287                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1288                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1289                 {
   1290                     check_here = ss[i].offset + best_address;
   1291                     thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1292 
   1293                     if (thissad < bestsad)
   1294                     {
   1295                         this_mv.as_mv.row = this_row_offset;
   1296                         this_mv.as_mv.col = this_col_offset;
   1297                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1298                                                   mvsadcost, sad_per_bit);
   1299 
   1300                         if (thissad < bestsad)
   1301                         {
   1302                             bestsad = thissad;
   1303                             best_site = i;
   1304                         }
   1305                     }
   1306                 }
   1307                 i++;
   1308             }
   1309         }
   1310 
   1311         if (best_site != last_site)
   1312         {
   1313             best_mv->as_mv.row += ss[best_site].mv.row;
   1314             best_mv->as_mv.col += ss[best_site].mv.col;
   1315             best_address += ss[best_site].offset;
   1316             last_site = best_site;
   1317         }
   1318         else if (best_address == in_what)
   1319             (*num00)++;
   1320     }
   1321 
   1322     this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1323     this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1324 
   1325     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1326            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1327 }
   1328 
   1329 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1330                         int sad_per_bit, int distance,
   1331                         vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1332                         int_mv *center_mv)
   1333 {
   1334     unsigned char *what = (*(b->base_src) + b->src);
   1335     int what_stride = b->src_stride;
   1336     unsigned char *in_what;
   1337     int pre_stride = x->e_mbd.pre.y_stride;
   1338     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1339     int in_what_stride = pre_stride;
   1340     int mv_stride = pre_stride;
   1341     unsigned char *bestaddress;
   1342     int_mv *best_mv = &d->bmi.mv;
   1343     int_mv this_mv;
   1344     unsigned int bestsad;
   1345     unsigned int thissad;
   1346     int r, c;
   1347 
   1348     unsigned char *check_here;
   1349 
   1350     int ref_row = ref_mv->as_mv.row;
   1351     int ref_col = ref_mv->as_mv.col;
   1352 
   1353     int row_min = ref_row - distance;
   1354     int row_max = ref_row + distance;
   1355     int col_min = ref_col - distance;
   1356     int col_max = ref_col + distance;
   1357 
   1358     int *mvsadcost[2];
   1359     int_mv fcenter_mv;
   1360 
   1361     mvsadcost[0] = x->mvsadcost[0];
   1362     mvsadcost[1] = x->mvsadcost[1];
   1363     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1364     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1365 
   1366     /* Work out the mid point for the search */
   1367     in_what = base_pre + d->offset;
   1368     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1369 
   1370     best_mv->as_mv.row = ref_row;
   1371     best_mv->as_mv.col = ref_col;
   1372 
   1373     /* Baseline value at the centre */
   1374     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1375                           in_what_stride, UINT_MAX)
   1376             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1377 
   1378     /* Apply further limits to prevent us looking using vectors that
   1379      * stretch beyiond the UMV border
   1380      */
   1381     if (col_min < x->mv_col_min)
   1382         col_min = x->mv_col_min;
   1383 
   1384     if (col_max > x->mv_col_max)
   1385         col_max = x->mv_col_max;
   1386 
   1387     if (row_min < x->mv_row_min)
   1388         row_min = x->mv_row_min;
   1389 
   1390     if (row_max > x->mv_row_max)
   1391         row_max = x->mv_row_max;
   1392 
   1393     for (r = row_min; r < row_max ; r++)
   1394     {
   1395         this_mv.as_mv.row = r;
   1396         check_here = r * mv_stride + in_what + col_min;
   1397 
   1398         for (c = col_min; c < col_max; c++)
   1399         {
   1400             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1401 
   1402             this_mv.as_mv.col = c;
   1403             thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1404                                       mvsadcost, sad_per_bit);
   1405 
   1406             if (thissad < bestsad)
   1407             {
   1408                 bestsad = thissad;
   1409                 best_mv->as_mv.row = r;
   1410                 best_mv->as_mv.col = c;
   1411                 bestaddress = check_here;
   1412             }
   1413 
   1414             check_here++;
   1415         }
   1416     }
   1417 
   1418     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1419     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1420 
   1421     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1422            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1423 }
   1424 
   1425 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1426                           int sad_per_bit, int distance,
   1427                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1428                           int_mv *center_mv)
   1429 {
   1430     unsigned char *what = (*(b->base_src) + b->src);
   1431     int what_stride = b->src_stride;
   1432     unsigned char *in_what;
   1433     int pre_stride = x->e_mbd.pre.y_stride;
   1434     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1435     int in_what_stride = pre_stride;
   1436     int mv_stride = pre_stride;
   1437     unsigned char *bestaddress;
   1438     int_mv *best_mv = &d->bmi.mv;
   1439     int_mv this_mv;
   1440     unsigned int bestsad;
   1441     unsigned int thissad;
   1442     int r, c;
   1443 
   1444     unsigned char *check_here;
   1445 
   1446     int ref_row = ref_mv->as_mv.row;
   1447     int ref_col = ref_mv->as_mv.col;
   1448 
   1449     int row_min = ref_row - distance;
   1450     int row_max = ref_row + distance;
   1451     int col_min = ref_col - distance;
   1452     int col_max = ref_col + distance;
   1453 
   1454     unsigned int sad_array[3];
   1455 
   1456     int *mvsadcost[2];
   1457     int_mv fcenter_mv;
   1458 
   1459     mvsadcost[0] = x->mvsadcost[0];
   1460     mvsadcost[1] = x->mvsadcost[1];
   1461     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1462     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1463 
   1464     /* Work out the mid point for the search */
   1465     in_what = base_pre + d->offset;
   1466     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1467 
   1468     best_mv->as_mv.row = ref_row;
   1469     best_mv->as_mv.col = ref_col;
   1470 
   1471     /* Baseline value at the centre */
   1472     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1473                           in_what_stride, UINT_MAX)
   1474             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1475 
   1476     /* Apply further limits to prevent us looking using vectors that stretch
   1477      * beyond the UMV border
   1478      */
   1479     if (col_min < x->mv_col_min)
   1480         col_min = x->mv_col_min;
   1481 
   1482     if (col_max > x->mv_col_max)
   1483         col_max = x->mv_col_max;
   1484 
   1485     if (row_min < x->mv_row_min)
   1486         row_min = x->mv_row_min;
   1487 
   1488     if (row_max > x->mv_row_max)
   1489         row_max = x->mv_row_max;
   1490 
   1491     for (r = row_min; r < row_max ; r++)
   1492     {
   1493         this_mv.as_mv.row = r;
   1494         check_here = r * mv_stride + in_what + col_min;
   1495         c = col_min;
   1496 
   1497         while ((c + 2) < col_max)
   1498         {
   1499             int i;
   1500 
   1501             fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1502 
   1503             for (i = 0; i < 3; i++)
   1504             {
   1505                 thissad = sad_array[i];
   1506 
   1507                 if (thissad < bestsad)
   1508                 {
   1509                     this_mv.as_mv.col = c;
   1510                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1511                                               mvsadcost, sad_per_bit);
   1512 
   1513                     if (thissad < bestsad)
   1514                     {
   1515                         bestsad = thissad;
   1516                         best_mv->as_mv.row = r;
   1517                         best_mv->as_mv.col = c;
   1518                         bestaddress = check_here;
   1519                     }
   1520                 }
   1521 
   1522                 check_here++;
   1523                 c++;
   1524             }
   1525         }
   1526 
   1527         while (c < col_max)
   1528         {
   1529             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1530 
   1531             if (thissad < bestsad)
   1532             {
   1533                 this_mv.as_mv.col = c;
   1534                 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1535                                           mvsadcost, sad_per_bit);
   1536 
   1537                 if (thissad < bestsad)
   1538                 {
   1539                     bestsad = thissad;
   1540                     best_mv->as_mv.row = r;
   1541                     best_mv->as_mv.col = c;
   1542                     bestaddress = check_here;
   1543                 }
   1544             }
   1545 
   1546             check_here ++;
   1547             c ++;
   1548         }
   1549 
   1550     }
   1551 
   1552     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1553     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1554 
   1555     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1556            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1557 }
   1558 
   1559 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1560                           int sad_per_bit, int distance,
   1561                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1562                           int_mv *center_mv)
   1563 {
   1564     unsigned char *what = (*(b->base_src) + b->src);
   1565     int what_stride = b->src_stride;
   1566     int pre_stride = x->e_mbd.pre.y_stride;
   1567     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1568     unsigned char *in_what;
   1569     int in_what_stride = pre_stride;
   1570     int mv_stride = pre_stride;
   1571     unsigned char *bestaddress;
   1572     int_mv *best_mv = &d->bmi.mv;
   1573     int_mv this_mv;
   1574     unsigned int bestsad;
   1575     unsigned int thissad;
   1576     int r, c;
   1577 
   1578     unsigned char *check_here;
   1579 
   1580     int ref_row = ref_mv->as_mv.row;
   1581     int ref_col = ref_mv->as_mv.col;
   1582 
   1583     int row_min = ref_row - distance;
   1584     int row_max = ref_row + distance;
   1585     int col_min = ref_col - distance;
   1586     int col_max = ref_col + distance;
   1587 
   1588     DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
   1589     unsigned int sad_array[3];
   1590 
   1591     int *mvsadcost[2];
   1592     int_mv fcenter_mv;
   1593 
   1594     mvsadcost[0] = x->mvsadcost[0];
   1595     mvsadcost[1] = x->mvsadcost[1];
   1596     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1597     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1598 
   1599     /* Work out the mid point for the search */
   1600     in_what = base_pre + d->offset;
   1601     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1602 
   1603     best_mv->as_mv.row = ref_row;
   1604     best_mv->as_mv.col = ref_col;
   1605 
   1606     /* Baseline value at the centre */
   1607     bestsad = fn_ptr->sdf(what, what_stride,
   1608                           bestaddress, in_what_stride, UINT_MAX)
   1609             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1610 
   1611     /* Apply further limits to prevent us looking using vectors that stretch
   1612      * beyond the UMV border
   1613      */
   1614     if (col_min < x->mv_col_min)
   1615         col_min = x->mv_col_min;
   1616 
   1617     if (col_max > x->mv_col_max)
   1618         col_max = x->mv_col_max;
   1619 
   1620     if (row_min < x->mv_row_min)
   1621         row_min = x->mv_row_min;
   1622 
   1623     if (row_max > x->mv_row_max)
   1624         row_max = x->mv_row_max;
   1625 
   1626     for (r = row_min; r < row_max ; r++)
   1627     {
   1628         this_mv.as_mv.row = r;
   1629         check_here = r * mv_stride + in_what + col_min;
   1630         c = col_min;
   1631 
   1632         while ((c + 7) < col_max)
   1633         {
   1634             int i;
   1635 
   1636             fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
   1637 
   1638             for (i = 0; i < 8; i++)
   1639             {
   1640                 thissad = sad_array8[i];
   1641 
   1642                 if (thissad < bestsad)
   1643                 {
   1644                     this_mv.as_mv.col = c;
   1645                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1646                                               mvsadcost, sad_per_bit);
   1647 
   1648                     if (thissad < bestsad)
   1649                     {
   1650                         bestsad = thissad;
   1651                         best_mv->as_mv.row = r;
   1652                         best_mv->as_mv.col = c;
   1653                         bestaddress = check_here;
   1654                     }
   1655                 }
   1656 
   1657                 check_here++;
   1658                 c++;
   1659             }
   1660         }
   1661 
   1662         while ((c + 2) < col_max)
   1663         {
   1664             int i;
   1665 
   1666             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1667 
   1668             for (i = 0; i < 3; i++)
   1669             {
   1670                 thissad = sad_array[i];
   1671 
   1672                 if (thissad < bestsad)
   1673                 {
   1674                     this_mv.as_mv.col = c;
   1675                     thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1676                         mvsadcost, sad_per_bit);
   1677 
   1678                     if (thissad < bestsad)
   1679                     {
   1680                         bestsad = thissad;
   1681                         best_mv->as_mv.row = r;
   1682                         best_mv->as_mv.col = c;
   1683                         bestaddress = check_here;
   1684                     }
   1685                 }
   1686 
   1687                 check_here++;
   1688                 c++;
   1689             }
   1690         }
   1691 
   1692         while (c < col_max)
   1693         {
   1694             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1695 
   1696             if (thissad < bestsad)
   1697             {
   1698                 this_mv.as_mv.col = c;
   1699                 thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1700                     mvsadcost, sad_per_bit);
   1701 
   1702                 if (thissad < bestsad)
   1703                 {
   1704                     bestsad = thissad;
   1705                     best_mv->as_mv.row = r;
   1706                     best_mv->as_mv.col = c;
   1707                     bestaddress = check_here;
   1708                 }
   1709             }
   1710 
   1711             check_here ++;
   1712             c ++;
   1713         }
   1714     }
   1715 
   1716     this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1717     this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1718 
   1719     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1720            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1721 }
   1722 
   1723 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1724                             int error_per_bit, int search_range,
   1725                             vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1726                             int_mv *center_mv)
   1727 {
   1728     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1729     int i, j;
   1730     short this_row_offset, this_col_offset;
   1731 
   1732     int what_stride = b->src_stride;
   1733     int pre_stride = x->e_mbd.pre.y_stride;
   1734     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1735     int in_what_stride = pre_stride;
   1736     unsigned char *what = (*(b->base_src) + b->src);
   1737     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1738         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1739     unsigned char *check_here;
   1740     int_mv this_mv;
   1741     unsigned int bestsad;
   1742     unsigned int thissad;
   1743 
   1744     int *mvsadcost[2];
   1745     int_mv fcenter_mv;
   1746 
   1747     mvsadcost[0] = x->mvsadcost[0];
   1748     mvsadcost[1] = x->mvsadcost[1];
   1749     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1750     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1751 
   1752     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1753                           in_what_stride, UINT_MAX)
   1754             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1755 
   1756     for (i=0; i<search_range; i++)
   1757     {
   1758         int best_site = -1;
   1759 
   1760         for (j = 0 ; j < 4 ; j++)
   1761         {
   1762             this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1763             this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1764 
   1765             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1766             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1767             {
   1768                 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1769                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1770 
   1771                 if (thissad < bestsad)
   1772                 {
   1773                     this_mv.as_mv.row = this_row_offset;
   1774                     this_mv.as_mv.col = this_col_offset;
   1775                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1776 
   1777                     if (thissad < bestsad)
   1778                     {
   1779                         bestsad = thissad;
   1780                         best_site = j;
   1781                     }
   1782                 }
   1783             }
   1784         }
   1785 
   1786         if (best_site == -1)
   1787             break;
   1788         else
   1789         {
   1790             ref_mv->as_mv.row += neighbors[best_site].row;
   1791             ref_mv->as_mv.col += neighbors[best_site].col;
   1792             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1793         }
   1794     }
   1795 
   1796     this_mv.as_mv.row = ref_mv->as_mv.row << 3;
   1797     this_mv.as_mv.col = ref_mv->as_mv.col << 3;
   1798 
   1799     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1800            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1801 }
   1802 
   1803 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1804                               int_mv *ref_mv, int error_per_bit,
   1805                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1806                               int *mvcost[2], int_mv *center_mv)
   1807 {
   1808     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1809     int i, j;
   1810     short this_row_offset, this_col_offset;
   1811 
   1812     int what_stride = b->src_stride;
   1813     int pre_stride = x->e_mbd.pre.y_stride;
   1814     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1815     int in_what_stride = pre_stride;
   1816     unsigned char *what = (*(b->base_src) + b->src);
   1817     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1818         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1819     unsigned char *check_here;
   1820     int_mv this_mv;
   1821     unsigned int bestsad;
   1822     unsigned int thissad;
   1823 
   1824     int *mvsadcost[2];
   1825     int_mv fcenter_mv;
   1826 
   1827     mvsadcost[0] = x->mvsadcost[0];
   1828     mvsadcost[1] = x->mvsadcost[1];
   1829     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1830     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1831 
   1832     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1833                           in_what_stride, UINT_MAX)
   1834             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1835 
   1836     for (i=0; i<search_range; i++)
   1837     {
   1838         int best_site = -1;
   1839         int all_in = 1;
   1840 
   1841         all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
   1842         all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
   1843         all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
   1844         all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
   1845 
   1846         if(all_in)
   1847         {
   1848             unsigned int sad_array[4];
   1849             const unsigned char *block_offset[4];
   1850             block_offset[0] = best_address - in_what_stride;
   1851             block_offset[1] = best_address - 1;
   1852             block_offset[2] = best_address + 1;
   1853             block_offset[3] = best_address + in_what_stride;
   1854 
   1855             fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1856 
   1857             for (j = 0; j < 4; j++)
   1858             {
   1859                 if (sad_array[j] < bestsad)
   1860                 {
   1861                     this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
   1862                     this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
   1863                     sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1864 
   1865                     if (sad_array[j] < bestsad)
   1866                     {
   1867                         bestsad = sad_array[j];
   1868                         best_site = j;
   1869                     }
   1870                 }
   1871             }
   1872         }
   1873         else
   1874         {
   1875             for (j = 0 ; j < 4 ; j++)
   1876             {
   1877                 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1878                 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1879 
   1880                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1881                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1882                 {
   1883                     check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1884                     thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1885 
   1886                     if (thissad < bestsad)
   1887                     {
   1888                         this_mv.as_mv.row = this_row_offset;
   1889                         this_mv.as_mv.col = this_col_offset;
   1890                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1891 
   1892                         if (thissad < bestsad)
   1893                         {
   1894                             bestsad = thissad;
   1895                             best_site = j;
   1896                         }
   1897                     }
   1898                 }
   1899             }
   1900         }
   1901 
   1902         if (best_site == -1)
   1903             break;
   1904         else
   1905         {
   1906             ref_mv->as_mv.row += neighbors[best_site].row;
   1907             ref_mv->as_mv.col += neighbors[best_site].col;
   1908             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1909         }
   1910     }
   1911 
   1912     this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   1913     this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   1914 
   1915     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1916            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1917 }
   1918 
   1919 #ifdef VP8_ENTROPY_STATS
   1920 void print_mode_context(void)
   1921 {
   1922     FILE *f = fopen("modecont.c", "w");
   1923     int i, j;
   1924 
   1925     fprintf(f, "#include \"entropy.h\"\n");
   1926     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1927     fprintf(f, "{\n");
   1928 
   1929     for (j = 0; j < 6; j++)
   1930     {
   1931         fprintf(f, "  { /* %d */\n", j);
   1932         fprintf(f, "    ");
   1933 
   1934         for (i = 0; i < 4; i++)
   1935         {
   1936             int overal_prob;
   1937             int this_prob;
   1938             int count;
   1939 
   1940             /* Overall probs */
   1941             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1942 
   1943             if (count)
   1944                 overal_prob = 256 * mv_mode_cts[i][0] / count;
   1945             else
   1946                 overal_prob = 128;
   1947 
   1948             if (overal_prob == 0)
   1949                 overal_prob = 1;
   1950 
   1951             /* context probs */
   1952             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1953 
   1954             if (count)
   1955                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1956             else
   1957                 this_prob = 128;
   1958 
   1959             if (this_prob == 0)
   1960                 this_prob = 1;
   1961 
   1962             fprintf(f, "%5d, ", this_prob);
   1963         }
   1964 
   1965         fprintf(f, "  },\n");
   1966     }
   1967 
   1968     fprintf(f, "};\n");
   1969     fclose(f);
   1970 }
   1971 
   1972 /* MV ref count VP8_ENTROPY_STATS stats code */
   1973 #ifdef VP8_ENTROPY_STATS
   1974 void init_mv_ref_counts()
   1975 {
   1976     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1977     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1978 }
   1979 
   1980 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
   1981 {
   1982     if (m == ZEROMV)
   1983     {
   1984         ++mv_ref_ct [ct[0]] [0] [0];
   1985         ++mv_mode_cts[0][0];
   1986     }
   1987     else
   1988     {
   1989         ++mv_ref_ct [ct[0]] [0] [1];
   1990         ++mv_mode_cts[0][1];
   1991 
   1992         if (m == NEARESTMV)
   1993         {
   1994             ++mv_ref_ct [ct[1]] [1] [0];
   1995             ++mv_mode_cts[1][0];
   1996         }
   1997         else
   1998         {
   1999             ++mv_ref_ct [ct[1]] [1] [1];
   2000             ++mv_mode_cts[1][1];
   2001 
   2002             if (m == NEARMV)
   2003             {
   2004                 ++mv_ref_ct [ct[2]] [2] [0];
   2005                 ++mv_mode_cts[2][0];
   2006             }
   2007             else
   2008             {
   2009                 ++mv_ref_ct [ct[2]] [2] [1];
   2010                 ++mv_mode_cts[2][1];
   2011 
   2012                 if (m == NEWMV)
   2013                 {
   2014                     ++mv_ref_ct [ct[3]] [3] [0];
   2015                     ++mv_mode_cts[3][0];
   2016                 }
   2017                 else
   2018                 {
   2019                     ++mv_ref_ct [ct[3]] [3] [1];
   2020                     ++mv_mode_cts[3][1];
   2021                 }
   2022             }
   2023         }
   2024     }
   2025 }
   2026 
   2027 #endif/* END MV ref count VP8_ENTROPY_STATS stats code */
   2028 
   2029 #endif
   2030