/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_config.h"
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "vp8/common/findnearmv.h"
#include "vp8/common/common.h"
#include "vpx_dsp/vpx_dsp_common.h"

/* Estimated bit cost of coding (mv - ref), scaled by Weight and then by
 * 1/128 (>> 7).  Row/col costs come from the per-component tables in
 * mvcost[0]/mvcost[1]. */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  /* MV costing is based on the distribution of vectors in the previous
   * frame and as such will tend to over state the cost of vectors. In
   * addition coding a new vector can have a knock on effect on the cost
   * of subsequent vectors and the quality of prediction from NEAR and
   * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
   * limited extent, for some account to be taken of these factors.
   */
  /* Deltas are halved (>> 1) before lookup and clamped to the table
   * range [0, MVvals] so the index cannot run off the cost arrays. */
  const int mv_idx_row =
      clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
  const int mv_idx_col =
      clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
  return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * Weight) >> 7;
}

/* Rate term for RD motion search: bit cost of (mv - ref) times
 * error_per_bit, rounded (+128) and scaled by 1/256 (>> 8).
 * Returns 0 when no cost tables are supplied. */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  /* Ignore mv costing if mvcost is NULL */
  if (mvcost) {
    const int mv_idx_row =
        clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
    const int mv_idx_col =
        clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
    return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * error_per_bit +
            128) >>
           8;
  }
  return 0;
}

/* Same idea as mv_err_cost() but on a full-pixel basis for SAD-driven
 * searches; indexes the signed delta directly (no >> 1, no clamp). */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  /* Calculate sad error cost on full pixel basis. */
  /* Ignore mv costing if mvsadcost is NULL */
  if (mvsadcost) {
    return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
             mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
                error_per_bit +
            128) >>
           8;
  }
  return 0;
}

/* Build the search-site table x->ss for a 4-point (N, S, W, E) diamond
 * search.  Site 0 is the centre; each subsequent step halves the radius
 * Len, starting from MAX_FIRST_STEP.  "offset" is the precomputed buffer
 * displacement for the given stride. */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  int Len;
  int search_site_count = 0;

  /* Generate offsets for 4 search sites per step. */
  Len = MAX_FIRST_STEP;
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

  while (Len > 0) {
    /* Compute offsets for search sites: north. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride;
    search_site_count++;

    /* South. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride;
    search_site_count++;

    /* West. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = -Len;
    search_site_count++;

    /* East. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = Len;
    search_site_count++;

    /* Contract. */
    Len /= 2;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 4;
}

/* As vp8_init_dsmotion_compensation() but with 8 sites per step
 * (the 4 compass points plus the 4 diagonals). */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  int Len;
  int search_site_count = 0;

  /* Generate offsets for 8 search sites per step. */
  Len = MAX_FIRST_STEP;
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

  while (Len > 0) {
    /* Compute offsets for search sites: north. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride;
    search_site_count++;

    /* South. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride;
    search_site_count++;

    /* West. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = -Len;
    search_site_count++;

    /* East. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = Len;
    search_site_count++;

    /* North-west. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride - Len;
    search_site_count++;

    /* North-east. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride + Len;
    search_site_count++;

    /* South-west. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride - Len;
    search_site_count++;

    /* South-east. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride + Len;
    search_site_count++;

    /* Contract. */
    Len /= 2;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 8;
}

/*
 * To avoid the penalty for crossing cache-line read, preload the reference
 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduced the cpu
 * cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
 * could reduce the area.
 */

/* NOTE: the macros below capture locals of the enclosing search function
 * by name (mvcost, rr, rc, error_per_bit, y, y_stride, offset, ...). */

/* estimated cost of a motion vector (r,c) */
#define MVC(r, c)                                                             \
  (mvcost                                                                     \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function.
 */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
/* run statement s when (r,c) is inside [minc,maxc]x[minr,maxr], else e */
#define IFMVCV(r, c, s, e) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best; out-of-range
 * candidates get v = UINT_MAX so they can never win a comparison */
#define CHECK_BETTER(v, r, c)                           \
  IFMVCV(r, c,                                          \
         {                                              \
           thismse = DIST(r, c);                        \
           if ((v = (MVC(r, c) + thismse)) < besterr) { \
             besterr = v;                               \
             br = r;                                    \
             bc = c;                                    \
             *distortion = thismse;                     \
             *sse1 = sse;                               \
           }                                            \
         },                                             \
         v = UINT_MAX;)

/* Iterative sub-pixel refinement around *bestmv (given in full pel on
 * entry, returned in 1/8 pel).  Runs up to 4 iterations at half-pel then
 * up to 4 at quarter-pel; each iteration probes left/right/up/down plus
 * the best diagonal.  Internally br/bc are kept in quarter-pel units
 * (full pel * 4) and the result is scaled by 2 to 1/8 pel on exit.
 * Returns the best error (variance + mv rate), or INT_MAX if the winner
 * strays beyond MAX_FULL_PEL_VAL of ref_mv. */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1) {
  unsigned char *z = (*(b->base_src) + b->src);

  int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
  int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
  int tr = br, tc = bc;
  unsigned int besterr;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  unsigned int whichdir;
  unsigned int halfiters = 4;
  unsigned int quarteriters = 4;
  int thismse;

  /* Legal candidate window, additionally bounded by the codable mv range
   * ((1 << mvlong_width) - 1) around the reference vector. */
  int minc = VPXMAX(x->mv_col_min * 4,
                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
  int maxc = VPXMIN(x->mv_col_max * 4,
                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
  int minr = VPXMAX(x->mv_row_min * 4,
                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
  int maxr = VPXMIN(x->mv_row_max * 4,
                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

  int y_stride;
  int offset;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;
  int buf_r1, buf_r2, buf_c1;

  /* Clamping to avoid out-of-range data access */
  buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
               ? (bestmv->as_mv.row - x->mv_row_min)
               : 3;
  buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
               ? (x->mv_row_max - bestmv->as_mv.row)
               : 3;
  buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
               ? (bestmv->as_mv.col - x->mv_col_min)
               : 3;
  y_stride = 32;

  /* Copy to intermediate buffer before searching. */
  vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
               y_stride, 16 + buf_r1 + buf_r2);
  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  /* central mv, converted to 1/8-pel units */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;

  /* calculate central point error */
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */
  while (--halfiters) {
    /* 1/2 pel */
    CHECK_BETTER(left, tr, tc - 2);
    CHECK_BETTER(right, tr, tc + 2);
    CHECK_BETTER(up, tr - 2, tc);
    CHECK_BETTER(down, tr + 2, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
      case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
      case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
      case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */

  /* 1/4 pel */
  while (--quarteriters) {
    CHECK_BETTER(left, tr, tc - 1);
    CHECK_BETTER(right, tr, tc + 1);
    CHECK_BETTER(up, tr - 1, tc);
    CHECK_BETTER(down, tr + 1, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
      case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
      case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
      case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* quarter-pel (br,bc) -> 1/8-pel result */
  bestmv->as_mv.row = br * 2;
  bestmv->as_mv.col = bc * 2;

  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    return INT_MAX;
  }

  return besterr;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER

/* One-shot sub-pixel search: probes all half-pel neighbours plus best
 * diagonal, then the quarter-pel neighbours around that winner.
 * bestmv is full pel on entry, 1/8 pel on return; returns best error. */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv, converted to 1/8-pel units */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal: pick the quadrant from the four axial
   * results (0 = up-left, 1 = up-right, 2 = down-left, 3 = down-right) */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* time to check quarter pels: move the integer-pel base pointer so it
   * matches the half-pel winner before probing around it */
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;

  if (bestmv->as_mv.col < startmv.as_mv.col) y--;

  startmv = *bestmv;

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;

  if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col = startmv.as_mv.col - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    /* start col is on a full-pel boundary: step back one pel, phase 6/8 */
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                       b->src_stride, &sse);
  }

  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;

  if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row = startmv.as_mv.row - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                       b->src_stride, &sse);
  }

  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

  this_mv = startmv;

  switch (whichdir) {
    case 0:

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                             this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                             b->src_stride, &sse);
        }
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
                             z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
                             &sse);
        }
      }

      break;
    case 1:
      this_mv.as_mv.col += 2;

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                           b->src_stride, &sse);
      }

      break;
    case 2:
      this_mv.as_mv.row += 2;

      if (startmv.as_mv.col & 7) {
        this_mv.as_mv.col -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                           b->src_stride, &sse);
      }

      break;
    case 3:
      this_mv.as_mv.col += 2;
      this_mv.as_mv.row += 2;
      thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                         this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}

/* Half-pel-only refinement: same axial + best-diagonal probe pattern as
 * vp8_find_best_sub_pixel_step() but stops after the half-pel stage.
 * bestmv is full pel on entry, 1/8 pel on return; returns best error. */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv, converted to 1/8-pel units */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal - */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}

/* all_in <- 1 iff every point within +-range of (br,bc) is legal; lets the
 * inner loops skip per-point clipping.  Captures x, br, bc, all_in. */
#define CHECK_BOUNDS(range)                    \
  {                                            \
    all_in = 1;                                \
    all_in &= ((br - range) >= x->mv_row_min); \
    all_in &= ((br + range) <= x->mv_row_max); \
    all_in &= ((bc - range) >= x->mv_col_min); \
    all_in &= ((bc + range) <= x->mv_col_max); \
  }

/* skip (continue) the current loop iteration when this_mv is out of range */
#define CHECK_POINT                                  \
  {                                                  \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }

/* fold thissad into bestsad/best_site; the cheap SAD test is done first
 * and the mv rate is only added when the raw SAD already beats the best */
#define CHECK_BETTER                                                     \
  {                                                                      \
    if (thissad < bestsad) {                                             \
      thissad +=                                                         \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) {                                           \
        bestsad = thissad;                                               \
        best_site = i;                                                   \
      }                                                                  \
    }                                                                    \
  }

/* For each previous winning hex vertex k, the 3 new points to probe. */
static const MV next_chkpts[6][3] = {
  { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
  { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
  { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
};

/* Full-pel hexagon SAD search starting from ref_mv (clamped in place).
 * A 6-point hexagon walks toward lower SAD (up to hex_range iterations,
 * re-probing only 3 points per move), then a 4-neighbour diamond refines
 * the result.  Writes the winner to best_mv (full pel) and returns its
 * SAD + mv-rate score. */
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int *mvcost[2], int_mv *center_mv) {
  MV hex[6] = {
    { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
  };
  MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
  int i, j;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

  int in_what_stride = pre_stride;
  int br, bc;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  unsigned char *base_offset;
  unsigned char *this_offset;
  int k = -1; /* index of last winning hex vertex, -1 = none yet */
  int all_in;
  int best_site = -1;
  int hex_range = 127;
  int dia_range = 8;

  int_mv fcenter_mv;
  /* center_mv arrives in 1/8 pel; convert to full pel for SAD costing */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  (void)mvcost;

  /* adjust ref_mv to make sure it is within MV range */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  /* Work out the start point for the search */
  base_offset = (unsigned char *)(base_pre + d->offset);
  this_offset = base_offset + (br * (pre_stride)) + bc;
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
  /* Lower search range based on prediction info */
  if (search_param >= 6)
    goto cal_neighbors;
  else if (search_param >= 5)
    hex_range = 4;
  else if (search_param >= 4)
    hex_range = 6;
  else if (search_param >= 3)
    hex_range = 15;
  else if (search_param >= 2)
    hex_range = 31;
  else if (search_param >= 1)
    hex_range = 63;

  dia_range = 8;
#else
  (void)search_param;
#endif

  /* hex search: first pass probes all 6 vertices */
  CHECK_BOUNDS(2)

  if (all_in) {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  } else {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      CHECK_POINT
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  }

  if (best_site == -1) {
    goto cal_neighbors;
  } else {
    br += hex[best_site].row;
    bc += hex[best_site].col;
    k = best_site;
  }

  /* subsequent passes: only the 3 points not already examined */
  for (j = 1; j < hex_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(2)

    if (all_in) {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += next_chkpts[k][best_site].row;
      bc += next_chkpts[k][best_site].col;
      /* new vertex index, reduced mod 6 */
      k += 5 + best_site;
      if (k >= 12) {
        k -= 12;
      } else if (k >= 6) {
        k -= 6;
      }
    }
  }

  /* check 4 1-away neighbors */
cal_neighbors:
  for (j = 0; j < dia_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(1)

    if (all_in) {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += neighbors[best_site].row;
      bc += neighbors[best_site].col;
    }
  }

  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;

  return bestsad;
}
#undef CHECK_BOUNDS
#undef CHECK_POINT
#undef CHECK_BETTER

/* Full-pel diamond SAD search using the site table built by
 * vp8_init_dsmotion_compensation()/vp8_init3smotion_compensation().
 * search_param skips the first steps (larger param = smaller first step);
 * *num00 counts steps where the centre stayed best.  best_mv is full pel;
 * the return value is variance + mv-rate of the winner (mv in 1/8 pel). */
int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* center_mv arrives in 1/8 pel; convert to full pel for SAD costing */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence
   * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1; /* ss[0] is the centre, already scored above */

  for (step = 0; step < tot_steps; ++step) {
    for (j = 0; j < x->searches_per_step; ++j) {
      /* Trap illegal vectors */
      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max))

      {
        check_here = ss[i].offset + best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = i;
          }
        }
      }

      i++;
    }

    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      /* centre still best and we never moved: count the "no motion" step */
      (*num00)++;
    }
  }

  this_mv.as_mv.row = best_mv->as_mv.row << 3;
  this_mv.as_mv.col = best_mv->as_mv.col << 3;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

#if HAVE_SSE2 || HAVE_MSA
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
1142 int pre_stride = x->e_mbd.pre.y_stride; 1143 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1144 int in_what_stride = pre_stride; 1145 unsigned char *best_address; 1146 1147 int tot_steps; 1148 int_mv this_mv; 1149 1150 unsigned int bestsad; 1151 unsigned int thissad; 1152 int best_site = 0; 1153 int last_site = 0; 1154 1155 int ref_row; 1156 int ref_col; 1157 int this_row_offset; 1158 int this_col_offset; 1159 search_site *ss; 1160 1161 unsigned char *check_here; 1162 1163 int *mvsadcost[2]; 1164 int_mv fcenter_mv; 1165 1166 mvsadcost[0] = x->mvsadcost[0]; 1167 mvsadcost[1] = x->mvsadcost[1]; 1168 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1169 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1170 1171 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, 1172 x->mv_row_max); 1173 ref_row = ref_mv->as_mv.row; 1174 ref_col = ref_mv->as_mv.col; 1175 *num00 = 0; 1176 best_mv->as_mv.row = ref_row; 1177 best_mv->as_mv.col = ref_col; 1178 1179 /* Work out the start point for the search */ 1180 in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + 1181 ref_col); 1182 best_address = in_what; 1183 1184 /* Check the starting position */ 1185 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + 1186 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1187 1188 /* search_param determines the length of the initial step and hence the 1189 * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 = 1190 * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 1191 */ 1192 ss = &x->ss[search_param * x->searches_per_step]; 1193 tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1194 1195 i = 1; 1196 1197 for (step = 0; step < tot_steps; ++step) { 1198 int all_in = 1, t; 1199 1200 /* To know if all neighbor points are within the bounds, 4 bounds 1201 * checking are enough instead of checking 4 bounds for each 1202 * points. 
1203 */ 1204 all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); 1205 all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); 1206 all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); 1207 all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); 1208 1209 if (all_in) { 1210 unsigned int sad_array[4]; 1211 1212 for (j = 0; j < x->searches_per_step; j += 4) { 1213 const unsigned char *block_offset[4]; 1214 1215 for (t = 0; t < 4; ++t) { 1216 block_offset[t] = ss[i + t].offset + best_address; 1217 } 1218 1219 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1220 sad_array); 1221 1222 for (t = 0; t < 4; t++, i++) { 1223 if (sad_array[t] < bestsad) { 1224 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; 1225 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; 1226 sad_array[t] += 1227 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1228 1229 if (sad_array[t] < bestsad) { 1230 bestsad = sad_array[t]; 1231 best_site = i; 1232 } 1233 } 1234 } 1235 } 1236 } else { 1237 for (j = 0; j < x->searches_per_step; ++j) { 1238 /* Trap illegal vectors */ 1239 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1240 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1241 1242 if ((this_col_offset > x->mv_col_min) && 1243 (this_col_offset < x->mv_col_max) && 1244 (this_row_offset > x->mv_row_min) && 1245 (this_row_offset < x->mv_row_max)) { 1246 check_here = ss[i].offset + best_address; 1247 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1248 1249 if (thissad < bestsad) { 1250 this_mv.as_mv.row = this_row_offset; 1251 this_mv.as_mv.col = this_col_offset; 1252 thissad += 1253 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1254 1255 if (thissad < bestsad) { 1256 bestsad = thissad; 1257 best_site = i; 1258 } 1259 } 1260 } 1261 i++; 1262 } 1263 } 1264 1265 if (best_site != last_site) { 1266 best_mv->as_mv.row += ss[best_site].mv.row; 1267 best_mv->as_mv.col 
+= ss[best_site].mv.col; 1268 best_address += ss[best_site].offset; 1269 last_site = best_site; 1270 } else if (best_address == in_what) { 1271 (*num00)++; 1272 } 1273 } 1274 1275 this_mv.as_mv.row = best_mv->as_mv.row * 8; 1276 this_mv.as_mv.col = best_mv->as_mv.col * 8; 1277 1278 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + 1279 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1280 } 1281 #endif // HAVE_SSE2 || HAVE_MSA 1282 1283 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1284 int sad_per_bit, int distance, 1285 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1286 int_mv *center_mv) { 1287 unsigned char *what = (*(b->base_src) + b->src); 1288 int what_stride = b->src_stride; 1289 unsigned char *in_what; 1290 int pre_stride = x->e_mbd.pre.y_stride; 1291 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1292 int in_what_stride = pre_stride; 1293 int mv_stride = pre_stride; 1294 unsigned char *bestaddress; 1295 int_mv *best_mv = &d->bmi.mv; 1296 int_mv this_mv; 1297 unsigned int bestsad; 1298 unsigned int thissad; 1299 int r, c; 1300 1301 unsigned char *check_here; 1302 1303 int ref_row = ref_mv->as_mv.row; 1304 int ref_col = ref_mv->as_mv.col; 1305 1306 int row_min = ref_row - distance; 1307 int row_max = ref_row + distance; 1308 int col_min = ref_col - distance; 1309 int col_max = ref_col + distance; 1310 1311 int *mvsadcost[2]; 1312 int_mv fcenter_mv; 1313 1314 mvsadcost[0] = x->mvsadcost[0]; 1315 mvsadcost[1] = x->mvsadcost[1]; 1316 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1317 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1318 1319 /* Work out the mid point for the search */ 1320 in_what = base_pre + d->offset; 1321 bestaddress = in_what + (ref_row * pre_stride) + ref_col; 1322 1323 best_mv->as_mv.row = ref_row; 1324 best_mv->as_mv.col = ref_col; 1325 1326 /* Baseline value at the centre */ 1327 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + 1328 
mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1329 1330 /* Apply further limits to prevent us looking using vectors that 1331 * stretch beyiond the UMV border 1332 */ 1333 if (col_min < x->mv_col_min) col_min = x->mv_col_min; 1334 1335 if (col_max > x->mv_col_max) col_max = x->mv_col_max; 1336 1337 if (row_min < x->mv_row_min) row_min = x->mv_row_min; 1338 1339 if (row_max > x->mv_row_max) row_max = x->mv_row_max; 1340 1341 for (r = row_min; r < row_max; ++r) { 1342 this_mv.as_mv.row = r; 1343 check_here = r * mv_stride + in_what + col_min; 1344 1345 for (c = col_min; c < col_max; ++c) { 1346 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1347 1348 this_mv.as_mv.col = c; 1349 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1350 1351 if (thissad < bestsad) { 1352 bestsad = thissad; 1353 best_mv->as_mv.row = r; 1354 best_mv->as_mv.col = c; 1355 bestaddress = check_here; 1356 } 1357 1358 check_here++; 1359 } 1360 } 1361 1362 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1363 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1364 1365 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + 1366 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1367 } 1368 1369 #if HAVE_SSSE3 1370 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1371 int sad_per_bit, int distance, 1372 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1373 int_mv *center_mv) { 1374 unsigned char *what = (*(b->base_src) + b->src); 1375 int what_stride = b->src_stride; 1376 unsigned char *in_what; 1377 int pre_stride = x->e_mbd.pre.y_stride; 1378 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1379 int in_what_stride = pre_stride; 1380 int mv_stride = pre_stride; 1381 unsigned char *bestaddress; 1382 int_mv *best_mv = &d->bmi.mv; 1383 int_mv this_mv; 1384 unsigned int bestsad; 1385 unsigned int thissad; 1386 int r, c; 1387 1388 unsigned char *check_here; 1389 1390 int ref_row = 
ref_mv->as_mv.row; 1391 int ref_col = ref_mv->as_mv.col; 1392 1393 int row_min = ref_row - distance; 1394 int row_max = ref_row + distance; 1395 int col_min = ref_col - distance; 1396 int col_max = ref_col + distance; 1397 1398 unsigned int sad_array[3]; 1399 1400 int *mvsadcost[2]; 1401 int_mv fcenter_mv; 1402 1403 mvsadcost[0] = x->mvsadcost[0]; 1404 mvsadcost[1] = x->mvsadcost[1]; 1405 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1406 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1407 1408 /* Work out the mid point for the search */ 1409 in_what = base_pre + d->offset; 1410 bestaddress = in_what + (ref_row * pre_stride) + ref_col; 1411 1412 best_mv->as_mv.row = ref_row; 1413 best_mv->as_mv.col = ref_col; 1414 1415 /* Baseline value at the centre */ 1416 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + 1417 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1418 1419 /* Apply further limits to prevent us looking using vectors that stretch 1420 * beyond the UMV border 1421 */ 1422 if (col_min < x->mv_col_min) col_min = x->mv_col_min; 1423 1424 if (col_max > x->mv_col_max) col_max = x->mv_col_max; 1425 1426 if (row_min < x->mv_row_min) row_min = x->mv_row_min; 1427 1428 if (row_max > x->mv_row_max) row_max = x->mv_row_max; 1429 1430 for (r = row_min; r < row_max; ++r) { 1431 this_mv.as_mv.row = r; 1432 check_here = r * mv_stride + in_what + col_min; 1433 c = col_min; 1434 1435 while ((c + 2) < col_max) { 1436 int i; 1437 1438 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1439 1440 for (i = 0; i < 3; ++i) { 1441 thissad = sad_array[i]; 1442 1443 if (thissad < bestsad) { 1444 this_mv.as_mv.col = c; 1445 thissad += 1446 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1447 1448 if (thissad < bestsad) { 1449 bestsad = thissad; 1450 best_mv->as_mv.row = r; 1451 best_mv->as_mv.col = c; 1452 bestaddress = check_here; 1453 } 1454 } 1455 1456 check_here++; 1457 c++; 1458 } 1459 } 1460 1461 
while (c < col_max) { 1462 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1463 1464 if (thissad < bestsad) { 1465 this_mv.as_mv.col = c; 1466 thissad += 1467 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1468 1469 if (thissad < bestsad) { 1470 bestsad = thissad; 1471 best_mv->as_mv.row = r; 1472 best_mv->as_mv.col = c; 1473 bestaddress = check_here; 1474 } 1475 } 1476 1477 check_here++; 1478 c++; 1479 } 1480 } 1481 1482 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1483 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1484 1485 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + 1486 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1487 } 1488 #endif // HAVE_SSSE3 1489 1490 #if HAVE_SSE4_1 1491 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1492 int sad_per_bit, int distance, 1493 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1494 int_mv *center_mv) { 1495 unsigned char *what = (*(b->base_src) + b->src); 1496 int what_stride = b->src_stride; 1497 int pre_stride = x->e_mbd.pre.y_stride; 1498 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1499 unsigned char *in_what; 1500 int in_what_stride = pre_stride; 1501 int mv_stride = pre_stride; 1502 unsigned char *bestaddress; 1503 int_mv *best_mv = &d->bmi.mv; 1504 int_mv this_mv; 1505 unsigned int bestsad; 1506 unsigned int thissad; 1507 int r, c; 1508 1509 unsigned char *check_here; 1510 1511 int ref_row = ref_mv->as_mv.row; 1512 int ref_col = ref_mv->as_mv.col; 1513 1514 int row_min = ref_row - distance; 1515 int row_max = ref_row + distance; 1516 int col_min = ref_col - distance; 1517 int col_max = ref_col + distance; 1518 1519 DECLARE_ALIGNED(16, unsigned int, sad_array8[8]); 1520 unsigned int sad_array[3]; 1521 1522 int *mvsadcost[2]; 1523 int_mv fcenter_mv; 1524 1525 mvsadcost[0] = x->mvsadcost[0]; 1526 mvsadcost[1] = x->mvsadcost[1]; 1527 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1528 fcenter_mv.as_mv.col 
      = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;
    c = col_min;

    /* Widest SIMD path: 8 consecutive columns per sdx8f call. */
    while ((c + 7) < col_max) {
      int i;

      fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);

      for (i = 0; i < 8; ++i) {
        thissad = sad_array8[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Medium SIMD path: 3 columns per sdx3f call for the remainder. */
    while ((c + 2) < col_max) {
      int i;

      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

      for (i = 0; i < 3; ++i) {
        thissad = sad_array[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Scalar tail for the remaining (< 3) columns of this row. */
    while (c < col_max) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
      c++;
    }
  }

  /* Convert the best whole-pel vector back to 1/8-pel units. */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
#endif  // HAVE_SSE4_1

/* Local refinement search: repeatedly tries the 4 one-pel neighbors
 * (up, left, right, down) of ref_mv, moving to the best neighbor until
 * no neighbor improves the SAD+cost or search_range steps are used.
 * ref_mv is updated in place with the refined whole-pel vector. */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* center_mv is in 1/8-pel units; cost tables are whole-pel indexed. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); 1669 1670 for (i = 0; i < search_range; ++i) { 1671 int best_site = -1; 1672 1673 for (j = 0; j < 4; ++j) { 1674 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 1675 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 1676 1677 if ((this_col_offset > x->mv_col_min) && 1678 (this_col_offset < x->mv_col_max) && 1679 (this_row_offset > x->mv_row_min) && 1680 (this_row_offset < x->mv_row_max)) { 1681 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + 1682 best_address; 1683 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1684 1685 if (thissad < bestsad) { 1686 this_mv.as_mv.row = this_row_offset; 1687 this_mv.as_mv.col = this_col_offset; 1688 thissad += 1689 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); 1690 1691 if (thissad < bestsad) { 1692 bestsad = thissad; 1693 best_site = j; 1694 } 1695 } 1696 } 1697 } 1698 1699 if (best_site == -1) { 1700 break; 1701 } else { 1702 ref_mv->as_mv.row += neighbors[best_site].row; 1703 ref_mv->as_mv.col += neighbors[best_site].col; 1704 best_address += (neighbors[best_site].row) * in_what_stride + 1705 neighbors[best_site].col; 1706 } 1707 } 1708 1709 this_mv.as_mv.row = ref_mv->as_mv.row << 3; 1710 this_mv.as_mv.col = ref_mv->as_mv.col << 3; 1711 1712 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + 1713 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1714 } 1715 1716 #if HAVE_SSE2 || HAVE_MSA 1717 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, 1718 int_mv *ref_mv, int error_per_bit, 1719 int search_range, vp8_variance_fn_ptr_t *fn_ptr, 1720 int *mvcost[2], int_mv *center_mv) { 1721 MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; 1722 int i, j; 1723 short this_row_offset, this_col_offset; 1724 1725 int what_stride = b->src_stride; 1726 int pre_stride = x->e_mbd.pre.y_stride; 1727 unsigned char *base_pre = 
x->e_mbd.pre.y_buffer; 1728 int in_what_stride = pre_stride; 1729 unsigned char *what = (*(b->base_src) + b->src); 1730 unsigned char *best_address = 1731 (unsigned char *)(base_pre + d->offset + 1732 (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); 1733 unsigned char *check_here; 1734 int_mv this_mv; 1735 unsigned int bestsad; 1736 unsigned int thissad; 1737 1738 int *mvsadcost[2]; 1739 int_mv fcenter_mv; 1740 1741 mvsadcost[0] = x->mvsadcost[0]; 1742 mvsadcost[1] = x->mvsadcost[1]; 1743 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1744 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1745 1746 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) + 1747 mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); 1748 1749 for (i = 0; i < search_range; ++i) { 1750 int best_site = -1; 1751 int all_in = 1; 1752 1753 all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min); 1754 all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max); 1755 all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min); 1756 all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max); 1757 1758 if (all_in) { 1759 unsigned int sad_array[4]; 1760 const unsigned char *block_offset[4]; 1761 block_offset[0] = best_address - in_what_stride; 1762 block_offset[1] = best_address - 1; 1763 block_offset[2] = best_address + 1; 1764 block_offset[3] = best_address + in_what_stride; 1765 1766 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1767 sad_array); 1768 1769 for (j = 0; j < 4; ++j) { 1770 if (sad_array[j] < bestsad) { 1771 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; 1772 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; 1773 sad_array[j] += 1774 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); 1775 1776 if (sad_array[j] < bestsad) { 1777 bestsad = sad_array[j]; 1778 best_site = j; 1779 } 1780 } 1781 } 1782 } else { 1783 for (j = 0; j < 4; ++j) { 1784 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 1785 this_col_offset = 
ref_mv->as_mv.col + neighbors[j].col; 1786 1787 if ((this_col_offset > x->mv_col_min) && 1788 (this_col_offset < x->mv_col_max) && 1789 (this_row_offset > x->mv_row_min) && 1790 (this_row_offset < x->mv_row_max)) { 1791 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + 1792 best_address; 1793 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1794 1795 if (thissad < bestsad) { 1796 this_mv.as_mv.row = this_row_offset; 1797 this_mv.as_mv.col = this_col_offset; 1798 thissad += 1799 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); 1800 1801 if (thissad < bestsad) { 1802 bestsad = thissad; 1803 best_site = j; 1804 } 1805 } 1806 } 1807 } 1808 } 1809 1810 if (best_site == -1) { 1811 break; 1812 } else { 1813 ref_mv->as_mv.row += neighbors[best_site].row; 1814 ref_mv->as_mv.col += neighbors[best_site].col; 1815 best_address += (neighbors[best_site].row) * in_what_stride + 1816 neighbors[best_site].col; 1817 } 1818 } 1819 1820 this_mv.as_mv.row = ref_mv->as_mv.row * 8; 1821 this_mv.as_mv.col = ref_mv->as_mv.col * 8; 1822 1823 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + 1824 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1825 } 1826 #endif // HAVE_SSE2 || HAVE_MSA 1827