1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <limits.h> 12 #include <math.h> 13 #include <stdio.h> 14 15 #include "./vpx_config.h" 16 17 #include "vpx_mem/vpx_mem.h" 18 19 #include "vp9/common/vp9_findnearmv.h" 20 #include "vp9/common/vp9_common.h" 21 22 #include "vp9/encoder/vp9_onyx_int.h" 23 #include "vp9/encoder/vp9_mcomp.h" 24 25 // #define NEW_DIAMOND_SEARCH 26 27 void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) { 28 const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 29 const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 30 const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 31 const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 32 33 // Get intersection of UMV window and valid MV window to reduce # of checks 34 // in diamond search. 35 if (x->mv_col_min < col_min) 36 x->mv_col_min = col_min; 37 if (x->mv_col_max > col_max) 38 x->mv_col_max = col_max; 39 if (x->mv_row_min < row_min) 40 x->mv_row_min = row_min; 41 if (x->mv_row_max > row_max) 42 x->mv_row_max = row_max; 43 } 44 45 int vp9_init_search_range(VP9_COMP *cpi, int size) { 46 int sr = 0; 47 48 // Minimum search size no matter what the passed in value. 49 size = MAX(16, size); 50 51 while ((size << sr) < MAX_FULL_PEL_VAL) 52 sr++; 53 54 if (sr) 55 sr--; 56 57 sr += cpi->sf.reduce_first_step_size; 58 sr = MIN(sr, (cpi->sf.max_step_search_steps - 2)); 59 return sr; 60 } 61 62 int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], 63 int weight) { 64 MV v; 65 v.row = mv->as_mv.row - ref->as_mv.row; 66 v.col = mv->as_mv.col - ref->as_mv.col; 67 return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] + 68 mvcost[0][v.row] + 69 mvcost[1][v.col]) * weight, 7); 70 } 71 72 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], 73 int error_per_bit) { 74 if (mvcost) { 75 MV v; 76 v.row = mv->as_mv.row - ref->as_mv.row; 77 v.col = mv->as_mv.col - ref->as_mv.col; 78 return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] + 79 mvcost[0][v.row] + 80 mvcost[1][v.col]) * error_per_bit, 13); 81 } 82 return 0; 83 } 84 85 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost, 86 int *mvsadcost[2], int error_per_bit) { 87 if (mvsadcost) { 88 MV v; 89 v.row = mv->as_mv.row - ref->as_mv.row; 90 v.col = mv->as_mv.col - ref->as_mv.col; 91 return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(&v)] + 92 mvsadcost[0][v.row] + 93 mvsadcost[1][v.col]) * error_per_bit, 8); 94 } 95 return 0; 96 } 97 98 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { 99 int len; 100 int search_site_count = 0; 101 102 // Generate offsets for 4 search sites per step. 103 x->ss[search_site_count].mv.col = 0; 104 x->ss[search_site_count].mv.row = 0; 105 x->ss[search_site_count].offset = 0; 106 search_site_count++; 107 108 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 109 // Compute offsets for search sites. 110 x->ss[search_site_count].mv.col = 0; 111 x->ss[search_site_count].mv.row = -len; 112 x->ss[search_site_count].offset = -len * stride; 113 search_site_count++; 114 115 // Compute offsets for search sites. 116 x->ss[search_site_count].mv.col = 0; 117 x->ss[search_site_count].mv.row = len; 118 x->ss[search_site_count].offset = len * stride; 119 search_site_count++; 120 121 // Compute offsets for search sites. 122 x->ss[search_site_count].mv.col = -len; 123 x->ss[search_site_count].mv.row = 0; 124 x->ss[search_site_count].offset = -len; 125 search_site_count++; 126 127 // Compute offsets for search sites. 128 x->ss[search_site_count].mv.col = len; 129 x->ss[search_site_count].mv.row = 0; 130 x->ss[search_site_count].offset = len; 131 search_site_count++; 132 } 133 134 x->ss_count = search_site_count; 135 x->searches_per_step = 4; 136 } 137 138 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { 139 int len; 140 int search_site_count = 0; 141 142 // Generate offsets for 8 search sites per step. 143 x->ss[search_site_count].mv.col = 0; 144 x->ss[search_site_count].mv.row = 0; 145 x->ss[search_site_count].offset = 0; 146 search_site_count++; 147 148 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 149 // Compute offsets for search sites. 150 x->ss[search_site_count].mv.col = 0; 151 x->ss[search_site_count].mv.row = -len; 152 x->ss[search_site_count].offset = -len * stride; 153 search_site_count++; 154 155 // Compute offsets for search sites. 156 x->ss[search_site_count].mv.col = 0; 157 x->ss[search_site_count].mv.row = len; 158 x->ss[search_site_count].offset = len * stride; 159 search_site_count++; 160 161 // Compute offsets for search sites. 162 x->ss[search_site_count].mv.col = -len; 163 x->ss[search_site_count].mv.row = 0; 164 x->ss[search_site_count].offset = -len; 165 search_site_count++; 166 167 // Compute offsets for search sites. 168 x->ss[search_site_count].mv.col = len; 169 x->ss[search_site_count].mv.row = 0; 170 x->ss[search_site_count].offset = len; 171 search_site_count++; 172 173 // Compute offsets for search sites. 174 x->ss[search_site_count].mv.col = -len; 175 x->ss[search_site_count].mv.row = -len; 176 x->ss[search_site_count].offset = -len * stride - len; 177 search_site_count++; 178 179 // Compute offsets for search sites. 180 x->ss[search_site_count].mv.col = len; 181 x->ss[search_site_count].mv.row = -len; 182 x->ss[search_site_count].offset = -len * stride + len; 183 search_site_count++; 184 185 // Compute offsets for search sites. 186 x->ss[search_site_count].mv.col = -len; 187 x->ss[search_site_count].mv.row = len; 188 x->ss[search_site_count].offset = len * stride - len; 189 search_site_count++; 190 191 // Compute offsets for search sites. 192 x->ss[search_site_count].mv.col = len; 193 x->ss[search_site_count].mv.row = len; 194 x->ss[search_site_count].offset = len * stride + len; 195 search_site_count++; 196 } 197 198 x->ss_count = search_site_count; 199 x->searches_per_step = 8; 200 } 201 202 /* 203 * To avoid the penalty for crossing cache-line read, preload the reference 204 * area in a small buffer, which is aligned to make sure there won't be crossing 205 * cache-line read while reading from this buffer. This reduced the cpu 206 * cycles spent on reading ref data in sub-pixel filter functions. 207 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 208 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 209 * could reduce the area. 210 */ 211 212 /* estimated cost of a motion vector (r,c) */ 213 #define MVC(r, c) \ 214 (mvcost ? \ 215 ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ 216 mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ 217 error_per_bit + 4096) >> 13 : 0) 218 219 220 #define SP(x) (((x) & 7) << 1) // convert motion vector component to offset 221 // for svf calc 222 223 #define IFMVCV(r, c, s, e) \ 224 if (c >= minc && c <= maxc && r >= minr && r <= maxr) \ 225 s \ 226 else \ 227 e; 228 229 /* pointer to predictor base of a motionvector */ 230 #define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset))) 231 232 /* returns subpixel variance error function */ 233 #define DIST(r, c) \ 234 vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse) 235 236 /* checks if (r, c) has better score than previous best */ 237 #define CHECK_BETTER(v, r, c) \ 238 IFMVCV(r, c, { \ 239 thismse = (DIST(r, c)); \ 240 if ((v = MVC(r, c) + thismse) < besterr) { \ 241 besterr = v; \ 242 br = r; \ 243 bc = c; \ 244 *distortion = thismse; \ 245 *sse1 = sse; \ 246 } \ 247 }, \ 248 v = INT_MAX;) 249 250 #define FIRST_LEVEL_CHECKS \ 251 { \ 252 unsigned int left, right, up, down, diag; \ 253 CHECK_BETTER(left, tr, tc - hstep); \ 254 CHECK_BETTER(right, tr, tc + hstep); \ 255 CHECK_BETTER(up, tr - hstep, tc); \ 256 CHECK_BETTER(down, tr + hstep, tc); \ 257 whichdir = (left < right ? 0 : 1) + \ 258 (up < down ? 0 : 2); \ 259 switch (whichdir) { \ 260 case 0: \ 261 CHECK_BETTER(diag, tr - hstep, tc - hstep); \ 262 break; \ 263 case 1: \ 264 CHECK_BETTER(diag, tr - hstep, tc + hstep); \ 265 break; \ 266 case 2: \ 267 CHECK_BETTER(diag, tr + hstep, tc - hstep); \ 268 break; \ 269 case 3: \ 270 CHECK_BETTER(diag, tr + hstep, tc + hstep); \ 271 break; \ 272 } \ 273 } 274 275 #define SECOND_LEVEL_CHECKS \ 276 { \ 277 int kr, kc; \ 278 unsigned int second; \ 279 if (tr != br && tc != bc) { \ 280 kr = br - tr; \ 281 kc = bc - tc; \ 282 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 283 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 284 } else if (tr == br && tc != bc) { \ 285 kc = bc - tc; \ 286 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 287 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 288 switch (whichdir) { \ 289 case 0: \ 290 case 1: \ 291 CHECK_BETTER(second, tr + hstep, tc + kc); \ 292 break; \ 293 case 2: \ 294 case 3: \ 295 CHECK_BETTER(second, tr - hstep, tc + kc); \ 296 break; \ 297 } \ 298 } else if (tr != br && tc == bc) { \ 299 kr = br - tr; \ 300 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 301 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 302 switch (whichdir) { \ 303 case 0: \ 304 case 2: \ 305 CHECK_BETTER(second, tr + kr, tc + hstep); \ 306 break; \ 307 case 1: \ 308 case 3: \ 309 CHECK_BETTER(second, tr + kr, tc - hstep); \ 310 break; \ 311 } \ 312 } \ 313 } 314 315 int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, 316 int_mv *bestmv, int_mv *ref_mv, 317 int error_per_bit, 318 const vp9_variance_fn_ptr_t *vfp, 319 int forced_stop, 320 int iters_per_step, 321 int *mvjcost, int *mvcost[2], 322 int *distortion, 323 unsigned int *sse1) { 324 uint8_t *z = x->plane[0].src.buf; 325 int src_stride = x->plane[0].src.stride; 326 MACROBLOCKD *xd = &x->e_mbd; 327 328 unsigned int besterr = INT_MAX; 329 unsigned int sse; 330 unsigned int whichdir; 331 unsigned int halfiters = iters_per_step; 332 unsigned int quarteriters = iters_per_step; 333 unsigned int eighthiters = iters_per_step; 334 int thismse; 335 336 uint8_t *y = xd->plane[0].pre[0].buf + 337 (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + 338 bestmv->as_mv.col; 339 340 const int y_stride = xd->plane[0].pre[0].stride; 341 342 int rr = ref_mv->as_mv.row; 343 int rc = ref_mv->as_mv.col; 344 int br = bestmv->as_mv.row << 3; 345 int bc = bestmv->as_mv.col << 3; 346 int hstep = 4; 347 const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX); 348 const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX); 349 const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX); 350 const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX); 351 352 int tr = br; 353 int tc = bc; 354 355 const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; 356 357 // central mv 358 bestmv->as_mv.row <<= 3; 359 bestmv->as_mv.col <<= 3; 360 361 // calculate central point error 362 besterr = vfp->vf(y, y_stride, z, src_stride, sse1); 363 *distortion = besterr; 364 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 365 366 // TODO: Each subsequent iteration checks at least one point in 367 // common with the last iteration could be 2 ( if diag selected) 368 while (halfiters--) { 369 // 1/2 pel 370 FIRST_LEVEL_CHECKS; 371 // no reason to check the same one again. 372 if (tr == br && tc == bc) 373 break; 374 tr = br; 375 tc = bc; 376 } 377 378 // TODO: Each subsequent iteration checks at least one point in common with 379 // the last iteration could be 2 ( if diag selected) 1/4 pel 380 381 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 382 if (forced_stop != 2) { 383 hstep >>= 1; 384 while (quarteriters--) { 385 FIRST_LEVEL_CHECKS; 386 // no reason to check the same one again. 387 if (tr == br && tc == bc) 388 break; 389 tr = br; 390 tc = bc; 391 } 392 } 393 394 if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && 395 forced_stop == 0) { 396 hstep >>= 1; 397 while (eighthiters--) { 398 FIRST_LEVEL_CHECKS; 399 // no reason to check the same one again. 400 if (tr == br && tc == bc) 401 break; 402 tr = br; 403 tc = bc; 404 } 405 } 406 407 bestmv->as_mv.row = br; 408 bestmv->as_mv.col = bc; 409 410 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || 411 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) 412 return INT_MAX; 413 414 return besterr; 415 } 416 417 int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, 418 int_mv *bestmv, int_mv *ref_mv, 419 int error_per_bit, 420 const vp9_variance_fn_ptr_t *vfp, 421 int forced_stop, 422 int iters_per_step, 423 int *mvjcost, int *mvcost[2], 424 int *distortion, 425 unsigned int *sse1) { 426 uint8_t *z = x->plane[0].src.buf; 427 int src_stride = x->plane[0].src.stride; 428 MACROBLOCKD *xd = &x->e_mbd; 429 int rr, rc, br, bc, hstep; 430 int tr, tc; 431 unsigned int besterr = INT_MAX; 432 unsigned int sse; 433 unsigned int whichdir; 434 int thismse; 435 int maxc, minc, maxr, minr; 436 int y_stride; 437 int offset; 438 unsigned int halfiters = iters_per_step; 439 unsigned int quarteriters = iters_per_step; 440 unsigned int eighthiters = iters_per_step; 441 442 uint8_t *y = xd->plane[0].pre[0].buf + 443 (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + 444 bestmv->as_mv.col; 445 446 y_stride = xd->plane[0].pre[0].stride; 447 448 rr = ref_mv->as_mv.row; 449 rc = ref_mv->as_mv.col; 450 br = bestmv->as_mv.row << 3; 451 bc = bestmv->as_mv.col << 3; 452 hstep = 4; 453 minc = MAX(x->mv_col_min << 3, 454 (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1)); 455 maxc = MIN(x->mv_col_max << 3, 456 (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1)); 457 minr = MAX(x->mv_row_min << 3, 458 (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1)); 459 maxr = MIN(x->mv_row_max << 3, 460 (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1)); 461 462 tr = br; 463 tc = bc; 464 465 offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; 466 467 // central mv 468 bestmv->as_mv.row <<= 3; 469 bestmv->as_mv.col <<= 3; 470 471 // calculate central point error 472 besterr = vfp->vf(y, y_stride, z, src_stride, sse1); 473 *distortion = besterr; 474 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 475 476 // 1/2 pel 477 FIRST_LEVEL_CHECKS; 478 if (halfiters > 1) { 479 SECOND_LEVEL_CHECKS; 480 } 481 tr = br; 482 tc = bc; 483 484 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 485 if (forced_stop != 2) { 486 hstep >>= 1; 487 FIRST_LEVEL_CHECKS; 488 if (quarteriters > 1) { 489 SECOND_LEVEL_CHECKS; 490 } 491 tr = br; 492 tc = bc; 493 } 494 495 if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && 496 forced_stop == 0) { 497 hstep >>= 1; 498 FIRST_LEVEL_CHECKS; 499 if (eighthiters > 1) { 500 SECOND_LEVEL_CHECKS; 501 } 502 tr = br; 503 tc = bc; 504 } 505 506 bestmv->as_mv.row = br; 507 bestmv->as_mv.col = bc; 508 509 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || 510 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) 511 return INT_MAX; 512 513 return besterr; 514 } 515 516 #undef DIST 517 /* returns subpixel variance error function */ 518 #define DIST(r, c) \ 519 vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \ 520 z, src_stride, &sse, second_pred) 521 522 int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, 523 int_mv *bestmv, int_mv *ref_mv, 524 int error_per_bit, 525 const vp9_variance_fn_ptr_t *vfp, 526 int forced_stop, 527 int iters_per_step, 528 int *mvjcost, int *mvcost[2], 529 int *distortion, 530 unsigned int *sse1, 531 const uint8_t *second_pred, 532 int w, int h) { 533 uint8_t *const z = x->plane[0].src.buf; 534 const int src_stride = x->plane[0].src.stride; 535 MACROBLOCKD *const xd = &x->e_mbd; 536 537 unsigned int besterr = INT_MAX; 538 unsigned int sse; 539 unsigned int whichdir; 540 unsigned int halfiters = iters_per_step; 541 unsigned int quarteriters = iters_per_step; 542 unsigned int eighthiters = iters_per_step; 543 int thismse; 544 545 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); 546 uint8_t *const y = xd->plane[0].pre[0].buf + 547 (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + 548 bestmv->as_mv.col; 549 550 const int y_stride = xd->plane[0].pre[0].stride; 551 552 int rr = ref_mv->as_mv.row; 553 int rc = ref_mv->as_mv.col; 554 int br = bestmv->as_mv.row << 3; 555 int bc = bestmv->as_mv.col << 3; 556 int hstep = 4; 557 const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX); 558 const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX); 559 const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX); 560 const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX); 561 562 int tr = br; 563 int tc = bc; 564 565 const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; 566 567 // central mv 568 bestmv->as_mv.row <<= 3; 569 bestmv->as_mv.col <<= 3; 570 571 // calculate central point error 572 // TODO(yunqingwang): central pointer error was already calculated in full- 573 // pixel search, and can be passed in this function. 574 comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 575 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); 576 *distortion = besterr; 577 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 578 579 // Each subsequent iteration checks at least one point in 580 // common with the last iteration could be 2 ( if diag selected) 581 while (halfiters--) { 582 // 1/2 pel 583 FIRST_LEVEL_CHECKS; 584 // no reason to check the same one again. 585 if (tr == br && tc == bc) 586 break; 587 tr = br; 588 tc = bc; 589 } 590 591 // Each subsequent iteration checks at least one point in common with 592 // the last iteration could be 2 ( if diag selected) 1/4 pel 593 594 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 595 if (forced_stop != 2) { 596 hstep >>= 1; 597 while (quarteriters--) { 598 FIRST_LEVEL_CHECKS; 599 // no reason to check the same one again. 600 if (tr == br && tc == bc) 601 break; 602 tr = br; 603 tc = bc; 604 } 605 } 606 607 if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && 608 forced_stop == 0) { 609 hstep >>= 1; 610 while (eighthiters--) { 611 FIRST_LEVEL_CHECKS; 612 // no reason to check the same one again. 613 if (tr == br && tc == bc) 614 break; 615 tr = br; 616 tc = bc; 617 } 618 } 619 bestmv->as_mv.row = br; 620 bestmv->as_mv.col = bc; 621 622 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || 623 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) 624 return INT_MAX; 625 626 return besterr; 627 } 628 629 int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, 630 int_mv *bestmv, int_mv *ref_mv, 631 int error_per_bit, 632 const vp9_variance_fn_ptr_t *vfp, 633 int forced_stop, 634 int iters_per_step, 635 int *mvjcost, int *mvcost[2], 636 int *distortion, 637 unsigned int *sse1, 638 const uint8_t *second_pred, 639 int w, int h) { 640 uint8_t *z = x->plane[0].src.buf; 641 int src_stride = x->plane[0].src.stride; 642 MACROBLOCKD *xd = &x->e_mbd; 643 int rr, rc, br, bc, hstep; 644 int tr, tc; 645 unsigned int besterr = INT_MAX; 646 unsigned int sse; 647 unsigned int whichdir; 648 int thismse; 649 int maxc, minc, maxr, minr; 650 int y_stride; 651 int offset; 652 unsigned int halfiters = iters_per_step; 653 unsigned int quarteriters = iters_per_step; 654 unsigned int eighthiters = iters_per_step; 655 656 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); 657 uint8_t *y = xd->plane[0].pre[0].buf + 658 (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + 659 bestmv->as_mv.col; 660 661 y_stride = xd->plane[0].pre[0].stride; 662 663 rr = ref_mv->as_mv.row; 664 rc = ref_mv->as_mv.col; 665 br = bestmv->as_mv.row << 3; 666 bc = bestmv->as_mv.col << 3; 667 hstep = 4; 668 minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - 669 ((1 << MV_MAX_BITS) - 1)); 670 maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + 671 ((1 << MV_MAX_BITS) - 1)); 672 minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - 673 ((1 << MV_MAX_BITS) - 1)); 674 maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + 675 ((1 << MV_MAX_BITS) - 1)); 676 677 tr = br; 678 tc = bc; 679 680 681 offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; 682 683 // central mv 684 bestmv->as_mv.row <<= 3; 685 bestmv->as_mv.col <<= 3; 686 687 // calculate central point error 688 // TODO(yunqingwang): central pointer error was already calculated in full- 689 // pixel search, and can be passed in this function. 690 comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 691 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); 692 *distortion = besterr; 693 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 694 695 // Each subsequent iteration checks at least one point in 696 // common with the last iteration could be 2 ( if diag selected) 697 // 1/2 pel 698 FIRST_LEVEL_CHECKS; 699 if (halfiters > 1) { 700 SECOND_LEVEL_CHECKS; 701 } 702 tr = br; 703 tc = bc; 704 705 // Each subsequent iteration checks at least one point in common with 706 // the last iteration could be 2 ( if diag selected) 1/4 pel 707 708 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 709 if (forced_stop != 2) { 710 hstep >>= 1; 711 FIRST_LEVEL_CHECKS; 712 if (quarteriters > 1) { 713 SECOND_LEVEL_CHECKS; 714 } 715 tr = br; 716 tc = bc; 717 } 718 719 if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && 720 forced_stop == 0) { 721 hstep >>= 1; 722 FIRST_LEVEL_CHECKS; 723 if (eighthiters > 1) { 724 SECOND_LEVEL_CHECKS; 725 } 726 tr = br; 727 tc = bc; 728 } 729 bestmv->as_mv.row = br; 730 bestmv->as_mv.col = bc; 731 732 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || 733 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) 734 return INT_MAX; 735 736 return besterr; 737 } 738 739 #undef MVC 740 #undef PRE 741 #undef DIST 742 #undef IFMVCV 743 #undef CHECK_BETTER 744 #undef SP 745 746 #define CHECK_BOUNDS(range) \ 747 {\ 748 all_in = 1;\ 749 all_in &= ((br-range) >= x->mv_row_min);\ 750 all_in &= ((br+range) <= x->mv_row_max);\ 751 all_in &= ((bc-range) >= x->mv_col_min);\ 752 all_in &= ((bc+range) <= x->mv_col_max);\ 753 } 754 755 #define CHECK_POINT \ 756 {\ 757 if (this_mv.as_mv.col < x->mv_col_min) continue;\ 758 if (this_mv.as_mv.col > x->mv_col_max) continue;\ 759 if (this_mv.as_mv.row < x->mv_row_min) continue;\ 760 if (this_mv.as_mv.row > x->mv_row_max) continue;\ 761 } 762 763 #define CHECK_BETTER \ 764 {\ 765 if (thissad < bestsad)\ 766 {\ 767 if (use_mvcost) \ 768 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \ 769 mvjsadcost, mvsadcost, \ 770 sad_per_bit);\ 771 if (thissad < bestsad)\ 772 {\ 773 bestsad = thissad;\ 774 best_site = i;\ 775 }\ 776 }\ 777 } 778 779 #define get_next_chkpts(list, i, n) \ 780 list[0] = ((i) == 0 ? (n) - 1 : (i) - 1); \ 781 list[1] = (i); \ 782 list[2] = ((i) == (n) - 1 ? 0 : (i) + 1); 783 784 #define MAX_PATTERN_SCALES 11 785 #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 786 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 787 788 // Generic pattern search function that searches over multiple scales. 789 // Each scale can have a different number of candidates and shape of 790 // candidates as indicated in the num_candidates and candidates arrays 791 // passed into this function 792 static int vp9_pattern_search(MACROBLOCK *x, 793 int_mv *ref_mv, 794 int search_param, 795 int sad_per_bit, 796 int do_init_search, 797 int do_refine, 798 const vp9_variance_fn_ptr_t *vfp, 799 int use_mvcost, 800 int_mv *center_mv, int_mv *best_mv, 801 const int num_candidates[MAX_PATTERN_SCALES], 802 const MV candidates[MAX_PATTERN_SCALES] 803 [MAX_PATTERN_CANDIDATES]) { 804 const MACROBLOCKD* const xd = &x->e_mbd; 805 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 806 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 807 }; 808 int i, j, s, t; 809 uint8_t *what = x->plane[0].src.buf; 810 int what_stride = x->plane[0].src.stride; 811 int in_what_stride = xd->plane[0].pre[0].stride; 812 int br, bc; 813 int_mv this_mv; 814 int bestsad = INT_MAX; 815 int thissad; 816 uint8_t *base_offset; 817 uint8_t *this_offset; 818 int k = -1; 819 int all_in; 820 int best_site = -1; 821 int_mv fcenter_mv; 822 int best_init_s = search_param_to_steps[search_param]; 823 int *mvjsadcost = x->nmvjointsadcost; 824 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 825 826 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 827 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 828 829 // adjust ref_mv to make sure it is within MV range 830 clamp_mv(&ref_mv->as_mv, 831 x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 832 br = ref_mv->as_mv.row; 833 bc = ref_mv->as_mv.col; 834 835 // Work out the start point for the search 836 base_offset = (uint8_t *)(xd->plane[0].pre[0].buf); 837 this_offset = base_offset + (br * in_what_stride) + bc; 838 this_mv.as_mv.row = br; 839 this_mv.as_mv.col = bc; 840 bestsad = vfp->sdf(what, what_stride, this_offset, 841 in_what_stride, 0x7fffffff) 842 + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, 843 sad_per_bit); 844 845 // Search all possible scales upto the search param around the center point 846 // pick the scale of the point that is best as the starting scale of 847 // further steps around it. 848 if (do_init_search) { 849 s = best_init_s; 850 best_init_s = -1; 851 for (t = 0; t <= s; ++t) { 852 best_site = -1; 853 CHECK_BOUNDS((1 << t)) 854 if (all_in) { 855 for (i = 0; i < num_candidates[t]; i++) { 856 this_mv.as_mv.row = br + candidates[t][i].row; 857 this_mv.as_mv.col = bc + candidates[t][i].col; 858 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + 859 this_mv.as_mv.col; 860 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 861 bestsad); 862 CHECK_BETTER 863 } 864 } else { 865 for (i = 0; i < num_candidates[t]; i++) { 866 this_mv.as_mv.row = br + candidates[t][i].row; 867 this_mv.as_mv.col = bc + candidates[t][i].col; 868 CHECK_POINT 869 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + 870 this_mv.as_mv.col; 871 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 872 bestsad); 873 CHECK_BETTER 874 } 875 } 876 if (best_site == -1) { 877 continue; 878 } else { 879 best_init_s = t; 880 k = best_site; 881 } 882 } 883 if (best_init_s != -1) { 884 br += candidates[best_init_s][k].row; 885 bc += candidates[best_init_s][k].col; 886 } 887 } 888 889 // If the center point is still the best, just skip this and move to 890 // the refinement step. 891 if (best_init_s != -1) { 892 s = best_init_s; 893 best_site = -1; 894 do { 895 // No need to search all 6 points the 1st time if initial search was used 896 if (!do_init_search || s != best_init_s) { 897 CHECK_BOUNDS((1 << s)) 898 if (all_in) { 899 for (i = 0; i < num_candidates[s]; i++) { 900 this_mv.as_mv.row = br + candidates[s][i].row; 901 this_mv.as_mv.col = bc + candidates[s][i].col; 902 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + 903 this_mv.as_mv.col; 904 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 905 bestsad); 906 CHECK_BETTER 907 } 908 } else { 909 for (i = 0; i < num_candidates[s]; i++) { 910 this_mv.as_mv.row = br + candidates[s][i].row; 911 this_mv.as_mv.col = bc + candidates[s][i].col; 912 CHECK_POINT 913 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + 914 this_mv.as_mv.col; 915 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 916 bestsad); 917 CHECK_BETTER 918 } 919 } 920 921 if (best_site == -1) { 922 continue; 923 } else { 924 br += candidates[s][best_site].row; 925 bc += candidates[s][best_site].col; 926 k = best_site; 927 } 928 } 929 930 do { 931 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 932 best_site = -1; 933 CHECK_BOUNDS((1 << s)) 934 935 get_next_chkpts(next_chkpts_indices, k, num_candidates[s]); 936 if (all_in) { 937 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 938 this_mv.as_mv.row = br + 939 candidates[s][next_chkpts_indices[i]].row; 940 this_mv.as_mv.col = bc + 941 candidates[s][next_chkpts_indices[i]].col; 942 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + 943 this_mv.as_mv.col; 944 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 945 bestsad); 946 CHECK_BETTER 947 } 948 } else { 949 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 950 this_mv.as_mv.row = br + 951 candidates[s][next_chkpts_indices[i]].row; 952 this_mv.as_mv.col = bc + 953 candidates[s][next_chkpts_indices[i]].col; 954 CHECK_POINT 955 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + 956 this_mv.as_mv.col; 957 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 958 bestsad); 959 CHECK_BETTER 960 } 961 } 962 963 if (best_site != -1) { 964 k = next_chkpts_indices[best_site]; 965 br += candidates[s][k].row; 966 bc += candidates[s][k].col; 967 } 968 } while (best_site != -1); 969 } while (s--); 970 } 971 972 // Check 4 1-away neighbors if do_refine is true. 973 // For most well-designed schemes do_refine will not be necessary. 974 if (do_refine) { 975 static const MV neighbors[4] = { 976 {0, -1}, { -1, 0}, {1, 0}, {0, 1}, 977 }; 978 for (j = 0; j < 16; j++) { 979 best_site = -1; 980 CHECK_BOUNDS(1) 981 if (all_in) { 982 for (i = 0; i < 4; i++) { 983 this_mv.as_mv.row = br + neighbors[i].row; 984 this_mv.as_mv.col = bc + neighbors[i].col; 985 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + 986 this_mv.as_mv.col; 987 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 988 bestsad); 989 CHECK_BETTER 990 } 991 } else { 992 for (i = 0; i < 4; i++) { 993 this_mv.as_mv.row = br + neighbors[i].row; 994 this_mv.as_mv.col = bc + neighbors[i].col; 995 CHECK_POINT 996 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + 997 this_mv.as_mv.col; 998 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 999 bestsad); 1000 CHECK_BETTER 1001 } 1002 } 1003 1004 if (best_site == -1) { 1005 break; 1006 } else { 1007 br += neighbors[best_site].row; 1008 bc += neighbors[best_site].col; 1009 } 1010 } 1011 } 1012 1013 best_mv->as_mv.row = br; 1014 best_mv->as_mv.col = bc; 1015 1016 this_offset = base_offset + (best_mv->as_mv.row * (in_what_stride)) + 1017 best_mv->as_mv.col; 1018 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1019 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1020 if (bestsad == INT_MAX) 1021 return INT_MAX; 1022 return 1023 vfp->vf(what, what_stride, this_offset, in_what_stride, 1024 (unsigned int *)(&bestsad)) + 1025 use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost, x->mvcost, 1026 x->errorperbit) : 0; 1027 } 1028 1029 1030 int vp9_hex_search(MACROBLOCK *x, 1031 int_mv *ref_mv, 1032 int search_param, 1033 int sad_per_bit, 1034 int do_init_search, 1035 const vp9_variance_fn_ptr_t *vfp, 1036 int use_mvcost, 1037 int_mv *center_mv, int_mv *best_mv) { 1038 // First scale has 8-closest points, the rest have 6 points in hex shape 1039 // at increasing scales 1040 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 1041 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 1042 }; 1043 // Note that the largest candidate step at each scale is 2^scale 1044 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1045 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}}, 1046 {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}}, 1047 {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}}, 1048 {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}}, 1049 {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}}, 1050 {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}}, 1051 {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}}, 1052 {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}}, 1053 {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}}, 1054 {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}}, 1055 {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024}, 1056 { -1024, 0}}, 1057 }; 1058 return 1059 vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 1060 do_init_search, 0, vfp, use_mvcost, 1061 center_mv, best_mv, 1062 hex_num_candidates, hex_candidates); 1063 } 1064 1065 int vp9_bigdia_search(MACROBLOCK *x, 1066 int_mv *ref_mv, 1067 int search_param, 1068 int sad_per_bit, 1069 int do_init_search, 1070 const vp9_variance_fn_ptr_t *vfp, 1071 int use_mvcost, 1072 int_mv *center_mv, 1073 int_mv *best_mv) { 1074 // First scale has 4-closest points, the rest have 8 points in diamond 1075 // shape at increasing scales 1076 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 1077 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1078 }; 1079 // Note that the largest candidate step at each scale is 2^scale 1080 static const MV bigdia_candidates[MAX_PATTERN_SCALES] 1081 [MAX_PATTERN_CANDIDATES] = { 1082 {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}}, 1083 {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}}, 1084 {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}}, 1085 {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}}, 1086 {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}}, 1087 {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32}, 1088 {-16, 16}, {-32, 0}}, 1089 {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64}, 1090 {-32, 32}, {-64, 0}}, 1091 {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128}, 1092 {-64, 64}, {-128, 0}}, 1093 {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256}, 1094 {-128, 128}, {-256, 0}}, 1095 {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512}, 1096 {-256, 256}, {-512, 0}}, 1097 {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, 1098 {-512, 512}, {-1024, 0}}, 1099 }; 1100 return 1101 vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 1102 do_init_search, 0, vfp, use_mvcost, 1103 center_mv, best_mv, 1104 bigdia_num_candidates, bigdia_candidates); 1105 } 1106 1107 int vp9_square_search(MACROBLOCK *x, 1108 int_mv *ref_mv, 1109 int search_param, 1110 int sad_per_bit, 1111 int do_init_search, 1112 const vp9_variance_fn_ptr_t *vfp, 1113 int use_mvcost, 1114 int_mv *center_mv, 1115 int_mv *best_mv) { 1116 // All scales have 8 closest points in square shape 1117 static const int square_num_candidates[MAX_PATTERN_SCALES] = { 1118 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1119 }; 1120 // Note that the largest candidate step at each scale is 2^scale 1121 static const MV square_candidates[MAX_PATTERN_SCALES] 1122 [MAX_PATTERN_CANDIDATES] = { 1123 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}}, 1124 {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}}, 1125 {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}}, 1126 {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}}, 1127 {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16}, 1128 {-16, 16}, {-16, 0}}, 1129 {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32}, 1130 {-32, 32}, {-32, 0}}, 1131 {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64}, 1132 {-64, 64}, {-64, 0}}, 1133 {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128}, 1134 {-128, 128}, {-128, 0}}, 1135 {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256}, 1136 {-256, 256}, {-256, 0}}, 1137 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, 1138 {-512, 512}, {-512, 0}}, 1139 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, 1140 {0, 1024}, {-1024, 1024}, {-1024, 0}}, 1141 }; 1142 return 1143 vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 1144 do_init_search, 0, vfp, use_mvcost, 1145 center_mv, best_mv, 1146 square_num_candidates, square_candidates); 1147 }; 1148 1149 #undef CHECK_BOUNDS 1150 #undef CHECK_POINT 1151 #undef CHECK_BETTER 1152 1153 int vp9_diamond_search_sad_c(MACROBLOCK *x, 1154 int_mv *ref_mv, int_mv *best_mv, 1155 int search_param, int sad_per_bit, int *num00, 1156 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, 1157 int *mvcost[2], int_mv *center_mv) { 1158 int i, j, step; 1159 1160 const MACROBLOCKD* const xd = &x->e_mbd; 1161 uint8_t *what = x->plane[0].src.buf; 1162 int what_stride = x->plane[0].src.stride; 1163 uint8_t *in_what; 1164 int in_what_stride = xd->plane[0].pre[0].stride; 1165 uint8_t *best_address; 1166 1167 int tot_steps; 1168 int_mv this_mv; 1169 1170 int bestsad = INT_MAX; 1171 int best_site = 0; 1172 int last_site = 0; 1173 1174 int ref_row, ref_col; 1175 int this_row_offset, this_col_offset; 1176 search_site *ss; 1177 1178 uint8_t *check_here; 1179 int thissad; 1180 int_mv fcenter_mv; 1181 1182 int *mvjsadcost = x->nmvjointsadcost; 1183 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1184 1185 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1186 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1187 1188 clamp_mv(&ref_mv->as_mv, 1189 x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1190 ref_row = ref_mv->as_mv.row; 1191 ref_col = ref_mv->as_mv.col; 1192 *num00 = 0; 1193 best_mv->as_mv.row = ref_row; 1194 best_mv->as_mv.col = ref_col; 1195 1196 // Work out the start point for the search 1197 in_what = (uint8_t *)(xd->plane[0].pre[0].buf + 1198 (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); 1199 best_address = in_what; 1200 1201 // Check the starting position 1202 bestsad = fn_ptr->sdf(what, what_stride, in_what, 1203 in_what_stride, 0x7fffffff) 1204 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, 1205 sad_per_bit); 1206 1207 // search_param determines the length of the initial step and hence the number of iterations 1208 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 1209 ss = &x->ss[search_param * x->searches_per_step]; 1210 tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1211 1212 i = 1; 1213 1214 for (step = 0; step < tot_steps; step++) { 1215 for (j = 0; j < x->searches_per_step; j++) { 1216 // Trap illegal vectors 1217 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1218 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1219 1220 if ((this_col_offset > x->mv_col_min) && 1221 (this_col_offset < x->mv_col_max) && 1222 (this_row_offset > x->mv_row_min) && 1223 (this_row_offset < x->mv_row_max)) { 1224 check_here = ss[i].offset + best_address; 1225 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1226 bestsad); 1227 1228 if (thissad < bestsad) { 1229 this_mv.as_mv.row = this_row_offset; 1230 this_mv.as_mv.col = this_col_offset; 1231 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1232 mvjsadcost, mvsadcost, sad_per_bit); 1233 1234 if (thissad < bestsad) { 1235 bestsad = thissad; 1236 best_site = i; 1237 } 1238 } 1239 } 1240 1241 i++; 1242 } 1243 1244 if (best_site != last_site) { 1245 best_mv->as_mv.row += ss[best_site].mv.row; 1246 best_mv->as_mv.col += ss[best_site].mv.col; 1247 best_address += ss[best_site].offset; 1248 last_site = best_site; 1249 #if defined(NEW_DIAMOND_SEARCH) 1250 while (1) { 1251 this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; 1252 this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; 1253 if ((this_col_offset > x->mv_col_min) && 1254 (this_col_offset < x->mv_col_max) && 1255 (this_row_offset > x->mv_row_min) && 1256 (this_row_offset < x->mv_row_max)) { 1257 check_here = ss[best_site].offset + best_address; 1258 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1259 bestsad); 1260 if (thissad < bestsad) { 1261 this_mv.as_mv.row = this_row_offset; 1262 this_mv.as_mv.col = this_col_offset; 1263 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1264 mvjsadcost, mvsadcost, sad_per_bit); 1265 if (thissad < bestsad) { 1266 bestsad = thissad; 1267 best_mv->as_mv.row += ss[best_site].mv.row; 1268 best_mv->as_mv.col += ss[best_site].mv.col; 1269 best_address += ss[best_site].offset; 1270 continue; 1271 } 1272 } 1273 } 1274 break; 1275 }; 1276 #endif 1277 } else if (best_address == in_what) 1278 (*num00)++; 1279 } 1280 1281 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1282 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1283 1284 if (bestsad == INT_MAX) 1285 return INT_MAX; 1286 1287 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, 1288 (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, 1289 mvcost, x->errorperbit); 1290 } 1291 1292 int vp9_diamond_search_sadx4(MACROBLOCK *x, 1293 int_mv *ref_mv, int_mv *best_mv, int search_param, 1294 int sad_per_bit, int *num00, 1295 vp9_variance_fn_ptr_t *fn_ptr, 1296 int *mvjcost, int *mvcost[2], int_mv *center_mv) { 1297 int i, j, step; 1298 1299 const MACROBLOCKD* const xd = &x->e_mbd; 1300 uint8_t *what = x->plane[0].src.buf; 1301 int what_stride = x->plane[0].src.stride; 1302 uint8_t *in_what; 1303 int in_what_stride = xd->plane[0].pre[0].stride; 1304 uint8_t *best_address; 1305 1306 int tot_steps; 1307 int_mv this_mv; 1308 1309 unsigned int bestsad = INT_MAX; 1310 int best_site = 0; 1311 int last_site = 0; 1312 1313 int ref_row; 1314 int ref_col; 1315 int this_row_offset; 1316 int this_col_offset; 1317 search_site *ss; 1318 1319 uint8_t *check_here; 1320 unsigned int thissad; 1321 int_mv fcenter_mv; 1322 1323 int *mvjsadcost = x->nmvjointsadcost; 1324 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1325 1326 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1327 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1328 1329 clamp_mv(&ref_mv->as_mv, 1330 x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1331 ref_row = ref_mv->as_mv.row; 1332 ref_col = ref_mv->as_mv.col; 1333 *num00 = 0; 1334 best_mv->as_mv.row = ref_row; 1335 best_mv->as_mv.col = ref_col; 1336 1337 // Work out the start point for the search 1338 in_what = (uint8_t *)(xd->plane[0].pre[0].buf + 1339 (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); 1340 best_address = in_what; 1341 1342 // Check the starting position 1343 bestsad = fn_ptr->sdf(what, what_stride, 1344 in_what, in_what_stride, 0x7fffffff) 1345 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, 1346 sad_per_bit); 1347 1348 // search_param determines the length of the initial step and hence the number of iterations 1349 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 1350 ss = &x->ss[search_param * x->searches_per_step]; 1351 tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1352 1353 i = 1; 1354 1355 for (step = 0; step < tot_steps; step++) { 1356 int all_in = 1, t; 1357 1358 // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of 1359 // checking 4 bounds for each points. 1360 all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); 1361 all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); 1362 all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); 1363 all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); 1364 1365 if (all_in) { 1366 unsigned int sad_array[4]; 1367 1368 for (j = 0; j < x->searches_per_step; j += 4) { 1369 unsigned char const *block_offset[4]; 1370 1371 for (t = 0; t < 4; t++) 1372 block_offset[t] = ss[i + t].offset + best_address; 1373 1374 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1375 sad_array); 1376 1377 for (t = 0; t < 4; t++, i++) { 1378 if (sad_array[t] < bestsad) { 1379 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; 1380 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; 1381 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, 1382 mvjsadcost, mvsadcost, sad_per_bit); 1383 1384 if (sad_array[t] < bestsad) { 1385 bestsad = sad_array[t]; 1386 best_site = i; 1387 } 1388 } 1389 } 1390 } 1391 } else { 1392 for (j = 0; j < x->searches_per_step; j++) { 1393 // Trap illegal vectors 1394 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1395 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1396 1397 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1398 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { 1399 check_here = ss[i].offset + best_address; 1400 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1401 1402 if (thissad < bestsad) { 1403 this_mv.as_mv.row = this_row_offset; 1404 this_mv.as_mv.col = this_col_offset; 1405 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1406 mvjsadcost, mvsadcost, sad_per_bit); 1407 1408 if (thissad < bestsad) { 1409 bestsad = thissad; 1410 best_site = i; 1411 } 1412 } 1413 } 1414 i++; 1415 } 1416 } 1417 if (best_site != last_site) { 1418 best_mv->as_mv.row += ss[best_site].mv.row; 1419 best_mv->as_mv.col += ss[best_site].mv.col; 1420 best_address += ss[best_site].offset; 1421 last_site = best_site; 1422 #if defined(NEW_DIAMOND_SEARCH) 1423 while (1) { 1424 this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; 1425 this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; 1426 if ((this_col_offset > x->mv_col_min) && 1427 (this_col_offset < x->mv_col_max) && 1428 (this_row_offset > x->mv_row_min) && 1429 (this_row_offset < x->mv_row_max)) { 1430 check_here = ss[best_site].offset + best_address; 1431 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1432 bestsad); 1433 if (thissad < bestsad) { 1434 this_mv.as_mv.row = this_row_offset; 1435 this_mv.as_mv.col = this_col_offset; 1436 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1437 mvjsadcost, mvsadcost, sad_per_bit); 1438 if (thissad < bestsad) { 1439 bestsad = thissad; 1440 best_mv->as_mv.row += ss[best_site].mv.row; 1441 best_mv->as_mv.col += ss[best_site].mv.col; 1442 best_address += ss[best_site].offset; 1443 continue; 1444 } 1445 } 1446 } 1447 break; 1448 }; 1449 #endif 1450 } else if (best_address == in_what) 1451 (*num00)++; 1452 } 1453 1454 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1455 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1456 1457 if (bestsad == INT_MAX) 1458 return INT_MAX; 1459 1460 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, 1461 (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, 1462 center_mv, mvjcost, mvcost, x->errorperbit); 1463 } 1464 1465 /* do_refine: If last step (1-away) of n-step search doesn't pick the center 1466 point as the best match, we will do a final 1-away diamond 1467 refining search */ 1468 1469 int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, 1470 int_mv *mvp_full, int step_param, 1471 int sadpb, int further_steps, 1472 int do_refine, vp9_variance_fn_ptr_t *fn_ptr, 1473 int_mv *ref_mv, int_mv *dst_mv) { 1474 int_mv temp_mv; 1475 int thissme, n, num00; 1476 int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1477 step_param, sadpb, &num00, 1478 fn_ptr, x->nmvjointcost, 1479 x->mvcost, ref_mv); 1480 dst_mv->as_int = temp_mv.as_int; 1481 1482 n = num00; 1483 num00 = 0; 1484 1485 /* If there won't be more n-step search, check to see if refining search is needed. */ 1486 if (n > further_steps) 1487 do_refine = 0; 1488 1489 while (n < further_steps) { 1490 n++; 1491 1492 if (num00) 1493 num00--; 1494 else { 1495 thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1496 step_param + n, sadpb, &num00, 1497 fn_ptr, x->nmvjointcost, x->mvcost, 1498 ref_mv); 1499 1500 /* check to see if refining search is needed. */ 1501 if (num00 > (further_steps - n)) 1502 do_refine = 0; 1503 1504 if (thissme < bestsme) { 1505 bestsme = thissme; 1506 dst_mv->as_int = temp_mv.as_int; 1507 } 1508 } 1509 } 1510 1511 /* final 1-away diamond refining search */ 1512 if (do_refine == 1) { 1513 int search_range = 8; 1514 int_mv best_mv; 1515 best_mv.as_int = dst_mv->as_int; 1516 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, 1517 fn_ptr, x->nmvjointcost, x->mvcost, 1518 ref_mv); 1519 1520 if (thissme < bestsme) { 1521 bestsme = thissme; 1522 dst_mv->as_int = best_mv.as_int; 1523 } 1524 } 1525 return bestsme; 1526 } 1527 1528 int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, 1529 int sad_per_bit, int distance, 1530 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, 1531 int *mvcost[2], 1532 int_mv *center_mv, int n) { 1533 const MACROBLOCKD* const xd = &x->e_mbd; 1534 uint8_t *what = x->plane[0].src.buf; 1535 int what_stride = x->plane[0].src.stride; 1536 uint8_t *in_what; 1537 int in_what_stride = xd->plane[0].pre[0].stride; 1538 int mv_stride = xd->plane[0].pre[0].stride; 1539 uint8_t *bestaddress; 1540 int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; 1541 int_mv this_mv; 1542 int bestsad = INT_MAX; 1543 int r, c; 1544 1545 uint8_t *check_here; 1546 int thissad; 1547 1548 int ref_row = ref_mv->as_mv.row; 1549 int ref_col = ref_mv->as_mv.col; 1550 1551 int row_min = ref_row - distance; 1552 int row_max = ref_row + distance; 1553 int col_min = ref_col - distance; 1554 int col_max = ref_col + distance; 1555 int_mv fcenter_mv; 1556 1557 int *mvjsadcost = x->nmvjointsadcost; 1558 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1559 1560 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1561 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1562 1563 // Work out the mid point for the search 1564 in_what = xd->plane[0].pre[0].buf; 1565 bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; 1566 1567 best_mv->as_mv.row = ref_row; 1568 best_mv->as_mv.col = ref_col; 1569 1570 // Baseline value at the centre 1571 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, 1572 in_what_stride, 0x7fffffff) 1573 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, 1574 sad_per_bit); 1575 1576 // Apply further limits to prevent us looking using vectors that stretch 1577 // beyond the UMV border 1578 col_min = MAX(col_min, x->mv_col_min); 1579 col_max = MIN(col_max, x->mv_col_max); 1580 row_min = MAX(row_min, x->mv_row_min); 1581 row_max = MIN(row_max, x->mv_row_max); 1582 1583 for (r = row_min; r < row_max; r++) { 1584 this_mv.as_mv.row = r; 1585 check_here = r * mv_stride + in_what + col_min; 1586 1587 for (c = col_min; c < col_max; c++) { 1588 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1589 1590 this_mv.as_mv.col = c; 1591 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1592 mvjsadcost, mvsadcost, sad_per_bit); 1593 1594 if (thissad < bestsad) { 1595 bestsad = thissad; 1596 best_mv->as_mv.row = r; 1597 best_mv->as_mv.col = c; 1598 bestaddress = check_here; 1599 } 1600 1601 check_here++; 1602 } 1603 } 1604 1605 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1606 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1607 1608 if (bestsad < INT_MAX) 1609 return 1610 fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 1611 (unsigned int *)(&thissad)) + 1612 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); 1613 else 1614 return INT_MAX; 1615 } 1616 1617 int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, 1618 int sad_per_bit, int distance, 1619 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, 1620 int *mvcost[2], int_mv *center_mv, int n) { 1621 const MACROBLOCKD* const xd = &x->e_mbd; 1622 uint8_t *what = x->plane[0].src.buf; 1623 int what_stride = x->plane[0].src.stride; 1624 uint8_t *in_what; 1625 int in_what_stride = xd->plane[0].pre[0].stride; 1626 int mv_stride = xd->plane[0].pre[0].stride; 1627 uint8_t *bestaddress; 1628 int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; 1629 int_mv this_mv; 1630 unsigned int bestsad = INT_MAX; 1631 int r, c; 1632 1633 uint8_t *check_here; 1634 unsigned int thissad; 1635 1636 int ref_row = ref_mv->as_mv.row; 1637 int ref_col = ref_mv->as_mv.col; 1638 1639 int row_min = ref_row - distance; 1640 int row_max = ref_row + distance; 1641 int col_min = ref_col - distance; 1642 int col_max = ref_col + distance; 1643 1644 unsigned int sad_array[3]; 1645 int_mv fcenter_mv; 1646 1647 int *mvjsadcost = x->nmvjointsadcost; 1648 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1649 1650 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1651 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1652 1653 // Work out the mid point for the search 1654 in_what = xd->plane[0].pre[0].buf; 1655 bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; 1656 1657 best_mv->as_mv.row = ref_row; 1658 best_mv->as_mv.col = ref_col; 1659 1660 // Baseline value at the centre 1661 bestsad = fn_ptr->sdf(what, what_stride, 1662 bestaddress, in_what_stride, 0x7fffffff) 1663 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, 1664 sad_per_bit); 1665 1666 // Apply further limits to prevent us looking using vectors that stretch 1667 // beyond the UMV border 1668 col_min = MAX(col_min, x->mv_col_min); 1669 col_max = MIN(col_max, x->mv_col_max); 1670 row_min = MAX(row_min, x->mv_row_min); 1671 row_max = MIN(row_max, x->mv_row_max); 1672 1673 for (r = row_min; r < row_max; r++) { 1674 this_mv.as_mv.row = r; 1675 check_here = r * mv_stride + in_what + col_min; 1676 c = col_min; 1677 1678 while ((c + 2) < col_max) { 1679 int i; 1680 1681 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1682 1683 for (i = 0; i < 3; i++) { 1684 thissad = sad_array[i]; 1685 1686 if (thissad < bestsad) { 1687 this_mv.as_mv.col = c; 1688 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1689 mvjsadcost, mvsadcost, sad_per_bit); 1690 1691 if (thissad < bestsad) { 1692 bestsad = thissad; 1693 best_mv->as_mv.row = r; 1694 best_mv->as_mv.col = c; 1695 bestaddress = check_here; 1696 } 1697 } 1698 1699 check_here++; 1700 c++; 1701 } 1702 } 1703 1704 while (c < col_max) { 1705 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1706 1707 if (thissad < bestsad) { 1708 this_mv.as_mv.col = c; 1709 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1710 mvjsadcost, mvsadcost, sad_per_bit); 1711 1712 if (thissad < bestsad) { 1713 bestsad = thissad; 1714 best_mv->as_mv.row = r; 1715 best_mv->as_mv.col = c; 1716 bestaddress = check_here; 1717 } 1718 } 1719 1720 check_here++; 1721 c++; 1722 } 1723 1724 } 1725 1726 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1727 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1728 1729 if (bestsad < INT_MAX) 1730 return 1731 fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 1732 (unsigned int *)(&thissad)) + 1733 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); 1734 else 1735 return INT_MAX; 1736 } 1737 1738 int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, 1739 int sad_per_bit, int distance, 1740 vp9_variance_fn_ptr_t *fn_ptr, 1741 int *mvjcost, int *mvcost[2], 1742 int_mv *center_mv, int n) { 1743 const MACROBLOCKD* const xd = &x->e_mbd; 1744 uint8_t *what = x->plane[0].src.buf; 1745 int what_stride = x->plane[0].src.stride; 1746 uint8_t *in_what; 1747 int in_what_stride = xd->plane[0].pre[0].stride; 1748 int mv_stride = xd->plane[0].pre[0].stride; 1749 uint8_t *bestaddress; 1750 int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; 1751 int_mv this_mv; 1752 unsigned int bestsad = INT_MAX; 1753 int r, c; 1754 1755 uint8_t *check_here; 1756 unsigned int thissad; 1757 1758 int ref_row = ref_mv->as_mv.row; 1759 int ref_col = ref_mv->as_mv.col; 1760 1761 int row_min = ref_row - distance; 1762 int row_max = ref_row + distance; 1763 int col_min = ref_col - distance; 1764 int col_max = ref_col + distance; 1765 1766 DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); 1767 unsigned int sad_array[3]; 1768 int_mv fcenter_mv; 1769 1770 int *mvjsadcost = x->nmvjointsadcost; 1771 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1772 1773 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1774 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1775 1776 // Work out the mid point for the search 1777 in_what = xd->plane[0].pre[0].buf; 1778 bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; 1779 1780 best_mv->as_mv.row = ref_row; 1781 best_mv->as_mv.col = ref_col; 1782 1783 // Baseline value at the centre 1784 bestsad = fn_ptr->sdf(what, what_stride, 1785 bestaddress, in_what_stride, 0x7fffffff) 1786 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, 1787 sad_per_bit); 1788 1789 // Apply further limits to prevent us looking using vectors that stretch 1790 // beyond the UMV border 1791 col_min = MAX(col_min, x->mv_col_min); 1792 col_max = MIN(col_max, x->mv_col_max); 1793 row_min = MAX(row_min, x->mv_row_min); 1794 row_max = MIN(row_max, x->mv_row_max); 1795 1796 for (r = row_min; r < row_max; r++) { 1797 this_mv.as_mv.row = r; 1798 check_here = r * mv_stride + in_what + col_min; 1799 c = col_min; 1800 1801 while ((c + 7) < col_max) { 1802 int i; 1803 1804 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); 1805 1806 for (i = 0; i < 8; i++) { 1807 thissad = (unsigned int)sad_array8[i]; 1808 1809 if (thissad < bestsad) { 1810 this_mv.as_mv.col = c; 1811 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1812 mvjsadcost, mvsadcost, sad_per_bit); 1813 1814 if (thissad < bestsad) { 1815 bestsad = thissad; 1816 best_mv->as_mv.row = r; 1817 best_mv->as_mv.col = c; 1818 bestaddress = check_here; 1819 } 1820 } 1821 1822 check_here++; 1823 c++; 1824 } 1825 } 1826 1827 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { 1828 int i; 1829 1830 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1831 1832 for (i = 0; i < 3; i++) { 1833 thissad = sad_array[i]; 1834 1835 if (thissad < bestsad) { 1836 this_mv.as_mv.col = c; 1837 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1838 mvjsadcost, mvsadcost, sad_per_bit); 1839 1840 if (thissad < bestsad) { 1841 bestsad = thissad; 1842 best_mv->as_mv.row = r; 1843 best_mv->as_mv.col = c; 1844 bestaddress = check_here; 1845 } 1846 } 1847 1848 check_here++; 1849 c++; 1850 } 1851 } 1852 1853 while (c < col_max) { 1854 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1855 1856 if (thissad < bestsad) { 1857 this_mv.as_mv.col = c; 1858 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1859 mvjsadcost, mvsadcost, sad_per_bit); 1860 1861 if (thissad < bestsad) { 1862 bestsad = thissad; 1863 best_mv->as_mv.row = r; 1864 best_mv->as_mv.col = c; 1865 bestaddress = check_here; 1866 } 1867 } 1868 1869 check_here++; 1870 c++; 1871 } 1872 } 1873 1874 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1875 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1876 1877 if (bestsad < INT_MAX) 1878 return 1879 fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 1880 (unsigned int *)(&thissad)) + 1881 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); 1882 else 1883 return INT_MAX; 1884 } 1885 int vp9_refining_search_sad_c(MACROBLOCK *x, 1886 int_mv *ref_mv, int error_per_bit, 1887 int search_range, vp9_variance_fn_ptr_t *fn_ptr, 1888 int *mvjcost, int *mvcost[2], int_mv *center_mv) { 1889 const MACROBLOCKD* const xd = &x->e_mbd; 1890 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1891 int i, j; 1892 int this_row_offset, this_col_offset; 1893 1894 int what_stride = x->plane[0].src.stride; 1895 int in_what_stride = xd->plane[0].pre[0].stride; 1896 uint8_t *what = x->plane[0].src.buf; 1897 uint8_t *best_address = xd->plane[0].pre[0].buf + 1898 (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + 1899 ref_mv->as_mv.col; 1900 uint8_t *check_here; 1901 unsigned int thissad; 1902 int_mv this_mv; 1903 unsigned int bestsad = INT_MAX; 1904 int_mv fcenter_mv; 1905 1906 int *mvjsadcost = x->nmvjointsadcost; 1907 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1908 1909 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1910 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1911 1912 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + 1913 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); 1914 1915 for (i = 0; i < search_range; i++) { 1916 int best_site = -1; 1917 1918 for (j = 0; j < 4; j++) { 1919 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 1920 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 1921 1922 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1923 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { 1924 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address; 1925 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1926 1927 if (thissad < bestsad) { 1928 this_mv.as_mv.row = this_row_offset; 1929 this_mv.as_mv.col = this_col_offset; 1930 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, 1931 mvsadcost, error_per_bit); 1932 1933 if (thissad < bestsad) { 1934 bestsad = thissad; 1935 best_site = j; 1936 } 1937 } 1938 } 1939 } 1940 1941 if (best_site == -1) 1942 break; 1943 else { 1944 ref_mv->as_mv.row += neighbors[best_site].row; 1945 ref_mv->as_mv.col += neighbors[best_site].col; 1946 best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col; 1947 } 1948 } 1949 1950 this_mv.as_mv.row = ref_mv->as_mv.row << 3; 1951 this_mv.as_mv.col = ref_mv->as_mv.col << 3; 1952 1953 if (bestsad < INT_MAX) 1954 return 1955 fn_ptr->vf(what, what_stride, best_address, in_what_stride, 1956 (unsigned int *)(&thissad)) + 1957 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); 1958 else 1959 return INT_MAX; 1960 } 1961 1962 int vp9_refining_search_sadx4(MACROBLOCK *x, 1963 int_mv *ref_mv, int error_per_bit, 1964 int search_range, vp9_variance_fn_ptr_t *fn_ptr, 1965 int *mvjcost, int *mvcost[2], int_mv *center_mv) { 1966 const MACROBLOCKD* const xd = &x->e_mbd; 1967 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1968 int i, j; 1969 int this_row_offset, this_col_offset; 1970 1971 int what_stride = x->plane[0].src.stride; 1972 int in_what_stride = xd->plane[0].pre[0].stride; 1973 uint8_t *what = x->plane[0].src.buf; 1974 uint8_t *best_address = xd->plane[0].pre[0].buf + 1975 (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + 1976 ref_mv->as_mv.col; 1977 uint8_t *check_here; 1978 unsigned int thissad; 1979 int_mv this_mv; 1980 unsigned int bestsad = INT_MAX; 1981 int_mv fcenter_mv; 1982 1983 int *mvjsadcost = x->nmvjointsadcost; 1984 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1985 1986 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1987 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1988 1989 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + 1990 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); 1991 1992 for (i = 0; i < search_range; i++) { 1993 int best_site = -1; 1994 int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) & 1995 ((ref_mv->as_mv.row + 1) < x->mv_row_max) & 1996 ((ref_mv->as_mv.col - 1) > x->mv_col_min) & 1997 ((ref_mv->as_mv.col + 1) < x->mv_col_max); 1998 1999 if (all_in) { 2000 unsigned int sad_array[4]; 2001 unsigned char const *block_offset[4]; 2002 block_offset[0] = best_address - in_what_stride; 2003 block_offset[1] = best_address - 1; 2004 block_offset[2] = best_address + 1; 2005 block_offset[3] = best_address + in_what_stride; 2006 2007 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); 2008 2009 for (j = 0; j < 4; j++) { 2010 if (sad_array[j] < bestsad) { 2011 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; 2012 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; 2013 sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, 2014 mvsadcost, error_per_bit); 2015 2016 if (sad_array[j] < bestsad) { 2017 bestsad = sad_array[j]; 2018 best_site = j; 2019 } 2020 } 2021 } 2022 } else { 2023 for (j = 0; j < 4; j++) { 2024 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 2025 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 2026 2027 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 2028 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { 2029 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address; 2030 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 2031 2032 if (thissad < bestsad) { 2033 this_mv.as_mv.row = this_row_offset; 2034 this_mv.as_mv.col = this_col_offset; 2035 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, 2036 mvsadcost, error_per_bit); 2037 2038 if (thissad < bestsad) { 2039 bestsad = thissad; 2040 best_site = j; 2041 } 2042 } 2043 } 2044 } 2045 } 2046 2047 if (best_site == -1) 2048 break; 2049 else { 2050 ref_mv->as_mv.row += neighbors[best_site].row; 2051 ref_mv->as_mv.col += neighbors[best_site].col; 2052 best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col; 2053 } 2054 } 2055 2056 this_mv.as_mv.row = ref_mv->as_mv.row << 3; 2057 this_mv.as_mv.col = ref_mv->as_mv.col << 3; 2058 2059 if (bestsad < INT_MAX) 2060 return 2061 fn_ptr->vf(what, what_stride, best_address, in_what_stride, 2062 (unsigned int *)(&thissad)) + 2063 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); 2064 else 2065 return INT_MAX; 2066 } 2067 2068 /* This function is called when we do joint motion search in comp_inter_inter 2069 * mode. 2070 */ 2071 int vp9_refining_search_8p_c(MACROBLOCK *x, 2072 int_mv *ref_mv, int error_per_bit, 2073 int search_range, vp9_variance_fn_ptr_t *fn_ptr, 2074 int *mvjcost, int *mvcost[2], int_mv *center_mv, 2075 const uint8_t *second_pred, int w, int h) { 2076 const MACROBLOCKD* const xd = &x->e_mbd; 2077 MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, 2078 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; 2079 int i, j; 2080 int this_row_offset, this_col_offset; 2081 2082 int what_stride = x->plane[0].src.stride; 2083 int in_what_stride = xd->plane[0].pre[0].stride; 2084 uint8_t *what = x->plane[0].src.buf; 2085 uint8_t *best_address = xd->plane[0].pre[0].buf + 2086 (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + 2087 ref_mv->as_mv.col; 2088 uint8_t *check_here; 2089 unsigned int thissad; 2090 int_mv this_mv; 2091 unsigned int bestsad = INT_MAX; 2092 int_mv fcenter_mv; 2093 2094 int *mvjsadcost = x->nmvjointsadcost; 2095 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 2096 2097 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 2098 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 2099 2100 /* Get compound pred by averaging two pred blocks. */ 2101 bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride, 2102 second_pred, 0x7fffffff) + 2103 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); 2104 2105 for (i = 0; i < search_range; i++) { 2106 int best_site = -1; 2107 2108 for (j = 0; j < 8; j++) { 2109 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 2110 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 2111 2112 if ((this_col_offset > x->mv_col_min) && 2113 (this_col_offset < x->mv_col_max) && 2114 (this_row_offset > x->mv_row_min) && 2115 (this_row_offset < x->mv_row_max)) { 2116 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + 2117 best_address; 2118 2119 /* Get compound block and use it to calculate SAD. */ 2120 thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, 2121 second_pred, bestsad); 2122 2123 if (thissad < bestsad) { 2124 this_mv.as_mv.row = this_row_offset; 2125 this_mv.as_mv.col = this_col_offset; 2126 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, 2127 mvsadcost, error_per_bit); 2128 2129 if (thissad < bestsad) { 2130 bestsad = thissad; 2131 best_site = j; 2132 } 2133 } 2134 } 2135 } 2136 2137 if (best_site == -1) { 2138 break; 2139 } else { 2140 ref_mv->as_mv.row += neighbors[best_site].row; 2141 ref_mv->as_mv.col += neighbors[best_site].col; 2142 best_address += (neighbors[best_site].row) * in_what_stride + 2143 neighbors[best_site].col; 2144 } 2145 } 2146 2147 this_mv.as_mv.row = ref_mv->as_mv.row << 3; 2148 this_mv.as_mv.col = ref_mv->as_mv.col << 3; 2149 2150 if (bestsad < INT_MAX) { 2151 // FIXME(rbultje, yunqing): add full-pixel averaging variance functions 2152 // so we don't have to use the subpixel with xoff=0,yoff=0 here. 2153 return fn_ptr->svaf(best_address, in_what_stride, 0, 0, 2154 what, what_stride, (unsigned int *)(&thissad), 2155 second_pred) + 2156 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); 2157 } else { 2158 return INT_MAX; 2159 } 2160 } 2161