1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <limits.h> 13 #include <math.h> 14 #include <stdio.h> 15 16 #include "./vpx_config.h" 17 #include "./vpx_dsp_rtcd.h" 18 19 #include "vpx_dsp/vpx_dsp_common.h" 20 #include "vpx_mem/vpx_mem.h" 21 #include "vpx_ports/mem.h" 22 23 #include "vp9/common/vp9_common.h" 24 #include "vp9/common/vp9_mvref_common.h" 25 #include "vp9/common/vp9_reconinter.h" 26 27 #include "vp9/encoder/vp9_encoder.h" 28 #include "vp9/encoder/vp9_mcomp.h" 29 30 // #define NEW_DIAMOND_SEARCH 31 32 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, 33 const MV *mv) { 34 return &buf->buf[mv->row * buf->stride + mv->col]; 35 } 36 37 void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { 38 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 39 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 40 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 41 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 42 43 col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1); 44 row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1); 45 col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1); 46 row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1); 47 48 // Get intersection of UMV window and valid MV window to reduce # of checks 49 // in diamond search. 50 if (mv_limits->col_min < col_min) mv_limits->col_min = col_min; 51 if (mv_limits->col_max > col_max) mv_limits->col_max = col_max; 52 if (mv_limits->row_min < row_min) mv_limits->row_min = row_min; 53 if (mv_limits->row_max > row_max) mv_limits->row_max = row_max; 54 } 55 56 void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, 57 const MvLimits *umv_window_limits, 58 const MV *ref_mv) { 59 subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8, 60 ref_mv->col - MAX_FULL_PEL_VAL * 8); 61 subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8, 62 ref_mv->col + MAX_FULL_PEL_VAL * 8); 63 subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8, 64 ref_mv->row - MAX_FULL_PEL_VAL * 8); 65 subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8, 66 ref_mv->row + MAX_FULL_PEL_VAL * 8); 67 68 subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min); 69 subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max); 70 subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min); 71 subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max); 72 } 73 74 int vp9_init_search_range(int size) { 75 int sr = 0; 76 // Minimum search size no matter what the passed in value. 77 size = VPXMAX(16, size); 78 79 while ((size << sr) < MAX_FULL_PEL_VAL) sr++; 80 81 sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2); 82 return sr; 83 } 84 85 static INLINE int mv_cost(const MV *mv, const int *joint_cost, 86 int *const comp_cost[2]) { 87 assert(mv->row >= -MV_MAX && mv->row < MV_MAX); 88 assert(mv->col >= -MV_MAX && mv->col < MV_MAX); 89 return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] + 90 comp_cost[1][mv->col]; 91 } 92 93 int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, 94 int *mvcost[2], int weight) { 95 const MV diff = { mv->row - ref->row, mv->col - ref->col }; 96 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); 97 } 98 99 #define PIXEL_TRANSFORM_ERROR_SCALE 4 100 static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, 101 int *mvcost[2], int error_per_bit) { 102 if (mvcost) { 103 const MV diff = { mv->row - ref->row, mv->col - ref->col }; 104 return (int)ROUND64_POWER_OF_TWO( 105 (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, 106 RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT + 107 PIXEL_TRANSFORM_ERROR_SCALE); 108 } 109 return 0; 110 } 111 112 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, 113 int sad_per_bit) { 114 const MV diff = { mv->row - ref->row, mv->col - ref->col }; 115 return ROUND_POWER_OF_TWO( 116 (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit, 117 VP9_PROB_COST_SHIFT); 118 } 119 120 void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { 121 int len; 122 int ss_count = 0; 123 124 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 125 // Generate offsets for 4 search sites per step. 126 const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } }; 127 int i; 128 for (i = 0; i < 4; ++i, ++ss_count) { 129 cfg->ss_mv[ss_count] = ss_mvs[i]; 130 cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; 131 } 132 } 133 134 cfg->searches_per_step = 4; 135 cfg->total_steps = ss_count / cfg->searches_per_step; 136 } 137 138 void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { 139 int len; 140 int ss_count = 0; 141 142 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 143 // Generate offsets for 8 search sites per step. 144 const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len }, 145 { 0, len }, { -len, -len }, { -len, len }, 146 { len, -len }, { len, len } }; 147 int i; 148 for (i = 0; i < 8; ++i, ++ss_count) { 149 cfg->ss_mv[ss_count] = ss_mvs[i]; 150 cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; 151 } 152 } 153 154 cfg->searches_per_step = 8; 155 cfg->total_steps = ss_count / cfg->searches_per_step; 156 } 157 158 // convert motion vector component to offset for sv[a]f calc 159 static INLINE int sp(int x) { return x & 7; } 160 161 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { 162 return &buf[(r >> 3) * stride + (c >> 3)]; 163 } 164 165 #if CONFIG_VP9_HIGHBITDEPTH 166 /* checks if (r, c) has better score than previous best */ 167 #define CHECK_BETTER(v, r, c) \ 168 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 169 int64_t tmpmse; \ 170 const MV mv = { r, c }; \ 171 const MV ref_mv = { rr, rc }; \ 172 if (second_pred == NULL) { \ 173 thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 174 src_stride, &sse); \ 175 } else { \ 176 thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 177 src_stride, &sse, second_pred); \ 178 } \ 179 tmpmse = thismse; \ 180 tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ 181 if (tmpmse >= INT_MAX) { \ 182 v = INT_MAX; \ 183 } else if ((v = (uint32_t)tmpmse) < besterr) { \ 184 besterr = v; \ 185 br = r; \ 186 bc = c; \ 187 *distortion = thismse; \ 188 *sse1 = sse; \ 189 } \ 190 } else { \ 191 v = INT_MAX; \ 192 } 193 #else 194 /* checks if (r, c) has better score than previous best */ 195 #define CHECK_BETTER(v, r, c) \ 196 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 197 const MV mv = { r, c }; \ 198 const MV ref_mv = { rr, rc }; \ 199 if (second_pred == NULL) \ 200 thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 201 src_stride, &sse); \ 202 else \ 203 thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 204 src_stride, &sse, second_pred); \ 205 if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ 206 thismse) < besterr) { \ 207 besterr = v; \ 208 br = r; \ 209 bc = c; \ 210 *distortion = thismse; \ 211 *sse1 = sse; \ 212 } \ 213 } else { \ 214 v = INT_MAX; \ 215 } 216 217 #endif 218 #define FIRST_LEVEL_CHECKS \ 219 { \ 220 unsigned int left, right, up, down, diag; \ 221 CHECK_BETTER(left, tr, tc - hstep); \ 222 CHECK_BETTER(right, tr, tc + hstep); \ 223 CHECK_BETTER(up, tr - hstep, tc); \ 224 CHECK_BETTER(down, tr + hstep, tc); \ 225 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \ 226 switch (whichdir) { \ 227 case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \ 228 case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \ 229 case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \ 230 case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \ 231 } \ 232 } 233 234 #define SECOND_LEVEL_CHECKS \ 235 { \ 236 int kr, kc; \ 237 unsigned int second; \ 238 if (tr != br && tc != bc) { \ 239 kr = br - tr; \ 240 kc = bc - tc; \ 241 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 242 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 243 } else if (tr == br && tc != bc) { \ 244 kc = bc - tc; \ 245 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 246 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 247 switch (whichdir) { \ 248 case 0: \ 249 case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \ 250 case 2: \ 251 case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \ 252 } \ 253 } else if (tr != br && tc == bc) { \ 254 kr = br - tr; \ 255 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 256 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 257 switch (whichdir) { \ 258 case 0: \ 259 case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \ 260 case 1: \ 261 case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \ 262 } \ 263 } \ 264 } 265 266 // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of 267 // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten 268 // later in the same way. 269 #define SECOND_LEVEL_CHECKS_BEST \ 270 { \ 271 unsigned int second; \ 272 int br0 = br; \ 273 int bc0 = bc; \ 274 assert(tr == br || tc == bc); \ 275 if (tr == br && tc != bc) { \ 276 kc = bc - tc; \ 277 } else if (tr != br && tc == bc) { \ 278 kr = br - tr; \ 279 } \ 280 CHECK_BETTER(second, br0 + kr, bc0); \ 281 CHECK_BETTER(second, br0, bc0 + kc); \ 282 if (br0 != br || bc0 != bc) { \ 283 CHECK_BETTER(second, br0 + kr, bc0 + kc); \ 284 } \ 285 } 286 287 #define SETUP_SUBPEL_SEARCH \ 288 const uint8_t *const z = x->plane[0].src.buf; \ 289 const int src_stride = x->plane[0].src.stride; \ 290 const MACROBLOCKD *xd = &x->e_mbd; \ 291 unsigned int besterr = UINT_MAX; \ 292 unsigned int sse; \ 293 unsigned int whichdir; \ 294 int thismse; \ 295 const unsigned int halfiters = iters_per_step; \ 296 const unsigned int quarteriters = iters_per_step; \ 297 const unsigned int eighthiters = iters_per_step; \ 298 const int y_stride = xd->plane[0].pre[0].stride; \ 299 const int offset = bestmv->row * y_stride + bestmv->col; \ 300 const uint8_t *const y = xd->plane[0].pre[0].buf; \ 301 \ 302 int rr = ref_mv->row; \ 303 int rc = ref_mv->col; \ 304 int br = bestmv->row * 8; \ 305 int bc = bestmv->col * 8; \ 306 int hstep = 4; \ 307 int minc, maxc, minr, maxr; \ 308 int tr = br; \ 309 int tc = bc; \ 310 MvLimits subpel_mv_limits; \ 311 \ 312 vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \ 313 minc = subpel_mv_limits.col_min; \ 314 maxc = subpel_mv_limits.col_max; \ 315 minr = subpel_mv_limits.row_min; \ 316 maxr = subpel_mv_limits.row_max; \ 317 \ 318 bestmv->row *= 8; \ 319 bestmv->col *= 8; 320 321 static unsigned int setup_center_error( 322 const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, 323 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, 324 const uint8_t *const src, const int src_stride, const uint8_t *const y, 325 int y_stride, const uint8_t *second_pred, int w, int h, int offset, 326 int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) { 327 #if CONFIG_VP9_HIGHBITDEPTH 328 uint64_t besterr; 329 if (second_pred != NULL) { 330 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 331 DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); 332 vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, 333 y_stride); 334 besterr = 335 vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); 336 } else { 337 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 338 vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); 339 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 340 } 341 } else { 342 besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); 343 } 344 *distortion = (uint32_t)besterr; 345 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 346 if (besterr >= UINT_MAX) return UINT_MAX; 347 return (uint32_t)besterr; 348 #else 349 uint32_t besterr; 350 (void)xd; 351 if (second_pred != NULL) { 352 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 353 vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); 354 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 355 } else { 356 besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); 357 } 358 *distortion = besterr; 359 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 360 return besterr; 361 #endif // CONFIG_VP9_HIGHBITDEPTH 362 } 363 364 static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) { 365 return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d); 366 } 367 368 static INLINE int is_cost_list_wellbehaved(int *cost_list) { 369 return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] && 370 cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4]; 371 } 372 373 // Returns surface minima estimate at given precision in 1/2^n bits. 374 // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C 375 // For a given set of costs S0, S1, S2, S3, S4 at points 376 // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, 377 // the solution for the location of the minima (x0, y0) is given by: 378 // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), 379 // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). 380 // The code below is an integerized version of that. 381 static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { 382 const int64_t x0 = (int64_t)cost_list[1] - cost_list[3]; 383 const int64_t y0 = cost_list[1] - 2 * (int64_t)cost_list[0] + cost_list[3]; 384 const int64_t x1 = (int64_t)cost_list[4] - cost_list[2]; 385 const int64_t y1 = cost_list[4] - 2 * (int64_t)cost_list[0] + cost_list[2]; 386 const int b = 1 << (bits - 1); 387 *ic = (int)divide_and_round(x0 * b, y0); 388 *ir = (int)divide_and_round(x1 * b, y1); 389 } 390 391 uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, 392 const MV *ref_mv, int allow_hp, 393 int error_per_bit, 394 const vp9_variance_fn_ptr_t *vfp, 395 int forced_stop, int iters_per_step, 396 int *cost_list, int *mvjcost, int *mvcost[2], 397 uint32_t *distortion, uint32_t *sse1, 398 const uint8_t *second_pred, int w, int h) { 399 SETUP_SUBPEL_SEARCH; 400 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 401 src_stride, y, y_stride, second_pred, w, h, 402 offset, mvjcost, mvcost, sse1, distortion); 403 (void)halfiters; 404 (void)quarteriters; 405 (void)eighthiters; 406 (void)whichdir; 407 (void)allow_hp; 408 (void)forced_stop; 409 (void)hstep; 410 (void)rr; 411 (void)rc; 412 (void)minr; 413 (void)minc; 414 (void)maxr; 415 (void)maxc; 416 (void)tr; 417 (void)tc; 418 (void)sse; 419 (void)thismse; 420 (void)cost_list; 421 422 return besterr; 423 } 424 425 uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( 426 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 427 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 428 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 429 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 430 int h) { 431 SETUP_SUBPEL_SEARCH; 432 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 433 src_stride, y, y_stride, second_pred, w, h, 434 offset, mvjcost, mvcost, sse1, distortion); 435 (void)halfiters; 436 (void)quarteriters; 437 (void)eighthiters; 438 (void)whichdir; 439 (void)allow_hp; 440 (void)forced_stop; 441 (void)hstep; 442 443 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 444 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 445 cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { 446 int ir, ic; 447 unsigned int minpt = INT_MAX; 448 get_cost_surf_min(cost_list, &ir, &ic, 2); 449 if (ir != 0 || ic != 0) { 450 CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic); 451 } 452 } else { 453 FIRST_LEVEL_CHECKS; 454 if (halfiters > 1) { 455 SECOND_LEVEL_CHECKS; 456 } 457 458 tr = br; 459 tc = bc; 460 461 // Each subsequent iteration checks at least one point in common with 462 // the last iteration could be 2 ( if diag selected) 1/4 pel 463 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 464 if (forced_stop != 2) { 465 hstep >>= 1; 466 FIRST_LEVEL_CHECKS; 467 if (quarteriters > 1) { 468 SECOND_LEVEL_CHECKS; 469 } 470 } 471 } 472 473 tr = br; 474 tc = bc; 475 476 if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { 477 hstep >>= 1; 478 FIRST_LEVEL_CHECKS; 479 if (eighthiters > 1) { 480 SECOND_LEVEL_CHECKS; 481 } 482 } 483 484 bestmv->row = br; 485 bestmv->col = bc; 486 487 return besterr; 488 } 489 490 uint32_t vp9_find_best_sub_pixel_tree_pruned_more( 491 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 492 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 493 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 494 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 495 int h) { 496 SETUP_SUBPEL_SEARCH; 497 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 498 src_stride, y, y_stride, second_pred, w, h, 499 offset, mvjcost, mvcost, sse1, distortion); 500 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 501 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 502 cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { 503 unsigned int minpt; 504 int ir, ic; 505 get_cost_surf_min(cost_list, &ir, &ic, 1); 506 if (ir != 0 || ic != 0) { 507 CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep); 508 } 509 } else { 510 FIRST_LEVEL_CHECKS; 511 if (halfiters > 1) { 512 SECOND_LEVEL_CHECKS; 513 } 514 } 515 516 // Each subsequent iteration checks at least one point in common with 517 // the last iteration could be 2 ( if diag selected) 1/4 pel 518 519 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 520 if (forced_stop != 2) { 521 tr = br; 522 tc = bc; 523 hstep >>= 1; 524 FIRST_LEVEL_CHECKS; 525 if (quarteriters > 1) { 526 SECOND_LEVEL_CHECKS; 527 } 528 } 529 530 if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { 531 tr = br; 532 tc = bc; 533 hstep >>= 1; 534 FIRST_LEVEL_CHECKS; 535 if (eighthiters > 1) { 536 SECOND_LEVEL_CHECKS; 537 } 538 } 539 // These lines insure static analysis doesn't warn that 540 // tr and tc aren't used after the above point. 541 (void)tr; 542 (void)tc; 543 544 bestmv->row = br; 545 bestmv->col = bc; 546 547 return besterr; 548 } 549 550 uint32_t vp9_find_best_sub_pixel_tree_pruned( 551 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 552 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 553 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 554 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 555 int h) { 556 SETUP_SUBPEL_SEARCH; 557 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 558 src_stride, y, y_stride, second_pred, w, h, 559 offset, mvjcost, mvcost, sse1, distortion); 560 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 561 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 562 cost_list[4] != INT_MAX) { 563 unsigned int left, right, up, down, diag; 564 whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) + 565 (cost_list[2] < cost_list[4] ? 0 : 2); 566 switch (whichdir) { 567 case 0: 568 CHECK_BETTER(left, tr, tc - hstep); 569 CHECK_BETTER(down, tr + hstep, tc); 570 CHECK_BETTER(diag, tr + hstep, tc - hstep); 571 break; 572 case 1: 573 CHECK_BETTER(right, tr, tc + hstep); 574 CHECK_BETTER(down, tr + hstep, tc); 575 CHECK_BETTER(diag, tr + hstep, tc + hstep); 576 break; 577 case 2: 578 CHECK_BETTER(left, tr, tc - hstep); 579 CHECK_BETTER(up, tr - hstep, tc); 580 CHECK_BETTER(diag, tr - hstep, tc - hstep); 581 break; 582 case 3: 583 CHECK_BETTER(right, tr, tc + hstep); 584 CHECK_BETTER(up, tr - hstep, tc); 585 CHECK_BETTER(diag, tr - hstep, tc + hstep); 586 break; 587 } 588 } else { 589 FIRST_LEVEL_CHECKS; 590 if (halfiters > 1) { 591 SECOND_LEVEL_CHECKS; 592 } 593 } 594 595 tr = br; 596 tc = bc; 597 598 // Each subsequent iteration checks at least one point in common with 599 // the last iteration could be 2 ( if diag selected) 1/4 pel 600 601 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 602 if (forced_stop != 2) { 603 hstep >>= 1; 604 FIRST_LEVEL_CHECKS; 605 if (quarteriters > 1) { 606 SECOND_LEVEL_CHECKS; 607 } 608 tr = br; 609 tc = bc; 610 } 611 612 if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { 613 hstep >>= 1; 614 FIRST_LEVEL_CHECKS; 615 if (eighthiters > 1) { 616 SECOND_LEVEL_CHECKS; 617 } 618 tr = br; 619 tc = bc; 620 } 621 // These lines insure static analysis doesn't warn that 622 // tr and tc aren't used after the above point. 623 (void)tr; 624 (void)tc; 625 626 bestmv->row = br; 627 bestmv->col = bc; 628 629 return besterr; 630 } 631 632 /* clang-format off */ 633 static const MV search_step_table[12] = { 634 // left, right, up, down 635 { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 }, 636 { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 }, 637 { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 } 638 }; 639 /* clang-format on */ 640 641 uint32_t vp9_find_best_sub_pixel_tree( 642 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 643 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 644 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 645 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 646 int h) { 647 const uint8_t *const z = x->plane[0].src.buf; 648 const uint8_t *const src_address = z; 649 const int src_stride = x->plane[0].src.stride; 650 const MACROBLOCKD *xd = &x->e_mbd; 651 unsigned int besterr = UINT_MAX; 652 unsigned int sse; 653 int thismse; 654 const int y_stride = xd->plane[0].pre[0].stride; 655 const int offset = bestmv->row * y_stride + bestmv->col; 656 const uint8_t *const y = xd->plane[0].pre[0].buf; 657 658 int rr = ref_mv->row; 659 int rc = ref_mv->col; 660 int br = bestmv->row * 8; 661 int bc = bestmv->col * 8; 662 int hstep = 4; 663 int iter, round = 3 - forced_stop; 664 665 int minc, maxc, minr, maxr; 666 int tr = br; 667 int tc = bc; 668 const MV *search_step = search_step_table; 669 int idx, best_idx = -1; 670 unsigned int cost_array[5]; 671 int kr, kc; 672 MvLimits subpel_mv_limits; 673 674 vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); 675 minc = subpel_mv_limits.col_min; 676 maxc = subpel_mv_limits.col_max; 677 minr = subpel_mv_limits.row_min; 678 maxr = subpel_mv_limits.row_max; 679 680 if (!(allow_hp && use_mv_hp(ref_mv))) 681 if (round == 3) round = 2; 682 683 bestmv->row *= 8; 684 bestmv->col *= 8; 685 686 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 687 src_stride, y, y_stride, second_pred, w, h, 688 offset, mvjcost, mvcost, sse1, distortion); 689 690 (void)cost_list; // to silence compiler warning 691 692 for (iter = 0; iter < round; ++iter) { 693 // Check vertical and horizontal sub-pixel positions. 694 for (idx = 0; idx < 4; ++idx) { 695 tr = br + search_step[idx].row; 696 tc = bc + search_step[idx].col; 697 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { 698 const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); 699 MV this_mv; 700 this_mv.row = tr; 701 this_mv.col = tc; 702 if (second_pred == NULL) 703 thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, 704 src_stride, &sse); 705 else 706 thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), 707 src_address, src_stride, &sse, second_pred); 708 cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, 709 mvcost, error_per_bit); 710 711 if (cost_array[idx] < besterr) { 712 best_idx = idx; 713 besterr = cost_array[idx]; 714 *distortion = thismse; 715 *sse1 = sse; 716 } 717 } else { 718 cost_array[idx] = UINT_MAX; 719 } 720 } 721 722 // Check diagonal sub-pixel position 723 kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); 724 kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep); 725 726 tc = bc + kc; 727 tr = br + kr; 728 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { 729 const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); 730 MV this_mv = { tr, tc }; 731 if (second_pred == NULL) 732 thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, 733 src_stride, &sse); 734 else 735 thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, 736 src_stride, &sse, second_pred); 737 cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, 738 error_per_bit); 739 740 if (cost_array[4] < besterr) { 741 best_idx = 4; 742 besterr = cost_array[4]; 743 *distortion = thismse; 744 *sse1 = sse; 745 } 746 } else { 747 cost_array[idx] = UINT_MAX; 748 } 749 750 if (best_idx < 4 && best_idx >= 0) { 751 br += search_step[best_idx].row; 752 bc += search_step[best_idx].col; 753 } else if (best_idx == 4) { 754 br = tr; 755 bc = tc; 756 } 757 758 if (iters_per_step > 1 && best_idx != -1) SECOND_LEVEL_CHECKS_BEST; 759 760 tr = br; 761 tc = bc; 762 763 search_step += 4; 764 hstep >>= 1; 765 best_idx = -1; 766 } 767 768 // Each subsequent iteration checks at least one point in common with 769 // the last iteration could be 2 ( if diag selected) 1/4 pel 770 771 // These lines insure static analysis doesn't warn that 772 // tr and tc aren't used after the above point. 773 (void)tr; 774 (void)tc; 775 776 bestmv->row = br; 777 bestmv->col = bc; 778 779 return besterr; 780 } 781 782 #undef CHECK_BETTER 783 784 static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col, 785 int range) { 786 return ((row - range) >= mv_limits->row_min) & 787 ((row + range) <= mv_limits->row_max) & 788 ((col - range) >= mv_limits->col_min) & 789 ((col + range) <= mv_limits->col_max); 790 } 791 792 static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) { 793 return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) && 794 (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max); 795 } 796 797 #define CHECK_BETTER \ 798 { \ 799 if (thissad < bestsad) { \ 800 if (use_mvcost) \ 801 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \ 802 if (thissad < bestsad) { \ 803 bestsad = thissad; \ 804 best_site = i; \ 805 } \ 806 } \ 807 } 808 809 #define MAX_PATTERN_SCALES 11 810 #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 811 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 812 813 // Calculate and return a sad+mvcost list around an integer best pel. 814 static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv, 815 int sadpb, 816 const vp9_variance_fn_ptr_t *fn_ptr, 817 const MV *best_mv, int *cost_list) { 818 static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; 819 const struct buf_2d *const what = &x->plane[0].src; 820 const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0]; 821 const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; 822 int br = best_mv->row; 823 int bc = best_mv->col; 824 MV this_mv; 825 int i; 826 unsigned int sse; 827 828 this_mv.row = br; 829 this_mv.col = bc; 830 cost_list[0] = 831 fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), 832 in_what->stride, &sse) + 833 mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); 834 if (check_bounds(&x->mv_limits, br, bc, 1)) { 835 for (i = 0; i < 4; i++) { 836 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 837 cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, 838 get_buf_from_mv(in_what, &this_mv), 839 in_what->stride, &sse) + 840 mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, 841 x->mvcost, x->errorperbit); 842 } 843 } else { 844 for (i = 0; i < 4; i++) { 845 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 846 if (!is_mv_in(&x->mv_limits, &this_mv)) 847 cost_list[i + 1] = INT_MAX; 848 else 849 cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, 850 get_buf_from_mv(in_what, &this_mv), 851 in_what->stride, &sse) + 852 mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, 853 x->mvcost, x->errorperbit); 854 } 855 } 856 } 857 858 // Generic pattern search function that searches over multiple scales. 859 // Each scale can have a different number of candidates and shape of 860 // candidates as indicated in the num_candidates and candidates arrays 861 // passed into this function 862 // 863 static int vp9_pattern_search( 864 const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, 865 int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, 866 int use_mvcost, const MV *center_mv, MV *best_mv, 867 const int num_candidates[MAX_PATTERN_SCALES], 868 const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { 869 const MACROBLOCKD *const xd = &x->e_mbd; 870 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 871 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 872 }; 873 int i, s, t; 874 const struct buf_2d *const what = &x->plane[0].src; 875 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 876 int br, bc; 877 int bestsad = INT_MAX; 878 int thissad; 879 int k = -1; 880 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 881 int best_init_s = search_param_to_steps[search_param]; 882 // adjust ref_mv to make sure it is within MV range 883 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 884 x->mv_limits.row_min, x->mv_limits.row_max); 885 br = ref_mv->row; 886 bc = ref_mv->col; 887 888 // Work out the start point for the search 889 bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 890 in_what->stride) + 891 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 892 893 // Search all possible scales upto the search param around the center point 894 // pick the scale of the point that is best as the starting scale of 895 // further steps around it. 896 if (do_init_search) { 897 s = best_init_s; 898 best_init_s = -1; 899 for (t = 0; t <= s; ++t) { 900 int best_site = -1; 901 if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { 902 for (i = 0; i < num_candidates[t]; i++) { 903 const MV this_mv = { br + candidates[t][i].row, 904 bc + candidates[t][i].col }; 905 thissad = 906 vfp->sdf(what->buf, what->stride, 907 get_buf_from_mv(in_what, &this_mv), in_what->stride); 908 CHECK_BETTER 909 } 910 } else { 911 for (i = 0; i < num_candidates[t]; i++) { 912 const MV this_mv = { br + candidates[t][i].row, 913 bc + candidates[t][i].col }; 914 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 915 thissad = 916 vfp->sdf(what->buf, what->stride, 917 get_buf_from_mv(in_what, &this_mv), in_what->stride); 918 CHECK_BETTER 919 } 920 } 921 if (best_site == -1) { 922 continue; 923 } else { 924 best_init_s = t; 925 k = best_site; 926 } 927 } 928 if (best_init_s != -1) { 929 br += candidates[best_init_s][k].row; 930 bc += candidates[best_init_s][k].col; 931 } 932 } 933 934 // If the center point is still the best, just skip this and move to 935 // the refinement step. 936 if (best_init_s != -1) { 937 int best_site = -1; 938 s = best_init_s; 939 940 do { 941 // No need to search all 6 points the 1st time if initial search was used 942 if (!do_init_search || s != best_init_s) { 943 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 944 for (i = 0; i < num_candidates[s]; i++) { 945 const MV this_mv = { br + candidates[s][i].row, 946 bc + candidates[s][i].col }; 947 thissad = 948 vfp->sdf(what->buf, what->stride, 949 get_buf_from_mv(in_what, &this_mv), in_what->stride); 950 CHECK_BETTER 951 } 952 } else { 953 for (i = 0; i < num_candidates[s]; i++) { 954 const MV this_mv = { br + candidates[s][i].row, 955 bc + candidates[s][i].col }; 956 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 957 thissad = 958 vfp->sdf(what->buf, what->stride, 959 get_buf_from_mv(in_what, &this_mv), in_what->stride); 960 CHECK_BETTER 961 } 962 } 963 964 if (best_site == -1) { 965 continue; 966 } else { 967 br += candidates[s][best_site].row; 968 bc += candidates[s][best_site].col; 969 k = best_site; 970 } 971 } 972 973 do { 974 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 975 best_site = -1; 976 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 977 next_chkpts_indices[1] = k; 978 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 979 980 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 981 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 982 const MV this_mv = { 983 br + candidates[s][next_chkpts_indices[i]].row, 984 bc + candidates[s][next_chkpts_indices[i]].col 985 }; 986 thissad = 987 vfp->sdf(what->buf, what->stride, 988 get_buf_from_mv(in_what, &this_mv), in_what->stride); 989 CHECK_BETTER 990 } 991 } else { 992 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 993 const MV this_mv = { 994 br + candidates[s][next_chkpts_indices[i]].row, 995 bc + candidates[s][next_chkpts_indices[i]].col 996 }; 997 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 998 thissad = 999 vfp->sdf(what->buf, what->stride, 1000 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1001 CHECK_BETTER 1002 } 1003 } 1004 1005 if (best_site != -1) { 1006 k = next_chkpts_indices[best_site]; 1007 br += candidates[s][k].row; 1008 bc += candidates[s][k].col; 1009 } 1010 } while (best_site != -1); 1011 } while (s--); 1012 } 1013 1014 // Returns the one-away integer pel sad values around the best as follows: 1015 // cost_list[0]: cost at the best integer pel 1016 // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel 1017 // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel 1018 // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel 1019 // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel 1020 if (cost_list) { 1021 const MV best_mv = { br, bc }; 1022 calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list); 1023 } 1024 best_mv->row = br; 1025 best_mv->col = bc; 1026 return bestsad; 1027 } 1028 1029 // A specialized function where the smallest scale search candidates 1030 // are 4 1-away neighbors, and cost_list is non-null 1031 // TODO(debargha): Merge this function with the one above. Also remove 1032 // use_mvcost option since it is always 1, to save unnecessary branches. 1033 static int vp9_pattern_search_sad( 1034 const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, 1035 int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, 1036 int use_mvcost, const MV *center_mv, MV *best_mv, 1037 const int num_candidates[MAX_PATTERN_SCALES], 1038 const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { 1039 const MACROBLOCKD *const xd = &x->e_mbd; 1040 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 1041 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 1042 }; 1043 int i, s, t; 1044 const struct buf_2d *const what = &x->plane[0].src; 1045 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1046 int br, bc; 1047 int bestsad = INT_MAX; 1048 int thissad; 1049 int k = -1; 1050 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 1051 int best_init_s = search_param_to_steps[search_param]; 1052 // adjust ref_mv to make sure it is within MV range 1053 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1054 x->mv_limits.row_min, x->mv_limits.row_max); 1055 br = ref_mv->row; 1056 bc = ref_mv->col; 1057 if (cost_list != NULL) { 1058 cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = 1059 INT_MAX; 1060 } 1061 1062 // Work out the start point for the search 1063 bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 1064 in_what->stride) + 1065 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 1066 1067 // Search all possible scales upto the search param around the center point 1068 // pick the scale of the point that is best as the starting scale of 1069 // further steps around it. 1070 if (do_init_search) { 1071 s = best_init_s; 1072 best_init_s = -1; 1073 for (t = 0; t <= s; ++t) { 1074 int best_site = -1; 1075 if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { 1076 for (i = 0; i < num_candidates[t]; i++) { 1077 const MV this_mv = { br + candidates[t][i].row, 1078 bc + candidates[t][i].col }; 1079 thissad = 1080 vfp->sdf(what->buf, what->stride, 1081 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1082 CHECK_BETTER 1083 } 1084 } else { 1085 for (i = 0; i < num_candidates[t]; i++) { 1086 const MV this_mv = { br + candidates[t][i].row, 1087 bc + candidates[t][i].col }; 1088 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1089 thissad = 1090 vfp->sdf(what->buf, what->stride, 1091 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1092 CHECK_BETTER 1093 } 1094 } 1095 if (best_site == -1) { 1096 continue; 1097 } else { 1098 best_init_s = t; 1099 k = best_site; 1100 } 1101 } 1102 if (best_init_s != -1) { 1103 br += candidates[best_init_s][k].row; 1104 bc += candidates[best_init_s][k].col; 1105 } 1106 } 1107 1108 // If the center point is still the best, just skip this and move to 1109 // the refinement step. 1110 if (best_init_s != -1) { 1111 int do_sad = (num_candidates[0] == 4 && cost_list != NULL); 1112 int best_site = -1; 1113 s = best_init_s; 1114 1115 for (; s >= do_sad; s--) { 1116 if (!do_init_search || s != best_init_s) { 1117 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1118 for (i = 0; i < num_candidates[s]; i++) { 1119 const MV this_mv = { br + candidates[s][i].row, 1120 bc + candidates[s][i].col }; 1121 thissad = 1122 vfp->sdf(what->buf, what->stride, 1123 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1124 CHECK_BETTER 1125 } 1126 } else { 1127 for (i = 0; i < num_candidates[s]; i++) { 1128 const MV this_mv = { br + candidates[s][i].row, 1129 bc + candidates[s][i].col }; 1130 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1131 thissad = 1132 vfp->sdf(what->buf, what->stride, 1133 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1134 CHECK_BETTER 1135 } 1136 } 1137 1138 if (best_site == -1) { 1139 continue; 1140 } else { 1141 br += candidates[s][best_site].row; 1142 bc += candidates[s][best_site].col; 1143 k = best_site; 1144 } 1145 } 1146 1147 do { 1148 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1149 best_site = -1; 1150 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1151 next_chkpts_indices[1] = k; 1152 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 1153 1154 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1155 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1156 const MV this_mv = { 1157 br + candidates[s][next_chkpts_indices[i]].row, 1158 bc + candidates[s][next_chkpts_indices[i]].col 1159 }; 1160 thissad = 1161 vfp->sdf(what->buf, what->stride, 1162 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1163 CHECK_BETTER 1164 } 1165 } else { 1166 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1167 const MV this_mv = { 1168 br + candidates[s][next_chkpts_indices[i]].row, 1169 bc + candidates[s][next_chkpts_indices[i]].col 1170 }; 1171 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1172 thissad = 1173 vfp->sdf(what->buf, what->stride, 1174 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1175 CHECK_BETTER 1176 } 1177 } 1178 1179 if (best_site != -1) { 1180 k = next_chkpts_indices[best_site]; 1181 br += candidates[s][k].row; 1182 bc += candidates[s][k].col; 1183 } 1184 } while (best_site != -1); 1185 } 1186 1187 // Note: If we enter the if below, then cost_list must be non-NULL. 1188 if (s == 0) { 1189 cost_list[0] = bestsad; 1190 if (!do_init_search || s != best_init_s) { 1191 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1192 for (i = 0; i < num_candidates[s]; i++) { 1193 const MV this_mv = { br + candidates[s][i].row, 1194 bc + candidates[s][i].col }; 1195 cost_list[i + 1] = thissad = 1196 vfp->sdf(what->buf, what->stride, 1197 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1198 CHECK_BETTER 1199 } 1200 } else { 1201 for (i = 0; i < num_candidates[s]; i++) { 1202 const MV this_mv = { br + candidates[s][i].row, 1203 bc + candidates[s][i].col }; 1204 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1205 cost_list[i + 1] = thissad = 1206 vfp->sdf(what->buf, what->stride, 1207 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1208 CHECK_BETTER 1209 } 1210 } 1211 1212 if (best_site != -1) { 1213 br += candidates[s][best_site].row; 1214 bc += candidates[s][best_site].col; 1215 k = best_site; 1216 } 1217 } 1218 while (best_site != -1) { 1219 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1220 best_site = -1; 1221 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1222 next_chkpts_indices[1] = k; 1223 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 1224 cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; 1225 cost_list[((k + 2) % 4) + 1] = cost_list[0]; 1226 cost_list[0] = bestsad; 1227 1228 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1229 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1230 const MV this_mv = { 1231 br + candidates[s][next_chkpts_indices[i]].row, 1232 bc + candidates[s][next_chkpts_indices[i]].col 1233 }; 1234 cost_list[next_chkpts_indices[i] + 1] = thissad = 1235 vfp->sdf(what->buf, what->stride, 1236 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1237 CHECK_BETTER 1238 } 1239 } else { 1240 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1241 const MV this_mv = { 1242 br + candidates[s][next_chkpts_indices[i]].row, 1243 bc + candidates[s][next_chkpts_indices[i]].col 1244 }; 1245 if (!is_mv_in(&x->mv_limits, &this_mv)) { 1246 cost_list[next_chkpts_indices[i] + 1] = INT_MAX; 1247 continue; 1248 } 1249 cost_list[next_chkpts_indices[i] + 1] = thissad = 1250 vfp->sdf(what->buf, what->stride, 1251 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1252 CHECK_BETTER 1253 } 1254 } 1255 1256 if (best_site != -1) { 1257 k = next_chkpts_indices[best_site]; 1258 br += candidates[s][k].row; 1259 bc += candidates[s][k].col; 1260 } 1261 } 1262 } 1263 } 1264 1265 // Returns the one-away integer pel sad values around the best as follows: 1266 // cost_list[0]: sad at the best integer pel 1267 // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel 1268 // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel 1269 // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel 1270 // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel 1271 if (cost_list) { 1272 static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; 1273 if (cost_list[0] == INT_MAX) { 1274 cost_list[0] = bestsad; 1275 if (check_bounds(&x->mv_limits, br, bc, 1)) { 1276 for (i = 0; i < 4; i++) { 1277 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1278 cost_list[i + 1] = 1279 vfp->sdf(what->buf, what->stride, 1280 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1281 } 1282 } else { 1283 for (i = 0; i < 4; i++) { 1284 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1285 if (!is_mv_in(&x->mv_limits, &this_mv)) 1286 cost_list[i + 1] = INT_MAX; 1287 else 1288 cost_list[i + 1] = 1289 vfp->sdf(what->buf, what->stride, 1290 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1291 } 1292 } 1293 } else { 1294 if (use_mvcost) { 1295 for (i = 0; i < 4; i++) { 1296 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1297 if (cost_list[i + 1] != INT_MAX) { 1298 cost_list[i + 1] += 1299 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1300 } 1301 } 1302 } 1303 } 1304 } 1305 best_mv->row = br; 1306 best_mv->col = bc; 1307 return bestsad; 1308 } 1309 1310 int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, 1311 const MV *center_mv, const vp9_variance_fn_ptr_t *vfp, 1312 int use_mvcost) { 1313 const MACROBLOCKD *const xd = &x->e_mbd; 1314 const struct buf_2d *const what = &x->plane[0].src; 1315 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1316 const MV mv = { best_mv->row * 8, best_mv->col * 8 }; 1317 uint32_t unused; 1318 #if CONFIG_VP9_HIGHBITDEPTH 1319 uint64_t err = 1320 vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), 1321 in_what->stride, &unused); 1322 err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, 1323 x->errorperbit) 1324 : 0); 1325 if (err >= INT_MAX) return INT_MAX; 1326 return (int)err; 1327 #else 1328 return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), 1329 in_what->stride, &unused) + 1330 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, 1331 x->errorperbit) 1332 : 0); 1333 #endif 1334 } 1335 1336 int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, 1337 const MV *center_mv, const uint8_t *second_pred, 1338 const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { 1339 const MACROBLOCKD *const xd = &x->e_mbd; 1340 const struct buf_2d *const what = &x->plane[0].src; 1341 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1342 const MV mv = { best_mv->row * 8, best_mv->col * 8 }; 1343 unsigned int unused; 1344 1345 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, 1346 what->buf, what->stride, &unused, second_pred) + 1347 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, 1348 x->errorperbit) 1349 : 0); 1350 } 1351 1352 static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1353 int sad_per_bit, int do_init_search, int *cost_list, 1354 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1355 const MV *center_mv, MV *best_mv) { 1356 // First scale has 8-closest points, the rest have 6 points in hex shape 1357 // at increasing scales 1358 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6, 1359 6, 6, 6, 6, 6 }; 1360 // Note that the largest candidate step at each scale is 2^scale 1361 /* clang-format off */ 1362 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1363 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 }, 1364 { -1, 0 } }, 1365 { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } }, 1366 { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } }, 1367 { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } }, 1368 { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } }, 1369 { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 }, 1370 { -32, 0 } }, 1371 { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 }, 1372 { -64, 0 } }, 1373 { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 }, 1374 { -128, 0 } }, 1375 { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 }, 1376 { -256, 0 } }, 1377 { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 }, 1378 { -512, 0 } }, 1379 { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 }, 1380 { -512, 1024 }, { -1024, 0 } } 1381 }; 1382 /* clang-format on */ 1383 return vp9_pattern_search( 1384 x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, 1385 use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates); 1386 } 1387 1388 static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1389 int sad_per_bit, int do_init_search, int *cost_list, 1390 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1391 const MV *center_mv, MV *best_mv) { 1392 // First scale has 4-closest points, the rest have 8 points in diamond 1393 // shape at increasing scales 1394 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 1395 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1396 }; 1397 // Note that the largest candidate step at each scale is 2^scale 1398 /* clang-format off */ 1399 static const MV 1400 bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1401 { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }, 1402 { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 }, 1403 { -1, 1 }, { -2, 0 } }, 1404 { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 }, 1405 { -2, 2 }, { -4, 0 } }, 1406 { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 }, 1407 { -4, 4 }, { -8, 0 } }, 1408 { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 }, 1409 { -8, 8 }, { -16, 0 } }, 1410 { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 }, 1411 { 0, 32 }, { -16, 16 }, { -32, 0 } }, 1412 { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 }, 1413 { 0, 64 }, { -32, 32 }, { -64, 0 } }, 1414 { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 }, 1415 { 0, 128 }, { -64, 64 }, { -128, 0 } }, 1416 { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 }, 1417 { 0, 256 }, { -128, 128 }, { -256, 0 } }, 1418 { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 }, 1419 { 0, 512 }, { -256, 256 }, { -512, 0 } }, 1420 { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 }, 1421 { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } } 1422 }; 1423 /* clang-format on */ 1424 return vp9_pattern_search_sad( 1425 x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, 1426 use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates); 1427 } 1428 1429 static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1430 int sad_per_bit, int do_init_search, int *cost_list, 1431 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1432 const MV *center_mv, MV *best_mv) { 1433 // All scales have 8 closest points in square shape 1434 static const int square_num_candidates[MAX_PATTERN_SCALES] = { 1435 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1436 }; 1437 // Note that the largest candidate step at each scale is 2^scale 1438 /* clang-format off */ 1439 static const MV 1440 square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1441 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, 1442 { -1, 1 }, { -1, 0 } }, 1443 { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 }, 1444 { -2, 2 }, { -2, 0 } }, 1445 { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 }, 1446 { -4, 4 }, { -4, 0 } }, 1447 { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 }, 1448 { -8, 8 }, { -8, 0 } }, 1449 { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 }, 1450 { 0, 16 }, { -16, 16 }, { -16, 0 } }, 1451 { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 }, 1452 { 0, 32 }, { -32, 32 }, { -32, 0 } }, 1453 { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 }, 1454 { 0, 64 }, { -64, 64 }, { -64, 0 } }, 1455 { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 }, 1456 { 0, 128 }, { -128, 128 }, { -128, 0 } }, 1457 { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 }, 1458 { 0, 256 }, { -256, 256 }, { -256, 0 } }, 1459 { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 }, 1460 { 0, 512 }, { -512, 512 }, { -512, 0 } }, 1461 { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 }, 1462 { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } } 1463 }; 1464 /* clang-format on */ 1465 return vp9_pattern_search( 1466 x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, 1467 use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates); 1468 } 1469 1470 static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1471 int sad_per_bit, 1472 int do_init_search, // must be zero for fast_hex 1473 int *cost_list, const vp9_variance_fn_ptr_t *vfp, 1474 int use_mvcost, const MV *center_mv, MV *best_mv) { 1475 return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), 1476 sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, 1477 center_mv, best_mv); 1478 } 1479 1480 static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1481 int sad_per_bit, int do_init_search, int *cost_list, 1482 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1483 const MV *center_mv, MV *best_mv) { 1484 return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), 1485 sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, 1486 center_mv, best_mv); 1487 } 1488 1489 #undef CHECK_BETTER 1490 1491 // Exhuastive motion search around a given centre position with a given 1492 // step size. 1493 static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, 1494 int range, int step, int sad_per_bit, 1495 const vp9_variance_fn_ptr_t *fn_ptr, 1496 const MV *center_mv) { 1497 const MACROBLOCKD *const xd = &x->e_mbd; 1498 const struct buf_2d *const what = &x->plane[0].src; 1499 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1500 MV fcenter_mv = { center_mv->row, center_mv->col }; 1501 unsigned int best_sad = INT_MAX; 1502 int r, c, i; 1503 int start_col, end_col, start_row, end_row; 1504 int col_step = (step > 1) ? step : 4; 1505 1506 assert(step >= 1); 1507 1508 clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1509 x->mv_limits.row_min, x->mv_limits.row_max); 1510 *best_mv = fcenter_mv; 1511 best_sad = 1512 fn_ptr->sdf(what->buf, what->stride, 1513 get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) + 1514 mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit); 1515 start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row); 1516 start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col); 1517 end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row); 1518 end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col); 1519 1520 for (r = start_row; r <= end_row; r += step) { 1521 for (c = start_col; c <= end_col; c += col_step) { 1522 // Step > 1 means we are not checking every location in this pass. 1523 if (step > 1) { 1524 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; 1525 unsigned int sad = 1526 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), 1527 in_what->stride); 1528 if (sad < best_sad) { 1529 sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); 1530 if (sad < best_sad) { 1531 best_sad = sad; 1532 *best_mv = mv; 1533 } 1534 } 1535 } else { 1536 // 4 sads in a single call if we are checking every location 1537 if (c + 3 <= end_col) { 1538 unsigned int sads[4]; 1539 const uint8_t *addrs[4]; 1540 for (i = 0; i < 4; ++i) { 1541 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; 1542 addrs[i] = get_buf_from_mv(in_what, &mv); 1543 } 1544 fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); 1545 1546 for (i = 0; i < 4; ++i) { 1547 if (sads[i] < best_sad) { 1548 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; 1549 const unsigned int sad = 1550 sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); 1551 if (sad < best_sad) { 1552 best_sad = sad; 1553 *best_mv = mv; 1554 } 1555 } 1556 } 1557 } else { 1558 for (i = 0; i < end_col - c; ++i) { 1559 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; 1560 unsigned int sad = 1561 fn_ptr->sdf(what->buf, what->stride, 1562 get_buf_from_mv(in_what, &mv), in_what->stride); 1563 if (sad < best_sad) { 1564 sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); 1565 if (sad < best_sad) { 1566 best_sad = sad; 1567 *best_mv = mv; 1568 } 1569 } 1570 } 1571 } 1572 } 1573 } 1574 } 1575 1576 return best_sad; 1577 } 1578 1579 int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg, 1580 MV *ref_mv, MV *best_mv, int search_param, 1581 int sad_per_bit, int *num00, 1582 const vp9_variance_fn_ptr_t *fn_ptr, 1583 const MV *center_mv) { 1584 int i, j, step; 1585 1586 const MACROBLOCKD *const xd = &x->e_mbd; 1587 uint8_t *what = x->plane[0].src.buf; 1588 const int what_stride = x->plane[0].src.stride; 1589 const uint8_t *in_what; 1590 const int in_what_stride = xd->plane[0].pre[0].stride; 1591 const uint8_t *best_address; 1592 1593 unsigned int bestsad = INT_MAX; 1594 int best_site = -1; 1595 int last_site = -1; 1596 1597 int ref_row; 1598 int ref_col; 1599 1600 // search_param determines the length of the initial step and hence the number 1601 // of iterations. 1602 // 0 = initial step (MAX_FIRST_STEP) pel 1603 // 1 = (MAX_FIRST_STEP/2) pel, 1604 // 2 = (MAX_FIRST_STEP/4) pel... 1605 // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; 1606 const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; 1607 const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; 1608 const int tot_steps = cfg->total_steps - search_param; 1609 1610 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 1611 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1612 x->mv_limits.row_min, x->mv_limits.row_max); 1613 ref_row = ref_mv->row; 1614 ref_col = ref_mv->col; 1615 *num00 = 0; 1616 best_mv->row = ref_row; 1617 best_mv->col = ref_col; 1618 1619 // Work out the start point for the search 1620 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 1621 best_address = in_what; 1622 1623 // Check the starting position 1624 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + 1625 mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); 1626 1627 i = 0; 1628 1629 for (step = 0; step < tot_steps; step++) { 1630 int all_in = 1, t; 1631 1632 // All_in is true if every one of the points we are checking are within 1633 // the bounds of the image. 1634 all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min); 1635 all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max); 1636 all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min); 1637 all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max); 1638 1639 // If all the pixels are within the bounds we don't check whether the 1640 // search point is valid in this loop, otherwise we check each point 1641 // for validity.. 1642 if (all_in) { 1643 unsigned int sad_array[4]; 1644 1645 for (j = 0; j < cfg->searches_per_step; j += 4) { 1646 unsigned char const *block_offset[4]; 1647 1648 for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address; 1649 1650 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1651 sad_array); 1652 1653 for (t = 0; t < 4; t++, i++) { 1654 if (sad_array[t] < bestsad) { 1655 const MV this_mv = { best_mv->row + ss_mv[i].row, 1656 best_mv->col + ss_mv[i].col }; 1657 sad_array[t] += 1658 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1659 if (sad_array[t] < bestsad) { 1660 bestsad = sad_array[t]; 1661 best_site = i; 1662 } 1663 } 1664 } 1665 } 1666 } else { 1667 for (j = 0; j < cfg->searches_per_step; j++) { 1668 // Trap illegal vectors 1669 const MV this_mv = { best_mv->row + ss_mv[i].row, 1670 best_mv->col + ss_mv[i].col }; 1671 1672 if (is_mv_in(&x->mv_limits, &this_mv)) { 1673 const uint8_t *const check_here = ss_os[i] + best_address; 1674 unsigned int thissad = 1675 fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1676 1677 if (thissad < bestsad) { 1678 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1679 if (thissad < bestsad) { 1680 bestsad = thissad; 1681 best_site = i; 1682 } 1683 } 1684 } 1685 i++; 1686 } 1687 } 1688 if (best_site != last_site) { 1689 best_mv->row += ss_mv[best_site].row; 1690 best_mv->col += ss_mv[best_site].col; 1691 best_address += ss_os[best_site]; 1692 last_site = best_site; 1693 #if defined(NEW_DIAMOND_SEARCH) 1694 while (1) { 1695 const MV this_mv = { best_mv->row + ss_mv[best_site].row, 1696 best_mv->col + ss_mv[best_site].col }; 1697 if (is_mv_in(&x->mv_limits, &this_mv)) { 1698 const uint8_t *const check_here = ss_os[best_site] + best_address; 1699 unsigned int thissad = 1700 fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1701 if (thissad < bestsad) { 1702 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1703 if (thissad < bestsad) { 1704 bestsad = thissad; 1705 best_mv->row += ss_mv[best_site].row; 1706 best_mv->col += ss_mv[best_site].col; 1707 best_address += ss_os[best_site]; 1708 continue; 1709 } 1710 } 1711 } 1712 break; 1713 } 1714 #endif 1715 } else if (best_address == in_what) { 1716 (*num00)++; 1717 } 1718 } 1719 return bestsad; 1720 } 1721 1722 static int vector_match(int16_t *ref, int16_t *src, int bwl) { 1723 int best_sad = INT_MAX; 1724 int this_sad; 1725 int d; 1726 int center, offset = 0; 1727 int bw = 4 << bwl; // redundant variable, to be changed in the experiments. 1728 for (d = 0; d <= bw; d += 16) { 1729 this_sad = vpx_vector_var(&ref[d], src, bwl); 1730 if (this_sad < best_sad) { 1731 best_sad = this_sad; 1732 offset = d; 1733 } 1734 } 1735 center = offset; 1736 1737 for (d = -8; d <= 8; d += 16) { 1738 int this_pos = offset + d; 1739 // check limit 1740 if (this_pos < 0 || this_pos > bw) continue; 1741 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 1742 if (this_sad < best_sad) { 1743 best_sad = this_sad; 1744 center = this_pos; 1745 } 1746 } 1747 offset = center; 1748 1749 for (d = -4; d <= 4; d += 8) { 1750 int this_pos = offset + d; 1751 // check limit 1752 if (this_pos < 0 || this_pos > bw) continue; 1753 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 1754 if (this_sad < best_sad) { 1755 best_sad = this_sad; 1756 center = this_pos; 1757 } 1758 } 1759 offset = center; 1760 1761 for (d = -2; d <= 2; d += 4) { 1762 int this_pos = offset + d; 1763 // check limit 1764 if (this_pos < 0 || this_pos > bw) continue; 1765 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 1766 if (this_sad < best_sad) { 1767 best_sad = this_sad; 1768 center = this_pos; 1769 } 1770 } 1771 offset = center; 1772 1773 for (d = -1; d <= 1; d += 2) { 1774 int this_pos = offset + d; 1775 // check limit 1776 if (this_pos < 0 || this_pos > bw) continue; 1777 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 1778 if (this_sad < best_sad) { 1779 best_sad = this_sad; 1780 center = this_pos; 1781 } 1782 } 1783 1784 return (center - (bw >> 1)); 1785 } 1786 1787 static const MV search_pos[4] = { 1788 { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, 1789 }; 1790 1791 unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, 1792 BLOCK_SIZE bsize, int mi_row, 1793 int mi_col) { 1794 MACROBLOCKD *xd = &x->e_mbd; 1795 MODE_INFO *mi = xd->mi[0]; 1796 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; 1797 DECLARE_ALIGNED(16, int16_t, hbuf[128]); 1798 DECLARE_ALIGNED(16, int16_t, vbuf[128]); 1799 DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); 1800 DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); 1801 int idx; 1802 const int bw = 4 << b_width_log2_lookup[bsize]; 1803 const int bh = 4 << b_height_log2_lookup[bsize]; 1804 const int search_width = bw << 1; 1805 const int search_height = bh << 1; 1806 const int src_stride = x->plane[0].src.stride; 1807 const int ref_stride = xd->plane[0].pre[0].stride; 1808 uint8_t const *ref_buf, *src_buf; 1809 MV *tmp_mv = &xd->mi[0]->mv[0].as_mv; 1810 unsigned int best_sad, tmp_sad, this_sad[4]; 1811 MV this_mv; 1812 const int norm_factor = 3 + (bw >> 5); 1813 const YV12_BUFFER_CONFIG *scaled_ref_frame = 1814 vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]); 1815 1816 if (scaled_ref_frame) { 1817 int i; 1818 // Swap out the reference frame for a version that's been scaled to 1819 // match the resolution of the current frame, allowing the existing 1820 // motion search code to be used without additional modifications. 1821 for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; 1822 vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); 1823 } 1824 1825 #if CONFIG_VP9_HIGHBITDEPTH 1826 // TODO(jingning): Implement integral projection functions for high bit-depth 1827 // setting and remove this part of code. 1828 if (xd->bd != 8) { 1829 unsigned int this_sad; 1830 tmp_mv->row = 0; 1831 tmp_mv->col = 0; 1832 this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, 1833 xd->plane[0].pre[0].buf, ref_stride); 1834 1835 if (scaled_ref_frame) { 1836 int i; 1837 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; 1838 } 1839 return this_sad; 1840 } 1841 #endif 1842 1843 // Set up prediction 1-D reference set 1844 ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); 1845 for (idx = 0; idx < search_width; idx += 16) { 1846 vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); 1847 ref_buf += 16; 1848 } 1849 1850 ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; 1851 for (idx = 0; idx < search_height; ++idx) { 1852 vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor; 1853 ref_buf += ref_stride; 1854 } 1855 1856 // Set up src 1-D reference set 1857 for (idx = 0; idx < bw; idx += 16) { 1858 src_buf = x->plane[0].src.buf + idx; 1859 vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); 1860 } 1861 1862 src_buf = x->plane[0].src.buf; 1863 for (idx = 0; idx < bh; ++idx) { 1864 src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor; 1865 src_buf += src_stride; 1866 } 1867 1868 // Find the best match per 1-D search 1869 tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]); 1870 tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]); 1871 1872 this_mv = *tmp_mv; 1873 src_buf = x->plane[0].src.buf; 1874 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; 1875 best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); 1876 1877 { 1878 const uint8_t *const pos[4] = { 1879 ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride, 1880 }; 1881 1882 cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad); 1883 } 1884 1885 for (idx = 0; idx < 4; ++idx) { 1886 if (this_sad[idx] < best_sad) { 1887 best_sad = this_sad[idx]; 1888 tmp_mv->row = search_pos[idx].row + this_mv.row; 1889 tmp_mv->col = search_pos[idx].col + this_mv.col; 1890 } 1891 } 1892 1893 if (this_sad[0] < this_sad[3]) 1894 this_mv.row -= 1; 1895 else 1896 this_mv.row += 1; 1897 1898 if (this_sad[1] < this_sad[2]) 1899 this_mv.col -= 1; 1900 else 1901 this_mv.col += 1; 1902 1903 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; 1904 1905 tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); 1906 if (best_sad > tmp_sad) { 1907 *tmp_mv = this_mv; 1908 best_sad = tmp_sad; 1909 } 1910 1911 tmp_mv->row *= 8; 1912 tmp_mv->col *= 8; 1913 1914 if (scaled_ref_frame) { 1915 int i; 1916 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; 1917 } 1918 1919 return best_sad; 1920 } 1921 1922 // Runs sequence of diamond searches in smaller steps for RD. 1923 /* do_refine: If last step (1-away) of n-step search doesn't pick the center 1924 point as the best match, we will do a final 1-away diamond 1925 refining search */ 1926 static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, 1927 int step_param, int sadpb, int further_steps, 1928 int do_refine, int *cost_list, 1929 const vp9_variance_fn_ptr_t *fn_ptr, 1930 const MV *ref_mv, MV *dst_mv) { 1931 MV temp_mv; 1932 int thissme, n, num00 = 0; 1933 int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, 1934 step_param, sadpb, &n, fn_ptr, ref_mv); 1935 if (bestsme < INT_MAX) 1936 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1937 *dst_mv = temp_mv; 1938 1939 // If there won't be more n-step search, check to see if refining search is 1940 // needed. 1941 if (n > further_steps) do_refine = 0; 1942 1943 while (n < further_steps) { 1944 ++n; 1945 1946 if (num00) { 1947 num00--; 1948 } else { 1949 thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, 1950 step_param + n, sadpb, &num00, fn_ptr, 1951 ref_mv); 1952 if (thissme < INT_MAX) 1953 thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1954 1955 // check to see if refining search is needed. 1956 if (num00 > further_steps - n) do_refine = 0; 1957 1958 if (thissme < bestsme) { 1959 bestsme = thissme; 1960 *dst_mv = temp_mv; 1961 } 1962 } 1963 } 1964 1965 // final 1-away diamond refining search 1966 if (do_refine) { 1967 const int search_range = 8; 1968 MV best_mv = *dst_mv; 1969 thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, 1970 ref_mv); 1971 if (thissme < INT_MAX) 1972 thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); 1973 if (thissme < bestsme) { 1974 bestsme = thissme; 1975 *dst_mv = best_mv; 1976 } 1977 } 1978 1979 // Return cost list. 1980 if (cost_list) { 1981 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); 1982 } 1983 return bestsme; 1984 } 1985 1986 #define MIN_RANGE 7 1987 #define MAX_RANGE 256 1988 #define MIN_INTERVAL 1 1989 // Runs an limited range exhaustive mesh search using a pattern set 1990 // according to the encode speed profile. 1991 static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, 1992 MV *centre_mv_full, int sadpb, int *cost_list, 1993 const vp9_variance_fn_ptr_t *fn_ptr, 1994 const MV *ref_mv, MV *dst_mv) { 1995 const SPEED_FEATURES *const sf = &cpi->sf; 1996 MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; 1997 MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; 1998 int bestsme; 1999 int i; 2000 int interval = sf->mesh_patterns[0].interval; 2001 int range = sf->mesh_patterns[0].range; 2002 int baseline_interval_divisor; 2003 2004 // Trap illegal values for interval and range for this function. 2005 if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || 2006 (interval > range)) 2007 return INT_MAX; 2008 2009 baseline_interval_divisor = range / interval; 2010 2011 // Check size of proposed first range against magnitude of the centre 2012 // value used as a starting point. 2013 range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); 2014 range = VPXMIN(range, MAX_RANGE); 2015 interval = VPXMAX(interval, range / baseline_interval_divisor); 2016 2017 // initial search 2018 bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval, 2019 sadpb, fn_ptr, &temp_mv); 2020 2021 if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { 2022 // Progressive searches with range and step size decreasing each time 2023 // till we reach a step size of 1. Then break out. 2024 for (i = 1; i < MAX_MESH_STEP; ++i) { 2025 // First pass with coarser step and longer range 2026 bestsme = exhuastive_mesh_search( 2027 x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range, 2028 sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv); 2029 2030 if (sf->mesh_patterns[i].interval == 1) break; 2031 } 2032 } 2033 2034 if (bestsme < INT_MAX) 2035 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 2036 *dst_mv = temp_mv; 2037 2038 // Return cost list. 2039 if (cost_list) { 2040 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); 2041 } 2042 return bestsme; 2043 } 2044 2045 int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, 2046 int search_range, 2047 const vp9_variance_fn_ptr_t *fn_ptr, 2048 const MV *center_mv) { 2049 const MACROBLOCKD *const xd = &x->e_mbd; 2050 const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; 2051 const struct buf_2d *const what = &x->plane[0].src; 2052 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2053 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 2054 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); 2055 unsigned int best_sad = 2056 fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) + 2057 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 2058 int i, j; 2059 2060 for (i = 0; i < search_range; i++) { 2061 int best_site = -1; 2062 const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) & 2063 ((ref_mv->row + 1) < x->mv_limits.row_max) & 2064 ((ref_mv->col - 1) > x->mv_limits.col_min) & 2065 ((ref_mv->col + 1) < x->mv_limits.col_max); 2066 2067 if (all_in) { 2068 unsigned int sads[4]; 2069 const uint8_t *const positions[4] = { best_address - in_what->stride, 2070 best_address - 1, best_address + 1, 2071 best_address + in_what->stride }; 2072 2073 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); 2074 2075 for (j = 0; j < 4; ++j) { 2076 if (sads[j] < best_sad) { 2077 const MV mv = { ref_mv->row + neighbors[j].row, 2078 ref_mv->col + neighbors[j].col }; 2079 sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2080 if (sads[j] < best_sad) { 2081 best_sad = sads[j]; 2082 best_site = j; 2083 } 2084 } 2085 } 2086 } else { 2087 for (j = 0; j < 4; ++j) { 2088 const MV mv = { ref_mv->row + neighbors[j].row, 2089 ref_mv->col + neighbors[j].col }; 2090 2091 if (is_mv_in(&x->mv_limits, &mv)) { 2092 unsigned int sad = 2093 fn_ptr->sdf(what->buf, what->stride, 2094 get_buf_from_mv(in_what, &mv), in_what->stride); 2095 if (sad < best_sad) { 2096 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2097 if (sad < best_sad) { 2098 best_sad = sad; 2099 best_site = j; 2100 } 2101 } 2102 } 2103 } 2104 } 2105 2106 if (best_site == -1) { 2107 break; 2108 } else { 2109 ref_mv->row += neighbors[best_site].row; 2110 ref_mv->col += neighbors[best_site].col; 2111 best_address = get_buf_from_mv(in_what, ref_mv); 2112 } 2113 } 2114 2115 return best_sad; 2116 } 2117 2118 // This function is called when we do joint motion search in comp_inter_inter 2119 // mode. 2120 int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, 2121 int search_range, 2122 const vp9_variance_fn_ptr_t *fn_ptr, 2123 const MV *center_mv, const uint8_t *second_pred) { 2124 const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, 2125 { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; 2126 const MACROBLOCKD *const xd = &x->e_mbd; 2127 const struct buf_2d *const what = &x->plane[0].src; 2128 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2129 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 2130 unsigned int best_sad = INT_MAX; 2131 int i, j; 2132 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 2133 x->mv_limits.row_min, x->mv_limits.row_max); 2134 best_sad = 2135 fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 2136 in_what->stride, second_pred) + 2137 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 2138 2139 for (i = 0; i < search_range; ++i) { 2140 int best_site = -1; 2141 2142 for (j = 0; j < 8; ++j) { 2143 const MV mv = { ref_mv->row + neighbors[j].row, 2144 ref_mv->col + neighbors[j].col }; 2145 2146 if (is_mv_in(&x->mv_limits, &mv)) { 2147 unsigned int sad = 2148 fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), 2149 in_what->stride, second_pred); 2150 if (sad < best_sad) { 2151 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2152 if (sad < best_sad) { 2153 best_sad = sad; 2154 best_site = j; 2155 } 2156 } 2157 } 2158 } 2159 2160 if (best_site == -1) { 2161 break; 2162 } else { 2163 ref_mv->row += neighbors[best_site].row; 2164 ref_mv->col += neighbors[best_site].col; 2165 } 2166 } 2167 return best_sad; 2168 } 2169 2170 int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, 2171 MV *mvp_full, int step_param, int search_method, 2172 int error_per_bit, int *cost_list, const MV *ref_mv, 2173 MV *tmp_mv, int var_max, int rd) { 2174 const SPEED_FEATURES *const sf = &cpi->sf; 2175 const SEARCH_METHODS method = (SEARCH_METHODS)search_method; 2176 vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; 2177 int var = 0; 2178 if (cost_list) { 2179 cost_list[0] = INT_MAX; 2180 cost_list[1] = INT_MAX; 2181 cost_list[2] = INT_MAX; 2182 cost_list[3] = INT_MAX; 2183 cost_list[4] = INT_MAX; 2184 } 2185 2186 switch (method) { 2187 case FAST_DIAMOND: 2188 var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, 2189 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2190 break; 2191 case FAST_HEX: 2192 var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, 2193 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2194 break; 2195 case HEX: 2196 var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, 2197 fn_ptr, 1, ref_mv, tmp_mv); 2198 break; 2199 case SQUARE: 2200 var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, 2201 fn_ptr, 1, ref_mv, tmp_mv); 2202 break; 2203 case BIGDIA: 2204 var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, 2205 fn_ptr, 1, ref_mv, tmp_mv); 2206 break; 2207 case NSTEP: 2208 var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, 2209 MAX_MVSEARCH_STEPS - 1 - step_param, 1, 2210 cost_list, fn_ptr, ref_mv, tmp_mv); 2211 2212 // Should we allow a follow on exhaustive search? 2213 if ((sf->exhaustive_searches_thresh < INT_MAX) && 2214 !cpi->rc.is_src_frame_alt_ref) { 2215 int64_t exhuastive_thr = sf->exhaustive_searches_thresh; 2216 exhuastive_thr >>= 2217 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); 2218 2219 // Threshold variance for an exhaustive full search. 2220 if (var > exhuastive_thr) { 2221 int var_ex; 2222 MV tmp_mv_ex; 2223 var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, 2224 cost_list, fn_ptr, ref_mv, &tmp_mv_ex); 2225 2226 if (var_ex < var) { 2227 var = var_ex; 2228 *tmp_mv = tmp_mv_ex; 2229 } 2230 } 2231 } 2232 break; 2233 default: assert(0 && "Invalid search method."); 2234 } 2235 2236 if (method != NSTEP && rd && var < var_max) 2237 var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); 2238 2239 return var; 2240 } 2241 2242 // Note(yunqingwang): The following 2 functions are only used in the motion 2243 // vector unit test, which return extreme motion vectors allowed by the MV 2244 // limits. 2245 #define COMMON_MV_TEST \ 2246 SETUP_SUBPEL_SEARCH; \ 2247 \ 2248 (void)error_per_bit; \ 2249 (void)vfp; \ 2250 (void)z; \ 2251 (void)src_stride; \ 2252 (void)y; \ 2253 (void)y_stride; \ 2254 (void)second_pred; \ 2255 (void)w; \ 2256 (void)h; \ 2257 (void)offset; \ 2258 (void)mvjcost; \ 2259 (void)mvcost; \ 2260 (void)sse1; \ 2261 (void)distortion; \ 2262 \ 2263 (void)halfiters; \ 2264 (void)quarteriters; \ 2265 (void)eighthiters; \ 2266 (void)whichdir; \ 2267 (void)allow_hp; \ 2268 (void)forced_stop; \ 2269 (void)hstep; \ 2270 (void)rr; \ 2271 (void)rc; \ 2272 \ 2273 (void)tr; \ 2274 (void)tc; \ 2275 (void)sse; \ 2276 (void)thismse; \ 2277 (void)cost_list; 2278 2279 // Return the maximum MV. 2280 uint32_t vp9_return_max_sub_pixel_mv( 2281 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 2282 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 2283 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 2284 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 2285 int h) { 2286 COMMON_MV_TEST; 2287 2288 (void)minr; 2289 (void)minc; 2290 2291 bestmv->row = maxr; 2292 bestmv->col = maxc; 2293 besterr = 0; 2294 2295 // In the sub-pel motion search, if hp is not used, then the last bit of mv 2296 // has to be 0. 2297 lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); 2298 2299 return besterr; 2300 } 2301 // Return the minimum MV. 2302 uint32_t vp9_return_min_sub_pixel_mv( 2303 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 2304 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 2305 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 2306 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 2307 int h) { 2308 COMMON_MV_TEST; 2309 2310 (void)maxr; 2311 (void)maxc; 2312 2313 bestmv->row = minr; 2314 bestmv->col = minc; 2315 besterr = 0; 2316 2317 // In the sub-pel motion search, if hp is not used, then the last bit of mv 2318 // has to be 0. 2319 lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); 2320 2321 return besterr; 2322 } 2323