1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <limits.h> 13 #include <math.h> 14 #include <stdio.h> 15 16 #include "./vpx_config.h" 17 #include "./vpx_dsp_rtcd.h" 18 19 #include "vpx_dsp/vpx_dsp_common.h" 20 #include "vpx_mem/vpx_mem.h" 21 #include "vpx_ports/mem.h" 22 23 #include "vp9/common/vp9_common.h" 24 #include "vp9/common/vp9_mvref_common.h" 25 #include "vp9/common/vp9_reconinter.h" 26 27 #include "vp9/encoder/vp9_encoder.h" 28 #include "vp9/encoder/vp9_mcomp.h" 29 30 // #define NEW_DIAMOND_SEARCH 31 32 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, 33 const MV *mv) { 34 return &buf->buf[mv->row * buf->stride + mv->col]; 35 } 36 37 void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { 38 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 39 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 40 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 41 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 42 43 col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1); 44 row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1); 45 col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1); 46 row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1); 47 48 // Get intersection of UMV window and valid MV window to reduce # of checks 49 // in diamond search. 50 if (mv_limits->col_min < col_min) mv_limits->col_min = col_min; 51 if (mv_limits->col_max > col_max) mv_limits->col_max = col_max; 52 if (mv_limits->row_min < row_min) mv_limits->row_min = row_min; 53 if (mv_limits->row_max > row_max) mv_limits->row_max = row_max; 54 } 55 56 void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, 57 const MvLimits *umv_window_limits, 58 const MV *ref_mv) { 59 subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8, 60 ref_mv->col - MAX_FULL_PEL_VAL * 8); 61 subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8, 62 ref_mv->col + MAX_FULL_PEL_VAL * 8); 63 subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8, 64 ref_mv->row - MAX_FULL_PEL_VAL * 8); 65 subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8, 66 ref_mv->row + MAX_FULL_PEL_VAL * 8); 67 68 subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min); 69 subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max); 70 subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min); 71 subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max); 72 } 73 74 int vp9_init_search_range(int size) { 75 int sr = 0; 76 // Minimum search size no matter what the passed in value. 77 size = VPXMAX(16, size); 78 79 while ((size << sr) < MAX_FULL_PEL_VAL) sr++; 80 81 sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2); 82 return sr; 83 } 84 85 static INLINE int mv_cost(const MV *mv, const int *joint_cost, 86 int *const comp_cost[2]) { 87 assert(mv->row >= -MV_MAX && mv->row < MV_MAX); 88 assert(mv->col >= -MV_MAX && mv->col < MV_MAX); 89 return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] + 90 comp_cost[1][mv->col]; 91 } 92 93 int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, 94 int *mvcost[2], int weight) { 95 const MV diff = { mv->row - ref->row, mv->col - ref->col }; 96 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); 97 } 98 99 #define PIXEL_TRANSFORM_ERROR_SCALE 4 100 static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, 101 int *mvcost[2], int error_per_bit) { 102 if (mvcost) { 103 const MV diff = { mv->row - ref->row, mv->col - ref->col }; 104 return (int)ROUND64_POWER_OF_TWO( 105 (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, 106 RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT + 107 PIXEL_TRANSFORM_ERROR_SCALE); 108 } 109 return 0; 110 } 111 112 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, 113 int sad_per_bit) { 114 const MV diff = { mv->row - ref->row, mv->col - ref->col }; 115 return ROUND_POWER_OF_TWO( 116 (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit, 117 VP9_PROB_COST_SHIFT); 118 } 119 120 void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { 121 int len; 122 int ss_count = 0; 123 124 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 125 // Generate offsets for 4 search sites per step. 126 const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } }; 127 int i; 128 for (i = 0; i < 4; ++i, ++ss_count) { 129 cfg->ss_mv[ss_count] = ss_mvs[i]; 130 cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; 131 } 132 } 133 134 cfg->searches_per_step = 4; 135 cfg->total_steps = ss_count / cfg->searches_per_step; 136 } 137 138 void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { 139 int len; 140 int ss_count = 0; 141 142 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 143 // Generate offsets for 8 search sites per step. 144 const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len }, 145 { 0, len }, { -len, -len }, { -len, len }, 146 { len, -len }, { len, len } }; 147 int i; 148 for (i = 0; i < 8; ++i, ++ss_count) { 149 cfg->ss_mv[ss_count] = ss_mvs[i]; 150 cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; 151 } 152 } 153 154 cfg->searches_per_step = 8; 155 cfg->total_steps = ss_count / cfg->searches_per_step; 156 } 157 158 // convert motion vector component to offset for sv[a]f calc 159 static INLINE int sp(int x) { return x & 7; } 160 161 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { 162 return &buf[(r >> 3) * stride + (c >> 3)]; 163 } 164 165 #if CONFIG_VP9_HIGHBITDEPTH 166 /* checks if (r, c) has better score than previous best */ 167 #define CHECK_BETTER(v, r, c) \ 168 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 169 int64_t tmpmse; \ 170 const MV mv = { r, c }; \ 171 const MV ref_mv = { rr, rc }; \ 172 if (second_pred == NULL) { \ 173 thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 174 src_stride, &sse); \ 175 } else { \ 176 thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 177 src_stride, &sse, second_pred); \ 178 } \ 179 tmpmse = thismse; \ 180 tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ 181 if (tmpmse >= INT_MAX) { \ 182 v = INT_MAX; \ 183 } else if ((v = (uint32_t)tmpmse) < besterr) { \ 184 besterr = v; \ 185 br = r; \ 186 bc = c; \ 187 *distortion = thismse; \ 188 *sse1 = sse; \ 189 } \ 190 } else { \ 191 v = INT_MAX; \ 192 } 193 #else 194 /* checks if (r, c) has better score than previous best */ 195 #define CHECK_BETTER(v, r, c) \ 196 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 197 const MV mv = { r, c }; \ 198 const MV ref_mv = { rr, rc }; \ 199 if (second_pred == NULL) \ 200 thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 201 src_stride, &sse); \ 202 else \ 203 thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 204 src_stride, &sse, second_pred); \ 205 if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ 206 thismse) < besterr) { \ 207 besterr = v; \ 208 br = r; \ 209 bc = c; \ 210 *distortion = thismse; \ 211 *sse1 = sse; \ 212 } \ 213 } else { \ 214 v = INT_MAX; \ 215 } 216 217 #endif 218 #define FIRST_LEVEL_CHECKS \ 219 { \ 220 unsigned int left, right, up, down, diag; \ 221 CHECK_BETTER(left, tr, tc - hstep); \ 222 CHECK_BETTER(right, tr, tc + hstep); \ 223 CHECK_BETTER(up, tr - hstep, tc); \ 224 CHECK_BETTER(down, tr + hstep, tc); \ 225 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \ 226 switch (whichdir) { \ 227 case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \ 228 case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \ 229 case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \ 230 case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \ 231 } \ 232 } 233 234 #define SECOND_LEVEL_CHECKS \ 235 { \ 236 int kr, kc; \ 237 unsigned int second; \ 238 if (tr != br && tc != bc) { \ 239 kr = br - tr; \ 240 kc = bc - tc; \ 241 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 242 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 243 } else if (tr == br && tc != bc) { \ 244 kc = bc - tc; \ 245 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 246 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 247 switch (whichdir) { \ 248 case 0: \ 249 case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \ 250 case 2: \ 251 case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \ 252 } \ 253 } else if (tr != br && tc == bc) { \ 254 kr = br - tr; \ 255 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 256 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 257 switch (whichdir) { \ 258 case 0: \ 259 case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \ 260 case 1: \ 261 case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \ 262 } \ 263 } \ 264 } 265 266 #define SETUP_SUBPEL_SEARCH \ 267 const uint8_t *const z = x->plane[0].src.buf; \ 268 const int src_stride = x->plane[0].src.stride; \ 269 const MACROBLOCKD *xd = &x->e_mbd; \ 270 unsigned int besterr = UINT_MAX; \ 271 unsigned int sse; \ 272 unsigned int whichdir; \ 273 int thismse; \ 274 const unsigned int halfiters = iters_per_step; \ 275 const unsigned int quarteriters = iters_per_step; \ 276 const unsigned int eighthiters = iters_per_step; \ 277 const int y_stride = xd->plane[0].pre[0].stride; \ 278 const int offset = bestmv->row * y_stride + bestmv->col; \ 279 const uint8_t *const y = xd->plane[0].pre[0].buf; \ 280 \ 281 int rr = ref_mv->row; \ 282 int rc = ref_mv->col; \ 283 int br = bestmv->row * 8; \ 284 int bc = bestmv->col * 8; \ 285 int hstep = 4; \ 286 int minc, maxc, minr, maxr; \ 287 int tr = br; \ 288 int tc = bc; \ 289 MvLimits subpel_mv_limits; \ 290 \ 291 vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \ 292 minc = subpel_mv_limits.col_min; \ 293 maxc = subpel_mv_limits.col_max; \ 294 minr = subpel_mv_limits.row_min; \ 295 maxr = subpel_mv_limits.row_max; \ 296 \ 297 bestmv->row *= 8; \ 298 bestmv->col *= 8; 299 300 static unsigned int setup_center_error( 301 const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, 302 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, 303 const uint8_t *const src, const int src_stride, const uint8_t *const y, 304 int y_stride, const uint8_t *second_pred, int w, int h, int offset, 305 int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) { 306 #if CONFIG_VP9_HIGHBITDEPTH 307 uint64_t besterr; 308 if (second_pred != NULL) { 309 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 310 DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); 311 vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w, 312 h, CONVERT_TO_SHORTPTR(y + offset), y_stride); 313 besterr = 314 vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); 315 } else { 316 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 317 vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); 318 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 319 } 320 } else { 321 besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); 322 } 323 *distortion = (uint32_t)besterr; 324 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 325 if (besterr >= UINT_MAX) return UINT_MAX; 326 return (uint32_t)besterr; 327 #else 328 uint32_t besterr; 329 (void)xd; 330 if (second_pred != NULL) { 331 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 332 vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); 333 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 334 } else { 335 besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); 336 } 337 *distortion = besterr; 338 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 339 return besterr; 340 #endif // CONFIG_VP9_HIGHBITDEPTH 341 } 342 343 static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) { 344 return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d); 345 } 346 347 static INLINE int is_cost_list_wellbehaved(int *cost_list) { 348 return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] && 349 cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4]; 350 } 351 352 // Returns surface minima estimate at given precision in 1/2^n bits. 353 // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C 354 // For a given set of costs S0, S1, S2, S3, S4 at points 355 // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, 356 // the solution for the location of the minima (x0, y0) is given by: 357 // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), 358 // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). 359 // The code below is an integerized version of that. 360 static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { 361 const int64_t x0 = (int64_t)cost_list[1] - cost_list[3]; 362 const int64_t y0 = cost_list[1] - 2 * (int64_t)cost_list[0] + cost_list[3]; 363 const int64_t x1 = (int64_t)cost_list[4] - cost_list[2]; 364 const int64_t y1 = cost_list[4] - 2 * (int64_t)cost_list[0] + cost_list[2]; 365 const int b = 1 << (bits - 1); 366 *ic = (int)divide_and_round(x0 * b, y0); 367 *ir = (int)divide_and_round(x1 * b, y1); 368 } 369 370 uint32_t vp9_skip_sub_pixel_tree( 371 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 372 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 373 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 374 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 375 int h, int use_accurate_subpel_search) { 376 SETUP_SUBPEL_SEARCH; 377 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 378 src_stride, y, y_stride, second_pred, w, h, 379 offset, mvjcost, mvcost, sse1, distortion); 380 (void)halfiters; 381 (void)quarteriters; 382 (void)eighthiters; 383 (void)whichdir; 384 (void)allow_hp; 385 (void)forced_stop; 386 (void)hstep; 387 (void)rr; 388 (void)rc; 389 (void)minr; 390 (void)minc; 391 (void)maxr; 392 (void)maxc; 393 (void)tr; 394 (void)tc; 395 (void)sse; 396 (void)thismse; 397 (void)cost_list; 398 (void)use_accurate_subpel_search; 399 400 return besterr; 401 } 402 403 uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( 404 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 405 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 406 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 407 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 408 int h, int use_accurate_subpel_search) { 409 SETUP_SUBPEL_SEARCH; 410 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 411 src_stride, y, y_stride, second_pred, w, h, 412 offset, mvjcost, mvcost, sse1, distortion); 413 (void)halfiters; 414 (void)quarteriters; 415 (void)eighthiters; 416 (void)whichdir; 417 (void)allow_hp; 418 (void)forced_stop; 419 (void)hstep; 420 (void)use_accurate_subpel_search; 421 422 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 423 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 424 cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { 425 int ir, ic; 426 unsigned int minpt = INT_MAX; 427 get_cost_surf_min(cost_list, &ir, &ic, 2); 428 if (ir != 0 || ic != 0) { 429 CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic); 430 } 431 } else { 432 FIRST_LEVEL_CHECKS; 433 if (halfiters > 1) { 434 SECOND_LEVEL_CHECKS; 435 } 436 437 tr = br; 438 tc = bc; 439 440 // Each subsequent iteration checks at least one point in common with 441 // the last iteration could be 2 ( if diag selected) 1/4 pel 442 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 443 if (forced_stop != 2) { 444 hstep >>= 1; 445 FIRST_LEVEL_CHECKS; 446 if (quarteriters > 1) { 447 SECOND_LEVEL_CHECKS; 448 } 449 } 450 } 451 452 tr = br; 453 tc = bc; 454 455 if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { 456 hstep >>= 1; 457 FIRST_LEVEL_CHECKS; 458 if (eighthiters > 1) { 459 SECOND_LEVEL_CHECKS; 460 } 461 } 462 463 bestmv->row = br; 464 bestmv->col = bc; 465 466 return besterr; 467 } 468 469 uint32_t vp9_find_best_sub_pixel_tree_pruned_more( 470 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 471 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 472 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 473 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 474 int h, int use_accurate_subpel_search) { 475 SETUP_SUBPEL_SEARCH; 476 (void)use_accurate_subpel_search; 477 478 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 479 src_stride, y, y_stride, second_pred, w, h, 480 offset, mvjcost, mvcost, sse1, distortion); 481 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 482 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 483 cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { 484 unsigned int minpt; 485 int ir, ic; 486 get_cost_surf_min(cost_list, &ir, &ic, 1); 487 if (ir != 0 || ic != 0) { 488 CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep); 489 } 490 } else { 491 FIRST_LEVEL_CHECKS; 492 if (halfiters > 1) { 493 SECOND_LEVEL_CHECKS; 494 } 495 } 496 497 // Each subsequent iteration checks at least one point in common with 498 // the last iteration could be 2 ( if diag selected) 1/4 pel 499 500 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 501 if (forced_stop != 2) { 502 tr = br; 503 tc = bc; 504 hstep >>= 1; 505 FIRST_LEVEL_CHECKS; 506 if (quarteriters > 1) { 507 SECOND_LEVEL_CHECKS; 508 } 509 } 510 511 if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { 512 tr = br; 513 tc = bc; 514 hstep >>= 1; 515 FIRST_LEVEL_CHECKS; 516 if (eighthiters > 1) { 517 SECOND_LEVEL_CHECKS; 518 } 519 } 520 // These lines insure static analysis doesn't warn that 521 // tr and tc aren't used after the above point. 522 (void)tr; 523 (void)tc; 524 525 bestmv->row = br; 526 bestmv->col = bc; 527 528 return besterr; 529 } 530 531 uint32_t vp9_find_best_sub_pixel_tree_pruned( 532 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 533 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 534 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 535 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 536 int h, int use_accurate_subpel_search) { 537 SETUP_SUBPEL_SEARCH; 538 (void)use_accurate_subpel_search; 539 540 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 541 src_stride, y, y_stride, second_pred, w, h, 542 offset, mvjcost, mvcost, sse1, distortion); 543 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 544 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 545 cost_list[4] != INT_MAX) { 546 unsigned int left, right, up, down, diag; 547 whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) + 548 (cost_list[2] < cost_list[4] ? 0 : 2); 549 switch (whichdir) { 550 case 0: 551 CHECK_BETTER(left, tr, tc - hstep); 552 CHECK_BETTER(down, tr + hstep, tc); 553 CHECK_BETTER(diag, tr + hstep, tc - hstep); 554 break; 555 case 1: 556 CHECK_BETTER(right, tr, tc + hstep); 557 CHECK_BETTER(down, tr + hstep, tc); 558 CHECK_BETTER(diag, tr + hstep, tc + hstep); 559 break; 560 case 2: 561 CHECK_BETTER(left, tr, tc - hstep); 562 CHECK_BETTER(up, tr - hstep, tc); 563 CHECK_BETTER(diag, tr - hstep, tc - hstep); 564 break; 565 case 3: 566 CHECK_BETTER(right, tr, tc + hstep); 567 CHECK_BETTER(up, tr - hstep, tc); 568 CHECK_BETTER(diag, tr - hstep, tc + hstep); 569 break; 570 } 571 } else { 572 FIRST_LEVEL_CHECKS; 573 if (halfiters > 1) { 574 SECOND_LEVEL_CHECKS; 575 } 576 } 577 578 tr = br; 579 tc = bc; 580 581 // Each subsequent iteration checks at least one point in common with 582 // the last iteration could be 2 ( if diag selected) 1/4 pel 583 584 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 585 if (forced_stop != 2) { 586 hstep >>= 1; 587 FIRST_LEVEL_CHECKS; 588 if (quarteriters > 1) { 589 SECOND_LEVEL_CHECKS; 590 } 591 tr = br; 592 tc = bc; 593 } 594 595 if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { 596 hstep >>= 1; 597 FIRST_LEVEL_CHECKS; 598 if (eighthiters > 1) { 599 SECOND_LEVEL_CHECKS; 600 } 601 tr = br; 602 tc = bc; 603 } 604 // These lines insure static analysis doesn't warn that 605 // tr and tc aren't used after the above point. 606 (void)tr; 607 (void)tc; 608 609 bestmv->row = br; 610 bestmv->col = bc; 611 612 return besterr; 613 } 614 615 /* clang-format off */ 616 static const MV search_step_table[12] = { 617 // left, right, up, down 618 { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 }, 619 { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 }, 620 { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 } 621 }; 622 /* clang-format on */ 623 624 static int accurate_sub_pel_search( 625 const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf, 626 const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp, 627 const uint8_t *const src_address, const int src_stride, 628 const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred, 629 int w, int h, uint32_t *sse) { 630 #if CONFIG_VP9_HIGHBITDEPTH 631 uint64_t besterr; 632 assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); 633 assert(w != 0 && h != 0); 634 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 635 DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]); 636 vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride, 637 pred16, w, this_mv, sf, w, h, 0, kernel, 638 MV_PRECISION_Q3, 0, 0, xd->bd); 639 if (second_pred != NULL) { 640 DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); 641 vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w, 642 h, pred16, w); 643 besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address, 644 src_stride, sse); 645 } else { 646 besterr = 647 vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse); 648 } 649 } else { 650 DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); 651 vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, 652 0, kernel, MV_PRECISION_Q3, 0, 0); 653 if (second_pred != NULL) { 654 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 655 vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); 656 besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); 657 } else { 658 besterr = vfp->vf(pred, w, src_address, src_stride, sse); 659 } 660 } 661 if (besterr >= UINT_MAX) return UINT_MAX; 662 return (int)besterr; 663 #else 664 int besterr; 665 DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); 666 assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); 667 assert(w != 0 && h != 0); 668 (void)xd; 669 670 vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, 671 0, kernel, MV_PRECISION_Q3, 0, 0); 672 if (second_pred != NULL) { 673 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 674 vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); 675 besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); 676 } else { 677 besterr = vfp->vf(pred, w, src_address, src_stride, sse); 678 } 679 return besterr; 680 #endif // CONFIG_VP9_HIGHBITDEPTH 681 } 682 683 // TODO(yunqing): this part can be further refactored. 684 #if CONFIG_VP9_HIGHBITDEPTH 685 /* checks if (r, c) has better score than previous best */ 686 #define CHECK_BETTER1(v, r, c) \ 687 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 688 int64_t tmpmse; \ 689 const MV mv = { r, c }; \ 690 const MV ref_mv = { rr, rc }; \ 691 thismse = \ 692 accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ 693 y, y_stride, second_pred, w, h, &sse); \ 694 tmpmse = thismse; \ 695 tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ 696 if (tmpmse >= INT_MAX) { \ 697 v = INT_MAX; \ 698 } else if ((v = (uint32_t)tmpmse) < besterr) { \ 699 besterr = v; \ 700 br = r; \ 701 bc = c; \ 702 *distortion = thismse; \ 703 *sse1 = sse; \ 704 } \ 705 } else { \ 706 v = INT_MAX; \ 707 } 708 #else 709 /* checks if (r, c) has better score than previous best */ 710 #define CHECK_BETTER1(v, r, c) \ 711 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 712 const MV mv = { r, c }; \ 713 const MV ref_mv = { rr, rc }; \ 714 thismse = \ 715 accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ 716 y, y_stride, second_pred, w, h, &sse); \ 717 if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ 718 thismse) < besterr) { \ 719 besterr = v; \ 720 br = r; \ 721 bc = c; \ 722 *distortion = thismse; \ 723 *sse1 = sse; \ 724 } \ 725 } else { \ 726 v = INT_MAX; \ 727 } 728 729 #endif 730 731 uint32_t vp9_find_best_sub_pixel_tree( 732 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 733 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 734 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 735 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 736 int h, int use_accurate_subpel_search) { 737 const uint8_t *const z = x->plane[0].src.buf; 738 const uint8_t *const src_address = z; 739 const int src_stride = x->plane[0].src.stride; 740 const MACROBLOCKD *xd = &x->e_mbd; 741 unsigned int besterr = UINT_MAX; 742 unsigned int sse; 743 int thismse; 744 const int y_stride = xd->plane[0].pre[0].stride; 745 const int offset = bestmv->row * y_stride + bestmv->col; 746 const uint8_t *const y = xd->plane[0].pre[0].buf; 747 748 int rr = ref_mv->row; 749 int rc = ref_mv->col; 750 int br = bestmv->row * 8; 751 int bc = bestmv->col * 8; 752 int hstep = 4; 753 int iter, round = 3 - forced_stop; 754 755 int minc, maxc, minr, maxr; 756 int tr = br; 757 int tc = bc; 758 const MV *search_step = search_step_table; 759 int idx, best_idx = -1; 760 unsigned int cost_array[5]; 761 int kr, kc; 762 MvLimits subpel_mv_limits; 763 764 // TODO(yunqing): need to add 4-tap filter optimization to speed up the 765 // encoder. 766 const InterpKernel *kernel = 767 (use_accurate_subpel_search > 0) 768 ? ((use_accurate_subpel_search == USE_4_TAPS) 769 ? vp9_filter_kernels[FOURTAP] 770 : ((use_accurate_subpel_search == USE_8_TAPS) 771 ? vp9_filter_kernels[EIGHTTAP] 772 : vp9_filter_kernels[EIGHTTAP_SHARP])) 773 : vp9_filter_kernels[BILINEAR]; 774 775 vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); 776 minc = subpel_mv_limits.col_min; 777 maxc = subpel_mv_limits.col_max; 778 minr = subpel_mv_limits.row_min; 779 maxr = subpel_mv_limits.row_max; 780 781 if (!(allow_hp && use_mv_hp(ref_mv))) 782 if (round == 3) round = 2; 783 784 bestmv->row *= 8; 785 bestmv->col *= 8; 786 787 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 788 src_stride, y, y_stride, second_pred, w, h, 789 offset, mvjcost, mvcost, sse1, distortion); 790 791 (void)cost_list; // to silence compiler warning 792 793 for (iter = 0; iter < round; ++iter) { 794 // Check vertical and horizontal sub-pixel positions. 795 for (idx = 0; idx < 4; ++idx) { 796 tr = br + search_step[idx].row; 797 tc = bc + search_step[idx].col; 798 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { 799 MV this_mv; 800 this_mv.row = tr; 801 this_mv.col = tc; 802 803 if (use_accurate_subpel_search) { 804 thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, 805 src_address, src_stride, y, 806 y_stride, second_pred, w, h, &sse); 807 } else { 808 const uint8_t *const pre_address = 809 y + (tr >> 3) * y_stride + (tc >> 3); 810 if (second_pred == NULL) 811 thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), 812 src_address, src_stride, &sse); 813 else 814 thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), 815 src_address, src_stride, &sse, second_pred); 816 } 817 818 cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, 819 mvcost, error_per_bit); 820 821 if (cost_array[idx] < besterr) { 822 best_idx = idx; 823 besterr = cost_array[idx]; 824 *distortion = thismse; 825 *sse1 = sse; 826 } 827 } else { 828 cost_array[idx] = UINT_MAX; 829 } 830 } 831 832 // Check diagonal sub-pixel position 833 kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); 834 kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep); 835 836 tc = bc + kc; 837 tr = br + kr; 838 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { 839 MV this_mv = { tr, tc }; 840 if (use_accurate_subpel_search) { 841 thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, 842 src_address, src_stride, y, y_stride, 843 second_pred, w, h, &sse); 844 } else { 845 const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); 846 if (second_pred == NULL) 847 thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, 848 src_stride, &sse); 849 else 850 thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), 851 src_address, src_stride, &sse, second_pred); 852 } 853 854 cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, 855 error_per_bit); 856 857 if (cost_array[4] < besterr) { 858 best_idx = 4; 859 besterr = cost_array[4]; 860 *distortion = thismse; 861 *sse1 = sse; 862 } 863 } else { 864 cost_array[idx] = UINT_MAX; 865 } 866 867 if (best_idx < 4 && best_idx >= 0) { 868 br += search_step[best_idx].row; 869 bc += search_step[best_idx].col; 870 } else if (best_idx == 4) { 871 br = tr; 872 bc = tc; 873 } 874 875 if (iters_per_step > 0 && best_idx != -1) { 876 unsigned int second; 877 const int br0 = br; 878 const int bc0 = bc; 879 assert(tr == br || tc == bc); 880 881 if (tr == br && tc != bc) { 882 kc = bc - tc; 883 if (iters_per_step == 1) { 884 if (use_accurate_subpel_search) { 885 CHECK_BETTER1(second, br0, bc0 + kc); 886 } else { 887 CHECK_BETTER(second, br0, bc0 + kc); 888 } 889 } 890 } else if (tr != br && tc == bc) { 891 kr = br - tr; 892 if (iters_per_step == 1) { 893 if (use_accurate_subpel_search) { 894 CHECK_BETTER1(second, br0 + kr, bc0); 895 } else { 896 CHECK_BETTER(second, br0 + kr, bc0); 897 } 898 } 899 } 900 901 if (iters_per_step > 1) { 902 if (use_accurate_subpel_search) { 903 CHECK_BETTER1(second, br0 + kr, bc0); 904 CHECK_BETTER1(second, br0, bc0 + kc); 905 if (br0 != br || bc0 != bc) { 906 CHECK_BETTER1(second, br0 + kr, bc0 + kc); 907 } 908 } else { 909 CHECK_BETTER(second, br0 + kr, bc0); 910 CHECK_BETTER(second, br0, bc0 + kc); 911 if (br0 != br || bc0 != bc) { 912 CHECK_BETTER(second, br0 + kr, bc0 + kc); 913 } 914 } 915 } 916 } 917 918 search_step += 4; 919 hstep >>= 1; 920 best_idx = -1; 921 } 922 923 // Each subsequent iteration checks at least one point in common with 924 // the last iteration could be 2 ( if diag selected) 1/4 pel 925 926 // These lines insure static analysis doesn't warn that 927 // tr and tc aren't used after the above point. 928 (void)tr; 929 (void)tc; 930 931 bestmv->row = br; 932 bestmv->col = bc; 933 934 return besterr; 935 } 936 937 #undef CHECK_BETTER 938 #undef CHECK_BETTER1 939 940 static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col, 941 int range) { 942 return ((row - range) >= mv_limits->row_min) & 943 ((row + range) <= mv_limits->row_max) & 944 ((col - range) >= mv_limits->col_min) & 945 ((col + range) <= mv_limits->col_max); 946 } 947 948 static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) { 949 return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) && 950 (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max); 951 } 952 953 #define CHECK_BETTER \ 954 { \ 955 if (thissad < bestsad) { \ 956 if (use_mvcost) \ 957 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \ 958 if (thissad < bestsad) { \ 959 bestsad = thissad; \ 960 best_site = i; \ 961 } \ 962 } \ 963 } 964 965 #define MAX_PATTERN_SCALES 11 966 #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 967 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 968 969 // Calculate and return a sad+mvcost list around an integer best pel. 970 static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv, 971 int sadpb, 972 const vp9_variance_fn_ptr_t *fn_ptr, 973 const MV *best_mv, int *cost_list) { 974 static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; 975 const struct buf_2d *const what = &x->plane[0].src; 976 const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0]; 977 const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; 978 int br = best_mv->row; 979 int bc = best_mv->col; 980 MV this_mv; 981 int i; 982 unsigned int sse; 983 984 this_mv.row = br; 985 this_mv.col = bc; 986 cost_list[0] = 987 fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), 988 in_what->stride, &sse) + 989 mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); 990 if (check_bounds(&x->mv_limits, br, bc, 1)) { 991 for (i = 0; i < 4; i++) { 992 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 993 cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, 994 get_buf_from_mv(in_what, &this_mv), 995 in_what->stride, &sse) + 996 mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, 997 x->mvcost, x->errorperbit); 998 } 999 } else { 1000 for (i = 0; i < 4; i++) { 1001 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1002 if (!is_mv_in(&x->mv_limits, &this_mv)) 1003 cost_list[i + 1] = INT_MAX; 1004 else 1005 cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, 1006 get_buf_from_mv(in_what, &this_mv), 1007 in_what->stride, &sse) + 1008 mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, 1009 x->mvcost, x->errorperbit); 1010 } 1011 } 1012 } 1013 1014 // Generic pattern search function that searches over multiple scales. 1015 // Each scale can have a different number of candidates and shape of 1016 // candidates as indicated in the num_candidates and candidates arrays 1017 // passed into this function 1018 // 1019 static int vp9_pattern_search( 1020 const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, 1021 int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, 1022 int use_mvcost, const MV *center_mv, MV *best_mv, 1023 const int num_candidates[MAX_PATTERN_SCALES], 1024 const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { 1025 const MACROBLOCKD *const xd = &x->e_mbd; 1026 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 1027 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 1028 }; 1029 int i, s, t; 1030 const struct buf_2d *const what = &x->plane[0].src; 1031 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1032 int br, bc; 1033 int bestsad = INT_MAX; 1034 int thissad; 1035 int k = -1; 1036 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 1037 int best_init_s = search_param_to_steps[search_param]; 1038 // adjust ref_mv to make sure it is within MV range 1039 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1040 x->mv_limits.row_min, x->mv_limits.row_max); 1041 br = ref_mv->row; 1042 bc = ref_mv->col; 1043 1044 // Work out the start point for the search 1045 bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 1046 in_what->stride) + 1047 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 1048 1049 // Search all possible scales upto the search param around the center point 1050 // pick the scale of the point that is best as the starting scale of 1051 // further steps around it. 1052 if (do_init_search) { 1053 s = best_init_s; 1054 best_init_s = -1; 1055 for (t = 0; t <= s; ++t) { 1056 int best_site = -1; 1057 if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { 1058 for (i = 0; i < num_candidates[t]; i++) { 1059 const MV this_mv = { br + candidates[t][i].row, 1060 bc + candidates[t][i].col }; 1061 thissad = 1062 vfp->sdf(what->buf, what->stride, 1063 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1064 CHECK_BETTER 1065 } 1066 } else { 1067 for (i = 0; i < num_candidates[t]; i++) { 1068 const MV this_mv = { br + candidates[t][i].row, 1069 bc + candidates[t][i].col }; 1070 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1071 thissad = 1072 vfp->sdf(what->buf, what->stride, 1073 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1074 CHECK_BETTER 1075 } 1076 } 1077 if (best_site == -1) { 1078 continue; 1079 } else { 1080 best_init_s = t; 1081 k = best_site; 1082 } 1083 } 1084 if (best_init_s != -1) { 1085 br += candidates[best_init_s][k].row; 1086 bc += candidates[best_init_s][k].col; 1087 } 1088 } 1089 1090 // If the center point is still the best, just skip this and move to 1091 // the refinement step. 1092 if (best_init_s != -1) { 1093 int best_site = -1; 1094 s = best_init_s; 1095 1096 do { 1097 // No need to search all 6 points the 1st time if initial search was used 1098 if (!do_init_search || s != best_init_s) { 1099 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1100 for (i = 0; i < num_candidates[s]; i++) { 1101 const MV this_mv = { br + candidates[s][i].row, 1102 bc + candidates[s][i].col }; 1103 thissad = 1104 vfp->sdf(what->buf, what->stride, 1105 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1106 CHECK_BETTER 1107 } 1108 } else { 1109 for (i = 0; i < num_candidates[s]; i++) { 1110 const MV this_mv = { br + candidates[s][i].row, 1111 bc + candidates[s][i].col }; 1112 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1113 thissad = 1114 vfp->sdf(what->buf, what->stride, 1115 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1116 CHECK_BETTER 1117 } 1118 } 1119 1120 if (best_site == -1) { 1121 continue; 1122 } else { 1123 br += candidates[s][best_site].row; 1124 bc += candidates[s][best_site].col; 1125 k = best_site; 1126 } 1127 } 1128 1129 do { 1130 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1131 best_site = -1; 1132 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1133 next_chkpts_indices[1] = k; 1134 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 1135 1136 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1137 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1138 const MV this_mv = { 1139 br + candidates[s][next_chkpts_indices[i]].row, 1140 bc + candidates[s][next_chkpts_indices[i]].col 1141 }; 1142 thissad = 1143 vfp->sdf(what->buf, what->stride, 1144 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1145 CHECK_BETTER 1146 } 1147 } else { 1148 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1149 const MV this_mv = { 1150 br + candidates[s][next_chkpts_indices[i]].row, 1151 bc + candidates[s][next_chkpts_indices[i]].col 1152 }; 1153 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1154 thissad = 1155 vfp->sdf(what->buf, what->stride, 1156 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1157 CHECK_BETTER 1158 } 1159 } 1160 1161 if (best_site != -1) { 1162 k = next_chkpts_indices[best_site]; 1163 br += candidates[s][k].row; 1164 bc += candidates[s][k].col; 1165 } 1166 } while (best_site != -1); 1167 } while (s--); 1168 } 1169 1170 // Returns the one-away integer pel sad values around the best as follows: 1171 // cost_list[0]: cost at the best integer pel 1172 // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel 1173 // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel 1174 // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel 1175 // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel 1176 if (cost_list) { 1177 const MV best_mv = { br, bc }; 1178 calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list); 1179 } 1180 best_mv->row = br; 1181 best_mv->col = bc; 1182 return bestsad; 1183 } 1184 1185 // A specialized function where the smallest scale search candidates 1186 // are 4 1-away neighbors, and cost_list is non-null 1187 // TODO(debargha): Merge this function with the one above. Also remove 1188 // use_mvcost option since it is always 1, to save unnecessary branches. 1189 static int vp9_pattern_search_sad( 1190 const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, 1191 int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, 1192 int use_mvcost, const MV *center_mv, MV *best_mv, 1193 const int num_candidates[MAX_PATTERN_SCALES], 1194 const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { 1195 const MACROBLOCKD *const xd = &x->e_mbd; 1196 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 1197 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 1198 }; 1199 int i, s, t; 1200 const struct buf_2d *const what = &x->plane[0].src; 1201 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1202 int br, bc; 1203 int bestsad = INT_MAX; 1204 int thissad; 1205 int k = -1; 1206 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 1207 int best_init_s = search_param_to_steps[search_param]; 1208 // adjust ref_mv to make sure it is within MV range 1209 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1210 x->mv_limits.row_min, x->mv_limits.row_max); 1211 br = ref_mv->row; 1212 bc = ref_mv->col; 1213 if (cost_list != NULL) { 1214 cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = 1215 INT_MAX; 1216 } 1217 1218 // Work out the start point for the search 1219 bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 1220 in_what->stride) + 1221 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 1222 1223 // Search all possible scales upto the search param around the center point 1224 // pick the scale of the point that is best as the starting scale of 1225 // further steps around it. 1226 if (do_init_search) { 1227 s = best_init_s; 1228 best_init_s = -1; 1229 for (t = 0; t <= s; ++t) { 1230 int best_site = -1; 1231 if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { 1232 for (i = 0; i < num_candidates[t]; i++) { 1233 const MV this_mv = { br + candidates[t][i].row, 1234 bc + candidates[t][i].col }; 1235 thissad = 1236 vfp->sdf(what->buf, what->stride, 1237 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1238 CHECK_BETTER 1239 } 1240 } else { 1241 for (i = 0; i < num_candidates[t]; i++) { 1242 const MV this_mv = { br + candidates[t][i].row, 1243 bc + candidates[t][i].col }; 1244 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1245 thissad = 1246 vfp->sdf(what->buf, what->stride, 1247 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1248 CHECK_BETTER 1249 } 1250 } 1251 if (best_site == -1) { 1252 continue; 1253 } else { 1254 best_init_s = t; 1255 k = best_site; 1256 } 1257 } 1258 if (best_init_s != -1) { 1259 br += candidates[best_init_s][k].row; 1260 bc += candidates[best_init_s][k].col; 1261 } 1262 } 1263 1264 // If the center point is still the best, just skip this and move to 1265 // the refinement step. 1266 if (best_init_s != -1) { 1267 int do_sad = (num_candidates[0] == 4 && cost_list != NULL); 1268 int best_site = -1; 1269 s = best_init_s; 1270 1271 for (; s >= do_sad; s--) { 1272 if (!do_init_search || s != best_init_s) { 1273 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1274 for (i = 0; i < num_candidates[s]; i++) { 1275 const MV this_mv = { br + candidates[s][i].row, 1276 bc + candidates[s][i].col }; 1277 thissad = 1278 vfp->sdf(what->buf, what->stride, 1279 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1280 CHECK_BETTER 1281 } 1282 } else { 1283 for (i = 0; i < num_candidates[s]; i++) { 1284 const MV this_mv = { br + candidates[s][i].row, 1285 bc + candidates[s][i].col }; 1286 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1287 thissad = 1288 vfp->sdf(what->buf, what->stride, 1289 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1290 CHECK_BETTER 1291 } 1292 } 1293 1294 if (best_site == -1) { 1295 continue; 1296 } else { 1297 br += candidates[s][best_site].row; 1298 bc += candidates[s][best_site].col; 1299 k = best_site; 1300 } 1301 } 1302 1303 do { 1304 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1305 best_site = -1; 1306 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1307 next_chkpts_indices[1] = k; 1308 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 1309 1310 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1311 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1312 const MV this_mv = { 1313 br + candidates[s][next_chkpts_indices[i]].row, 1314 bc + candidates[s][next_chkpts_indices[i]].col 1315 }; 1316 thissad = 1317 vfp->sdf(what->buf, what->stride, 1318 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1319 CHECK_BETTER 1320 } 1321 } else { 1322 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1323 const MV this_mv = { 1324 br + candidates[s][next_chkpts_indices[i]].row, 1325 bc + candidates[s][next_chkpts_indices[i]].col 1326 }; 1327 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1328 thissad = 1329 vfp->sdf(what->buf, what->stride, 1330 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1331 CHECK_BETTER 1332 } 1333 } 1334 1335 if (best_site != -1) { 1336 k = next_chkpts_indices[best_site]; 1337 br += candidates[s][k].row; 1338 bc += candidates[s][k].col; 1339 } 1340 } while (best_site != -1); 1341 } 1342 1343 // Note: If we enter the if below, then cost_list must be non-NULL. 1344 if (s == 0) { 1345 cost_list[0] = bestsad; 1346 if (!do_init_search || s != best_init_s) { 1347 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1348 for (i = 0; i < num_candidates[s]; i++) { 1349 const MV this_mv = { br + candidates[s][i].row, 1350 bc + candidates[s][i].col }; 1351 cost_list[i + 1] = thissad = 1352 vfp->sdf(what->buf, what->stride, 1353 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1354 CHECK_BETTER 1355 } 1356 } else { 1357 for (i = 0; i < num_candidates[s]; i++) { 1358 const MV this_mv = { br + candidates[s][i].row, 1359 bc + candidates[s][i].col }; 1360 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1361 cost_list[i + 1] = thissad = 1362 vfp->sdf(what->buf, what->stride, 1363 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1364 CHECK_BETTER 1365 } 1366 } 1367 1368 if (best_site != -1) { 1369 br += candidates[s][best_site].row; 1370 bc += candidates[s][best_site].col; 1371 k = best_site; 1372 } 1373 } 1374 while (best_site != -1) { 1375 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1376 best_site = -1; 1377 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1378 next_chkpts_indices[1] = k; 1379 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 1380 cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; 1381 cost_list[((k + 2) % 4) + 1] = cost_list[0]; 1382 cost_list[0] = bestsad; 1383 1384 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1385 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1386 const MV this_mv = { 1387 br + candidates[s][next_chkpts_indices[i]].row, 1388 bc + candidates[s][next_chkpts_indices[i]].col 1389 }; 1390 cost_list[next_chkpts_indices[i] + 1] = thissad = 1391 vfp->sdf(what->buf, what->stride, 1392 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1393 CHECK_BETTER 1394 } 1395 } else { 1396 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1397 const MV this_mv = { 1398 br + candidates[s][next_chkpts_indices[i]].row, 1399 bc + candidates[s][next_chkpts_indices[i]].col 1400 }; 1401 if (!is_mv_in(&x->mv_limits, &this_mv)) { 1402 cost_list[next_chkpts_indices[i] + 1] = INT_MAX; 1403 continue; 1404 } 1405 cost_list[next_chkpts_indices[i] + 1] = thissad = 1406 vfp->sdf(what->buf, what->stride, 1407 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1408 CHECK_BETTER 1409 } 1410 } 1411 1412 if (best_site != -1) { 1413 k = next_chkpts_indices[best_site]; 1414 br += candidates[s][k].row; 1415 bc += candidates[s][k].col; 1416 } 1417 } 1418 } 1419 } 1420 1421 // Returns the one-away integer pel sad values around the best as follows: 1422 // cost_list[0]: sad at the best integer pel 1423 // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel 1424 // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel 1425 // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel 1426 // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel 1427 if (cost_list) { 1428 static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; 1429 if (cost_list[0] == INT_MAX) { 1430 cost_list[0] = bestsad; 1431 if (check_bounds(&x->mv_limits, br, bc, 1)) { 1432 for (i = 0; i < 4; i++) { 1433 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1434 cost_list[i + 1] = 1435 vfp->sdf(what->buf, what->stride, 1436 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1437 } 1438 } else { 1439 for (i = 0; i < 4; i++) { 1440 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1441 if (!is_mv_in(&x->mv_limits, &this_mv)) 1442 cost_list[i + 1] = INT_MAX; 1443 else 1444 cost_list[i + 1] = 1445 vfp->sdf(what->buf, what->stride, 1446 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1447 } 1448 } 1449 } else { 1450 if (use_mvcost) { 1451 for (i = 0; i < 4; i++) { 1452 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1453 if (cost_list[i + 1] != INT_MAX) { 1454 cost_list[i + 1] += 1455 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1456 } 1457 } 1458 } 1459 } 1460 } 1461 best_mv->row = br; 1462 best_mv->col = bc; 1463 return bestsad; 1464 } 1465 1466 int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, 1467 const MV *center_mv, const vp9_variance_fn_ptr_t *vfp, 1468 int use_mvcost) { 1469 const MACROBLOCKD *const xd = &x->e_mbd; 1470 const struct buf_2d *const what = &x->plane[0].src; 1471 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1472 const MV mv = { best_mv->row * 8, best_mv->col * 8 }; 1473 uint32_t unused; 1474 #if CONFIG_VP9_HIGHBITDEPTH 1475 uint64_t err = 1476 vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), 1477 in_what->stride, &unused); 1478 err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, 1479 x->errorperbit) 1480 : 0); 1481 if (err >= INT_MAX) return INT_MAX; 1482 return (int)err; 1483 #else 1484 return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), 1485 in_what->stride, &unused) + 1486 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, 1487 x->errorperbit) 1488 : 0); 1489 #endif 1490 } 1491 1492 int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, 1493 const MV *center_mv, const uint8_t *second_pred, 1494 const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { 1495 const MACROBLOCKD *const xd = &x->e_mbd; 1496 const struct buf_2d *const what = &x->plane[0].src; 1497 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1498 const MV mv = { best_mv->row * 8, best_mv->col * 8 }; 1499 unsigned int unused; 1500 1501 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, 1502 what->buf, what->stride, &unused, second_pred) + 1503 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, 1504 x->errorperbit) 1505 : 0); 1506 } 1507 1508 static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1509 int sad_per_bit, int do_init_search, int *cost_list, 1510 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1511 const MV *center_mv, MV *best_mv) { 1512 // First scale has 8-closest points, the rest have 6 points in hex shape 1513 // at increasing scales 1514 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6, 1515 6, 6, 6, 6, 6 }; 1516 // Note that the largest candidate step at each scale is 2^scale 1517 /* clang-format off */ 1518 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1519 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 }, 1520 { -1, 0 } }, 1521 { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } }, 1522 { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } }, 1523 { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } }, 1524 { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } }, 1525 { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 }, 1526 { -32, 0 } }, 1527 { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 }, 1528 { -64, 0 } }, 1529 { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 }, 1530 { -128, 0 } }, 1531 { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 }, 1532 { -256, 0 } }, 1533 { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 }, 1534 { -512, 0 } }, 1535 { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 }, 1536 { -512, 1024 }, { -1024, 0 } } 1537 }; 1538 /* clang-format on */ 1539 return vp9_pattern_search( 1540 x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, 1541 use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates); 1542 } 1543 1544 static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1545 int sad_per_bit, int do_init_search, int *cost_list, 1546 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1547 const MV *center_mv, MV *best_mv) { 1548 // First scale has 4-closest points, the rest have 8 points in diamond 1549 // shape at increasing scales 1550 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 1551 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1552 }; 1553 // Note that the largest candidate step at each scale is 2^scale 1554 /* clang-format off */ 1555 static const MV 1556 bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1557 { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }, 1558 { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 }, 1559 { -1, 1 }, { -2, 0 } }, 1560 { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 }, 1561 { -2, 2 }, { -4, 0 } }, 1562 { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 }, 1563 { -4, 4 }, { -8, 0 } }, 1564 { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 }, 1565 { -8, 8 }, { -16, 0 } }, 1566 { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 }, 1567 { 0, 32 }, { -16, 16 }, { -32, 0 } }, 1568 { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 }, 1569 { 0, 64 }, { -32, 32 }, { -64, 0 } }, 1570 { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 }, 1571 { 0, 128 }, { -64, 64 }, { -128, 0 } }, 1572 { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 }, 1573 { 0, 256 }, { -128, 128 }, { -256, 0 } }, 1574 { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 }, 1575 { 0, 512 }, { -256, 256 }, { -512, 0 } }, 1576 { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 }, 1577 { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } } 1578 }; 1579 /* clang-format on */ 1580 return vp9_pattern_search_sad( 1581 x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, 1582 use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates); 1583 } 1584 1585 static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1586 int sad_per_bit, int do_init_search, int *cost_list, 1587 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1588 const MV *center_mv, MV *best_mv) { 1589 // All scales have 8 closest points in square shape 1590 static const int square_num_candidates[MAX_PATTERN_SCALES] = { 1591 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1592 }; 1593 // Note that the largest candidate step at each scale is 2^scale 1594 /* clang-format off */ 1595 static const MV 1596 square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1597 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, 1598 { -1, 1 }, { -1, 0 } }, 1599 { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 }, 1600 { -2, 2 }, { -2, 0 } }, 1601 { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 }, 1602 { -4, 4 }, { -4, 0 } }, 1603 { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 }, 1604 { -8, 8 }, { -8, 0 } }, 1605 { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 }, 1606 { 0, 16 }, { -16, 16 }, { -16, 0 } }, 1607 { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 }, 1608 { 0, 32 }, { -32, 32 }, { -32, 0 } }, 1609 { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 }, 1610 { 0, 64 }, { -64, 64 }, { -64, 0 } }, 1611 { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 }, 1612 { 0, 128 }, { -128, 128 }, { -128, 0 } }, 1613 { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 }, 1614 { 0, 256 }, { -256, 256 }, { -256, 0 } }, 1615 { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 }, 1616 { 0, 512 }, { -512, 512 }, { -512, 0 } }, 1617 { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 }, 1618 { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } } 1619 }; 1620 /* clang-format on */ 1621 return vp9_pattern_search( 1622 x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, 1623 use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates); 1624 } 1625 1626 static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1627 int sad_per_bit, 1628 int do_init_search, // must be zero for fast_hex 1629 int *cost_list, const vp9_variance_fn_ptr_t *vfp, 1630 int use_mvcost, const MV *center_mv, MV *best_mv) { 1631 return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), 1632 sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, 1633 center_mv, best_mv); 1634 } 1635 1636 static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1637 int sad_per_bit, int do_init_search, int *cost_list, 1638 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1639 const MV *center_mv, MV *best_mv) { 1640 return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), 1641 sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, 1642 center_mv, best_mv); 1643 } 1644 1645 #undef CHECK_BETTER 1646 1647 // Exhuastive motion search around a given centre position with a given 1648 // step size. 1649 static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, 1650 int range, int step, int sad_per_bit, 1651 const vp9_variance_fn_ptr_t *fn_ptr, 1652 const MV *center_mv) { 1653 const MACROBLOCKD *const xd = &x->e_mbd; 1654 const struct buf_2d *const what = &x->plane[0].src; 1655 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1656 MV fcenter_mv = { center_mv->row, center_mv->col }; 1657 unsigned int best_sad = INT_MAX; 1658 int r, c, i; 1659 int start_col, end_col, start_row, end_row; 1660 int col_step = (step > 1) ? step : 4; 1661 1662 assert(step >= 1); 1663 1664 clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1665 x->mv_limits.row_min, x->mv_limits.row_max); 1666 *best_mv = fcenter_mv; 1667 best_sad = 1668 fn_ptr->sdf(what->buf, what->stride, 1669 get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) + 1670 mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit); 1671 start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row); 1672 start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col); 1673 end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row); 1674 end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col); 1675 1676 for (r = start_row; r <= end_row; r += step) { 1677 for (c = start_col; c <= end_col; c += col_step) { 1678 // Step > 1 means we are not checking every location in this pass. 1679 if (step > 1) { 1680 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; 1681 unsigned int sad = 1682 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), 1683 in_what->stride); 1684 if (sad < best_sad) { 1685 sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); 1686 if (sad < best_sad) { 1687 best_sad = sad; 1688 *best_mv = mv; 1689 } 1690 } 1691 } else { 1692 // 4 sads in a single call if we are checking every location 1693 if (c + 3 <= end_col) { 1694 unsigned int sads[4]; 1695 const uint8_t *addrs[4]; 1696 for (i = 0; i < 4; ++i) { 1697 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; 1698 addrs[i] = get_buf_from_mv(in_what, &mv); 1699 } 1700 fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); 1701 1702 for (i = 0; i < 4; ++i) { 1703 if (sads[i] < best_sad) { 1704 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; 1705 const unsigned int sad = 1706 sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); 1707 if (sad < best_sad) { 1708 best_sad = sad; 1709 *best_mv = mv; 1710 } 1711 } 1712 } 1713 } else { 1714 for (i = 0; i < end_col - c; ++i) { 1715 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; 1716 unsigned int sad = 1717 fn_ptr->sdf(what->buf, what->stride, 1718 get_buf_from_mv(in_what, &mv), in_what->stride); 1719 if (sad < best_sad) { 1720 sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); 1721 if (sad < best_sad) { 1722 best_sad = sad; 1723 *best_mv = mv; 1724 } 1725 } 1726 } 1727 } 1728 } 1729 } 1730 } 1731 1732 return best_sad; 1733 } 1734 1735 #if CONFIG_NON_GREEDY_MV 1736 double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, 1737 int mv_num) { 1738 int i; 1739 int update = 0; 1740 double best_cost = 0; 1741 vpx_clear_system_state(); 1742 for (i = 0; i < mv_num; ++i) { 1743 if (nb_mvs[i].as_int != INVALID_MV) { 1744 MV nb_mv = nb_mvs[i].as_mv; 1745 const double row_diff = mv->row - nb_mv.row; 1746 const double col_diff = mv->col - nb_mv.col; 1747 double cost = row_diff * row_diff + col_diff * col_diff; 1748 cost = log2(1 + cost); 1749 if (update == 0) { 1750 best_cost = cost; 1751 update = 1; 1752 } else { 1753 best_cost = cost < best_cost ? cost : best_cost; 1754 } 1755 } 1756 } 1757 return best_cost; 1758 } 1759 1760 double vp9_diamond_search_sad_new(const MACROBLOCK *x, 1761 const search_site_config *cfg, 1762 const MV *init_full_mv, MV *best_full_mv, 1763 double *best_mv_dist, double *best_mv_cost, 1764 int search_param, double lambda, int *num00, 1765 const vp9_variance_fn_ptr_t *fn_ptr, 1766 const int_mv *nb_full_mvs, int full_mv_num) { 1767 int i, j, step; 1768 1769 const MACROBLOCKD *const xd = &x->e_mbd; 1770 uint8_t *what = x->plane[0].src.buf; 1771 const int what_stride = x->plane[0].src.stride; 1772 const uint8_t *in_what; 1773 const int in_what_stride = xd->plane[0].pre[0].stride; 1774 const uint8_t *best_address; 1775 1776 double bestsad; 1777 int best_site = -1; 1778 int last_site = -1; 1779 1780 // search_param determines the length of the initial step and hence the number 1781 // of iterations. 1782 // 0 = initial step (MAX_FIRST_STEP) pel 1783 // 1 = (MAX_FIRST_STEP/2) pel, 1784 // 2 = (MAX_FIRST_STEP/4) pel... 1785 // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; 1786 const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; 1787 const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; 1788 const int tot_steps = cfg->total_steps - search_param; 1789 vpx_clear_system_state(); 1790 1791 *best_full_mv = *init_full_mv; 1792 clamp_mv(best_full_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1793 x->mv_limits.row_min, x->mv_limits.row_max); 1794 *num00 = 0; 1795 1796 // Work out the start point for the search 1797 in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride + 1798 best_full_mv->col; 1799 best_address = in_what; 1800 1801 // Check the starting position 1802 *best_mv_dist = fn_ptr->sdf(what, what_stride, in_what, in_what_stride); 1803 *best_mv_cost = 1804 vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); 1805 bestsad = (*best_mv_dist) + lambda * (*best_mv_cost); 1806 1807 i = 0; 1808 1809 for (step = 0; step < tot_steps; step++) { 1810 int all_in = 1, t; 1811 1812 // All_in is true if every one of the points we are checking are within 1813 // the bounds of the image. 1814 all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min); 1815 all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max); 1816 all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min); 1817 all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max); 1818 1819 // If all the pixels are within the bounds we don't check whether the 1820 // search point is valid in this loop, otherwise we check each point 1821 // for validity.. 1822 if (all_in) { 1823 unsigned int sad_array[4]; 1824 1825 for (j = 0; j < cfg->searches_per_step; j += 4) { 1826 unsigned char const *block_offset[4]; 1827 1828 for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address; 1829 1830 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1831 sad_array); 1832 1833 for (t = 0; t < 4; t++, i++) { 1834 const MV this_mv = { best_full_mv->row + ss_mv[i].row, 1835 best_full_mv->col + ss_mv[i].col }; 1836 const double mv_dist = sad_array[t]; 1837 const double mv_cost = 1838 vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); 1839 double thissad = mv_dist + lambda * mv_cost; 1840 if (thissad < bestsad) { 1841 bestsad = thissad; 1842 *best_mv_dist = mv_dist; 1843 *best_mv_cost = mv_cost; 1844 best_site = i; 1845 } 1846 } 1847 } 1848 } else { 1849 for (j = 0; j < cfg->searches_per_step; j++) { 1850 // Trap illegal vectors 1851 const MV this_mv = { best_full_mv->row + ss_mv[i].row, 1852 best_full_mv->col + ss_mv[i].col }; 1853 1854 if (is_mv_in(&x->mv_limits, &this_mv)) { 1855 const uint8_t *const check_here = ss_os[i] + best_address; 1856 const double mv_dist = 1857 fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1858 const double mv_cost = 1859 vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); 1860 double thissad = mv_dist + lambda * mv_cost; 1861 if (thissad < bestsad) { 1862 bestsad = thissad; 1863 *best_mv_dist = mv_dist; 1864 *best_mv_cost = mv_cost; 1865 best_site = i; 1866 } 1867 } 1868 i++; 1869 } 1870 } 1871 if (best_site != last_site) { 1872 best_full_mv->row += ss_mv[best_site].row; 1873 best_full_mv->col += ss_mv[best_site].col; 1874 best_address += ss_os[best_site]; 1875 last_site = best_site; 1876 } else if (best_address == in_what) { 1877 (*num00)++; 1878 } 1879 } 1880 return bestsad; 1881 } 1882 1883 void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row, 1884 int mi_col, int rf_idx, BLOCK_SIZE bsize, 1885 int_mv *nb_full_mvs) { 1886 const int mi_width = num_8x8_blocks_wide_lookup[bsize]; 1887 const int mi_height = num_8x8_blocks_high_lookup[bsize]; 1888 const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; 1889 int i; 1890 for (i = 0; i < NB_MVS_NUM; ++i) { 1891 int r = dirs[i][0] * mi_height; 1892 int c = dirs[i][1] * mi_width; 1893 if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 && 1894 mi_col + c < tpl_frame->mi_cols) { 1895 const TplDepStats *tpl_ptr = 1896 &tpl_frame 1897 ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c]; 1898 int_mv *mv = 1899 get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c); 1900 if (tpl_ptr->ready[rf_idx]) { 1901 nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv); 1902 } else { 1903 nb_full_mvs[i].as_int = INVALID_MV; 1904 } 1905 } else { 1906 nb_full_mvs[i].as_int = INVALID_MV; 1907 } 1908 } 1909 } 1910 #endif // CONFIG_NON_GREEDY_MV 1911 1912 int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg, 1913 MV *ref_mv, MV *best_mv, int search_param, 1914 int sad_per_bit, int *num00, 1915 const vp9_variance_fn_ptr_t *fn_ptr, 1916 const MV *center_mv) { 1917 int i, j, step; 1918 1919 const MACROBLOCKD *const xd = &x->e_mbd; 1920 uint8_t *what = x->plane[0].src.buf; 1921 const int what_stride = x->plane[0].src.stride; 1922 const uint8_t *in_what; 1923 const int in_what_stride = xd->plane[0].pre[0].stride; 1924 const uint8_t *best_address; 1925 1926 unsigned int bestsad = INT_MAX; 1927 int best_site = -1; 1928 int last_site = -1; 1929 1930 int ref_row; 1931 int ref_col; 1932 1933 // search_param determines the length of the initial step and hence the number 1934 // of iterations. 1935 // 0 = initial step (MAX_FIRST_STEP) pel 1936 // 1 = (MAX_FIRST_STEP/2) pel, 1937 // 2 = (MAX_FIRST_STEP/4) pel... 1938 // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; 1939 const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; 1940 const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; 1941 const int tot_steps = cfg->total_steps - search_param; 1942 1943 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 1944 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1945 x->mv_limits.row_min, x->mv_limits.row_max); 1946 ref_row = ref_mv->row; 1947 ref_col = ref_mv->col; 1948 *num00 = 0; 1949 best_mv->row = ref_row; 1950 best_mv->col = ref_col; 1951 1952 // Work out the start point for the search 1953 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 1954 best_address = in_what; 1955 1956 // Check the starting position 1957 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + 1958 mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); 1959 1960 i = 0; 1961 1962 for (step = 0; step < tot_steps; step++) { 1963 int all_in = 1, t; 1964 1965 // All_in is true if every one of the points we are checking are within 1966 // the bounds of the image. 1967 all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min); 1968 all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max); 1969 all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min); 1970 all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max); 1971 1972 // If all the pixels are within the bounds we don't check whether the 1973 // search point is valid in this loop, otherwise we check each point 1974 // for validity.. 1975 if (all_in) { 1976 unsigned int sad_array[4]; 1977 1978 for (j = 0; j < cfg->searches_per_step; j += 4) { 1979 unsigned char const *block_offset[4]; 1980 1981 for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address; 1982 1983 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1984 sad_array); 1985 1986 for (t = 0; t < 4; t++, i++) { 1987 if (sad_array[t] < bestsad) { 1988 const MV this_mv = { best_mv->row + ss_mv[i].row, 1989 best_mv->col + ss_mv[i].col }; 1990 sad_array[t] += 1991 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1992 if (sad_array[t] < bestsad) { 1993 bestsad = sad_array[t]; 1994 best_site = i; 1995 } 1996 } 1997 } 1998 } 1999 } else { 2000 for (j = 0; j < cfg->searches_per_step; j++) { 2001 // Trap illegal vectors 2002 const MV this_mv = { best_mv->row + ss_mv[i].row, 2003 best_mv->col + ss_mv[i].col }; 2004 2005 if (is_mv_in(&x->mv_limits, &this_mv)) { 2006 const uint8_t *const check_here = ss_os[i] + best_address; 2007 unsigned int thissad = 2008 fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 2009 2010 if (thissad < bestsad) { 2011 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 2012 if (thissad < bestsad) { 2013 bestsad = thissad; 2014 best_site = i; 2015 } 2016 } 2017 } 2018 i++; 2019 } 2020 } 2021 if (best_site != last_site) { 2022 best_mv->row += ss_mv[best_site].row; 2023 best_mv->col += ss_mv[best_site].col; 2024 best_address += ss_os[best_site]; 2025 last_site = best_site; 2026 #if defined(NEW_DIAMOND_SEARCH) 2027 while (1) { 2028 const MV this_mv = { best_mv->row + ss_mv[best_site].row, 2029 best_mv->col + ss_mv[best_site].col }; 2030 if (is_mv_in(&x->mv_limits, &this_mv)) { 2031 const uint8_t *const check_here = ss_os[best_site] + best_address; 2032 unsigned int thissad = 2033 fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 2034 if (thissad < bestsad) { 2035 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 2036 if (thissad < bestsad) { 2037 bestsad = thissad; 2038 best_mv->row += ss_mv[best_site].row; 2039 best_mv->col += ss_mv[best_site].col; 2040 best_address += ss_os[best_site]; 2041 continue; 2042 } 2043 } 2044 } 2045 break; 2046 } 2047 #endif 2048 } else if (best_address == in_what) { 2049 (*num00)++; 2050 } 2051 } 2052 return bestsad; 2053 } 2054 2055 static int vector_match(int16_t *ref, int16_t *src, int bwl) { 2056 int best_sad = INT_MAX; 2057 int this_sad; 2058 int d; 2059 int center, offset = 0; 2060 int bw = 4 << bwl; // redundant variable, to be changed in the experiments. 2061 for (d = 0; d <= bw; d += 16) { 2062 this_sad = vpx_vector_var(&ref[d], src, bwl); 2063 if (this_sad < best_sad) { 2064 best_sad = this_sad; 2065 offset = d; 2066 } 2067 } 2068 center = offset; 2069 2070 for (d = -8; d <= 8; d += 16) { 2071 int this_pos = offset + d; 2072 // check limit 2073 if (this_pos < 0 || this_pos > bw) continue; 2074 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 2075 if (this_sad < best_sad) { 2076 best_sad = this_sad; 2077 center = this_pos; 2078 } 2079 } 2080 offset = center; 2081 2082 for (d = -4; d <= 4; d += 8) { 2083 int this_pos = offset + d; 2084 // check limit 2085 if (this_pos < 0 || this_pos > bw) continue; 2086 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 2087 if (this_sad < best_sad) { 2088 best_sad = this_sad; 2089 center = this_pos; 2090 } 2091 } 2092 offset = center; 2093 2094 for (d = -2; d <= 2; d += 4) { 2095 int this_pos = offset + d; 2096 // check limit 2097 if (this_pos < 0 || this_pos > bw) continue; 2098 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 2099 if (this_sad < best_sad) { 2100 best_sad = this_sad; 2101 center = this_pos; 2102 } 2103 } 2104 offset = center; 2105 2106 for (d = -1; d <= 1; d += 2) { 2107 int this_pos = offset + d; 2108 // check limit 2109 if (this_pos < 0 || this_pos > bw) continue; 2110 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 2111 if (this_sad < best_sad) { 2112 best_sad = this_sad; 2113 center = this_pos; 2114 } 2115 } 2116 2117 return (center - (bw >> 1)); 2118 } 2119 2120 static const MV search_pos[4] = { 2121 { -1, 0 }, 2122 { 0, -1 }, 2123 { 0, 1 }, 2124 { 1, 0 }, 2125 }; 2126 2127 unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, 2128 BLOCK_SIZE bsize, int mi_row, 2129 int mi_col, const MV *ref_mv) { 2130 MACROBLOCKD *xd = &x->e_mbd; 2131 MODE_INFO *mi = xd->mi[0]; 2132 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; 2133 DECLARE_ALIGNED(16, int16_t, hbuf[128]); 2134 DECLARE_ALIGNED(16, int16_t, vbuf[128]); 2135 DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); 2136 DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); 2137 int idx; 2138 const int bw = 4 << b_width_log2_lookup[bsize]; 2139 const int bh = 4 << b_height_log2_lookup[bsize]; 2140 const int search_width = bw << 1; 2141 const int search_height = bh << 1; 2142 const int src_stride = x->plane[0].src.stride; 2143 const int ref_stride = xd->plane[0].pre[0].stride; 2144 uint8_t const *ref_buf, *src_buf; 2145 MV *tmp_mv = &xd->mi[0]->mv[0].as_mv; 2146 unsigned int best_sad, tmp_sad, this_sad[4]; 2147 MV this_mv; 2148 const int norm_factor = 3 + (bw >> 5); 2149 const YV12_BUFFER_CONFIG *scaled_ref_frame = 2150 vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]); 2151 MvLimits subpel_mv_limits; 2152 2153 if (scaled_ref_frame) { 2154 int i; 2155 // Swap out the reference frame for a version that's been scaled to 2156 // match the resolution of the current frame, allowing the existing 2157 // motion search code to be used without additional modifications. 2158 for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; 2159 vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); 2160 } 2161 2162 #if CONFIG_VP9_HIGHBITDEPTH 2163 // TODO(jingning): Implement integral projection functions for high bit-depth 2164 // setting and remove this part of code. 2165 if (xd->bd != 8) { 2166 unsigned int this_sad; 2167 tmp_mv->row = 0; 2168 tmp_mv->col = 0; 2169 this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, 2170 xd->plane[0].pre[0].buf, ref_stride); 2171 2172 if (scaled_ref_frame) { 2173 int i; 2174 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; 2175 } 2176 return this_sad; 2177 } 2178 #endif 2179 2180 // Set up prediction 1-D reference set 2181 ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); 2182 for (idx = 0; idx < search_width; idx += 16) { 2183 vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); 2184 ref_buf += 16; 2185 } 2186 2187 ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; 2188 for (idx = 0; idx < search_height; ++idx) { 2189 vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor; 2190 ref_buf += ref_stride; 2191 } 2192 2193 // Set up src 1-D reference set 2194 for (idx = 0; idx < bw; idx += 16) { 2195 src_buf = x->plane[0].src.buf + idx; 2196 vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); 2197 } 2198 2199 src_buf = x->plane[0].src.buf; 2200 for (idx = 0; idx < bh; ++idx) { 2201 src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor; 2202 src_buf += src_stride; 2203 } 2204 2205 // Find the best match per 1-D search 2206 tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]); 2207 tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]); 2208 2209 this_mv = *tmp_mv; 2210 src_buf = x->plane[0].src.buf; 2211 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; 2212 best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); 2213 2214 { 2215 const uint8_t *const pos[4] = { 2216 ref_buf - ref_stride, 2217 ref_buf - 1, 2218 ref_buf + 1, 2219 ref_buf + ref_stride, 2220 }; 2221 2222 cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad); 2223 } 2224 2225 for (idx = 0; idx < 4; ++idx) { 2226 if (this_sad[idx] < best_sad) { 2227 best_sad = this_sad[idx]; 2228 tmp_mv->row = search_pos[idx].row + this_mv.row; 2229 tmp_mv->col = search_pos[idx].col + this_mv.col; 2230 } 2231 } 2232 2233 if (this_sad[0] < this_sad[3]) 2234 this_mv.row -= 1; 2235 else 2236 this_mv.row += 1; 2237 2238 if (this_sad[1] < this_sad[2]) 2239 this_mv.col -= 1; 2240 else 2241 this_mv.col += 1; 2242 2243 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; 2244 2245 tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); 2246 if (best_sad > tmp_sad) { 2247 *tmp_mv = this_mv; 2248 best_sad = tmp_sad; 2249 } 2250 2251 tmp_mv->row *= 8; 2252 tmp_mv->col *= 8; 2253 2254 vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); 2255 clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max, 2256 subpel_mv_limits.row_min, subpel_mv_limits.row_max); 2257 2258 if (scaled_ref_frame) { 2259 int i; 2260 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; 2261 } 2262 2263 return best_sad; 2264 } 2265 2266 #if CONFIG_NON_GREEDY_MV 2267 // Runs sequence of diamond searches in smaller steps for RD. 2268 /* do_refine: If last step (1-away) of n-step search doesn't pick the center 2269 point as the best match, we will do a final 1-away diamond 2270 refining search */ 2271 double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, 2272 MV *mvp_full, int step_param, double lambda, 2273 int do_refine, 2274 const vp9_variance_fn_ptr_t *fn_ptr, 2275 const int_mv *nb_full_mvs, int full_mv_num, 2276 MV *best_mv, double *best_mv_dist, 2277 double *best_mv_cost) { 2278 int n, num00 = 0; 2279 double thissme; 2280 double bestsme; 2281 const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; 2282 vpx_clear_system_state(); 2283 bestsme = vp9_diamond_search_sad_new( 2284 x, &cpi->ss_cfg, mvp_full, best_mv, best_mv_dist, best_mv_cost, 2285 step_param, lambda, &n, fn_ptr, nb_full_mvs, full_mv_num); 2286 2287 // If there won't be more n-step search, check to see if refining search is 2288 // needed. 2289 if (n > further_steps) do_refine = 0; 2290 2291 while (n < further_steps) { 2292 ++n; 2293 if (num00) { 2294 num00--; 2295 } else { 2296 MV temp_mv; 2297 double mv_dist; 2298 double mv_cost; 2299 thissme = vp9_diamond_search_sad_new( 2300 x, &cpi->ss_cfg, mvp_full, &temp_mv, &mv_dist, &mv_cost, 2301 step_param + n, lambda, &num00, fn_ptr, nb_full_mvs, full_mv_num); 2302 // check to see if refining search is needed. 2303 if (num00 > further_steps - n) do_refine = 0; 2304 2305 if (thissme < bestsme) { 2306 bestsme = thissme; 2307 *best_mv = temp_mv; 2308 *best_mv_dist = mv_dist; 2309 *best_mv_cost = mv_cost; 2310 } 2311 } 2312 } 2313 2314 // final 1-away diamond refining search 2315 if (do_refine) { 2316 const int search_range = 8; 2317 MV temp_mv = *best_mv; 2318 double mv_dist; 2319 double mv_cost; 2320 thissme = vp9_refining_search_sad_new(x, &temp_mv, &mv_dist, &mv_cost, 2321 lambda, search_range, fn_ptr, 2322 nb_full_mvs, full_mv_num); 2323 if (thissme < bestsme) { 2324 bestsme = thissme; 2325 *best_mv = temp_mv; 2326 *best_mv_dist = mv_dist; 2327 *best_mv_cost = mv_cost; 2328 } 2329 } 2330 return bestsme; 2331 } 2332 #endif // CONFIG_NON_GREEDY_MV 2333 2334 // Runs sequence of diamond searches in smaller steps for RD. 2335 /* do_refine: If last step (1-away) of n-step search doesn't pick the center 2336 point as the best match, we will do a final 1-away diamond 2337 refining search */ 2338 static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, 2339 int step_param, int sadpb, int further_steps, 2340 int do_refine, int *cost_list, 2341 const vp9_variance_fn_ptr_t *fn_ptr, 2342 const MV *ref_mv, MV *dst_mv) { 2343 MV temp_mv; 2344 int thissme, n, num00 = 0; 2345 int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, 2346 step_param, sadpb, &n, fn_ptr, ref_mv); 2347 if (bestsme < INT_MAX) 2348 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 2349 *dst_mv = temp_mv; 2350 2351 // If there won't be more n-step search, check to see if refining search is 2352 // needed. 2353 if (n > further_steps) do_refine = 0; 2354 2355 while (n < further_steps) { 2356 ++n; 2357 2358 if (num00) { 2359 num00--; 2360 } else { 2361 thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, 2362 step_param + n, sadpb, &num00, fn_ptr, 2363 ref_mv); 2364 if (thissme < INT_MAX) 2365 thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 2366 2367 // check to see if refining search is needed. 2368 if (num00 > further_steps - n) do_refine = 0; 2369 2370 if (thissme < bestsme) { 2371 bestsme = thissme; 2372 *dst_mv = temp_mv; 2373 } 2374 } 2375 } 2376 2377 // final 1-away diamond refining search 2378 if (do_refine) { 2379 const int search_range = 8; 2380 MV best_mv = *dst_mv; 2381 thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, 2382 ref_mv); 2383 if (thissme < INT_MAX) 2384 thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); 2385 if (thissme < bestsme) { 2386 bestsme = thissme; 2387 *dst_mv = best_mv; 2388 } 2389 } 2390 2391 // Return cost list. 2392 if (cost_list) { 2393 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); 2394 } 2395 return bestsme; 2396 } 2397 2398 #define MIN_RANGE 7 2399 #define MAX_RANGE 256 2400 #define MIN_INTERVAL 1 2401 // Runs an limited range exhaustive mesh search using a pattern set 2402 // according to the encode speed profile. 2403 static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, 2404 MV *centre_mv_full, int sadpb, int *cost_list, 2405 const vp9_variance_fn_ptr_t *fn_ptr, 2406 const MV *ref_mv, MV *dst_mv) { 2407 const SPEED_FEATURES *const sf = &cpi->sf; 2408 MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; 2409 MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; 2410 int bestsme; 2411 int i; 2412 int interval = sf->mesh_patterns[0].interval; 2413 int range = sf->mesh_patterns[0].range; 2414 int baseline_interval_divisor; 2415 2416 // Trap illegal values for interval and range for this function. 2417 if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || 2418 (interval > range)) 2419 return INT_MAX; 2420 2421 baseline_interval_divisor = range / interval; 2422 2423 // Check size of proposed first range against magnitude of the centre 2424 // value used as a starting point. 2425 range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); 2426 range = VPXMIN(range, MAX_RANGE); 2427 interval = VPXMAX(interval, range / baseline_interval_divisor); 2428 2429 // initial search 2430 bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval, 2431 sadpb, fn_ptr, &temp_mv); 2432 2433 if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { 2434 // Progressive searches with range and step size decreasing each time 2435 // till we reach a step size of 1. Then break out. 2436 for (i = 1; i < MAX_MESH_STEP; ++i) { 2437 // First pass with coarser step and longer range 2438 bestsme = exhuastive_mesh_search( 2439 x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range, 2440 sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv); 2441 2442 if (sf->mesh_patterns[i].interval == 1) break; 2443 } 2444 } 2445 2446 if (bestsme < INT_MAX) 2447 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 2448 *dst_mv = temp_mv; 2449 2450 // Return cost list. 2451 if (cost_list) { 2452 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); 2453 } 2454 return bestsme; 2455 } 2456 2457 #if CONFIG_NON_GREEDY_MV 2458 double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, 2459 double *best_mv_dist, double *best_mv_cost, 2460 double lambda, int search_range, 2461 const vp9_variance_fn_ptr_t *fn_ptr, 2462 const int_mv *nb_full_mvs, int full_mv_num) { 2463 const MACROBLOCKD *const xd = &x->e_mbd; 2464 const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; 2465 const struct buf_2d *const what = &x->plane[0].src; 2466 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2467 const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv); 2468 double best_sad; 2469 int i, j; 2470 vpx_clear_system_state(); 2471 *best_mv_dist = 2472 fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride); 2473 *best_mv_cost = 2474 vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); 2475 best_sad = (*best_mv_dist) + lambda * (*best_mv_cost); 2476 2477 for (i = 0; i < search_range; i++) { 2478 int best_site = -1; 2479 const int all_in = ((best_full_mv->row - 1) > x->mv_limits.row_min) & 2480 ((best_full_mv->row + 1) < x->mv_limits.row_max) & 2481 ((best_full_mv->col - 1) > x->mv_limits.col_min) & 2482 ((best_full_mv->col + 1) < x->mv_limits.col_max); 2483 2484 if (all_in) { 2485 unsigned int sads[4]; 2486 const uint8_t *const positions[4] = { best_address - in_what->stride, 2487 best_address - 1, best_address + 1, 2488 best_address + in_what->stride }; 2489 2490 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); 2491 2492 for (j = 0; j < 4; ++j) { 2493 const MV mv = { best_full_mv->row + neighbors[j].row, 2494 best_full_mv->col + neighbors[j].col }; 2495 const double mv_dist = sads[j]; 2496 const double mv_cost = 2497 vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); 2498 const double thissad = mv_dist + lambda * mv_cost; 2499 if (thissad < best_sad) { 2500 best_sad = thissad; 2501 *best_mv_dist = mv_dist; 2502 *best_mv_cost = mv_cost; 2503 best_site = j; 2504 } 2505 } 2506 } else { 2507 for (j = 0; j < 4; ++j) { 2508 const MV mv = { best_full_mv->row + neighbors[j].row, 2509 best_full_mv->col + neighbors[j].col }; 2510 2511 if (is_mv_in(&x->mv_limits, &mv)) { 2512 const double mv_dist = 2513 fn_ptr->sdf(what->buf, what->stride, 2514 get_buf_from_mv(in_what, &mv), in_what->stride); 2515 const double mv_cost = 2516 vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); 2517 const double thissad = mv_dist + lambda * mv_cost; 2518 if (thissad < best_sad) { 2519 best_sad = thissad; 2520 *best_mv_dist = mv_dist; 2521 *best_mv_cost = mv_cost; 2522 best_site = j; 2523 } 2524 } 2525 } 2526 } 2527 2528 if (best_site == -1) { 2529 break; 2530 } else { 2531 best_full_mv->row += neighbors[best_site].row; 2532 best_full_mv->col += neighbors[best_site].col; 2533 best_address = get_buf_from_mv(in_what, best_full_mv); 2534 } 2535 } 2536 2537 return best_sad; 2538 } 2539 #endif // CONFIG_NON_GREEDY_MV 2540 2541 int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, 2542 int search_range, 2543 const vp9_variance_fn_ptr_t *fn_ptr, 2544 const MV *center_mv) { 2545 const MACROBLOCKD *const xd = &x->e_mbd; 2546 const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; 2547 const struct buf_2d *const what = &x->plane[0].src; 2548 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2549 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 2550 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); 2551 unsigned int best_sad = 2552 fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) + 2553 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 2554 int i, j; 2555 2556 for (i = 0; i < search_range; i++) { 2557 int best_site = -1; 2558 const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) & 2559 ((ref_mv->row + 1) < x->mv_limits.row_max) & 2560 ((ref_mv->col - 1) > x->mv_limits.col_min) & 2561 ((ref_mv->col + 1) < x->mv_limits.col_max); 2562 2563 if (all_in) { 2564 unsigned int sads[4]; 2565 const uint8_t *const positions[4] = { best_address - in_what->stride, 2566 best_address - 1, best_address + 1, 2567 best_address + in_what->stride }; 2568 2569 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); 2570 2571 for (j = 0; j < 4; ++j) { 2572 if (sads[j] < best_sad) { 2573 const MV mv = { ref_mv->row + neighbors[j].row, 2574 ref_mv->col + neighbors[j].col }; 2575 sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2576 if (sads[j] < best_sad) { 2577 best_sad = sads[j]; 2578 best_site = j; 2579 } 2580 } 2581 } 2582 } else { 2583 for (j = 0; j < 4; ++j) { 2584 const MV mv = { ref_mv->row + neighbors[j].row, 2585 ref_mv->col + neighbors[j].col }; 2586 2587 if (is_mv_in(&x->mv_limits, &mv)) { 2588 unsigned int sad = 2589 fn_ptr->sdf(what->buf, what->stride, 2590 get_buf_from_mv(in_what, &mv), in_what->stride); 2591 if (sad < best_sad) { 2592 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2593 if (sad < best_sad) { 2594 best_sad = sad; 2595 best_site = j; 2596 } 2597 } 2598 } 2599 } 2600 } 2601 2602 if (best_site == -1) { 2603 break; 2604 } else { 2605 ref_mv->row += neighbors[best_site].row; 2606 ref_mv->col += neighbors[best_site].col; 2607 best_address = get_buf_from_mv(in_what, ref_mv); 2608 } 2609 } 2610 2611 return best_sad; 2612 } 2613 2614 // This function is called when we do joint motion search in comp_inter_inter 2615 // mode. 2616 int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, 2617 int search_range, 2618 const vp9_variance_fn_ptr_t *fn_ptr, 2619 const MV *center_mv, const uint8_t *second_pred) { 2620 const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, 2621 { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; 2622 const MACROBLOCKD *const xd = &x->e_mbd; 2623 const struct buf_2d *const what = &x->plane[0].src; 2624 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2625 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 2626 unsigned int best_sad = INT_MAX; 2627 int i, j; 2628 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 2629 x->mv_limits.row_min, x->mv_limits.row_max); 2630 best_sad = 2631 fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 2632 in_what->stride, second_pred) + 2633 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 2634 2635 for (i = 0; i < search_range; ++i) { 2636 int best_site = -1; 2637 2638 for (j = 0; j < 8; ++j) { 2639 const MV mv = { ref_mv->row + neighbors[j].row, 2640 ref_mv->col + neighbors[j].col }; 2641 2642 if (is_mv_in(&x->mv_limits, &mv)) { 2643 unsigned int sad = 2644 fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), 2645 in_what->stride, second_pred); 2646 if (sad < best_sad) { 2647 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2648 if (sad < best_sad) { 2649 best_sad = sad; 2650 best_site = j; 2651 } 2652 } 2653 } 2654 } 2655 2656 if (best_site == -1) { 2657 break; 2658 } else { 2659 ref_mv->row += neighbors[best_site].row; 2660 ref_mv->col += neighbors[best_site].col; 2661 } 2662 } 2663 return best_sad; 2664 } 2665 2666 int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, 2667 MV *mvp_full, int step_param, int search_method, 2668 int error_per_bit, int *cost_list, const MV *ref_mv, 2669 MV *tmp_mv, int var_max, int rd) { 2670 const SPEED_FEATURES *const sf = &cpi->sf; 2671 const SEARCH_METHODS method = (SEARCH_METHODS)search_method; 2672 vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; 2673 int var = 0; 2674 int run_exhaustive_search = 0; 2675 2676 if (cost_list) { 2677 cost_list[0] = INT_MAX; 2678 cost_list[1] = INT_MAX; 2679 cost_list[2] = INT_MAX; 2680 cost_list[3] = INT_MAX; 2681 cost_list[4] = INT_MAX; 2682 } 2683 2684 switch (method) { 2685 case FAST_DIAMOND: 2686 var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, 2687 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2688 break; 2689 case FAST_HEX: 2690 var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, 2691 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2692 break; 2693 case HEX: 2694 var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, 2695 fn_ptr, 1, ref_mv, tmp_mv); 2696 break; 2697 case SQUARE: 2698 var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, 2699 fn_ptr, 1, ref_mv, tmp_mv); 2700 break; 2701 case BIGDIA: 2702 var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, 2703 fn_ptr, 1, ref_mv, tmp_mv); 2704 break; 2705 case NSTEP: 2706 case MESH: 2707 var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, 2708 MAX_MVSEARCH_STEPS - 1 - step_param, 1, 2709 cost_list, fn_ptr, ref_mv, tmp_mv); 2710 break; 2711 default: assert(0 && "Unknown search method"); 2712 } 2713 2714 if (method == NSTEP) { 2715 if (sf->exhaustive_searches_thresh < INT_MAX && 2716 !cpi->rc.is_src_frame_alt_ref) { 2717 const int64_t exhuastive_thr = 2718 sf->exhaustive_searches_thresh >> 2719 (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize])); 2720 if (var > exhuastive_thr) run_exhaustive_search = 1; 2721 } 2722 } else if (method == MESH) { 2723 run_exhaustive_search = 1; 2724 } 2725 2726 if (run_exhaustive_search) { 2727 int var_ex; 2728 MV tmp_mv_ex; 2729 var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list, 2730 fn_ptr, ref_mv, &tmp_mv_ex); 2731 if (var_ex < var) { 2732 var = var_ex; 2733 *tmp_mv = tmp_mv_ex; 2734 } 2735 } 2736 2737 if (method != NSTEP && method != MESH && rd && var < var_max) 2738 var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); 2739 2740 return var; 2741 } 2742 2743 // Note(yunqingwang): The following 2 functions are only used in the motion 2744 // vector unit test, which return extreme motion vectors allowed by the MV 2745 // limits. 2746 #define COMMON_MV_TEST \ 2747 SETUP_SUBPEL_SEARCH; \ 2748 \ 2749 (void)error_per_bit; \ 2750 (void)vfp; \ 2751 (void)z; \ 2752 (void)src_stride; \ 2753 (void)y; \ 2754 (void)y_stride; \ 2755 (void)second_pred; \ 2756 (void)w; \ 2757 (void)h; \ 2758 (void)offset; \ 2759 (void)mvjcost; \ 2760 (void)mvcost; \ 2761 (void)sse1; \ 2762 (void)distortion; \ 2763 \ 2764 (void)halfiters; \ 2765 (void)quarteriters; \ 2766 (void)eighthiters; \ 2767 (void)whichdir; \ 2768 (void)allow_hp; \ 2769 (void)forced_stop; \ 2770 (void)hstep; \ 2771 (void)rr; \ 2772 (void)rc; \ 2773 \ 2774 (void)tr; \ 2775 (void)tc; \ 2776 (void)sse; \ 2777 (void)thismse; \ 2778 (void)cost_list; \ 2779 (void)use_accurate_subpel_search; 2780 2781 // Return the maximum MV. 2782 uint32_t vp9_return_max_sub_pixel_mv( 2783 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 2784 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 2785 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 2786 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 2787 int h, int use_accurate_subpel_search) { 2788 COMMON_MV_TEST; 2789 2790 (void)minr; 2791 (void)minc; 2792 2793 bestmv->row = maxr; 2794 bestmv->col = maxc; 2795 besterr = 0; 2796 2797 // In the sub-pel motion search, if hp is not used, then the last bit of mv 2798 // has to be 0. 2799 lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); 2800 2801 return besterr; 2802 } 2803 // Return the minimum MV. 2804 uint32_t vp9_return_min_sub_pixel_mv( 2805 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 2806 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 2807 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 2808 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 2809 int h, int use_accurate_subpel_search) { 2810 COMMON_MV_TEST; 2811 2812 (void)maxr; 2813 (void)maxc; 2814 2815 bestmv->row = minr; 2816 bestmv->col = minc; 2817 besterr = 0; 2818 2819 // In the sub-pel motion search, if hp is not used, then the last bit of mv 2820 // has to be 0. 2821 lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); 2822 2823 return besterr; 2824 } 2825