1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <math.h> 13 #include <limits.h> 14 15 #include "vp9/common/vp9_alloccommon.h" 16 #include "vp9/common/vp9_common.h" 17 #include "vp9/common/vp9_onyxc_int.h" 18 #include "vp9/common/vp9_quant_common.h" 19 #include "vp9/common/vp9_reconinter.h" 20 #include "vp9/encoder/vp9_encodeframe.h" 21 #include "vp9/encoder/vp9_ethread.h" 22 #include "vp9/encoder/vp9_extend.h" 23 #include "vp9/encoder/vp9_firstpass.h" 24 #include "vp9/encoder/vp9_mcomp.h" 25 #include "vp9/encoder/vp9_encoder.h" 26 #include "vp9/encoder/vp9_quantize.h" 27 #include "vp9/encoder/vp9_ratectrl.h" 28 #include "vp9/encoder/vp9_segmentation.h" 29 #include "vp9/encoder/vp9_temporal_filter.h" 30 #include "vpx_dsp/vpx_dsp_common.h" 31 #include "vpx_mem/vpx_mem.h" 32 #include "vpx_ports/mem.h" 33 #include "vpx_ports/vpx_timer.h" 34 #include "vpx_scale/vpx_scale.h" 35 36 static int fixed_divide[512]; 37 38 static void temporal_filter_predictors_mb_c( 39 MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr, 40 int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col, 41 uint8_t *pred, struct scale_factors *scale, int x, int y) { 42 const int which_mv = 0; 43 const MV mv = { mv_row, mv_col }; 44 const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP]; 45 46 enum mv_precision mv_precision_uv; 47 int uv_stride; 48 if (uv_block_width == 8) { 49 uv_stride = (stride + 1) >> 1; 50 mv_precision_uv = MV_PRECISION_Q4; 51 } else { 52 uv_stride = stride; 53 mv_precision_uv = MV_PRECISION_Q3; 54 } 55 56 #if CONFIG_VP9_HIGHBITDEPTH 57 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 58 vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(y_mb_ptr), stride, 59 CONVERT_TO_SHORTPTR(&pred[0]), 16, &mv, 60 scale, 16, 16, which_mv, kernel, 61 MV_PRECISION_Q3, x, y, xd->bd); 62 63 vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(u_mb_ptr), uv_stride, 64 CONVERT_TO_SHORTPTR(&pred[256]), 65 uv_block_width, &mv, scale, uv_block_width, 66 uv_block_height, which_mv, kernel, 67 mv_precision_uv, x, y, xd->bd); 68 69 vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(v_mb_ptr), uv_stride, 70 CONVERT_TO_SHORTPTR(&pred[512]), 71 uv_block_width, &mv, scale, uv_block_width, 72 uv_block_height, which_mv, kernel, 73 mv_precision_uv, x, y, xd->bd); 74 return; 75 } 76 #endif // CONFIG_VP9_HIGHBITDEPTH 77 (void)xd; 78 vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16, 79 which_mv, kernel, MV_PRECISION_Q3, x, y); 80 81 vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width, 82 &mv, scale, uv_block_width, uv_block_height, 83 which_mv, kernel, mv_precision_uv, x, y); 84 85 vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width, 86 &mv, scale, uv_block_width, uv_block_height, 87 which_mv, kernel, mv_precision_uv, x, y); 88 } 89 90 void vp9_temporal_filter_init(void) { 91 int i; 92 93 fixed_divide[0] = 0; 94 for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i; 95 } 96 97 void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride, 98 const uint8_t *frame2, 99 unsigned int block_width, 100 unsigned int block_height, int strength, 101 int filter_weight, uint32_t *accumulator, 102 uint16_t *count) { 103 unsigned int i, j, k; 104 int modifier; 105 int byte = 0; 106 const int rounding = strength > 0 ? 1 << (strength - 1) : 0; 107 108 assert(strength >= 0); 109 assert(strength <= 6); 110 111 assert(filter_weight >= 0); 112 assert(filter_weight <= 2); 113 114 for (i = 0, k = 0; i < block_height; i++) { 115 for (j = 0; j < block_width; j++, k++) { 116 int pixel_value = *frame2; 117 118 // non-local mean approach 119 int diff_sse[9] = { 0 }; 120 int idx, idy, index = 0; 121 122 for (idy = -1; idy <= 1; ++idy) { 123 for (idx = -1; idx <= 1; ++idx) { 124 int row = (int)i + idy; 125 int col = (int)j + idx; 126 127 if (row >= 0 && row < (int)block_height && col >= 0 && 128 col < (int)block_width) { 129 int diff = frame1[byte + idy * (int)stride + idx] - 130 frame2[idy * (int)block_width + idx]; 131 diff_sse[index] = diff * diff; 132 ++index; 133 } 134 } 135 } 136 137 assert(index > 0); 138 139 modifier = 0; 140 for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx]; 141 142 modifier *= 3; 143 modifier /= index; 144 145 ++frame2; 146 147 modifier += rounding; 148 modifier >>= strength; 149 150 if (modifier > 16) modifier = 16; 151 152 modifier = 16 - modifier; 153 modifier *= filter_weight; 154 155 count[k] += modifier; 156 accumulator[k] += modifier * pixel_value; 157 158 byte++; 159 } 160 161 byte += stride - block_width; 162 } 163 } 164 165 #if CONFIG_VP9_HIGHBITDEPTH 166 void vp9_highbd_temporal_filter_apply_c( 167 const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8, 168 unsigned int block_width, unsigned int block_height, int strength, 169 int filter_weight, uint32_t *accumulator, uint16_t *count) { 170 const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8); 171 const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8); 172 unsigned int i, j, k; 173 int modifier; 174 int byte = 0; 175 const int rounding = strength > 0 ? 1 << (strength - 1) : 0; 176 177 for (i = 0, k = 0; i < block_height; i++) { 178 for (j = 0; j < block_width; j++, k++) { 179 int pixel_value = *frame2; 180 int diff_sse[9] = { 0 }; 181 int idx, idy, index = 0; 182 183 for (idy = -1; idy <= 1; ++idy) { 184 for (idx = -1; idx <= 1; ++idx) { 185 int row = (int)i + idy; 186 int col = (int)j + idx; 187 188 if (row >= 0 && row < (int)block_height && col >= 0 && 189 col < (int)block_width) { 190 int diff = frame1[byte + idy * (int)stride + idx] - 191 frame2[idy * (int)block_width + idx]; 192 diff_sse[index] = diff * diff; 193 ++index; 194 } 195 } 196 } 197 assert(index > 0); 198 199 modifier = 0; 200 for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx]; 201 202 modifier *= 3; 203 modifier /= index; 204 205 ++frame2; 206 modifier += rounding; 207 modifier >>= strength; 208 209 if (modifier > 16) modifier = 16; 210 211 modifier = 16 - modifier; 212 modifier *= filter_weight; 213 214 count[k] += modifier; 215 accumulator[k] += modifier * pixel_value; 216 217 byte++; 218 } 219 220 byte += stride - block_width; 221 } 222 } 223 #endif // CONFIG_VP9_HIGHBITDEPTH 224 225 static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, 226 ThreadData *td, 227 uint8_t *arf_frame_buf, 228 uint8_t *frame_ptr_buf, 229 int stride, MV *ref_mv) { 230 MACROBLOCK *const x = &td->mb; 231 MACROBLOCKD *const xd = &x->e_mbd; 232 MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; 233 const SEARCH_METHODS search_method = HEX; 234 int step_param; 235 int sadpb = x->sadperbit16; 236 uint32_t bestsme = UINT_MAX; 237 uint32_t distortion; 238 uint32_t sse; 239 int cost_list[5]; 240 const MvLimits tmp_mv_limits = x->mv_limits; 241 242 MV best_ref_mv1 = { 0, 0 }; 243 MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ 244 245 // Save input state 246 struct buf_2d src = x->plane[0].src; 247 struct buf_2d pre = xd->plane[0].pre[0]; 248 249 best_ref_mv1_full.col = best_ref_mv1.col >> 3; 250 best_ref_mv1_full.row = best_ref_mv1.row >> 3; 251 252 // Setup frame pointers 253 x->plane[0].src.buf = arf_frame_buf; 254 x->plane[0].src.stride = stride; 255 xd->plane[0].pre[0].buf = frame_ptr_buf; 256 xd->plane[0].pre[0].stride = stride; 257 258 step_param = mv_sf->reduce_first_step_size; 259 step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); 260 261 vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); 262 263 vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param, 264 search_method, sadpb, cond_cost_list(cpi, cost_list), 265 &best_ref_mv1, ref_mv, 0, 0); 266 267 /* restore UMV window */ 268 x->mv_limits = tmp_mv_limits; 269 270 // Ignore mv costing by sending NULL pointer instead of cost array 271 bestsme = cpi->find_fractional_mv_step( 272 x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, 273 x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, 274 mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, NULL, 275 &distortion, &sse, NULL, 0, 0); 276 277 // Restore input state 278 x->plane[0].src = src; 279 xd->plane[0].pre[0] = pre; 280 281 return bestsme; 282 } 283 284 void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, 285 int mb_row, int mb_col_start, 286 int mb_col_end) { 287 ARNRFilterData *arnr_filter_data = &cpi->arnr_filter_data; 288 YV12_BUFFER_CONFIG **frames = arnr_filter_data->frames; 289 int frame_count = arnr_filter_data->frame_count; 290 int alt_ref_index = arnr_filter_data->alt_ref_index; 291 int strength = arnr_filter_data->strength; 292 struct scale_factors *scale = &arnr_filter_data->sf; 293 int byte; 294 int frame; 295 int mb_col; 296 unsigned int filter_weight; 297 int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4; 298 int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4; 299 DECLARE_ALIGNED(16, uint32_t, accumulator[16 * 16 * 3]); 300 DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]); 301 MACROBLOCKD *mbd = &td->mb.e_mbd; 302 YV12_BUFFER_CONFIG *f = frames[alt_ref_index]; 303 uint8_t *dst1, *dst2; 304 #if CONFIG_VP9_HIGHBITDEPTH 305 DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]); 306 DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]); 307 uint8_t *predictor; 308 #else 309 DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]); 310 #endif 311 const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y; 312 const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x; 313 // Addition of the tile col level offsets 314 int mb_y_offset = mb_row * 16 * (f->y_stride) + 16 * mb_col_start; 315 int mb_uv_offset = 316 mb_row * mb_uv_height * f->uv_stride + mb_uv_width * mb_col_start; 317 318 #if CONFIG_VP9_HIGHBITDEPTH 319 if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 320 predictor = CONVERT_TO_BYTEPTR(predictor16); 321 } else { 322 predictor = predictor8; 323 } 324 #endif 325 326 // Source frames are extended to 16 pixels. This is different than 327 // L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS) 328 // A 6/8 tap filter is used for motion search. This requires 2 pixels 329 // before and 3 pixels after. So the largest Y mv on a border would 330 // then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the 331 // Y and therefore only extended by 8. The largest mv that a UV block 332 // can support is 8 - VP9_INTERP_EXTEND. A UV mv is half of a Y mv. 333 // (16 - VP9_INTERP_EXTEND) >> 1 which is greater than 334 // 8 - VP9_INTERP_EXTEND. 335 // To keep the mv in play for both Y and UV planes the max that it 336 // can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1). 337 td->mb.mv_limits.row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND)); 338 td->mb.mv_limits.row_max = 339 ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND); 340 341 for (mb_col = mb_col_start; mb_col < mb_col_end; mb_col++) { 342 int i, j, k; 343 int stride; 344 MV ref_mv; 345 346 vp9_zero_array(accumulator, 16 * 16 * 3); 347 vp9_zero_array(count, 16 * 16 * 3); 348 349 td->mb.mv_limits.col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND)); 350 td->mb.mv_limits.col_max = 351 ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND); 352 353 for (frame = 0; frame < frame_count; frame++) { 354 const uint32_t thresh_low = 10000; 355 const uint32_t thresh_high = 20000; 356 357 if (frames[frame] == NULL) continue; 358 359 ref_mv.row = 0; 360 ref_mv.col = 0; 361 362 if (frame == alt_ref_index) { 363 filter_weight = 2; 364 } else { 365 // Find best match in this frame by MC 366 uint32_t err = temporal_filter_find_matching_mb_c( 367 cpi, td, frames[alt_ref_index]->y_buffer + mb_y_offset, 368 frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride, 369 &ref_mv); 370 371 // Assign higher weight to matching MB if its error 372 // score is lower. If not applying MC default behavior 373 // is to weight all MBs equal. 374 filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0; 375 } 376 377 if (filter_weight != 0) { 378 // Construct the predictors 379 temporal_filter_predictors_mb_c( 380 mbd, frames[frame]->y_buffer + mb_y_offset, 381 frames[frame]->u_buffer + mb_uv_offset, 382 frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride, 383 mb_uv_width, mb_uv_height, ref_mv.row, ref_mv.col, predictor, scale, 384 mb_col * 16, mb_row * 16); 385 386 #if CONFIG_VP9_HIGHBITDEPTH 387 if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 388 int adj_strength = strength + 2 * (mbd->bd - 8); 389 // Apply the filter (YUV) 390 vp9_highbd_temporal_filter_apply( 391 f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16, 392 adj_strength, filter_weight, accumulator, count); 393 vp9_highbd_temporal_filter_apply( 394 f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256, 395 mb_uv_width, mb_uv_height, adj_strength, filter_weight, 396 accumulator + 256, count + 256); 397 vp9_highbd_temporal_filter_apply( 398 f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512, 399 mb_uv_width, mb_uv_height, adj_strength, filter_weight, 400 accumulator + 512, count + 512); 401 } else { 402 // Apply the filter (YUV) 403 vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, 404 predictor, 16, 16, strength, filter_weight, 405 accumulator, count); 406 vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, 407 predictor + 256, mb_uv_width, mb_uv_height, 408 strength, filter_weight, accumulator + 256, 409 count + 256); 410 vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, 411 predictor + 512, mb_uv_width, mb_uv_height, 412 strength, filter_weight, accumulator + 512, 413 count + 512); 414 } 415 #else 416 // Apply the filter (YUV) 417 vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, 418 predictor, 16, 16, strength, filter_weight, 419 accumulator, count); 420 vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, 421 predictor + 256, mb_uv_width, mb_uv_height, 422 strength, filter_weight, accumulator + 256, 423 count + 256); 424 vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, 425 predictor + 512, mb_uv_width, mb_uv_height, 426 strength, filter_weight, accumulator + 512, 427 count + 512); 428 #endif // CONFIG_VP9_HIGHBITDEPTH 429 } 430 } 431 432 #if CONFIG_VP9_HIGHBITDEPTH 433 if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 434 uint16_t *dst1_16; 435 uint16_t *dst2_16; 436 // Normalize filter output to produce AltRef frame 437 dst1 = cpi->alt_ref_buffer.y_buffer; 438 dst1_16 = CONVERT_TO_SHORTPTR(dst1); 439 stride = cpi->alt_ref_buffer.y_stride; 440 byte = mb_y_offset; 441 for (i = 0, k = 0; i < 16; i++) { 442 for (j = 0; j < 16; j++, k++) { 443 unsigned int pval = accumulator[k] + (count[k] >> 1); 444 pval *= fixed_divide[count[k]]; 445 pval >>= 19; 446 447 dst1_16[byte] = (uint16_t)pval; 448 449 // move to next pixel 450 byte++; 451 } 452 453 byte += stride - 16; 454 } 455 456 dst1 = cpi->alt_ref_buffer.u_buffer; 457 dst2 = cpi->alt_ref_buffer.v_buffer; 458 dst1_16 = CONVERT_TO_SHORTPTR(dst1); 459 dst2_16 = CONVERT_TO_SHORTPTR(dst2); 460 stride = cpi->alt_ref_buffer.uv_stride; 461 byte = mb_uv_offset; 462 for (i = 0, k = 256; i < mb_uv_height; i++) { 463 for (j = 0; j < mb_uv_width; j++, k++) { 464 int m = k + 256; 465 466 // U 467 unsigned int pval = accumulator[k] + (count[k] >> 1); 468 pval *= fixed_divide[count[k]]; 469 pval >>= 19; 470 dst1_16[byte] = (uint16_t)pval; 471 472 // V 473 pval = accumulator[m] + (count[m] >> 1); 474 pval *= fixed_divide[count[m]]; 475 pval >>= 19; 476 dst2_16[byte] = (uint16_t)pval; 477 478 // move to next pixel 479 byte++; 480 } 481 482 byte += stride - mb_uv_width; 483 } 484 } else { 485 // Normalize filter output to produce AltRef frame 486 dst1 = cpi->alt_ref_buffer.y_buffer; 487 stride = cpi->alt_ref_buffer.y_stride; 488 byte = mb_y_offset; 489 for (i = 0, k = 0; i < 16; i++) { 490 for (j = 0; j < 16; j++, k++) { 491 unsigned int pval = accumulator[k] + (count[k] >> 1); 492 pval *= fixed_divide[count[k]]; 493 pval >>= 19; 494 495 dst1[byte] = (uint8_t)pval; 496 497 // move to next pixel 498 byte++; 499 } 500 byte += stride - 16; 501 } 502 503 dst1 = cpi->alt_ref_buffer.u_buffer; 504 dst2 = cpi->alt_ref_buffer.v_buffer; 505 stride = cpi->alt_ref_buffer.uv_stride; 506 byte = mb_uv_offset; 507 for (i = 0, k = 256; i < mb_uv_height; i++) { 508 for (j = 0; j < mb_uv_width; j++, k++) { 509 int m = k + 256; 510 511 // U 512 unsigned int pval = accumulator[k] + (count[k] >> 1); 513 pval *= fixed_divide[count[k]]; 514 pval >>= 19; 515 dst1[byte] = (uint8_t)pval; 516 517 // V 518 pval = accumulator[m] + (count[m] >> 1); 519 pval *= fixed_divide[count[m]]; 520 pval >>= 19; 521 dst2[byte] = (uint8_t)pval; 522 523 // move to next pixel 524 byte++; 525 } 526 byte += stride - mb_uv_width; 527 } 528 } 529 #else 530 // Normalize filter output to produce AltRef frame 531 dst1 = cpi->alt_ref_buffer.y_buffer; 532 stride = cpi->alt_ref_buffer.y_stride; 533 byte = mb_y_offset; 534 for (i = 0, k = 0; i < 16; i++) { 535 for (j = 0; j < 16; j++, k++) { 536 unsigned int pval = accumulator[k] + (count[k] >> 1); 537 pval *= fixed_divide[count[k]]; 538 pval >>= 19; 539 540 dst1[byte] = (uint8_t)pval; 541 542 // move to next pixel 543 byte++; 544 } 545 byte += stride - 16; 546 } 547 548 dst1 = cpi->alt_ref_buffer.u_buffer; 549 dst2 = cpi->alt_ref_buffer.v_buffer; 550 stride = cpi->alt_ref_buffer.uv_stride; 551 byte = mb_uv_offset; 552 for (i = 0, k = 256; i < mb_uv_height; i++) { 553 for (j = 0; j < mb_uv_width; j++, k++) { 554 int m = k + 256; 555 556 // U 557 unsigned int pval = accumulator[k] + (count[k] >> 1); 558 pval *= fixed_divide[count[k]]; 559 pval >>= 19; 560 dst1[byte] = (uint8_t)pval; 561 562 // V 563 pval = accumulator[m] + (count[m] >> 1); 564 pval *= fixed_divide[count[m]]; 565 pval >>= 19; 566 dst2[byte] = (uint8_t)pval; 567 568 // move to next pixel 569 byte++; 570 } 571 byte += stride - mb_uv_width; 572 } 573 #endif // CONFIG_VP9_HIGHBITDEPTH 574 mb_y_offset += 16; 575 mb_uv_offset += mb_uv_width; 576 } 577 } 578 579 static void temporal_filter_iterate_tile_c(VP9_COMP *cpi, int tile_row, 580 int tile_col) { 581 VP9_COMMON *const cm = &cpi->common; 582 const int tile_cols = 1 << cm->log2_tile_cols; 583 TileInfo *tile_info = 584 &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info; 585 const int mb_row_start = (tile_info->mi_row_start) >> 1; 586 const int mb_row_end = (tile_info->mi_row_end + 1) >> 1; 587 const int mb_col_start = (tile_info->mi_col_start) >> 1; 588 const int mb_col_end = (tile_info->mi_col_end + 1) >> 1; 589 int mb_row; 590 591 for (mb_row = mb_row_start; mb_row < mb_row_end; mb_row++) { 592 vp9_temporal_filter_iterate_row_c(cpi, &cpi->td, mb_row, mb_col_start, 593 mb_col_end); 594 } 595 } 596 597 static void temporal_filter_iterate_c(VP9_COMP *cpi) { 598 VP9_COMMON *const cm = &cpi->common; 599 const int tile_cols = 1 << cm->log2_tile_cols; 600 const int tile_rows = 1 << cm->log2_tile_rows; 601 int tile_row, tile_col; 602 MACROBLOCKD *mbd = &cpi->td.mb.e_mbd; 603 // Save input state 604 uint8_t *input_buffer[MAX_MB_PLANE]; 605 int i; 606 607 for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf; 608 609 vp9_init_tile_data(cpi); 610 611 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { 612 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 613 temporal_filter_iterate_tile_c(cpi, tile_row, tile_col); 614 } 615 } 616 617 // Restore input state 618 for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i]; 619 } 620 621 // Apply buffer limits and context specific adjustments to arnr filter. 622 static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost, 623 int *arnr_frames, int *arnr_strength) { 624 const VP9EncoderConfig *const oxcf = &cpi->oxcf; 625 const int frames_after_arf = 626 vp9_lookahead_depth(cpi->lookahead) - distance - 1; 627 int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1; 628 int frames_bwd; 629 int q, frames, base_strength, strength; 630 631 // Context dependent two pass adjustment to strength. 632 if (oxcf->pass == 2) { 633 base_strength = oxcf->arnr_strength + cpi->twopass.arnr_strength_adjustment; 634 // Clip to allowed range. 635 base_strength = VPXMIN(6, VPXMAX(0, base_strength)); 636 } else { 637 base_strength = oxcf->arnr_strength; 638 } 639 640 // Define the forward and backwards filter limits for this arnr group. 641 if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf; 642 if (frames_fwd > distance) frames_fwd = distance; 643 644 frames_bwd = frames_fwd; 645 646 // For even length filter there is one more frame backward 647 // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. 648 if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1; 649 650 // Set the baseline active filter size. 651 frames = frames_bwd + 1 + frames_fwd; 652 653 // Adjust the strength based on active max q. 654 if (cpi->common.current_video_frame > 1) 655 q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME], 656 cpi->common.bit_depth)); 657 else 658 q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME], 659 cpi->common.bit_depth)); 660 if (q > 16) { 661 strength = base_strength; 662 } else { 663 strength = base_strength - ((16 - q) / 2); 664 if (strength < 0) strength = 0; 665 } 666 667 // Adjust number of frames in filter and strength based on gf boost level. 668 if (frames > group_boost / 150) { 669 frames = group_boost / 150; 670 frames += !(frames & 1); 671 } 672 673 if (strength > group_boost / 300) { 674 strength = group_boost / 300; 675 } 676 677 // Adjustments for second level arf in multi arf case. 678 if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) { 679 const GF_GROUP *const gf_group = &cpi->twopass.gf_group; 680 if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) { 681 strength >>= 1; 682 } 683 } 684 685 *arnr_frames = frames; 686 *arnr_strength = strength; 687 } 688 689 void vp9_temporal_filter(VP9_COMP *cpi, int distance) { 690 VP9_COMMON *const cm = &cpi->common; 691 RATE_CONTROL *const rc = &cpi->rc; 692 MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; 693 ARNRFilterData *arnr_filter_data = &cpi->arnr_filter_data; 694 int frame; 695 int frames_to_blur; 696 int start_frame; 697 int strength; 698 int frames_to_blur_backward; 699 int frames_to_blur_forward; 700 struct scale_factors *sf = &arnr_filter_data->sf; 701 YV12_BUFFER_CONFIG **frames = arnr_filter_data->frames; 702 int rdmult; 703 704 // Apply context specific adjustments to the arnr filter parameters. 705 adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength); 706 frames_to_blur_backward = (frames_to_blur / 2); 707 frames_to_blur_forward = ((frames_to_blur - 1) / 2); 708 start_frame = distance + frames_to_blur_forward; 709 710 arnr_filter_data->strength = strength; 711 arnr_filter_data->frame_count = frames_to_blur; 712 arnr_filter_data->alt_ref_index = frames_to_blur_backward; 713 714 // Setup frame pointers, NULL indicates frame not included in filter. 715 for (frame = 0; frame < frames_to_blur; ++frame) { 716 const int which_buffer = start_frame - frame; 717 struct lookahead_entry *buf = 718 vp9_lookahead_peek(cpi->lookahead, which_buffer); 719 frames[frames_to_blur - 1 - frame] = &buf->img; 720 } 721 722 if (frames_to_blur > 0) { 723 // Setup scaling factors. Scaling on each of the arnr frames is not 724 // supported. 725 if (cpi->use_svc) { 726 // In spatial svc the scaling factors might be less then 1/2. 727 // So we will use non-normative scaling. 728 int frame_used = 0; 729 #if CONFIG_VP9_HIGHBITDEPTH 730 vp9_setup_scale_factors_for_frame( 731 sf, get_frame_new_buffer(cm)->y_crop_width, 732 get_frame_new_buffer(cm)->y_crop_height, 733 get_frame_new_buffer(cm)->y_crop_width, 734 get_frame_new_buffer(cm)->y_crop_height, cm->use_highbitdepth); 735 #else 736 vp9_setup_scale_factors_for_frame( 737 sf, get_frame_new_buffer(cm)->y_crop_width, 738 get_frame_new_buffer(cm)->y_crop_height, 739 get_frame_new_buffer(cm)->y_crop_width, 740 get_frame_new_buffer(cm)->y_crop_height); 741 #endif // CONFIG_VP9_HIGHBITDEPTH 742 743 for (frame = 0; frame < frames_to_blur; ++frame) { 744 if (cm->mi_cols * MI_SIZE != frames[frame]->y_width || 745 cm->mi_rows * MI_SIZE != frames[frame]->y_height) { 746 if (vpx_realloc_frame_buffer(&cpi->svc.scaled_frames[frame_used], 747 cm->width, cm->height, cm->subsampling_x, 748 cm->subsampling_y, 749 #if CONFIG_VP9_HIGHBITDEPTH 750 cm->use_highbitdepth, 751 #endif 752 VP9_ENC_BORDER_IN_PIXELS, 753 cm->byte_alignment, NULL, NULL, NULL)) { 754 vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, 755 "Failed to reallocate alt_ref_buffer"); 756 } 757 frames[frame] = vp9_scale_if_required( 758 cm, frames[frame], &cpi->svc.scaled_frames[frame_used], 0, 759 EIGHTTAP, 0); 760 ++frame_used; 761 } 762 } 763 cm->mi = cm->mip + cm->mi_stride + 1; 764 xd->mi = cm->mi_grid_visible; 765 xd->mi[0] = cm->mi; 766 } else { 767 // ARF is produced at the native frame size and resized when coded. 768 #if CONFIG_VP9_HIGHBITDEPTH 769 vp9_setup_scale_factors_for_frame( 770 sf, frames[0]->y_crop_width, frames[0]->y_crop_height, 771 frames[0]->y_crop_width, frames[0]->y_crop_height, 772 cm->use_highbitdepth); 773 #else 774 vp9_setup_scale_factors_for_frame( 775 sf, frames[0]->y_crop_width, frames[0]->y_crop_height, 776 frames[0]->y_crop_width, frames[0]->y_crop_height); 777 #endif // CONFIG_VP9_HIGHBITDEPTH 778 } 779 } 780 781 // Initialize errorperbit and sabperbit. 782 rdmult = (int)vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX); 783 if (rdmult < 1) rdmult = 1; 784 set_error_per_bit(&cpi->td.mb, rdmult); 785 vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX); 786 787 if (!cpi->row_mt) 788 temporal_filter_iterate_c(cpi); 789 else 790 vp9_temporal_filter_row_mt(cpi); 791 } 792