1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <math.h> 12 #include <limits.h> 13 14 #include "vp9/common/vp9_onyxc_int.h" 15 #include "vp9/common/vp9_reconinter.h" 16 #include "vp9/encoder/vp9_onyx_int.h" 17 #include "vp9/common/vp9_systemdependent.h" 18 #include "vp9/encoder/vp9_quantize.h" 19 #include "vp9/common/vp9_alloccommon.h" 20 #include "vp9/encoder/vp9_mcomp.h" 21 #include "vp9/encoder/vp9_firstpass.h" 22 #include "vp9/encoder/vp9_psnr.h" 23 #include "vpx_scale/vpx_scale.h" 24 #include "vp9/common/vp9_extend.h" 25 #include "vp9/encoder/vp9_ratectrl.h" 26 #include "vp9/common/vp9_quant_common.h" 27 #include "vp9/encoder/vp9_segmentation.h" 28 #include "vpx_mem/vpx_mem.h" 29 #include "vpx_ports/vpx_timer.h" 30 31 #define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering 32 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering 33 34 static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, 35 uint8_t *y_mb_ptr, 36 uint8_t *u_mb_ptr, 37 uint8_t *v_mb_ptr, 38 int stride, 39 int mv_row, 40 int mv_col, 41 uint8_t *pred, 42 struct scale_factors *scale) { 43 const int which_mv = 0; 44 MV mv = { mv_row, mv_col }; 45 46 vp9_build_inter_predictor(y_mb_ptr, stride, 47 &pred[0], 16, 48 &mv, 49 scale, 50 16, 16, 51 which_mv, 52 &xd->subpix, MV_PRECISION_Q3); 53 54 stride = (stride + 1) >> 1; 55 56 vp9_build_inter_predictor(u_mb_ptr, stride, 57 &pred[256], 8, 58 &mv, 59 scale, 60 8, 8, 61 which_mv, 62 &xd->subpix, MV_PRECISION_Q4); 63 64 vp9_build_inter_predictor(v_mb_ptr, stride, 65 &pred[320], 8, 66 &mv, 67 scale, 68 8, 8, 69 which_mv, 70 &xd->subpix, MV_PRECISION_Q4); 71 } 72 73 void vp9_temporal_filter_apply_c(uint8_t *frame1, 74 unsigned int stride, 75 uint8_t *frame2, 76 unsigned int block_size, 77 int strength, 78 int filter_weight, 79 unsigned int *accumulator, 80 uint16_t *count) { 81 unsigned int i, j, k; 82 int modifier; 83 int byte = 0; 84 85 for (i = 0, k = 0; i < block_size; i++) { 86 for (j = 0; j < block_size; j++, k++) { 87 int src_byte = frame1[byte]; 88 int pixel_value = *frame2++; 89 90 modifier = src_byte - pixel_value; 91 // This is an integer approximation of: 92 // float coeff = (3.0 * modifer * modifier) / pow(2, strength); 93 // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff); 94 modifier *= modifier; 95 modifier *= 3; 96 modifier += 1 << (strength - 1); 97 modifier >>= strength; 98 99 if (modifier > 16) 100 modifier = 16; 101 102 modifier = 16 - modifier; 103 modifier *= filter_weight; 104 105 count[k] += modifier; 106 accumulator[k] += modifier * pixel_value; 107 108 byte++; 109 } 110 111 byte += stride - block_size; 112 } 113 } 114 115 #if ALT_REF_MC_ENABLED 116 117 static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, 118 uint8_t *arf_frame_buf, 119 uint8_t *frame_ptr_buf, 120 int stride, 121 int error_thresh) { 122 MACROBLOCK *x = &cpi->mb; 123 MACROBLOCKD* const xd = &x->e_mbd; 124 int step_param; 125 int sadpb = x->sadperbit16; 126 int bestsme = INT_MAX; 127 128 int_mv best_ref_mv1; 129 int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ 130 int_mv *ref_mv; 131 132 // Save input state 133 struct buf_2d src = x->plane[0].src; 134 struct buf_2d pre = xd->plane[0].pre[0]; 135 136 best_ref_mv1.as_int = 0; 137 best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3; 138 best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3; 139 140 // Setup frame pointers 141 x->plane[0].src.buf = arf_frame_buf; 142 x->plane[0].src.stride = stride; 143 xd->plane[0].pre[0].buf = frame_ptr_buf; 144 xd->plane[0].pre[0].stride = stride; 145 146 // Further step/diamond searches as necessary 147 if (cpi->speed < 8) 148 step_param = cpi->sf.reduce_first_step_size + ((cpi->speed > 5) ? 1 : 0); 149 else 150 step_param = cpi->sf.reduce_first_step_size + 2; 151 step_param = MIN(step_param, (cpi->sf.max_step_search_steps - 2)); 152 153 /*cpi->sf.search_method == HEX*/ 154 // Ignore mv costing by sending NULL pointer instead of cost arrays 155 ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0]; 156 bestsme = vp9_hex_search(x, &best_ref_mv1_full.as_mv, 157 step_param, sadpb, 1, 158 &cpi->fn_ptr[BLOCK_16X16], 159 0, &best_ref_mv1.as_mv, &ref_mv->as_mv); 160 161 #if ALT_REF_SUBPEL_ENABLED 162 // Try sub-pixel MC? 163 // if (bestsme > error_thresh && bestsme < INT_MAX) 164 { 165 int distortion; 166 unsigned int sse; 167 // Ignore mv costing by sending NULL pointer instead of cost array 168 bestsme = cpi->find_fractional_mv_step(x, &ref_mv->as_mv, 169 &best_ref_mv1.as_mv, 170 cpi->common.allow_high_precision_mv, 171 x->errorperbit, 172 &cpi->fn_ptr[BLOCK_16X16], 173 0, cpi->sf.subpel_iters_per_step, 174 NULL, NULL, 175 &distortion, &sse); 176 } 177 #endif 178 179 // Restore input state 180 x->plane[0].src = src; 181 xd->plane[0].pre[0] = pre; 182 183 return bestsme; 184 } 185 #endif 186 187 static void temporal_filter_iterate_c(VP9_COMP *cpi, 188 int frame_count, 189 int alt_ref_index, 190 int strength, 191 struct scale_factors *scale) { 192 int byte; 193 int frame; 194 int mb_col, mb_row; 195 unsigned int filter_weight; 196 int mb_cols = cpi->common.mb_cols; 197 int mb_rows = cpi->common.mb_rows; 198 int mb_y_offset = 0; 199 int mb_uv_offset = 0; 200 DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 + 8 * 8 + 8 * 8); 201 DECLARE_ALIGNED_ARRAY(16, uint16_t, count, 16 * 16 + 8 * 8 + 8 * 8); 202 MACROBLOCKD *mbd = &cpi->mb.e_mbd; 203 YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index]; 204 uint8_t *dst1, *dst2; 205 DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor, 16 * 16 + 8 * 8 + 8 * 8); 206 207 // Save input state 208 uint8_t* input_buffer[MAX_MB_PLANE]; 209 int i; 210 211 for (i = 0; i < MAX_MB_PLANE; i++) 212 input_buffer[i] = mbd->plane[i].pre[0].buf; 213 214 for (mb_row = 0; mb_row < mb_rows; mb_row++) { 215 #if ALT_REF_MC_ENABLED 216 // Source frames are extended to 16 pixels. This is different than 217 // L/A/G reference frames that have a border of 32 (VP9BORDERINPIXELS) 218 // A 6/8 tap filter is used for motion search. This requires 2 pixels 219 // before and 3 pixels after. So the largest Y mv on a border would 220 // then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the 221 // Y and therefore only extended by 8. The largest mv that a UV block 222 // can support is 8 - VP9_INTERP_EXTEND. A UV mv is half of a Y mv. 223 // (16 - VP9_INTERP_EXTEND) >> 1 which is greater than 224 // 8 - VP9_INTERP_EXTEND. 225 // To keep the mv in play for both Y and UV planes the max that it 226 // can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1). 227 cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND)); 228 cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) 229 + (17 - 2 * VP9_INTERP_EXTEND); 230 #endif 231 232 for (mb_col = 0; mb_col < mb_cols; mb_col++) { 233 int i, j, k; 234 int stride; 235 236 vpx_memset(accumulator, 0, 384 * sizeof(unsigned int)); 237 vpx_memset(count, 0, 384 * sizeof(uint16_t)); 238 239 #if ALT_REF_MC_ENABLED 240 cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND)); 241 cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16) 242 + (17 - 2 * VP9_INTERP_EXTEND); 243 #endif 244 245 for (frame = 0; frame < frame_count; frame++) { 246 if (cpi->frames[frame] == NULL) 247 continue; 248 249 mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row = 0; 250 mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col = 0; 251 252 if (frame == alt_ref_index) { 253 filter_weight = 2; 254 } else { 255 int err = 0; 256 #if ALT_REF_MC_ENABLED 257 #define THRESH_LOW 10000 258 #define THRESH_HIGH 20000 259 260 // Find best match in this frame by MC 261 err = temporal_filter_find_matching_mb_c 262 (cpi, 263 cpi->frames[alt_ref_index]->y_buffer + mb_y_offset, 264 cpi->frames[frame]->y_buffer + mb_y_offset, 265 cpi->frames[frame]->y_stride, 266 THRESH_LOW); 267 #endif 268 // Assign higher weight to matching MB if it's error 269 // score is lower. If not applying MC default behavior 270 // is to weight all MBs equal. 271 filter_weight = err < THRESH_LOW 272 ? 2 : err < THRESH_HIGH ? 1 : 0; 273 } 274 275 if (filter_weight != 0) { 276 // Construct the predictors 277 temporal_filter_predictors_mb_c 278 (mbd, 279 cpi->frames[frame]->y_buffer + mb_y_offset, 280 cpi->frames[frame]->u_buffer + mb_uv_offset, 281 cpi->frames[frame]->v_buffer + mb_uv_offset, 282 cpi->frames[frame]->y_stride, 283 mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row, 284 mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col, 285 predictor, scale); 286 287 // Apply the filter (YUV) 288 vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, 289 predictor, 16, strength, filter_weight, 290 accumulator, count); 291 292 vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, 293 predictor + 256, 8, strength, filter_weight, 294 accumulator + 256, count + 256); 295 296 vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, 297 predictor + 320, 8, strength, filter_weight, 298 accumulator + 320, count + 320); 299 } 300 } 301 302 // Normalize filter output to produce AltRef frame 303 dst1 = cpi->alt_ref_buffer.y_buffer; 304 stride = cpi->alt_ref_buffer.y_stride; 305 byte = mb_y_offset; 306 for (i = 0, k = 0; i < 16; i++) { 307 for (j = 0; j < 16; j++, k++) { 308 unsigned int pval = accumulator[k] + (count[k] >> 1); 309 pval *= cpi->fixed_divide[count[k]]; 310 pval >>= 19; 311 312 dst1[byte] = (uint8_t)pval; 313 314 // move to next pixel 315 byte++; 316 } 317 318 byte += stride - 16; 319 } 320 321 dst1 = cpi->alt_ref_buffer.u_buffer; 322 dst2 = cpi->alt_ref_buffer.v_buffer; 323 stride = cpi->alt_ref_buffer.uv_stride; 324 byte = mb_uv_offset; 325 for (i = 0, k = 256; i < 8; i++) { 326 for (j = 0; j < 8; j++, k++) { 327 int m = k + 64; 328 329 // U 330 unsigned int pval = accumulator[k] + (count[k] >> 1); 331 pval *= cpi->fixed_divide[count[k]]; 332 pval >>= 19; 333 dst1[byte] = (uint8_t)pval; 334 335 // V 336 pval = accumulator[m] + (count[m] >> 1); 337 pval *= cpi->fixed_divide[count[m]]; 338 pval >>= 19; 339 dst2[byte] = (uint8_t)pval; 340 341 // move to next pixel 342 byte++; 343 } 344 345 byte += stride - 8; 346 } 347 348 mb_y_offset += 16; 349 mb_uv_offset += 8; 350 } 351 352 mb_y_offset += 16 * (f->y_stride - mb_cols); 353 mb_uv_offset += 8 * (f->uv_stride - mb_cols); 354 } 355 356 // Restore input state 357 for (i = 0; i < MAX_MB_PLANE; i++) 358 mbd->plane[i].pre[0].buf = input_buffer[i]; 359 } 360 361 void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { 362 VP9_COMMON *const cm = &cpi->common; 363 364 int frame = 0; 365 366 int frames_to_blur_backward = 0; 367 int frames_to_blur_forward = 0; 368 int frames_to_blur = 0; 369 int start_frame = 0; 370 371 int strength = cpi->active_arnr_strength; 372 int blur_type = cpi->oxcf.arnr_type; 373 int max_frames = cpi->active_arnr_frames; 374 375 const int num_frames_backward = distance; 376 const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead) 377 - (num_frames_backward + 1); 378 379 struct scale_factors scale; 380 struct scale_factors_common scale_comm; 381 382 switch (blur_type) { 383 case 1: 384 // Backward Blur 385 frames_to_blur_backward = num_frames_backward; 386 387 if (frames_to_blur_backward >= max_frames) 388 frames_to_blur_backward = max_frames - 1; 389 390 frames_to_blur = frames_to_blur_backward + 1; 391 break; 392 393 case 2: 394 // Forward Blur 395 396 frames_to_blur_forward = num_frames_forward; 397 398 if (frames_to_blur_forward >= max_frames) 399 frames_to_blur_forward = max_frames - 1; 400 401 frames_to_blur = frames_to_blur_forward + 1; 402 break; 403 404 case 3: 405 default: 406 // Center Blur 407 frames_to_blur_forward = num_frames_forward; 408 frames_to_blur_backward = num_frames_backward; 409 410 if (frames_to_blur_forward > frames_to_blur_backward) 411 frames_to_blur_forward = frames_to_blur_backward; 412 413 if (frames_to_blur_backward > frames_to_blur_forward) 414 frames_to_blur_backward = frames_to_blur_forward; 415 416 // When max_frames is even we have 1 more frame backward than forward 417 if (frames_to_blur_forward > (max_frames - 1) / 2) 418 frames_to_blur_forward = ((max_frames - 1) / 2); 419 420 if (frames_to_blur_backward > (max_frames / 2)) 421 frames_to_blur_backward = (max_frames / 2); 422 423 frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1; 424 break; 425 } 426 427 start_frame = distance + frames_to_blur_forward; 428 429 #ifdef DEBUGFWG 430 // DEBUG FWG 431 printf( 432 "max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d " 433 "start:%d", 434 max_frames, num_frames_backward, num_frames_forward, frames_to_blur, 435 frames_to_blur_backward, frames_to_blur_forward, cpi->source_encode_index, 436 cpi->last_alt_ref_sei, start_frame); 437 #endif 438 439 // Setup scaling factors. Scaling on each of the arnr frames is not supported 440 vp9_setup_scale_factors_for_frame(&scale, &scale_comm, 441 get_frame_new_buffer(cm)->y_crop_width, 442 get_frame_new_buffer(cm)->y_crop_height, 443 cm->width, cm->height); 444 445 // Setup frame pointers, NULL indicates frame not included in filter 446 vp9_zero(cpi->frames); 447 for (frame = 0; frame < frames_to_blur; frame++) { 448 int which_buffer = start_frame - frame; 449 struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, 450 which_buffer); 451 cpi->frames[frames_to_blur - 1 - frame] = &buf->img; 452 } 453 454 temporal_filter_iterate_c(cpi, frames_to_blur, frames_to_blur_backward, 455 strength, &scale); 456 } 457 458 void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame, 459 const int group_boost) { 460 int half_gf_int; 461 int frames_after_arf; 462 int frames_bwd = cpi->oxcf.arnr_max_frames - 1; 463 int frames_fwd = cpi->oxcf.arnr_max_frames - 1; 464 int q; 465 466 // Define the arnr filter width for this group of frames: 467 // We only filter frames that lie within a distance of half 468 // the GF interval from the ARF frame. We also have to trap 469 // cases where the filter extends beyond the end of clip. 470 // Note: this_frame->frame has been updated in the loop 471 // so it now points at the ARF frame. 472 half_gf_int = cpi->baseline_gf_interval >> 1; 473 frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1); 474 475 switch (cpi->oxcf.arnr_type) { 476 case 1: // Backward filter 477 frames_fwd = 0; 478 if (frames_bwd > half_gf_int) 479 frames_bwd = half_gf_int; 480 break; 481 482 case 2: // Forward filter 483 if (frames_fwd > half_gf_int) 484 frames_fwd = half_gf_int; 485 if (frames_fwd > frames_after_arf) 486 frames_fwd = frames_after_arf; 487 frames_bwd = 0; 488 break; 489 490 case 3: // Centered filter 491 default: 492 frames_fwd >>= 1; 493 if (frames_fwd > frames_after_arf) 494 frames_fwd = frames_after_arf; 495 if (frames_fwd > half_gf_int) 496 frames_fwd = half_gf_int; 497 498 frames_bwd = frames_fwd; 499 500 // For even length filter there is one more frame backward 501 // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. 502 if (frames_bwd < half_gf_int) 503 frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1; 504 break; 505 } 506 507 cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd; 508 509 // Adjust the strength based on active max q 510 q = ((int)vp9_convert_qindex_to_q(cpi->active_worst_quality) >> 1); 511 if (q > 8) { 512 cpi->active_arnr_strength = cpi->oxcf.arnr_strength; 513 } else { 514 cpi->active_arnr_strength = cpi->oxcf.arnr_strength - (8 - q); 515 if (cpi->active_arnr_strength < 0) 516 cpi->active_arnr_strength = 0; 517 } 518 519 // Adjust number of frames in filter and strength based on gf boost level. 520 if (cpi->active_arnr_frames > (group_boost / 150)) { 521 cpi->active_arnr_frames = (group_boost / 150); 522 cpi->active_arnr_frames += !(cpi->active_arnr_frames & 1); 523 } 524 if (cpi->active_arnr_strength > (group_boost / 300)) { 525 cpi->active_arnr_strength = (group_boost / 300); 526 } 527 } 528