1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <limits.h> 12 #include <math.h> 13 #include <stdio.h> 14 15 #include "./vp9_rtcd.h" 16 #include "./vpx_config.h" 17 18 #include "vpx_ports/vpx_timer.h" 19 20 #include "vp9/common/vp9_common.h" 21 #include "vp9/common/vp9_entropy.h" 22 #include "vp9/common/vp9_entropymode.h" 23 #include "vp9/common/vp9_extend.h" 24 #include "vp9/common/vp9_findnearmv.h" 25 #include "vp9/common/vp9_idct.h" 26 #include "vp9/common/vp9_mvref_common.h" 27 #include "vp9/common/vp9_pred_common.h" 28 #include "vp9/common/vp9_quant_common.h" 29 #include "vp9/common/vp9_reconintra.h" 30 #include "vp9/common/vp9_reconinter.h" 31 #include "vp9/common/vp9_seg_common.h" 32 #include "vp9/common/vp9_tile_common.h" 33 #include "vp9/encoder/vp9_encodeframe.h" 34 #include "vp9/encoder/vp9_encodeintra.h" 35 #include "vp9/encoder/vp9_encodemb.h" 36 #include "vp9/encoder/vp9_encodemv.h" 37 #include "vp9/encoder/vp9_onyx_int.h" 38 #include "vp9/encoder/vp9_rdopt.h" 39 #include "vp9/encoder/vp9_segmentation.h" 40 #include "vp9/common/vp9_systemdependent.h" 41 #include "vp9/encoder/vp9_tokenize.h" 42 #include "vp9/encoder/vp9_vaq.h" 43 44 45 #define DBG_PRNT_SEGMAP 0 46 47 48 // #define ENC_DEBUG 49 #ifdef ENC_DEBUG 50 int enc_debug = 0; 51 #endif 52 53 static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) { 54 switch (subsize) { 55 case BLOCK_64X64: 56 case BLOCK_64X32: 57 case BLOCK_32X64: 58 case BLOCK_32X32: 59 return &x->sb_index; 60 case BLOCK_32X16: 61 case BLOCK_16X32: 62 case BLOCK_16X16: 63 return &x->mb_index; 64 case BLOCK_16X8: 65 case BLOCK_8X16: 66 case BLOCK_8X8: 67 return &x->b_index; 68 case BLOCK_8X4: 69 case BLOCK_4X8: 70 case BLOCK_4X4: 71 return &x->ab_index; 72 default: 73 assert(0); 74 return NULL; 75 } 76 } 77 78 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, 79 int mi_row, int mi_col, BLOCK_SIZE bsize); 80 81 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); 82 83 /* activity_avg must be positive, or flat regions could get a zero weight 84 * (infinite lambda), which confounds analysis. 85 * This also avoids the need for divide by zero checks in 86 * vp9_activity_masking(). 87 */ 88 #define ACTIVITY_AVG_MIN (64) 89 90 /* Motion vector component magnitude threshold for defining fast motion. */ 91 #define FAST_MOTION_MV_THRESH (24) 92 93 /* This is used as a reference when computing the source variance for the 94 * purposes of activity masking. 95 * Eventually this should be replaced by custom no-reference routines, 96 * which will be faster. 97 */ 98 static const uint8_t VP9_VAR_OFFS[64] = { 99 128, 128, 128, 128, 128, 128, 128, 128, 100 128, 128, 128, 128, 128, 128, 128, 128, 101 128, 128, 128, 128, 128, 128, 128, 128, 102 128, 128, 128, 128, 128, 128, 128, 128, 103 128, 128, 128, 128, 128, 128, 128, 128, 104 128, 128, 128, 128, 128, 128, 128, 128, 105 128, 128, 128, 128, 128, 128, 128, 128, 106 128, 128, 128, 128, 128, 128, 128, 128 107 }; 108 109 static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, MACROBLOCK *x, 110 BLOCK_SIZE bs) { 111 unsigned int var, sse; 112 var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, 113 x->plane[0].src.stride, 114 VP9_VAR_OFFS, 0, &sse); 115 return (var + (1 << (num_pels_log2_lookup[bs] - 1))) >> 116 num_pels_log2_lookup[bs]; 117 } 118 119 // Original activity measure from Tim T's code. 120 static unsigned int tt_activity_measure(MACROBLOCK *x) { 121 unsigned int act; 122 unsigned int sse; 123 /* TODO: This could also be done over smaller areas (8x8), but that would 124 * require extensive changes elsewhere, as lambda is assumed to be fixed 125 * over an entire MB in most of the code. 126 * Another option is to compute four 8x8 variances, and pick a single 127 * lambda using a non-linear combination (e.g., the smallest, or second 128 * smallest, etc.). 129 */ 130 act = vp9_variance16x16(x->plane[0].src.buf, x->plane[0].src.stride, 131 VP9_VAR_OFFS, 0, &sse); 132 act <<= 4; 133 134 /* If the region is flat, lower the activity some more. */ 135 if (act < 8 << 12) 136 act = act < 5 << 12 ? act : 5 << 12; 137 138 return act; 139 } 140 141 // Stub for alternative experimental activity measures. 142 static unsigned int alt_activity_measure(MACROBLOCK *x, int use_dc_pred) { 143 return vp9_encode_intra(x, use_dc_pred); 144 } 145 146 // Measure the activity of the current macroblock 147 // What we measure here is TBD so abstracted to this function 148 #define ALT_ACT_MEASURE 1 149 static unsigned int mb_activity_measure(MACROBLOCK *x, int mb_row, int mb_col) { 150 unsigned int mb_activity; 151 152 if (ALT_ACT_MEASURE) { 153 int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); 154 155 // Or use and alternative. 156 mb_activity = alt_activity_measure(x, use_dc_pred); 157 } else { 158 // Original activity measure from Tim T's code. 159 mb_activity = tt_activity_measure(x); 160 } 161 162 if (mb_activity < ACTIVITY_AVG_MIN) 163 mb_activity = ACTIVITY_AVG_MIN; 164 165 return mb_activity; 166 } 167 168 // Calculate an "average" mb activity value for the frame 169 #define ACT_MEDIAN 0 170 static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) { 171 #if ACT_MEDIAN 172 // Find median: Simple n^2 algorithm for experimentation 173 { 174 unsigned int median; 175 unsigned int i, j; 176 unsigned int *sortlist; 177 unsigned int tmp; 178 179 // Create a list to sort to 180 CHECK_MEM_ERROR(&cpi->common, sortlist, vpx_calloc(sizeof(unsigned int), 181 cpi->common.MBs)); 182 183 // Copy map to sort list 184 vpx_memcpy(sortlist, cpi->mb_activity_map, 185 sizeof(unsigned int) * cpi->common.MBs); 186 187 // Ripple each value down to its correct position 188 for (i = 1; i < cpi->common.MBs; i ++) { 189 for (j = i; j > 0; j --) { 190 if (sortlist[j] < sortlist[j - 1]) { 191 // Swap values 192 tmp = sortlist[j - 1]; 193 sortlist[j - 1] = sortlist[j]; 194 sortlist[j] = tmp; 195 } else { 196 break; 197 } 198 } 199 } 200 201 // Even number MBs so estimate median as mean of two either side. 202 median = (1 + sortlist[cpi->common.MBs >> 1] + 203 sortlist[(cpi->common.MBs >> 1) + 1]) >> 1; 204 205 cpi->activity_avg = median; 206 207 vpx_free(sortlist); 208 } 209 #else 210 // Simple mean for now 211 cpi->activity_avg = (unsigned int) (activity_sum / cpi->common.MBs); 212 #endif // ACT_MEDIAN 213 214 if (cpi->activity_avg < ACTIVITY_AVG_MIN) 215 cpi->activity_avg = ACTIVITY_AVG_MIN; 216 217 // Experimental code: return fixed value normalized for several clips 218 if (ALT_ACT_MEASURE) 219 cpi->activity_avg = 100000; 220 } 221 222 #define USE_ACT_INDEX 0 223 #define OUTPUT_NORM_ACT_STATS 0 224 225 #if USE_ACT_INDEX 226 // Calculate an activity index for each mb 227 static void calc_activity_index(VP9_COMP *cpi, MACROBLOCK *x) { 228 VP9_COMMON *const cm = &cpi->common; 229 int mb_row, mb_col; 230 231 int64_t act; 232 int64_t a; 233 int64_t b; 234 235 #if OUTPUT_NORM_ACT_STATS 236 FILE *f = fopen("norm_act.stt", "a"); 237 fprintf(f, "\n%12d\n", cpi->activity_avg); 238 #endif 239 240 // Reset pointers to start of activity map 241 x->mb_activity_ptr = cpi->mb_activity_map; 242 243 // Calculate normalized mb activity number. 244 for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { 245 // for each macroblock col in image 246 for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { 247 // Read activity from the map 248 act = *(x->mb_activity_ptr); 249 250 // Calculate a normalized activity number 251 a = act + 4 * cpi->activity_avg; 252 b = 4 * act + cpi->activity_avg; 253 254 if (b >= a) 255 *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1; 256 else 257 *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b); 258 259 #if OUTPUT_NORM_ACT_STATS 260 fprintf(f, " %6d", *(x->mb_activity_ptr)); 261 #endif 262 // Increment activity map pointers 263 x->mb_activity_ptr++; 264 } 265 266 #if OUTPUT_NORM_ACT_STATS 267 fprintf(f, "\n"); 268 #endif 269 } 270 271 #if OUTPUT_NORM_ACT_STATS 272 fclose(f); 273 #endif 274 } 275 #endif // USE_ACT_INDEX 276 277 // Loop through all MBs. Note activity of each, average activity and 278 // calculate a normalized activity for each 279 static void build_activity_map(VP9_COMP *cpi) { 280 MACROBLOCK * const x = &cpi->mb; 281 MACROBLOCKD *xd = &x->e_mbd; 282 VP9_COMMON * const cm = &cpi->common; 283 284 #if ALT_ACT_MEASURE 285 YV12_BUFFER_CONFIG *new_yv12 = get_frame_new_buffer(cm); 286 int recon_yoffset; 287 int recon_y_stride = new_yv12->y_stride; 288 #endif 289 290 int mb_row, mb_col; 291 unsigned int mb_activity; 292 int64_t activity_sum = 0; 293 294 x->mb_activity_ptr = cpi->mb_activity_map; 295 296 // for each macroblock row in image 297 for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { 298 #if ALT_ACT_MEASURE 299 // reset above block coeffs 300 xd->up_available = (mb_row != 0); 301 recon_yoffset = (mb_row * recon_y_stride * 16); 302 #endif 303 // for each macroblock col in image 304 for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { 305 #if ALT_ACT_MEASURE 306 xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset; 307 xd->left_available = (mb_col != 0); 308 recon_yoffset += 16; 309 #endif 310 311 // measure activity 312 mb_activity = mb_activity_measure(x, mb_row, mb_col); 313 314 // Keep frame sum 315 activity_sum += mb_activity; 316 317 // Store MB level activity details. 318 *x->mb_activity_ptr = mb_activity; 319 320 // Increment activity map pointer 321 x->mb_activity_ptr++; 322 323 // adjust to the next column of source macroblocks 324 x->plane[0].src.buf += 16; 325 } 326 327 // adjust to the next row of mbs 328 x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols; 329 } 330 331 // Calculate an "average" MB activity 332 calc_av_activity(cpi, activity_sum); 333 334 #if USE_ACT_INDEX 335 // Calculate an activity index number of each mb 336 calc_activity_index(cpi, x); 337 #endif 338 } 339 340 // Macroblock activity masking 341 void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { 342 #if USE_ACT_INDEX 343 x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2); 344 x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); 345 x->errorperbit += (x->errorperbit == 0); 346 #else 347 int64_t a; 348 int64_t b; 349 int64_t act = *(x->mb_activity_ptr); 350 351 // Apply the masking to the RD multiplier. 352 a = act + (2 * cpi->activity_avg); 353 b = (2 * act) + cpi->activity_avg; 354 355 x->rdmult = (unsigned int) (((int64_t) x->rdmult * b + (a >> 1)) / a); 356 x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); 357 x->errorperbit += (x->errorperbit == 0); 358 #endif 359 360 // Activity based Zbin adjustment 361 adjust_act_zbin(cpi, x); 362 } 363 364 static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, 365 BLOCK_SIZE bsize, int output_enabled) { 366 int i, x_idx, y; 367 VP9_COMMON *const cm = &cpi->common; 368 MACROBLOCK *const x = &cpi->mb; 369 MACROBLOCKD *const xd = &x->e_mbd; 370 struct macroblock_plane *const p = x->plane; 371 struct macroblockd_plane *const pd = xd->plane; 372 MODE_INFO *mi = &ctx->mic; 373 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; 374 MODE_INFO *mi_addr = xd->mi_8x8[0]; 375 376 int mb_mode_index = ctx->best_mode_index; 377 const int mis = cm->mode_info_stride; 378 const int mi_width = num_8x8_blocks_wide_lookup[bsize]; 379 const int mi_height = num_8x8_blocks_high_lookup[bsize]; 380 int max_plane; 381 382 assert(mi->mbmi.mode < MB_MODE_COUNT); 383 assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES); 384 assert(mi->mbmi.ref_frame[1] < MAX_REF_FRAMES); 385 assert(mi->mbmi.sb_type == bsize); 386 387 *mi_addr = *mi; 388 389 max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; 390 for (i = 0; i < max_plane; ++i) { 391 p[i].coeff = ctx->coeff_pbuf[i][1]; 392 pd[i].qcoeff = ctx->qcoeff_pbuf[i][1]; 393 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; 394 pd[i].eobs = ctx->eobs_pbuf[i][1]; 395 } 396 397 for (i = max_plane; i < MAX_MB_PLANE; ++i) { 398 p[i].coeff = ctx->coeff_pbuf[i][2]; 399 pd[i].qcoeff = ctx->qcoeff_pbuf[i][2]; 400 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; 401 pd[i].eobs = ctx->eobs_pbuf[i][2]; 402 } 403 404 // Restore the coding context of the MB to that that was in place 405 // when the mode was picked for it 406 for (y = 0; y < mi_height; y++) 407 for (x_idx = 0; x_idx < mi_width; x_idx++) 408 if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx 409 && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) 410 xd->mi_8x8[x_idx + y * mis] = mi_addr; 411 412 if (cpi->sf.variance_adaptive_quantization) { 413 vp9_mb_init_quantizer(cpi, x); 414 } 415 416 // FIXME(rbultje) I'm pretty sure this should go to the end of this block 417 // (i.e. after the output_enabled) 418 if (bsize < BLOCK_32X32) { 419 if (bsize < BLOCK_16X16) 420 ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8]; 421 ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16]; 422 } 423 424 if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) { 425 mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; 426 mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; 427 } 428 429 x->skip = ctx->skip; 430 vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, 431 sizeof(uint8_t) * ctx->num_4x4_blk); 432 433 if (!output_enabled) 434 return; 435 436 if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { 437 for (i = 0; i < TX_MODES; i++) 438 cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i]; 439 } 440 441 if (frame_is_intra_only(cm)) { 442 #if CONFIG_INTERNAL_STATS 443 static const int kf_mode_index[] = { 444 THR_DC /*DC_PRED*/, 445 THR_V_PRED /*V_PRED*/, 446 THR_H_PRED /*H_PRED*/, 447 THR_D45_PRED /*D45_PRED*/, 448 THR_D135_PRED /*D135_PRED*/, 449 THR_D117_PRED /*D117_PRED*/, 450 THR_D153_PRED /*D153_PRED*/, 451 THR_D207_PRED /*D207_PRED*/, 452 THR_D63_PRED /*D63_PRED*/, 453 THR_TM /*TM_PRED*/, 454 }; 455 cpi->mode_chosen_counts[kf_mode_index[mi->mbmi.mode]]++; 456 #endif 457 } else { 458 // Note how often each mode chosen as best 459 cpi->mode_chosen_counts[mb_mode_index]++; 460 if (is_inter_block(mbmi) 461 && (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) { 462 int_mv best_mv[2]; 463 const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0]; 464 const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1]; 465 best_mv[0].as_int = ctx->best_ref_mv.as_int; 466 best_mv[1].as_int = ctx->second_best_ref_mv.as_int; 467 if (mbmi->mode == NEWMV) { 468 best_mv[0].as_int = mbmi->ref_mvs[rf1][0].as_int; 469 if (rf2 > 0) 470 best_mv[1].as_int = mbmi->ref_mvs[rf2][0].as_int; 471 } 472 mbmi->best_mv[0].as_int = best_mv[0].as_int; 473 mbmi->best_mv[1].as_int = best_mv[1].as_int; 474 vp9_update_mv_count(cpi, x, best_mv); 475 } 476 477 if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) { 478 const int ctx = vp9_get_pred_context_switchable_interp(xd); 479 ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; 480 } 481 482 cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff; 483 cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff; 484 cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff; 485 486 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 487 cpi->rd_filter_diff[i] += ctx->best_filter_diff[i]; 488 } 489 } 490 491 void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, 492 int mi_row, int mi_col) { 493 uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, 494 src->alpha_buffer}; 495 const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, 496 src->alpha_stride}; 497 int i; 498 499 for (i = 0; i < MAX_MB_PLANE; i++) 500 setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col, 501 NULL, x->e_mbd.plane[i].subsampling_x, 502 x->e_mbd.plane[i].subsampling_y); 503 } 504 505 static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, 506 int mi_row, int mi_col, BLOCK_SIZE bsize) { 507 MACROBLOCK *const x = &cpi->mb; 508 VP9_COMMON *const cm = &cpi->common; 509 MACROBLOCKD *const xd = &x->e_mbd; 510 MB_MODE_INFO *mbmi; 511 const int dst_fb_idx = cm->new_fb_idx; 512 const int idx_str = xd->mode_info_stride * mi_row + mi_col; 513 const int mi_width = num_8x8_blocks_wide_lookup[bsize]; 514 const int mi_height = num_8x8_blocks_high_lookup[bsize]; 515 const int mb_row = mi_row >> 1; 516 const int mb_col = mi_col >> 1; 517 const int idx_map = mb_row * cm->mb_cols + mb_col; 518 const struct segmentation *const seg = &cm->seg; 519 520 set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col); 521 522 // Activity map pointer 523 x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; 524 x->active_ptr = cpi->active_map + idx_map; 525 526 xd->mi_8x8 = cm->mi_grid_visible + idx_str; 527 xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; 528 529 // Special case: if prev_mi is NULL, the previous mode info context 530 // cannot be used. 531 xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL; 532 533 xd->mi_8x8[0] = cm->mi + idx_str; 534 535 mbmi = &xd->mi_8x8[0]->mbmi; 536 537 // Set up destination pointers 538 setup_dst_planes(xd, &cm->yv12_fb[dst_fb_idx], mi_row, mi_col); 539 540 // Set up limit values for MV components 541 // mv beyond the range do not produce new/different prediction block 542 x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); 543 x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); 544 x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; 545 x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; 546 547 // Set up distance of MB to edge of frame in 1/8th pel units 548 assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); 549 set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, 550 cm->mi_rows, cm->mi_cols); 551 552 /* set up source buffers */ 553 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); 554 555 /* R/D setup */ 556 x->rddiv = cpi->RDDIV; 557 x->rdmult = cpi->RDMULT; 558 559 /* segment ID */ 560 if (seg->enabled) { 561 if (!cpi->sf.variance_adaptive_quantization) { 562 uint8_t *map = seg->update_map ? cpi->segmentation_map 563 : cm->last_frame_seg_map; 564 mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); 565 } 566 vp9_mb_init_quantizer(cpi, x); 567 568 if (seg->enabled && cpi->seg0_cnt > 0 569 && !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) 570 && vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) { 571 cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; 572 } else { 573 const int y = mb_row & ~3; 574 const int x = mb_col & ~3; 575 const int p16 = ((mb_row & 1) << 1) + (mb_col & 1); 576 const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1); 577 const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1; 578 const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1; 579 580 cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) 581 << 16) / cm->MBs; 582 } 583 584 x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; 585 } else { 586 mbmi->segment_id = 0; 587 x->encode_breakout = cpi->oxcf.encode_breakout; 588 } 589 } 590 591 static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, 592 int mi_row, int mi_col, 593 int *totalrate, int64_t *totaldist, 594 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, 595 int64_t best_rd) { 596 VP9_COMMON *const cm = &cpi->common; 597 MACROBLOCK *const x = &cpi->mb; 598 MACROBLOCKD *const xd = &x->e_mbd; 599 struct macroblock_plane *const p = x->plane; 600 struct macroblockd_plane *const pd = xd->plane; 601 int i; 602 int orig_rdmult = x->rdmult; 603 double rdmult_ratio; 604 605 vp9_clear_system_state(); // __asm emms; 606 rdmult_ratio = 1.0; // avoid uninitialized warnings 607 608 // Use the lower precision, but faster, 32x32 fdct for mode selection. 609 x->use_lp32x32fdct = 1; 610 611 if (bsize < BLOCK_8X8) { 612 // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 613 // there is nothing to be done. 614 if (x->ab_index != 0) { 615 *totalrate = 0; 616 *totaldist = 0; 617 return; 618 } 619 } 620 621 set_offsets(cpi, tile, mi_row, mi_col, bsize); 622 xd->mi_8x8[0]->mbmi.sb_type = bsize; 623 624 for (i = 0; i < MAX_MB_PLANE; ++i) { 625 p[i].coeff = ctx->coeff_pbuf[i][0]; 626 pd[i].qcoeff = ctx->qcoeff_pbuf[i][0]; 627 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; 628 pd[i].eobs = ctx->eobs_pbuf[i][0]; 629 } 630 ctx->is_coded = 0; 631 x->skip_recode = 0; 632 633 // Set to zero to make sure we do not use the previous encoded frame stats 634 xd->mi_8x8[0]->mbmi.skip_coeff = 0; 635 636 x->source_variance = get_sby_perpixel_variance(cpi, x, bsize); 637 638 if (cpi->sf.variance_adaptive_quantization) { 639 int energy; 640 if (bsize <= BLOCK_16X16) { 641 energy = x->mb_energy; 642 } else { 643 energy = vp9_block_energy(cpi, x, bsize); 644 } 645 646 xd->mi_8x8[0]->mbmi.segment_id = vp9_vaq_segment_id(energy); 647 rdmult_ratio = vp9_vaq_rdmult_ratio(energy); 648 vp9_mb_init_quantizer(cpi, x); 649 } 650 651 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) 652 vp9_activity_masking(cpi, x); 653 654 if (cpi->sf.variance_adaptive_quantization) { 655 vp9_clear_system_state(); // __asm emms; 656 x->rdmult = round(x->rdmult * rdmult_ratio); 657 } 658 659 // Find best coding mode & reconstruct the MB so it is available 660 // as a predictor for MBs that follow in the SB 661 if (frame_is_intra_only(cm)) { 662 vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx, 663 best_rd); 664 } else { 665 if (bsize >= BLOCK_8X8) 666 vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col, 667 totalrate, totaldist, bsize, ctx, best_rd); 668 else 669 vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, totalrate, 670 totaldist, bsize, ctx, best_rd); 671 } 672 673 if (cpi->sf.variance_adaptive_quantization) { 674 x->rdmult = orig_rdmult; 675 if (*totalrate != INT_MAX) { 676 vp9_clear_system_state(); // __asm emms; 677 *totalrate = round(*totalrate * rdmult_ratio); 678 } 679 } 680 } 681 682 static void update_stats(VP9_COMP *cpi) { 683 VP9_COMMON *const cm = &cpi->common; 684 MACROBLOCK *const x = &cpi->mb; 685 MACROBLOCKD *const xd = &x->e_mbd; 686 MODE_INFO *mi = xd->mi_8x8[0]; 687 MB_MODE_INFO *const mbmi = &mi->mbmi; 688 689 if (!frame_is_intra_only(cm)) { 690 const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id, 691 SEG_LVL_REF_FRAME); 692 693 if (!seg_ref_active) 694 cpi->intra_inter_count[vp9_get_pred_context_intra_inter(xd)] 695 [is_inter_block(mbmi)]++; 696 697 // If the segment reference feature is enabled we have only a single 698 // reference frame allowed for the segment so exclude it from 699 // the reference frame counts used to work out probabilities. 700 if (is_inter_block(mbmi) && !seg_ref_active) { 701 if (cm->comp_pred_mode == HYBRID_PREDICTION) 702 cpi->comp_inter_count[vp9_get_pred_context_comp_inter_inter(cm, xd)] 703 [has_second_ref(mbmi)]++; 704 705 if (has_second_ref(mbmi)) { 706 cpi->comp_ref_count[vp9_get_pred_context_comp_ref_p(cm, xd)] 707 [mbmi->ref_frame[0] == GOLDEN_FRAME]++; 708 } else { 709 cpi->single_ref_count[vp9_get_pred_context_single_ref_p1(xd)][0] 710 [mbmi->ref_frame[0] != LAST_FRAME]++; 711 if (mbmi->ref_frame[0] != LAST_FRAME) 712 cpi->single_ref_count[vp9_get_pred_context_single_ref_p2(xd)][1] 713 [mbmi->ref_frame[0] != GOLDEN_FRAME]++; 714 } 715 } 716 } 717 } 718 719 static BLOCK_SIZE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE bsize) { 720 switch (bsize) { 721 case BLOCK_64X64: 722 return &x->sb64_partitioning; 723 case BLOCK_32X32: 724 return &x->sb_partitioning[x->sb_index]; 725 case BLOCK_16X16: 726 return &x->mb_partitioning[x->sb_index][x->mb_index]; 727 case BLOCK_8X8: 728 return &x->b_partitioning[x->sb_index][x->mb_index][x->b_index]; 729 default: 730 assert(0); 731 return NULL; 732 } 733 } 734 735 static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, 736 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], 737 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], 738 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], 739 BLOCK_SIZE bsize) { 740 MACROBLOCK *const x = &cpi->mb; 741 MACROBLOCKD *const xd = &x->e_mbd; 742 int p; 743 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 744 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 745 int mi_width = num_8x8_blocks_wide_lookup[bsize]; 746 int mi_height = num_8x8_blocks_high_lookup[bsize]; 747 for (p = 0; p < MAX_MB_PLANE; p++) { 748 vpx_memcpy( 749 cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), 750 a + num_4x4_blocks_wide * p, 751 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> 752 xd->plane[p].subsampling_x); 753 vpx_memcpy( 754 cpi->left_context[p] 755 + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), 756 l + num_4x4_blocks_high * p, 757 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> 758 xd->plane[p].subsampling_y); 759 } 760 vpx_memcpy(cpi->above_seg_context + mi_col, sa, 761 sizeof(*cpi->above_seg_context) * mi_width); 762 vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl, 763 sizeof(cpi->left_seg_context[0]) * mi_height); 764 } 765 static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, 766 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], 767 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], 768 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], 769 BLOCK_SIZE bsize) { 770 const MACROBLOCK *const x = &cpi->mb; 771 const MACROBLOCKD *const xd = &x->e_mbd; 772 int p; 773 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 774 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 775 int mi_width = num_8x8_blocks_wide_lookup[bsize]; 776 int mi_height = num_8x8_blocks_high_lookup[bsize]; 777 778 // buffer the above/left context information of the block in search. 779 for (p = 0; p < MAX_MB_PLANE; ++p) { 780 vpx_memcpy( 781 a + num_4x4_blocks_wide * p, 782 cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), 783 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> 784 xd->plane[p].subsampling_x); 785 vpx_memcpy( 786 l + num_4x4_blocks_high * p, 787 cpi->left_context[p] 788 + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), 789 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> 790 xd->plane[p].subsampling_y); 791 } 792 vpx_memcpy(sa, cpi->above_seg_context + mi_col, 793 sizeof(*cpi->above_seg_context) * mi_width); 794 vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK), 795 sizeof(cpi->left_seg_context[0]) * mi_height); 796 } 797 798 static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, 799 TOKENEXTRA **tp, int mi_row, int mi_col, 800 int output_enabled, BLOCK_SIZE bsize, int sub_index) { 801 VP9_COMMON *const cm = &cpi->common; 802 MACROBLOCK *const x = &cpi->mb; 803 804 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) 805 return; 806 807 if (sub_index != -1) 808 *get_sb_index(x, bsize) = sub_index; 809 810 if (bsize < BLOCK_8X8) { 811 // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 812 // there is nothing to be done. 813 if (x->ab_index > 0) 814 return; 815 } 816 set_offsets(cpi, tile, mi_row, mi_col, bsize); 817 update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); 818 encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); 819 820 if (output_enabled) { 821 update_stats(cpi); 822 823 (*tp)->token = EOSB_TOKEN; 824 (*tp)++; 825 } 826 } 827 828 static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, 829 TOKENEXTRA **tp, int mi_row, int mi_col, 830 int output_enabled, BLOCK_SIZE bsize) { 831 VP9_COMMON *const cm = &cpi->common; 832 MACROBLOCK *const x = &cpi->mb; 833 BLOCK_SIZE c1 = BLOCK_8X8; 834 const int bsl = b_width_log2(bsize), bs = (1 << bsl) / 4; 835 int pl = 0; 836 PARTITION_TYPE partition; 837 BLOCK_SIZE subsize; 838 int i; 839 840 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) 841 return; 842 843 c1 = BLOCK_4X4; 844 if (bsize >= BLOCK_8X8) { 845 pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, 846 mi_row, mi_col, bsize); 847 c1 = *(get_sb_partitioning(x, bsize)); 848 } 849 partition = partition_lookup[bsl][c1]; 850 851 switch (partition) { 852 case PARTITION_NONE: 853 if (output_enabled && bsize >= BLOCK_8X8) 854 cpi->partition_count[pl][PARTITION_NONE]++; 855 encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, c1, -1); 856 break; 857 case PARTITION_VERT: 858 if (output_enabled) 859 cpi->partition_count[pl][PARTITION_VERT]++; 860 encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, c1, 0); 861 encode_b(cpi, tile, tp, mi_row, mi_col + bs, output_enabled, c1, 1); 862 break; 863 case PARTITION_HORZ: 864 if (output_enabled) 865 cpi->partition_count[pl][PARTITION_HORZ]++; 866 encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, c1, 0); 867 encode_b(cpi, tile, tp, mi_row + bs, mi_col, output_enabled, c1, 1); 868 break; 869 case PARTITION_SPLIT: 870 subsize = get_subsize(bsize, PARTITION_SPLIT); 871 872 if (output_enabled) 873 cpi->partition_count[pl][PARTITION_SPLIT]++; 874 875 for (i = 0; i < 4; i++) { 876 const int x_idx = i & 1, y_idx = i >> 1; 877 878 *get_sb_index(x, subsize) = i; 879 encode_sb(cpi, tile, tp, mi_row + y_idx * bs, mi_col + x_idx * bs, 880 output_enabled, subsize); 881 } 882 break; 883 default: 884 assert(0); 885 break; 886 } 887 888 if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) 889 update_partition_context(cpi->above_seg_context, cpi->left_seg_context, 890 mi_row, mi_col, c1, bsize); 891 } 892 893 // Check to see if the given partition size is allowed for a specified number 894 // of 8x8 block rows and columns remaining in the image. 895 // If not then return the largest allowed partition size 896 static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, 897 int rows_left, int cols_left, 898 int *bh, int *bw) { 899 if ((rows_left <= 0) || (cols_left <= 0)) { 900 return MIN(bsize, BLOCK_8X8); 901 } else { 902 for (; bsize > 0; --bsize) { 903 *bh = num_8x8_blocks_high_lookup[bsize]; 904 *bw = num_8x8_blocks_wide_lookup[bsize]; 905 if ((*bh <= rows_left) && (*bw <= cols_left)) { 906 break; 907 } 908 } 909 } 910 return bsize; 911 } 912 913 // This function attempts to set all mode info entries in a given SB64 914 // to the same block partition size. 915 // However, at the bottom and right borders of the image the requested size 916 // may not be allowed in which case this code attempts to choose the largest 917 // allowable partition. 918 static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile, 919 MODE_INFO **mi_8x8, int mi_row, int mi_col) { 920 VP9_COMMON *const cm = &cpi->common; 921 BLOCK_SIZE bsize = cpi->sf.always_this_block_size; 922 const int mis = cm->mode_info_stride; 923 int row8x8_remaining = tile->mi_row_end - mi_row; 924 int col8x8_remaining = tile->mi_col_end - mi_col; 925 int block_row, block_col; 926 MODE_INFO * mi_upper_left = cm->mi + mi_row * mis + mi_col; 927 int bh = num_8x8_blocks_high_lookup[bsize]; 928 int bw = num_8x8_blocks_wide_lookup[bsize]; 929 930 assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); 931 932 // Apply the requested partition size to the SB64 if it is all "in image" 933 if ((col8x8_remaining >= MI_BLOCK_SIZE) && 934 (row8x8_remaining >= MI_BLOCK_SIZE)) { 935 for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { 936 for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { 937 int index = block_row * mis + block_col; 938 mi_8x8[index] = mi_upper_left + index; 939 mi_8x8[index]->mbmi.sb_type = bsize; 940 } 941 } 942 } else { 943 // Else this is a partial SB64. 944 for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { 945 for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { 946 int index = block_row * mis + block_col; 947 // Find a partition size that fits 948 bsize = find_partition_size(cpi->sf.always_this_block_size, 949 (row8x8_remaining - block_row), 950 (col8x8_remaining - block_col), &bh, &bw); 951 mi_8x8[index] = mi_upper_left + index; 952 mi_8x8[index]->mbmi.sb_type = bsize; 953 } 954 } 955 } 956 } 957 958 static void copy_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8, 959 MODE_INFO **prev_mi_8x8) { 960 VP9_COMMON *const cm = &cpi->common; 961 const int mis = cm->mode_info_stride; 962 int block_row, block_col; 963 964 for (block_row = 0; block_row < 8; ++block_row) { 965 for (block_col = 0; block_col < 8; ++block_col) { 966 MODE_INFO * prev_mi = prev_mi_8x8[block_row * mis + block_col]; 967 BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; 968 ptrdiff_t offset; 969 970 if (prev_mi) { 971 offset = prev_mi - cm->prev_mi; 972 mi_8x8[block_row * mis + block_col] = cm->mi + offset; 973 mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type; 974 } 975 } 976 } 977 } 978 979 static int sb_has_motion(VP9_COMP *cpi, MODE_INFO **prev_mi_8x8) { 980 VP9_COMMON *const cm = &cpi->common; 981 const int mis = cm->mode_info_stride; 982 int block_row, block_col; 983 984 if (cm->prev_mi) { 985 for (block_row = 0; block_row < 8; ++block_row) { 986 for (block_col = 0; block_col < 8; ++block_col) { 987 MODE_INFO * prev_mi = prev_mi_8x8[block_row * mis + block_col]; 988 if (prev_mi) { 989 if (abs(prev_mi->mbmi.mv[0].as_mv.row) >= 8 || 990 abs(prev_mi->mbmi.mv[0].as_mv.col) >= 8) 991 return 1; 992 } 993 } 994 } 995 } 996 return 0; 997 } 998 999 static void rd_use_partition(VP9_COMP *cpi, 1000 const TileInfo *const tile, 1001 MODE_INFO **mi_8x8, 1002 TOKENEXTRA **tp, int mi_row, int mi_col, 1003 BLOCK_SIZE bsize, int *rate, int64_t *dist, 1004 int do_recon) { 1005 VP9_COMMON *const cm = &cpi->common; 1006 MACROBLOCK *const x = &cpi->mb; 1007 const int mis = cm->mode_info_stride; 1008 int bsl = b_width_log2(bsize); 1009 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 1010 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 1011 int ms = num_4x4_blocks_wide / 2; 1012 int mh = num_4x4_blocks_high / 2; 1013 int bss = (1 << bsl) / 4; 1014 int i, pl; 1015 PARTITION_TYPE partition = PARTITION_NONE; 1016 BLOCK_SIZE subsize; 1017 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 1018 PARTITION_CONTEXT sl[8], sa[8]; 1019 int last_part_rate = INT_MAX; 1020 int64_t last_part_dist = INT_MAX; 1021 int split_rate = INT_MAX; 1022 int64_t split_dist = INT_MAX; 1023 int none_rate = INT_MAX; 1024 int64_t none_dist = INT_MAX; 1025 int chosen_rate = INT_MAX; 1026 int64_t chosen_dist = INT_MAX; 1027 BLOCK_SIZE sub_subsize = BLOCK_4X4; 1028 int splits_below = 0; 1029 BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; 1030 1031 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) 1032 return; 1033 1034 partition = partition_lookup[bsl][bs_type]; 1035 1036 subsize = get_subsize(bsize, partition); 1037 1038 if (bsize < BLOCK_8X8) { 1039 // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 1040 // there is nothing to be done. 1041 if (x->ab_index != 0) { 1042 *rate = 0; 1043 *dist = 0; 1044 return; 1045 } 1046 } else { 1047 *(get_sb_partitioning(x, bsize)) = subsize; 1048 } 1049 save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1050 1051 if (bsize == BLOCK_16X16) { 1052 set_offsets(cpi, tile, mi_row, mi_col, bsize); 1053 x->mb_energy = vp9_block_energy(cpi, x, bsize); 1054 } 1055 1056 x->fast_ms = 0; 1057 x->subblock_ref = 0; 1058 1059 if (cpi->sf.adjust_partitioning_from_last_frame) { 1060 // Check if any of the sub blocks are further split. 1061 if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { 1062 sub_subsize = get_subsize(subsize, PARTITION_SPLIT); 1063 splits_below = 1; 1064 for (i = 0; i < 4; i++) { 1065 int jj = i >> 1, ii = i & 0x01; 1066 MODE_INFO * this_mi = mi_8x8[jj * bss * mis + ii * bss]; 1067 if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) { 1068 splits_below = 0; 1069 } 1070 } 1071 } 1072 1073 // If partition is not none try none unless each of the 4 splits are split 1074 // even further.. 1075 if (partition != PARTITION_NONE && !splits_below && 1076 mi_row + (ms >> 1) < cm->mi_rows && 1077 mi_col + (ms >> 1) < cm->mi_cols) { 1078 *(get_sb_partitioning(x, bsize)) = bsize; 1079 pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize, 1080 get_block_context(x, bsize), INT64_MAX); 1081 1082 pl = partition_plane_context(cpi->above_seg_context, 1083 cpi->left_seg_context, 1084 mi_row, mi_col, bsize); 1085 none_rate += x->partition_cost[pl][PARTITION_NONE]; 1086 1087 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1088 mi_8x8[0]->mbmi.sb_type = bs_type; 1089 *(get_sb_partitioning(x, bsize)) = subsize; 1090 } 1091 } 1092 1093 switch (partition) { 1094 case PARTITION_NONE: 1095 pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist, 1096 bsize, get_block_context(x, bsize), INT64_MAX); 1097 break; 1098 case PARTITION_HORZ: 1099 *get_sb_index(x, subsize) = 0; 1100 pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist, 1101 subsize, get_block_context(x, subsize), INT64_MAX); 1102 if (last_part_rate != INT_MAX && 1103 bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) { 1104 int rt = 0; 1105 int64_t dt = 0; 1106 update_state(cpi, get_block_context(x, subsize), subsize, 0); 1107 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); 1108 *get_sb_index(x, subsize) = 1; 1109 pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, subsize, 1110 get_block_context(x, subsize), INT64_MAX); 1111 if (rt == INT_MAX || dt == INT_MAX) { 1112 last_part_rate = INT_MAX; 1113 last_part_dist = INT_MAX; 1114 break; 1115 } 1116 1117 last_part_rate += rt; 1118 last_part_dist += dt; 1119 } 1120 break; 1121 case PARTITION_VERT: 1122 *get_sb_index(x, subsize) = 0; 1123 pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist, 1124 subsize, get_block_context(x, subsize), INT64_MAX); 1125 if (last_part_rate != INT_MAX && 1126 bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) { 1127 int rt = 0; 1128 int64_t dt = 0; 1129 update_state(cpi, get_block_context(x, subsize), subsize, 0); 1130 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); 1131 *get_sb_index(x, subsize) = 1; 1132 pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, subsize, 1133 get_block_context(x, subsize), INT64_MAX); 1134 if (rt == INT_MAX || dt == INT_MAX) { 1135 last_part_rate = INT_MAX; 1136 last_part_dist = INT_MAX; 1137 break; 1138 } 1139 last_part_rate += rt; 1140 last_part_dist += dt; 1141 } 1142 break; 1143 case PARTITION_SPLIT: 1144 // Split partition. 1145 last_part_rate = 0; 1146 last_part_dist = 0; 1147 for (i = 0; i < 4; i++) { 1148 int x_idx = (i & 1) * (ms >> 1); 1149 int y_idx = (i >> 1) * (ms >> 1); 1150 int jj = i >> 1, ii = i & 0x01; 1151 int rt; 1152 int64_t dt; 1153 1154 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) 1155 continue; 1156 1157 *get_sb_index(x, subsize) = i; 1158 1159 rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp, 1160 mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt, 1161 i != 3); 1162 if (rt == INT_MAX || dt == INT_MAX) { 1163 last_part_rate = INT_MAX; 1164 last_part_dist = INT_MAX; 1165 break; 1166 } 1167 last_part_rate += rt; 1168 last_part_dist += dt; 1169 } 1170 break; 1171 default: 1172 assert(0); 1173 } 1174 1175 pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, 1176 mi_row, mi_col, bsize); 1177 if (last_part_rate < INT_MAX) 1178 last_part_rate += x->partition_cost[pl][partition]; 1179 1180 if (cpi->sf.adjust_partitioning_from_last_frame 1181 && partition != PARTITION_SPLIT && bsize > BLOCK_8X8 1182 && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows) 1183 && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) { 1184 BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); 1185 split_rate = 0; 1186 split_dist = 0; 1187 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1188 1189 // Split partition. 1190 for (i = 0; i < 4; i++) { 1191 int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2); 1192 int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2); 1193 int rt = 0; 1194 int64_t dt = 0; 1195 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 1196 PARTITION_CONTEXT sl[8], sa[8]; 1197 1198 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) 1199 continue; 1200 1201 *get_sb_index(x, split_subsize) = i; 1202 *get_sb_partitioning(x, bsize) = split_subsize; 1203 *get_sb_partitioning(x, split_subsize) = split_subsize; 1204 1205 save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1206 1207 pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt, 1208 split_subsize, get_block_context(x, split_subsize), 1209 INT64_MAX); 1210 1211 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1212 1213 if (rt == INT_MAX || dt == INT_MAX) { 1214 split_rate = INT_MAX; 1215 split_dist = INT_MAX; 1216 break; 1217 } 1218 1219 if (i != 3) 1220 encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0, 1221 split_subsize); 1222 1223 split_rate += rt; 1224 split_dist += dt; 1225 pl = partition_plane_context(cpi->above_seg_context, 1226 cpi->left_seg_context, 1227 mi_row + y_idx, mi_col + x_idx, bsize); 1228 split_rate += x->partition_cost[pl][PARTITION_NONE]; 1229 } 1230 pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, 1231 mi_row, mi_col, bsize); 1232 if (split_rate < INT_MAX) { 1233 split_rate += x->partition_cost[pl][PARTITION_SPLIT]; 1234 1235 chosen_rate = split_rate; 1236 chosen_dist = split_dist; 1237 } 1238 } 1239 1240 // If last_part is better set the partitioning to that... 1241 if (RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist) 1242 < RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)) { 1243 mi_8x8[0]->mbmi.sb_type = bsize; 1244 if (bsize >= BLOCK_8X8) 1245 *(get_sb_partitioning(x, bsize)) = subsize; 1246 chosen_rate = last_part_rate; 1247 chosen_dist = last_part_dist; 1248 } 1249 // If none was better set the partitioning to that... 1250 if (RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist) 1251 > RDCOST(x->rdmult, x->rddiv, none_rate, none_dist)) { 1252 if (bsize >= BLOCK_8X8) 1253 *(get_sb_partitioning(x, bsize)) = bsize; 1254 chosen_rate = none_rate; 1255 chosen_dist = none_dist; 1256 } 1257 1258 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1259 1260 // We must have chosen a partitioning and encoding or we'll fail later on. 1261 // No other opportunities for success. 1262 if ( bsize == BLOCK_64X64) 1263 assert(chosen_rate < INT_MAX && chosen_dist < INT_MAX); 1264 1265 if (do_recon) 1266 encode_sb(cpi, tile, tp, mi_row, mi_col, bsize == BLOCK_64X64, bsize); 1267 1268 *rate = chosen_rate; 1269 *dist = chosen_dist; 1270 } 1271 1272 static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { 1273 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, 1274 BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, 1275 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 1276 }; 1277 1278 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { 1279 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, 1280 BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, 1281 BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 1282 }; 1283 1284 // Look at all the mode_info entries for blocks that are part of this 1285 // partition and find the min and max values for sb_type. 1286 // At the moment this is designed to work on a 64x64 SB but could be 1287 // adjusted to use a size parameter. 1288 // 1289 // The min and max are assumed to have been initialized prior to calling this 1290 // function so repeat calls can accumulate a min and max of more than one sb64. 1291 static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8, 1292 BLOCK_SIZE * min_block_size, 1293 BLOCK_SIZE * max_block_size ) { 1294 MACROBLOCKD *const xd = &cpi->mb.e_mbd; 1295 int sb_width_in_blocks = MI_BLOCK_SIZE; 1296 int sb_height_in_blocks = MI_BLOCK_SIZE; 1297 int i, j; 1298 int index = 0; 1299 1300 // Check the sb_type for each block that belongs to this region. 1301 for (i = 0; i < sb_height_in_blocks; ++i) { 1302 for (j = 0; j < sb_width_in_blocks; ++j) { 1303 MODE_INFO * mi = mi_8x8[index+j]; 1304 BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0; 1305 *min_block_size = MIN(*min_block_size, sb_type); 1306 *max_block_size = MAX(*max_block_size, sb_type); 1307 } 1308 index += xd->mode_info_stride; 1309 } 1310 } 1311 1312 // Look at neighboring blocks and set a min and max partition size based on 1313 // what they chose. 1314 static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, 1315 int row, int col, 1316 BLOCK_SIZE *min_block_size, 1317 BLOCK_SIZE *max_block_size) { 1318 VP9_COMMON * const cm = &cpi->common; 1319 MACROBLOCKD *const xd = &cpi->mb.e_mbd; 1320 MODE_INFO ** mi_8x8 = xd->mi_8x8; 1321 MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8; 1322 1323 const int left_in_image = xd->left_available && mi_8x8[-1]; 1324 const int above_in_image = xd->up_available && 1325 mi_8x8[-xd->mode_info_stride]; 1326 MODE_INFO ** above_sb64_mi_8x8; 1327 MODE_INFO ** left_sb64_mi_8x8; 1328 1329 int row8x8_remaining = tile->mi_row_end - row; 1330 int col8x8_remaining = tile->mi_col_end - col; 1331 int bh, bw; 1332 1333 // Trap case where we do not have a prediction. 1334 if (!left_in_image && !above_in_image && 1335 ((cm->frame_type == KEY_FRAME) || !cm->prev_mi)) { 1336 *min_block_size = BLOCK_4X4; 1337 *max_block_size = BLOCK_64X64; 1338 } else { 1339 // Default "min to max" and "max to min" 1340 *min_block_size = BLOCK_64X64; 1341 *max_block_size = BLOCK_4X4; 1342 1343 // NOTE: each call to get_sb_partition_size_range() uses the previous 1344 // passed in values for min and max as a starting point. 1345 // 1346 // Find the min and max partition used in previous frame at this location 1347 if (cm->prev_mi && (cm->frame_type != KEY_FRAME)) { 1348 get_sb_partition_size_range(cpi, prev_mi_8x8, 1349 min_block_size, max_block_size); 1350 } 1351 1352 // Find the min and max partition sizes used in the left SB64 1353 if (left_in_image) { 1354 left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE]; 1355 get_sb_partition_size_range(cpi, left_sb64_mi_8x8, 1356 min_block_size, max_block_size); 1357 } 1358 1359 // Find the min and max partition sizes used in the above SB64. 1360 if (above_in_image) { 1361 above_sb64_mi_8x8 = &mi_8x8[-xd->mode_info_stride * MI_BLOCK_SIZE]; 1362 get_sb_partition_size_range(cpi, above_sb64_mi_8x8, 1363 min_block_size, max_block_size); 1364 } 1365 } 1366 1367 // Give a bit of leaway either side of the observed min and max 1368 *min_block_size = min_partition_size[*min_block_size]; 1369 *max_block_size = max_partition_size[*max_block_size]; 1370 1371 // Check border cases where max and min from neighbours may not be legal. 1372 *max_block_size = find_partition_size(*max_block_size, 1373 row8x8_remaining, col8x8_remaining, 1374 &bh, &bw); 1375 *min_block_size = MIN(*min_block_size, *max_block_size); 1376 } 1377 1378 static void compute_fast_motion_search_level(VP9_COMP *cpi, BLOCK_SIZE bsize) { 1379 VP9_COMMON *const cm = &cpi->common; 1380 MACROBLOCK *const x = &cpi->mb; 1381 1382 // Only use 8x8 result for non HD videos. 1383 // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0; 1384 int use_8x8 = 1; 1385 1386 if (cm->frame_type && !cpi->is_src_frame_alt_ref && 1387 ((use_8x8 && bsize == BLOCK_16X16) || 1388 bsize == BLOCK_32X32 || bsize == BLOCK_64X64)) { 1389 int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0; 1390 PICK_MODE_CONTEXT *block_context = NULL; 1391 1392 if (bsize == BLOCK_16X16) { 1393 block_context = x->sb8x8_context[x->sb_index][x->mb_index]; 1394 } else if (bsize == BLOCK_32X32) { 1395 block_context = x->mb_context[x->sb_index]; 1396 } else if (bsize == BLOCK_64X64) { 1397 block_context = x->sb32_context; 1398 } 1399 1400 if (block_context) { 1401 ref0 = block_context[0].mic.mbmi.ref_frame[0]; 1402 ref1 = block_context[1].mic.mbmi.ref_frame[0]; 1403 ref2 = block_context[2].mic.mbmi.ref_frame[0]; 1404 ref3 = block_context[3].mic.mbmi.ref_frame[0]; 1405 } 1406 1407 // Currently, only consider 4 inter reference frames. 1408 if (ref0 && ref1 && ref2 && ref3) { 1409 int d01, d23, d02, d13; 1410 1411 // Motion vectors for the four subblocks. 1412 int16_t mvr0 = block_context[0].mic.mbmi.mv[0].as_mv.row; 1413 int16_t mvc0 = block_context[0].mic.mbmi.mv[0].as_mv.col; 1414 int16_t mvr1 = block_context[1].mic.mbmi.mv[0].as_mv.row; 1415 int16_t mvc1 = block_context[1].mic.mbmi.mv[0].as_mv.col; 1416 int16_t mvr2 = block_context[2].mic.mbmi.mv[0].as_mv.row; 1417 int16_t mvc2 = block_context[2].mic.mbmi.mv[0].as_mv.col; 1418 int16_t mvr3 = block_context[3].mic.mbmi.mv[0].as_mv.row; 1419 int16_t mvc3 = block_context[3].mic.mbmi.mv[0].as_mv.col; 1420 1421 // Adjust sign if ref is alt_ref. 1422 if (cm->ref_frame_sign_bias[ref0]) { 1423 mvr0 *= -1; 1424 mvc0 *= -1; 1425 } 1426 1427 if (cm->ref_frame_sign_bias[ref1]) { 1428 mvr1 *= -1; 1429 mvc1 *= -1; 1430 } 1431 1432 if (cm->ref_frame_sign_bias[ref2]) { 1433 mvr2 *= -1; 1434 mvc2 *= -1; 1435 } 1436 1437 if (cm->ref_frame_sign_bias[ref3]) { 1438 mvr3 *= -1; 1439 mvc3 *= -1; 1440 } 1441 1442 // Calculate mv distances. 1443 d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1)); 1444 d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3)); 1445 d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2)); 1446 d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3)); 1447 1448 if (d01 < FAST_MOTION_MV_THRESH && d23 < FAST_MOTION_MV_THRESH && 1449 d02 < FAST_MOTION_MV_THRESH && d13 < FAST_MOTION_MV_THRESH) { 1450 // Set fast motion search level. 1451 x->fast_ms = 1; 1452 1453 if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 && 1454 d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) { 1455 // Set fast motion search level. 1456 x->fast_ms = 2; 1457 1458 if (!d01 && !d23 && !d02 && !d13) { 1459 x->fast_ms = 3; 1460 x->subblock_ref = ref0; 1461 } 1462 } 1463 } 1464 } 1465 } 1466 } 1467 1468 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { 1469 vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); 1470 } 1471 1472 static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { 1473 vpx_memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); 1474 } 1475 1476 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are 1477 // unlikely to be selected depending on previous rate-distortion optimization 1478 // results, for encoding speed-up. 1479 static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, 1480 TOKENEXTRA **tp, int mi_row, 1481 int mi_col, BLOCK_SIZE bsize, int *rate, 1482 int64_t *dist, int do_recon, int64_t best_rd) { 1483 VP9_COMMON *const cm = &cpi->common; 1484 MACROBLOCK *const x = &cpi->mb; 1485 const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; 1486 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 1487 PARTITION_CONTEXT sl[8], sa[8]; 1488 TOKENEXTRA *tp_orig = *tp; 1489 int i, pl; 1490 BLOCK_SIZE subsize; 1491 int this_rate, sum_rate = 0, best_rate = INT_MAX; 1492 int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX; 1493 int64_t sum_rd = 0; 1494 int do_split = bsize >= BLOCK_8X8; 1495 int do_rect = 1; 1496 // Override skipping rectangular partition operations for edge blocks 1497 const int force_horz_split = (mi_row + ms >= cm->mi_rows); 1498 const int force_vert_split = (mi_col + ms >= cm->mi_cols); 1499 1500 int partition_none_allowed = !force_horz_split && !force_vert_split; 1501 int partition_horz_allowed = !force_vert_split && bsize >= BLOCK_8X8; 1502 int partition_vert_allowed = !force_horz_split && bsize >= BLOCK_8X8; 1503 1504 int partition_split_done = 0; 1505 (void) *tp_orig; 1506 1507 if (bsize < BLOCK_8X8) { 1508 // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 1509 // there is nothing to be done. 1510 if (x->ab_index != 0) { 1511 *rate = 0; 1512 *dist = 0; 1513 return; 1514 } 1515 } 1516 assert(num_8x8_blocks_wide_lookup[bsize] == 1517 num_8x8_blocks_high_lookup[bsize]); 1518 1519 if (bsize == BLOCK_16X16) { 1520 set_offsets(cpi, tile, mi_row, mi_col, bsize); 1521 x->mb_energy = vp9_block_energy(cpi, x, bsize); 1522 } 1523 1524 // Determine partition types in search according to the speed features. 1525 // The threshold set here has to be of square block size. 1526 if (cpi->sf.auto_min_max_partition_size) { 1527 partition_none_allowed &= (bsize <= cpi->sf.max_partition_size && 1528 bsize >= cpi->sf.min_partition_size); 1529 partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size && 1530 bsize > cpi->sf.min_partition_size) || 1531 force_horz_split); 1532 partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size && 1533 bsize > cpi->sf.min_partition_size) || 1534 force_vert_split); 1535 do_split &= bsize > cpi->sf.min_partition_size; 1536 } 1537 if (cpi->sf.use_square_partition_only) { 1538 partition_horz_allowed &= force_horz_split; 1539 partition_vert_allowed &= force_vert_split; 1540 } 1541 1542 save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1543 1544 if (cpi->sf.disable_split_var_thresh && partition_none_allowed) { 1545 unsigned int source_variancey; 1546 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); 1547 source_variancey = get_sby_perpixel_variance(cpi, x, bsize); 1548 if (source_variancey < cpi->sf.disable_split_var_thresh) { 1549 do_split = 0; 1550 if (source_variancey < cpi->sf.disable_split_var_thresh / 2) 1551 do_rect = 0; 1552 } 1553 } 1554 1555 // PARTITION_NONE 1556 if (partition_none_allowed) { 1557 pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize, 1558 get_block_context(x, bsize), best_rd); 1559 if (this_rate != INT_MAX) { 1560 if (bsize >= BLOCK_8X8) { 1561 pl = partition_plane_context(cpi->above_seg_context, 1562 cpi->left_seg_context, 1563 mi_row, mi_col, bsize); 1564 this_rate += x->partition_cost[pl][PARTITION_NONE]; 1565 } 1566 sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); 1567 if (sum_rd < best_rd) { 1568 int64_t stop_thresh = 2048; 1569 1570 best_rate = this_rate; 1571 best_dist = this_dist; 1572 best_rd = sum_rd; 1573 if (bsize >= BLOCK_8X8) 1574 *(get_sb_partitioning(x, bsize)) = bsize; 1575 1576 // Adjust threshold according to partition size. 1577 stop_thresh >>= 8 - (b_width_log2_lookup[bsize] + 1578 b_height_log2_lookup[bsize]); 1579 1580 // If obtained distortion is very small, choose current partition 1581 // and stop splitting. 1582 if (this_dist < stop_thresh) { 1583 do_split = 0; 1584 do_rect = 0; 1585 } 1586 } 1587 } 1588 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1589 } 1590 1591 // store estimated motion vector 1592 if (cpi->sf.adaptive_motion_search) 1593 store_pred_mv(x, get_block_context(x, bsize)); 1594 1595 // PARTITION_SPLIT 1596 sum_rd = 0; 1597 // TODO(jingning): use the motion vectors given by the above search as 1598 // the starting point of motion search in the following partition type check. 1599 if (do_split) { 1600 subsize = get_subsize(bsize, PARTITION_SPLIT); 1601 for (i = 0; i < 4 && sum_rd < best_rd; ++i) { 1602 const int x_idx = (i & 1) * ms; 1603 const int y_idx = (i >> 1) * ms; 1604 1605 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) 1606 continue; 1607 1608 *get_sb_index(x, subsize) = i; 1609 if (cpi->sf.adaptive_motion_search) 1610 load_pred_mv(x, get_block_context(x, bsize)); 1611 rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, 1612 &this_rate, &this_dist, i != 3, best_rd - sum_rd); 1613 1614 if (this_rate == INT_MAX) { 1615 sum_rd = INT64_MAX; 1616 } else { 1617 sum_rate += this_rate; 1618 sum_dist += this_dist; 1619 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); 1620 } 1621 } 1622 if (sum_rd < best_rd && i == 4) { 1623 pl = partition_plane_context(cpi->above_seg_context, 1624 cpi->left_seg_context, 1625 mi_row, mi_col, bsize); 1626 sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; 1627 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); 1628 if (sum_rd < best_rd) { 1629 best_rate = sum_rate; 1630 best_dist = sum_dist; 1631 best_rd = sum_rd; 1632 *(get_sb_partitioning(x, bsize)) = subsize; 1633 } 1634 } else { 1635 // skip rectangular partition test when larger block size 1636 // gives better rd cost 1637 if (cpi->sf.less_rectangular_check) 1638 do_rect &= !partition_none_allowed; 1639 } 1640 partition_split_done = 1; 1641 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1642 } 1643 1644 x->fast_ms = 0; 1645 x->subblock_ref = 0; 1646 1647 if (partition_split_done && 1648 cpi->sf.using_small_partition_info) { 1649 compute_fast_motion_search_level(cpi, bsize); 1650 } 1651 1652 // PARTITION_HORZ 1653 if (partition_horz_allowed && do_rect) { 1654 subsize = get_subsize(bsize, PARTITION_HORZ); 1655 *get_sb_index(x, subsize) = 0; 1656 if (cpi->sf.adaptive_motion_search) 1657 load_pred_mv(x, get_block_context(x, bsize)); 1658 pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, 1659 get_block_context(x, subsize), best_rd); 1660 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); 1661 1662 if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { 1663 update_state(cpi, get_block_context(x, subsize), subsize, 0); 1664 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); 1665 1666 *get_sb_index(x, subsize) = 1; 1667 if (cpi->sf.adaptive_motion_search) 1668 load_pred_mv(x, get_block_context(x, bsize)); 1669 pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate, 1670 &this_dist, subsize, get_block_context(x, subsize), 1671 best_rd - sum_rd); 1672 if (this_rate == INT_MAX) { 1673 sum_rd = INT64_MAX; 1674 } else { 1675 sum_rate += this_rate; 1676 sum_dist += this_dist; 1677 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); 1678 } 1679 } 1680 if (sum_rd < best_rd) { 1681 pl = partition_plane_context(cpi->above_seg_context, 1682 cpi->left_seg_context, 1683 mi_row, mi_col, bsize); 1684 sum_rate += x->partition_cost[pl][PARTITION_HORZ]; 1685 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); 1686 if (sum_rd < best_rd) { 1687 best_rd = sum_rd; 1688 best_rate = sum_rate; 1689 best_dist = sum_dist; 1690 *(get_sb_partitioning(x, bsize)) = subsize; 1691 } 1692 } 1693 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1694 } 1695 1696 // PARTITION_VERT 1697 if (partition_vert_allowed && do_rect) { 1698 subsize = get_subsize(bsize, PARTITION_VERT); 1699 1700 *get_sb_index(x, subsize) = 0; 1701 if (cpi->sf.adaptive_motion_search) 1702 load_pred_mv(x, get_block_context(x, bsize)); 1703 pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, 1704 get_block_context(x, subsize), best_rd); 1705 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); 1706 if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { 1707 update_state(cpi, get_block_context(x, subsize), subsize, 0); 1708 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); 1709 1710 *get_sb_index(x, subsize) = 1; 1711 if (cpi->sf.adaptive_motion_search) 1712 load_pred_mv(x, get_block_context(x, bsize)); 1713 pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate, 1714 &this_dist, subsize, get_block_context(x, subsize), 1715 best_rd - sum_rd); 1716 if (this_rate == INT_MAX) { 1717 sum_rd = INT64_MAX; 1718 } else { 1719 sum_rate += this_rate; 1720 sum_dist += this_dist; 1721 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); 1722 } 1723 } 1724 if (sum_rd < best_rd) { 1725 pl = partition_plane_context(cpi->above_seg_context, 1726 cpi->left_seg_context, 1727 mi_row, mi_col, bsize); 1728 sum_rate += x->partition_cost[pl][PARTITION_VERT]; 1729 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); 1730 if (sum_rd < best_rd) { 1731 best_rate = sum_rate; 1732 best_dist = sum_dist; 1733 best_rd = sum_rd; 1734 *(get_sb_partitioning(x, bsize)) = subsize; 1735 } 1736 } 1737 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); 1738 } 1739 1740 1741 *rate = best_rate; 1742 *dist = best_dist; 1743 1744 if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) 1745 encode_sb(cpi, tile, tp, mi_row, mi_col, bsize == BLOCK_64X64, bsize); 1746 if (bsize == BLOCK_64X64) { 1747 assert(tp_orig < *tp); 1748 assert(best_rate < INT_MAX); 1749 assert(best_dist < INT_MAX); 1750 } else { 1751 assert(tp_orig == *tp); 1752 } 1753 } 1754 1755 // Examines 64x64 block and chooses a best reference frame 1756 static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile, 1757 int mi_row, int mi_col) { 1758 VP9_COMMON * const cm = &cpi->common; 1759 MACROBLOCK * const x = &cpi->mb; 1760 int bsl = b_width_log2(BLOCK_64X64), bs = 1 << bsl; 1761 int ms = bs / 2; 1762 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 1763 PARTITION_CONTEXT sl[8], sa[8]; 1764 int pl; 1765 int r; 1766 int64_t d; 1767 1768 save_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64); 1769 1770 // Default is non mask (all reference frames allowed. 1771 cpi->ref_frame_mask = 0; 1772 1773 // Do RD search for 64x64. 1774 if ((mi_row + (ms >> 1) < cm->mi_rows) && 1775 (mi_col + (ms >> 1) < cm->mi_cols)) { 1776 cpi->set_ref_frame_mask = 1; 1777 pick_sb_modes(cpi, tile, mi_row, mi_col, &r, &d, BLOCK_64X64, 1778 get_block_context(x, BLOCK_64X64), INT64_MAX); 1779 pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, 1780 mi_row, mi_col, BLOCK_64X64); 1781 r += x->partition_cost[pl][PARTITION_NONE]; 1782 1783 *(get_sb_partitioning(x, BLOCK_64X64)) = BLOCK_64X64; 1784 cpi->set_ref_frame_mask = 0; 1785 } 1786 1787 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64); 1788 } 1789 1790 static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, 1791 int mi_row, TOKENEXTRA **tp) { 1792 VP9_COMMON * const cm = &cpi->common; 1793 int mi_col; 1794 1795 // Initialize the left context for the new SB row 1796 vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); 1797 vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); 1798 1799 // Code each SB in the row 1800 for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; 1801 mi_col += MI_BLOCK_SIZE) { 1802 int dummy_rate; 1803 int64_t dummy_dist; 1804 1805 vp9_zero(cpi->mb.pred_mv); 1806 1807 if (cpi->sf.reference_masking) 1808 rd_pick_reference_frame(cpi, tile, mi_row, mi_col); 1809 1810 if (cpi->sf.use_lastframe_partitioning || 1811 cpi->sf.use_one_partition_size_always ) { 1812 const int idx_str = cm->mode_info_stride * mi_row + mi_col; 1813 MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; 1814 MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; 1815 1816 cpi->mb.source_variance = UINT_MAX; 1817 if (cpi->sf.use_one_partition_size_always) { 1818 set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); 1819 set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col); 1820 rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1821 &dummy_rate, &dummy_dist, 1); 1822 } else { 1823 if ((cpi->common.current_video_frame 1824 % cpi->sf.last_partitioning_redo_frequency) == 0 1825 || cm->prev_mi == 0 1826 || cpi->common.show_frame == 0 1827 || cpi->common.frame_type == KEY_FRAME 1828 || cpi->is_src_frame_alt_ref 1829 || ((cpi->sf.use_lastframe_partitioning == 1830 LAST_FRAME_PARTITION_LOW_MOTION) && 1831 sb_has_motion(cpi, prev_mi_8x8))) { 1832 // If required set upper and lower partition size limits 1833 if (cpi->sf.auto_min_max_partition_size) { 1834 set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); 1835 rd_auto_partition_range(cpi, tile, mi_row, mi_col, 1836 &cpi->sf.min_partition_size, 1837 &cpi->sf.max_partition_size); 1838 } 1839 rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, 1840 &dummy_rate, &dummy_dist, 1, INT64_MAX); 1841 } else { 1842 copy_partitioning(cpi, mi_8x8, prev_mi_8x8); 1843 rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1844 &dummy_rate, &dummy_dist, 1); 1845 } 1846 } 1847 } else { 1848 // If required set upper and lower partition size limits 1849 if (cpi->sf.auto_min_max_partition_size) { 1850 set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); 1851 rd_auto_partition_range(cpi, tile, mi_row, mi_col, 1852 &cpi->sf.min_partition_size, 1853 &cpi->sf.max_partition_size); 1854 } 1855 rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, 1856 &dummy_rate, &dummy_dist, 1, INT64_MAX); 1857 } 1858 } 1859 } 1860 1861 static void init_encode_frame_mb_context(VP9_COMP *cpi) { 1862 MACROBLOCK *const x = &cpi->mb; 1863 VP9_COMMON *const cm = &cpi->common; 1864 MACROBLOCKD *const xd = &x->e_mbd; 1865 const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); 1866 1867 x->act_zbin_adj = 0; 1868 cpi->seg0_idx = 0; 1869 1870 xd->mode_info_stride = cm->mode_info_stride; 1871 1872 // reset intra mode contexts 1873 if (frame_is_intra_only(cm)) 1874 vp9_init_mbmode_probs(cm); 1875 1876 // Copy data over into macro block data structures. 1877 vp9_setup_src_planes(x, cpi->Source, 0, 0); 1878 1879 // TODO(jkoleszar): are these initializations required? 1880 setup_pre_planes(xd, 0, &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]], 1881 0, 0, NULL); 1882 setup_dst_planes(xd, get_frame_new_buffer(cm), 0, 0); 1883 1884 setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); 1885 1886 xd->mi_8x8[0]->mbmi.mode = DC_PRED; 1887 xd->mi_8x8[0]->mbmi.uv_mode = DC_PRED; 1888 1889 vp9_zero(cpi->y_mode_count); 1890 vp9_zero(cpi->y_uv_mode_count); 1891 vp9_zero(cm->counts.inter_mode); 1892 vp9_zero(cpi->partition_count); 1893 vp9_zero(cpi->intra_inter_count); 1894 vp9_zero(cpi->comp_inter_count); 1895 vp9_zero(cpi->single_ref_count); 1896 vp9_zero(cpi->comp_ref_count); 1897 vp9_zero(cm->counts.tx); 1898 vp9_zero(cm->counts.mbskip); 1899 1900 // Note: this memset assumes above_context[0], [1] and [2] 1901 // are allocated as part of the same buffer. 1902 vpx_memset(cpi->above_context[0], 0, 1903 sizeof(*cpi->above_context[0]) * 1904 2 * aligned_mi_cols * MAX_MB_PLANE); 1905 vpx_memset(cpi->above_seg_context, 0, 1906 sizeof(*cpi->above_seg_context) * aligned_mi_cols); 1907 } 1908 1909 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { 1910 if (lossless) { 1911 // printf("Switching to lossless\n"); 1912 cpi->mb.fwd_txm4x4 = vp9_fwht4x4; 1913 cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; 1914 cpi->mb.optimize = 0; 1915 cpi->common.lf.filter_level = 0; 1916 cpi->zbin_mode_boost_enabled = 0; 1917 cpi->common.tx_mode = ONLY_4X4; 1918 } else { 1919 // printf("Not lossless\n"); 1920 cpi->mb.fwd_txm4x4 = vp9_fdct4x4; 1921 cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; 1922 } 1923 } 1924 1925 static void switch_tx_mode(VP9_COMP *cpi) { 1926 if (cpi->sf.tx_size_search_method == USE_LARGESTALL && 1927 cpi->common.tx_mode >= ALLOW_32X32) 1928 cpi->common.tx_mode = ALLOW_32X32; 1929 } 1930 1931 static void encode_frame_internal(VP9_COMP *cpi) { 1932 int mi_row; 1933 MACROBLOCK * const x = &cpi->mb; 1934 VP9_COMMON * const cm = &cpi->common; 1935 MACROBLOCKD * const xd = &x->e_mbd; 1936 1937 // fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", 1938 // cpi->common.current_video_frame, cpi->common.show_frame, 1939 // cm->frame_type); 1940 1941 // debug output 1942 #if DBG_PRNT_SEGMAP 1943 { 1944 FILE *statsfile; 1945 statsfile = fopen("segmap2.stt", "a"); 1946 fprintf(statsfile, "\n"); 1947 fclose(statsfile); 1948 } 1949 #endif 1950 1951 vp9_zero(cm->counts.switchable_interp); 1952 vp9_zero(cpi->tx_stepdown_count); 1953 1954 xd->mi_8x8 = cm->mi_grid_visible; 1955 // required for vp9_frame_init_quantizer 1956 xd->mi_8x8[0] = cm->mi; 1957 1958 xd->last_mi = cm->prev_mi; 1959 1960 vp9_zero(cpi->NMVcount); 1961 vp9_zero(cpi->coef_counts); 1962 vp9_zero(cm->counts.eob_branch); 1963 1964 cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 1965 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; 1966 switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); 1967 1968 vp9_frame_init_quantizer(cpi); 1969 1970 vp9_initialize_rd_consts(cpi); 1971 vp9_initialize_me_consts(cpi, cm->base_qindex); 1972 switch_tx_mode(cpi); 1973 1974 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { 1975 // Initialize encode frame context. 1976 init_encode_frame_mb_context(cpi); 1977 1978 // Build a frame level activity map 1979 build_activity_map(cpi); 1980 } 1981 1982 // Re-initialize encode frame context. 1983 init_encode_frame_mb_context(cpi); 1984 1985 vp9_zero(cpi->rd_comp_pred_diff); 1986 vp9_zero(cpi->rd_filter_diff); 1987 vp9_zero(cpi->rd_tx_select_diff); 1988 vp9_zero(cpi->rd_tx_select_threshes); 1989 1990 set_prev_mi(cm); 1991 1992 { 1993 struct vpx_usec_timer emr_timer; 1994 vpx_usec_timer_start(&emr_timer); 1995 1996 { 1997 // Take tiles into account and give start/end MB 1998 int tile_col, tile_row; 1999 TOKENEXTRA *tp = cpi->tok; 2000 const int tile_cols = 1 << cm->log2_tile_cols; 2001 const int tile_rows = 1 << cm->log2_tile_rows; 2002 2003 for (tile_row = 0; tile_row < tile_rows; tile_row++) { 2004 for (tile_col = 0; tile_col < tile_cols; tile_col++) { 2005 TileInfo tile; 2006 TOKENEXTRA *tp_old = tp; 2007 2008 // For each row of SBs in the frame 2009 vp9_tile_init(&tile, cm, tile_row, tile_col); 2010 for (mi_row = tile.mi_row_start; 2011 mi_row < tile.mi_row_end; mi_row += 8) 2012 encode_sb_row(cpi, &tile, mi_row, &tp); 2013 2014 cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); 2015 assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); 2016 } 2017 } 2018 } 2019 2020 vpx_usec_timer_mark(&emr_timer); 2021 cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); 2022 } 2023 2024 if (cpi->sf.skip_encode_sb) { 2025 int j; 2026 unsigned int intra_count = 0, inter_count = 0; 2027 for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { 2028 intra_count += cpi->intra_inter_count[j][0]; 2029 inter_count += cpi->intra_inter_count[j][1]; 2030 } 2031 cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count); 2032 cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME); 2033 cpi->sf.skip_encode_frame &= cm->show_frame; 2034 } else { 2035 cpi->sf.skip_encode_frame = 0; 2036 } 2037 2038 #if 0 2039 // Keep record of the total distortion this time around for future use 2040 cpi->last_frame_distortion = cpi->frame_distortion; 2041 #endif 2042 } 2043 2044 static int check_dual_ref_flags(VP9_COMP *cpi) { 2045 const int ref_flags = cpi->ref_frame_flags; 2046 2047 if (vp9_segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) { 2048 return 0; 2049 } else { 2050 return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) 2051 + !!(ref_flags & VP9_ALT_FLAG)) >= 2; 2052 } 2053 } 2054 2055 static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) { 2056 int x, y; 2057 2058 for (y = 0; y < ymbs; y++) { 2059 for (x = 0; x < xmbs; x++) { 2060 if (!mi_8x8[y * mis + x]->mbmi.skip_coeff) 2061 return 0; 2062 } 2063 } 2064 2065 return 1; 2066 } 2067 2068 static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs, 2069 TX_SIZE tx_size) { 2070 int x, y; 2071 2072 for (y = 0; y < ymbs; y++) { 2073 for (x = 0; x < xmbs; x++) 2074 mi_8x8[y * mis + x]->mbmi.tx_size = tx_size; 2075 } 2076 } 2077 2078 static void reset_skip_txfm_size_b(VP9_COMP *cpi, MODE_INFO **mi_8x8, 2079 int mis, TX_SIZE max_tx_size, int bw, int bh, 2080 int mi_row, int mi_col, BLOCK_SIZE bsize) { 2081 VP9_COMMON * const cm = &cpi->common; 2082 2083 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) { 2084 return; 2085 } else { 2086 MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi; 2087 if (mbmi->tx_size > max_tx_size) { 2088 const int ymbs = MIN(bh, cm->mi_rows - mi_row); 2089 const int xmbs = MIN(bw, cm->mi_cols - mi_col); 2090 2091 assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) || 2092 get_skip_flag(mi_8x8, mis, ymbs, xmbs)); 2093 set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size); 2094 } 2095 } 2096 } 2097 2098 static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO **mi_8x8, 2099 TX_SIZE max_tx_size, int mi_row, int mi_col, 2100 BLOCK_SIZE bsize) { 2101 VP9_COMMON * const cm = &cpi->common; 2102 const int mis = cm->mode_info_stride; 2103 int bw, bh; 2104 const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; 2105 2106 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) 2107 return; 2108 2109 bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type]; 2110 bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type]; 2111 2112 if (bw == bs && bh == bs) { 2113 reset_skip_txfm_size_b(cpi, mi_8x8, mis, max_tx_size, bs, bs, mi_row, 2114 mi_col, bsize); 2115 } else if (bw == bs && bh < bs) { 2116 reset_skip_txfm_size_b(cpi, mi_8x8, mis, max_tx_size, bs, hbs, mi_row, 2117 mi_col, bsize); 2118 reset_skip_txfm_size_b(cpi, mi_8x8 + hbs * mis, mis, max_tx_size, bs, hbs, 2119 mi_row + hbs, mi_col, bsize); 2120 } else if (bw < bs && bh == bs) { 2121 reset_skip_txfm_size_b(cpi, mi_8x8, mis, max_tx_size, hbs, bs, mi_row, 2122 mi_col, bsize); 2123 reset_skip_txfm_size_b(cpi, mi_8x8 + hbs, mis, max_tx_size, hbs, bs, mi_row, 2124 mi_col + hbs, bsize); 2125 2126 } else { 2127 const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize]; 2128 int n; 2129 2130 assert(bw < bs && bh < bs); 2131 2132 for (n = 0; n < 4; n++) { 2133 const int mi_dc = hbs * (n & 1); 2134 const int mi_dr = hbs * (n >> 1); 2135 2136 reset_skip_txfm_size_sb(cpi, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size, 2137 mi_row + mi_dr, mi_col + mi_dc, subsize); 2138 } 2139 } 2140 } 2141 2142 static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { 2143 VP9_COMMON * const cm = &cpi->common; 2144 int mi_row, mi_col; 2145 const int mis = cm->mode_info_stride; 2146 // MODE_INFO *mi, *mi_ptr = cm->mi; 2147 MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible; 2148 2149 for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) { 2150 mi_8x8 = mi_ptr; 2151 for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) { 2152 reset_skip_txfm_size_sb(cpi, mi_8x8, txfm_max, mi_row, mi_col, 2153 BLOCK_64X64); 2154 } 2155 } 2156 } 2157 2158 static int get_frame_type(VP9_COMP *cpi) { 2159 int frame_type; 2160 if (frame_is_intra_only(&cpi->common)) 2161 frame_type = 0; 2162 else if (cpi->is_src_frame_alt_ref && cpi->refresh_golden_frame) 2163 frame_type = 3; 2164 else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) 2165 frame_type = 1; 2166 else 2167 frame_type = 2; 2168 return frame_type; 2169 } 2170 2171 static void select_tx_mode(VP9_COMP *cpi) { 2172 if (cpi->oxcf.lossless) { 2173 cpi->common.tx_mode = ONLY_4X4; 2174 } else if (cpi->common.current_video_frame == 0) { 2175 cpi->common.tx_mode = TX_MODE_SELECT; 2176 } else { 2177 if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { 2178 cpi->common.tx_mode = ALLOW_32X32; 2179 } else if (cpi->sf.tx_size_search_method == USE_FULL_RD) { 2180 int frame_type = get_frame_type(cpi); 2181 cpi->common.tx_mode = 2182 cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] 2183 > cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? 2184 ALLOW_32X32 : TX_MODE_SELECT; 2185 } else { 2186 unsigned int total = 0; 2187 int i; 2188 for (i = 0; i < TX_SIZES; ++i) 2189 total += cpi->tx_stepdown_count[i]; 2190 if (total) { 2191 double fraction = (double)cpi->tx_stepdown_count[0] / total; 2192 cpi->common.tx_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT; 2193 // printf("fraction = %f\n", fraction); 2194 } // else keep unchanged 2195 } 2196 } 2197 } 2198 2199 void vp9_encode_frame(VP9_COMP *cpi) { 2200 VP9_COMMON * const cm = &cpi->common; 2201 2202 // In the longer term the encoder should be generalized to match the 2203 // decoder such that we allow compound where one of the 3 buffers has a 2204 // different sign bias and that buffer is then the fixed ref. However, this 2205 // requires further work in the rd loop. For now the only supported encoder 2206 // side behavior is where the ALT ref buffer has opposite sign bias to 2207 // the other two. 2208 if (!frame_is_intra_only(cm)) { 2209 if ((cm->ref_frame_sign_bias[ALTREF_FRAME] 2210 == cm->ref_frame_sign_bias[GOLDEN_FRAME]) 2211 || (cm->ref_frame_sign_bias[ALTREF_FRAME] 2212 == cm->ref_frame_sign_bias[LAST_FRAME])) { 2213 cm->allow_comp_inter_inter = 0; 2214 } else { 2215 cm->allow_comp_inter_inter = 1; 2216 cm->comp_fixed_ref = ALTREF_FRAME; 2217 cm->comp_var_ref[0] = LAST_FRAME; 2218 cm->comp_var_ref[1] = GOLDEN_FRAME; 2219 } 2220 } 2221 2222 if (cpi->sf.RD) { 2223 int i, pred_type; 2224 INTERPOLATION_TYPE filter_type; 2225 /* 2226 * This code does a single RD pass over the whole frame assuming 2227 * either compound, single or hybrid prediction as per whatever has 2228 * worked best for that type of frame in the past. 2229 * It also predicts whether another coding mode would have worked 2230 * better that this coding mode. If that is the case, it remembers 2231 * that for subsequent frames. 2232 * It does the same analysis for transform size selection also. 2233 */ 2234 int frame_type = get_frame_type(cpi); 2235 2236 /* prediction (compound, single or hybrid) mode selection */ 2237 if (frame_type == 3 || !cm->allow_comp_inter_inter) 2238 pred_type = SINGLE_PREDICTION_ONLY; 2239 else if (cpi->rd_prediction_type_threshes[frame_type][1] 2240 > cpi->rd_prediction_type_threshes[frame_type][0] 2241 && cpi->rd_prediction_type_threshes[frame_type][1] 2242 > cpi->rd_prediction_type_threshes[frame_type][2] 2243 && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) 2244 pred_type = COMP_PREDICTION_ONLY; 2245 else if (cpi->rd_prediction_type_threshes[frame_type][0] 2246 > cpi->rd_prediction_type_threshes[frame_type][2]) 2247 pred_type = SINGLE_PREDICTION_ONLY; 2248 else 2249 pred_type = HYBRID_PREDICTION; 2250 2251 /* filter type selection */ 2252 // FIXME(rbultje) for some odd reason, we often select smooth_filter 2253 // as default filter for ARF overlay frames. This is a REALLY BAD 2254 // IDEA so we explicitly disable it here. 2255 if (frame_type != 3 && 2256 cpi->rd_filter_threshes[frame_type][1] > 2257 cpi->rd_filter_threshes[frame_type][0] && 2258 cpi->rd_filter_threshes[frame_type][1] > 2259 cpi->rd_filter_threshes[frame_type][2] && 2260 cpi->rd_filter_threshes[frame_type][1] > 2261 cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) { 2262 filter_type = EIGHTTAP_SMOOTH; 2263 } else if (cpi->rd_filter_threshes[frame_type][2] > 2264 cpi->rd_filter_threshes[frame_type][0] && 2265 cpi->rd_filter_threshes[frame_type][2] > 2266 cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) { 2267 filter_type = EIGHTTAP_SHARP; 2268 } else if (cpi->rd_filter_threshes[frame_type][0] > 2269 cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) { 2270 filter_type = EIGHTTAP; 2271 } else { 2272 filter_type = SWITCHABLE; 2273 } 2274 2275 cpi->mb.e_mbd.lossless = 0; 2276 if (cpi->oxcf.lossless) { 2277 cpi->mb.e_mbd.lossless = 1; 2278 } 2279 2280 /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */ 2281 select_tx_mode(cpi); 2282 cpi->common.comp_pred_mode = pred_type; 2283 cpi->common.mcomp_filter_type = filter_type; 2284 encode_frame_internal(cpi); 2285 2286 for (i = 0; i < NB_PREDICTION_TYPES; ++i) { 2287 const int diff = (int) (cpi->rd_comp_pred_diff[i] / cpi->common.MBs); 2288 cpi->rd_prediction_type_threshes[frame_type][i] += diff; 2289 cpi->rd_prediction_type_threshes[frame_type][i] >>= 1; 2290 } 2291 2292 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { 2293 const int64_t diff = cpi->rd_filter_diff[i] / cpi->common.MBs; 2294 cpi->rd_filter_threshes[frame_type][i] = 2295 (cpi->rd_filter_threshes[frame_type][i] + diff) / 2; 2296 } 2297 2298 for (i = 0; i < TX_MODES; ++i) { 2299 int64_t pd = cpi->rd_tx_select_diff[i]; 2300 int diff; 2301 if (i == TX_MODE_SELECT) 2302 pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2303 2048 * (TX_SIZES - 1), 0); 2304 diff = (int) (pd / cpi->common.MBs); 2305 cpi->rd_tx_select_threshes[frame_type][i] += diff; 2306 cpi->rd_tx_select_threshes[frame_type][i] /= 2; 2307 } 2308 2309 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { 2310 int single_count_zero = 0; 2311 int comp_count_zero = 0; 2312 2313 for (i = 0; i < COMP_INTER_CONTEXTS; i++) { 2314 single_count_zero += cpi->comp_inter_count[i][0]; 2315 comp_count_zero += cpi->comp_inter_count[i][1]; 2316 } 2317 2318 if (comp_count_zero == 0) { 2319 cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY; 2320 vp9_zero(cpi->comp_inter_count); 2321 } else if (single_count_zero == 0) { 2322 cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY; 2323 vp9_zero(cpi->comp_inter_count); 2324 } 2325 } 2326 2327 if (cpi->common.tx_mode == TX_MODE_SELECT) { 2328 int count4x4 = 0; 2329 int count8x8_lp = 0, count8x8_8x8p = 0; 2330 int count16x16_16x16p = 0, count16x16_lp = 0; 2331 int count32x32 = 0; 2332 2333 for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { 2334 count4x4 += cm->counts.tx.p32x32[i][TX_4X4]; 2335 count4x4 += cm->counts.tx.p16x16[i][TX_4X4]; 2336 count4x4 += cm->counts.tx.p8x8[i][TX_4X4]; 2337 2338 count8x8_lp += cm->counts.tx.p32x32[i][TX_8X8]; 2339 count8x8_lp += cm->counts.tx.p16x16[i][TX_8X8]; 2340 count8x8_8x8p += cm->counts.tx.p8x8[i][TX_8X8]; 2341 2342 count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16]; 2343 count16x16_lp += cm->counts.tx.p32x32[i][TX_16X16]; 2344 count32x32 += cm->counts.tx.p32x32[i][TX_32X32]; 2345 } 2346 2347 if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 2348 && count32x32 == 0) { 2349 cpi->common.tx_mode = ALLOW_8X8; 2350 reset_skip_txfm_size(cpi, TX_8X8); 2351 } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 2352 && count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { 2353 cpi->common.tx_mode = ONLY_4X4; 2354 reset_skip_txfm_size(cpi, TX_4X4); 2355 } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { 2356 cpi->common.tx_mode = ALLOW_32X32; 2357 } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { 2358 cpi->common.tx_mode = ALLOW_16X16; 2359 reset_skip_txfm_size(cpi, TX_16X16); 2360 } 2361 } 2362 } else { 2363 encode_frame_internal(cpi); 2364 } 2365 } 2366 2367 static void sum_intra_stats(VP9_COMP *cpi, const MODE_INFO *mi) { 2368 const MB_PREDICTION_MODE y_mode = mi->mbmi.mode; 2369 const MB_PREDICTION_MODE uv_mode = mi->mbmi.uv_mode; 2370 const BLOCK_SIZE bsize = mi->mbmi.sb_type; 2371 2372 ++cpi->y_uv_mode_count[y_mode][uv_mode]; 2373 2374 if (bsize < BLOCK_8X8) { 2375 int idx, idy; 2376 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 2377 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 2378 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) 2379 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) 2380 ++cpi->y_mode_count[0][mi->bmi[idy * 2 + idx].as_mode]; 2381 } else { 2382 ++cpi->y_mode_count[size_group_lookup[bsize]][y_mode]; 2383 } 2384 } 2385 2386 // Experimental stub function to create a per MB zbin adjustment based on 2387 // some previously calculated measure of MB activity. 2388 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { 2389 #if USE_ACT_INDEX 2390 x->act_zbin_adj = *(x->mb_activity_ptr); 2391 #else 2392 int64_t a; 2393 int64_t b; 2394 int64_t act = *(x->mb_activity_ptr); 2395 2396 // Apply the masking to the RD multiplier. 2397 a = act + 4 * cpi->activity_avg; 2398 b = 4 * act + cpi->activity_avg; 2399 2400 if (act > cpi->activity_avg) 2401 x->act_zbin_adj = (int) (((int64_t) b + (a >> 1)) / a) - 1; 2402 else 2403 x->act_zbin_adj = 1 - (int) (((int64_t) a + (b >> 1)) / b); 2404 #endif 2405 } 2406 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, 2407 int mi_row, int mi_col, BLOCK_SIZE bsize) { 2408 VP9_COMMON * const cm = &cpi->common; 2409 MACROBLOCK * const x = &cpi->mb; 2410 MACROBLOCKD * const xd = &x->e_mbd; 2411 MODE_INFO **mi_8x8 = xd->mi_8x8; 2412 MODE_INFO *mi = mi_8x8[0]; 2413 MB_MODE_INFO *mbmi = &mi->mbmi; 2414 PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); 2415 unsigned int segment_id = mbmi->segment_id; 2416 const int mis = cm->mode_info_stride; 2417 const int mi_width = num_8x8_blocks_wide_lookup[bsize]; 2418 const int mi_height = num_8x8_blocks_high_lookup[bsize]; 2419 x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8; 2420 x->skip_optimize = ctx->is_coded; 2421 ctx->is_coded = 1; 2422 x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; 2423 x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame && 2424 x->q_index < QIDX_SKIP_THRESH); 2425 if (x->skip_encode) 2426 return; 2427 2428 if (cm->frame_type == KEY_FRAME) { 2429 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { 2430 adjust_act_zbin(cpi, x); 2431 vp9_update_zbin_extra(cpi, x); 2432 } 2433 } else { 2434 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); 2435 2436 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { 2437 // Adjust the zbin based on this MB rate. 2438 adjust_act_zbin(cpi, x); 2439 } 2440 2441 // Experimental code. Special case for gf and arf zeromv modes. 2442 // Increase zbin size to suppress noise 2443 cpi->zbin_mode_boost = 0; 2444 if (cpi->zbin_mode_boost_enabled) { 2445 if (is_inter_block(mbmi)) { 2446 if (mbmi->mode == ZEROMV) { 2447 if (mbmi->ref_frame[0] != LAST_FRAME) 2448 cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; 2449 else 2450 cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; 2451 } else if (mbmi->sb_type < BLOCK_8X8) { 2452 cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST; 2453 } else { 2454 cpi->zbin_mode_boost = MV_ZBIN_BOOST; 2455 } 2456 } else { 2457 cpi->zbin_mode_boost = INTRA_ZBIN_BOOST; 2458 } 2459 } 2460 2461 vp9_update_zbin_extra(cpi, x); 2462 } 2463 2464 if (!is_inter_block(mbmi)) { 2465 vp9_encode_intra_block_y(x, MAX(bsize, BLOCK_8X8)); 2466 vp9_encode_intra_block_uv(x, MAX(bsize, BLOCK_8X8)); 2467 if (output_enabled) 2468 sum_intra_stats(cpi, mi); 2469 } else { 2470 int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])]; 2471 YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx]; 2472 YV12_BUFFER_CONFIG *second_ref_fb = NULL; 2473 if (has_second_ref(mbmi)) { 2474 idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])]; 2475 second_ref_fb = &cm->yv12_fb[idx]; 2476 } 2477 2478 assert(cm->frame_type != KEY_FRAME); 2479 2480 setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col, 2481 &xd->scale_factor[0]); 2482 setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col, 2483 &xd->scale_factor[1]); 2484 2485 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); 2486 } 2487 2488 if (!is_inter_block(mbmi)) { 2489 vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8)); 2490 } else if (!x->skip) { 2491 vp9_encode_sb(x, MAX(bsize, BLOCK_8X8)); 2492 vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8)); 2493 } else { 2494 int mb_skip_context = xd->left_available ? mi_8x8[-1]->mbmi.skip_coeff : 0; 2495 mb_skip_context += mi_8x8[-mis] ? mi_8x8[-mis]->mbmi.skip_coeff : 0; 2496 2497 mbmi->skip_coeff = 1; 2498 if (output_enabled) 2499 cm->counts.mbskip[mb_skip_context][1]++; 2500 reset_skip_context(xd, MAX(bsize, BLOCK_8X8)); 2501 } 2502 2503 if (output_enabled) { 2504 if (cm->tx_mode == TX_MODE_SELECT && 2505 mbmi->sb_type >= BLOCK_8X8 && 2506 !(is_inter_block(mbmi) && 2507 (mbmi->skip_coeff || 2508 vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) { 2509 const uint8_t context = vp9_get_pred_context_tx_size(xd); 2510 ++get_tx_counts(max_txsize_lookup[bsize], 2511 context, &cm->counts.tx)[mbmi->tx_size]; 2512 } else { 2513 int x, y; 2514 TX_SIZE sz = tx_mode_to_biggest_tx_size[cm->tx_mode]; 2515 assert(sizeof(tx_mode_to_biggest_tx_size) / 2516 sizeof(tx_mode_to_biggest_tx_size[0]) == TX_MODES); 2517 // The new intra coding scheme requires no change of transform size 2518 if (is_inter_block(&mi->mbmi)) { 2519 if (sz == TX_32X32 && bsize < BLOCK_32X32) 2520 sz = TX_16X16; 2521 if (sz == TX_16X16 && bsize < BLOCK_16X16) 2522 sz = TX_8X8; 2523 if (sz == TX_8X8 && bsize < BLOCK_8X8) 2524 sz = TX_4X4; 2525 } else if (bsize >= BLOCK_8X8) { 2526 sz = mbmi->tx_size; 2527 } else { 2528 sz = TX_4X4; 2529 } 2530 2531 for (y = 0; y < mi_height; y++) 2532 for (x = 0; x < mi_width; x++) 2533 if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) 2534 mi_8x8[mis * y + x]->mbmi.tx_size = sz; 2535 } 2536 } 2537 } 2538