1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef VP9_ENCODER_VP9_ONYX_INT_H_ 12 #define VP9_ENCODER_VP9_ONYX_INT_H_ 13 14 #include <stdio.h> 15 16 #include "./vpx_config.h" 17 #include "vpx_ports/mem.h" 18 #include "vpx/internal/vpx_codec_internal.h" 19 #include "vpx/vp8cx.h" 20 21 #include "vp9/common/vp9_ppflags.h" 22 #include "vp9/common/vp9_entropy.h" 23 #include "vp9/common/vp9_entropymode.h" 24 #include "vp9/common/vp9_onyxc_int.h" 25 26 #include "vp9/encoder/vp9_aq_cyclicrefresh.h" 27 #include "vp9/encoder/vp9_encodemb.h" 28 #include "vp9/encoder/vp9_firstpass.h" 29 #include "vp9/encoder/vp9_lookahead.h" 30 #include "vp9/encoder/vp9_mbgraph.h" 31 #include "vp9/encoder/vp9_mcomp.h" 32 #include "vp9/encoder/vp9_quantize.h" 33 #include "vp9/encoder/vp9_ratectrl.h" 34 #include "vp9/encoder/vp9_speed_features.h" 35 #include "vp9/encoder/vp9_svc_layercontext.h" 36 #include "vp9/encoder/vp9_tokenize.h" 37 #include "vp9/encoder/vp9_variance.h" 38 39 #ifdef __cplusplus 40 extern "C" { 41 #endif 42 43 // #define MODE_TEST_HIT_STATS 44 45 #define DEFAULT_GF_INTERVAL 10 46 47 #define MAX_MODES 30 48 #define MAX_REFS 6 49 50 typedef struct { 51 int nmvjointcost[MV_JOINTS]; 52 int nmvcosts[2][MV_VALS]; 53 int nmvcosts_hp[2][MV_VALS]; 54 55 vp9_prob segment_pred_probs[PREDICTION_PROBS]; 56 57 unsigned char *last_frame_seg_map_copy; 58 59 // 0 = Intra, Last, GF, ARF 60 signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; 61 // 0 = ZERO_MV, MV 62 signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; 63 64 FRAME_CONTEXT fc; 65 } CODING_CONTEXT; 66 67 // This enumerator type needs to be kept aligned with the mode order in 68 // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. 69 typedef enum { 70 THR_NEARESTMV, 71 THR_NEARESTA, 72 THR_NEARESTG, 73 74 THR_DC, 75 76 THR_NEWMV, 77 THR_NEWA, 78 THR_NEWG, 79 80 THR_NEARMV, 81 THR_NEARA, 82 THR_COMP_NEARESTLA, 83 THR_COMP_NEARESTGA, 84 85 THR_TM, 86 87 THR_COMP_NEARLA, 88 THR_COMP_NEWLA, 89 THR_NEARG, 90 THR_COMP_NEARGA, 91 THR_COMP_NEWGA, 92 93 THR_ZEROMV, 94 THR_ZEROG, 95 THR_ZEROA, 96 THR_COMP_ZEROLA, 97 THR_COMP_ZEROGA, 98 99 THR_H_PRED, 100 THR_V_PRED, 101 THR_D135_PRED, 102 THR_D207_PRED, 103 THR_D153_PRED, 104 THR_D63_PRED, 105 THR_D117_PRED, 106 THR_D45_PRED, 107 } THR_MODES; 108 109 typedef enum { 110 THR_LAST, 111 THR_GOLD, 112 THR_ALTR, 113 THR_COMP_LA, 114 THR_COMP_GA, 115 THR_INTRA, 116 } THR_MODES_SUB8X8; 117 118 typedef enum { 119 // encode_breakout is disabled. 120 ENCODE_BREAKOUT_DISABLED = 0, 121 // encode_breakout is enabled. 122 ENCODE_BREAKOUT_ENABLED = 1, 123 // encode_breakout is enabled with small max_thresh limit. 124 ENCODE_BREAKOUT_LIMITED = 2 125 } ENCODE_BREAKOUT_TYPE; 126 127 typedef enum { 128 NORMAL = 0, 129 FOURFIVE = 1, 130 THREEFIVE = 2, 131 ONETWO = 3 132 } VPX_SCALING; 133 134 typedef enum { 135 USAGE_LOCAL_FILE_PLAYBACK = 0, 136 USAGE_STREAM_FROM_SERVER = 1, 137 USAGE_CONSTRAINED_QUALITY = 2, 138 USAGE_CONSTANT_QUALITY = 3, 139 } END_USAGE; 140 141 typedef enum { 142 // Good Quality Fast Encoding. The encoder balances quality with the 143 // amount of time it takes to encode the output. (speed setting 144 // controls how fast) 145 MODE_GOODQUALITY = 1, 146 147 // One Pass - Best Quality. The encoder places priority on the 148 // quality of the output over encoding speed. The output is compressed 149 // at the highest possible quality. This option takes the longest 150 // amount of time to encode. (speed setting ignored) 151 MODE_BESTQUALITY = 2, 152 153 // Two Pass - First Pass. The encoder generates a file of statistics 154 // for use in the second encoding pass. (speed setting controls how fast) 155 MODE_FIRSTPASS = 3, 156 157 // Two Pass - Second Pass. The encoder uses the statistics that were 158 // generated in the first encoding pass to create the compressed 159 // output. (speed setting controls how fast) 160 MODE_SECONDPASS = 4, 161 162 // Two Pass - Second Pass Best. The encoder uses the statistics that 163 // were generated in the first encoding pass to create the compressed 164 // output using the highest possible quality, and taking a 165 // longer amount of time to encode. (speed setting ignored) 166 MODE_SECONDPASS_BEST = 5, 167 168 // Realtime/Live Encoding. This mode is optimized for realtime 169 // encoding (for example, capturing a television signal or feed from 170 // a live camera). (speed setting controls how fast) 171 MODE_REALTIME = 6, 172 } MODE; 173 174 typedef enum { 175 FRAMEFLAGS_KEY = 1 << 0, 176 FRAMEFLAGS_GOLDEN = 1 << 1, 177 FRAMEFLAGS_ALTREF = 1 << 2, 178 } FRAMETYPE_FLAGS; 179 180 typedef enum { 181 NO_AQ = 0, 182 VARIANCE_AQ = 1, 183 COMPLEXITY_AQ = 2, 184 CYCLIC_REFRESH_AQ = 3, 185 AQ_MODE_COUNT // This should always be the last member of the enum 186 } AQ_MODE; 187 188 typedef struct { 189 int version; // 4 versions of bitstream defined: 190 // 0 - best quality/slowest decode, 191 // 3 - lowest quality/fastest decode 192 int width; // width of data passed to the compressor 193 int height; // height of data passed to the compressor 194 double framerate; // set to passed in framerate 195 int64_t target_bandwidth; // bandwidth to be used in kilobits per second 196 197 int noise_sensitivity; // pre processing blur: recommendation 0 198 int sharpness; // sharpening output: recommendation 0: 199 int cpu_used; 200 unsigned int rc_max_intra_bitrate_pct; 201 202 MODE mode; 203 204 // Key Framing Operations 205 int auto_key; // autodetect cut scenes and set the keyframes 206 int key_freq; // maximum distance to key frame. 207 208 int lag_in_frames; // how many frames lag before we start encoding 209 210 // ---------------------------------------------------------------- 211 // DATARATE CONTROL OPTIONS 212 213 END_USAGE end_usage; // vbr or cbr 214 215 // buffer targeting aggressiveness 216 int under_shoot_pct; 217 int over_shoot_pct; 218 219 // buffering parameters 220 int64_t starting_buffer_level; // in seconds 221 int64_t optimal_buffer_level; 222 int64_t maximum_buffer_size; 223 224 // Frame drop threshold. 225 int drop_frames_water_mark; 226 227 // controlling quality 228 int fixed_q; 229 int worst_allowed_q; 230 int best_allowed_q; 231 int cq_level; 232 int lossless; 233 AQ_MODE aq_mode; // Adaptive Quantization mode 234 235 // Enable feature to reduce the frame quantization every x frames. 236 int frame_periodic_boost; 237 238 // two pass datarate control 239 int two_pass_vbrbias; // two pass datarate control tweaks 240 int two_pass_vbrmin_section; 241 int two_pass_vbrmax_section; 242 // END DATARATE CONTROL OPTIONS 243 // ---------------------------------------------------------------- 244 245 // Spatial and temporal scalability. 246 int ss_number_layers; // Number of spatial layers. 247 int ts_number_layers; // Number of temporal layers. 248 // Bitrate allocation for spatial layers. 249 int ss_target_bitrate[VPX_SS_MAX_LAYERS]; 250 // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. 251 int ts_target_bitrate[VPX_TS_MAX_LAYERS]; 252 int ts_rate_decimator[VPX_TS_MAX_LAYERS]; 253 254 // these parameters aren't to be used in final build don't use!!! 255 int play_alternate; 256 int alt_freq; 257 258 int encode_breakout; // early breakout : for video conf recommend 800 259 260 /* Bitfield defining the error resiliency features to enable. 261 * Can provide decodable frames after losses in previous 262 * frames and decodable partitions after losses in the same frame. 263 */ 264 unsigned int error_resilient_mode; 265 266 /* Bitfield defining the parallel decoding mode where the 267 * decoding in successive frames may be conducted in parallel 268 * just by decoding the frame headers. 269 */ 270 unsigned int frame_parallel_decoding_mode; 271 272 int arnr_max_frames; 273 int arnr_strength; 274 int arnr_type; 275 276 int tile_columns; 277 int tile_rows; 278 279 struct vpx_fixed_buf two_pass_stats_in; 280 struct vpx_codec_pkt_list *output_pkt_list; 281 282 vp8e_tuning tuning; 283 } VP9_CONFIG; 284 285 typedef struct VP9_COMP { 286 QUANTS quants; 287 MACROBLOCK mb; 288 VP9_COMMON common; 289 VP9_CONFIG oxcf; 290 struct lookahead_ctx *lookahead; 291 struct lookahead_entry *source; 292 #if CONFIG_MULTIPLE_ARF 293 struct lookahead_entry *alt_ref_source[REF_FRAMES]; 294 #else 295 struct lookahead_entry *alt_ref_source; 296 #endif 297 struct lookahead_entry *last_source; 298 299 YV12_BUFFER_CONFIG *Source; 300 YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames 301 YV12_BUFFER_CONFIG *un_scaled_source; 302 YV12_BUFFER_CONFIG scaled_source; 303 YV12_BUFFER_CONFIG *unscaled_last_source; 304 YV12_BUFFER_CONFIG scaled_last_source; 305 306 int key_frame_frequency; 307 308 int gold_is_last; // gold same as last frame ( short circuit gold searches) 309 int alt_is_last; // Alt same as last ( short circuit altref search) 310 int gold_is_alt; // don't do both alt and gold search ( just do gold). 311 312 int scaled_ref_idx[3]; 313 int lst_fb_idx; 314 int gld_fb_idx; 315 int alt_fb_idx; 316 317 #if CONFIG_MULTIPLE_ARF 318 int alt_ref_fb_idx[REF_FRAMES - 3]; 319 #endif 320 int refresh_last_frame; 321 int refresh_golden_frame; 322 int refresh_alt_ref_frame; 323 324 int ext_refresh_frame_flags_pending; 325 int ext_refresh_last_frame; 326 int ext_refresh_golden_frame; 327 int ext_refresh_alt_ref_frame; 328 329 int ext_refresh_frame_context_pending; 330 int ext_refresh_frame_context; 331 332 YV12_BUFFER_CONFIG last_frame_uf; 333 334 TOKENEXTRA *tok; 335 unsigned int tok_count[4][1 << 6]; 336 337 #if CONFIG_MULTIPLE_ARF 338 // Position within a frame coding order (including any additional ARF frames). 339 unsigned int sequence_number; 340 // Next frame in naturally occurring order that has not yet been coded. 341 int next_frame_in_order; 342 #endif 343 344 // Ambient reconstruction err target for force key frames 345 int ambient_err; 346 347 // Thresh_mult is used to set a threshold for the rd score. A higher value 348 // means that we will accept the best mode so far more often. This number 349 // is used in combination with the current block size, and thresh_freq_fact 350 // to pick a threshold. 351 int rd_thresh_mult[MAX_MODES]; 352 int rd_thresh_mult_sub8x8[MAX_REFS]; 353 354 int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; 355 int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; 356 int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; 357 int rd_thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS]; 358 359 int64_t rd_comp_pred_diff[REFERENCE_MODES]; 360 int64_t rd_prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; 361 int64_t rd_tx_select_diff[TX_MODES]; 362 // FIXME(rbultje) can this overflow? 363 int rd_tx_select_threshes[MAX_REF_FRAMES][TX_MODES]; 364 365 int64_t rd_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; 366 int64_t rd_filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; 367 int64_t rd_filter_cache[SWITCHABLE_FILTER_CONTEXTS]; 368 int64_t mask_filter_rd; 369 370 int RDMULT; 371 int RDDIV; 372 373 CODING_CONTEXT coding_context; 374 375 int zbin_mode_boost; 376 int zbin_mode_boost_enabled; 377 int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames 378 int active_arnr_strength; // <= cpi->oxcf.arnr_max_strength 379 380 double output_framerate; 381 int64_t last_time_stamp_seen; 382 int64_t last_end_time_stamp_seen; 383 int64_t first_time_stamp_ever; 384 385 RATE_CONTROL rc; 386 387 int cq_target_quality; 388 389 vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; 390 vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES]; 391 392 struct vpx_codec_pkt_list *output_pkt_list; 393 394 MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; 395 int mbgraph_n_frames; // number of frames filled in the above 396 int static_mb_pct; // % forced skip mbs by segmentation 397 int seg0_progress, seg0_idx, seg0_cnt; 398 399 // for real time encoding 400 int speed; 401 402 int cpu_used; 403 int pass; 404 405 int ref_frame_flags; 406 407 SPEED_FEATURES sf; 408 409 unsigned int max_mv_magnitude; 410 int mv_step_param; 411 412 // Default value is 1. From first pass stats, encode_breakout may be disabled. 413 ENCODE_BREAKOUT_TYPE allow_encode_breakout; 414 415 // Get threshold from external input. In real time mode, it can be 416 // overwritten according to encoding speed. 417 int encode_breakout; 418 419 unsigned char *segmentation_map; 420 421 // segment threashold for encode breakout 422 int segment_encode_breakout[MAX_SEGMENTS]; 423 424 unsigned char *complexity_map; 425 426 unsigned char *active_map; 427 unsigned int active_map_enabled; 428 429 CYCLIC_REFRESH *cyclic_refresh; 430 431 fractional_mv_step_fp *find_fractional_mv_step; 432 fractional_mv_step_comp_fp *find_fractional_mv_step_comp; 433 vp9_full_search_fn_t full_search_sad; 434 vp9_refining_search_fn_t refining_search_sad; 435 vp9_diamond_search_fn_t diamond_search_sad; 436 vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES]; 437 uint64_t time_receive_data; 438 uint64_t time_compress_data; 439 uint64_t time_pick_lpf; 440 uint64_t time_encode_sb_row; 441 442 struct twopass_rc twopass; 443 444 YV12_BUFFER_CONFIG alt_ref_buffer; 445 YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS]; 446 int fixed_divide[512]; 447 448 #if CONFIG_INTERNAL_STATS 449 unsigned int mode_chosen_counts[MAX_MODES]; 450 451 int count; 452 double total_y; 453 double total_u; 454 double total_v; 455 double total; 456 uint64_t total_sq_error; 457 uint64_t total_samples; 458 459 double totalp_y; 460 double totalp_u; 461 double totalp_v; 462 double totalp; 463 uint64_t totalp_sq_error; 464 uint64_t totalp_samples; 465 466 int bytes; 467 double summed_quality; 468 double summed_weights; 469 double summedp_quality; 470 double summedp_weights; 471 unsigned int tot_recode_hits; 472 473 474 double total_ssimg_y; 475 double total_ssimg_u; 476 double total_ssimg_v; 477 double total_ssimg_all; 478 479 int b_calculate_ssimg; 480 #endif 481 int b_calculate_psnr; 482 483 // Per MB activity measurement 484 unsigned int activity_avg; 485 unsigned int *mb_activity_map; 486 int *mb_norm_activity_map; 487 488 int droppable; 489 490 int dummy_packing; /* flag to indicate if packing is dummy */ 491 492 unsigned int tx_stepdown_count[TX_SIZES]; 493 494 int initial_width; 495 int initial_height; 496 497 int use_svc; 498 499 SVC svc; 500 501 #if CONFIG_MULTIPLE_ARF 502 // ARF tracking variables. 503 int multi_arf_enabled; 504 unsigned int frame_coding_order_period; 505 unsigned int new_frame_coding_order_period; 506 int frame_coding_order[MAX_LAG_BUFFERS * 2]; 507 int arf_buffer_idx[MAX_LAG_BUFFERS * 3 / 2]; 508 int arf_weight[MAX_LAG_BUFFERS]; 509 int arf_buffered; 510 int this_frame_weight; 511 int max_arf_level; 512 #endif 513 514 #ifdef MODE_TEST_HIT_STATS 515 // Debug / test stats 516 int64_t mode_test_hits[BLOCK_SIZES]; 517 #endif 518 } VP9_COMP; 519 520 void vp9_initialize_enc(); 521 522 struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf); 523 void vp9_remove_compressor(VP9_COMP *cpi); 524 525 void vp9_change_config(VP9_COMP *cpi, const VP9_CONFIG *oxcf); 526 527 // receive a frames worth of data. caller can assume that a copy of this 528 // frame is made and not just a copy of the pointer.. 529 int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, 530 YV12_BUFFER_CONFIG *sd, int64_t time_stamp, 531 int64_t end_time_stamp); 532 533 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, 534 size_t *size, uint8_t *dest, 535 int64_t *time_stamp, int64_t *time_end, int flush); 536 537 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, 538 vp9_ppflags_t *flags); 539 540 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags); 541 542 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags); 543 544 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, 545 YV12_BUFFER_CONFIG *sd); 546 547 int vp9_get_reference_enc(VP9_COMP *cpi, int index, 548 YV12_BUFFER_CONFIG **fb); 549 550 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, 551 YV12_BUFFER_CONFIG *sd); 552 553 int vp9_update_entropy(VP9_COMP *cpi, int update); 554 555 int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, 556 unsigned int rows, unsigned int cols, 557 int delta_q[MAX_SEGMENTS], 558 int delta_lf[MAX_SEGMENTS], 559 unsigned int threshold[MAX_SEGMENTS]); 560 561 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, 562 unsigned int rows, unsigned int cols); 563 564 int vp9_set_internal_size(VP9_COMP *cpi, 565 VPX_SCALING horiz_mode, VPX_SCALING vert_mode); 566 567 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, 568 unsigned int height); 569 570 void vp9_set_svc(VP9_COMP *cpi, int use_svc); 571 572 int vp9_get_quantizer(struct VP9_COMP *cpi); 573 574 static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, 575 MV_REFERENCE_FRAME ref_frame) { 576 if (ref_frame == LAST_FRAME) { 577 return cpi->lst_fb_idx; 578 } else if (ref_frame == GOLDEN_FRAME) { 579 return cpi->gld_fb_idx; 580 } else { 581 return cpi->alt_fb_idx; 582 } 583 } 584 585 static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( 586 VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { 587 VP9_COMMON * const cm = &cpi->common; 588 return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] 589 .buf; 590 } 591 592 // Intra only frames, golden frames (except alt ref overlays) and 593 // alt ref frames tend to be coded at a higher than ambient quality 594 static INLINE int vp9_frame_is_boosted(const VP9_COMP *cpi) { 595 return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || 596 (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); 597 } 598 599 static INLINE int get_token_alloc(int mb_rows, int mb_cols) { 600 // TODO(JBB): make this work for alpha channel and double check we can't 601 // exceed this token count if we have a 32x32 transform crossing a boundary 602 // at a multiple of 16. 603 // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full 604 // resolution. We assume up to 1 token per pixel, and then allow 605 // a head room of 4. 606 return mb_rows * mb_cols * (16 * 16 * 3 + 4); 607 } 608 609 int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, 610 const YV12_BUFFER_CONFIG *reference); 611 612 void vp9_alloc_compressor_data(VP9_COMP *cpi); 613 614 int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget); 615 616 int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, 617 double rate_target_ratio); 618 619 void vp9_scale_references(VP9_COMP *cpi); 620 621 void vp9_update_reference_frames(VP9_COMP *cpi); 622 623 extern const int q_trans[]; 624 625 int64_t vp9_rescale(int64_t val, int64_t num, int denom); 626 627 static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, 628 MV_REFERENCE_FRAME ref0, 629 MV_REFERENCE_FRAME ref1) { 630 xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME 631 : 0]; 632 xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME 633 : 0]; 634 } 635 636 #ifdef __cplusplus 637 } // extern "C" 638 #endif 639 640 #endif // VP9_ENCODER_VP9_ONYX_INT_H_ 641