1 /* 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 /** 12 * @file 13 * VP9 SVC encoding support via libvpx 14 */ 15 16 #include <assert.h> 17 #include <math.h> 18 #include <stdarg.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #define VPX_DISABLE_CTRL_TYPECHECKS 1 23 #define VPX_CODEC_DISABLE_COMPAT 1 24 #include "vpx/svc_context.h" 25 #include "vpx/vp8cx.h" 26 #include "vpx/vpx_encoder.h" 27 28 #ifdef __MINGW32__ 29 #define strtok_r strtok_s 30 #ifndef MINGW_HAS_SECURE_API 31 // proto from /usr/x86_64-w64-mingw32/include/sec_api/string_s.h 32 _CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context); 33 #endif /* MINGW_HAS_SECURE_API */ 34 #endif /* __MINGW32__ */ 35 36 #ifdef _MSC_VER 37 #define strdup _strdup 38 #define strtok_r strtok_s 39 #endif 40 41 #define SVC_REFERENCE_FRAMES 8 42 #define SUPERFRAME_SLOTS (8) 43 #define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2) 44 #define OPTION_BUFFER_SIZE 256 45 #define COMPONENTS 4 // psnr & sse statistics maintained for total, y, u, v 46 47 static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27"; 48 static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16"; 49 50 typedef struct SvcInternal { 51 char options[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_options 52 char quantizers[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_quantizers 53 char quantizers_keyframe[OPTION_BUFFER_SIZE]; // set by 54 // vpx_svc_set_quantizers 55 char scale_factors[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_scale_factors 56 57 // values extracted from option, quantizers 58 int scaling_factor_num[VPX_SS_MAX_LAYERS]; 59 int scaling_factor_den[VPX_SS_MAX_LAYERS]; 60 int quantizer_keyframe[VPX_SS_MAX_LAYERS]; 61 int quantizer[VPX_SS_MAX_LAYERS]; 62 63 // accumulated statistics 64 double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V 65 uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; 66 uint32_t bytes_sum[VPX_SS_MAX_LAYERS]; 67 68 // codec encoding values 69 int width; // width of highest layer 70 int height; // height of highest layer 71 int kf_dist; // distance between keyframes 72 73 // state variables 74 int encode_frame_count; 75 int frame_within_gop; 76 vpx_enc_frame_flags_t enc_frame_flags; 77 int layers; 78 int layer; 79 int is_keyframe; 80 81 size_t frame_size; 82 size_t buffer_size; 83 void *buffer; 84 85 char *rc_stats_buf; 86 size_t rc_stats_buf_size; 87 size_t rc_stats_buf_used; 88 89 char message_buffer[2048]; 90 vpx_codec_ctx_t *codec_ctx; 91 } SvcInternal; 92 93 // Superframe is used to generate an index of individual frames (i.e., layers) 94 struct Superframe { 95 int count; 96 uint32_t sizes[SUPERFRAME_SLOTS]; 97 uint32_t magnitude; 98 uint8_t buffer[SUPERFRAME_BUFFER_SIZE]; 99 size_t index_size; 100 }; 101 102 // One encoded frame layer 103 struct LayerData { 104 void *buf; // compressed data buffer 105 size_t size; // length of compressed data 106 struct LayerData *next; 107 }; 108 109 // create LayerData from encoder output 110 static struct LayerData *ld_create(void *buf, size_t size) { 111 struct LayerData *const layer_data = 112 (struct LayerData *)malloc(sizeof(*layer_data)); 113 if (layer_data == NULL) { 114 return NULL; 115 } 116 layer_data->buf = malloc(size); 117 if (layer_data->buf == NULL) { 118 free(layer_data); 119 return NULL; 120 } 121 memcpy(layer_data->buf, buf, size); 122 layer_data->size = size; 123 return layer_data; 124 } 125 126 // free LayerData 127 static void ld_free(struct LayerData *layer_data) { 128 if (layer_data) { 129 if (layer_data->buf) { 130 free(layer_data->buf); 131 layer_data->buf = NULL; 132 } 133 free(layer_data); 134 } 135 } 136 137 // add layer data to list 138 static void ld_list_add(struct LayerData **list, struct LayerData *layer_data) { 139 struct LayerData **p = list; 140 141 while (*p != NULL) p = &(*p)->next; 142 *p = layer_data; 143 layer_data->next = NULL; 144 } 145 146 // get accumulated size of layer data 147 static size_t ld_list_get_buffer_size(struct LayerData *list) { 148 struct LayerData *p; 149 size_t size = 0; 150 151 for (p = list; p != NULL; p = p->next) { 152 size += p->size; 153 } 154 return size; 155 } 156 157 // copy layer data to buffer 158 static void ld_list_copy_to_buffer(struct LayerData *list, uint8_t *buffer) { 159 struct LayerData *p; 160 161 for (p = list; p != NULL; p = p->next) { 162 buffer[0] = 1; 163 memcpy(buffer, p->buf, p->size); 164 buffer += p->size; 165 } 166 } 167 168 // free layer data list 169 static void ld_list_free(struct LayerData *list) { 170 struct LayerData *p = list; 171 172 while (p) { 173 list = list->next; 174 ld_free(p); 175 p = list; 176 } 177 } 178 179 static void sf_create_index(struct Superframe *sf) { 180 uint8_t marker = 0xc0; 181 int i; 182 uint32_t mag, mask; 183 uint8_t *bufp; 184 185 if (sf->count == 0 || sf->count >= 8) return; 186 187 // Add the number of frames to the marker byte 188 marker |= sf->count - 1; 189 190 // Choose the magnitude 191 for (mag = 0, mask = 0xff; mag < 4; ++mag) { 192 if (sf->magnitude < mask) break; 193 mask <<= 8; 194 mask |= 0xff; 195 } 196 marker |= mag << 3; 197 198 // Write the index 199 sf->index_size = 2 + (mag + 1) * sf->count; 200 bufp = sf->buffer; 201 202 *bufp++ = marker; 203 for (i = 0; i < sf->count; ++i) { 204 int this_sz = sf->sizes[i]; 205 uint32_t j; 206 207 for (j = 0; j <= mag; ++j) { 208 *bufp++ = this_sz & 0xff; 209 this_sz >>= 8; 210 } 211 } 212 *bufp++ = marker; 213 } 214 215 static SvcInternal *get_svc_internal(SvcContext *svc_ctx) { 216 if (svc_ctx == NULL) return NULL; 217 if (svc_ctx->internal == NULL) { 218 SvcInternal *const si = (SvcInternal *)malloc(sizeof(*si)); 219 if (si != NULL) { 220 memset(si, 0, sizeof(*si)); 221 } 222 svc_ctx->internal = si; 223 } 224 return (SvcInternal *)svc_ctx->internal; 225 } 226 227 static const SvcInternal *get_const_svc_internal(const SvcContext *svc_ctx) { 228 if (svc_ctx == NULL) return NULL; 229 return (const SvcInternal *)svc_ctx->internal; 230 } 231 232 static void svc_log_reset(SvcContext *svc_ctx) { 233 SvcInternal *const si = (SvcInternal *)svc_ctx->internal; 234 si->message_buffer[0] = '\0'; 235 } 236 237 static int svc_log(SvcContext *svc_ctx, SVC_LOG_LEVEL level, 238 const char *fmt, ...) { 239 char buf[512]; 240 int retval = 0; 241 va_list ap; 242 SvcInternal *const si = get_svc_internal(svc_ctx); 243 244 if (level > svc_ctx->log_level) { 245 return retval; 246 } 247 248 va_start(ap, fmt); 249 retval = vsnprintf(buf, sizeof(buf), fmt, ap); 250 va_end(ap); 251 252 if (svc_ctx->log_print) { 253 printf("%s", buf); 254 } else { 255 strncat(si->message_buffer, buf, 256 sizeof(si->message_buffer) - strlen(si->message_buffer) - 1); 257 } 258 259 if (level == SVC_LOG_ERROR) { 260 si->codec_ctx->err_detail = si->message_buffer; 261 } 262 return retval; 263 } 264 265 static vpx_codec_err_t set_option_encoding_mode(SvcContext *svc_ctx, 266 const char *value_str) { 267 if (strcmp(value_str, "i") == 0) { 268 svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_I; 269 } else if (strcmp(value_str, "alt-ip") == 0) { 270 svc_ctx->encoding_mode = ALT_INTER_LAYER_PREDICTION_IP; 271 } else if (strcmp(value_str, "ip") == 0) { 272 svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_IP; 273 } else if (strcmp(value_str, "gf") == 0) { 274 svc_ctx->encoding_mode = USE_GOLDEN_FRAME; 275 } else { 276 svc_log(svc_ctx, SVC_LOG_ERROR, "invalid encoding mode: %s", value_str); 277 return VPX_CODEC_INVALID_PARAM; 278 } 279 return VPX_CODEC_OK; 280 } 281 282 static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx, 283 const char *quantizer_values, 284 const int is_keyframe) { 285 char *input_string; 286 char *token; 287 const char *delim = ","; 288 char *save_ptr; 289 int found = 0; 290 int i, q; 291 vpx_codec_err_t res = VPX_CODEC_OK; 292 SvcInternal *const si = get_svc_internal(svc_ctx); 293 294 if (quantizer_values == NULL || strlen(quantizer_values) == 0) { 295 if (is_keyframe) { 296 // If there non settings for key frame, we will apply settings from 297 // non key frame. So just simply return here. 298 return VPX_CODEC_INVALID_PARAM; 299 } 300 input_string = strdup(DEFAULT_QUANTIZER_VALUES); 301 } else { 302 input_string = strdup(quantizer_values); 303 } 304 305 token = strtok_r(input_string, delim, &save_ptr); 306 for (i = 0; i < svc_ctx->spatial_layers; ++i) { 307 if (token != NULL) { 308 q = atoi(token); 309 if (q <= 0 || q > 100) { 310 svc_log(svc_ctx, SVC_LOG_ERROR, 311 "svc-quantizer-values: invalid value %s\n", token); 312 res = VPX_CODEC_INVALID_PARAM; 313 break; 314 } 315 token = strtok_r(NULL, delim, &save_ptr); 316 found = i + 1; 317 } else { 318 q = 0; 319 } 320 if (is_keyframe) { 321 si->quantizer_keyframe[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] 322 = q; 323 } else { 324 si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q; 325 } 326 } 327 if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) { 328 svc_log(svc_ctx, SVC_LOG_ERROR, 329 "svc: quantizers: %d values required, but only %d specified\n", 330 svc_ctx->spatial_layers, found); 331 res = VPX_CODEC_INVALID_PARAM; 332 } 333 free(input_string); 334 return res; 335 } 336 337 static void log_invalid_scale_factor(SvcContext *svc_ctx, const char *value) { 338 svc_log(svc_ctx, SVC_LOG_ERROR, "svc scale-factors: invalid value %s\n", 339 value); 340 } 341 342 static vpx_codec_err_t parse_scale_factors(SvcContext *svc_ctx, 343 const char *scale_factors) { 344 char *input_string; 345 char *token; 346 const char *delim = ","; 347 char *save_ptr; 348 int found = 0; 349 int i; 350 int64_t num, den; 351 vpx_codec_err_t res = VPX_CODEC_OK; 352 SvcInternal *const si = get_svc_internal(svc_ctx); 353 354 if (scale_factors == NULL || strlen(scale_factors) == 0) { 355 input_string = strdup(DEFAULT_SCALE_FACTORS); 356 } else { 357 input_string = strdup(scale_factors); 358 } 359 token = strtok_r(input_string, delim, &save_ptr); 360 for (i = 0; i < svc_ctx->spatial_layers; ++i) { 361 num = den = 0; 362 if (token != NULL) { 363 num = strtol(token, &token, 10); 364 if (num <= 0) { 365 log_invalid_scale_factor(svc_ctx, token); 366 res = VPX_CODEC_INVALID_PARAM; 367 break; 368 } 369 if (*token++ != '/') { 370 log_invalid_scale_factor(svc_ctx, token); 371 res = VPX_CODEC_INVALID_PARAM; 372 break; 373 } 374 den = strtol(token, &token, 10); 375 if (den <= 0) { 376 log_invalid_scale_factor(svc_ctx, token); 377 res = VPX_CODEC_INVALID_PARAM; 378 break; 379 } 380 token = strtok_r(NULL, delim, &save_ptr); 381 found = i + 1; 382 } 383 si->scaling_factor_num[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = 384 (int)num; 385 si->scaling_factor_den[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = 386 (int)den; 387 } 388 if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) { 389 svc_log(svc_ctx, SVC_LOG_ERROR, 390 "svc: scale-factors: %d values required, but only %d specified\n", 391 svc_ctx->spatial_layers, found); 392 res = VPX_CODEC_INVALID_PARAM; 393 } 394 free(input_string); 395 return res; 396 } 397 398 /** 399 * Parse SVC encoding options 400 * Format: encoding-mode=<svc_mode>,layers=<layer_count> 401 * scale-factors=<n1>/<d1>,<n2>/<d2>,... 402 * quantizers=<q1>,<q2>,... 403 * svc_mode = [i|ip|alt_ip|gf] 404 */ 405 static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { 406 char *input_string; 407 char *option_name; 408 char *option_value; 409 char *input_ptr; 410 int is_keyframe_qaunt_set = 0; 411 vpx_codec_err_t res = VPX_CODEC_OK; 412 413 if (options == NULL) return VPX_CODEC_OK; 414 input_string = strdup(options); 415 416 // parse option name 417 option_name = strtok_r(input_string, "=", &input_ptr); 418 while (option_name != NULL) { 419 // parse option value 420 option_value = strtok_r(NULL, " ", &input_ptr); 421 if (option_value == NULL) { 422 svc_log(svc_ctx, SVC_LOG_ERROR, "option missing value: %s\n", 423 option_name); 424 res = VPX_CODEC_INVALID_PARAM; 425 break; 426 } 427 if (strcmp("encoding-mode", option_name) == 0) { 428 res = set_option_encoding_mode(svc_ctx, option_value); 429 if (res != VPX_CODEC_OK) break; 430 } else if (strcmp("layers", option_name) == 0) { 431 svc_ctx->spatial_layers = atoi(option_value); 432 } else if (strcmp("scale-factors", option_name) == 0) { 433 res = parse_scale_factors(svc_ctx, option_value); 434 if (res != VPX_CODEC_OK) break; 435 } else if (strcmp("quantizers", option_name) == 0) { 436 res = parse_quantizer_values(svc_ctx, option_value, 0); 437 if (res != VPX_CODEC_OK) break; 438 if (!is_keyframe_qaunt_set) { 439 SvcInternal *const si = get_svc_internal(svc_ctx); 440 memcpy(get_svc_internal(svc_ctx)->quantizer_keyframe, si->quantizer, 441 sizeof(si->quantizer)); 442 } 443 } else if (strcmp("quantizers-keyframe", option_name) == 0) { 444 res = parse_quantizer_values(svc_ctx, option_value, 1); 445 if (res != VPX_CODEC_OK) break; 446 is_keyframe_qaunt_set = 1; 447 } else { 448 svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name); 449 res = VPX_CODEC_INVALID_PARAM; 450 break; 451 } 452 option_name = strtok_r(NULL, "=", &input_ptr); 453 } 454 free(input_string); 455 return res; 456 } 457 458 vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) { 459 SvcInternal *const si = get_svc_internal(svc_ctx); 460 if (svc_ctx == NULL || options == NULL || si == NULL) { 461 return VPX_CODEC_INVALID_PARAM; 462 } 463 strncpy(si->options, options, sizeof(si->options)); 464 si->options[sizeof(si->options) - 1] = '\0'; 465 return VPX_CODEC_OK; 466 } 467 468 vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx, 469 const char *quantizers, 470 const int is_for_keyframe) { 471 SvcInternal *const si = get_svc_internal(svc_ctx); 472 if (svc_ctx == NULL || quantizers == NULL || si == NULL) { 473 return VPX_CODEC_INVALID_PARAM; 474 } 475 if (is_for_keyframe) { 476 strncpy(si->quantizers_keyframe, quantizers, sizeof(si->quantizers)); 477 si->quantizers_keyframe[sizeof(si->quantizers_keyframe) - 1] = '\0'; 478 } else { 479 strncpy(si->quantizers, quantizers, sizeof(si->quantizers)); 480 si->quantizers[sizeof(si->quantizers) - 1] = '\0'; 481 } 482 return VPX_CODEC_OK; 483 } 484 485 vpx_codec_err_t vpx_svc_set_scale_factors(SvcContext *svc_ctx, 486 const char *scale_factors) { 487 SvcInternal *const si = get_svc_internal(svc_ctx); 488 if (svc_ctx == NULL || scale_factors == NULL || si == NULL) { 489 return VPX_CODEC_INVALID_PARAM; 490 } 491 strncpy(si->scale_factors, scale_factors, sizeof(si->scale_factors)); 492 si->scale_factors[sizeof(si->scale_factors) - 1] = '\0'; 493 return VPX_CODEC_OK; 494 } 495 496 vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, 497 vpx_codec_iface_t *iface, 498 vpx_codec_enc_cfg_t *enc_cfg) { 499 int max_intra_size_pct; 500 vpx_codec_err_t res; 501 SvcInternal *const si = get_svc_internal(svc_ctx); 502 if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL || 503 enc_cfg == NULL) { 504 return VPX_CODEC_INVALID_PARAM; 505 } 506 if (si == NULL) return VPX_CODEC_MEM_ERROR; 507 508 si->codec_ctx = codec_ctx; 509 510 si->width = enc_cfg->g_w; 511 si->height = enc_cfg->g_h; 512 513 if (enc_cfg->kf_max_dist < 2) { 514 svc_log(svc_ctx, SVC_LOG_ERROR, "key frame distance too small: %d\n", 515 enc_cfg->kf_max_dist); 516 return VPX_CODEC_INVALID_PARAM; 517 } 518 si->kf_dist = enc_cfg->kf_max_dist; 519 520 if (svc_ctx->spatial_layers == 0) 521 svc_ctx->spatial_layers = VPX_SS_DEFAULT_LAYERS; 522 if (svc_ctx->spatial_layers < 1 || 523 svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) { 524 svc_log(svc_ctx, SVC_LOG_ERROR, "spatial layers: invalid value: %d\n", 525 svc_ctx->spatial_layers); 526 return VPX_CODEC_INVALID_PARAM; 527 } 528 529 res = parse_quantizer_values(svc_ctx, si->quantizers, 0); 530 if (res != VPX_CODEC_OK) return res; 531 532 res = parse_quantizer_values(svc_ctx, si->quantizers_keyframe, 1); 533 if (res != VPX_CODEC_OK) 534 memcpy(si->quantizer_keyframe, si->quantizer, sizeof(si->quantizer)); 535 536 res = parse_scale_factors(svc_ctx, si->scale_factors); 537 if (res != VPX_CODEC_OK) return res; 538 539 // Parse aggregate command line options. Options must start with 540 // "layers=xx" then followed by other options 541 res = parse_options(svc_ctx, si->options); 542 if (res != VPX_CODEC_OK) return res; 543 544 si->layers = svc_ctx->spatial_layers; 545 546 // Assign target bitrate for each layer. We calculate the ratio 547 // from the resolution for now. 548 // TODO(Minghai): Optimize the mechanism of allocating bits after 549 // implementing svc two pass rate control. 550 if (si->layers > 1) { 551 int i; 552 float total = 0; 553 float alloc_ratio[VPX_SS_MAX_LAYERS] = {0}; 554 555 assert(si->layers <= VPX_SS_MAX_LAYERS); 556 for (i = 0; i < si->layers; ++i) { 557 int pos = i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers; 558 if (pos < VPX_SS_MAX_LAYERS && si->scaling_factor_den[pos] > 0) { 559 alloc_ratio[i] = (float)(si->scaling_factor_num[pos] * 1.0 / 560 si->scaling_factor_den[pos]); 561 562 alloc_ratio[i] *= alloc_ratio[i]; 563 total += alloc_ratio[i]; 564 } 565 } 566 567 for (i = 0; i < si->layers; ++i) { 568 if (total > 0) { 569 enc_cfg->ss_target_bitrate[i] = (unsigned int) 570 (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); 571 } 572 } 573 } 574 575 // modify encoder configuration 576 enc_cfg->ss_number_layers = si->layers; 577 enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder. 578 enc_cfg->kf_mode = VPX_KF_DISABLED; 579 // Lag in frames not currently supported 580 enc_cfg->g_lag_in_frames = 0; 581 582 // TODO(ivanmaltz): determine if these values need to be set explicitly for 583 // svc, or if the normal default/override mechanism can be used 584 enc_cfg->rc_dropframe_thresh = 0; 585 enc_cfg->rc_end_usage = VPX_CBR; 586 enc_cfg->rc_resize_allowed = 0; 587 588 if (enc_cfg->g_pass == VPX_RC_ONE_PASS) { 589 enc_cfg->rc_min_quantizer = 33; 590 enc_cfg->rc_max_quantizer = 33; 591 } 592 593 enc_cfg->rc_undershoot_pct = 100; 594 enc_cfg->rc_overshoot_pct = 15; 595 enc_cfg->rc_buf_initial_sz = 500; 596 enc_cfg->rc_buf_optimal_sz = 600; 597 enc_cfg->rc_buf_sz = 1000; 598 enc_cfg->g_error_resilient = 1; 599 600 // Initialize codec 601 res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR); 602 if (res != VPX_CODEC_OK) { 603 svc_log(svc_ctx, SVC_LOG_ERROR, "svc_enc_init error\n"); 604 return res; 605 } 606 607 vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1); 608 vpx_codec_control(codec_ctx, VP8E_SET_CPUUSED, 1); 609 vpx_codec_control(codec_ctx, VP8E_SET_STATIC_THRESHOLD, 1); 610 vpx_codec_control(codec_ctx, VP8E_SET_NOISE_SENSITIVITY, 1); 611 vpx_codec_control(codec_ctx, VP8E_SET_TOKEN_PARTITIONS, 1); 612 613 max_intra_size_pct = 614 (int)(((double)enc_cfg->rc_buf_optimal_sz * 0.5) * 615 ((double)enc_cfg->g_timebase.den / enc_cfg->g_timebase.num) / 10.0); 616 vpx_codec_control(codec_ctx, VP8E_SET_MAX_INTRA_BITRATE_PCT, 617 max_intra_size_pct); 618 return VPX_CODEC_OK; 619 } 620 621 // SVC Algorithm flags - these get mapped to VP8_EFLAG_* defined in vp8cx.h 622 623 // encoder should reference the last frame 624 #define USE_LAST (1 << 0) 625 626 // encoder should reference the alt ref frame 627 #define USE_ARF (1 << 1) 628 629 // encoder should reference the golden frame 630 #define USE_GF (1 << 2) 631 632 // encoder should copy current frame to the last frame buffer 633 #define UPDATE_LAST (1 << 3) 634 635 // encoder should copy current frame to the alt ref frame buffer 636 #define UPDATE_ARF (1 << 4) 637 638 // encoder should copy current frame to the golden frame 639 #define UPDATE_GF (1 << 5) 640 641 static int map_vp8_flags(int svc_flags) { 642 int flags = 0; 643 644 if (!(svc_flags & USE_LAST)) flags |= VP8_EFLAG_NO_REF_LAST; 645 if (!(svc_flags & USE_ARF)) flags |= VP8_EFLAG_NO_REF_ARF; 646 if (!(svc_flags & USE_GF)) flags |= VP8_EFLAG_NO_REF_GF; 647 648 if (svc_flags & UPDATE_LAST) { 649 // last is updated automatically 650 } else { 651 flags |= VP8_EFLAG_NO_UPD_LAST; 652 } 653 if (svc_flags & UPDATE_ARF) { 654 flags |= VP8_EFLAG_FORCE_ARF; 655 } else { 656 flags |= VP8_EFLAG_NO_UPD_ARF; 657 } 658 if (svc_flags & UPDATE_GF) { 659 flags |= VP8_EFLAG_FORCE_GF; 660 } else { 661 flags |= VP8_EFLAG_NO_UPD_GF; 662 } 663 return flags; 664 } 665 666 static void calculate_enc_frame_flags(SvcContext *svc_ctx) { 667 vpx_enc_frame_flags_t flags = VPX_EFLAG_FORCE_KF; 668 SvcInternal *const si = get_svc_internal(svc_ctx); 669 const int is_keyframe = (si->frame_within_gop == 0); 670 671 // keyframe layer zero is identical for all modes 672 if (is_keyframe && si->layer == 0) { 673 si->enc_frame_flags = VPX_EFLAG_FORCE_KF; 674 return; 675 } 676 677 switch (svc_ctx->encoding_mode) { 678 case ALT_INTER_LAYER_PREDICTION_IP: 679 if (si->layer == 0) { 680 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 681 } else if (is_keyframe) { 682 if (si->layer == si->layers - 1) { 683 flags = map_vp8_flags(USE_ARF | UPDATE_LAST); 684 } else { 685 flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF); 686 } 687 } else { 688 flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST); 689 } 690 break; 691 case INTER_LAYER_PREDICTION_I: 692 if (si->layer == 0) { 693 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 694 } else if (is_keyframe) { 695 flags = map_vp8_flags(USE_ARF | UPDATE_LAST); 696 } else { 697 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 698 } 699 break; 700 case INTER_LAYER_PREDICTION_IP: 701 if (si->layer == 0) { 702 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 703 } else if (is_keyframe) { 704 flags = map_vp8_flags(USE_ARF | UPDATE_LAST); 705 } else { 706 flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST); 707 } 708 break; 709 case USE_GOLDEN_FRAME: 710 if (2 * si->layers - SVC_REFERENCE_FRAMES <= si->layer) { 711 if (si->layer == 0) { 712 flags = map_vp8_flags(USE_LAST | USE_GF | UPDATE_LAST); 713 } else if (is_keyframe) { 714 flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF); 715 } else { 716 flags = map_vp8_flags(USE_LAST | USE_ARF | USE_GF | UPDATE_LAST); 717 } 718 } else { 719 if (si->layer == 0) { 720 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 721 } else if (is_keyframe) { 722 flags = map_vp8_flags(USE_ARF | UPDATE_LAST); 723 } else { 724 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 725 } 726 } 727 break; 728 default: 729 svc_log(svc_ctx, SVC_LOG_ERROR, "unexpected encoding mode: %d\n", 730 svc_ctx->encoding_mode); 731 break; 732 } 733 si->enc_frame_flags = flags; 734 } 735 736 vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx, 737 int layer, 738 unsigned int *width, 739 unsigned int *height) { 740 int w, h, index, num, den; 741 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 742 743 if (svc_ctx == NULL || si == NULL || width == NULL || height == NULL) { 744 return VPX_CODEC_INVALID_PARAM; 745 } 746 if (layer < 0 || layer >= si->layers) return VPX_CODEC_INVALID_PARAM; 747 748 index = layer + VPX_SS_MAX_LAYERS - si->layers; 749 num = si->scaling_factor_num[index]; 750 den = si->scaling_factor_den[index]; 751 if (num == 0 || den == 0) return VPX_CODEC_INVALID_PARAM; 752 753 w = si->width * num / den; 754 h = si->height * num / den; 755 756 // make height and width even to make chrome player happy 757 w += w % 2; 758 h += h % 2; 759 760 *width = w; 761 *height = h; 762 763 return VPX_CODEC_OK; 764 } 765 766 static void set_svc_parameters(SvcContext *svc_ctx, 767 vpx_codec_ctx_t *codec_ctx) { 768 int layer, layer_index; 769 vpx_svc_parameters_t svc_params; 770 SvcInternal *const si = get_svc_internal(svc_ctx); 771 772 memset(&svc_params, 0, sizeof(svc_params)); 773 svc_params.temporal_layer = 0; 774 svc_params.spatial_layer = si->layer; 775 svc_params.flags = si->enc_frame_flags; 776 777 layer = si->layer; 778 if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && 779 si->frame_within_gop == 0) { 780 // layers 1 & 3 don't exist in this mode, use the higher one 781 if (layer == 0 || layer == 2) { 782 layer += 1; 783 } 784 } 785 if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(svc_ctx, layer, 786 &svc_params.width, 787 &svc_params.height)) { 788 svc_log(svc_ctx, SVC_LOG_ERROR, "vpx_svc_get_layer_resolution failed\n"); 789 } 790 layer_index = layer + VPX_SS_MAX_LAYERS - si->layers; 791 792 if (codec_ctx->config.enc->g_pass == VPX_RC_ONE_PASS) { 793 if (vpx_svc_is_keyframe(svc_ctx)) { 794 svc_params.min_quantizer = si->quantizer_keyframe[layer_index]; 795 svc_params.max_quantizer = si->quantizer_keyframe[layer_index]; 796 } else { 797 svc_params.min_quantizer = si->quantizer[layer_index]; 798 svc_params.max_quantizer = si->quantizer[layer_index]; 799 } 800 } else { 801 svc_params.min_quantizer = codec_ctx->config.enc->rc_min_quantizer; 802 svc_params.max_quantizer = codec_ctx->config.enc->rc_max_quantizer; 803 } 804 805 svc_params.distance_from_i_frame = si->frame_within_gop; 806 807 // Use buffer i for layer i LST 808 svc_params.lst_fb_idx = si->layer; 809 810 // Use buffer i-1 for layer i Alt (Inter-layer prediction) 811 if (si->layer != 0) { 812 const int use_higher_layer = 813 svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && 814 si->frame_within_gop == 0; 815 svc_params.alt_fb_idx = use_higher_layer ? si->layer - 2 : si->layer - 1; 816 } 817 818 if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP) { 819 svc_params.gld_fb_idx = si->layer + 1; 820 } else { 821 if (si->layer < 2 * si->layers - SVC_REFERENCE_FRAMES) 822 svc_params.gld_fb_idx = svc_params.lst_fb_idx; 823 else 824 svc_params.gld_fb_idx = 2 * si->layers - 1 - si->layer; 825 } 826 827 svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, %dx%d, q: %d\n", 828 si->encode_frame_count, si->layer, svc_params.width, 829 svc_params.height, svc_params.min_quantizer); 830 831 if (svc_params.flags == VPX_EFLAG_FORCE_KF) { 832 svc_log(svc_ctx, SVC_LOG_DEBUG, "flags == VPX_EFLAG_FORCE_KF\n"); 833 } else { 834 svc_log( 835 svc_ctx, SVC_LOG_DEBUG, "Using: LST/GLD/ALT [%2d|%2d|%2d]\n", 836 svc_params.flags & VP8_EFLAG_NO_REF_LAST ? -1 : svc_params.lst_fb_idx, 837 svc_params.flags & VP8_EFLAG_NO_REF_GF ? -1 : svc_params.gld_fb_idx, 838 svc_params.flags & VP8_EFLAG_NO_REF_ARF ? -1 : svc_params.alt_fb_idx); 839 svc_log( 840 svc_ctx, SVC_LOG_DEBUG, "Updating: LST/GLD/ALT [%2d|%2d|%2d]\n", 841 svc_params.flags & VP8_EFLAG_NO_UPD_LAST ? -1 : svc_params.lst_fb_idx, 842 svc_params.flags & VP8_EFLAG_NO_UPD_GF ? -1 : svc_params.gld_fb_idx, 843 svc_params.flags & VP8_EFLAG_NO_UPD_ARF ? -1 : svc_params.alt_fb_idx); 844 } 845 846 vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &svc_params); 847 } 848 849 /** 850 * Encode a frame into multiple layers 851 * Create a superframe containing the individual layers 852 */ 853 vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, 854 struct vpx_image *rawimg, vpx_codec_pts_t pts, 855 int64_t duration, int deadline) { 856 vpx_codec_err_t res; 857 vpx_codec_iter_t iter; 858 const vpx_codec_cx_pkt_t *cx_pkt; 859 struct LayerData *cx_layer_list = NULL; 860 struct LayerData *layer_data; 861 struct Superframe superframe; 862 SvcInternal *const si = get_svc_internal(svc_ctx); 863 if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) { 864 return VPX_CODEC_INVALID_PARAM; 865 } 866 867 memset(&superframe, 0, sizeof(superframe)); 868 svc_log_reset(svc_ctx); 869 si->rc_stats_buf_used = 0; 870 871 si->layers = svc_ctx->spatial_layers; 872 if (si->frame_within_gop >= si->kf_dist || 873 si->encode_frame_count == 0) { 874 si->frame_within_gop = 0; 875 } 876 si->is_keyframe = (si->frame_within_gop == 0); 877 si->frame_size = 0; 878 879 if (rawimg != NULL) { 880 svc_log(svc_ctx, SVC_LOG_DEBUG, 881 "vpx_svc_encode layers: %d, frame_count: %d, " 882 "frame_within_gop: %d\n", si->layers, si->encode_frame_count, 883 si->frame_within_gop); 884 } 885 886 // encode each layer 887 for (si->layer = 0; si->layer < si->layers; ++si->layer) { 888 if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && 889 si->is_keyframe && (si->layer == 1 || si->layer == 3)) { 890 svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer); 891 continue; 892 } 893 894 if (rawimg != NULL) { 895 calculate_enc_frame_flags(svc_ctx); 896 set_svc_parameters(svc_ctx, codec_ctx); 897 } 898 899 res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, 900 si->enc_frame_flags, deadline); 901 if (res != VPX_CODEC_OK) { 902 return res; 903 } 904 // save compressed data 905 iter = NULL; 906 while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { 907 switch (cx_pkt->kind) { 908 case VPX_CODEC_CX_FRAME_PKT: { 909 const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz); 910 si->bytes_sum[si->layer] += frame_pkt_size; 911 svc_log(svc_ctx, SVC_LOG_DEBUG, 912 "SVC frame: %d, layer: %d, size: %u\n", 913 si->encode_frame_count, si->layer, frame_pkt_size); 914 layer_data = 915 ld_create(cx_pkt->data.frame.buf, (size_t)frame_pkt_size); 916 if (layer_data == NULL) { 917 svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating LayerData\n"); 918 return VPX_CODEC_OK; 919 } 920 ld_list_add(&cx_layer_list, layer_data); 921 922 // save layer size in superframe index 923 superframe.sizes[superframe.count++] = frame_pkt_size; 924 superframe.magnitude |= frame_pkt_size; 925 break; 926 } 927 case VPX_CODEC_PSNR_PKT: { 928 int i; 929 svc_log(svc_ctx, SVC_LOG_DEBUG, 930 "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): " 931 "%2.3f %2.3f %2.3f %2.3f \n", 932 si->encode_frame_count, si->layer, 933 cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1], 934 cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]); 935 svc_log(svc_ctx, SVC_LOG_DEBUG, 936 "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): " 937 "%2.3f %2.3f %2.3f %2.3f \n", 938 si->encode_frame_count, si->layer, 939 cx_pkt->data.psnr.sse[0], cx_pkt->data.psnr.sse[1], 940 cx_pkt->data.psnr.sse[2], cx_pkt->data.psnr.sse[3]); 941 for (i = 0; i < COMPONENTS; i++) { 942 si->psnr_sum[si->layer][i] += cx_pkt->data.psnr.psnr[i]; 943 si->sse_sum[si->layer][i] += cx_pkt->data.psnr.sse[i]; 944 } 945 break; 946 } 947 case VPX_CODEC_STATS_PKT: { 948 size_t new_size = si->rc_stats_buf_used + 949 cx_pkt->data.twopass_stats.sz; 950 951 if (new_size > si->rc_stats_buf_size) { 952 char *p = (char*)realloc(si->rc_stats_buf, new_size); 953 if (p == NULL) { 954 svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating stats buf\n"); 955 break; 956 } 957 si->rc_stats_buf = p; 958 si->rc_stats_buf_size = new_size; 959 } 960 961 memcpy(si->rc_stats_buf + si->rc_stats_buf_used, 962 cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz); 963 si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz; 964 break; 965 } 966 default: { 967 break; 968 } 969 } 970 } 971 if (rawimg == NULL) { 972 break; 973 } 974 } 975 if (codec_ctx->config.enc->g_pass != VPX_RC_FIRST_PASS) { 976 // add superframe index to layer data list 977 sf_create_index(&superframe); 978 layer_data = ld_create(superframe.buffer, superframe.index_size); 979 ld_list_add(&cx_layer_list, layer_data); 980 981 // get accumulated size of layer data 982 si->frame_size = ld_list_get_buffer_size(cx_layer_list); 983 if (si->frame_size > 0) { 984 // all layers encoded, create single buffer with concatenated layers 985 if (si->frame_size > si->buffer_size) { 986 free(si->buffer); 987 si->buffer = malloc(si->frame_size); 988 if (si->buffer == NULL) { 989 ld_list_free(cx_layer_list); 990 return VPX_CODEC_MEM_ERROR; 991 } 992 si->buffer_size = si->frame_size; 993 } 994 // copy layer data into packet 995 ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer); 996 997 ld_list_free(cx_layer_list); 998 999 svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, " 1000 "pts: %d\n", si->encode_frame_count, si->is_keyframe, 1001 (int)si->frame_size, (int)pts); 1002 } 1003 } 1004 if (rawimg != NULL) { 1005 ++si->frame_within_gop; 1006 ++si->encode_frame_count; 1007 } 1008 1009 return VPX_CODEC_OK; 1010 } 1011 1012 const char *vpx_svc_get_message(const SvcContext *svc_ctx) { 1013 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1014 if (svc_ctx == NULL || si == NULL) return NULL; 1015 return si->message_buffer; 1016 } 1017 1018 void *vpx_svc_get_buffer(const SvcContext *svc_ctx) { 1019 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1020 if (svc_ctx == NULL || si == NULL) return NULL; 1021 return si->buffer; 1022 } 1023 1024 size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx) { 1025 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1026 if (svc_ctx == NULL || si == NULL) return 0; 1027 return si->frame_size; 1028 } 1029 1030 int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx) { 1031 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1032 if (svc_ctx == NULL || si == NULL) return 0; 1033 return si->encode_frame_count; 1034 } 1035 1036 int vpx_svc_is_keyframe(const SvcContext *svc_ctx) { 1037 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1038 if (svc_ctx == NULL || si == NULL) return 0; 1039 return si->is_keyframe; 1040 } 1041 1042 void vpx_svc_set_keyframe(SvcContext *svc_ctx) { 1043 SvcInternal *const si = get_svc_internal(svc_ctx); 1044 if (svc_ctx == NULL || si == NULL) return; 1045 si->frame_within_gop = 0; 1046 } 1047 1048 static double calc_psnr(double d) { 1049 if (d == 0) return 100; 1050 return -10.0 * log(d) / log(10.0); 1051 } 1052 1053 // dump accumulated statistics and reset accumulated values 1054 const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { 1055 int number_of_frames, number_of_keyframes, encode_frame_count; 1056 int i, j; 1057 uint32_t bytes_total = 0; 1058 double scale[COMPONENTS]; 1059 double psnr[COMPONENTS]; 1060 double mse[COMPONENTS]; 1061 double y_scale; 1062 1063 SvcInternal *const si = get_svc_internal(svc_ctx); 1064 if (svc_ctx == NULL || si == NULL) return NULL; 1065 1066 svc_log_reset(svc_ctx); 1067 1068 encode_frame_count = si->encode_frame_count; 1069 if (si->encode_frame_count <= 0) return vpx_svc_get_message(svc_ctx); 1070 1071 svc_log(svc_ctx, SVC_LOG_INFO, "\n"); 1072 number_of_keyframes = encode_frame_count / si->kf_dist + 1; 1073 for (i = 0; i < si->layers; ++i) { 1074 number_of_frames = encode_frame_count; 1075 1076 if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && 1077 (i == 1 || i == 3)) { 1078 number_of_frames -= number_of_keyframes; 1079 } 1080 svc_log(svc_ctx, SVC_LOG_INFO, 1081 "Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n", 1082 i, (double)si->psnr_sum[i][0] / number_of_frames, 1083 (double)si->psnr_sum[i][1] / number_of_frames, 1084 (double)si->psnr_sum[i][2] / number_of_frames, 1085 (double)si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]); 1086 // the following psnr calculation is deduced from ffmpeg.c#print_report 1087 y_scale = si->width * si->height * 255.0 * 255.0 * number_of_frames; 1088 scale[1] = y_scale; 1089 scale[2] = scale[3] = y_scale / 4; // U or V 1090 scale[0] = y_scale * 1.5; // total 1091 1092 for (j = 0; j < COMPONENTS; j++) { 1093 psnr[j] = calc_psnr(si->sse_sum[i][j] / scale[j]); 1094 mse[j] = si->sse_sum[i][j] * 255.0 * 255.0 / scale[j]; 1095 } 1096 svc_log(svc_ctx, SVC_LOG_INFO, 1097 "Layer %d Overall PSNR=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, psnr[0], 1098 psnr[1], psnr[2], psnr[3]); 1099 svc_log(svc_ctx, SVC_LOG_INFO, 1100 "Layer %d Overall MSE=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, mse[0], 1101 mse[1], mse[2], mse[3]); 1102 1103 bytes_total += si->bytes_sum[i]; 1104 // clear sums for next time 1105 si->bytes_sum[i] = 0; 1106 for (j = 0; j < COMPONENTS; ++j) { 1107 si->psnr_sum[i][j] = 0; 1108 si->sse_sum[i][j] = 0; 1109 } 1110 } 1111 1112 // only display statistics once 1113 si->encode_frame_count = 0; 1114 1115 svc_log(svc_ctx, SVC_LOG_INFO, "Total Bytes=[%u]\n", bytes_total); 1116 return vpx_svc_get_message(svc_ctx); 1117 } 1118 1119 void vpx_svc_release(SvcContext *svc_ctx) { 1120 SvcInternal *si; 1121 if (svc_ctx == NULL) return; 1122 // do not use get_svc_internal as it will unnecessarily allocate an 1123 // SvcInternal if it was not already allocated 1124 si = (SvcInternal *)svc_ctx->internal; 1125 if (si != NULL) { 1126 free(si->buffer); 1127 if (si->rc_stats_buf) { 1128 free(si->rc_stats_buf); 1129 } 1130 free(si); 1131 svc_ctx->internal = NULL; 1132 } 1133 } 1134 1135 size_t vpx_svc_get_rc_stats_buffer_size(const SvcContext *svc_ctx) { 1136 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1137 if (svc_ctx == NULL || si == NULL) return 0; 1138 return si->rc_stats_buf_used; 1139 } 1140 1141 char *vpx_svc_get_rc_stats_buffer(const SvcContext *svc_ctx) { 1142 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1143 if (svc_ctx == NULL || si == NULL) return NULL; 1144 return si->rc_stats_buf; 1145 } 1146 1147 1148