/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/**
 * @file
 * VP9 SVC encoding support via libvpx
 */

#include <assert.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define VPX_DISABLE_CTRL_TYPECHECKS 1
#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

#ifdef __MINGW32__
#define strtok_r strtok_s
#ifndef MINGW_HAS_SECURE_API
// proto from /usr/x86_64-w64-mingw32/include/sec_api/string_s.h
_CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context);
#endif  /* MINGW_HAS_SECURE_API */
#endif  /* __MINGW32__ */

#ifdef _MSC_VER
#define strdup _strdup
#define strtok_r strtok_s
#endif

#define SVC_REFERENCE_FRAMES 8
#define SUPERFRAME_SLOTS (8)
#define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2)
#define OPTION_BUFFER_SIZE 256
#define COMPONENTS 4  // psnr & sse statistics maintained for total, y, u, v

// Used by the parsers when the caller supplied no (or an empty) string.
static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27";
static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16";

// Private encoder state hung off SvcContext::internal. It is allocated
// zero-filled on first use by get_svc_internal() and freed by
// vpx_svc_release().
typedef struct SvcInternal {
  // Raw, NUL-terminated copies of the caller's configuration strings. They
  // are only stored by the setters and parsed later, in vpx_svc_init().
  char options[OPTION_BUFFER_SIZE];     // set by vpx_svc_set_options
  char quantizers[OPTION_BUFFER_SIZE];  // set by vpx_svc_set_quantizers
  char quantizers_keyframe[OPTION_BUFFER_SIZE];  // set by
                                                 // vpx_svc_set_quantizers
  char scale_factors[OPTION_BUFFER_SIZE];  // set by vpx_svc_set_scale_factors

  // values extracted from option, quantizers
  // The parsers store the entry for layer i at index
  // i + VPX_SS_MAX_LAYERS - spatial_layers, i.e. right-aligned in the array.
  int scaling_factor_num[VPX_SS_MAX_LAYERS];
  int scaling_factor_den[VPX_SS_MAX_LAYERS];
  int quantizer_keyframe[VPX_SS_MAX_LAYERS];
  int quantizer[VPX_SS_MAX_LAYERS];

  // accumulated statistics
  // Summed per layer in vpx_svc_encode(); reported and cleared by
  // vpx_svc_dump_statistics().
  double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS];   // total/Y/U/V
  uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS];
  uint32_t bytes_sum[VPX_SS_MAX_LAYERS];

  // codec encoding values
  int width;    // width of highest layer
  int height;   // height of highest layer
  int kf_dist;  // distance between keyframes

  // state variables
  int encode_frame_count;  // frames encoded since the last statistics dump
  int frame_within_gop;    // 0 marks a keyframe position
  vpx_enc_frame_flags_t enc_frame_flags;  // flags for the layer being encoded
  int layers;              // copy of SvcContext::spatial_layers
  int layer;               // layer currently being encoded
  int is_keyframe;         // frame_within_gop == 0 for the last encode call

  size_t frame_size;   // bytes of `buffer` holding the last encoded frame
  size_t buffer_size;  // allocated capacity of `buffer`
  void *buffer;        // concatenated layers followed by the superframe index

  // Stats packets (VPX_CODEC_STATS_PKT) gathered during one encode call.
  char *rc_stats_buf;
  size_t rc_stats_buf_size;  // allocated capacity
  size_t rc_stats_buf_used;  // bytes in use

  char message_buffer[2048];   // log text accumulated by svc_log
  vpx_codec_ctx_t *codec_ctx;  // set by vpx_svc_init; receives err_detail
} SvcInternal;
// Superframe is used to generate an index of individual frames (i.e., layers)
struct Superframe {
  int count;                               // entries used in sizes[]
  uint32_t sizes[SUPERFRAME_SLOTS];        // byte size of each frame
  uint32_t magnitude;                      // bitwise OR of all sizes
  uint8_t buffer[SUPERFRAME_BUFFER_SIZE];  // serialized index
  size_t index_size;                       // bytes of buffer[] in use
};

// One encoded frame layer
struct LayerData {
  void *buf;    // compressed data buffer
  size_t size;  // length of compressed data
  struct LayerData *next;
};

// create LayerData from encoder output
// Copies `size` bytes from `buf` into a freshly allocated node. The caller
// owns the result and releases it with ld_free()/ld_list_free().
// Returns NULL only when an allocation fails.
static struct LayerData *ld_create(void *buf, size_t size) {
  struct LayerData *const layer_data =
      (struct LayerData *)malloc(sizeof(*layer_data));
  if (layer_data == NULL) {
    return NULL;
  }
  // malloc(0) may legally return NULL; always ask for at least one byte so
  // that a zero-length payload is not mistaken for an allocation failure.
  layer_data->buf = malloc(size > 0 ? size : 1);
  if (layer_data->buf == NULL) {
    free(layer_data);
    return NULL;
  }
  if (size > 0) {
    memcpy(layer_data->buf, buf, size);
  }
  layer_data->size = size;
  layer_data->next = NULL;
  return layer_data;
}

// free LayerData
static void ld_free(struct LayerData *layer_data) {
  if (layer_data == NULL) return;
  free(layer_data->buf);
  free(layer_data);
}

// add layer data to list
// Appends `layer_data` at the tail of `*list`. A NULL node (for example a
// failed ld_create()) is ignored instead of being dereferenced.
static void ld_list_add(struct LayerData **list, struct LayerData *layer_data) {
  struct LayerData **p = list;

  if (list == NULL || layer_data == NULL) return;
  while (*p != NULL) p = &(*p)->next;
  *p = layer_data;
  layer_data->next = NULL;
}

// get accumulated size of layer data
static size_t ld_list_get_buffer_size(struct LayerData *list) {
  const struct LayerData *p;
  size_t size = 0;

  for (p = list; p != NULL; p = p->next) {
    size += p->size;
  }
  return size;
}

// copy layer data to buffer
// `buffer` must hold at least ld_list_get_buffer_size(list) bytes. Nothing is
// written beyond that: zero-length nodes are skipped entirely.
static void ld_list_copy_to_buffer(struct LayerData *list, uint8_t *buffer) {
  const struct LayerData *p;

  for (p = list; p != NULL; p = p->next) {
    if (p->size == 0) continue;
    memcpy(buffer, p->buf, p->size);
    buffer += p->size;
  }
}

// free layer data list
static void ld_list_free(struct LayerData *list) {
  while (list != NULL) {
    struct LayerData *const next = list->next;
    ld_free(list);
    list = next;
  }
}

// Serialize the superframe index for the frames recorded in `sf`.
// Layout: marker byte, `count` little-endian sizes of (mag + 1) bytes each,
// then the marker byte again. On success sf->index_size is the number of
// bytes written to sf->buffer. When count is outside 1..7 nothing is written
// and sf->index_size is left untouched.
static void sf_create_index(struct Superframe *sf) {
  uint8_t marker = 0xc0;
  int i;
  uint32_t mag, mask;
  uint8_t *bufp;

  if (sf->count <= 0 || sf->count >= 8) return;

  // Add the number of frames to the marker byte
  marker |= (uint8_t)(sf->count - 1);

  // Choose the magnitude
  // mag is a two-bit field (1..4 bytes per size), so it must never exceed 3.
  for (mag = 0, mask = 0xff; mag < 3; ++mag) {
    if (sf->magnitude < mask) break;
    mask <<= 8;
    mask |= 0xff;
  }
  marker |= (uint8_t)(mag << 3);

  // Write the index
  sf->index_size = 2 + (size_t)(mag + 1) * (size_t)sf->count;
  bufp = sf->buffer;

  *bufp++ = marker;
  for (i = 0; i < sf->count; ++i) {
    uint32_t this_sz = sf->sizes[i];
    uint32_t j;

    for (j = 0; j <= mag; ++j) {
      *bufp++ = (uint8_t)(this_sz & 0xff);
      this_sz >>= 8;
    }
  }
  *bufp++ = marker;
}
NULL) { 218 SvcInternal *const si = (SvcInternal *)malloc(sizeof(*si)); 219 if (si != NULL) { 220 memset(si, 0, sizeof(*si)); 221 } 222 svc_ctx->internal = si; 223 } 224 return (SvcInternal *)svc_ctx->internal; 225 } 226 227 static const SvcInternal *get_const_svc_internal(const SvcContext *svc_ctx) { 228 if (svc_ctx == NULL) return NULL; 229 return (const SvcInternal *)svc_ctx->internal; 230 } 231 232 static void svc_log_reset(SvcContext *svc_ctx) { 233 SvcInternal *const si = (SvcInternal *)svc_ctx->internal; 234 si->message_buffer[0] = '\0'; 235 } 236 237 static int svc_log(SvcContext *svc_ctx, int level, const char *fmt, ...) { 238 char buf[512]; 239 int retval = 0; 240 va_list ap; 241 SvcInternal *const si = get_svc_internal(svc_ctx); 242 243 if (level > svc_ctx->log_level) { 244 return retval; 245 } 246 247 va_start(ap, fmt); 248 retval = vsnprintf(buf, sizeof(buf), fmt, ap); 249 va_end(ap); 250 251 if (svc_ctx->log_print) { 252 printf("%s", buf); 253 } else { 254 strncat(si->message_buffer, buf, 255 sizeof(si->message_buffer) - strlen(si->message_buffer) - 1); 256 } 257 258 if (level == SVC_LOG_ERROR) { 259 si->codec_ctx->err_detail = si->message_buffer; 260 } 261 return retval; 262 } 263 264 static vpx_codec_err_t set_option_encoding_mode(SvcContext *svc_ctx, 265 const char *value_str) { 266 if (strcmp(value_str, "i") == 0) { 267 svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_I; 268 } else if (strcmp(value_str, "alt-ip") == 0) { 269 svc_ctx->encoding_mode = ALT_INTER_LAYER_PREDICTION_IP; 270 } else if (strcmp(value_str, "ip") == 0) { 271 svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_IP; 272 } else if (strcmp(value_str, "gf") == 0) { 273 svc_ctx->encoding_mode = USE_GOLDEN_FRAME; 274 } else { 275 svc_log(svc_ctx, SVC_LOG_ERROR, "invalid encoding mode: %s", value_str); 276 return VPX_CODEC_INVALID_PARAM; 277 } 278 return VPX_CODEC_OK; 279 } 280 281 static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx, 282 const char *quantizer_values, 283 
const int is_keyframe) { 284 char *input_string; 285 char *token; 286 const char *delim = ","; 287 char *save_ptr; 288 int found = 0; 289 int i, q; 290 vpx_codec_err_t res = VPX_CODEC_OK; 291 SvcInternal *const si = get_svc_internal(svc_ctx); 292 293 if (quantizer_values == NULL || strlen(quantizer_values) == 0) { 294 if (is_keyframe) { 295 // If there non settings for key frame, we will apply settings from 296 // non key frame. So just simply return here. 297 return VPX_CODEC_INVALID_PARAM; 298 } 299 input_string = strdup(DEFAULT_QUANTIZER_VALUES); 300 } else { 301 input_string = strdup(quantizer_values); 302 } 303 304 token = strtok_r(input_string, delim, &save_ptr); 305 for (i = 0; i < svc_ctx->spatial_layers; ++i) { 306 if (token != NULL) { 307 q = atoi(token); 308 if (q <= 0 || q > 100) { 309 svc_log(svc_ctx, SVC_LOG_ERROR, 310 "svc-quantizer-values: invalid value %s\n", token); 311 res = VPX_CODEC_INVALID_PARAM; 312 break; 313 } 314 token = strtok_r(NULL, delim, &save_ptr); 315 found = i + 1; 316 } else { 317 q = 0; 318 } 319 if (is_keyframe) { 320 si->quantizer_keyframe[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] 321 = q; 322 } else { 323 si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q; 324 } 325 } 326 if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) { 327 svc_log(svc_ctx, SVC_LOG_ERROR, 328 "svc: quantizers: %d values required, but only %d specified\n", 329 svc_ctx->spatial_layers, found); 330 res = VPX_CODEC_INVALID_PARAM; 331 } 332 free(input_string); 333 return res; 334 } 335 336 static void log_invalid_scale_factor(SvcContext *svc_ctx, const char *value) { 337 svc_log(svc_ctx, SVC_LOG_ERROR, "svc scale-factors: invalid value %s\n", 338 value); 339 } 340 341 static vpx_codec_err_t parse_scale_factors(SvcContext *svc_ctx, 342 const char *scale_factors) { 343 char *input_string; 344 char *token; 345 const char *delim = ","; 346 char *save_ptr; 347 int found = 0; 348 int i; 349 int64_t num, den; 350 vpx_codec_err_t res = 
VPX_CODEC_OK; 351 SvcInternal *const si = get_svc_internal(svc_ctx); 352 353 if (scale_factors == NULL || strlen(scale_factors) == 0) { 354 input_string = strdup(DEFAULT_SCALE_FACTORS); 355 } else { 356 input_string = strdup(scale_factors); 357 } 358 token = strtok_r(input_string, delim, &save_ptr); 359 for (i = 0; i < svc_ctx->spatial_layers; ++i) { 360 num = den = 0; 361 if (token != NULL) { 362 num = strtol(token, &token, 10); 363 if (num <= 0) { 364 log_invalid_scale_factor(svc_ctx, token); 365 res = VPX_CODEC_INVALID_PARAM; 366 break; 367 } 368 if (*token++ != '/') { 369 log_invalid_scale_factor(svc_ctx, token); 370 res = VPX_CODEC_INVALID_PARAM; 371 break; 372 } 373 den = strtol(token, &token, 10); 374 if (den <= 0) { 375 log_invalid_scale_factor(svc_ctx, token); 376 res = VPX_CODEC_INVALID_PARAM; 377 break; 378 } 379 token = strtok_r(NULL, delim, &save_ptr); 380 found = i + 1; 381 } 382 si->scaling_factor_num[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = 383 (int)num; 384 si->scaling_factor_den[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = 385 (int)den; 386 } 387 if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) { 388 svc_log(svc_ctx, SVC_LOG_ERROR, 389 "svc: scale-factors: %d values required, but only %d specified\n", 390 svc_ctx->spatial_layers, found); 391 res = VPX_CODEC_INVALID_PARAM; 392 } 393 free(input_string); 394 return res; 395 } 396 397 /** 398 * Parse SVC encoding options 399 * Format: encoding-mode=<svc_mode>,layers=<layer_count> 400 * scale-factors=<n1>/<d1>,<n2>/<d2>,... 401 * quantizers=<q1>,<q2>,... 
402 * svc_mode = [i|ip|alt_ip|gf] 403 */ 404 static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { 405 char *input_string; 406 char *option_name; 407 char *option_value; 408 char *input_ptr; 409 int is_keyframe_qaunt_set = 0; 410 vpx_codec_err_t res = VPX_CODEC_OK; 411 412 if (options == NULL) return VPX_CODEC_OK; 413 input_string = strdup(options); 414 415 // parse option name 416 option_name = strtok_r(input_string, "=", &input_ptr); 417 while (option_name != NULL) { 418 // parse option value 419 option_value = strtok_r(NULL, " ", &input_ptr); 420 if (option_value == NULL) { 421 svc_log(svc_ctx, SVC_LOG_ERROR, "option missing value: %s\n", 422 option_name); 423 res = VPX_CODEC_INVALID_PARAM; 424 break; 425 } 426 if (strcmp("encoding-mode", option_name) == 0) { 427 res = set_option_encoding_mode(svc_ctx, option_value); 428 if (res != VPX_CODEC_OK) break; 429 } else if (strcmp("layers", option_name) == 0) { 430 svc_ctx->spatial_layers = atoi(option_value); 431 } else if (strcmp("scale-factors", option_name) == 0) { 432 res = parse_scale_factors(svc_ctx, option_value); 433 if (res != VPX_CODEC_OK) break; 434 } else if (strcmp("quantizers", option_name) == 0) { 435 res = parse_quantizer_values(svc_ctx, option_value, 0); 436 if (res != VPX_CODEC_OK) break; 437 if (!is_keyframe_qaunt_set) { 438 SvcInternal *const si = get_svc_internal(svc_ctx); 439 memcpy(get_svc_internal(svc_ctx)->quantizer_keyframe, si->quantizer, 440 sizeof(si->quantizer)); 441 } 442 } else if (strcmp("quantizers-keyframe", option_name) == 0) { 443 res = parse_quantizer_values(svc_ctx, option_value, 1); 444 if (res != VPX_CODEC_OK) break; 445 is_keyframe_qaunt_set = 1; 446 } else { 447 svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name); 448 res = VPX_CODEC_INVALID_PARAM; 449 break; 450 } 451 option_name = strtok_r(NULL, "=", &input_ptr); 452 } 453 free(input_string); 454 return res; 455 } 456 457 vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, 
const char *options) { 458 SvcInternal *const si = get_svc_internal(svc_ctx); 459 if (svc_ctx == NULL || options == NULL || si == NULL) { 460 return VPX_CODEC_INVALID_PARAM; 461 } 462 strncpy(si->options, options, sizeof(si->options)); 463 si->options[sizeof(si->options) - 1] = '\0'; 464 return VPX_CODEC_OK; 465 } 466 467 vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx, 468 const char *quantizers, 469 const int is_for_keyframe) { 470 SvcInternal *const si = get_svc_internal(svc_ctx); 471 if (svc_ctx == NULL || quantizers == NULL || si == NULL) { 472 return VPX_CODEC_INVALID_PARAM; 473 } 474 if (is_for_keyframe) { 475 strncpy(si->quantizers_keyframe, quantizers, sizeof(si->quantizers)); 476 si->quantizers_keyframe[sizeof(si->quantizers_keyframe) - 1] = '\0'; 477 } else { 478 strncpy(si->quantizers, quantizers, sizeof(si->quantizers)); 479 si->quantizers[sizeof(si->quantizers) - 1] = '\0'; 480 } 481 return VPX_CODEC_OK; 482 } 483 484 vpx_codec_err_t vpx_svc_set_scale_factors(SvcContext *svc_ctx, 485 const char *scale_factors) { 486 SvcInternal *const si = get_svc_internal(svc_ctx); 487 if (svc_ctx == NULL || scale_factors == NULL || si == NULL) { 488 return VPX_CODEC_INVALID_PARAM; 489 } 490 strncpy(si->scale_factors, scale_factors, sizeof(si->scale_factors)); 491 si->scale_factors[sizeof(si->scale_factors) - 1] = '\0'; 492 return VPX_CODEC_OK; 493 } 494 495 vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, 496 vpx_codec_iface_t *iface, 497 vpx_codec_enc_cfg_t *enc_cfg) { 498 int max_intra_size_pct; 499 vpx_codec_err_t res; 500 SvcInternal *const si = get_svc_internal(svc_ctx); 501 if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL || 502 enc_cfg == NULL) { 503 return VPX_CODEC_INVALID_PARAM; 504 } 505 if (si == NULL) return VPX_CODEC_MEM_ERROR; 506 507 si->codec_ctx = codec_ctx; 508 509 si->width = enc_cfg->g_w; 510 si->height = enc_cfg->g_h; 511 512 if (enc_cfg->kf_max_dist < 2) { 513 svc_log(svc_ctx, SVC_LOG_ERROR, 
"key frame distance too small: %d\n", 514 enc_cfg->kf_max_dist); 515 return VPX_CODEC_INVALID_PARAM; 516 } 517 si->kf_dist = enc_cfg->kf_max_dist; 518 519 if (svc_ctx->spatial_layers == 0) 520 svc_ctx->spatial_layers = VPX_SS_DEFAULT_LAYERS; 521 if (svc_ctx->spatial_layers < 1 || 522 svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) { 523 svc_log(svc_ctx, SVC_LOG_ERROR, "spatial layers: invalid value: %d\n", 524 svc_ctx->spatial_layers); 525 return VPX_CODEC_INVALID_PARAM; 526 } 527 528 res = parse_quantizer_values(svc_ctx, si->quantizers, 0); 529 if (res != VPX_CODEC_OK) return res; 530 531 res = parse_quantizer_values(svc_ctx, si->quantizers_keyframe, 1); 532 if (res != VPX_CODEC_OK) 533 memcpy(si->quantizer_keyframe, si->quantizer, sizeof(si->quantizer)); 534 535 res = parse_scale_factors(svc_ctx, si->scale_factors); 536 if (res != VPX_CODEC_OK) return res; 537 538 // Parse aggregate command line options. Options must start with 539 // "layers=xx" then followed by other options 540 res = parse_options(svc_ctx, si->options); 541 if (res != VPX_CODEC_OK) return res; 542 543 si->layers = svc_ctx->spatial_layers; 544 545 // Assign target bitrate for each layer. We calculate the ratio 546 // from the resolution for now. 547 // TODO(Minghai): Optimize the mechanism of allocating bits after 548 // implementing svc two pass rate control. 
549 if (si->layers > 1) { 550 int i; 551 float total = 0; 552 float alloc_ratio[VPX_SS_MAX_LAYERS] = {0}; 553 554 assert(si->layers <= VPX_SS_MAX_LAYERS); 555 for (i = 0; i < si->layers; ++i) { 556 int pos = i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers; 557 if (pos < VPX_SS_MAX_LAYERS && si->scaling_factor_den[pos] > 0) { 558 alloc_ratio[i] = (float)(si->scaling_factor_num[pos] * 1.0 / 559 si->scaling_factor_den[pos]); 560 561 alloc_ratio[i] *= alloc_ratio[i]; 562 total += alloc_ratio[i]; 563 } 564 } 565 566 for (i = 0; i < si->layers; ++i) { 567 if (total > 0) { 568 enc_cfg->ss_target_bitrate[i] = (unsigned int) 569 (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); 570 } 571 } 572 } 573 574 // modify encoder configuration 575 enc_cfg->ss_number_layers = si->layers; 576 enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder. 577 enc_cfg->kf_mode = VPX_KF_DISABLED; 578 // Lag in frames not currently supported 579 enc_cfg->g_lag_in_frames = 0; 580 581 // TODO(ivanmaltz): determine if these values need to be set explicitly for 582 // svc, or if the normal default/override mechanism can be used 583 enc_cfg->rc_dropframe_thresh = 0; 584 enc_cfg->rc_end_usage = VPX_CBR; 585 enc_cfg->rc_resize_allowed = 0; 586 587 if (enc_cfg->g_pass == VPX_RC_ONE_PASS) { 588 enc_cfg->rc_min_quantizer = 33; 589 enc_cfg->rc_max_quantizer = 33; 590 } 591 592 enc_cfg->rc_undershoot_pct = 100; 593 enc_cfg->rc_overshoot_pct = 15; 594 enc_cfg->rc_buf_initial_sz = 500; 595 enc_cfg->rc_buf_optimal_sz = 600; 596 enc_cfg->rc_buf_sz = 1000; 597 enc_cfg->g_error_resilient = 1; 598 599 // Initialize codec 600 res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR); 601 if (res != VPX_CODEC_OK) { 602 svc_log(svc_ctx, SVC_LOG_ERROR, "svc_enc_init error\n"); 603 return res; 604 } 605 606 vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1); 607 vpx_codec_control(codec_ctx, VP8E_SET_CPUUSED, 1); 608 vpx_codec_control(codec_ctx, VP8E_SET_STATIC_THRESHOLD, 1); 609 
vpx_codec_control(codec_ctx, VP8E_SET_NOISE_SENSITIVITY, 1); 610 vpx_codec_control(codec_ctx, VP8E_SET_TOKEN_PARTITIONS, 1); 611 612 max_intra_size_pct = 613 (int)(((double)enc_cfg->rc_buf_optimal_sz * 0.5) * 614 ((double)enc_cfg->g_timebase.den / enc_cfg->g_timebase.num) / 10.0); 615 vpx_codec_control(codec_ctx, VP8E_SET_MAX_INTRA_BITRATE_PCT, 616 max_intra_size_pct); 617 return VPX_CODEC_OK; 618 } 619 620 // SVC Algorithm flags - these get mapped to VP8_EFLAG_* defined in vp8cx.h 621 622 // encoder should reference the last frame 623 #define USE_LAST (1 << 0) 624 625 // encoder should reference the alt ref frame 626 #define USE_ARF (1 << 1) 627 628 // encoder should reference the golden frame 629 #define USE_GF (1 << 2) 630 631 // encoder should copy current frame to the last frame buffer 632 #define UPDATE_LAST (1 << 3) 633 634 // encoder should copy current frame to the alt ref frame buffer 635 #define UPDATE_ARF (1 << 4) 636 637 // encoder should copy current frame to the golden frame 638 #define UPDATE_GF (1 << 5) 639 640 static int map_vp8_flags(int svc_flags) { 641 int flags = 0; 642 643 if (!(svc_flags & USE_LAST)) flags |= VP8_EFLAG_NO_REF_LAST; 644 if (!(svc_flags & USE_ARF)) flags |= VP8_EFLAG_NO_REF_ARF; 645 if (!(svc_flags & USE_GF)) flags |= VP8_EFLAG_NO_REF_GF; 646 647 if (svc_flags & UPDATE_LAST) { 648 // last is updated automatically 649 } else { 650 flags |= VP8_EFLAG_NO_UPD_LAST; 651 } 652 if (svc_flags & UPDATE_ARF) { 653 flags |= VP8_EFLAG_FORCE_ARF; 654 } else { 655 flags |= VP8_EFLAG_NO_UPD_ARF; 656 } 657 if (svc_flags & UPDATE_GF) { 658 flags |= VP8_EFLAG_FORCE_GF; 659 } else { 660 flags |= VP8_EFLAG_NO_UPD_GF; 661 } 662 return flags; 663 } 664 665 static void calculate_enc_frame_flags(SvcContext *svc_ctx) { 666 vpx_enc_frame_flags_t flags = VPX_EFLAG_FORCE_KF; 667 SvcInternal *const si = get_svc_internal(svc_ctx); 668 const int is_keyframe = (si->frame_within_gop == 0); 669 670 // keyframe layer zero is identical for all modes 671 if 
(is_keyframe && si->layer == 0) { 672 si->enc_frame_flags = VPX_EFLAG_FORCE_KF; 673 return; 674 } 675 676 switch (svc_ctx->encoding_mode) { 677 case ALT_INTER_LAYER_PREDICTION_IP: 678 if (si->layer == 0) { 679 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 680 } else if (is_keyframe) { 681 if (si->layer == si->layers - 1) { 682 flags = map_vp8_flags(USE_ARF | UPDATE_LAST); 683 } else { 684 flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF); 685 } 686 } else { 687 flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST); 688 } 689 break; 690 case INTER_LAYER_PREDICTION_I: 691 if (si->layer == 0) { 692 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 693 } else if (is_keyframe) { 694 flags = map_vp8_flags(USE_ARF | UPDATE_LAST); 695 } else { 696 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 697 } 698 break; 699 case INTER_LAYER_PREDICTION_IP: 700 if (si->layer == 0) { 701 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 702 } else if (is_keyframe) { 703 flags = map_vp8_flags(USE_ARF | UPDATE_LAST); 704 } else { 705 flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST); 706 } 707 break; 708 case USE_GOLDEN_FRAME: 709 if (2 * si->layers - SVC_REFERENCE_FRAMES <= si->layer) { 710 if (si->layer == 0) { 711 flags = map_vp8_flags(USE_LAST | USE_GF | UPDATE_LAST); 712 } else if (is_keyframe) { 713 flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF); 714 } else { 715 flags = map_vp8_flags(USE_LAST | USE_ARF | USE_GF | UPDATE_LAST); 716 } 717 } else { 718 if (si->layer == 0) { 719 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 720 } else if (is_keyframe) { 721 flags = map_vp8_flags(USE_ARF | UPDATE_LAST); 722 } else { 723 flags = map_vp8_flags(USE_LAST | UPDATE_LAST); 724 } 725 } 726 break; 727 default: 728 svc_log(svc_ctx, SVC_LOG_ERROR, "unexpected encoding mode: %d\n", 729 svc_ctx->encoding_mode); 730 break; 731 } 732 si->enc_frame_flags = flags; 733 } 734 735 vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx, 736 int layer, 737 unsigned int 
*width, 738 unsigned int *height) { 739 int w, h, index, num, den; 740 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 741 742 if (svc_ctx == NULL || si == NULL || width == NULL || height == NULL) { 743 return VPX_CODEC_INVALID_PARAM; 744 } 745 if (layer < 0 || layer >= si->layers) return VPX_CODEC_INVALID_PARAM; 746 747 index = layer + VPX_SS_MAX_LAYERS - si->layers; 748 num = si->scaling_factor_num[index]; 749 den = si->scaling_factor_den[index]; 750 if (num == 0 || den == 0) return VPX_CODEC_INVALID_PARAM; 751 752 w = si->width * num / den; 753 h = si->height * num / den; 754 755 // make height and width even to make chrome player happy 756 w += w % 2; 757 h += h % 2; 758 759 *width = w; 760 *height = h; 761 762 return VPX_CODEC_OK; 763 } 764 765 static void set_svc_parameters(SvcContext *svc_ctx, 766 vpx_codec_ctx_t *codec_ctx) { 767 int layer, layer_index; 768 vpx_svc_parameters_t svc_params; 769 SvcInternal *const si = get_svc_internal(svc_ctx); 770 771 memset(&svc_params, 0, sizeof(svc_params)); 772 svc_params.temporal_layer = 0; 773 svc_params.spatial_layer = si->layer; 774 svc_params.flags = si->enc_frame_flags; 775 776 layer = si->layer; 777 if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && 778 si->frame_within_gop == 0) { 779 // layers 1 & 3 don't exist in this mode, use the higher one 780 if (layer == 0 || layer == 2) { 781 layer += 1; 782 } 783 } 784 if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(svc_ctx, layer, 785 &svc_params.width, 786 &svc_params.height)) { 787 svc_log(svc_ctx, SVC_LOG_ERROR, "vpx_svc_get_layer_resolution failed\n"); 788 } 789 layer_index = layer + VPX_SS_MAX_LAYERS - si->layers; 790 791 if (codec_ctx->config.enc->g_pass == VPX_RC_ONE_PASS) { 792 if (vpx_svc_is_keyframe(svc_ctx)) { 793 svc_params.min_quantizer = si->quantizer_keyframe[layer_index]; 794 svc_params.max_quantizer = si->quantizer_keyframe[layer_index]; 795 } else { 796 svc_params.min_quantizer = si->quantizer[layer_index]; 797 
svc_params.max_quantizer = si->quantizer[layer_index]; 798 } 799 } else { 800 svc_params.min_quantizer = codec_ctx->config.enc->rc_min_quantizer; 801 svc_params.max_quantizer = codec_ctx->config.enc->rc_max_quantizer; 802 } 803 804 svc_params.distance_from_i_frame = si->frame_within_gop; 805 806 // Use buffer i for layer i LST 807 svc_params.lst_fb_idx = si->layer; 808 809 // Use buffer i-1 for layer i Alt (Inter-layer prediction) 810 if (si->layer != 0) { 811 const int use_higher_layer = 812 svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && 813 si->frame_within_gop == 0; 814 svc_params.alt_fb_idx = use_higher_layer ? si->layer - 2 : si->layer - 1; 815 } 816 817 if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP) { 818 svc_params.gld_fb_idx = si->layer + 1; 819 } else { 820 if (si->layer < 2 * si->layers - SVC_REFERENCE_FRAMES) 821 svc_params.gld_fb_idx = svc_params.lst_fb_idx; 822 else 823 svc_params.gld_fb_idx = 2 * si->layers - 1 - si->layer; 824 } 825 826 svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, %dx%d, q: %d\n", 827 si->encode_frame_count, si->layer, svc_params.width, 828 svc_params.height, svc_params.min_quantizer); 829 830 if (svc_params.flags == VPX_EFLAG_FORCE_KF) { 831 svc_log(svc_ctx, SVC_LOG_DEBUG, "flags == VPX_EFLAG_FORCE_KF\n"); 832 } else { 833 svc_log( 834 svc_ctx, SVC_LOG_DEBUG, "Using: LST/GLD/ALT [%2d|%2d|%2d]\n", 835 svc_params.flags & VP8_EFLAG_NO_REF_LAST ? -1 : svc_params.lst_fb_idx, 836 svc_params.flags & VP8_EFLAG_NO_REF_GF ? -1 : svc_params.gld_fb_idx, 837 svc_params.flags & VP8_EFLAG_NO_REF_ARF ? -1 : svc_params.alt_fb_idx); 838 svc_log( 839 svc_ctx, SVC_LOG_DEBUG, "Updating: LST/GLD/ALT [%2d|%2d|%2d]\n", 840 svc_params.flags & VP8_EFLAG_NO_UPD_LAST ? -1 : svc_params.lst_fb_idx, 841 svc_params.flags & VP8_EFLAG_NO_UPD_GF ? -1 : svc_params.gld_fb_idx, 842 svc_params.flags & VP8_EFLAG_NO_UPD_ARF ? 
-1 : svc_params.alt_fb_idx); 843 } 844 845 vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &svc_params); 846 } 847 848 /** 849 * Encode a frame into multiple layers 850 * Create a superframe containing the individual layers 851 */ 852 vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, 853 struct vpx_image *rawimg, vpx_codec_pts_t pts, 854 int64_t duration, int deadline) { 855 vpx_codec_err_t res; 856 vpx_codec_iter_t iter; 857 const vpx_codec_cx_pkt_t *cx_pkt; 858 struct LayerData *cx_layer_list = NULL; 859 struct LayerData *layer_data; 860 struct Superframe superframe; 861 SvcInternal *const si = get_svc_internal(svc_ctx); 862 if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) { 863 return VPX_CODEC_INVALID_PARAM; 864 } 865 866 memset(&superframe, 0, sizeof(superframe)); 867 svc_log_reset(svc_ctx); 868 si->rc_stats_buf_used = 0; 869 870 si->layers = svc_ctx->spatial_layers; 871 if (si->frame_within_gop >= si->kf_dist || 872 si->encode_frame_count == 0) { 873 si->frame_within_gop = 0; 874 } 875 si->is_keyframe = (si->frame_within_gop == 0); 876 si->frame_size = 0; 877 878 if (rawimg != NULL) { 879 svc_log(svc_ctx, SVC_LOG_DEBUG, 880 "vpx_svc_encode layers: %d, frame_count: %d, " 881 "frame_within_gop: %d\n", si->layers, si->encode_frame_count, 882 si->frame_within_gop); 883 } 884 885 // encode each layer 886 for (si->layer = 0; si->layer < si->layers; ++si->layer) { 887 if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && 888 si->is_keyframe && (si->layer == 1 || si->layer == 3)) { 889 svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer); 890 continue; 891 } 892 893 if (rawimg != NULL) { 894 calculate_enc_frame_flags(svc_ctx); 895 set_svc_parameters(svc_ctx, codec_ctx); 896 } 897 898 res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, 899 si->enc_frame_flags, deadline); 900 if (res != VPX_CODEC_OK) { 901 return res; 902 } 903 // save compressed data 904 iter = NULL; 905 while 
((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { 906 switch (cx_pkt->kind) { 907 case VPX_CODEC_CX_FRAME_PKT: { 908 const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz); 909 si->bytes_sum[si->layer] += frame_pkt_size; 910 svc_log(svc_ctx, SVC_LOG_DEBUG, 911 "SVC frame: %d, layer: %d, size: %u\n", 912 si->encode_frame_count, si->layer, frame_pkt_size); 913 layer_data = 914 ld_create(cx_pkt->data.frame.buf, (size_t)frame_pkt_size); 915 if (layer_data == NULL) { 916 svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating LayerData\n"); 917 return VPX_CODEC_OK; 918 } 919 ld_list_add(&cx_layer_list, layer_data); 920 921 // save layer size in superframe index 922 superframe.sizes[superframe.count++] = frame_pkt_size; 923 superframe.magnitude |= frame_pkt_size; 924 break; 925 } 926 case VPX_CODEC_PSNR_PKT: { 927 int i; 928 svc_log(svc_ctx, SVC_LOG_DEBUG, 929 "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): " 930 "%2.3f %2.3f %2.3f %2.3f \n", 931 si->encode_frame_count, si->layer, 932 cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1], 933 cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]); 934 svc_log(svc_ctx, SVC_LOG_DEBUG, 935 "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): " 936 "%2.3f %2.3f %2.3f %2.3f \n", 937 si->encode_frame_count, si->layer, 938 cx_pkt->data.psnr.sse[0], cx_pkt->data.psnr.sse[1], 939 cx_pkt->data.psnr.sse[2], cx_pkt->data.psnr.sse[3]); 940 for (i = 0; i < COMPONENTS; i++) { 941 si->psnr_sum[si->layer][i] += cx_pkt->data.psnr.psnr[i]; 942 si->sse_sum[si->layer][i] += cx_pkt->data.psnr.sse[i]; 943 } 944 break; 945 } 946 case VPX_CODEC_STATS_PKT: { 947 size_t new_size = si->rc_stats_buf_used + 948 cx_pkt->data.twopass_stats.sz; 949 950 if (new_size > si->rc_stats_buf_size) { 951 char *p = (char*)realloc(si->rc_stats_buf, new_size); 952 if (p == NULL) { 953 svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating stats buf\n"); 954 break; 955 } 956 si->rc_stats_buf = p; 957 si->rc_stats_buf_size = new_size; 958 } 959 960 memcpy(si->rc_stats_buf + 
si->rc_stats_buf_used, 961 cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz); 962 si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz; 963 break; 964 } 965 default: { 966 break; 967 } 968 } 969 } 970 if (rawimg == NULL) { 971 break; 972 } 973 } 974 if (codec_ctx->config.enc->g_pass != VPX_RC_FIRST_PASS) { 975 // add superframe index to layer data list 976 sf_create_index(&superframe); 977 layer_data = ld_create(superframe.buffer, superframe.index_size); 978 ld_list_add(&cx_layer_list, layer_data); 979 980 // get accumulated size of layer data 981 si->frame_size = ld_list_get_buffer_size(cx_layer_list); 982 if (si->frame_size > 0) { 983 // all layers encoded, create single buffer with concatenated layers 984 if (si->frame_size > si->buffer_size) { 985 free(si->buffer); 986 si->buffer = malloc(si->frame_size); 987 if (si->buffer == NULL) { 988 ld_list_free(cx_layer_list); 989 return VPX_CODEC_MEM_ERROR; 990 } 991 si->buffer_size = si->frame_size; 992 } 993 // copy layer data into packet 994 ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer); 995 996 ld_list_free(cx_layer_list); 997 998 svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, " 999 "pts: %d\n", si->encode_frame_count, si->is_keyframe, 1000 (int)si->frame_size, (int)pts); 1001 } 1002 } 1003 ++si->frame_within_gop; 1004 ++si->encode_frame_count; 1005 1006 return VPX_CODEC_OK; 1007 } 1008 1009 const char *vpx_svc_get_message(const SvcContext *svc_ctx) { 1010 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1011 if (svc_ctx == NULL || si == NULL) return NULL; 1012 return si->message_buffer; 1013 } 1014 1015 void *vpx_svc_get_buffer(const SvcContext *svc_ctx) { 1016 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1017 if (svc_ctx == NULL || si == NULL) return NULL; 1018 return si->buffer; 1019 } 1020 1021 size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx) { 1022 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1023 if 
(svc_ctx == NULL || si == NULL) return 0; 1024 return si->frame_size; 1025 } 1026 1027 int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx) { 1028 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1029 if (svc_ctx == NULL || si == NULL) return 0; 1030 return si->encode_frame_count; 1031 } 1032 1033 int vpx_svc_is_keyframe(const SvcContext *svc_ctx) { 1034 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1035 if (svc_ctx == NULL || si == NULL) return 0; 1036 return si->is_keyframe; 1037 } 1038 1039 void vpx_svc_set_keyframe(SvcContext *svc_ctx) { 1040 SvcInternal *const si = get_svc_internal(svc_ctx); 1041 if (svc_ctx == NULL || si == NULL) return; 1042 si->frame_within_gop = 0; 1043 } 1044 1045 static double calc_psnr(double d) { 1046 if (d == 0) return 100; 1047 return -10.0 * log(d) / log(10.0); 1048 } 1049 1050 // dump accumulated statistics and reset accumulated values 1051 const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { 1052 int number_of_frames, number_of_keyframes, encode_frame_count; 1053 int i, j; 1054 uint32_t bytes_total = 0; 1055 double scale[COMPONENTS]; 1056 double psnr[COMPONENTS]; 1057 double mse[COMPONENTS]; 1058 double y_scale; 1059 1060 SvcInternal *const si = get_svc_internal(svc_ctx); 1061 if (svc_ctx == NULL || si == NULL) return NULL; 1062 1063 svc_log_reset(svc_ctx); 1064 1065 encode_frame_count = si->encode_frame_count; 1066 if (si->encode_frame_count <= 0) return vpx_svc_get_message(svc_ctx); 1067 1068 svc_log(svc_ctx, SVC_LOG_INFO, "\n"); 1069 number_of_keyframes = encode_frame_count / si->kf_dist + 1; 1070 for (i = 0; i < si->layers; ++i) { 1071 number_of_frames = encode_frame_count; 1072 1073 if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && 1074 (i == 1 || i == 3)) { 1075 number_of_frames -= number_of_keyframes; 1076 } 1077 svc_log(svc_ctx, SVC_LOG_INFO, 1078 "Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n", 1079 i, (double)si->psnr_sum[i][0] / 
number_of_frames, 1080 (double)si->psnr_sum[i][1] / number_of_frames, 1081 (double)si->psnr_sum[i][2] / number_of_frames, 1082 (double)si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]); 1083 // the following psnr calculation is deduced from ffmpeg.c#print_report 1084 y_scale = si->width * si->height * 255.0 * 255.0 * number_of_frames; 1085 scale[1] = y_scale; 1086 scale[2] = scale[3] = y_scale / 4; // U or V 1087 scale[0] = y_scale * 1.5; // total 1088 1089 for (j = 0; j < COMPONENTS; j++) { 1090 psnr[j] = calc_psnr(si->sse_sum[i][j] / scale[j]); 1091 mse[j] = si->sse_sum[i][j] * 255.0 * 255.0 / scale[j]; 1092 } 1093 svc_log(svc_ctx, SVC_LOG_INFO, 1094 "Layer %d Overall PSNR=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, psnr[0], 1095 psnr[1], psnr[2], psnr[3]); 1096 svc_log(svc_ctx, SVC_LOG_INFO, 1097 "Layer %d Overall MSE=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, mse[0], 1098 mse[1], mse[2], mse[3]); 1099 1100 bytes_total += si->bytes_sum[i]; 1101 // clear sums for next time 1102 si->bytes_sum[i] = 0; 1103 for (j = 0; j < COMPONENTS; ++j) { 1104 si->psnr_sum[i][j] = 0; 1105 si->sse_sum[i][j] = 0; 1106 } 1107 } 1108 1109 // only display statistics once 1110 si->encode_frame_count = 0; 1111 1112 svc_log(svc_ctx, SVC_LOG_INFO, "Total Bytes=[%u]\n", bytes_total); 1113 return vpx_svc_get_message(svc_ctx); 1114 } 1115 1116 void vpx_svc_release(SvcContext *svc_ctx) { 1117 SvcInternal *si; 1118 if (svc_ctx == NULL) return; 1119 // do not use get_svc_internal as it will unnecessarily allocate an 1120 // SvcInternal if it was not already allocated 1121 si = (SvcInternal *)svc_ctx->internal; 1122 if (si != NULL) { 1123 free(si->buffer); 1124 if (si->rc_stats_buf) { 1125 free(si->rc_stats_buf); 1126 } 1127 free(si); 1128 svc_ctx->internal = NULL; 1129 } 1130 } 1131 1132 size_t vpx_svc_get_rc_stats_buffer_size(const SvcContext *svc_ctx) { 1133 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1134 if (svc_ctx == NULL || si == NULL) return 0; 1135 return 
si->rc_stats_buf_used; 1136 } 1137 1138 char *vpx_svc_get_rc_stats_buffer(const SvcContext *svc_ctx) { 1139 const SvcInternal *const si = get_const_svc_internal(svc_ctx); 1140 if (svc_ctx == NULL || si == NULL) return NULL; 1141 return si->rc_stats_buf; 1142 } 1143 1144 1145