1 /* 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 /* 12 * This is an example demonstrating how to implement a multi-layer 13 * VP9 encoding scheme based on spatial scalability for video applications 14 * that benefit from a scalable bitstream. 15 */ 16 17 #include <stdarg.h> 18 #include <stdlib.h> 19 #include <string.h> 20 #include <time.h> 21 #include "./args.h" 22 #include "vpx/svc_context.h" 23 #include "vpx/vp8cx.h" 24 #include "vpx/vpx_encoder.h" 25 26 #define VP90_FOURCC 0x30395056 27 28 static const struct arg_enum_list encoding_mode_enum[] = { 29 {"i", INTER_LAYER_PREDICTION_I}, 30 {"alt-ip", ALT_INTER_LAYER_PREDICTION_IP}, 31 {"ip", INTER_LAYER_PREDICTION_IP}, 32 {"gf", USE_GOLDEN_FRAME}, 33 {NULL, 0} 34 }; 35 36 static const arg_def_t encoding_mode_arg = ARG_DEF_ENUM( 37 "m", "encoding-mode", 1, "Encoding mode algorithm", encoding_mode_enum); 38 static const arg_def_t skip_frames_arg = 39 ARG_DEF("s", "skip-frames", 1, "input frames to skip"); 40 static const arg_def_t frames_arg = 41 ARG_DEF("f", "frames", 1, "number of frames to encode"); 42 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width"); 43 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height"); 44 static const arg_def_t timebase_arg = 45 ARG_DEF("t", "timebase", 1, "timebase (num/den)"); 46 static const arg_def_t bitrate_arg = ARG_DEF( 47 "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second"); 48 static const arg_def_t layers_arg = 49 ARG_DEF("l", "layers", 1, "number of SVC layers"); 50 static const arg_def_t kf_dist_arg = 51 ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes"); 52 static const arg_def_t scale_factors_arg = 53 ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)"); 54 static const arg_def_t quantizers_arg = 55 ARG_DEF("q", "quantizers", 1, "quantizers (lowest to highest layer)"); 56 static const arg_def_t dummy_frame_arg = 57 ARG_DEF("z", "dummy-frame", 1, "make first frame blank and full size"); 58 59 static const arg_def_t *svc_args[] = { 60 &encoding_mode_arg, &frames_arg, &width_arg, &height_arg, 61 &timebase_arg, &bitrate_arg, &skip_frames_arg, &layers_arg, 62 &kf_dist_arg, &scale_factors_arg, &quantizers_arg, &dummy_frame_arg, 63 NULL 64 }; 65 66 static const SVC_ENCODING_MODE default_encoding_mode = 67 INTER_LAYER_PREDICTION_IP; 68 static const uint32_t default_frames_to_skip = 0; 69 static const uint32_t default_frames_to_code = 60 * 60; 70 static const uint32_t default_width = 1920; 71 static const uint32_t default_height = 1080; 72 static const uint32_t default_timebase_num = 1; 73 static const uint32_t default_timebase_den = 60; 74 static const uint32_t default_bitrate = 1000; 75 static const uint32_t default_spatial_layers = 5; 76 static const uint32_t default_kf_dist = 100; 77 static const int default_use_dummy_frame = 1; 78 79 typedef struct { 80 char *input_filename; 81 char *output_filename; 82 uint32_t frames_to_code; 83 uint32_t frames_to_skip; 84 } AppInput; 85 86 static void mem_put_le16(char *mem, uint32_t val) { 87 mem[0] = val; 88 mem[1] = val >> 8; 89 } 90 91 static void mem_put_le32(char *mem, uint32_t val) { 92 mem[0] = val; 93 mem[1] = val >> 8; 94 mem[2] = val >> 16; 95 mem[3] = val >> 24; 96 } 97 98 static void usage(const char *exec_name) { 99 fprintf(stderr, "Usage: %s <options> input_filename output_filename\n", 100 exec_name); 101 fprintf(stderr, "Options:\n"); 102 arg_show_usage(stderr, svc_args); 103 exit(EXIT_FAILURE); 104 } 105 106 void die(const char *fmt, ...) { 107 va_list ap; 108 109 va_start(ap, fmt); 110 vfprintf(stderr, fmt, ap); 111 if (fmt[strlen(fmt) - 1] != '\n') printf("\n"); 112 exit(EXIT_FAILURE); 113 } 114 115 static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { 116 const char *detail = vpx_codec_error_detail(ctx); 117 118 printf("%s: %s\n", s, vpx_codec_error(ctx)); 119 if (detail) printf(" %s\n", detail); 120 exit(EXIT_FAILURE); 121 } 122 123 static int read_frame(FILE *f, vpx_image_t *img) { 124 size_t nbytes; 125 int res = 1; 126 int plane; 127 128 for (plane = 0; plane < 3; ++plane) { 129 uint8_t *ptr; 130 const int w = (plane ? (1 + img->d_w) / 2 : img->d_w); 131 const int h = (plane ? (1 + img->d_h) / 2 : img->d_h); 132 int r; 133 134 switch (plane) { 135 case 1: 136 ptr = img->planes[VPX_PLANE_U]; 137 break; 138 case 2: 139 ptr = img->planes[VPX_PLANE_V]; 140 break; 141 default: 142 ptr = img->planes[plane]; 143 } 144 for (r = 0; r < h; ++r) { 145 const int to_read = w; 146 147 nbytes = fread(ptr, 1, to_read, f); 148 if (nbytes != to_read) { 149 res = 0; 150 if (nbytes > 0) 151 printf("Warning: Read partial frame. Check your width & height!\n"); 152 break; 153 } 154 ptr += img->stride[plane]; 155 } 156 if (!res) break; 157 } 158 return res; 159 } 160 161 static int create_dummy_frame(vpx_image_t *img) { 162 const size_t buf_size = img->w * img->h * 3 / 2; 163 memset(img->planes[0], 129, buf_size); 164 return 1; 165 } 166 167 static void write_ivf_file_header(FILE *outfile, 168 uint32_t width, uint32_t height, 169 int timebase_num, int timebase_den, 170 int frame_cnt) { 171 char header[32]; 172 173 header[0] = 'D'; 174 header[1] = 'K'; 175 header[2] = 'I'; 176 header[3] = 'F'; 177 mem_put_le16(header + 4, 0); /* version */ 178 mem_put_le16(header + 6, 32); /* headersize */ 179 mem_put_le32(header + 8, VP90_FOURCC); /* fourcc */ 180 mem_put_le16(header + 12, width); /* width */ 181 mem_put_le16(header + 14, height); /* height */ 182 mem_put_le32(header + 16, timebase_den); /* rate */ 183 mem_put_le32(header + 20, timebase_num); /* scale */ 184 mem_put_le32(header + 24, frame_cnt); /* length */ 185 mem_put_le32(header + 28, 0); /* unused */ 186 187 (void)fwrite(header, 1, 32, outfile); 188 } 189 190 static void write_ivf_frame_header(FILE *outfile, vpx_codec_pts_t pts, 191 size_t sz) { 192 char header[12]; 193 mem_put_le32(header, (uint32_t)sz); 194 mem_put_le32(header + 4, pts & 0xFFFFFFFF); 195 mem_put_le32(header + 8, pts >> 32); 196 197 (void)fwrite(header, 1, 12, outfile); 198 } 199 200 static void parse_command_line(int argc, const char **argv_, 201 AppInput *app_input, SvcContext *svc_ctx, 202 vpx_codec_enc_cfg_t *enc_cfg) { 203 struct arg arg; 204 char **argv, **argi, **argj; 205 vpx_codec_err_t res; 206 207 // initialize SvcContext with parameters that will be passed to vpx_svc_init 208 svc_ctx->log_level = SVC_LOG_DEBUG; 209 svc_ctx->spatial_layers = default_spatial_layers; 210 svc_ctx->encoding_mode = default_encoding_mode; 211 // when using a dummy frame, that frame is only encoded to be full size 212 svc_ctx->first_frame_full_size = default_use_dummy_frame; 213 214 // start with default encoder configuration 215 res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0); 216 if (res) { 217 die("Failed to get config: %s\n", vpx_codec_err_to_string(res)); 218 } 219 // update enc_cfg with app default values 220 enc_cfg->g_w = default_width; 221 enc_cfg->g_h = default_height; 222 enc_cfg->g_timebase.num = default_timebase_num; 223 enc_cfg->g_timebase.den = default_timebase_den; 224 enc_cfg->rc_target_bitrate = default_bitrate; 225 enc_cfg->kf_min_dist = default_kf_dist; 226 enc_cfg->kf_max_dist = default_kf_dist; 227 228 // initialize AppInput with default values 229 app_input->frames_to_code = default_frames_to_code; 230 app_input->frames_to_skip = default_frames_to_skip; 231 232 // process command line options 233 argv = argv_dup(argc - 1, argv_ + 1); 234 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { 235 arg.argv_step = 1; 236 237 if (arg_match(&arg, &encoding_mode_arg, argi)) { 238 svc_ctx->encoding_mode = arg_parse_enum_or_int(&arg); 239 } else if (arg_match(&arg, &frames_arg, argi)) { 240 app_input->frames_to_code = arg_parse_uint(&arg); 241 } else if (arg_match(&arg, &width_arg, argi)) { 242 enc_cfg->g_w = arg_parse_uint(&arg); 243 } else if (arg_match(&arg, &height_arg, argi)) { 244 enc_cfg->g_h = arg_parse_uint(&arg); 245 } else if (arg_match(&arg, &height_arg, argi)) { 246 enc_cfg->g_h = arg_parse_uint(&arg); 247 } else if (arg_match(&arg, &timebase_arg, argi)) { 248 enc_cfg->g_timebase = arg_parse_rational(&arg); 249 } else if (arg_match(&arg, &bitrate_arg, argi)) { 250 enc_cfg->rc_target_bitrate = arg_parse_uint(&arg); 251 } else if (arg_match(&arg, &skip_frames_arg, argi)) { 252 app_input->frames_to_skip = arg_parse_uint(&arg); 253 } else if (arg_match(&arg, &layers_arg, argi)) { 254 svc_ctx->spatial_layers = arg_parse_uint(&arg); 255 } else if (arg_match(&arg, &kf_dist_arg, argi)) { 256 enc_cfg->kf_min_dist = arg_parse_uint(&arg); 257 enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; 258 } else if (arg_match(&arg, &scale_factors_arg, argi)) { 259 vpx_svc_set_scale_factors(svc_ctx, arg.val); 260 } else if (arg_match(&arg, &quantizers_arg, argi)) { 261 vpx_svc_set_quantizers(svc_ctx, arg.val); 262 } else if (arg_match(&arg, &dummy_frame_arg, argi)) { 263 svc_ctx->first_frame_full_size = arg_parse_int(&arg); 264 } else { 265 ++argj; 266 } 267 } 268 269 // Check for unrecognized options 270 for (argi = argv; *argi; ++argi) 271 if (argi[0][0] == '-' && strlen(argi[0]) > 1) 272 die("Error: Unrecognized option %s\n", *argi); 273 274 if (argv[0] == NULL || argv[1] == 0) { 275 usage(argv_[0]); 276 } 277 app_input->input_filename = argv[0]; 278 app_input->output_filename = argv[1]; 279 free(argv); 280 281 if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 || 282 enc_cfg->g_h % 2) 283 die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h); 284 285 printf( 286 "Codec %s\nframes: %d, skip: %d\n" 287 "mode: %d, layers: %d\n" 288 "width %d, height: %d,\n" 289 "num: %d, den: %d, bitrate: %d,\n" 290 "gop size: %d, use_dummy_frame: %d\n", 291 vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code, 292 app_input->frames_to_skip, svc_ctx->encoding_mode, 293 svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h, 294 enc_cfg->g_timebase.num, enc_cfg->g_timebase.den, 295 enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist, 296 svc_ctx->first_frame_full_size); 297 } 298 299 int main(int argc, const char **argv) { 300 AppInput app_input = {0}; 301 FILE *infile, *outfile; 302 vpx_codec_ctx_t codec; 303 vpx_codec_enc_cfg_t enc_cfg; 304 SvcContext svc_ctx; 305 uint32_t i; 306 uint32_t frame_cnt = 0; 307 vpx_image_t raw; 308 vpx_codec_err_t res; 309 int pts = 0; /* PTS starts at 0 */ 310 int frame_duration = 1; /* 1 timebase tick per frame */ 311 312 memset(&svc_ctx, 0, sizeof(svc_ctx)); 313 svc_ctx.log_print = 1; 314 parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg); 315 316 // Allocate image buffer 317 if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) 318 die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); 319 320 if (!(infile = fopen(app_input.input_filename, "rb"))) 321 die("Failed to open %s for reading\n", app_input.input_filename); 322 323 if (!(outfile = fopen(app_input.output_filename, "wb"))) 324 die("Failed to open %s for writing\n", app_input.output_filename); 325 326 // Initialize codec 327 if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) != 328 VPX_CODEC_OK) 329 die("Failed to initialize encoder\n"); 330 331 write_ivf_file_header(outfile, enc_cfg.g_w, enc_cfg.g_h, 332 enc_cfg.g_timebase.num, enc_cfg.g_timebase.den, 0); 333 334 // skip initial frames 335 for (i = 0; i < app_input.frames_to_skip; ++i) { 336 read_frame(infile, &raw); 337 } 338 339 // Encode frames 340 while (frame_cnt <= app_input.frames_to_code) { 341 if (frame_cnt == 0 && svc_ctx.first_frame_full_size) { 342 create_dummy_frame(&raw); 343 } else { 344 if (!read_frame(infile, &raw)) break; 345 } 346 res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration, 347 VPX_DL_REALTIME); 348 printf("%s", vpx_svc_get_message(&svc_ctx)); 349 if (res != VPX_CODEC_OK) { 350 die_codec(&codec, "Failed to encode frame"); 351 } 352 if (vpx_svc_get_frame_size(&svc_ctx) > 0) { 353 write_ivf_frame_header(outfile, pts, vpx_svc_get_frame_size(&svc_ctx)); 354 (void)fwrite(vpx_svc_get_buffer(&svc_ctx), 1, 355 vpx_svc_get_frame_size(&svc_ctx), outfile); 356 } 357 ++frame_cnt; 358 pts += frame_duration; 359 } 360 361 printf("Processed %d frames\n", frame_cnt - svc_ctx.first_frame_full_size); 362 363 fclose(infile); 364 if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); 365 366 // rewrite the output file headers with the actual frame count 367 if (!fseek(outfile, 0, SEEK_SET)) { 368 write_ivf_file_header(outfile, enc_cfg.g_w, enc_cfg.g_h, 369 enc_cfg.g_timebase.num, enc_cfg.g_timebase.den, 370 frame_cnt); 371 } 372 fclose(outfile); 373 vpx_img_free(&raw); 374 375 // display average size, psnr 376 printf("%s", vpx_svc_dump_statistics(&svc_ctx)); 377 378 vpx_svc_release(&svc_ctx); 379 380 return EXIT_SUCCESS; 381 } 382