1 /* 2 * Copyright (c) 2007-2013 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sub license, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the 13 * next paragraph) shall be included in all copies or substantial portions 14 * of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR 20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 #include "sysdeps.h" 25 #include <stdio.h> 26 #include <string.h> 27 #include <stdlib.h> 28 #include <getopt.h> 29 #include <unistd.h> 30 #include <sys/types.h> 31 #include <sys/stat.h> 32 #include <sys/time.h> 33 #include <sys/mman.h> 34 #include <fcntl.h> 35 #include <assert.h> 36 #include <pthread.h> 37 #include <errno.h> 38 #include <math.h> 39 #include <va/va.h> 40 #include <va/va_enc_h264.h> 41 #include "va_display.h" 42 43 #define CHECK_VASTATUS(va_status,func) \ 44 if (va_status != VA_STATUS_SUCCESS) { \ 45 fprintf(stderr,"%s:%s (%d) failed,exit\n", __func__, func, __LINE__); \ 46 exit(1); \ 47 } 48 49 #include "../loadsurface.h" 50 51 #define NAL_REF_IDC_NONE 0 52 #define NAL_REF_IDC_LOW 1 53 #define NAL_REF_IDC_MEDIUM 2 54 #define NAL_REF_IDC_HIGH 3 55 56 #define NAL_NON_IDR 1 57 #define NAL_IDR 5 58 #define NAL_SPS 7 59 #define NAL_PPS 8 60 #define NAL_SEI 6 61 62 #define SLICE_TYPE_P 0 63 #define SLICE_TYPE_B 1 64 #define SLICE_TYPE_I 2 65 66 #define ENTROPY_MODE_CAVLC 0 67 #define ENTROPY_MODE_CABAC 1 68 69 #define PROFILE_IDC_BASELINE 66 70 #define PROFILE_IDC_MAIN 77 71 #define PROFILE_IDC_HIGH 100 72 73 #define BITSTREAM_ALLOCATE_STEPPING 4096 74 75 #define SURFACE_NUM 16 /* 16 surfaces for source YUV */ 76 #define SURFACE_NUM 16 /* 16 surfaces for reference */ 77 static VADisplay va_dpy; 78 static VAProfile h264_profile = ~0; 79 static VAConfigAttrib attrib[VAConfigAttribTypeMax]; 80 static VAConfigAttrib config_attrib[VAConfigAttribTypeMax]; 81 static int config_attrib_num = 0; 82 static VASurfaceID src_surface[SURFACE_NUM]; 83 static VABufferID coded_buf[SURFACE_NUM]; 84 static VASurfaceID ref_surface[SURFACE_NUM]; 85 static VAConfigID config_id; 86 static VAContextID context_id; 87 static VAEncSequenceParameterBufferH264 seq_param; 88 static VAEncPictureParameterBufferH264 pic_param; 89 static VAEncSliceParameterBufferH264 slice_param; 90 static VAPictureH264 CurrentCurrPic; 91 static VAPictureH264 ReferenceFrames[16], RefPicList0_P[32], RefPicList0_B[32], RefPicList1_B[32]; 92 93 static unsigned int MaxFrameNum = (2<<16); 94 static unsigned int MaxPicOrderCntLsb = (2<<8); 95 static unsigned int Log2MaxFrameNum = 16; 96 static unsigned int Log2MaxPicOrderCntLsb = 8; 97 98 static unsigned int num_ref_frames = 2; 99 static unsigned int numShortTerm = 0; 100 static int constraint_set_flag = 0; 101 static int h264_packedheader = 0; /* support pack header? */ 102 static int h264_maxref = (1<<16|1); 103 static int h264_entropy_mode = 1; /* cabac */ 104 105 static char *coded_fn = NULL, *srcyuv_fn = NULL, *recyuv_fn = NULL; 106 static FILE *coded_fp = NULL, *srcyuv_fp = NULL, *recyuv_fp = NULL; 107 static unsigned long long srcyuv_frames = 0; 108 static int srcyuv_fourcc = VA_FOURCC_NV12; 109 static int calc_psnr = 0; 110 111 static int frame_width = 176; 112 static int frame_height = 144; 113 static int frame_width_mbaligned; 114 static int frame_height_mbaligned; 115 static int frame_rate = 30; 116 static unsigned int frame_count = 60; 117 static unsigned int frame_coded = 0; 118 static unsigned int frame_bitrate = 0; 119 static unsigned int frame_slices = 1; 120 static double frame_size = 0; 121 static int initial_qp = 26; 122 static int minimal_qp = 0; 123 static int intra_period = 30; 124 static int intra_idr_period = 60; 125 static int ip_period = 1; 126 static int rc_mode = VA_RC_VBR; 127 static unsigned long long current_frame_encoding = 0; 128 static unsigned long long current_frame_display = 0; 129 static unsigned long long current_IDR_display = 0; 130 static unsigned int current_frame_num = 0; 131 static int current_frame_type; 132 #define current_slot (current_frame_display % SURFACE_NUM) 133 134 static int misc_priv_type = 0; 135 static int misc_priv_value = 0; 136 137 #define MIN(a, b) ((a)>(b)?(b):(a)) 138 #define MAX(a, b) ((a)>(b)?(a):(b)) 139 140 /* thread to save coded data/upload source YUV */ 141 struct storage_task_t { 142 void *next; 143 unsigned long long display_order; 144 unsigned long long encode_order; 145 }; 146 static struct storage_task_t *storage_task_header = NULL, *storage_task_tail = NULL; 147 #define SRC_SURFACE_IN_ENCODING 0 148 #define SRC_SURFACE_IN_STORAGE 1 149 static int srcsurface_status[SURFACE_NUM]; 150 static int encode_syncmode = 0; 151 static pthread_mutex_t encode_mutex = PTHREAD_MUTEX_INITIALIZER; 152 static pthread_cond_t encode_cond = PTHREAD_COND_INITIALIZER; 153 static pthread_t encode_thread; 154 155 /* for performance profiling */ 156 static unsigned int UploadPictureTicks=0; 157 static unsigned int BeginPictureTicks=0; 158 static unsigned int RenderPictureTicks=0; 159 static unsigned int EndPictureTicks=0; 160 static unsigned int SyncPictureTicks=0; 161 static unsigned int SavePictureTicks=0; 162 static unsigned int TotalTicks=0; 163 164 struct __bitstream { 165 unsigned int *buffer; 166 int bit_offset; 167 int max_size_in_dword; 168 }; 169 typedef struct __bitstream bitstream; 170 171 172 static unsigned int 173 va_swap32(unsigned int val) 174 { 175 unsigned char *pval = (unsigned char *)&val; 176 177 return ((pval[0] << 24) | 178 (pval[1] << 16) | 179 (pval[2] << 8) | 180 (pval[3] << 0)); 181 } 182 183 static void 184 bitstream_start(bitstream *bs) 185 { 186 bs->max_size_in_dword = BITSTREAM_ALLOCATE_STEPPING; 187 bs->buffer = calloc(bs->max_size_in_dword * sizeof(int), 1); 188 bs->bit_offset = 0; 189 } 190 191 static void 192 bitstream_end(bitstream *bs) 193 { 194 int pos = (bs->bit_offset >> 5); 195 int bit_offset = (bs->bit_offset & 0x1f); 196 int bit_left = 32 - bit_offset; 197 198 if (bit_offset) { 199 bs->buffer[pos] = va_swap32((bs->buffer[pos] << bit_left)); 200 } 201 } 202 203 static void 204 bitstream_put_ui(bitstream *bs, unsigned int val, int size_in_bits) 205 { 206 int pos = (bs->bit_offset >> 5); 207 int bit_offset = (bs->bit_offset & 0x1f); 208 int bit_left = 32 - bit_offset; 209 210 if (!size_in_bits) 211 return; 212 213 bs->bit_offset += size_in_bits; 214 215 if (bit_left > size_in_bits) { 216 bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val); 217 } else { 218 size_in_bits -= bit_left; 219 bs->buffer[pos] = (bs->buffer[pos] << bit_left) | (val >> size_in_bits); 220 bs->buffer[pos] = va_swap32(bs->buffer[pos]); 221 222 if (pos + 1 == bs->max_size_in_dword) { 223 bs->max_size_in_dword += BITSTREAM_ALLOCATE_STEPPING; 224 bs->buffer = realloc(bs->buffer, bs->max_size_in_dword * sizeof(unsigned int)); 225 } 226 227 bs->buffer[pos + 1] = val; 228 } 229 } 230 231 static void 232 bitstream_put_ue(bitstream *bs, unsigned int val) 233 { 234 int size_in_bits = 0; 235 int tmp_val = ++val; 236 237 while (tmp_val) { 238 tmp_val >>= 1; 239 size_in_bits++; 240 } 241 242 bitstream_put_ui(bs, 0, size_in_bits - 1); // leading zero 243 bitstream_put_ui(bs, val, size_in_bits); 244 } 245 246 static void 247 bitstream_put_se(bitstream *bs, int val) 248 { 249 unsigned int new_val; 250 251 if (val <= 0) 252 new_val = -2 * val; 253 else 254 new_val = 2 * val - 1; 255 256 bitstream_put_ue(bs, new_val); 257 } 258 259 static void 260 bitstream_byte_aligning(bitstream *bs, int bit) 261 { 262 int bit_offset = (bs->bit_offset & 0x7); 263 int bit_left = 8 - bit_offset; 264 int new_val; 265 266 if (!bit_offset) 267 return; 268 269 assert(bit == 0 || bit == 1); 270 271 if (bit) 272 new_val = (1 << bit_left) - 1; 273 else 274 new_val = 0; 275 276 bitstream_put_ui(bs, new_val, bit_left); 277 } 278 279 static void 280 rbsp_trailing_bits(bitstream *bs) 281 { 282 bitstream_put_ui(bs, 1, 1); 283 bitstream_byte_aligning(bs, 0); 284 } 285 286 static void nal_start_code_prefix(bitstream *bs) 287 { 288 bitstream_put_ui(bs, 0x00000001, 32); 289 } 290 291 static void nal_header(bitstream *bs, int nal_ref_idc, int nal_unit_type) 292 { 293 bitstream_put_ui(bs, 0, 1); /* forbidden_zero_bit: 0 */ 294 bitstream_put_ui(bs, nal_ref_idc, 2); 295 bitstream_put_ui(bs, nal_unit_type, 5); 296 } 297 298 static void sps_rbsp(bitstream *bs) 299 { 300 int profile_idc = PROFILE_IDC_BASELINE; 301 302 if (h264_profile == VAProfileH264High) 303 profile_idc = PROFILE_IDC_HIGH; 304 else if (h264_profile == VAProfileH264Main) 305 profile_idc = PROFILE_IDC_MAIN; 306 307 bitstream_put_ui(bs, profile_idc, 8); /* profile_idc */ 308 bitstream_put_ui(bs, !!(constraint_set_flag & 1), 1); /* constraint_set0_flag */ 309 bitstream_put_ui(bs, !!(constraint_set_flag & 2), 1); /* constraint_set1_flag */ 310 bitstream_put_ui(bs, !!(constraint_set_flag & 4), 1); /* constraint_set2_flag */ 311 bitstream_put_ui(bs, !!(constraint_set_flag & 8), 1); /* constraint_set3_flag */ 312 bitstream_put_ui(bs, 0, 4); /* reserved_zero_4bits */ 313 bitstream_put_ui(bs, seq_param.level_idc, 8); /* level_idc */ 314 bitstream_put_ue(bs, seq_param.seq_parameter_set_id); /* seq_parameter_set_id */ 315 316 if ( profile_idc == PROFILE_IDC_HIGH) { 317 bitstream_put_ue(bs, 1); /* chroma_format_idc = 1, 4:2:0 */ 318 bitstream_put_ue(bs, 0); /* bit_depth_luma_minus8 */ 319 bitstream_put_ue(bs, 0); /* bit_depth_chroma_minus8 */ 320 bitstream_put_ui(bs, 0, 1); /* qpprime_y_zero_transform_bypass_flag */ 321 bitstream_put_ui(bs, 0, 1); /* seq_scaling_matrix_present_flag */ 322 } 323 324 bitstream_put_ue(bs, seq_param.seq_fields.bits.log2_max_frame_num_minus4); /* log2_max_frame_num_minus4 */ 325 bitstream_put_ue(bs, seq_param.seq_fields.bits.pic_order_cnt_type); /* pic_order_cnt_type */ 326 327 if (seq_param.seq_fields.bits.pic_order_cnt_type == 0) 328 bitstream_put_ue(bs, seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4); /* log2_max_pic_order_cnt_lsb_minus4 */ 329 else { 330 assert(0); 331 } 332 333 bitstream_put_ue(bs, seq_param.max_num_ref_frames); /* num_ref_frames */ 334 bitstream_put_ui(bs, 0, 1); /* gaps_in_frame_num_value_allowed_flag */ 335 336 bitstream_put_ue(bs, seq_param.picture_width_in_mbs - 1); /* pic_width_in_mbs_minus1 */ 337 bitstream_put_ue(bs, seq_param.picture_height_in_mbs - 1); /* pic_height_in_map_units_minus1 */ 338 bitstream_put_ui(bs, seq_param.seq_fields.bits.frame_mbs_only_flag, 1); /* frame_mbs_only_flag */ 339 340 if (!seq_param.seq_fields.bits.frame_mbs_only_flag) { 341 assert(0); 342 } 343 344 bitstream_put_ui(bs, seq_param.seq_fields.bits.direct_8x8_inference_flag, 1); /* direct_8x8_inference_flag */ 345 bitstream_put_ui(bs, seq_param.frame_cropping_flag, 1); /* frame_cropping_flag */ 346 347 if (seq_param.frame_cropping_flag) { 348 bitstream_put_ue(bs, seq_param.frame_crop_left_offset); /* frame_crop_left_offset */ 349 bitstream_put_ue(bs, seq_param.frame_crop_right_offset); /* frame_crop_right_offset */ 350 bitstream_put_ue(bs, seq_param.frame_crop_top_offset); /* frame_crop_top_offset */ 351 bitstream_put_ue(bs, seq_param.frame_crop_bottom_offset); /* frame_crop_bottom_offset */ 352 } 353 354 //if ( frame_bit_rate < 0 ) { //TODO EW: the vui header isn't correct 355 if ( 1 ) { 356 bitstream_put_ui(bs, 0, 1); /* vui_parameters_present_flag */ 357 } else { 358 bitstream_put_ui(bs, 1, 1); /* vui_parameters_present_flag */ 359 bitstream_put_ui(bs, 0, 1); /* aspect_ratio_info_present_flag */ 360 bitstream_put_ui(bs, 0, 1); /* overscan_info_present_flag */ 361 bitstream_put_ui(bs, 0, 1); /* video_signal_type_present_flag */ 362 bitstream_put_ui(bs, 0, 1); /* chroma_loc_info_present_flag */ 363 bitstream_put_ui(bs, 1, 1); /* timing_info_present_flag */ 364 { 365 bitstream_put_ui(bs, 15, 32); 366 bitstream_put_ui(bs, 900, 32); 367 bitstream_put_ui(bs, 1, 1); 368 } 369 bitstream_put_ui(bs, 1, 1); /* nal_hrd_parameters_present_flag */ 370 { 371 // hrd_parameters 372 bitstream_put_ue(bs, 0); /* cpb_cnt_minus1 */ 373 bitstream_put_ui(bs, 4, 4); /* bit_rate_scale */ 374 bitstream_put_ui(bs, 6, 4); /* cpb_size_scale */ 375 376 bitstream_put_ue(bs, frame_bitrate - 1); /* bit_rate_value_minus1[0] */ 377 bitstream_put_ue(bs, frame_bitrate*8 - 1); /* cpb_size_value_minus1[0] */ 378 bitstream_put_ui(bs, 1, 1); /* cbr_flag[0] */ 379 380 bitstream_put_ui(bs, 23, 5); /* initial_cpb_removal_delay_length_minus1 */ 381 bitstream_put_ui(bs, 23, 5); /* cpb_removal_delay_length_minus1 */ 382 bitstream_put_ui(bs, 23, 5); /* dpb_output_delay_length_minus1 */ 383 bitstream_put_ui(bs, 23, 5); /* time_offset_length */ 384 } 385 bitstream_put_ui(bs, 0, 1); /* vcl_hrd_parameters_present_flag */ 386 bitstream_put_ui(bs, 0, 1); /* low_delay_hrd_flag */ 387 388 bitstream_put_ui(bs, 0, 1); /* pic_struct_present_flag */ 389 bitstream_put_ui(bs, 0, 1); /* bitstream_restriction_flag */ 390 } 391 392 rbsp_trailing_bits(bs); /* rbsp_trailing_bits */ 393 } 394 395 396 static void pps_rbsp(bitstream *bs) 397 { 398 bitstream_put_ue(bs, pic_param.pic_parameter_set_id); /* pic_parameter_set_id */ 399 bitstream_put_ue(bs, pic_param.seq_parameter_set_id); /* seq_parameter_set_id */ 400 401 bitstream_put_ui(bs, pic_param.pic_fields.bits.entropy_coding_mode_flag, 1); /* entropy_coding_mode_flag */ 402 403 bitstream_put_ui(bs, 0, 1); /* pic_order_present_flag: 0 */ 404 405 bitstream_put_ue(bs, 0); /* num_slice_groups_minus1 */ 406 407 bitstream_put_ue(bs, pic_param.num_ref_idx_l0_active_minus1); /* num_ref_idx_l0_active_minus1 */ 408 bitstream_put_ue(bs, pic_param.num_ref_idx_l1_active_minus1); /* num_ref_idx_l1_active_minus1 1 */ 409 410 bitstream_put_ui(bs, pic_param.pic_fields.bits.weighted_pred_flag, 1); /* weighted_pred_flag: 0 */ 411 bitstream_put_ui(bs, pic_param.pic_fields.bits.weighted_bipred_idc, 2); /* weighted_bipred_idc: 0 */ 412 413 bitstream_put_se(bs, pic_param.pic_init_qp - 26); /* pic_init_qp_minus26 */ 414 bitstream_put_se(bs, 0); /* pic_init_qs_minus26 */ 415 bitstream_put_se(bs, 0); /* chroma_qp_index_offset */ 416 417 bitstream_put_ui(bs, pic_param.pic_fields.bits.deblocking_filter_control_present_flag, 1); /* deblocking_filter_control_present_flag */ 418 bitstream_put_ui(bs, 0, 1); /* constrained_intra_pred_flag */ 419 bitstream_put_ui(bs, 0, 1); /* redundant_pic_cnt_present_flag */ 420 421 /* more_rbsp_data */ 422 bitstream_put_ui(bs, pic_param.pic_fields.bits.transform_8x8_mode_flag, 1); /*transform_8x8_mode_flag */ 423 bitstream_put_ui(bs, 0, 1); /* pic_scaling_matrix_present_flag */ 424 bitstream_put_se(bs, pic_param.second_chroma_qp_index_offset ); /*second_chroma_qp_index_offset */ 425 426 rbsp_trailing_bits(bs); 427 } 428 429 430 static int 431 build_packed_pic_buffer(unsigned char **header_buffer) 432 { 433 bitstream bs; 434 435 bitstream_start(&bs); 436 nal_start_code_prefix(&bs); 437 nal_header(&bs, NAL_REF_IDC_HIGH, NAL_PPS); 438 pps_rbsp(&bs); 439 bitstream_end(&bs); 440 441 *header_buffer = (unsigned char *)bs.buffer; 442 return bs.bit_offset; 443 } 444 445 static int 446 build_packed_seq_buffer(unsigned char **header_buffer) 447 { 448 bitstream bs; 449 450 bitstream_start(&bs); 451 nal_start_code_prefix(&bs); 452 nal_header(&bs, NAL_REF_IDC_HIGH, NAL_SPS); 453 sps_rbsp(&bs); 454 bitstream_end(&bs); 455 456 *header_buffer = (unsigned char *)bs.buffer; 457 return bs.bit_offset; 458 } 459 460 static int 461 build_packed_sei_buffer_timing(unsigned int init_cpb_removal_length, 462 unsigned int init_cpb_removal_delay, 463 unsigned int init_cpb_removal_delay_offset, 464 unsigned int cpb_removal_length, 465 unsigned int cpb_removal_delay, 466 unsigned int dpb_output_length, 467 unsigned int dpb_output_delay, 468 unsigned char **sei_buffer) 469 { 470 unsigned char *byte_buf; 471 int bp_byte_size, i, pic_byte_size; 472 473 bitstream nal_bs; 474 bitstream sei_bp_bs, sei_pic_bs; 475 476 bitstream_start(&sei_bp_bs); 477 bitstream_put_ue(&sei_bp_bs, 0); /*seq_parameter_set_id*/ 478 bitstream_put_ui(&sei_bp_bs, init_cpb_removal_delay, cpb_removal_length); 479 bitstream_put_ui(&sei_bp_bs, init_cpb_removal_delay_offset, cpb_removal_length); 480 if ( sei_bp_bs.bit_offset & 0x7) { 481 bitstream_put_ui(&sei_bp_bs, 1, 1); 482 } 483 bitstream_end(&sei_bp_bs); 484 bp_byte_size = (sei_bp_bs.bit_offset + 7) / 8; 485 486 bitstream_start(&sei_pic_bs); 487 bitstream_put_ui(&sei_pic_bs, cpb_removal_delay, cpb_removal_length); 488 bitstream_put_ui(&sei_pic_bs, dpb_output_delay, dpb_output_length); 489 if ( sei_pic_bs.bit_offset & 0x7) { 490 bitstream_put_ui(&sei_pic_bs, 1, 1); 491 } 492 bitstream_end(&sei_pic_bs); 493 pic_byte_size = (sei_pic_bs.bit_offset + 7) / 8; 494 495 bitstream_start(&nal_bs); 496 nal_start_code_prefix(&nal_bs); 497 nal_header(&nal_bs, NAL_REF_IDC_NONE, NAL_SEI); 498 499 /* Write the SEI buffer period data */ 500 bitstream_put_ui(&nal_bs, 0, 8); 501 bitstream_put_ui(&nal_bs, bp_byte_size, 8); 502 503 byte_buf = (unsigned char *)sei_bp_bs.buffer; 504 for(i = 0; i < bp_byte_size; i++) { 505 bitstream_put_ui(&nal_bs, byte_buf[i], 8); 506 } 507 free(byte_buf); 508 /* write the SEI timing data */ 509 bitstream_put_ui(&nal_bs, 0x01, 8); 510 bitstream_put_ui(&nal_bs, pic_byte_size, 8); 511 512 byte_buf = (unsigned char *)sei_pic_bs.buffer; 513 for(i = 0; i < pic_byte_size; i++) { 514 bitstream_put_ui(&nal_bs, byte_buf[i], 8); 515 } 516 free(byte_buf); 517 518 rbsp_trailing_bits(&nal_bs); 519 bitstream_end(&nal_bs); 520 521 *sei_buffer = (unsigned char *)nal_bs.buffer; 522 523 return nal_bs.bit_offset; 524 } 525 526 527 528 /* 529 * Helper function for profiling purposes 530 */ 531 static unsigned int GetTickCount() 532 { 533 struct timeval tv; 534 if (gettimeofday(&tv, NULL)) 535 return 0; 536 return tv.tv_usec/1000+tv.tv_sec*1000; 537 } 538 539 /* 540 Assume frame sequence is: Frame#0,#1,#2,...,#M,...,#X,... (encoding order) 541 1) period between Frame #X and Frame #N = #X - #N 542 2) 0 means infinite for intra_period/intra_idr_period, and 0 is invalid for ip_period 543 3) intra_idr_period % intra_period (intra_period > 0) and intra_period % ip_period must be 0 544 4) intra_period and intra_idr_period take precedence over ip_period 545 5) if ip_period > 1, intra_period and intra_idr_period are not the strict periods 546 of I/IDR frames, see bellow examples 547 ------------------------------------------------------------------- 548 intra_period intra_idr_period ip_period frame sequence (intra_period/intra_idr_period/ip_period) 549 0 ignored 1 IDRPPPPPPP ... (No IDR/I any more) 550 0 ignored >=2 IDR(PBB)(PBB)... (No IDR/I any more) 551 1 0 ignored IDRIIIIIII... (No IDR any more) 552 1 1 ignored IDR IDR IDR IDR... 553 1 >=2 ignored IDRII IDRII IDR... (1/3/ignore) 554 >=2 0 1 IDRPPP IPPP I... (3/0/1) 555 >=2 0 >=2 IDR(PBB)(PBB)(IBB) (6/0/3) 556 (PBB)(IBB)(PBB)(IBB)... 557 >=2 >=2 1 IDRPPPPP IPPPPP IPPPPP (6/18/1) 558 IDRPPPPP IPPPPP IPPPPP... 559 >=2 >=2 >=2 {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)} (6/18/3) 560 {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)}... 561 {IDR(PBB)(PBB)(IBB)(PBB)} (6/12/3) 562 {IDR(PBB)(PBB)(IBB)(PBB)}... 563 {IDR(PBB)(PBB)} (6/6/3) 564 {IDR(PBB)(PBB)}. 565 */ 566 567 /* 568 * Return displaying order with specified periods and encoding order 569 * displaying_order: displaying order 570 * frame_type: frame type 571 */ 572 #define FRAME_P 0 573 #define FRAME_B 1 574 #define FRAME_I 2 575 #define FRAME_IDR 7 576 void encoding2display_order( 577 unsigned long long encoding_order,int intra_period, 578 int intra_idr_period,int ip_period, 579 unsigned long long *displaying_order, 580 int *frame_type) 581 { 582 int encoding_order_gop = 0; 583 584 if (intra_period == 1) { /* all are I/IDR frames */ 585 *displaying_order = encoding_order; 586 if (intra_idr_period == 0) 587 *frame_type = (encoding_order == 0)?FRAME_IDR:FRAME_I; 588 else 589 *frame_type = (encoding_order % intra_idr_period == 0)?FRAME_IDR:FRAME_I; 590 return; 591 } 592 593 if (intra_period == 0) 594 intra_idr_period = 0; 595 596 /* new sequence like 597 * IDR PPPPP IPPPPP 598 * IDR (PBB)(PBB)(IBB)(PBB) 599 */ 600 encoding_order_gop = (intra_idr_period == 0)? encoding_order: 601 (encoding_order % (intra_idr_period + ((ip_period == 1)?0:1))); 602 603 if (encoding_order_gop == 0) { /* the first frame */ 604 *frame_type = FRAME_IDR; 605 *displaying_order = encoding_order; 606 } else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */ 607 *frame_type = FRAME_B; 608 *displaying_order = encoding_order - 1; 609 } else if ((intra_period != 0) && /* have I frames */ 610 (encoding_order_gop >= 2) && 611 ((ip_period == 1 && encoding_order_gop % intra_period == 0) || /* for IDR PPPPP IPPPP */ 612 /* for IDR (PBB)(PBB)(IBB) */ 613 (ip_period >= 2 && ((encoding_order_gop - 1) / ip_period % (intra_period / ip_period)) == 0))) { 614 *frame_type = FRAME_I; 615 *displaying_order = encoding_order + ip_period - 1; 616 } else { 617 *frame_type = FRAME_P; 618 *displaying_order = encoding_order + ip_period - 1; 619 } 620 } 621 622 623 static char *fourcc_to_string(int fourcc) 624 { 625 switch (fourcc) { 626 case VA_FOURCC_NV12: 627 return "NV12"; 628 case VA_FOURCC_IYUV: 629 return "IYUV"; 630 case VA_FOURCC_YV12: 631 return "YV12"; 632 case VA_FOURCC_UYVY: 633 return "UYVY"; 634 default: 635 return "Unknown"; 636 } 637 } 638 639 static int string_to_fourcc(char *str) 640 { 641 int fourcc; 642 643 if (!strncmp(str, "NV12", 4)) 644 fourcc = VA_FOURCC_NV12; 645 else if (!strncmp(str, "IYUV", 4)) 646 fourcc = VA_FOURCC_IYUV; 647 else if (!strncmp(str, "YV12", 4)) 648 fourcc = VA_FOURCC_YV12; 649 else if (!strncmp(str, "UYVY", 4)) 650 fourcc = VA_FOURCC_UYVY; 651 else { 652 printf("Unknow FOURCC\n"); 653 fourcc = -1; 654 } 655 return fourcc; 656 } 657 658 659 static char *rc_to_string(int rcmode) 660 { 661 switch (rc_mode) { 662 case VA_RC_NONE: 663 return "NONE"; 664 case VA_RC_CBR: 665 return "CBR"; 666 case VA_RC_VBR: 667 return "VBR"; 668 case VA_RC_VCM: 669 return "VCM"; 670 case VA_RC_CQP: 671 return "CQP"; 672 case VA_RC_VBR_CONSTRAINED: 673 return "VBR_CONSTRAINED"; 674 default: 675 return "Unknown"; 676 } 677 } 678 679 static int string_to_rc(char *str) 680 { 681 int rc_mode; 682 683 if (!strncmp(str, "NONE", 4)) 684 rc_mode = VA_RC_NONE; 685 else if (!strncmp(str, "CBR", 3)) 686 rc_mode = VA_RC_CBR; 687 else if (!strncmp(str, "VBR", 3)) 688 rc_mode = VA_RC_VBR; 689 else if (!strncmp(str, "VCM", 3)) 690 rc_mode = VA_RC_VCM; 691 else if (!strncmp(str, "CQP", 3)) 692 rc_mode = VA_RC_CQP; 693 else if (!strncmp(str, "VBR_CONSTRAINED", 15)) 694 rc_mode = VA_RC_VBR_CONSTRAINED; 695 else { 696 printf("Unknown RC mode\n"); 697 rc_mode = -1; 698 } 699 return rc_mode; 700 } 701 702 703 static int print_help(void) 704 { 705 printf("./h264encode <options>\n"); 706 printf(" -w <width> -h <height>\n"); 707 printf(" -framecount <frame number>\n"); 708 printf(" -n <frame number>\n"); 709 printf(" if set to 0 and srcyuv is set, the frame count is from srcuv file\n"); 710 printf(" -o <coded file>\n"); 711 printf(" -f <frame rate>\n"); 712 printf(" --intra_period <number>\n"); 713 printf(" --idr_period <number>\n"); 714 printf(" --ip_period <number>\n"); 715 printf(" --bitrate <bitrate>\n"); 716 printf(" --initialqp <number>\n"); 717 printf(" --minqp <number>\n"); 718 printf(" --rcmode <NONE|CBR|VBR|VCM|CQP|VBR_CONTRAINED>\n"); 719 printf(" --syncmode: sequentially upload source, encoding, save result, no multi-thread\n"); 720 printf(" --srcyuv <filename> load YUV from a file\n"); 721 printf(" --fourcc <NV12|IYUV|YV12> source YUV fourcc\n"); 722 printf(" --recyuv <filename> save reconstructed YUV into a file\n"); 723 printf(" --enablePSNR calculate PSNR of recyuv vs. srcyuv\n"); 724 printf(" --entropy <0|1>, 1 means cabac, 0 cavlc\n"); 725 printf(" --profile <BP|MP|HP>\n"); 726 return 0; 727 } 728 729 static int process_cmdline(int argc, char *argv[]) 730 { 731 char c; 732 const struct option long_opts[] = { 733 {"help", no_argument, NULL, 0 }, 734 {"bitrate", required_argument, NULL, 1 }, 735 {"minqp", required_argument, NULL, 2 }, 736 {"initialqp", required_argument, NULL, 3 }, 737 {"intra_period", required_argument, NULL, 4 }, 738 {"idr_period", required_argument, NULL, 5 }, 739 {"ip_period", required_argument, NULL, 6 }, 740 {"rcmode", required_argument, NULL, 7 }, 741 {"srcyuv", required_argument, NULL, 9 }, 742 {"recyuv", required_argument, NULL, 10 }, 743 {"fourcc", required_argument, NULL, 11 }, 744 {"syncmode", no_argument, NULL, 12 }, 745 {"enablePSNR", no_argument, NULL, 13 }, 746 {"prit", required_argument, NULL, 14 }, 747 {"priv", required_argument, NULL, 15 }, 748 {"framecount", required_argument, NULL, 16 }, 749 {"entropy", required_argument, NULL, 17 }, 750 {"profile", required_argument, NULL, 18 }, 751 {NULL, no_argument, NULL, 0 }}; 752 int long_index; 753 754 while ((c =getopt_long_only(argc,argv,"w:h:n:f:o:?",long_opts,&long_index)) != EOF) { 755 switch (c) { 756 case 'w': 757 frame_width = atoi(optarg); 758 break; 759 case 'h': 760 frame_height = atoi(optarg); 761 break; 762 case 'n': 763 case 16: 764 frame_count = atoi(optarg); 765 break; 766 case 'f': 767 frame_rate = atoi(optarg); 768 break; 769 case 'o': 770 coded_fn = strdup(optarg); 771 break; 772 case 0: 773 print_help(); 774 exit(0); 775 case 1: 776 frame_bitrate = atoi(optarg); 777 break; 778 case 2: 779 minimal_qp = atoi(optarg); 780 break; 781 case 3: 782 initial_qp = atoi(optarg); 783 break; 784 case 4: 785 intra_period = atoi(optarg); 786 break; 787 case 5: 788 intra_idr_period = atoi(optarg); 789 break; 790 case 6: 791 ip_period = atoi(optarg); 792 break; 793 case 7: 794 rc_mode = string_to_rc(optarg); 795 if (rc_mode < 0) { 796 print_help(); 797 exit(1); 798 } 799 break; 800 case 9: 801 srcyuv_fn = strdup(optarg); 802 break; 803 case 10: 804 recyuv_fn = strdup(optarg); 805 break; 806 case 11: 807 srcyuv_fourcc = string_to_fourcc(optarg); 808 if (srcyuv_fourcc <= 0) { 809 print_help(); 810 exit(1); 811 } 812 break; 813 case 12: 814 encode_syncmode = 1; 815 break; 816 case 13: 817 calc_psnr = 1; 818 break; 819 case 14: 820 misc_priv_type = strtol(optarg, NULL, 0); 821 break; 822 case 15: 823 misc_priv_value = strtol(optarg, NULL, 0); 824 break; 825 case 17: 826 h264_entropy_mode = atoi(optarg) ? 1: 0; 827 break; 828 case 18: 829 if (strncmp(optarg, "BP", 2) == 0) 830 h264_profile = VAProfileH264Baseline; 831 else if (strncmp(optarg, "MP", 2) == 0) 832 h264_profile = VAProfileH264Main; 833 else if (strncmp(optarg, "HP", 2) == 0) 834 h264_profile = VAProfileH264High; 835 else 836 h264_profile = 0; 837 break; 838 case ':': 839 case '?': 840 print_help(); 841 exit(0); 842 } 843 } 844 845 if (ip_period < 1) { 846 printf(" ip_period must be greater than 0\n"); 847 exit(0); 848 } 849 if (intra_period != 1 && intra_period % ip_period != 0) { 850 printf(" intra_period must be a multiplier of ip_period\n"); 851 exit(0); 852 } 853 if (intra_period != 0 && intra_idr_period % intra_period != 0) { 854 printf(" intra_idr_period must be a multiplier of intra_period\n"); 855 exit(0); 856 } 857 858 if (frame_bitrate == 0) 859 frame_bitrate = frame_width * frame_height * 12 * frame_rate / 50; 860 861 /* open source file */ 862 if (srcyuv_fn) { 863 srcyuv_fp = fopen(srcyuv_fn,"r"); 864 865 if (srcyuv_fp == NULL) 866 printf("Open source YUV file %s failed, use auto-generated YUV data\n", srcyuv_fn); 867 else { 868 struct stat tmp; 869 870 fstat(fileno(srcyuv_fp), &tmp); 871 srcyuv_frames = tmp.st_size / (frame_width * frame_height * 1.5); 872 printf("Source YUV file %s with %llu frames\n", srcyuv_fn, srcyuv_frames); 873 874 if (frame_count == 0) 875 frame_count = srcyuv_frames; 876 } 877 } 878 879 /* open source file */ 880 if (recyuv_fn) { 881 recyuv_fp = fopen(recyuv_fn,"w+"); 882 883 if (recyuv_fp == NULL) 884 printf("Open reconstructed YUV file %s failed\n", recyuv_fn); 885 } 886 887 if (coded_fn == NULL) { 888 struct stat buf; 889 if (stat("/tmp", &buf) == 0) 890 coded_fn = strdup("/tmp/test.264"); 891 else if (stat("/sdcard", &buf) == 0) 892 coded_fn = strdup("/sdcard/test.264"); 893 else 894 coded_fn = strdup("./test.264"); 895 } 896 897 /* store coded data into a file */ 898 coded_fp = fopen(coded_fn,"w+"); 899 if (coded_fp == NULL) { 900 printf("Open file %s failed, exit\n", coded_fn); 901 exit(1); 902 } 903 904 frame_width_mbaligned = (frame_width + 15) & (~15); 905 frame_height_mbaligned = (frame_height + 15) & (~15); 906 if (frame_width != frame_width_mbaligned || 907 frame_height != frame_height_mbaligned) { 908 printf("Source frame is %dx%d and will code clip to %dx%d with crop\n", 909 frame_width, frame_height, 910 frame_width_mbaligned, frame_height_mbaligned 911 ); 912 } 913 914 return 0; 915 } 916 917 static int init_va(void) 918 { 919 VAProfile profile_list[]={VAProfileH264High,VAProfileH264Main,VAProfileH264Baseline,VAProfileH264ConstrainedBaseline}; 920 VAEntrypoint *entrypoints; 921 int num_entrypoints, slice_entrypoint; 922 int support_encode = 0; 923 int major_ver, minor_ver; 924 VAStatus va_status; 925 unsigned int i; 926 927 va_dpy = va_open_display(); 928 va_status = vaInitialize(va_dpy, &major_ver, &minor_ver); 929 CHECK_VASTATUS(va_status, "vaInitialize"); 930 931 num_entrypoints = vaMaxNumEntrypoints(va_dpy); 932 entrypoints = malloc(num_entrypoints * sizeof(*entrypoints)); 933 if (!entrypoints) { 934 fprintf(stderr, "error: failed to initialize VA entrypoints array\n"); 935 exit(1); 936 } 937 938 /* use the highest profile */ 939 for (i = 0; i < sizeof(profile_list)/sizeof(profile_list[0]); i++) { 940 if ((h264_profile != ~0) && h264_profile != profile_list[i]) 941 continue; 942 943 h264_profile = profile_list[i]; 944 vaQueryConfigEntrypoints(va_dpy, h264_profile, entrypoints, &num_entrypoints); 945 for (slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) { 946 if (entrypoints[slice_entrypoint] == VAEntrypointEncSlice) { 947 support_encode = 1; 948 break; 949 } 950 } 951 if (support_encode == 1) 952 break; 953 } 954 955 if (support_encode == 0) { 956 printf("Can't find VAEntrypointEncSlice for H264 profiles\n"); 957 exit(1); 958 } else { 959 switch (h264_profile) { 960 case VAProfileH264Baseline: 961 printf("Use profile VAProfileH264Baseline\n"); 962 ip_period = 1; 963 constraint_set_flag |= (1 << 0); /* Annex A.2.1 */ 964 h264_entropy_mode = 0; 965 break; 966 case VAProfileH264ConstrainedBaseline: 967 printf("Use profile VAProfileH264ConstrainedBaseline\n"); 968 constraint_set_flag |= (1 << 0 | 1 << 1); /* Annex A.2.2 */ 969 ip_period = 1; 970 break; 971 972 case VAProfileH264Main: 973 printf("Use profile VAProfileH264Main\n"); 974 constraint_set_flag |= (1 << 1); /* Annex A.2.2 */ 975 break; 976 977 case VAProfileH264High: 978 constraint_set_flag |= (1 << 3); /* Annex A.2.4 */ 979 printf("Use profile VAProfileH264High\n"); 980 break; 981 default: 982 printf("unknow profile. Set to Baseline"); 983 h264_profile = VAProfileH264Baseline; 984 ip_period = 1; 985 constraint_set_flag |= (1 << 0); /* Annex A.2.1 */ 986 break; 987 } 988 } 989 990 /* find out the format for the render target, and rate control mode */ 991 for (i = 0; i < VAConfigAttribTypeMax; i++) 992 attrib[i].type = i; 993 994 va_status = vaGetConfigAttributes(va_dpy, h264_profile, VAEntrypointEncSlice, 995 &attrib[0], VAConfigAttribTypeMax); 996 CHECK_VASTATUS(va_status, "vaGetConfigAttributes"); 997 /* check the interested configattrib */ 998 if ((attrib[VAConfigAttribRTFormat].value & VA_RT_FORMAT_YUV420) == 0) { 999 printf("Not find desired YUV420 RT format\n"); 1000 exit(1); 1001 } else { 1002 config_attrib[config_attrib_num].type = VAConfigAttribRTFormat; 1003 config_attrib[config_attrib_num].value = VA_RT_FORMAT_YUV420; 1004 config_attrib_num++; 1005 } 1006 1007 if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) { 1008 int tmp = attrib[VAConfigAttribRateControl].value; 1009 1010 printf("Support rate control mode (0x%x):", tmp); 1011 1012 if (tmp & VA_RC_NONE) 1013 printf("NONE "); 1014 if (tmp & VA_RC_CBR) 1015 printf("CBR "); 1016 if (tmp & VA_RC_VBR) 1017 printf("VBR "); 1018 if (tmp & VA_RC_VCM) 1019 printf("VCM "); 1020 if (tmp & VA_RC_CQP) 1021 printf("CQP "); 1022 if (tmp & VA_RC_VBR_CONSTRAINED) 1023 printf("VBR_CONSTRAINED "); 1024 1025 printf("\n"); 1026 1027 /* need to check if support rc_mode */ 1028 config_attrib[config_attrib_num].type = VAConfigAttribRateControl; 1029 config_attrib[config_attrib_num].value = rc_mode; 1030 config_attrib_num++; 1031 } 1032 1033 1034 if (attrib[VAConfigAttribEncPackedHeaders].value != VA_ATTRIB_NOT_SUPPORTED) { 1035 int tmp = attrib[VAConfigAttribEncPackedHeaders].value; 1036 1037 printf("Support VAConfigAttribEncPackedHeaders\n"); 1038 1039 h264_packedheader = 1; 1040 config_attrib[config_attrib_num].type = VAConfigAttribEncPackedHeaders; 1041 config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE; 1042 1043 if (tmp & VA_ENC_PACKED_HEADER_SEQUENCE) { 1044 printf("Support packed sequence headers\n"); 1045 config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SEQUENCE; 1046 } 1047 1048 if (tmp & VA_ENC_PACKED_HEADER_PICTURE) { 1049 printf("Support packed picture headers\n"); 1050 config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_PICTURE; 1051 } 1052 1053 if (tmp & VA_ENC_PACKED_HEADER_SLICE) { 1054 printf("Support packed slice headers\n"); 1055 config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SLICE; 1056 } 1057 1058 if (tmp & VA_ENC_PACKED_HEADER_MISC) { 1059 printf("Support packed misc headers\n"); 1060 config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_MISC; 1061 } 1062 1063 config_attrib_num++; 1064 } 1065 1066 if (attrib[VAConfigAttribEncInterlaced].value != VA_ATTRIB_NOT_SUPPORTED) { 1067 int tmp = attrib[VAConfigAttribEncInterlaced].value; 1068 1069 printf("Support VAConfigAttribEncInterlaced\n"); 1070 1071 if (tmp & VA_ENC_INTERLACED_FRAME) 1072 printf("support VA_ENC_INTERLACED_FRAME\n"); 1073 if (tmp & VA_ENC_INTERLACED_FIELD) 1074 printf("Support VA_ENC_INTERLACED_FIELD\n"); 1075 if (tmp & VA_ENC_INTERLACED_MBAFF) 1076 printf("Support VA_ENC_INTERLACED_MBAFF\n"); 1077 if (tmp & VA_ENC_INTERLACED_PAFF) 1078 printf("Support VA_ENC_INTERLACED_PAFF\n"); 1079 1080 config_attrib[config_attrib_num].type = VAConfigAttribEncInterlaced; 1081 config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE; 1082 config_attrib_num++; 1083 } 1084 1085 if (attrib[VAConfigAttribEncMaxRefFrames].value != VA_ATTRIB_NOT_SUPPORTED) { 1086 h264_maxref = attrib[VAConfigAttribEncMaxRefFrames].value; 1087 1088 printf("Support %d RefPicList0 and %d RefPicList1\n", 1089 h264_maxref & 0xffff, (h264_maxref >> 16) & 0xffff ); 1090 } 1091 1092 if (attrib[VAConfigAttribEncMaxSlices].value != VA_ATTRIB_NOT_SUPPORTED) 1093 printf("Support %d slices\n", attrib[VAConfigAttribEncMaxSlices].value); 1094 1095 if (attrib[VAConfigAttribEncSliceStructure].value != VA_ATTRIB_NOT_SUPPORTED) { 1096 int tmp = attrib[VAConfigAttribEncSliceStructure].value; 1097 1098 printf("Support VAConfigAttribEncSliceStructure\n"); 1099 1100 if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS) 1101 printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS\n"); 1102 if (tmp & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS) 1103 printf("Support VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS\n"); 1104 if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS) 1105 printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS\n"); 1106 } 1107 if (attrib[VAConfigAttribEncMacroblockInfo].value != VA_ATTRIB_NOT_SUPPORTED) { 1108 printf("Support VAConfigAttribEncMacroblockInfo\n"); 1109 } 1110 1111 free(entrypoints); 1112 return 0; 1113 } 1114 1115 static int setup_encode() 1116 { 1117 VAStatus va_status; 1118 VASurfaceID *tmp_surfaceid; 1119 int codedbuf_size, i; 1120 1121 va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice, 1122 &config_attrib[0], config_attrib_num, &config_id); 1123 CHECK_VASTATUS(va_status, "vaCreateConfig"); 1124 1125 /* create source surfaces */ 1126 va_status = vaCreateSurfaces(va_dpy, 1127 VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned, 1128 &src_surface[0], SURFACE_NUM, 1129 NULL, 0); 1130 CHECK_VASTATUS(va_status, "vaCreateSurfaces"); 1131 1132 /* create reference surfaces */ 1133 va_status = vaCreateSurfaces( 1134 va_dpy, 1135 VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned, 1136 &ref_surface[0], SURFACE_NUM, 1137 NULL, 0 1138 ); 1139 CHECK_VASTATUS(va_status, "vaCreateSurfaces"); 1140 1141 tmp_surfaceid = calloc(2 * SURFACE_NUM, sizeof(VASurfaceID)); 1142 memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID)); 1143 memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID)); 1144 1145 /* Create a context for this encode pipe */ 1146 va_status = vaCreateContext(va_dpy, config_id, 1147 frame_width_mbaligned, frame_height_mbaligned, 1148 VA_PROGRESSIVE, 1149 tmp_surfaceid, 2 * SURFACE_NUM, 1150 &context_id); 1151 CHECK_VASTATUS(va_status, "vaCreateContext"); 1152 free(tmp_surfaceid); 1153 1154 codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16); 1155 1156 for (i = 0; i < SURFACE_NUM; i++) { 1157 /* create coded buffer once for all 1158 * other VA buffers which won't be used again after vaRenderPicture. 1159 * so APP can always vaCreateBuffer for every frame 1160 * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture 1161 * so VA won't maintain the coded buffer 1162 */ 1163 va_status = vaCreateBuffer(va_dpy,context_id,VAEncCodedBufferType, 1164 codedbuf_size, 1, NULL, &coded_buf[i]); 1165 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1166 } 1167 1168 return 0; 1169 } 1170 1171 1172 1173 #define partition(ref, field, key, ascending) \ 1174 while (i <= j) { \ 1175 if (ascending) { \ 1176 while (ref[i].field < key) \ 1177 i++; \ 1178 while (ref[j].field > key) \ 1179 j--; \ 1180 } else { \ 1181 while (ref[i].field > key) \ 1182 i++; \ 1183 while (ref[j].field < key) \ 1184 j--; \ 1185 } \ 1186 if (i <= j) { \ 1187 tmp = ref[i]; \ 1188 ref[i] = ref[j]; \ 1189 ref[j] = tmp; \ 1190 i++; \ 1191 j--; \ 1192 } \ 1193 } \ 1194 1195 static void sort_one(VAPictureH264 ref[], int left, int right, 1196 int ascending, int frame_idx) 1197 { 1198 int i = left, j = right; 1199 unsigned int key; 1200 VAPictureH264 tmp; 1201 1202 if (frame_idx) { 1203 key = ref[(left + right) / 2].frame_idx; 1204 partition(ref, frame_idx, key, ascending); 1205 } else { 1206 key = ref[(left + right) / 2].TopFieldOrderCnt; 1207 partition(ref, TopFieldOrderCnt, (signed int)key, ascending); 1208 } 1209 1210 /* recursion */ 1211 if (left < j) 1212 sort_one(ref, left, j, ascending, frame_idx); 1213 1214 if (i < right) 1215 sort_one(ref, i, right, ascending, frame_idx); 1216 } 1217 1218 static void sort_two(VAPictureH264 ref[], int left, int right, unsigned int key, unsigned int frame_idx, 1219 int partition_ascending, int list0_ascending, int list1_ascending) 1220 { 1221 int i = left, j = right; 1222 VAPictureH264 tmp; 1223 1224 if (frame_idx) { 1225 partition(ref, frame_idx, key, partition_ascending); 1226 } else { 1227 partition(ref, TopFieldOrderCnt, (signed int)key, partition_ascending); 1228 } 1229 1230 1231 sort_one(ref, left, i-1, list0_ascending, frame_idx); 1232 sort_one(ref, j+1, right, list1_ascending, frame_idx); 1233 } 1234 1235 static int update_ReferenceFrames(void) 1236 { 1237 int i; 1238 1239 if (current_frame_type == FRAME_B) 1240 return 0; 1241 1242 CurrentCurrPic.flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE; 1243 numShortTerm++; 1244 if (numShortTerm > num_ref_frames) 1245 numShortTerm = num_ref_frames; 1246 for (i=numShortTerm-1; i>0; i--) 1247 ReferenceFrames[i] = ReferenceFrames[i-1]; 1248 ReferenceFrames[0] = CurrentCurrPic; 1249 1250 if (current_frame_type != FRAME_B) 1251 current_frame_num++; 1252 if (current_frame_num > MaxFrameNum) 1253 current_frame_num = 0; 1254 1255 return 0; 1256 } 1257 1258 1259 static int update_RefPicList(void) 1260 { 1261 unsigned int current_poc = CurrentCurrPic.TopFieldOrderCnt; 1262 1263 if (current_frame_type == FRAME_P) { 1264 memcpy(RefPicList0_P, ReferenceFrames, numShortTerm * sizeof(VAPictureH264)); 1265 sort_one(RefPicList0_P, 0, numShortTerm-1, 0, 1); 1266 } 1267 1268 if (current_frame_type == FRAME_B) { 1269 memcpy(RefPicList0_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264)); 1270 sort_two(RefPicList0_B, 0, numShortTerm-1, current_poc, 0, 1271 1, 0, 1); 1272 1273 memcpy(RefPicList1_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264)); 1274 sort_two(RefPicList1_B, 0, numShortTerm-1, current_poc, 0, 1275 0, 1, 0); 1276 } 1277 1278 return 0; 1279 } 1280 1281 1282 static int render_sequence(void) 1283 { 1284 VABufferID seq_param_buf, rc_param_buf, misc_param_tmpbuf, render_id[2]; 1285 VAStatus va_status; 1286 VAEncMiscParameterBuffer *misc_param, *misc_param_tmp; 1287 VAEncMiscParameterRateControl *misc_rate_ctrl; 1288 1289 seq_param.level_idc = 41 /*SH_LEVEL_3*/; 1290 seq_param.picture_width_in_mbs = frame_width_mbaligned / 16; 1291 seq_param.picture_height_in_mbs = frame_height_mbaligned / 16; 1292 seq_param.bits_per_second = frame_bitrate; 1293 1294 seq_param.intra_period = intra_period; 1295 seq_param.intra_idr_period = intra_idr_period; 1296 seq_param.ip_period = ip_period; 1297 1298 seq_param.max_num_ref_frames = num_ref_frames; 1299 seq_param.seq_fields.bits.frame_mbs_only_flag = 1; 1300 seq_param.time_scale = 900; 1301 seq_param.num_units_in_tick = 15; /* Tc = num_units_in_tick / time_sacle */ 1302 seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 = Log2MaxPicOrderCntLsb - 4; 1303 seq_param.seq_fields.bits.log2_max_frame_num_minus4 = Log2MaxFrameNum - 4;; 1304 seq_param.seq_fields.bits.frame_mbs_only_flag = 1; 1305 seq_param.seq_fields.bits.chroma_format_idc = 1; 1306 seq_param.seq_fields.bits.direct_8x8_inference_flag = 1; 1307 1308 if (frame_width != frame_width_mbaligned || 1309 frame_height != frame_height_mbaligned) { 1310 seq_param.frame_cropping_flag = 1; 1311 seq_param.frame_crop_left_offset = 0; 1312 seq_param.frame_crop_right_offset = (frame_width_mbaligned - frame_width)/2; 1313 seq_param.frame_crop_top_offset = 0; 1314 seq_param.frame_crop_bottom_offset = (frame_height_mbaligned - frame_height)/2; 1315 } 1316 1317 va_status = vaCreateBuffer(va_dpy, context_id, 1318 VAEncSequenceParameterBufferType, 1319 sizeof(seq_param),1,&seq_param,&seq_param_buf); 1320 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1321 1322 va_status = vaCreateBuffer(va_dpy, context_id, 1323 VAEncMiscParameterBufferType, 1324 sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterRateControl), 1325 1,NULL,&rc_param_buf); 1326 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1327 1328 vaMapBuffer(va_dpy, rc_param_buf,(void **)&misc_param); 1329 misc_param->type = VAEncMiscParameterTypeRateControl; 1330 misc_rate_ctrl = (VAEncMiscParameterRateControl *)misc_param->data; 1331 memset(misc_rate_ctrl, 0, sizeof(*misc_rate_ctrl)); 1332 misc_rate_ctrl->bits_per_second = frame_bitrate; 1333 misc_rate_ctrl->target_percentage = 66; 1334 misc_rate_ctrl->window_size = 1000; 1335 misc_rate_ctrl->initial_qp = initial_qp; 1336 misc_rate_ctrl->min_qp = minimal_qp; 1337 misc_rate_ctrl->basic_unit_size = 0; 1338 vaUnmapBuffer(va_dpy, rc_param_buf); 1339 1340 render_id[0] = seq_param_buf; 1341 render_id[1] = rc_param_buf; 1342 1343 va_status = vaRenderPicture(va_dpy,context_id, &render_id[0], 2); 1344 CHECK_VASTATUS(va_status,"vaRenderPicture");; 1345 1346 if (misc_priv_type != 0) { 1347 va_status = vaCreateBuffer(va_dpy, context_id, 1348 VAEncMiscParameterBufferType, 1349 sizeof(VAEncMiscParameterBuffer), 1350 1, NULL, &misc_param_tmpbuf); 1351 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1352 vaMapBuffer(va_dpy, misc_param_tmpbuf,(void **)&misc_param_tmp); 1353 misc_param_tmp->type = misc_priv_type; 1354 misc_param_tmp->data[0] = misc_priv_value; 1355 vaUnmapBuffer(va_dpy, misc_param_tmpbuf); 1356 1357 va_status = vaRenderPicture(va_dpy,context_id, &misc_param_tmpbuf, 1); 1358 } 1359 1360 return 0; 1361 } 1362 1363 static int calc_poc(int pic_order_cnt_lsb) 1364 { 1365 static int PicOrderCntMsb_ref = 0, pic_order_cnt_lsb_ref = 0; 1366 int prevPicOrderCntMsb, prevPicOrderCntLsb; 1367 int PicOrderCntMsb, TopFieldOrderCnt; 1368 1369 if (current_frame_type == FRAME_IDR) 1370 prevPicOrderCntMsb = prevPicOrderCntLsb = 0; 1371 else { 1372 prevPicOrderCntMsb = PicOrderCntMsb_ref; 1373 prevPicOrderCntLsb = pic_order_cnt_lsb_ref; 1374 } 1375 1376 if ((pic_order_cnt_lsb < prevPicOrderCntLsb) && 1377 ((prevPicOrderCntLsb - pic_order_cnt_lsb) >= (int)(MaxPicOrderCntLsb / 2))) 1378 PicOrderCntMsb = prevPicOrderCntMsb + MaxPicOrderCntLsb; 1379 else if ((pic_order_cnt_lsb > prevPicOrderCntLsb) && 1380 ((pic_order_cnt_lsb - prevPicOrderCntLsb) > (int)(MaxPicOrderCntLsb / 2))) 1381 PicOrderCntMsb = prevPicOrderCntMsb - MaxPicOrderCntLsb; 1382 else 1383 PicOrderCntMsb = prevPicOrderCntMsb; 1384 1385 TopFieldOrderCnt = PicOrderCntMsb + pic_order_cnt_lsb; 1386 1387 if (current_frame_type != FRAME_B) { 1388 PicOrderCntMsb_ref = PicOrderCntMsb; 1389 pic_order_cnt_lsb_ref = pic_order_cnt_lsb; 1390 } 1391 1392 return TopFieldOrderCnt; 1393 } 1394 1395 static int render_picture(void) 1396 { 1397 VABufferID pic_param_buf; 1398 VAStatus va_status; 1399 int i = 0; 1400 1401 pic_param.CurrPic.picture_id = ref_surface[current_slot]; 1402 pic_param.CurrPic.frame_idx = current_frame_num; 1403 pic_param.CurrPic.flags = 0; 1404 pic_param.CurrPic.TopFieldOrderCnt = calc_poc((current_frame_display - current_IDR_display) % MaxPicOrderCntLsb); 1405 pic_param.CurrPic.BottomFieldOrderCnt = pic_param.CurrPic.TopFieldOrderCnt; 1406 CurrentCurrPic = pic_param.CurrPic; 1407 1408 if (getenv("TO_DEL")) { /* set RefPicList into ReferenceFrames */ 1409 update_RefPicList(); /* calc RefPicList */ 1410 memset(pic_param.ReferenceFrames, 0xff, 16 * sizeof(VAPictureH264)); /* invalid all */ 1411 if (current_frame_type == FRAME_P) { 1412 pic_param.ReferenceFrames[0] = RefPicList0_P[0]; 1413 } else if (current_frame_type == FRAME_B) { 1414 pic_param.ReferenceFrames[0] = RefPicList0_B[0]; 1415 pic_param.ReferenceFrames[1] = RefPicList1_B[0]; 1416 } 1417 } else { 1418 memcpy(pic_param.ReferenceFrames, ReferenceFrames, numShortTerm*sizeof(VAPictureH264)); 1419 for (i = numShortTerm; i < SURFACE_NUM; i++) { 1420 pic_param.ReferenceFrames[i].picture_id = VA_INVALID_SURFACE; 1421 pic_param.ReferenceFrames[i].flags = VA_PICTURE_H264_INVALID; 1422 } 1423 } 1424 1425 pic_param.pic_fields.bits.idr_pic_flag = (current_frame_type == FRAME_IDR); 1426 pic_param.pic_fields.bits.reference_pic_flag = (current_frame_type != FRAME_B); 1427 pic_param.pic_fields.bits.entropy_coding_mode_flag = h264_entropy_mode; 1428 pic_param.pic_fields.bits.deblocking_filter_control_present_flag = 1; 1429 pic_param.frame_num = current_frame_num; 1430 pic_param.coded_buf = coded_buf[current_slot]; 1431 pic_param.last_picture = (current_frame_encoding == frame_count); 1432 pic_param.pic_init_qp = initial_qp; 1433 1434 va_status = vaCreateBuffer(va_dpy, context_id,VAEncPictureParameterBufferType, 1435 sizeof(pic_param),1,&pic_param, &pic_param_buf); 1436 CHECK_VASTATUS(va_status,"vaCreateBuffer");; 1437 1438 va_status = vaRenderPicture(va_dpy,context_id, &pic_param_buf, 1); 1439 CHECK_VASTATUS(va_status,"vaRenderPicture"); 1440 1441 return 0; 1442 } 1443 1444 static int render_packedsequence(void) 1445 { 1446 VAEncPackedHeaderParameterBuffer packedheader_param_buffer; 1447 VABufferID packedseq_para_bufid, packedseq_data_bufid, render_id[2]; 1448 unsigned int length_in_bits; 1449 unsigned char *packedseq_buffer = NULL; 1450 VAStatus va_status; 1451 1452 length_in_bits = build_packed_seq_buffer(&packedseq_buffer); 1453 1454 packedheader_param_buffer.type = VAEncPackedHeaderSequence; 1455 1456 packedheader_param_buffer.bit_length = length_in_bits; /*length_in_bits*/ 1457 packedheader_param_buffer.has_emulation_bytes = 0; 1458 va_status = vaCreateBuffer(va_dpy, 1459 context_id, 1460 VAEncPackedHeaderParameterBufferType, 1461 sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer, 1462 &packedseq_para_bufid); 1463 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1464 1465 va_status = vaCreateBuffer(va_dpy, 1466 context_id, 1467 VAEncPackedHeaderDataBufferType, 1468 (length_in_bits + 7) / 8, 1, packedseq_buffer, 1469 &packedseq_data_bufid); 1470 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1471 1472 render_id[0] = packedseq_para_bufid; 1473 render_id[1] = packedseq_data_bufid; 1474 va_status = vaRenderPicture(va_dpy,context_id, render_id, 2); 1475 CHECK_VASTATUS(va_status,"vaRenderPicture"); 1476 1477 free(packedseq_buffer); 1478 1479 return 0; 1480 } 1481 1482 1483 static int render_packedpicture(void) 1484 { 1485 VAEncPackedHeaderParameterBuffer packedheader_param_buffer; 1486 VABufferID packedpic_para_bufid, packedpic_data_bufid, render_id[2]; 1487 unsigned int length_in_bits; 1488 unsigned char *packedpic_buffer = NULL; 1489 VAStatus va_status; 1490 1491 length_in_bits = build_packed_pic_buffer(&packedpic_buffer); 1492 packedheader_param_buffer.type = VAEncPackedHeaderPicture; 1493 packedheader_param_buffer.bit_length = length_in_bits; 1494 packedheader_param_buffer.has_emulation_bytes = 0; 1495 1496 va_status = vaCreateBuffer(va_dpy, 1497 context_id, 1498 VAEncPackedHeaderParameterBufferType, 1499 sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer, 1500 &packedpic_para_bufid); 1501 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1502 1503 va_status = vaCreateBuffer(va_dpy, 1504 context_id, 1505 VAEncPackedHeaderDataBufferType, 1506 (length_in_bits + 7) / 8, 1, packedpic_buffer, 1507 &packedpic_data_bufid); 1508 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1509 1510 render_id[0] = packedpic_para_bufid; 1511 render_id[1] = packedpic_data_bufid; 1512 va_status = vaRenderPicture(va_dpy,context_id, render_id, 2); 1513 CHECK_VASTATUS(va_status,"vaRenderPicture"); 1514 1515 free(packedpic_buffer); 1516 1517 return 0; 1518 } 1519 1520 static void render_packedsei(void) 1521 { 1522 VAEncPackedHeaderParameterBuffer packed_header_param_buffer; 1523 VABufferID packed_sei_header_param_buf_id, packed_sei_buf_id, render_id[2]; 1524 unsigned int length_in_bits /*offset_in_bytes*/; 1525 unsigned char *packed_sei_buffer = NULL; 1526 VAStatus va_status; 1527 int init_cpb_size, target_bit_rate, i_initial_cpb_removal_delay_length, i_initial_cpb_removal_delay; 1528 int i_cpb_removal_delay, i_dpb_output_delay_length, i_cpb_removal_delay_length; 1529 1530 /* it comes for the bps defined in SPS */ 1531 target_bit_rate = frame_bitrate; 1532 init_cpb_size = (target_bit_rate * 8) >> 10; 1533 i_initial_cpb_removal_delay = init_cpb_size * 0.5 * 1024 / target_bit_rate * 90000; 1534 1535 i_cpb_removal_delay = 2; 1536 i_initial_cpb_removal_delay_length = 24; 1537 i_cpb_removal_delay_length = 24; 1538 i_dpb_output_delay_length = 24; 1539 1540 1541 length_in_bits = build_packed_sei_buffer_timing( 1542 i_initial_cpb_removal_delay_length, 1543 i_initial_cpb_removal_delay, 1544 0, 1545 i_cpb_removal_delay_length, 1546 i_cpb_removal_delay * current_frame_encoding, 1547 i_dpb_output_delay_length, 1548 0, 1549 &packed_sei_buffer); 1550 1551 //offset_in_bytes = 0; 1552 packed_header_param_buffer.type = VAEncPackedHeaderH264_SEI; 1553 packed_header_param_buffer.bit_length = length_in_bits; 1554 packed_header_param_buffer.has_emulation_bytes = 0; 1555 1556 va_status = vaCreateBuffer(va_dpy, 1557 context_id, 1558 VAEncPackedHeaderParameterBufferType, 1559 sizeof(packed_header_param_buffer), 1, &packed_header_param_buffer, 1560 &packed_sei_header_param_buf_id); 1561 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1562 1563 va_status = vaCreateBuffer(va_dpy, 1564 context_id, 1565 VAEncPackedHeaderDataBufferType, 1566 (length_in_bits + 7) / 8, 1, packed_sei_buffer, 1567 &packed_sei_buf_id); 1568 CHECK_VASTATUS(va_status,"vaCreateBuffer"); 1569 1570 1571 render_id[0] = packed_sei_header_param_buf_id; 1572 render_id[1] = packed_sei_buf_id; 1573 va_status = vaRenderPicture(va_dpy,context_id, render_id, 2); 1574 CHECK_VASTATUS(va_status,"vaRenderPicture"); 1575 1576 1577 free(packed_sei_buffer); 1578 1579 return; 1580 } 1581 1582 1583 static int render_hrd(void) 1584 { 1585 VABufferID misc_parameter_hrd_buf_id; 1586 VAStatus va_status; 1587 VAEncMiscParameterBuffer *misc_param; 1588 VAEncMiscParameterHRD *misc_hrd_param; 1589 1590 va_status = vaCreateBuffer(va_dpy, context_id, 1591 VAEncMiscParameterBufferType, 1592 sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterHRD), 1593 1, 1594 NULL, 1595 &misc_parameter_hrd_buf_id); 1596 CHECK_VASTATUS(va_status, "vaCreateBuffer"); 1597 1598 vaMapBuffer(va_dpy, 1599 misc_parameter_hrd_buf_id, 1600 (void **)&misc_param); 1601 misc_param->type = VAEncMiscParameterTypeHRD; 1602 misc_hrd_param = (VAEncMiscParameterHRD *)misc_param->data; 1603 1604 if (frame_bitrate > 0) { 1605 misc_hrd_param->initial_buffer_fullness = frame_bitrate * 1024 * 4; 1606 misc_hrd_param->buffer_size = frame_bitrate * 1024 * 8; 1607 } else { 1608 misc_hrd_param->initial_buffer_fullness = 0; 1609 misc_hrd_param->buffer_size = 0; 1610 } 1611 vaUnmapBuffer(va_dpy, misc_parameter_hrd_buf_id); 1612 1613 va_status = vaRenderPicture(va_dpy,context_id, &misc_parameter_hrd_buf_id, 1); 1614 CHECK_VASTATUS(va_status,"vaRenderPicture");; 1615 1616 return 0; 1617 } 1618 1619 static int render_slice(void) 1620 { 1621 VABufferID slice_param_buf; 1622 VAStatus va_status; 1623 int i; 1624 1625 update_RefPicList(); 1626 1627 /* one frame, one slice */ 1628 slice_param.macroblock_address = 0; 1629 slice_param.num_macroblocks = frame_width_mbaligned * frame_height_mbaligned/(16*16); /* Measured by MB */ 1630 slice_param.slice_type = (current_frame_type == FRAME_IDR)?2:current_frame_type; 1631 if (current_frame_type == FRAME_IDR) { 1632 if (current_frame_encoding != 0) 1633 ++slice_param.idr_pic_id; 1634 } else if (current_frame_type == FRAME_P) { 1635 int refpiclist0_max = h264_maxref & 0xffff; 1636 memcpy(slice_param.RefPicList0, RefPicList0_P, refpiclist0_max*sizeof(VAPictureH264)); 1637 1638 for (i = refpiclist0_max; i < 32; i++) { 1639 slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE; 1640 slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID; 1641 } 1642 } else if (current_frame_type == FRAME_B) { 1643 int refpiclist0_max = h264_maxref & 0xffff; 1644 int refpiclist1_max = (h264_maxref >> 16) & 0xffff; 1645 1646 memcpy(slice_param.RefPicList0, RefPicList0_B, refpiclist0_max*sizeof(VAPictureH264)); 1647 for (i = refpiclist0_max; i < 32; i++) { 1648 slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE; 1649 slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID; 1650 } 1651 1652 memcpy(slice_param.RefPicList1, RefPicList1_B, refpiclist1_max*sizeof(VAPictureH264)); 1653 for (i = refpiclist1_max; i < 32; i++) { 1654 slice_param.RefPicList1[i].picture_id = VA_INVALID_SURFACE; 1655 slice_param.RefPicList1[i].flags = VA_PICTURE_H264_INVALID; 1656 } 1657 } 1658 1659 slice_param.slice_alpha_c0_offset_div2 = 0; 1660 slice_param.slice_beta_offset_div2 = 0; 1661 slice_param.direct_spatial_mv_pred_flag = 1; 1662 slice_param.pic_order_cnt_lsb = (current_frame_display - current_IDR_display) % MaxPicOrderCntLsb; 1663 1664 va_status = vaCreateBuffer(va_dpy,context_id,VAEncSliceParameterBufferType, 1665 sizeof(slice_param),1,&slice_param,&slice_param_buf); 1666 CHECK_VASTATUS(va_status,"vaCreateBuffer");; 1667 1668 va_status = vaRenderPicture(va_dpy,context_id, &slice_param_buf, 1); 1669 CHECK_VASTATUS(va_status,"vaRenderPicture"); 1670 1671 return 0; 1672 } 1673 1674 1675 static int upload_source_YUV_once_for_all() 1676 { 1677 int box_width=8; 1678 int row_shift=0; 1679 int i; 1680 1681 for (i = 0; i < SURFACE_NUM; i++) { 1682 printf("\rLoading data into surface %d.....", i); 1683 upload_surface(va_dpy, src_surface[i], box_width, row_shift, 0); 1684 1685 row_shift++; 1686 if (row_shift==(2*box_width)) row_shift= 0; 1687 } 1688 printf("Complete surface loading\n"); 1689 1690 return 0; 1691 } 1692 1693 static int load_surface(VASurfaceID surface_id, unsigned long long display_order) 1694 { 1695 unsigned char *srcyuv_ptr = NULL, *src_Y = NULL, *src_U = NULL, *src_V = NULL; 1696 unsigned long long frame_start, mmap_start; 1697 char *mmap_ptr = NULL; 1698 int frame_size, mmap_size; 1699 1700 if (srcyuv_fp == NULL) 1701 return 0; 1702 1703 /* allow encoding more than srcyuv_frames */ 1704 display_order = display_order % srcyuv_frames; 1705 frame_size = frame_width * frame_height * 3 / 2; /* for YUV420 */ 1706 frame_start = display_order * frame_size; 1707 1708 mmap_start = frame_start & (~0xfff); 1709 mmap_size = (frame_size + (frame_start & 0xfff) + 0xfff) & (~0xfff); 1710 mmap_ptr = mmap(0, mmap_size, PROT_READ, MAP_SHARED, 1711 fileno(srcyuv_fp), mmap_start); 1712 if (mmap_ptr == MAP_FAILED) { 1713 printf("Failed to mmap YUV file (%s)\n", strerror(errno)); 1714 return 1; 1715 } 1716 srcyuv_ptr = (unsigned char *)mmap_ptr + (frame_start & 0xfff); 1717 if (srcyuv_fourcc == VA_FOURCC_NV12) { 1718 src_Y = srcyuv_ptr; 1719 src_U = src_Y + frame_width * frame_height; 1720 src_V = NULL; 1721 } else if (srcyuv_fourcc == VA_FOURCC_IYUV || 1722 srcyuv_fourcc == VA_FOURCC_YV12) { 1723 src_Y = srcyuv_ptr; 1724 if (srcyuv_fourcc == VA_FOURCC_IYUV) { 1725 src_U = src_Y + frame_width * frame_height; 1726 src_V = src_U + (frame_width/2) * (frame_height/2); 1727 } else { /* YV12 */ 1728 src_V = src_Y + frame_width * frame_height; 1729 src_U = src_V + (frame_width/2) * (frame_height/2); 1730 } 1731 } else { 1732 printf("Unsupported source YUV format\n"); 1733 exit(1); 1734 } 1735 1736 upload_surface_yuv(va_dpy, surface_id, 1737 srcyuv_fourcc, frame_width, frame_height, 1738 src_Y, src_U, src_V); 1739 if (mmap_ptr) 1740 munmap(mmap_ptr, mmap_size); 1741 1742 return 0; 1743 } 1744 1745 1746 static int save_recyuv(VASurfaceID surface_id, 1747 unsigned long long display_order, 1748 unsigned long long encode_order) 1749 { 1750 unsigned char *dst_Y = NULL, *dst_U = NULL, *dst_V = NULL; 1751 1752 if (recyuv_fp == NULL) 1753 return 0; 1754 1755 if (srcyuv_fourcc == VA_FOURCC_NV12) { 1756 int uv_size = 2 * (frame_width/2) * (frame_height/2); 1757 dst_Y = malloc(2*uv_size); 1758 dst_U = malloc(uv_size); 1759 } else if (srcyuv_fourcc == VA_FOURCC_IYUV || 1760 srcyuv_fourcc == VA_FOURCC_YV12) { 1761 int uv_size = (frame_width/2) * (frame_height/2); 1762 dst_Y = malloc(4*uv_size); 1763 dst_U = malloc(uv_size); 1764 dst_V = malloc(uv_size); 1765 } else { 1766 printf("Unsupported source YUV format\n"); 1767 exit(1); 1768 } 1769 1770 download_surface_yuv(va_dpy, surface_id, 1771 srcyuv_fourcc, frame_width, frame_height, 1772 dst_Y, dst_U, dst_V); 1773 fseek(recyuv_fp, display_order * frame_width * frame_height * 1.5, SEEK_SET); 1774 1775 if (srcyuv_fourcc == VA_FOURCC_NV12) { 1776 int uv_size = 2 * (frame_width/2) * (frame_height/2); 1777 fwrite(dst_Y, uv_size * 2, 1, recyuv_fp); 1778 fwrite(dst_U, uv_size, 1, recyuv_fp); 1779 } else if (srcyuv_fourcc == VA_FOURCC_IYUV || 1780 srcyuv_fourcc == VA_FOURCC_YV12) { 1781 int uv_size = (frame_width/2) * (frame_height/2); 1782 fwrite(dst_Y, uv_size * 4, 1, recyuv_fp); 1783 1784 if (srcyuv_fourcc == VA_FOURCC_IYUV) { 1785 fwrite(dst_U, uv_size, 1, recyuv_fp); 1786 fwrite(dst_V, uv_size, 1, recyuv_fp); 1787 } else { 1788 fwrite(dst_V, uv_size, 1, recyuv_fp); 1789 fwrite(dst_U, uv_size, 1, recyuv_fp); 1790 } 1791 } else { 1792 printf("Unsupported YUV format\n"); 1793 exit(1); 1794 } 1795 1796 if (dst_Y) 1797 free(dst_Y); 1798 if (dst_U) 1799 free(dst_U); 1800 if (dst_V) 1801 free(dst_V); 1802 1803 fflush(recyuv_fp); 1804 1805 return 0; 1806 } 1807 1808 1809 static int save_codeddata(unsigned long long display_order, unsigned long long encode_order) 1810 { 1811 VACodedBufferSegment *buf_list = NULL; 1812 VAStatus va_status; 1813 unsigned int coded_size = 0; 1814 1815 va_status = vaMapBuffer(va_dpy,coded_buf[display_order % SURFACE_NUM],(void **)(&buf_list)); 1816 CHECK_VASTATUS(va_status,"vaMapBuffer"); 1817 while (buf_list != NULL) { 1818 coded_size += fwrite(buf_list->buf, 1, buf_list->size, coded_fp); 1819 buf_list = (VACodedBufferSegment *) buf_list->next; 1820 1821 frame_size += coded_size; 1822 } 1823 vaUnmapBuffer(va_dpy,coded_buf[display_order % SURFACE_NUM]); 1824 1825 printf("\r "); /* return back to startpoint */ 1826 switch (encode_order % 4) { 1827 case 0: 1828 printf("|"); 1829 break; 1830 case 1: 1831 printf("/"); 1832 break; 1833 case 2: 1834 printf("-"); 1835 break; 1836 case 3: 1837 printf("\\"); 1838 break; 1839 } 1840 printf("%08lld", encode_order); 1841 printf("(%06d bytes coded)",coded_size); 1842 1843 fflush(coded_fp); 1844 1845 return 0; 1846 } 1847 1848 1849 static struct storage_task_t * storage_task_dequeue(void) 1850 { 1851 struct storage_task_t *header; 1852 1853 pthread_mutex_lock(&encode_mutex); 1854 1855 header = storage_task_header; 1856 if (storage_task_header != NULL) { 1857 if (storage_task_tail == storage_task_header) 1858 storage_task_tail = NULL; 1859 storage_task_header = header->next; 1860 } 1861 1862 pthread_mutex_unlock(&encode_mutex); 1863 1864 return header; 1865 } 1866 1867 static int storage_task_queue(unsigned long long display_order, unsigned long long encode_order) 1868 { 1869 struct storage_task_t *tmp; 1870 1871 tmp = calloc(1, sizeof(struct storage_task_t)); 1872 tmp->display_order = display_order; 1873 tmp->encode_order = encode_order; 1874 1875 pthread_mutex_lock(&encode_mutex); 1876 1877 if (storage_task_header == NULL) { 1878 storage_task_header = tmp; 1879 storage_task_tail = tmp; 1880 } else { 1881 storage_task_tail->next = tmp; 1882 storage_task_tail = tmp; 1883 } 1884 1885 srcsurface_status[display_order % SURFACE_NUM] = SRC_SURFACE_IN_STORAGE; 1886 pthread_cond_signal(&encode_cond); 1887 1888 pthread_mutex_unlock(&encode_mutex); 1889 1890 return 0; 1891 } 1892 1893 static void storage_task(unsigned long long display_order, unsigned long long encode_order) 1894 { 1895 unsigned int tmp; 1896 VAStatus va_status; 1897 1898 tmp = GetTickCount(); 1899 va_status = vaSyncSurface(va_dpy, src_surface[display_order % SURFACE_NUM]); 1900 CHECK_VASTATUS(va_status,"vaSyncSurface"); 1901 SyncPictureTicks += GetTickCount() - tmp; 1902 tmp = GetTickCount(); 1903 save_codeddata(display_order, encode_order); 1904 SavePictureTicks += GetTickCount() - tmp; 1905 1906 save_recyuv(ref_surface[display_order % SURFACE_NUM], display_order, encode_order); 1907 1908 /* reload a new frame data */ 1909 tmp = GetTickCount(); 1910 if (srcyuv_fp != NULL) 1911 load_surface(src_surface[display_order % SURFACE_NUM], display_order + SURFACE_NUM); 1912 UploadPictureTicks += GetTickCount() - tmp; 1913 1914 pthread_mutex_lock(&encode_mutex); 1915 srcsurface_status[display_order % SURFACE_NUM] = SRC_SURFACE_IN_ENCODING; 1916 pthread_mutex_unlock(&encode_mutex); 1917 } 1918 1919 1920 static void * storage_task_thread(void *t) 1921 { 1922 while (1) { 1923 struct storage_task_t *current; 1924 1925 current = storage_task_dequeue(); 1926 if (current == NULL) { 1927 pthread_mutex_lock(&encode_mutex); 1928 pthread_cond_wait(&encode_cond, &encode_mutex); 1929 pthread_mutex_unlock(&encode_mutex); 1930 continue; 1931 } 1932 1933 storage_task(current->display_order, current->encode_order); 1934 1935 free(current); 1936 1937 /* all frames are saved, exit the thread */ 1938 if (++frame_coded >= frame_count) 1939 break; 1940 } 1941 1942 return 0; 1943 } 1944 1945 1946 static int encode_frames(void) 1947 { 1948 unsigned int i, tmp; 1949 VAStatus va_status; 1950 //VASurfaceStatus surface_status; 1951 1952 /* upload RAW YUV data into all surfaces */ 1953 tmp = GetTickCount(); 1954 if (srcyuv_fp != NULL) { 1955 for (i = 0; i < SURFACE_NUM; i++) 1956 load_surface(src_surface[i], i); 1957 } else 1958 upload_source_YUV_once_for_all(); 1959 UploadPictureTicks += GetTickCount() - tmp; 1960 1961 /* ready for encoding */ 1962 memset(srcsurface_status, SRC_SURFACE_IN_ENCODING, sizeof(srcsurface_status)); 1963 1964 memset(&seq_param, 0, sizeof(seq_param)); 1965 memset(&pic_param, 0, sizeof(pic_param)); 1966 memset(&slice_param, 0, sizeof(slice_param)); 1967 1968 if (encode_syncmode == 0) 1969 pthread_create(&encode_thread, NULL, storage_task_thread, NULL); 1970 1971 for (current_frame_encoding = 0; current_frame_encoding < frame_count; current_frame_encoding++) { 1972 encoding2display_order(current_frame_encoding, intra_period, intra_idr_period, ip_period, 1973 ¤t_frame_display, ¤t_frame_type); 1974 if (current_frame_type == FRAME_IDR) { 1975 numShortTerm = 0; 1976 current_frame_num = 0; 1977 current_IDR_display = current_frame_display; 1978 } 1979 1980 /* check if the source frame is ready */ 1981 while (srcsurface_status[current_slot] != SRC_SURFACE_IN_ENCODING) { 1982 usleep(1); 1983 } 1984 1985 tmp = GetTickCount(); 1986 va_status = vaBeginPicture(va_dpy, context_id, src_surface[current_slot]); 1987 CHECK_VASTATUS(va_status,"vaBeginPicture"); 1988 BeginPictureTicks += GetTickCount() - tmp; 1989 1990 tmp = GetTickCount(); 1991 if (current_frame_type == FRAME_IDR) { 1992 render_sequence(); 1993 render_picture(); 1994 if (h264_packedheader) { 1995 render_packedsequence(); 1996 render_packedpicture(); 1997 } 1998 //if (rc_mode == VA_RC_CBR) 1999 // render_packedsei(); 2000 //render_hrd(); 2001 } else { 2002 //render_sequence(); 2003 render_picture(); 2004 //if (rc_mode == VA_RC_CBR) 2005 // render_packedsei(); 2006 //render_hrd(); 2007 } 2008 render_slice(); 2009 RenderPictureTicks += GetTickCount() - tmp; 2010 2011 tmp = GetTickCount(); 2012 va_status = vaEndPicture(va_dpy,context_id); 2013 CHECK_VASTATUS(va_status,"vaEndPicture");; 2014 EndPictureTicks += GetTickCount() - tmp; 2015 2016 if (encode_syncmode) 2017 storage_task(current_frame_display, current_frame_encoding); 2018 else /* queue the storage task queue */ 2019 storage_task_queue(current_frame_display, current_frame_encoding); 2020 2021 update_ReferenceFrames(); 2022 } 2023 2024 if (encode_syncmode == 0) { 2025 int ret; 2026 pthread_join(encode_thread, (void **)&ret); 2027 } 2028 2029 return 0; 2030 } 2031 2032 2033 static int release_encode() 2034 { 2035 int i; 2036 2037 vaDestroySurfaces(va_dpy,&src_surface[0],SURFACE_NUM); 2038 vaDestroySurfaces(va_dpy,&ref_surface[0],SURFACE_NUM); 2039 2040 for (i = 0; i < SURFACE_NUM; i++) 2041 vaDestroyBuffer(va_dpy,coded_buf[i]); 2042 2043 vaDestroyContext(va_dpy,context_id); 2044 vaDestroyConfig(va_dpy,config_id); 2045 2046 return 0; 2047 } 2048 2049 static int deinit_va() 2050 { 2051 vaTerminate(va_dpy); 2052 2053 va_close_display(va_dpy); 2054 2055 return 0; 2056 } 2057 2058 2059 static int print_input() 2060 { 2061 printf("\n\nINPUT:Try to encode H264...\n"); 2062 printf("INPUT: RateControl : %s\n", rc_to_string(rc_mode)); 2063 printf("INPUT: Resolution : %dx%d, %d frames\n", 2064 frame_width, frame_height, frame_count); 2065 printf("INPUT: FrameRate : %d\n", frame_rate); 2066 printf("INPUT: Bitrate : %d\n", frame_bitrate); 2067 printf("INPUT: Slieces : %d\n", frame_slices); 2068 printf("INPUT: IntraPeriod : %d\n", intra_period); 2069 printf("INPUT: IDRPeriod : %d\n", intra_idr_period); 2070 printf("INPUT: IpPeriod : %d\n", ip_period); 2071 printf("INPUT: Initial QP : %d\n", initial_qp); 2072 printf("INPUT: Min QP : %d\n", minimal_qp); 2073 printf("INPUT: Source YUV : %s", srcyuv_fp?"FILE":"AUTO generated"); 2074 if (srcyuv_fp) 2075 printf(":%s (fourcc %s)\n", srcyuv_fn, fourcc_to_string(srcyuv_fourcc)); 2076 else 2077 printf("\n"); 2078 printf("INPUT: Coded Clip : %s\n", coded_fn); 2079 if (recyuv_fp == NULL) 2080 printf("INPUT: Rec Clip : %s\n", "Not save reconstructed frame"); 2081 else 2082 printf("INPUT: Rec Clip : Save reconstructed frame into %s (fourcc %s)\n", recyuv_fn, 2083 fourcc_to_string(srcyuv_fourcc)); 2084 2085 printf("\n\n"); /* return back to startpoint */ 2086 2087 return 0; 2088 } 2089 2090 static int calc_PSNR(double *psnr) 2091 { 2092 char *srcyuv_ptr = NULL, *recyuv_ptr = NULL, tmp; 2093 unsigned long long min_size; 2094 unsigned long long i, sse=0; 2095 double ssemean; 2096 int fourM = 0x400000; /* 4M */ 2097 2098 min_size = MIN(srcyuv_frames, frame_count) * frame_width * frame_height * 1.5; 2099 for (i=0; i<min_size; i++) { 2100 unsigned long long j = i % fourM; 2101 2102 if ((i % fourM) == 0) { 2103 if (srcyuv_ptr) 2104 munmap(srcyuv_ptr, fourM); 2105 if (recyuv_ptr) 2106 munmap(recyuv_ptr, fourM); 2107 2108 srcyuv_ptr = mmap(0, fourM, PROT_READ, MAP_SHARED, fileno(srcyuv_fp), i); 2109 recyuv_ptr = mmap(0, fourM, PROT_READ, MAP_SHARED, fileno(recyuv_fp), i); 2110 if ((srcyuv_ptr == MAP_FAILED) || (recyuv_ptr == MAP_FAILED)) { 2111 printf("Failed to mmap YUV files\n"); 2112 return 1; 2113 } 2114 } 2115 tmp = srcyuv_ptr[j] - recyuv_ptr[j]; 2116 sse += tmp * tmp; 2117 } 2118 ssemean = (double)sse/(double)min_size; 2119 *psnr = 20.0*log10(255) - 10.0*log10(ssemean); 2120 2121 if (srcyuv_ptr) 2122 munmap(srcyuv_ptr, fourM); 2123 if (recyuv_ptr) 2124 munmap(recyuv_ptr, fourM); 2125 2126 return 0; 2127 } 2128 2129 static int print_performance(unsigned int PictureCount) 2130 { 2131 unsigned int psnr_ret = 1, others = 0; 2132 double psnr = 0, total_size = frame_width * frame_height * 1.5 * frame_count; 2133 2134 if (calc_psnr && srcyuv_fp && recyuv_fp) 2135 psnr_ret = calc_PSNR(&psnr); 2136 2137 others = TotalTicks - UploadPictureTicks - BeginPictureTicks 2138 - RenderPictureTicks - EndPictureTicks - SyncPictureTicks - SavePictureTicks; 2139 2140 printf("\n\n"); 2141 2142 printf("PERFORMANCE: Frame Rate : %.2f fps (%d frames, %d ms (%.2f ms per frame))\n", 2143 (double) 1000*PictureCount / TotalTicks, PictureCount, 2144 TotalTicks, ((double) TotalTicks) / (double) PictureCount); 2145 printf("PERFORMANCE: Compression ratio : %d:1\n", (unsigned int)(total_size / frame_size)); 2146 if (psnr_ret == 0) 2147 printf("PERFORMANCE: PSNR : %.2f (%lld frames calculated)\n", 2148 psnr, MIN(frame_count, srcyuv_frames)); 2149 2150 printf("PERFORMANCE: UploadPicture : %d ms (%.2f, %.2f%% percent)\n", 2151 (int) UploadPictureTicks, ((double) UploadPictureTicks) / (double) PictureCount, 2152 UploadPictureTicks/(double) TotalTicks/0.01); 2153 printf("PERFORMANCE: vaBeginPicture : %d ms (%.2f, %.2f%% percent)\n", 2154 (int) BeginPictureTicks, ((double) BeginPictureTicks) / (double) PictureCount, 2155 BeginPictureTicks/(double) TotalTicks/0.01); 2156 printf("PERFORMANCE: vaRenderHeader : %d ms (%.2f, %.2f%% percent)\n", 2157 (int) RenderPictureTicks, ((double) RenderPictureTicks) / (double) PictureCount, 2158 RenderPictureTicks/(double) TotalTicks/0.01); 2159 printf("PERFORMANCE: vaEndPicture : %d ms (%.2f, %.2f%% percent)\n", 2160 (int) EndPictureTicks, ((double) EndPictureTicks) / (double) PictureCount, 2161 EndPictureTicks/(double) TotalTicks/0.01); 2162 printf("PERFORMANCE: vaSyncSurface : %d ms (%.2f, %.2f%% percent)\n", 2163 (int) SyncPictureTicks, ((double) SyncPictureTicks) / (double) PictureCount, 2164 SyncPictureTicks/(double) TotalTicks/0.01); 2165 printf("PERFORMANCE: SavePicture : %d ms (%.2f, %.2f%% percent)\n", 2166 (int) SavePictureTicks, ((double) SavePictureTicks) / (double) PictureCount, 2167 SavePictureTicks/(double) TotalTicks/0.01); 2168 printf("PERFORMANCE: Others : %d ms (%.2f, %.2f%% percent)\n", 2169 (int) others, ((double) others) / (double) PictureCount, 2170 others/(double) TotalTicks/0.01); 2171 2172 if (encode_syncmode == 0) 2173 printf("(Multithread enabled, the timing is only for reference)\n"); 2174 2175 return 0; 2176 } 2177 2178 2179 int main(int argc,char **argv) 2180 { 2181 unsigned int start; 2182 2183 process_cmdline(argc, argv); 2184 2185 print_input(); 2186 2187 start = GetTickCount(); 2188 2189 init_va(); 2190 setup_encode(); 2191 2192 encode_frames(); 2193 2194 release_encode(); 2195 deinit_va(); 2196 2197 TotalTicks += GetTickCount() - start; 2198 print_performance(frame_count); 2199 2200 return 0; 2201 } 2202