// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file declares the interface of an H264 Annex-B video stream parser
// and the plain structs it fills in.
// Note: ported from Chromium commit head: 0a918e9

#ifndef H264_PARSER_H_
#define H264_PARSER_H_

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

#include <map>
#include <memory>
#include <vector>

#include "base/macros.h"
#include "base/optional.h"
#include "h264_bit_reader.h"
#include "ranges.h"
#include "rect.h"
#include "size.h"
#include "subsample_entry.h"

namespace media {

// NOTE(review): subsample_entry.h is already included above, so this forward
// declaration looks redundant -- confirm before removing.
struct SubsampleEntry;

// For explanations of each struct and its members, see H.264 specification
// at http://www.itu.int/rec/T-REC-H.264.

// Describes one NAL unit found in a stream; filled in by
// H264Parser::AdvanceToNextNALU() and H264Parser::ParseNALUs().
struct H264NALU {
  H264NALU();

  // Values for |nal_unit_type|.
  enum Type {
    kUnspecified = 0,
    kNonIDRSlice = 1,
    kSliceDataA = 2,
    kSliceDataB = 3,
    kSliceDataC = 4,
    kIDRSlice = 5,
    kSEIMessage = 6,
    kSPS = 7,
    kPPS = 8,
    kAUD = 9,
    kEOSeq = 10,
    kEOStream = 11,
    kFiller = 12,
    kSPSExt = 13,
    kReserved14 = 14,
    kReserved15 = 15,
    kReserved16 = 16,
    kReserved17 = 17,
    kReserved18 = 18,
    kCodedSliceAux = 19,
    kCodedSliceExtension = 20,
  };

  // After (without) start code; we don't own the underlying memory
  // and a shallow copy should be made when copying this struct.
  const uint8_t* data;
  off_t size;  // From after start code to start code of next NALU (or EOS).

  int nal_ref_idc;
  int nal_unit_type;
};

// Number of entries in one 4x4 / 8x8 scaling list; used to size the
// |scaling_list4x4| and |scaling_list8x8| arrays in H264SPS and H264PPS.
enum {
  kH264ScalingList4x4Length = 16,
  kH264ScalingList8x8Length = 64,
};

// Parsed sequence parameter set. Instances are owned by H264Parser and
// retrieved through H264Parser::GetSPS().
struct H264SPS {
  H264SPS();

  // Values for |profile_idc|.
  // NOTE(review): several enumerators below spell the prefix "kProfileID"
  // rather than "kProfileIDC". The names are kept as-is because callers
  // outside this file may depend on them.
  enum H264ProfileIDC {
    kProfileIDCBaseline = 66,
    kProfileIDCConstrainedBaseline = kProfileIDCBaseline,
    kProfileIDCMain = 77,
    kProfileIDScalableBaseline = 83,
    kProfileIDScalableHigh = 86,
    kProfileIDCHigh = 100,
    kProfileIDHigh10 = 110,
    kProfileIDSMultiviewHigh = 118,
    kProfileIDHigh422 = 122,
    kProfileIDStereoHigh = 128,
    kProfileIDHigh444Predictive = 244,
  };

  enum AspectRatioIdc {
    kExtendedSar = 255,
  };

  enum {
    // Constants for HRD parameters (spec ch. E.2.2).
    kBitRateScaleConstantTerm = 6,  // Equation E-37.
    kCPBSizeScaleConstantTerm = 4,  // Equation E-38.
    kDefaultInitialCPBRemovalDelayLength = 24,
    kDefaultDPBOutputDelayLength = 24,
    kDefaultTimeOffsetLength = 24,
  };

  int profile_idc;
  bool constraint_set0_flag;
  bool constraint_set1_flag;
  bool constraint_set2_flag;
  bool constraint_set3_flag;
  bool constraint_set4_flag;
  bool constraint_set5_flag;
  int level_idc;
  int seq_parameter_set_id;

  int chroma_format_idc;
  bool separate_colour_plane_flag;
  int bit_depth_luma_minus8;
  int bit_depth_chroma_minus8;
  bool qpprime_y_zero_transform_bypass_flag;

  bool seq_scaling_matrix_present_flag;
  int scaling_list4x4[6][kH264ScalingList4x4Length];
  int scaling_list8x8[6][kH264ScalingList8x8Length];

  int log2_max_frame_num_minus4;
  int pic_order_cnt_type;
  int log2_max_pic_order_cnt_lsb_minus4;
  bool delta_pic_order_always_zero_flag;
  int offset_for_non_ref_pic;
  int offset_for_top_to_bottom_field;
  int num_ref_frames_in_pic_order_cnt_cycle;
  int expected_delta_per_pic_order_cnt_cycle;  // calculated
  int offset_for_ref_frame[255];
  int max_num_ref_frames;
  bool gaps_in_frame_num_value_allowed_flag;
  int pic_width_in_mbs_minus1;
  int pic_height_in_map_units_minus1;
  bool frame_mbs_only_flag;
  bool mb_adaptive_frame_field_flag;
  bool direct_8x8_inference_flag;
  bool frame_cropping_flag;
  int frame_crop_left_offset;
  int frame_crop_right_offset;
  int frame_crop_top_offset;
  int frame_crop_bottom_offset;

  bool vui_parameters_present_flag;
  int sar_width;   // Set to 0 when not specified.
  int sar_height;  // Set to 0 when not specified.
  bool bitstream_restriction_flag;
  int max_num_reorder_frames;
  int max_dec_frame_buffering;
  bool timing_info_present_flag;
  int num_units_in_tick;
  int time_scale;
  bool fixed_frame_rate_flag;

  bool video_signal_type_present_flag;
  int video_format;
  bool video_full_range_flag;
  bool colour_description_present_flag;
  int colour_primaries;
  int transfer_characteristics;
  int matrix_coefficients;

  // TODO(posciak): actually parse these instead of ParseAndIgnoreHRDParameters.
  bool nal_hrd_parameters_present_flag;
  int cpb_cnt_minus1;
  int bit_rate_scale;
  int cpb_size_scale;
  int bit_rate_value_minus1[32];
  int cpb_size_value_minus1[32];
  bool cbr_flag[32];
  // NOTE(review): this member is spelled "_minus_1" while its siblings below
  // use "_minus1"; the name is kept because it is part of the public struct.
  int initial_cpb_removal_delay_length_minus_1;
  int cpb_removal_delay_length_minus1;
  int dpb_output_delay_length_minus1;
  int time_offset_length;

  bool low_delay_hrd_flag;

  int chroma_array_type;

  // Helpers to compute frequently-used values. These methods return
  // base::nullopt if they encounter integer overflow. They do not verify that
  // the results are in-spec for the given profile or level.
  base::Optional<Size> GetCodedSize() const;
  base::Optional<Rect> GetVisibleRect() const;
};

// Parsed picture parameter set. Instances are owned by H264Parser and
// retrieved through H264Parser::GetPPS().
struct H264PPS {
  H264PPS();

  int pic_parameter_set_id;
  int seq_parameter_set_id;
  bool entropy_coding_mode_flag;
  bool bottom_field_pic_order_in_frame_present_flag;
  int num_slice_groups_minus1;
  // TODO(posciak): Slice groups not implemented, could be added at some point.
  int num_ref_idx_l0_default_active_minus1;
  int num_ref_idx_l1_default_active_minus1;
  bool weighted_pred_flag;
  int weighted_bipred_idc;
  int pic_init_qp_minus26;
  int pic_init_qs_minus26;
  int chroma_qp_index_offset;
  bool deblocking_filter_control_present_flag;
  bool constrained_intra_pred_flag;
  bool redundant_pic_cnt_present_flag;
  bool transform_8x8_mode_flag;

  bool pic_scaling_matrix_present_flag;
  int scaling_list4x4[6][kH264ScalingList4x4Length];
  int scaling_list8x8[6][kH264ScalingList8x8Length];

  int second_chroma_qp_index_offset;
};

// One reference picture list modification entry; H264SliceHeader holds an
// array of these per reference list.
struct H264ModificationOfPicNum {
  int modification_of_pic_nums_idc;
  // NOTE(review): which union member is meaningful presumably depends on
  // |modification_of_pic_nums_idc| -- confirm against the spec and the parser
  // implementation.
  union {
    int abs_diff_pic_num_minus1;
    int long_term_pic_num;
  };
};

// Prediction weight table for one reference list (see
// H264SliceHeader::pred_weight_table_l0 / pred_weight_table_l1).
struct H264WeightingFactors {
  bool luma_weight_flag;
  bool chroma_weight_flag;
  int luma_weight[32];
  int luma_offset[32];
  int chroma_weight[32][2];
  int chroma_offset[32][2];
};

// One decoded reference picture marking entry (see
// H264SliceHeader::ref_pic_marking).
struct H264DecRefPicMarking {
  int memory_mgmnt_control_operation;
  int difference_of_pic_nums_minus1;
  int long_term_pic_num;
  int long_term_frame_idx;
  int max_long_term_frame_idx_plus1;
};

// Parsed slice header. Provided and managed by the caller and filled in by
// H264Parser::ParseSliceHeader().
struct H264SliceHeader {
  H264SliceHeader();

  // Capacities of the fixed-size arrays declared below.
  enum { kRefListSize = 32, kRefListModSize = kRefListSize };

  enum Type {
    kPSlice = 0,
    kBSlice = 1,
    kISlice = 2,
    kSPSlice = 3,
    kSISlice = 4,
  };

  // Slice type predicates. NOTE(review): presumably these test |slice_type|
  // against the Type values above; the definitions are not in this header.
  bool IsPSlice() const;
  bool IsBSlice() const;
  bool IsISlice() const;
  bool IsSPSlice() const;
  bool IsSISlice() const;

  bool idr_pic_flag;         // from NAL header
  int nal_ref_idc;           // from NAL header
  const uint8_t* nalu_data;  // from NAL header
  off_t nalu_size;           // from NAL header
  off_t header_bit_size;     // calculated

  int first_mb_in_slice;
  int slice_type;
  int pic_parameter_set_id;
  int colour_plane_id;  // TODO(posciak): use this! http://crbug.com/139878
  int frame_num;
  bool field_pic_flag;
  bool bottom_field_flag;
  int idr_pic_id;
  int pic_order_cnt_lsb;
  int delta_pic_order_cnt_bottom;
  int delta_pic_order_cnt0;
  int delta_pic_order_cnt1;
  int redundant_pic_cnt;
  bool direct_spatial_mv_pred_flag;

  bool num_ref_idx_active_override_flag;
  int num_ref_idx_l0_active_minus1;
  int num_ref_idx_l1_active_minus1;
  bool ref_pic_list_modification_flag_l0;
  bool ref_pic_list_modification_flag_l1;
  H264ModificationOfPicNum ref_list_l0_modifications[kRefListModSize];
  H264ModificationOfPicNum ref_list_l1_modifications[kRefListModSize];

  int luma_log2_weight_denom;
  int chroma_log2_weight_denom;

  bool luma_weight_l0_flag;
  bool chroma_weight_l0_flag;
  H264WeightingFactors pred_weight_table_l0;

  bool luma_weight_l1_flag;
  bool chroma_weight_l1_flag;
  H264WeightingFactors pred_weight_table_l1;

  bool no_output_of_prior_pics_flag;
  bool long_term_reference_flag;

  bool adaptive_ref_pic_marking_mode_flag;
  H264DecRefPicMarking ref_pic_marking[kRefListSize];

  int cabac_init_idc;
  int slice_qp_delta;
  bool sp_for_switch_flag;
  int slice_qs_delta;
  int disable_deblocking_filter_idc;
  int slice_alpha_c0_offset_div2;
  int slice_beta_offset_div2;

  // Calculated.
  // Size in bits of dec_ref_pic_marking() syntax element.
  size_t dec_ref_pic_marking_bit_size;
  // NOTE(review): presumably the size in bits of the picture order count
  // syntax elements of this header -- confirm in the parser implementation.
  size_t pic_order_cnt_bit_size;
};

// Payload fields of a recovery point SEI message (the only payload type
// currently represented in H264SEIMessage).
struct H264SEIRecoveryPoint {
  int recovery_frame_cnt;
  bool exact_match_flag;
  bool broken_link_flag;
  int changing_slice_group_idc;
};

// Parsed SEI message. Provided and managed by the caller and filled in by
// H264Parser::ParseSEI().
struct H264SEIMessage {
  H264SEIMessage();

  // Values for |type|.
  enum Type {
    kSEIRecoveryPoint = 6,
  };

  int type;
  int payload_size;
  union {
    // Placeholder; in future more supported types will contribute to more
    // union members here.
    H264SEIRecoveryPoint recovery_point;
  };
};

// Class to parse an Annex-B H.264 stream,
// as specified in chapters 7 and Annex B of the H.264 spec.
class H264Parser {
 public:
  enum Result {
    kOk,
    kInvalidStream,      // error in stream
    kUnsupportedStream,  // stream not supported by the parser
    kEOStream,           // end of stream
  };

  // Find offset from start of data to next NALU start code
  // and size of found start code (3 or 4 bytes).
  // If no start code is found, offset is pointing to the first unprocessed byte
  // (i.e. the first byte that was not considered as a possible start of a start
  // code) and |*start_code_size| is set to 0.
  // Preconditions:
  // - |data_size| >= 0
  // Postconditions:
  // - |*offset| is between 0 and |data_size| included.
  //   It is strictly less than |data_size| if |data_size| > 0.
  // - |*start_code_size| is either 0, 3 or 4.
  static bool FindStartCode(const uint8_t* data,
                            off_t data_size,
                            off_t* offset,
                            off_t* start_code_size);

  // Wrapper for FindStartCode() that skips over start codes that
  // may appear inside of |ranges|. This function is static, so the ranges to
  // skip are passed explicitly rather than read from |encrypted_ranges_|.
  // Returns true if a start code was found. Otherwise returns false.
  static bool FindStartCodeInClearRanges(const uint8_t* data,
                                         off_t data_size,
                                         const Ranges<const uint8_t*>& ranges,
                                         off_t* offset,
                                         off_t* start_code_size);

  // Parses the input stream and returns all the NALUs through |nalus|. Returns
  // false if the stream is invalid.
  static bool ParseNALUs(const uint8_t* stream,
                         size_t stream_size,
                         std::vector<H264NALU>* nalus);

  H264Parser();
  ~H264Parser();

  void Reset();
  // Set current stream pointer to |stream| of |stream_size| in bytes,
  // |stream| owned by caller.
  void SetStream(const uint8_t* stream, off_t stream_size);
  // Same as SetStream(), but for a stream that may be partially encrypted:
  // |subsamples| contains information about what parts of |stream| are
  // encrypted.
  void SetEncryptedStream(const uint8_t* stream,
                          off_t stream_size,
                          const std::vector<SubsampleEntry>& subsamples);

  // Read the stream to find the next NALU, identify it and return
  // that information in |*nalu|. This advances the stream to the beginning
  // of this NALU, but not past it, so subsequent calls to NALU-specific
  // parsing functions (ParseSPS, etc.) will parse this NALU.
  // If the caller wishes to skip the current NALU, it can call this function
  // again, instead of any NALU-type specific parse functions below.
  Result AdvanceToNextNALU(H264NALU* nalu);

  // NALU-specific parsing functions.
  // These should be called after AdvanceToNextNALU().

  // SPSes and PPSes are owned by the parser class and the memory for their
  // structures is managed here, not by the caller, as they are reused
  // across NALUs.
  //
  // Parse an SPS/PPS NALU and save their data in the parser, returning id
  // of the parsed structure in |*pps_id|/|*sps_id|.
  // To get a pointer to a given SPS/PPS structure, use GetSPS()/GetPPS(),
  // passing the returned |*sps_id|/|*pps_id| as parameter.
  // TODO(posciak,fischman): consider replacing returning Result from Parse*()
  // methods with a scoped_ptr and adding an AtEOS() function to check for EOS
  // if Parse*() return NULL.
  Result ParseSPS(int* sps_id);
  Result ParsePPS(int* pps_id);

  // Return a pointer to SPS/PPS with given |sps_id|/|pps_id| or NULL if not
  // present.
  const H264SPS* GetSPS(int sps_id) const;
  const H264PPS* GetPPS(int pps_id) const;

  // Slice headers and SEI messages are not used across NALUs by the parser
  // and can be discarded after current NALU, so the parser does not store
  // them, nor does it manage their memory.
  // The caller has to provide and manage it instead.

  // Parse a slice header, returning it in |*shdr|. |nalu| must be the NALU
  // returned from AdvanceToNextNALU() and corresponding to |*shdr|.
  Result ParseSliceHeader(const H264NALU& nalu, H264SliceHeader* shdr);

  // Parse a SEI message, returning it in |*sei_msg|, provided and managed
  // by the caller.
  Result ParseSEI(H264SEIMessage* sei_msg);

 private:
  // Move the stream pointer to the beginning of the next NALU,
  // i.e. pointing at the next start code.
  // Return true if a NALU has been found.
  // If a NALU is found:
  // - its size in bytes is returned in |*nalu_size| and includes
  //   the start code as well as the trailing zero bits.
  // - the size in bytes of the start code is returned in |*start_code_size|.
  bool LocateNALU(off_t* nalu_size, off_t* start_code_size);

  // Exp-Golomb code parsing as specified in chapter 9.1 of the spec.
  // Read one unsigned exp-Golomb code from the stream and return in |*val|.
  Result ReadUE(int* val);

  // Read one signed exp-Golomb code from the stream and return in |*val|.
  Result ReadSE(int* val);

  // Parse scaling lists (see spec).
  Result ParseScalingList(int size, int* scaling_list, bool* use_default);
  Result ParseSPSScalingLists(H264SPS* sps);
  Result ParsePPSScalingLists(const H264SPS& sps, H264PPS* pps);

  // Parse optional VUI parameters in SPS (see spec).
  Result ParseVUIParameters(H264SPS* sps);
  // Set |hrd_parameters_present| to true only if they are present.
  Result ParseAndIgnoreHRDParameters(bool* hrd_parameters_present);

  // Parse reference picture lists' modifications (see spec).
  Result ParseRefPicListModifications(H264SliceHeader* shdr);
  Result ParseRefPicListModification(int num_ref_idx_active_minus1,
                                     H264ModificationOfPicNum* ref_list_mods);

  // Parse prediction weight table (see spec).
  Result ParsePredWeightTable(const H264SPS& sps, H264SliceHeader* shdr);

  // Parse weighting factors (see spec).
  Result ParseWeightingFactors(int num_ref_idx_active_minus1,
                               int chroma_array_type,
                               int luma_log2_weight_denom,
                               int chroma_log2_weight_denom,
                               H264WeightingFactors* w_facts);

  // Parse decoded reference picture marking information (see spec).
  Result ParseDecRefPicMarking(H264SliceHeader* shdr);

  // Pointer to the current NALU in the stream.
  const uint8_t* stream_;

  // Bytes left in the stream after the current NALU.
  off_t bytes_left_;

  H264BitReader br_;

  // PPSes and SPSes stored for future reference.
  std::map<int, std::unique_ptr<H264SPS>> active_SPSes_;
  std::map<int, std::unique_ptr<H264PPS>> active_PPSes_;

  // Ranges of encrypted bytes in the buffer passed to
  // SetEncryptedStream().
  Ranges<const uint8_t*> encrypted_ranges_;

  DISALLOW_COPY_AND_ASSIGN(H264Parser);
};

}  // namespace media

#endif  // H264_PARSER_H_