1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // This file contains an implementation of an H264 Annex-B video stream parser. 6 7 #ifndef MEDIA_FILTERS_H264_PARSER_H_ 8 #define MEDIA_FILTERS_H264_PARSER_H_ 9 10 #include <sys/types.h> 11 12 #include <map> 13 #include <vector> 14 15 #include "base/basictypes.h" 16 #include "media/base/media_export.h" 17 #include "media/base/ranges.h" 18 #include "media/filters/h264_bit_reader.h" 19 20 namespace media { 21 22 struct SubsampleEntry; 23 24 // For explanations of each struct and its members, see H.264 specification 25 // at http://www.itu.int/rec/T-REC-H.264. 26 struct MEDIA_EXPORT H264NALU { 27 H264NALU(); 28 29 enum Type { 30 kUnspecified = 0, 31 kNonIDRSlice = 1, 32 kSliceDataA = 2, 33 kSliceDataB = 3, 34 kSliceDataC = 4, 35 kIDRSlice = 5, 36 kSEIMessage = 6, 37 kSPS = 7, 38 kPPS = 8, 39 kAUD = 9, 40 kEOSeq = 10, 41 kEOStream = 11, 42 kFiller = 12, 43 kSPSExt = 13, 44 kReserved14 = 14, 45 kReserved15 = 15, 46 kReserved16 = 16, 47 kReserved17 = 17, 48 kReserved18 = 18, 49 kCodedSliceAux = 19, 50 kCodedSliceExtension = 20, 51 }; 52 53 // After (without) start code; we don't own the underlying memory 54 // and a shallow copy should be made when copying this struct. 55 const uint8* data; 56 off_t size; // From after start code to start code of next NALU (or EOS). 57 58 int nal_ref_idc; 59 int nal_unit_type; 60 }; 61 62 enum { 63 kH264ScalingList4x4Length = 16, 64 kH264ScalingList8x8Length = 64, 65 }; 66 67 struct MEDIA_EXPORT H264SPS { 68 H264SPS(); 69 70 enum H264ProfileIDC { 71 kProfileIDCBaseline = 66, 72 kProfileIDCConstrainedBaseline = kProfileIDCBaseline, 73 kProfileIDCMain = 77, 74 kProfileIDCHigh = 100, 75 }; 76 77 enum AspectRatioIdc { 78 kExtendedSar = 255, 79 }; 80 81 enum { 82 // Constants for HRD parameters (spec ch. E.2.2). 83 kBitRateScaleConstantTerm = 6, // Equation E-37. 84 kCPBSizeScaleConstantTerm = 4, // Equation E-38. 85 kDefaultInitialCPBRemovalDelayLength = 24, 86 kDefaultDPBOutputDelayLength = 24, 87 kDefaultTimeOffsetLength = 24, 88 }; 89 90 int profile_idc; 91 bool constraint_set0_flag; 92 bool constraint_set1_flag; 93 bool constraint_set2_flag; 94 bool constraint_set3_flag; 95 bool constraint_set4_flag; 96 bool constraint_set5_flag; 97 int level_idc; 98 int seq_parameter_set_id; 99 100 int chroma_format_idc; 101 bool separate_colour_plane_flag; 102 int bit_depth_luma_minus8; 103 int bit_depth_chroma_minus8; 104 bool qpprime_y_zero_transform_bypass_flag; 105 106 bool seq_scaling_matrix_present_flag; 107 int scaling_list4x4[6][kH264ScalingList4x4Length]; 108 int scaling_list8x8[6][kH264ScalingList8x8Length]; 109 110 int log2_max_frame_num_minus4; 111 int pic_order_cnt_type; 112 int log2_max_pic_order_cnt_lsb_minus4; 113 bool delta_pic_order_always_zero_flag; 114 int offset_for_non_ref_pic; 115 int offset_for_top_to_bottom_field; 116 int num_ref_frames_in_pic_order_cnt_cycle; 117 int expected_delta_per_pic_order_cnt_cycle; // calculated 118 int offset_for_ref_frame[255]; 119 int max_num_ref_frames; 120 bool gaps_in_frame_num_value_allowed_flag; 121 int pic_width_in_mbs_minus1; 122 int pic_height_in_map_units_minus1; 123 bool frame_mbs_only_flag; 124 bool mb_adaptive_frame_field_flag; 125 bool direct_8x8_inference_flag; 126 bool frame_cropping_flag; 127 int frame_crop_left_offset; 128 int frame_crop_right_offset; 129 int frame_crop_top_offset; 130 int frame_crop_bottom_offset; 131 132 bool vui_parameters_present_flag; 133 int sar_width; // Set to 0 when not specified. 134 int sar_height; // Set to 0 when not specified. 135 bool bitstream_restriction_flag; 136 int max_num_reorder_frames; 137 int max_dec_frame_buffering; 138 bool timing_info_present_flag; 139 int num_units_in_tick; 140 int time_scale; 141 bool fixed_frame_rate_flag; 142 143 // TODO(posciak): actually parse these instead of ParseAndIgnoreHRDParameters. 144 bool nal_hrd_parameters_present_flag; 145 int cpb_cnt_minus1; 146 int bit_rate_scale; 147 int cpb_size_scale; 148 int bit_rate_value_minus1[32]; 149 int cpb_size_value_minus1[32]; 150 bool cbr_flag[32]; 151 int initial_cpb_removal_delay_length_minus_1; 152 int cpb_removal_delay_length_minus1; 153 int dpb_output_delay_length_minus1; 154 int time_offset_length; 155 156 bool low_delay_hrd_flag; 157 158 int chroma_array_type; 159 }; 160 161 struct MEDIA_EXPORT H264PPS { 162 H264PPS(); 163 164 int pic_parameter_set_id; 165 int seq_parameter_set_id; 166 bool entropy_coding_mode_flag; 167 bool bottom_field_pic_order_in_frame_present_flag; 168 int num_slice_groups_minus1; 169 // TODO(posciak): Slice groups not implemented, could be added at some point. 170 int num_ref_idx_l0_default_active_minus1; 171 int num_ref_idx_l1_default_active_minus1; 172 bool weighted_pred_flag; 173 int weighted_bipred_idc; 174 int pic_init_qp_minus26; 175 int pic_init_qs_minus26; 176 int chroma_qp_index_offset; 177 bool deblocking_filter_control_present_flag; 178 bool constrained_intra_pred_flag; 179 bool redundant_pic_cnt_present_flag; 180 bool transform_8x8_mode_flag; 181 182 bool pic_scaling_matrix_present_flag; 183 int scaling_list4x4[6][kH264ScalingList4x4Length]; 184 int scaling_list8x8[6][kH264ScalingList8x8Length]; 185 186 int second_chroma_qp_index_offset; 187 }; 188 189 struct MEDIA_EXPORT H264ModificationOfPicNum { 190 int modification_of_pic_nums_idc; 191 union { 192 int abs_diff_pic_num_minus1; 193 int long_term_pic_num; 194 }; 195 }; 196 197 struct MEDIA_EXPORT H264WeightingFactors { 198 bool luma_weight_flag; 199 bool chroma_weight_flag; 200 int luma_weight[32]; 201 int luma_offset[32]; 202 int chroma_weight[32][2]; 203 int chroma_offset[32][2]; 204 }; 205 206 struct MEDIA_EXPORT H264DecRefPicMarking { 207 int memory_mgmnt_control_operation; 208 int difference_of_pic_nums_minus1; 209 int long_term_pic_num; 210 int long_term_frame_idx; 211 int max_long_term_frame_idx_plus1; 212 }; 213 214 struct MEDIA_EXPORT H264SliceHeader { 215 H264SliceHeader(); 216 217 enum { 218 kRefListSize = 32, 219 kRefListModSize = kRefListSize 220 }; 221 222 enum Type { 223 kPSlice = 0, 224 kBSlice = 1, 225 kISlice = 2, 226 kSPSlice = 3, 227 kSISlice = 4, 228 }; 229 230 bool IsPSlice() const; 231 bool IsBSlice() const; 232 bool IsISlice() const; 233 bool IsSPSlice() const; 234 bool IsSISlice() const; 235 236 bool idr_pic_flag; // from NAL header 237 int nal_ref_idc; // from NAL header 238 const uint8* nalu_data; // from NAL header 239 off_t nalu_size; // from NAL header 240 off_t header_bit_size; // calculated 241 242 int first_mb_in_slice; 243 int slice_type; 244 int pic_parameter_set_id; 245 int colour_plane_id; // TODO(posciak): use this! http://crbug.com/139878 246 int frame_num; 247 bool field_pic_flag; 248 bool bottom_field_flag; 249 int idr_pic_id; 250 int pic_order_cnt_lsb; 251 int delta_pic_order_cnt_bottom; 252 int delta_pic_order_cnt[2]; 253 int redundant_pic_cnt; 254 bool direct_spatial_mv_pred_flag; 255 256 bool num_ref_idx_active_override_flag; 257 int num_ref_idx_l0_active_minus1; 258 int num_ref_idx_l1_active_minus1; 259 bool ref_pic_list_modification_flag_l0; 260 bool ref_pic_list_modification_flag_l1; 261 H264ModificationOfPicNum ref_list_l0_modifications[kRefListModSize]; 262 H264ModificationOfPicNum ref_list_l1_modifications[kRefListModSize]; 263 264 int luma_log2_weight_denom; 265 int chroma_log2_weight_denom; 266 267 bool luma_weight_l0_flag; 268 bool chroma_weight_l0_flag; 269 H264WeightingFactors pred_weight_table_l0; 270 271 bool luma_weight_l1_flag; 272 bool chroma_weight_l1_flag; 273 H264WeightingFactors pred_weight_table_l1; 274 275 bool no_output_of_prior_pics_flag; 276 bool long_term_reference_flag; 277 278 bool adaptive_ref_pic_marking_mode_flag; 279 H264DecRefPicMarking ref_pic_marking[kRefListSize]; 280 281 int cabac_init_idc; 282 int slice_qp_delta; 283 bool sp_for_switch_flag; 284 int slice_qs_delta; 285 int disable_deblocking_filter_idc; 286 int slice_alpha_c0_offset_div2; 287 int slice_beta_offset_div2; 288 }; 289 290 struct H264SEIRecoveryPoint { 291 int recovery_frame_cnt; 292 bool exact_match_flag; 293 bool broken_link_flag; 294 int changing_slice_group_idc; 295 }; 296 297 struct MEDIA_EXPORT H264SEIMessage { 298 H264SEIMessage(); 299 300 enum Type { 301 kSEIRecoveryPoint = 6, 302 }; 303 304 int type; 305 int payload_size; 306 union { 307 // Placeholder; in future more supported types will contribute to more 308 // union members here. 309 H264SEIRecoveryPoint recovery_point; 310 }; 311 }; 312 313 // Class to parse an Annex-B H.264 stream, 314 // as specified in chapters 7 and Annex B of the H.264 spec. 315 class MEDIA_EXPORT H264Parser { 316 public: 317 enum Result { 318 kOk, 319 kInvalidStream, // error in stream 320 kUnsupportedStream, // stream not supported by the parser 321 kEOStream, // end of stream 322 }; 323 324 // Find offset from start of data to next NALU start code 325 // and size of found start code (3 or 4 bytes). 326 // If no start code is found, offset is pointing to the first unprocessed byte 327 // (i.e. the first byte that was not considered as a possible start of a start 328 // code) and |*start_code_size| is set to 0. 329 // Preconditions: 330 // - |data_size| >= 0 331 // Postconditions: 332 // - |*offset| is between 0 and |data_size| included. 333 // It is strictly less than |data_size| if |data_size| > 0. 334 // - |*start_code_size| is either 0, 3 or 4. 335 static bool FindStartCode(const uint8* data, off_t data_size, 336 off_t* offset, off_t* start_code_size); 337 338 H264Parser(); 339 ~H264Parser(); 340 341 void Reset(); 342 // Set current stream pointer to |stream| of |stream_size| in bytes, 343 // |stream| owned by caller. 344 // |subsamples| contains information about what parts of |stream| are 345 // encrypted. 346 void SetStream(const uint8* stream, off_t stream_size); 347 void SetEncryptedStream(const uint8* stream, off_t stream_size, 348 const std::vector<SubsampleEntry>& subsamples); 349 350 // Read the stream to find the next NALU, identify it and return 351 // that information in |*nalu|. This advances the stream to the beginning 352 // of this NALU, but not past it, so subsequent calls to NALU-specific 353 // parsing functions (ParseSPS, etc.) will parse this NALU. 354 // If the caller wishes to skip the current NALU, it can call this function 355 // again, instead of any NALU-type specific parse functions below. 356 Result AdvanceToNextNALU(H264NALU* nalu); 357 358 // NALU-specific parsing functions. 359 // These should be called after AdvanceToNextNALU(). 360 361 // SPSes and PPSes are owned by the parser class and the memory for their 362 // structures is managed here, not by the caller, as they are reused 363 // across NALUs. 364 // 365 // Parse an SPS/PPS NALU and save their data in the parser, returning id 366 // of the parsed structure in |*pps_id|/|*sps_id|. 367 // To get a pointer to a given SPS/PPS structure, use GetSPS()/GetPPS(), 368 // passing the returned |*sps_id|/|*pps_id| as parameter. 369 // TODO(posciak,fischman): consider replacing returning Result from Parse*() 370 // methods with a scoped_ptr and adding an AtEOS() function to check for EOS 371 // if Parse*() return NULL. 372 Result ParseSPS(int* sps_id); 373 Result ParsePPS(int* pps_id); 374 375 // Return a pointer to SPS/PPS with given |sps_id|/|pps_id| or NULL if not 376 // present. 377 const H264SPS* GetSPS(int sps_id); 378 const H264PPS* GetPPS(int pps_id); 379 380 // Slice headers and SEI messages are not used across NALUs by the parser 381 // and can be discarded after current NALU, so the parser does not store 382 // them, nor does it manage their memory. 383 // The caller has to provide and manage it instead. 384 385 // Parse a slice header, returning it in |*shdr|. |*nalu| must be set to 386 // the NALU returned from AdvanceToNextNALU() and corresponding to |*shdr|. 387 Result ParseSliceHeader(const H264NALU& nalu, H264SliceHeader* shdr); 388 389 // Parse a SEI message, returning it in |*sei_msg|, provided and managed 390 // by the caller. 391 Result ParseSEI(H264SEIMessage* sei_msg); 392 393 private: 394 // Move the stream pointer to the beginning of the next NALU, 395 // i.e. pointing at the next start code. 396 // Return true if a NALU has been found. 397 // If a NALU is found: 398 // - its size in bytes is returned in |*nalu_size| and includes 399 // the start code as well as the trailing zero bits. 400 // - the size in bytes of the start code is returned in |*start_code_size|. 401 bool LocateNALU(off_t* nalu_size, off_t* start_code_size); 402 403 // Wrapper for FindStartCode() that skips over start codes that 404 // may appear inside of |encrypted_ranges_|. 405 // Returns true if a start code was found. Otherwise returns false. 406 bool FindStartCodeInClearRanges(const uint8* data, off_t data_size, 407 off_t* offset, off_t* start_code_size); 408 409 // Exp-Golomb code parsing as specified in chapter 9.1 of the spec. 410 // Read one unsigned exp-Golomb code from the stream and return in |*val|. 411 Result ReadUE(int* val); 412 413 // Read one signed exp-Golomb code from the stream and return in |*val|. 414 Result ReadSE(int* val); 415 416 // Parse scaling lists (see spec). 417 Result ParseScalingList(int size, int* scaling_list, bool* use_default); 418 Result ParseSPSScalingLists(H264SPS* sps); 419 Result ParsePPSScalingLists(const H264SPS& sps, H264PPS* pps); 420 421 // Parse optional VUI parameters in SPS (see spec). 422 Result ParseVUIParameters(H264SPS* sps); 423 // Set |hrd_parameters_present| to true only if they are present. 424 Result ParseAndIgnoreHRDParameters(bool* hrd_parameters_present); 425 426 // Parse reference picture lists' modifications (see spec). 427 Result ParseRefPicListModifications(H264SliceHeader* shdr); 428 Result ParseRefPicListModification(int num_ref_idx_active_minus1, 429 H264ModificationOfPicNum* ref_list_mods); 430 431 // Parse prediction weight table (see spec). 432 Result ParsePredWeightTable(const H264SPS& sps, H264SliceHeader* shdr); 433 434 // Parse weighting factors (see spec). 435 Result ParseWeightingFactors(int num_ref_idx_active_minus1, 436 int chroma_array_type, 437 int luma_log2_weight_denom, 438 int chroma_log2_weight_denom, 439 H264WeightingFactors* w_facts); 440 441 // Parse decoded reference picture marking information (see spec). 442 Result ParseDecRefPicMarking(H264SliceHeader* shdr); 443 444 // Pointer to the current NALU in the stream. 445 const uint8* stream_; 446 447 // Bytes left in the stream after the current NALU. 448 off_t bytes_left_; 449 450 H264BitReader br_; 451 452 // PPSes and SPSes stored for future reference. 453 typedef std::map<int, H264SPS*> SPSById; 454 typedef std::map<int, H264PPS*> PPSById; 455 SPSById active_SPSes_; 456 PPSById active_PPSes_; 457 458 // Ranges of encrypted bytes in the buffer passed to 459 // SetEncryptedStream(). 460 Ranges<const uint8*> encrypted_ranges_; 461 462 DISALLOW_COPY_AND_ASSIGN(H264Parser); 463 }; 464 465 } // namespace media 466 467 #endif // MEDIA_FILTERS_H264_PARSER_H_ 468