1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "media/mp2t/es_parser_h264.h" 6 7 #include "base/basictypes.h" 8 #include "base/logging.h" 9 #include "media/base/bit_reader.h" 10 #include "media/base/buffers.h" 11 #include "media/base/stream_parser_buffer.h" 12 #include "media/base/video_frame.h" 13 #include "media/mp2t/mp2t_common.h" 14 #include "ui/gfx/rect.h" 15 #include "ui/gfx/size.h" 16 17 static const int kExtendedSar = 255; 18 19 // ISO 14496 part 10 20 // VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator" 21 static const int kTableSarWidth[14] = { 22 0, 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160 23 }; 24 25 static const int kTableSarHeight[14] = { 26 0, 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99 27 }; 28 29 // Remove the start code emulation prevention ( 0x000003 ) 30 // and return the size of the converted buffer. 31 // Note: Size of |buf_rbsp| should be at least |size| to accomodate 32 // the worst case. 33 static int ConvertToRbsp(const uint8* buf, int size, uint8* buf_rbsp) { 34 int rbsp_size = 0; 35 int zero_count = 0; 36 for (int k = 0; k < size; k++) { 37 if (buf[k] == 0x3 && zero_count >= 2) { 38 zero_count = 0; 39 continue; 40 } 41 if (buf[k] == 0) 42 zero_count++; 43 else 44 zero_count = 0; 45 buf_rbsp[rbsp_size++] = buf[k]; 46 } 47 return rbsp_size; 48 } 49 50 namespace media { 51 namespace mp2t { 52 53 // ISO 14496 - Part 10: Table 7-1 "NAL unit type codes" 54 enum NalUnitType { 55 kNalUnitTypeNonIdrSlice = 1, 56 kNalUnitTypeIdrSlice = 5, 57 kNalUnitTypeSPS = 7, 58 kNalUnitTypePPS = 8, 59 kNalUnitTypeAUD = 9, 60 }; 61 62 class BitReaderH264 : public BitReader { 63 public: 64 BitReaderH264(const uint8* data, off_t size) 65 : BitReader(data, size) { } 66 67 // Read an unsigned exp-golomb value. 68 // Return true if successful. 69 bool ReadBitsExpGolomb(uint32* exp_golomb_value); 70 }; 71 72 bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) { 73 // Get the number of leading zeros. 74 int zero_count = 0; 75 while (true) { 76 int one_bit; 77 RCHECK(ReadBits(1, &one_bit)); 78 if (one_bit != 0) 79 break; 80 zero_count++; 81 } 82 83 // If zero_count is greater than 31, the calculated value will overflow. 84 if (zero_count > 31) { 85 SkipBits(zero_count); 86 return false; 87 } 88 89 // Read the actual value. 90 uint32 base = (1 << zero_count) - 1; 91 uint32 offset; 92 RCHECK(ReadBits(zero_count, &offset)); 93 *exp_golomb_value = base + offset; 94 95 return true; 96 } 97 98 EsParserH264::EsParserH264( 99 const NewVideoConfigCB& new_video_config_cb, 100 const EmitBufferCB& emit_buffer_cb) 101 : new_video_config_cb_(new_video_config_cb), 102 emit_buffer_cb_(emit_buffer_cb), 103 es_pos_(0), 104 current_nal_pos_(-1), 105 current_access_unit_pos_(-1), 106 is_key_frame_(false) { 107 } 108 109 EsParserH264::~EsParserH264() { 110 } 111 112 bool EsParserH264::Parse(const uint8* buf, int size, 113 base::TimeDelta pts, 114 base::TimeDelta dts) { 115 // Note: Parse is invoked each time a PES packet has been reassembled. 116 // Unfortunately, a PES packet does not necessarily map 117 // to an h264 access unit, although the HLS recommendation is to use one PES 118 // for each access unit (but this is just a recommendation and some streams 119 // do not comply with this recommendation). 120 121 // Link position |raw_es_size| in the ES stream with a timing descriptor. 122 // HLS recommendation: "In AVC video, you should have both a DTS and a 123 // PTS in each PES header". 124 if (dts == kNoTimestamp() && pts == kNoTimestamp()) { 125 DVLOG(1) << "A timestamp must be provided for each reassembled PES"; 126 return false; 127 } 128 TimingDesc timing_desc; 129 timing_desc.pts = pts; 130 timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts; 131 132 int raw_es_size; 133 const uint8* raw_es; 134 es_byte_queue_.Peek(&raw_es, &raw_es_size); 135 timing_desc_list_.push_back( 136 std::pair<int, TimingDesc>(raw_es_size, timing_desc)); 137 138 // Add the incoming bytes to the ES queue. 139 es_byte_queue_.Push(buf, size); 140 141 // Add NALs from the incoming buffer. 142 if (!ParseInternal()) 143 return false; 144 145 // Discard emitted frames 146 // or every byte that was parsed so far if there is no current frame. 147 int skip_count = 148 (current_access_unit_pos_ >= 0) ? current_access_unit_pos_ : es_pos_; 149 DiscardEs(skip_count); 150 151 return true; 152 } 153 154 void EsParserH264::Flush() { 155 if (current_access_unit_pos_ < 0) 156 return; 157 158 // Force emitting the last access unit. 159 int next_aud_pos; 160 const uint8* raw_es; 161 es_byte_queue_.Peek(&raw_es, &next_aud_pos); 162 EmitFrameIfNeeded(next_aud_pos); 163 current_nal_pos_ = -1; 164 StartFrame(-1); 165 166 // Discard the emitted frame. 167 DiscardEs(next_aud_pos); 168 } 169 170 void EsParserH264::Reset() { 171 DVLOG(1) << "EsParserH264::Reset"; 172 es_byte_queue_.Reset(); 173 timing_desc_list_.clear(); 174 es_pos_ = 0; 175 current_nal_pos_ = -1; 176 StartFrame(-1); 177 last_video_decoder_config_ = VideoDecoderConfig(); 178 } 179 180 bool EsParserH264::ParseInternal() { 181 int raw_es_size; 182 const uint8* raw_es; 183 es_byte_queue_.Peek(&raw_es, &raw_es_size); 184 185 DCHECK_GE(es_pos_, 0); 186 DCHECK_LT(es_pos_, raw_es_size); 187 188 // Resume h264 es parsing where it was left. 189 for ( ; es_pos_ < raw_es_size - 4; es_pos_++) { 190 // Make sure the syncword is either 00 00 00 01 or 00 00 01 191 if (raw_es[es_pos_ + 0] != 0 || raw_es[es_pos_ + 1] != 0) 192 continue; 193 int syncword_length = 0; 194 if (raw_es[es_pos_ + 2] == 0 && raw_es[es_pos_ + 3] == 1) 195 syncword_length = 4; 196 else if (raw_es[es_pos_ + 2] == 1) 197 syncword_length = 3; 198 else 199 continue; 200 201 // Parse the current NAL (and the new NAL then becomes the current one). 202 if (current_nal_pos_ >= 0) { 203 int nal_size = es_pos_ - current_nal_pos_; 204 DCHECK_GT(nal_size, 0); 205 RCHECK(NalParser(&raw_es[current_nal_pos_], nal_size)); 206 } 207 current_nal_pos_ = es_pos_ + syncword_length; 208 209 // Retrieve the NAL type. 210 int nal_header = raw_es[current_nal_pos_]; 211 int forbidden_zero_bit = (nal_header >> 7) & 0x1; 212 RCHECK(forbidden_zero_bit == 0); 213 NalUnitType nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f); 214 DVLOG(LOG_LEVEL_ES) << "nal: offset=" << es_pos_ 215 << " type=" << nal_unit_type; 216 217 // Emit a frame if needed. 218 if (nal_unit_type == kNalUnitTypeAUD) 219 EmitFrameIfNeeded(es_pos_); 220 221 // Skip the syncword. 222 es_pos_ += syncword_length; 223 } 224 225 return true; 226 } 227 228 void EsParserH264::EmitFrameIfNeeded(int next_aud_pos) { 229 // There is no current frame: start a new frame. 230 if (current_access_unit_pos_ < 0) { 231 StartFrame(next_aud_pos); 232 return; 233 } 234 235 // Get the access unit timing info. 236 TimingDesc current_timing_desc; 237 while (!timing_desc_list_.empty() && 238 timing_desc_list_.front().first <= current_access_unit_pos_) { 239 current_timing_desc = timing_desc_list_.front().second; 240 timing_desc_list_.pop_front(); 241 } 242 243 // Emit a frame. 244 int raw_es_size; 245 const uint8* raw_es; 246 es_byte_queue_.Peek(&raw_es, &raw_es_size); 247 int access_unit_size = next_aud_pos - current_access_unit_pos_; 248 scoped_refptr<StreamParserBuffer> stream_parser_buffer = 249 StreamParserBuffer::CopyFrom( 250 &raw_es[current_access_unit_pos_], 251 access_unit_size, 252 is_key_frame_); 253 stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts); 254 stream_parser_buffer->set_timestamp(current_timing_desc.pts); 255 emit_buffer_cb_.Run(stream_parser_buffer); 256 257 // Set the current frame position to the next AUD position. 258 StartFrame(next_aud_pos); 259 } 260 261 void EsParserH264::StartFrame(int aud_pos) { 262 // Two cases: 263 // - if aud_pos < 0, clear the current frame and set |is_key_frame| to a 264 // default value (false). 265 // - if aud_pos >= 0, start a new frame and set |is_key_frame| to true 266 // |is_key_frame_| will be updated while parsing the NALs of that frame. 267 // If any NAL is a non IDR NAL, it will be set to false. 268 current_access_unit_pos_ = aud_pos; 269 is_key_frame_ = (aud_pos >= 0); 270 } 271 272 void EsParserH264::DiscardEs(int nbytes) { 273 DCHECK_GE(nbytes, 0); 274 if (nbytes == 0) 275 return; 276 277 // Update the position of 278 // - the parser, 279 // - the current NAL, 280 // - the current access unit. 281 es_pos_ -= nbytes; 282 if (es_pos_ < 0) 283 es_pos_ = 0; 284 285 if (current_nal_pos_ >= 0) { 286 DCHECK_GE(current_nal_pos_, nbytes); 287 current_nal_pos_ -= nbytes; 288 } 289 if (current_access_unit_pos_ >= 0) { 290 DCHECK_GE(current_access_unit_pos_, nbytes); 291 current_access_unit_pos_ -= nbytes; 292 } 293 294 // Update the timing information accordingly. 295 std::list<std::pair<int, TimingDesc> >::iterator timing_it 296 = timing_desc_list_.begin(); 297 for (; timing_it != timing_desc_list_.end(); ++timing_it) 298 timing_it->first -= nbytes; 299 300 // Discard |nbytes| of ES. 301 es_byte_queue_.Pop(nbytes); 302 } 303 304 bool EsParserH264::NalParser(const uint8* buf, int size) { 305 // Get the NAL header. 306 if (size < 1) { 307 DVLOG(1) << "NalParser: incomplete NAL"; 308 return false; 309 } 310 int nal_header = buf[0]; 311 buf += 1; 312 size -= 1; 313 314 int forbidden_zero_bit = (nal_header >> 7) & 0x1; 315 if (forbidden_zero_bit != 0) 316 return false; 317 int nal_ref_idc = (nal_header >> 5) & 0x3; 318 int nal_unit_type = nal_header & 0x1f; 319 320 // Process the NAL content. 321 switch (nal_unit_type) { 322 case kNalUnitTypeSPS: 323 DVLOG(LOG_LEVEL_ES) << "NAL: SPS"; 324 // |nal_ref_idc| should not be 0 for a SPS. 325 if (nal_ref_idc == 0) 326 return false; 327 return ProcessSPS(buf, size); 328 case kNalUnitTypeIdrSlice: 329 DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice"; 330 return true; 331 case kNalUnitTypeNonIdrSlice: 332 DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice"; 333 is_key_frame_ = false; 334 return true; 335 case kNalUnitTypePPS: 336 DVLOG(LOG_LEVEL_ES) << "NAL: PPS"; 337 return true; 338 case kNalUnitTypeAUD: 339 DVLOG(LOG_LEVEL_ES) << "NAL: AUD"; 340 return true; 341 default: 342 DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type; 343 return true; 344 } 345 346 NOTREACHED(); 347 return false; 348 } 349 350 bool EsParserH264::ProcessSPS(const uint8* buf, int size) { 351 if (size <= 0) 352 return false; 353 354 // Removes start code emulation prevention. 355 // TODO(damienv): refactoring in media/base 356 // so as to have a unique H264 bit reader in Chrome. 357 scoped_ptr<uint8[]> buf_rbsp(new uint8[size]); 358 int rbsp_size = ConvertToRbsp(buf, size, buf_rbsp.get()); 359 360 BitReaderH264 bit_reader(buf_rbsp.get(), rbsp_size); 361 362 int profile_idc; 363 int constraint_setX_flag; 364 int level_idc; 365 uint32 seq_parameter_set_id; 366 uint32 log2_max_frame_num_minus4; 367 uint32 pic_order_cnt_type; 368 RCHECK(bit_reader.ReadBits(8, &profile_idc)); 369 RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag)); 370 RCHECK(bit_reader.ReadBits(8, &level_idc)); 371 RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id)); 372 RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4)); 373 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type)); 374 375 // |pic_order_cnt_type| shall be in the range of 0 to 2. 376 RCHECK(pic_order_cnt_type <= 2); 377 if (pic_order_cnt_type == 0) { 378 uint32 log2_max_pic_order_cnt_lsb_minus4; 379 RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4)); 380 } else if (pic_order_cnt_type == 1) { 381 // Note: |offset_for_non_ref_pic| and |offset_for_top_to_bottom_field| 382 // corresponds to their codenum not to their actual value. 383 int delta_pic_order_always_zero_flag; 384 uint32 offset_for_non_ref_pic; 385 uint32 offset_for_top_to_bottom_field; 386 uint32 num_ref_frames_in_pic_order_cnt_cycle; 387 RCHECK(bit_reader.ReadBits(1, &delta_pic_order_always_zero_flag)); 388 RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_non_ref_pic)); 389 RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_top_to_bottom_field)); 390 RCHECK( 391 bit_reader.ReadBitsExpGolomb(&num_ref_frames_in_pic_order_cnt_cycle)); 392 for (uint32 i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) { 393 uint32 offset_for_ref_frame_codenum; 394 RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_ref_frame_codenum)); 395 } 396 } 397 398 uint32 num_ref_frames; 399 int gaps_in_frame_num_value_allowed_flag; 400 uint32 pic_width_in_mbs_minus1; 401 uint32 pic_height_in_map_units_minus1; 402 RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames)); 403 RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag)); 404 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1)); 405 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1)); 406 407 int frame_mbs_only_flag; 408 RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag)); 409 if (!frame_mbs_only_flag) { 410 int mb_adaptive_frame_field_flag; 411 RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag)); 412 } 413 414 int direct_8x8_inference_flag; 415 RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag)); 416 417 int frame_cropping_flag; 418 uint32 frame_crop_left_offset = 0; 419 uint32 frame_crop_right_offset = 0; 420 uint32 frame_crop_top_offset = 0; 421 uint32 frame_crop_bottom_offset = 0; 422 RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag)); 423 if (frame_cropping_flag) { 424 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset)); 425 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset)); 426 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset)); 427 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset)); 428 } 429 430 int vui_parameters_present_flag; 431 RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag)); 432 int sar_width = 1; 433 int sar_height = 1; 434 if (vui_parameters_present_flag) { 435 // Read only the aspect ratio information from the VUI section. 436 // TODO(damienv): check whether other VUI info are useful. 437 int aspect_ratio_info_present_flag; 438 RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag)); 439 if (aspect_ratio_info_present_flag) { 440 int aspect_ratio_idc; 441 RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc)); 442 if (aspect_ratio_idc == kExtendedSar) { 443 RCHECK(bit_reader.ReadBits(16, &sar_width)); 444 RCHECK(bit_reader.ReadBits(16, &sar_height)); 445 } else if (aspect_ratio_idc < 14) { 446 sar_width = kTableSarWidth[aspect_ratio_idc]; 447 sar_height = kTableSarHeight[aspect_ratio_idc]; 448 } 449 } 450 } 451 452 if (sar_width == 0 || sar_height == 0) { 453 DVLOG(1) << "Unspecified SAR not supported"; 454 return false; 455 } 456 457 // TODO(damienv): a MAP unit can be either 16 or 32 pixels. 458 // although it's 16 pixels for progressive non MBAFF frames. 459 gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16, 460 (pic_height_in_map_units_minus1 + 1) * 16); 461 gfx::Rect visible_rect( 462 frame_crop_left_offset, 463 frame_crop_top_offset, 464 (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset, 465 (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset); 466 if (visible_rect.width() <= 0 || visible_rect.height() <= 0) 467 return false; 468 gfx::Size natural_size((visible_rect.width() * sar_width) / sar_height, 469 visible_rect.height()); 470 if (natural_size.width() == 0) 471 return false; 472 473 // TODO(damienv): 474 // Assuming the SPS is used right away by the PPS 475 // and the slice headers is a strong assumption. 476 // In theory, we should process the SPS and PPS 477 // and only when one of the slice header is switching 478 // the PPS id, the video decoder config should be changed. 479 VideoDecoderConfig video_decoder_config( 480 kCodecH264, 481 VIDEO_CODEC_PROFILE_UNKNOWN, // TODO(damienv) 482 VideoFrame::YV12, 483 coded_size, 484 visible_rect, 485 natural_size, 486 NULL, 0, 487 false); 488 489 if (!video_decoder_config.Matches(last_video_decoder_config_)) { 490 DVLOG(1) << "Profile IDC: " << profile_idc; 491 DVLOG(1) << "Level IDC: " << level_idc; 492 DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16; 493 DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16; 494 DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4; 495 DVLOG(1) << "SAR: width=" << sar_width << " height=" << sar_height; 496 last_video_decoder_config_ = video_decoder_config; 497 new_video_config_cb_.Run(video_decoder_config); 498 } 499 500 return true; 501 } 502 503 } // namespace mp2t 504 } // namespace media 505 506