Home | History | Annotate | Download | only in mp2t
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "media/mp2t/es_parser_h264.h"
      6 
      7 #include "base/basictypes.h"
      8 #include "base/logging.h"
      9 #include "media/base/bit_reader.h"
     10 #include "media/base/buffers.h"
     11 #include "media/base/stream_parser_buffer.h"
     12 #include "media/base/video_frame.h"
     13 #include "media/mp2t/mp2t_common.h"
     14 #include "ui/gfx/rect.h"
     15 #include "ui/gfx/size.h"
     16 
     17 static const int kExtendedSar = 255;
     18 
     19 // ISO 14496 part 10
     20 // VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator"
     21 static const int kTableSarWidth[14] = {
     22   0, 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160
     23 };
     24 
     25 static const int kTableSarHeight[14] = {
     26   0, 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99
     27 };
     28 
     29 // Remove the start code emulation prevention ( 0x000003 )
     30 // and return the size of the converted buffer.
     31 // Note: Size of |buf_rbsp| should be at least |size| to accomodate
     32 // the worst case.
     33 static int ConvertToRbsp(const uint8* buf, int size, uint8* buf_rbsp) {
     34   int rbsp_size = 0;
     35   int zero_count = 0;
     36   for (int k = 0; k < size; k++) {
     37     if (buf[k] == 0x3 && zero_count >= 2) {
     38       zero_count = 0;
     39       continue;
     40     }
     41     if (buf[k] == 0)
     42       zero_count++;
     43     else
     44       zero_count = 0;
     45     buf_rbsp[rbsp_size++] = buf[k];
     46   }
     47   return rbsp_size;
     48 }
     49 
     50 namespace media {
     51 namespace mp2t {
     52 
     53 // ISO 14496 - Part 10: Table 7-1 "NAL unit type codes"
     54 enum NalUnitType {
     55   kNalUnitTypeNonIdrSlice = 1,
     56   kNalUnitTypeIdrSlice = 5,
     57   kNalUnitTypeSPS = 7,
     58   kNalUnitTypePPS = 8,
     59   kNalUnitTypeAUD = 9,
     60 };
     61 
     62 class BitReaderH264 : public BitReader {
     63  public:
     64   BitReaderH264(const uint8* data, off_t size)
     65     : BitReader(data, size) { }
     66 
     67   // Read an unsigned exp-golomb value.
     68   // Return true if successful.
     69   bool ReadBitsExpGolomb(uint32* exp_golomb_value);
     70 };
     71 
     72 bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) {
     73   // Get the number of leading zeros.
     74   int zero_count = 0;
     75   while (true) {
     76     int one_bit;
     77     RCHECK(ReadBits(1, &one_bit));
     78     if (one_bit != 0)
     79       break;
     80     zero_count++;
     81   }
     82 
     83   // If zero_count is greater than 31, the calculated value will overflow.
     84   if (zero_count > 31) {
     85     SkipBits(zero_count);
     86     return false;
     87   }
     88 
     89   // Read the actual value.
     90   uint32 base = (1 << zero_count) - 1;
     91   uint32 offset;
     92   RCHECK(ReadBits(zero_count, &offset));
     93   *exp_golomb_value = base + offset;
     94 
     95   return true;
     96 }
     97 
     98 EsParserH264::EsParserH264(
     99     const NewVideoConfigCB& new_video_config_cb,
    100     const EmitBufferCB& emit_buffer_cb)
    101   : new_video_config_cb_(new_video_config_cb),
    102     emit_buffer_cb_(emit_buffer_cb),
    103     es_pos_(0),
    104     current_nal_pos_(-1),
    105     current_access_unit_pos_(-1),
    106     is_key_frame_(false) {
    107 }
    108 
    109 EsParserH264::~EsParserH264() {
    110 }
    111 
    112 bool EsParserH264::Parse(const uint8* buf, int size,
    113                          base::TimeDelta pts,
    114                          base::TimeDelta dts) {
    115   // Note: Parse is invoked each time a PES packet has been reassembled.
    116   // Unfortunately, a PES packet does not necessarily map
    117   // to an h264 access unit, although the HLS recommendation is to use one PES
    118   // for each access unit (but this is just a recommendation and some streams
    119   // do not comply with this recommendation).
    120 
    121   // Link position |raw_es_size| in the ES stream with a timing descriptor.
    122   // HLS recommendation: "In AVC video, you should have both a DTS and a
    123   // PTS in each PES header".
    124   if (dts == kNoTimestamp() && pts == kNoTimestamp()) {
    125     DVLOG(1) << "A timestamp must be provided for each reassembled PES";
    126     return false;
    127   }
    128   TimingDesc timing_desc;
    129   timing_desc.pts = pts;
    130   timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts;
    131 
    132   int raw_es_size;
    133   const uint8* raw_es;
    134   es_byte_queue_.Peek(&raw_es, &raw_es_size);
    135   timing_desc_list_.push_back(
    136       std::pair<int, TimingDesc>(raw_es_size, timing_desc));
    137 
    138   // Add the incoming bytes to the ES queue.
    139   es_byte_queue_.Push(buf, size);
    140 
    141   // Add NALs from the incoming buffer.
    142   if (!ParseInternal())
    143     return false;
    144 
    145   // Discard emitted frames
    146   // or every byte that was parsed so far if there is no current frame.
    147   int skip_count =
    148       (current_access_unit_pos_ >= 0) ? current_access_unit_pos_ : es_pos_;
    149   DiscardEs(skip_count);
    150 
    151   return true;
    152 }
    153 
    154 void EsParserH264::Flush() {
    155   if (current_access_unit_pos_ < 0)
    156     return;
    157 
    158   // Force emitting the last access unit.
    159   int next_aud_pos;
    160   const uint8* raw_es;
    161   es_byte_queue_.Peek(&raw_es, &next_aud_pos);
    162   EmitFrameIfNeeded(next_aud_pos);
    163   current_nal_pos_ = -1;
    164   StartFrame(-1);
    165 
    166   // Discard the emitted frame.
    167   DiscardEs(next_aud_pos);
    168 }
    169 
    170 void EsParserH264::Reset() {
    171   DVLOG(1) << "EsParserH264::Reset";
    172   es_byte_queue_.Reset();
    173   timing_desc_list_.clear();
    174   es_pos_ = 0;
    175   current_nal_pos_ = -1;
    176   StartFrame(-1);
    177   last_video_decoder_config_ = VideoDecoderConfig();
    178 }
    179 
    180 bool EsParserH264::ParseInternal() {
    181   int raw_es_size;
    182   const uint8* raw_es;
    183   es_byte_queue_.Peek(&raw_es, &raw_es_size);
    184 
    185   DCHECK_GE(es_pos_, 0);
    186   DCHECK_LT(es_pos_, raw_es_size);
    187 
    188   // Resume h264 es parsing where it was left.
    189   for ( ; es_pos_ < raw_es_size - 4; es_pos_++) {
    190     // Make sure the syncword is either 00 00 00 01 or 00 00 01
    191     if (raw_es[es_pos_ + 0] != 0 || raw_es[es_pos_ + 1] != 0)
    192       continue;
    193     int syncword_length = 0;
    194     if (raw_es[es_pos_ + 2] == 0 && raw_es[es_pos_ + 3] == 1)
    195       syncword_length = 4;
    196     else if (raw_es[es_pos_ + 2] == 1)
    197       syncword_length = 3;
    198     else
    199       continue;
    200 
    201     // Parse the current NAL (and the new NAL then becomes the current one).
    202     if (current_nal_pos_ >= 0) {
    203       int nal_size = es_pos_ - current_nal_pos_;
    204       DCHECK_GT(nal_size, 0);
    205       RCHECK(NalParser(&raw_es[current_nal_pos_], nal_size));
    206     }
    207     current_nal_pos_ = es_pos_ + syncword_length;
    208 
    209     // Retrieve the NAL type.
    210     int nal_header = raw_es[current_nal_pos_];
    211     int forbidden_zero_bit = (nal_header >> 7) & 0x1;
    212     RCHECK(forbidden_zero_bit == 0);
    213     NalUnitType nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f);
    214     DVLOG(LOG_LEVEL_ES) << "nal: offset=" << es_pos_
    215                         << " type=" << nal_unit_type;
    216 
    217     // Emit a frame if needed.
    218     if (nal_unit_type == kNalUnitTypeAUD)
    219       EmitFrameIfNeeded(es_pos_);
    220 
    221     // Skip the syncword.
    222     es_pos_ += syncword_length;
    223   }
    224 
    225   return true;
    226 }
    227 
    228 void EsParserH264::EmitFrameIfNeeded(int next_aud_pos) {
    229   // There is no current frame: start a new frame.
    230   if (current_access_unit_pos_ < 0) {
    231     StartFrame(next_aud_pos);
    232     return;
    233   }
    234 
    235   // Get the access unit timing info.
    236   TimingDesc current_timing_desc;
    237   while (!timing_desc_list_.empty() &&
    238          timing_desc_list_.front().first <= current_access_unit_pos_) {
    239     current_timing_desc = timing_desc_list_.front().second;
    240     timing_desc_list_.pop_front();
    241   }
    242 
    243   // Emit a frame.
    244   int raw_es_size;
    245   const uint8* raw_es;
    246   es_byte_queue_.Peek(&raw_es, &raw_es_size);
    247   int access_unit_size = next_aud_pos - current_access_unit_pos_;
    248   scoped_refptr<StreamParserBuffer> stream_parser_buffer =
    249       StreamParserBuffer::CopyFrom(
    250           &raw_es[current_access_unit_pos_],
    251           access_unit_size,
    252           is_key_frame_);
    253   stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts);
    254   stream_parser_buffer->set_timestamp(current_timing_desc.pts);
    255   emit_buffer_cb_.Run(stream_parser_buffer);
    256 
    257   // Set the current frame position to the next AUD position.
    258   StartFrame(next_aud_pos);
    259 }
    260 
    261 void EsParserH264::StartFrame(int aud_pos) {
    262   // Two cases:
    263   // - if aud_pos < 0, clear the current frame and set |is_key_frame| to a
    264   // default value (false).
    265   // - if aud_pos >= 0, start a new frame and set |is_key_frame| to true
    266   // |is_key_frame_| will be updated while parsing the NALs of that frame.
    267   // If any NAL is a non IDR NAL, it will be set to false.
    268   current_access_unit_pos_ = aud_pos;
    269   is_key_frame_ = (aud_pos >= 0);
    270 }
    271 
    272 void EsParserH264::DiscardEs(int nbytes) {
    273   DCHECK_GE(nbytes, 0);
    274   if (nbytes == 0)
    275     return;
    276 
    277   // Update the position of
    278   // - the parser,
    279   // - the current NAL,
    280   // - the current access unit.
    281   es_pos_ -= nbytes;
    282   if (es_pos_ < 0)
    283     es_pos_ = 0;
    284 
    285   if (current_nal_pos_ >= 0) {
    286     DCHECK_GE(current_nal_pos_, nbytes);
    287     current_nal_pos_ -= nbytes;
    288   }
    289   if (current_access_unit_pos_ >= 0) {
    290     DCHECK_GE(current_access_unit_pos_, nbytes);
    291     current_access_unit_pos_ -= nbytes;
    292   }
    293 
    294   // Update the timing information accordingly.
    295   std::list<std::pair<int, TimingDesc> >::iterator timing_it
    296       = timing_desc_list_.begin();
    297   for (; timing_it != timing_desc_list_.end(); ++timing_it)
    298     timing_it->first -= nbytes;
    299 
    300   // Discard |nbytes| of ES.
    301   es_byte_queue_.Pop(nbytes);
    302 }
    303 
    304 bool EsParserH264::NalParser(const uint8* buf, int size) {
    305   // Get the NAL header.
    306   if (size < 1) {
    307     DVLOG(1) << "NalParser: incomplete NAL";
    308     return false;
    309   }
    310   int nal_header = buf[0];
    311   buf += 1;
    312   size -= 1;
    313 
    314   int forbidden_zero_bit = (nal_header >> 7) & 0x1;
    315   if (forbidden_zero_bit != 0)
    316     return false;
    317   int nal_ref_idc = (nal_header >> 5) & 0x3;
    318   int nal_unit_type = nal_header & 0x1f;
    319 
    320   // Process the NAL content.
    321   switch (nal_unit_type) {
    322     case kNalUnitTypeSPS:
    323       DVLOG(LOG_LEVEL_ES) << "NAL: SPS";
    324       // |nal_ref_idc| should not be 0 for a SPS.
    325       if (nal_ref_idc == 0)
    326         return false;
    327       return ProcessSPS(buf, size);
    328     case kNalUnitTypeIdrSlice:
    329       DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice";
    330       return true;
    331     case kNalUnitTypeNonIdrSlice:
    332       DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice";
    333       is_key_frame_ = false;
    334       return true;
    335     case kNalUnitTypePPS:
    336       DVLOG(LOG_LEVEL_ES) << "NAL: PPS";
    337       return true;
    338     case  kNalUnitTypeAUD:
    339       DVLOG(LOG_LEVEL_ES) << "NAL: AUD";
    340       return true;
    341     default:
    342       DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type;
    343       return true;
    344   }
    345 
    346   NOTREACHED();
    347   return false;
    348 }
    349 
    350 bool EsParserH264::ProcessSPS(const uint8* buf, int size) {
    351   if (size <= 0)
    352     return false;
    353 
    354   // Removes start code emulation prevention.
    355   // TODO(damienv): refactoring in media/base
    356   // so as to have a unique H264 bit reader in Chrome.
    357   scoped_ptr<uint8[]> buf_rbsp(new uint8[size]);
    358   int rbsp_size = ConvertToRbsp(buf, size, buf_rbsp.get());
    359 
    360   BitReaderH264 bit_reader(buf_rbsp.get(), rbsp_size);
    361 
    362   int profile_idc;
    363   int constraint_setX_flag;
    364   int level_idc;
    365   uint32 seq_parameter_set_id;
    366   uint32 log2_max_frame_num_minus4;
    367   uint32 pic_order_cnt_type;
    368   RCHECK(bit_reader.ReadBits(8, &profile_idc));
    369   RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag));
    370   RCHECK(bit_reader.ReadBits(8, &level_idc));
    371   RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id));
    372   RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4));
    373   RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type));
    374 
    375   // |pic_order_cnt_type| shall be in the range of 0 to 2.
    376   RCHECK(pic_order_cnt_type <= 2);
    377   if (pic_order_cnt_type == 0) {
    378     uint32 log2_max_pic_order_cnt_lsb_minus4;
    379     RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4));
    380   } else if (pic_order_cnt_type == 1) {
    381     // Note: |offset_for_non_ref_pic| and |offset_for_top_to_bottom_field|
    382     // corresponds to their codenum not to their actual value.
    383     int delta_pic_order_always_zero_flag;
    384     uint32 offset_for_non_ref_pic;
    385     uint32 offset_for_top_to_bottom_field;
    386     uint32 num_ref_frames_in_pic_order_cnt_cycle;
    387     RCHECK(bit_reader.ReadBits(1, &delta_pic_order_always_zero_flag));
    388     RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_non_ref_pic));
    389     RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_top_to_bottom_field));
    390     RCHECK(
    391         bit_reader.ReadBitsExpGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
    392     for (uint32 i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) {
    393       uint32 offset_for_ref_frame_codenum;
    394       RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_ref_frame_codenum));
    395     }
    396   }
    397 
    398   uint32 num_ref_frames;
    399   int gaps_in_frame_num_value_allowed_flag;
    400   uint32 pic_width_in_mbs_minus1;
    401   uint32 pic_height_in_map_units_minus1;
    402   RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames));
    403   RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag));
    404   RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1));
    405   RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1));
    406 
    407   int frame_mbs_only_flag;
    408   RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag));
    409   if (!frame_mbs_only_flag) {
    410     int mb_adaptive_frame_field_flag;
    411     RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag));
    412   }
    413 
    414   int direct_8x8_inference_flag;
    415   RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag));
    416 
    417   int frame_cropping_flag;
    418   uint32 frame_crop_left_offset = 0;
    419   uint32 frame_crop_right_offset = 0;
    420   uint32 frame_crop_top_offset = 0;
    421   uint32 frame_crop_bottom_offset = 0;
    422   RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag));
    423   if (frame_cropping_flag) {
    424     RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset));
    425     RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset));
    426     RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset));
    427     RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset));
    428   }
    429 
    430   int vui_parameters_present_flag;
    431   RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag));
    432   int sar_width = 1;
    433   int sar_height = 1;
    434   if (vui_parameters_present_flag) {
    435     // Read only the aspect ratio information from the VUI section.
    436     // TODO(damienv): check whether other VUI info are useful.
    437     int aspect_ratio_info_present_flag;
    438     RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag));
    439     if (aspect_ratio_info_present_flag) {
    440       int aspect_ratio_idc;
    441       RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc));
    442       if (aspect_ratio_idc == kExtendedSar) {
    443         RCHECK(bit_reader.ReadBits(16, &sar_width));
    444         RCHECK(bit_reader.ReadBits(16, &sar_height));
    445       } else if (aspect_ratio_idc < 14) {
    446         sar_width = kTableSarWidth[aspect_ratio_idc];
    447         sar_height = kTableSarHeight[aspect_ratio_idc];
    448       }
    449     }
    450   }
    451 
    452   if (sar_width == 0 || sar_height == 0) {
    453     DVLOG(1) << "Unspecified SAR not supported";
    454     return false;
    455   }
    456 
    457   // TODO(damienv): a MAP unit can be either 16 or 32 pixels.
    458   // although it's 16 pixels for progressive non MBAFF frames.
    459   gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16,
    460                        (pic_height_in_map_units_minus1 + 1) * 16);
    461   gfx::Rect visible_rect(
    462       frame_crop_left_offset,
    463       frame_crop_top_offset,
    464       (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset,
    465       (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset);
    466   if (visible_rect.width() <= 0 || visible_rect.height() <= 0)
    467     return false;
    468   gfx::Size natural_size((visible_rect.width() * sar_width) / sar_height,
    469                          visible_rect.height());
    470   if (natural_size.width() == 0)
    471     return false;
    472 
    473   // TODO(damienv):
    474   // Assuming the SPS is used right away by the PPS
    475   // and the slice headers is a strong assumption.
    476   // In theory, we should process the SPS and PPS
    477   // and only when one of the slice header is switching
    478   // the PPS id, the video decoder config should be changed.
    479   VideoDecoderConfig video_decoder_config(
    480       kCodecH264,
    481       VIDEO_CODEC_PROFILE_UNKNOWN,    // TODO(damienv)
    482       VideoFrame::YV12,
    483       coded_size,
    484       visible_rect,
    485       natural_size,
    486       NULL, 0,
    487       false);
    488 
    489   if (!video_decoder_config.Matches(last_video_decoder_config_)) {
    490     DVLOG(1) << "Profile IDC: " << profile_idc;
    491     DVLOG(1) << "Level IDC: " << level_idc;
    492     DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16;
    493     DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16;
    494     DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4;
    495     DVLOG(1) << "SAR: width=" << sar_width << " height=" << sar_height;
    496     last_video_decoder_config_ = video_decoder_config;
    497     new_video_config_cb_.Run(video_decoder_config);
    498   }
    499 
    500   return true;
    501 }
    502 
    503 }  // namespace mp2t
    504 }  // namespace media
    505 
    506