Home | History | Annotate | Download | only in webm
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "media/webm/webm_cluster_parser.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/logging.h"
     10 #include "base/sys_byteorder.h"
     11 #include "media/base/buffers.h"
     12 #include "media/base/decrypt_config.h"
     13 #include "media/filters/webvtt_util.h"
     14 #include "media/webm/webm_constants.h"
     15 #include "media/webm/webm_crypto_helpers.h"
     16 #include "media/webm/webm_webvtt_parser.h"
     17 
     18 namespace media {
     19 
     20 WebMClusterParser::TextTrackIterator::TextTrackIterator(
     21     const TextTrackMap& text_track_map) :
     22     iterator_(text_track_map.begin()),
     23     iterator_end_(text_track_map.end()) {
     24 }
     25 
     26 WebMClusterParser::TextTrackIterator::TextTrackIterator(
     27     const TextTrackIterator& rhs) :
     28     iterator_(rhs.iterator_),
     29     iterator_end_(rhs.iterator_end_) {
     30 }
     31 
     32 WebMClusterParser::TextTrackIterator::~TextTrackIterator() {
     33 }
     34 
     35 bool WebMClusterParser::TextTrackIterator::operator()(
     36   int* track_num,
     37   const BufferQueue** buffers) {
     38   if (iterator_ == iterator_end_) {
     39     *track_num = 0;
     40     *buffers = NULL;
     41 
     42     return false;
     43   }
     44 
     45   *track_num = iterator_->first;
     46   *buffers = &iterator_->second.buffers();
     47 
     48   ++iterator_;
     49   return true;
     50 }
     51 
     52 WebMClusterParser::WebMClusterParser(
     53     int64 timecode_scale, int audio_track_num, int video_track_num,
     54     const WebMTracksParser::TextTracks& text_tracks,
     55     const std::set<int64>& ignored_tracks,
     56     const std::string& audio_encryption_key_id,
     57     const std::string& video_encryption_key_id,
     58     const LogCB& log_cb)
     59     : timecode_multiplier_(timecode_scale / 1000.0),
     60       ignored_tracks_(ignored_tracks),
     61       audio_encryption_key_id_(audio_encryption_key_id),
     62       video_encryption_key_id_(video_encryption_key_id),
     63       parser_(kWebMIdCluster, this),
     64       last_block_timecode_(-1),
     65       block_data_size_(-1),
     66       block_duration_(-1),
     67       block_add_id_(-1),
     68       block_additional_data_size_(-1),
     69       discard_padding_(-1),
     70       cluster_timecode_(-1),
     71       cluster_start_time_(kNoTimestamp()),
     72       cluster_ended_(false),
     73       audio_(audio_track_num, false),
     74       video_(video_track_num, true),
     75       log_cb_(log_cb) {
     76   for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
     77        it != text_tracks.end();
     78        ++it) {
     79     text_track_map_.insert(std::make_pair(it->first, Track(it->first, false)));
     80   }
     81 }
     82 
     83 WebMClusterParser::~WebMClusterParser() {}
     84 
     85 void WebMClusterParser::Reset() {
     86   last_block_timecode_ = -1;
     87   cluster_timecode_ = -1;
     88   cluster_start_time_ = kNoTimestamp();
     89   cluster_ended_ = false;
     90   parser_.Reset();
     91   audio_.Reset();
     92   video_.Reset();
     93   ResetTextTracks();
     94 }
     95 
     96 int WebMClusterParser::Parse(const uint8* buf, int size) {
     97   audio_.Reset();
     98   video_.Reset();
     99   ResetTextTracks();
    100 
    101   int result = parser_.Parse(buf, size);
    102 
    103   if (result < 0) {
    104     cluster_ended_ = false;
    105     return result;
    106   }
    107 
    108   cluster_ended_ = parser_.IsParsingComplete();
    109   if (cluster_ended_) {
    110     // If there were no buffers in this cluster, set the cluster start time to
    111     // be the |cluster_timecode_|.
    112     if (cluster_start_time_ == kNoTimestamp()) {
    113       DCHECK_GT(cluster_timecode_, -1);
    114       cluster_start_time_ = base::TimeDelta::FromMicroseconds(
    115           cluster_timecode_ * timecode_multiplier_);
    116     }
    117 
    118     // Reset the parser if we're done parsing so that
    119     // it is ready to accept another cluster on the next
    120     // call.
    121     parser_.Reset();
    122 
    123     last_block_timecode_ = -1;
    124     cluster_timecode_ = -1;
    125   }
    126 
    127   return result;
    128 }
    129 
    130 WebMClusterParser::TextTrackIterator
    131 WebMClusterParser::CreateTextTrackIterator() const {
    132   return TextTrackIterator(text_track_map_);
    133 }
    134 
    135 WebMParserClient* WebMClusterParser::OnListStart(int id) {
    136   if (id == kWebMIdCluster) {
    137     cluster_timecode_ = -1;
    138     cluster_start_time_ = kNoTimestamp();
    139   } else if (id == kWebMIdBlockGroup) {
    140     block_data_.reset();
    141     block_data_size_ = -1;
    142     block_duration_ = -1;
    143     discard_padding_ = -1;
    144     discard_padding_set_ = false;
    145   } else if (id == kWebMIdBlockAdditions) {
    146     block_add_id_ = -1;
    147     block_additional_data_.reset();
    148     block_additional_data_size_ = -1;
    149   }
    150 
    151   return this;
    152 }
    153 
    154 bool WebMClusterParser::OnListEnd(int id) {
    155   if (id != kWebMIdBlockGroup)
    156     return true;
    157 
    158   // Make sure the BlockGroup actually had a Block.
    159   if (block_data_size_ == -1) {
    160     MEDIA_LOG(log_cb_) << "Block missing from BlockGroup.";
    161     return false;
    162   }
    163 
    164   bool result = ParseBlock(false, block_data_.get(), block_data_size_,
    165                            block_additional_data_.get(),
    166                            block_additional_data_size_, block_duration_,
    167                            discard_padding_set_ ? discard_padding_ : 0);
    168   block_data_.reset();
    169   block_data_size_ = -1;
    170   block_duration_ = -1;
    171   block_add_id_ = -1;
    172   block_additional_data_.reset();
    173   block_additional_data_size_ = -1;
    174   discard_padding_ = -1;
    175   discard_padding_set_ = false;
    176   return result;
    177 }
    178 
    179 bool WebMClusterParser::OnUInt(int id, int64 val) {
    180   int64* dst;
    181   switch (id) {
    182     case kWebMIdTimecode:
    183       dst = &cluster_timecode_;
    184       break;
    185     case kWebMIdBlockDuration:
    186       dst = &block_duration_;
    187       break;
    188     case kWebMIdBlockAddID:
    189       dst = &block_add_id_;
    190       break;
    191     case kWebMIdDiscardPadding:
    192       if (discard_padding_set_)
    193         return false;
    194       discard_padding_set_ = true;
    195       discard_padding_ = val;
    196       return true;
    197     default:
    198       return true;
    199   }
    200   if (*dst != -1)
    201     return false;
    202   *dst = val;
    203   return true;
    204 }
    205 
    206 bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
    207                                    int size, const uint8* additional,
    208                                    int additional_size, int duration,
    209                                    int64 discard_padding) {
    210   if (size < 4)
    211     return false;
    212 
    213   // Return an error if the trackNum > 127. We just aren't
    214   // going to support large track numbers right now.
    215   if (!(buf[0] & 0x80)) {
    216     MEDIA_LOG(log_cb_) << "TrackNumber over 127 not supported";
    217     return false;
    218   }
    219 
    220   int track_num = buf[0] & 0x7f;
    221   int timecode = buf[1] << 8 | buf[2];
    222   int flags = buf[3] & 0xff;
    223   int lacing = (flags >> 1) & 0x3;
    224 
    225   if (lacing) {
    226     MEDIA_LOG(log_cb_) << "Lacing " << lacing << " is not supported yet.";
    227     return false;
    228   }
    229 
    230   // Sign extend negative timecode offsets.
    231   if (timecode & 0x8000)
    232     timecode |= ~0xffff;
    233 
    234   const uint8* frame_data = buf + 4;
    235   int frame_size = size - (frame_data - buf);
    236   return OnBlock(is_simple_block, track_num, timecode, duration, flags,
    237                  frame_data, frame_size, additional, additional_size,
    238                  discard_padding);
    239 }
    240 
    241 bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
    242   switch (id) {
    243     case kWebMIdSimpleBlock:
    244       return ParseBlock(true, data, size, NULL, -1, -1, 0);
    245 
    246     case kWebMIdBlock:
    247       if (block_data_) {
    248         MEDIA_LOG(log_cb_) << "More than 1 Block in a BlockGroup is not "
    249                               "supported.";
    250         return false;
    251       }
    252       block_data_.reset(new uint8[size]);
    253       memcpy(block_data_.get(), data, size);
    254       block_data_size_ = size;
    255       return true;
    256 
    257     case kWebMIdBlockAdditional: {
    258       uint64 block_add_id = base::HostToNet64(block_add_id_);
    259       if (block_additional_data_) {
    260         // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
    261         // as per matroska spec. But for now we don't have a use case to
    262         // support parsing of such files. Take a look at this again when such a
    263         // case arises.
    264         MEDIA_LOG(log_cb_) << "More than 1 BlockAdditional in a BlockGroup is "
    265                               "not supported.";
    266         return false;
    267       }
    268       // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
    269       // element's value in Big Endian format. This is done to mimic ffmpeg
    270       // demuxer's behavior.
    271       block_additional_data_size_ = size + sizeof(block_add_id);
    272       block_additional_data_.reset(new uint8[block_additional_data_size_]);
    273       memcpy(block_additional_data_.get(), &block_add_id,
    274              sizeof(block_add_id));
    275       memcpy(block_additional_data_.get() + 8, data, size);
    276       return true;
    277     }
    278 
    279     default:
    280       return true;
    281   }
    282 }
    283 
    284 bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
    285                                 int timecode,
    286                                 int  block_duration,
    287                                 int flags,
    288                                 const uint8* data, int size,
    289                                 const uint8* additional, int additional_size,
    290                                 int64 discard_padding) {
    291   DCHECK_GE(size, 0);
    292   if (cluster_timecode_ == -1) {
    293     MEDIA_LOG(log_cb_) << "Got a block before cluster timecode.";
    294     return false;
    295   }
    296 
    297   // TODO(acolwell): Should relative negative timecode offsets be rejected?  Or
    298   // only when the absolute timecode is negative?  See http://crbug.com/271794
    299   if (timecode < 0) {
    300     MEDIA_LOG(log_cb_) << "Got a block with negative timecode offset "
    301                        << timecode;
    302     return false;
    303   }
    304 
    305   if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
    306     MEDIA_LOG(log_cb_)
    307         << "Got a block with a timecode before the previous block.";
    308     return false;
    309   }
    310 
    311   Track* track = NULL;
    312   bool is_text = false;
    313   std::string encryption_key_id;
    314   if (track_num == audio_.track_num()) {
    315     track = &audio_;
    316     encryption_key_id = audio_encryption_key_id_;
    317   } else if (track_num == video_.track_num()) {
    318     track = &video_;
    319     encryption_key_id = video_encryption_key_id_;
    320   } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
    321     return true;
    322   } else if (Track* const text_track = FindTextTrack(track_num)) {
    323     if (is_simple_block)  // BlockGroup is required for WebVTT cues
    324       return false;
    325     if (block_duration < 0)  // not specified
    326       return false;
    327     track = text_track;
    328     is_text = true;
    329   } else {
    330     MEDIA_LOG(log_cb_) << "Unexpected track number " << track_num;
    331     return false;
    332   }
    333 
    334   last_block_timecode_ = timecode;
    335 
    336   base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
    337       (cluster_timecode_ + timecode) * timecode_multiplier_);
    338 
    339   // The first bit of the flags is set when a SimpleBlock contains only
    340   // keyframes. If this is a Block, then inspection of the payload is
    341   // necessary to determine whether it contains a keyframe or not.
    342   // http://www.matroska.org/technical/specs/index.html
    343   bool is_keyframe =
    344       is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
    345 
    346   scoped_refptr<StreamParserBuffer> buffer;
    347   if (!is_text) {
    348     buffer = StreamParserBuffer::CopyFrom(data, size,
    349                                           additional, additional_size,
    350                                           is_keyframe);
    351   } else {
    352     std::string id, settings, content;
    353     WebMWebVTTParser::Parse(data, size,
    354                             &id, &settings, &content);
    355 
    356     std::vector<uint8> side_data;
    357     MakeSideData(id.begin(), id.end(),
    358                  settings.begin(), settings.end(),
    359                  &side_data);
    360 
    361     buffer = StreamParserBuffer::CopyFrom(
    362         reinterpret_cast<const uint8*>(content.data()),
    363         content.length(),
    364         &side_data[0],
    365         side_data.size(),
    366         is_keyframe);
    367   }
    368 
    369   // Every encrypted Block has a signal byte and IV prepended to it. Current
    370   // encrypted WebM request for comments specification is here
    371   // http://wiki.webmproject.org/encryption/webm-encryption-rfc
    372   if (!encryption_key_id.empty()) {
    373     scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig(
    374         data, size,
    375         reinterpret_cast<const uint8*>(encryption_key_id.data()),
    376         encryption_key_id.size()));
    377     if (!config)
    378       return false;
    379     buffer->set_decrypt_config(config.Pass());
    380   }
    381 
    382   buffer->set_timestamp(timestamp);
    383   if (cluster_start_time_ == kNoTimestamp())
    384     cluster_start_time_ = timestamp;
    385 
    386   if (block_duration >= 0) {
    387     buffer->set_duration(base::TimeDelta::FromMicroseconds(
    388         block_duration * timecode_multiplier_));
    389   }
    390 
    391   if (discard_padding != 0) {
    392     buffer->set_discard_padding(base::TimeDelta::FromMicroseconds(
    393                                     discard_padding / 1000));
    394   }
    395 
    396   return track->AddBuffer(buffer);
    397 }
    398 
    399 WebMClusterParser::Track::Track(int track_num, bool is_video)
    400     : track_num_(track_num),
    401       is_video_(is_video) {
    402 }
    403 
    404 WebMClusterParser::Track::~Track() {}
    405 
    406 bool WebMClusterParser::Track::AddBuffer(
    407     const scoped_refptr<StreamParserBuffer>& buffer) {
    408   DVLOG(2) << "AddBuffer() : " << track_num_
    409            << " ts " << buffer->timestamp().InSecondsF()
    410            << " dur " << buffer->duration().InSecondsF()
    411            << " kf " << buffer->IsKeyframe()
    412            << " size " << buffer->data_size();
    413 
    414   buffers_.push_back(buffer);
    415   return true;
    416 }
    417 
    418 void WebMClusterParser::Track::Reset() {
    419   buffers_.clear();
    420 }
    421 
    422 bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const {
    423   // For now, assume that all blocks are keyframes for datatypes other than
    424   // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
    425   if (!is_video_)
    426     return true;
    427 
    428   // Make sure the block is big enough for the minimal keyframe header size.
    429   if (size < 7)
    430     return false;
    431 
    432   // The LSb of the first byte must be a 0 for a keyframe.
    433   // http://tools.ietf.org/html/rfc6386 Section 19.1
    434   if ((data[0] & 0x01) != 0)
    435     return false;
    436 
    437   // Verify VP8 keyframe startcode.
    438   // http://tools.ietf.org/html/rfc6386 Section 19.1
    439   if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
    440     return false;
    441 
    442   return true;
    443 }
    444 
    445 void WebMClusterParser::ResetTextTracks() {
    446   for (TextTrackMap::iterator it = text_track_map_.begin();
    447        it != text_track_map_.end();
    448        ++it) {
    449     it->second.Reset();
    450   }
    451 }
    452 
    453 WebMClusterParser::Track*
    454 WebMClusterParser::FindTextTrack(int track_num) {
    455   const TextTrackMap::iterator it = text_track_map_.find(track_num);
    456 
    457   if (it == text_track_map_.end())
    458     return NULL;
    459 
    460   return &it->second;
    461 }
    462 
    463 }  // namespace media
    464