1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "media/webm/webm_cluster_parser.h" 6 7 #include <vector> 8 9 #include "base/logging.h" 10 #include "base/sys_byteorder.h" 11 #include "media/base/buffers.h" 12 #include "media/base/decrypt_config.h" 13 #include "media/filters/webvtt_util.h" 14 #include "media/webm/webm_constants.h" 15 #include "media/webm/webm_crypto_helpers.h" 16 #include "media/webm/webm_webvtt_parser.h" 17 18 namespace media { 19 20 WebMClusterParser::TextTrackIterator::TextTrackIterator( 21 const TextTrackMap& text_track_map) : 22 iterator_(text_track_map.begin()), 23 iterator_end_(text_track_map.end()) { 24 } 25 26 WebMClusterParser::TextTrackIterator::TextTrackIterator( 27 const TextTrackIterator& rhs) : 28 iterator_(rhs.iterator_), 29 iterator_end_(rhs.iterator_end_) { 30 } 31 32 WebMClusterParser::TextTrackIterator::~TextTrackIterator() { 33 } 34 35 bool WebMClusterParser::TextTrackIterator::operator()( 36 int* track_num, 37 const BufferQueue** buffers) { 38 if (iterator_ == iterator_end_) { 39 *track_num = 0; 40 *buffers = NULL; 41 42 return false; 43 } 44 45 *track_num = iterator_->first; 46 *buffers = &iterator_->second.buffers(); 47 48 ++iterator_; 49 return true; 50 } 51 52 WebMClusterParser::WebMClusterParser( 53 int64 timecode_scale, int audio_track_num, int video_track_num, 54 const WebMTracksParser::TextTracks& text_tracks, 55 const std::set<int64>& ignored_tracks, 56 const std::string& audio_encryption_key_id, 57 const std::string& video_encryption_key_id, 58 const LogCB& log_cb) 59 : timecode_multiplier_(timecode_scale / 1000.0), 60 ignored_tracks_(ignored_tracks), 61 audio_encryption_key_id_(audio_encryption_key_id), 62 video_encryption_key_id_(video_encryption_key_id), 63 parser_(kWebMIdCluster, this), 64 last_block_timecode_(-1), 65 block_data_size_(-1), 66 block_duration_(-1), 67 block_add_id_(-1), 68 block_additional_data_size_(-1), 69 discard_padding_(-1), 70 cluster_timecode_(-1), 71 cluster_start_time_(kNoTimestamp()), 72 cluster_ended_(false), 73 audio_(audio_track_num, false), 74 video_(video_track_num, true), 75 log_cb_(log_cb) { 76 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin(); 77 it != text_tracks.end(); 78 ++it) { 79 text_track_map_.insert(std::make_pair(it->first, Track(it->first, false))); 80 } 81 } 82 83 WebMClusterParser::~WebMClusterParser() {} 84 85 void WebMClusterParser::Reset() { 86 last_block_timecode_ = -1; 87 cluster_timecode_ = -1; 88 cluster_start_time_ = kNoTimestamp(); 89 cluster_ended_ = false; 90 parser_.Reset(); 91 audio_.Reset(); 92 video_.Reset(); 93 ResetTextTracks(); 94 } 95 96 int WebMClusterParser::Parse(const uint8* buf, int size) { 97 audio_.Reset(); 98 video_.Reset(); 99 ResetTextTracks(); 100 101 int result = parser_.Parse(buf, size); 102 103 if (result < 0) { 104 cluster_ended_ = false; 105 return result; 106 } 107 108 cluster_ended_ = parser_.IsParsingComplete(); 109 if (cluster_ended_) { 110 // If there were no buffers in this cluster, set the cluster start time to 111 // be the |cluster_timecode_|. 112 if (cluster_start_time_ == kNoTimestamp()) { 113 DCHECK_GT(cluster_timecode_, -1); 114 cluster_start_time_ = base::TimeDelta::FromMicroseconds( 115 cluster_timecode_ * timecode_multiplier_); 116 } 117 118 // Reset the parser if we're done parsing so that 119 // it is ready to accept another cluster on the next 120 // call. 121 parser_.Reset(); 122 123 last_block_timecode_ = -1; 124 cluster_timecode_ = -1; 125 } 126 127 return result; 128 } 129 130 WebMClusterParser::TextTrackIterator 131 WebMClusterParser::CreateTextTrackIterator() const { 132 return TextTrackIterator(text_track_map_); 133 } 134 135 WebMParserClient* WebMClusterParser::OnListStart(int id) { 136 if (id == kWebMIdCluster) { 137 cluster_timecode_ = -1; 138 cluster_start_time_ = kNoTimestamp(); 139 } else if (id == kWebMIdBlockGroup) { 140 block_data_.reset(); 141 block_data_size_ = -1; 142 block_duration_ = -1; 143 discard_padding_ = -1; 144 discard_padding_set_ = false; 145 } else if (id == kWebMIdBlockAdditions) { 146 block_add_id_ = -1; 147 block_additional_data_.reset(); 148 block_additional_data_size_ = -1; 149 } 150 151 return this; 152 } 153 154 bool WebMClusterParser::OnListEnd(int id) { 155 if (id != kWebMIdBlockGroup) 156 return true; 157 158 // Make sure the BlockGroup actually had a Block. 159 if (block_data_size_ == -1) { 160 MEDIA_LOG(log_cb_) << "Block missing from BlockGroup."; 161 return false; 162 } 163 164 bool result = ParseBlock(false, block_data_.get(), block_data_size_, 165 block_additional_data_.get(), 166 block_additional_data_size_, block_duration_, 167 discard_padding_set_ ? discard_padding_ : 0); 168 block_data_.reset(); 169 block_data_size_ = -1; 170 block_duration_ = -1; 171 block_add_id_ = -1; 172 block_additional_data_.reset(); 173 block_additional_data_size_ = -1; 174 discard_padding_ = -1; 175 discard_padding_set_ = false; 176 return result; 177 } 178 179 bool WebMClusterParser::OnUInt(int id, int64 val) { 180 int64* dst; 181 switch (id) { 182 case kWebMIdTimecode: 183 dst = &cluster_timecode_; 184 break; 185 case kWebMIdBlockDuration: 186 dst = &block_duration_; 187 break; 188 case kWebMIdBlockAddID: 189 dst = &block_add_id_; 190 break; 191 case kWebMIdDiscardPadding: 192 if (discard_padding_set_) 193 return false; 194 discard_padding_set_ = true; 195 discard_padding_ = val; 196 return true; 197 default: 198 return true; 199 } 200 if (*dst != -1) 201 return false; 202 *dst = val; 203 return true; 204 } 205 206 bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf, 207 int size, const uint8* additional, 208 int additional_size, int duration, 209 int64 discard_padding) { 210 if (size < 4) 211 return false; 212 213 // Return an error if the trackNum > 127. We just aren't 214 // going to support large track numbers right now. 215 if (!(buf[0] & 0x80)) { 216 MEDIA_LOG(log_cb_) << "TrackNumber over 127 not supported"; 217 return false; 218 } 219 220 int track_num = buf[0] & 0x7f; 221 int timecode = buf[1] << 8 | buf[2]; 222 int flags = buf[3] & 0xff; 223 int lacing = (flags >> 1) & 0x3; 224 225 if (lacing) { 226 MEDIA_LOG(log_cb_) << "Lacing " << lacing << " is not supported yet."; 227 return false; 228 } 229 230 // Sign extend negative timecode offsets. 231 if (timecode & 0x8000) 232 timecode |= ~0xffff; 233 234 const uint8* frame_data = buf + 4; 235 int frame_size = size - (frame_data - buf); 236 return OnBlock(is_simple_block, track_num, timecode, duration, flags, 237 frame_data, frame_size, additional, additional_size, 238 discard_padding); 239 } 240 241 bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { 242 switch (id) { 243 case kWebMIdSimpleBlock: 244 return ParseBlock(true, data, size, NULL, -1, -1, 0); 245 246 case kWebMIdBlock: 247 if (block_data_) { 248 MEDIA_LOG(log_cb_) << "More than 1 Block in a BlockGroup is not " 249 "supported."; 250 return false; 251 } 252 block_data_.reset(new uint8[size]); 253 memcpy(block_data_.get(), data, size); 254 block_data_size_ = size; 255 return true; 256 257 case kWebMIdBlockAdditional: { 258 uint64 block_add_id = base::HostToNet64(block_add_id_); 259 if (block_additional_data_) { 260 // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed 261 // as per matroska spec. But for now we don't have a use case to 262 // support parsing of such files. Take a look at this again when such a 263 // case arises. 264 MEDIA_LOG(log_cb_) << "More than 1 BlockAdditional in a BlockGroup is " 265 "not supported."; 266 return false; 267 } 268 // First 8 bytes of side_data in DecoderBuffer is the BlockAddID 269 // element's value in Big Endian format. This is done to mimic ffmpeg 270 // demuxer's behavior. 271 block_additional_data_size_ = size + sizeof(block_add_id); 272 block_additional_data_.reset(new uint8[block_additional_data_size_]); 273 memcpy(block_additional_data_.get(), &block_add_id, 274 sizeof(block_add_id)); 275 memcpy(block_additional_data_.get() + 8, data, size); 276 return true; 277 } 278 279 default: 280 return true; 281 } 282 } 283 284 bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, 285 int timecode, 286 int block_duration, 287 int flags, 288 const uint8* data, int size, 289 const uint8* additional, int additional_size, 290 int64 discard_padding) { 291 DCHECK_GE(size, 0); 292 if (cluster_timecode_ == -1) { 293 MEDIA_LOG(log_cb_) << "Got a block before cluster timecode."; 294 return false; 295 } 296 297 // TODO(acolwell): Should relative negative timecode offsets be rejected? Or 298 // only when the absolute timecode is negative? See http://crbug.com/271794 299 if (timecode < 0) { 300 MEDIA_LOG(log_cb_) << "Got a block with negative timecode offset " 301 << timecode; 302 return false; 303 } 304 305 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) { 306 MEDIA_LOG(log_cb_) 307 << "Got a block with a timecode before the previous block."; 308 return false; 309 } 310 311 Track* track = NULL; 312 bool is_text = false; 313 std::string encryption_key_id; 314 if (track_num == audio_.track_num()) { 315 track = &audio_; 316 encryption_key_id = audio_encryption_key_id_; 317 } else if (track_num == video_.track_num()) { 318 track = &video_; 319 encryption_key_id = video_encryption_key_id_; 320 } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) { 321 return true; 322 } else if (Track* const text_track = FindTextTrack(track_num)) { 323 if (is_simple_block) // BlockGroup is required for WebVTT cues 324 return false; 325 if (block_duration < 0) // not specified 326 return false; 327 track = text_track; 328 is_text = true; 329 } else { 330 MEDIA_LOG(log_cb_) << "Unexpected track number " << track_num; 331 return false; 332 } 333 334 last_block_timecode_ = timecode; 335 336 base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds( 337 (cluster_timecode_ + timecode) * timecode_multiplier_); 338 339 // The first bit of the flags is set when a SimpleBlock contains only 340 // keyframes. If this is a Block, then inspection of the payload is 341 // necessary to determine whether it contains a keyframe or not. 342 // http://www.matroska.org/technical/specs/index.html 343 bool is_keyframe = 344 is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size); 345 346 scoped_refptr<StreamParserBuffer> buffer; 347 if (!is_text) { 348 buffer = StreamParserBuffer::CopyFrom(data, size, 349 additional, additional_size, 350 is_keyframe); 351 } else { 352 std::string id, settings, content; 353 WebMWebVTTParser::Parse(data, size, 354 &id, &settings, &content); 355 356 std::vector<uint8> side_data; 357 MakeSideData(id.begin(), id.end(), 358 settings.begin(), settings.end(), 359 &side_data); 360 361 buffer = StreamParserBuffer::CopyFrom( 362 reinterpret_cast<const uint8*>(content.data()), 363 content.length(), 364 &side_data[0], 365 side_data.size(), 366 is_keyframe); 367 } 368 369 // Every encrypted Block has a signal byte and IV prepended to it. Current 370 // encrypted WebM request for comments specification is here 371 // http://wiki.webmproject.org/encryption/webm-encryption-rfc 372 if (!encryption_key_id.empty()) { 373 scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig( 374 data, size, 375 reinterpret_cast<const uint8*>(encryption_key_id.data()), 376 encryption_key_id.size())); 377 if (!config) 378 return false; 379 buffer->set_decrypt_config(config.Pass()); 380 } 381 382 buffer->set_timestamp(timestamp); 383 if (cluster_start_time_ == kNoTimestamp()) 384 cluster_start_time_ = timestamp; 385 386 if (block_duration >= 0) { 387 buffer->set_duration(base::TimeDelta::FromMicroseconds( 388 block_duration * timecode_multiplier_)); 389 } 390 391 if (discard_padding != 0) { 392 buffer->set_discard_padding(base::TimeDelta::FromMicroseconds( 393 discard_padding / 1000)); 394 } 395 396 return track->AddBuffer(buffer); 397 } 398 399 WebMClusterParser::Track::Track(int track_num, bool is_video) 400 : track_num_(track_num), 401 is_video_(is_video) { 402 } 403 404 WebMClusterParser::Track::~Track() {} 405 406 bool WebMClusterParser::Track::AddBuffer( 407 const scoped_refptr<StreamParserBuffer>& buffer) { 408 DVLOG(2) << "AddBuffer() : " << track_num_ 409 << " ts " << buffer->timestamp().InSecondsF() 410 << " dur " << buffer->duration().InSecondsF() 411 << " kf " << buffer->IsKeyframe() 412 << " size " << buffer->data_size(); 413 414 buffers_.push_back(buffer); 415 return true; 416 } 417 418 void WebMClusterParser::Track::Reset() { 419 buffers_.clear(); 420 } 421 422 bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const { 423 // For now, assume that all blocks are keyframes for datatypes other than 424 // video. This is a valid assumption for Vorbis, WebVTT, & Opus. 425 if (!is_video_) 426 return true; 427 428 // Make sure the block is big enough for the minimal keyframe header size. 429 if (size < 7) 430 return false; 431 432 // The LSb of the first byte must be a 0 for a keyframe. 433 // http://tools.ietf.org/html/rfc6386 Section 19.1 434 if ((data[0] & 0x01) != 0) 435 return false; 436 437 // Verify VP8 keyframe startcode. 438 // http://tools.ietf.org/html/rfc6386 Section 19.1 439 if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a) 440 return false; 441 442 return true; 443 } 444 445 void WebMClusterParser::ResetTextTracks() { 446 for (TextTrackMap::iterator it = text_track_map_.begin(); 447 it != text_track_map_.end(); 448 ++it) { 449 it->second.Reset(); 450 } 451 } 452 453 WebMClusterParser::Track* 454 WebMClusterParser::FindTextTrack(int track_num) { 455 const TextTrackMap::iterator it = text_track_map_.find(track_num); 456 457 if (it == text_track_map_.end()) 458 return NULL; 459 460 return &it->second; 461 } 462 463 } // namespace media 464