media/webm/webm_cluster_parser.cc

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/webm/webm_cluster_parser.h"

#include <vector>

#include "base/logging.h"
#include "base/sys_byteorder.h"
#include "media/base/buffers.h"
#include "media/base/decrypt_config.h"
#include "media/webm/webm_constants.h"
#include "media/webm/webm_crypto_helpers.h"

namespace media {

WebMClusterParser::TextTrackIterator::TextTrackIterator(
    const TextTrackMap& text_track_map) :
    iterator_(text_track_map.begin()),
    iterator_end_(text_track_map.end()) {
}

WebMClusterParser::TextTrackIterator::TextTrackIterator(
    const TextTrackIterator& rhs) :
    iterator_(rhs.iterator_),
    iterator_end_(rhs.iterator_end_) {
}

WebMClusterParser::TextTrackIterator::~TextTrackIterator() {
}

bool WebMClusterParser::TextTrackIterator::operator()(
  int* track_num,
  const BufferQueue** buffers) {
  if (iterator_ == iterator_end_) {
    *track_num = 0;
    *buffers = NULL;

    return false;
  }

  *track_num = iterator_->first;
  *buffers = &iterator_->second.buffers();

  ++iterator_;
  return true;
}

WebMClusterParser::WebMClusterParser(
    int64 timecode_scale, int audio_track_num, int video_track_num,
    const WebMTracksParser::TextTracks& text_tracks,
    const std::set<int64>& ignored_tracks,
    const std::string& audio_encryption_key_id,
    const std::string& video_encryption_key_id,
    const LogCB& log_cb)
    : timecode_multiplier_(timecode_scale / 1000.0),
      ignored_tracks_(ignored_tracks),
      audio_encryption_key_id_(audio_encryption_key_id),
      video_encryption_key_id_(video_encryption_key_id),
      parser_(kWebMIdCluster, this),
      last_block_timecode_(-1),
      block_data_size_(-1),
      block_duration_(-1),
      block_add_id_(-1),
      block_additional_data_size_(-1),
      cluster_timecode_(-1),
      cluster_start_time_(kNoTimestamp()),
      cluster_ended_(false),
      audio_(audio_track_num, false),
      video_(video_track_num, true),
      log_cb_(log_cb) {
  for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
       it != text_tracks.end();
       ++it) {
    text_track_map_.insert(std::make_pair(it->first, Track(it->first, false)));
  }
}

WebMClusterParser::~WebMClusterParser() {}

void WebMClusterParser::Reset() {
  last_block_timecode_ = -1;
  cluster_timecode_ = -1;
  cluster_start_time_ = kNoTimestamp();
  cluster_ended_ = false;
  parser_.Reset();
  audio_.Reset();
  video_.Reset();
  ResetTextTracks();
}

int WebMClusterParser::Parse(const uint8* buf, int size) {
  audio_.Reset();
  video_.Reset();
  ResetTextTracks();

  int result = parser_.Parse(buf, size);

  if (result < 0) {
    cluster_ended_ = false;
    return result;
  }

  cluster_ended_ = parser_.IsParsingComplete();
  if (cluster_ended_) {
    // If there were no buffers in this cluster, set the cluster start time to
    // be the |cluster_timecode_|.
    if (cluster_start_time_ == kNoTimestamp()) {
      DCHECK_GT(cluster_timecode_, -1);
      cluster_start_time_ = base::TimeDelta::FromMicroseconds(
          cluster_timecode_ * timecode_multiplier_);
    }

    // Reset the parser if we're done parsing so that
    // it is ready to accept another cluster on the next
    // call.
    parser_.Reset();

    last_block_timecode_ = -1;
    cluster_timecode_ = -1;
  }

  return result;
}

WebMClusterParser::TextTrackIterator
WebMClusterParser::CreateTextTrackIterator() const {
  return TextTrackIterator(text_track_map_);
}

WebMParserClient* WebMClusterParser::OnListStart(int id) {
  if (id == kWebMIdCluster) {
    cluster_timecode_ = -1;
    cluster_start_time_ = kNoTimestamp();
  } else if (id == kWebMIdBlockGroup) {
    block_data_.reset();
    block_data_size_ = -1;
    block_duration_ = -1;
  } else if (id == kWebMIdBlockAdditions) {
    block_add_id_ = -1;
    block_additional_data_.reset();
    block_additional_data_size_ = -1;
  }

  return this;
}

bool WebMClusterParser::OnListEnd(int id) {
  if (id != kWebMIdBlockGroup)
    return true;

  // Make sure the BlockGroup actually had a Block.
  if (block_data_size_ == -1) {
    MEDIA_LOG(log_cb_) << "Block missing from BlockGroup.";
    return false;
  }

  bool result = ParseBlock(false, block_data_.get(), block_data_size_,
                           block_additional_data_.get(),
                           block_additional_data_size_, block_duration_);
  block_data_.reset();
  block_data_size_ = -1;
  block_duration_ = -1;
  block_add_id_ = -1;
  block_additional_data_.reset();
  block_additional_data_size_ = -1;
  return result;
}

bool WebMClusterParser::OnUInt(int id, int64 val) {
  int64* dst;
  switch (id) {
    case kWebMIdTimecode:
      dst = &cluster_timecode_;
      break;
    case kWebMIdBlockDuration:
      dst = &block_duration_;
      break;
    case kWebMIdBlockAddID:
      dst = &block_add_id_;
      break;
    default:
      return true;
  }
  if (*dst != -1)
    return false;
  *dst = val;
  return true;
}

bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
                                   int size, const uint8* additional,
                                   int additional_size, int duration) {
  if (size < 4)
    return false;

  // Return an error if the trackNum > 127. We just aren't
  // going to support large track numbers right now.
  if (!(buf[0] & 0x80)) {
    MEDIA_LOG(log_cb_) << "TrackNumber over 127 not supported";
    return false;
  }

  int track_num = buf[0] & 0x7f;
  int timecode = buf[1] << 8 | buf[2];
  int flags = buf[3] & 0xff;
  int lacing = (flags >> 1) & 0x3;

  if (lacing) {
    MEDIA_LOG(log_cb_) << "Lacing " << lacing << " is not supported yet.";
    return false;
  }

  // Sign extend negative timecode offsets.
  if (timecode & 0x8000)
    timecode |= (-1 << 16);

  const uint8* frame_data = buf + 4;
  int frame_size = size - (frame_data - buf);
  return OnBlock(is_simple_block, track_num, timecode, duration, flags,
                 frame_data, frame_size, additional, additional_size);
}

bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
  switch (id) {
    case kWebMIdSimpleBlock:
      return ParseBlock(true, data, size, NULL, -1, -1);

    case kWebMIdBlock:
      if (block_data_) {
        MEDIA_LOG(log_cb_) << "More than 1 Block in a BlockGroup is not "
                              "supported.";
        return false;
      }
      block_data_.reset(new uint8[size]);
      memcpy(block_data_.get(), data, size);
      block_data_size_ = size;
      return true;

    case kWebMIdBlockAdditional: {
      uint64 block_add_id = base::HostToNet64(block_add_id_);
      if (block_additional_data_) {
        // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
        // as per matroska spec. But for now we don't have a use case to
        // support parsing of such files. Take a look at this again when such a
        // case arises.
        MEDIA_LOG(log_cb_) << "More than 1 BlockAdditional in a BlockGroup is "
                              "not supported.";
        return false;
      }
      // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
      // element's value in Big Endian format. This is done to mimic ffmpeg
      // demuxer's behavior.
      block_additional_data_size_ = size + sizeof(block_add_id);
      block_additional_data_.reset(new uint8[block_additional_data_size_]);
      memcpy(block_additional_data_.get(), &block_add_id,
             sizeof(block_add_id));
      memcpy(block_additional_data_.get() + 8, data, size);
      return true;
    }

    default:
      return true;
  }
}

bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
                                int timecode,
                                int  block_duration,
                                int flags,
                                const uint8* data, int size,
                                const uint8* additional, int additional_size) {
  DCHECK_GE(size, 0);
  if (cluster_timecode_ == -1) {
    MEDIA_LOG(log_cb_) << "Got a block before cluster timecode.";
    return false;
  }

  if (timecode < 0) {
    MEDIA_LOG(log_cb_) << "Got a block with negative timecode offset "
                       << timecode;
    return false;
  }

  if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
    MEDIA_LOG(log_cb_)
        << "Got a block with a timecode before the previous block.";
    return false;
  }

  Track* track = NULL;
  std::string encryption_key_id;
  if (track_num == audio_.track_num()) {
    track = &audio_;
    encryption_key_id = audio_encryption_key_id_;
  } else if (track_num == video_.track_num()) {
    track = &video_;
    encryption_key_id = video_encryption_key_id_;
  } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
    return true;
  } else if (Track* const text_track = FindTextTrack(track_num)) {
    if (is_simple_block)  // BlockGroup is required for WebVTT cues
      return false;
    if (block_duration < 0)  // not specified
      return false;
    track = text_track;
  } else {
    MEDIA_LOG(log_cb_) << "Unexpected track number " << track_num;
    return false;
  }

  last_block_timecode_ = timecode;

  base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
      (cluster_timecode_ + timecode) * timecode_multiplier_);

  // The first bit of the flags is set when a SimpleBlock contains only
  // keyframes. If this is a Block, then inspection of the payload is
  // necessary to determine whether it contains a keyframe or not.
  // http://www.matroska.org/technical/specs/index.html
  bool is_keyframe =
      is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);

  scoped_refptr<StreamParserBuffer> buffer =
      StreamParserBuffer::CopyFrom(data, size, additional, additional_size,
                                   is_keyframe);

  // Every encrypted Block has a signal byte and IV prepended to it. Current
  // encrypted WebM request for comments specification is here
  // http://wiki.webmproject.org/encryption/webm-encryption-rfc
  if (!encryption_key_id.empty()) {
    scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig(
        data, size,
        reinterpret_cast<const uint8*>(encryption_key_id.data()),
        encryption_key_id.size()));
    if (!config)
      return false;
    buffer->set_decrypt_config(config.Pass());
  }

  buffer->set_timestamp(timestamp);
  if (cluster_start_time_ == kNoTimestamp())
    cluster_start_time_ = timestamp;

  if (block_duration >= 0) {
    buffer->set_duration(base::TimeDelta::FromMicroseconds(
        block_duration * timecode_multiplier_));
  }

  return track->AddBuffer(buffer);
}

WebMClusterParser::Track::Track(int track_num, bool is_video)
    : track_num_(track_num),
      is_video_(is_video) {
}

WebMClusterParser::Track::~Track() {}

bool WebMClusterParser::Track::AddBuffer(
    const scoped_refptr<StreamParserBuffer>& buffer) {
  DVLOG(2) << "AddBuffer() : " << track_num_
           << " ts " << buffer->timestamp().InSecondsF()
           << " dur " << buffer->duration().InSecondsF()
           << " kf " << buffer->IsKeyframe()
           << " size " << buffer->data_size();

  buffers_.push_back(buffer);
  return true;
}

void WebMClusterParser::Track::Reset() {
  buffers_.clear();
}

bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const {
  // For now, assume that all blocks are keyframes for datatypes other than
  // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
  if (!is_video_)
    return true;

  // Make sure the block is big enough for the minimal keyframe header size.
  if (size < 7)
    return false;

  // The LSb of the first byte must be a 0 for a keyframe.
  // http://tools.ietf.org/html/rfc6386 Section 19.1
  if ((data[0] & 0x01) != 0)
    return false;

  // Verify VP8 keyframe startcode.
  // http://tools.ietf.org/html/rfc6386 Section 19.1
  if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
    return false;

  return true;
}

void WebMClusterParser::ResetTextTracks() {
  for (TextTrackMap::iterator it = text_track_map_.begin();
       it != text_track_map_.end();
       ++it) {
    it->second.Reset();
  }
}

WebMClusterParser::Track*
WebMClusterParser::FindTextTrack(int track_num) {
  const TextTrackMap::iterator it = text_track_map_.find(track_num);

  if (it == text_track_map_.end())
    return NULL;

  return &it->second;
}

}  // namespace media