Home | History | Annotate | Download | only in webm
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
      6 #define MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
      7 
      8 #include <deque>
      9 #include <map>
     10 #include <set>
     11 #include <string>
     12 
     13 #include "base/memory/scoped_ptr.h"
     14 #include "media/base/media_export.h"
     15 #include "media/base/media_log.h"
     16 #include "media/base/stream_parser.h"
     17 #include "media/base/stream_parser_buffer.h"
     18 #include "media/formats/webm/webm_parser.h"
     19 #include "media/formats/webm/webm_tracks_parser.h"
     20 
     21 namespace media {
     22 
        // WebMParserClient implementation that parses WebM cluster elements (see
        // Parse()) and exposes the parsed buffers per track through
        // Get{Audio,Video,Text}Buffers(). Audio and video each have a single Track
        // (|audio_|, |video_|); text tracks are kept in |text_track_map_|.
     23 class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
     24  public:
          // Track identifier type, taken from StreamParser.
     25   typedef StreamParser::TrackId TrackId;
          // Queue of parsed buffers for one track.
     26   typedef std::deque<scoped_refptr<StreamParserBuffer> > BufferQueue;
          // Per-track buffer queues, as returned by GetTextBuffers().
     27   typedef std::map<TrackId, const BufferQueue> TextBufferQueueMap;
     28 
     29   // Arbitrarily-chosen numbers to estimate the duration of a buffer if none is
     30   // set and there is not enough information to get a better estimate.
     31   // TODO(wolenetz/acolwell): Parse audio codebook to determine missing audio
     32   // frame durations. See http://crbug.com/351166.
     33   enum {
     34     kDefaultAudioBufferDurationInMs = 23,  // Common 1k samples @44.1kHz
     35     kDefaultVideoBufferDurationInMs = 42  // Low 24fps to reduce stalls
     36   };
     37 
     38  private:
     39   // Helper class that manages per-track state.
     40   class Track {
     41    public:
            // |default_duration| may be kNoTimestamp(), in which case buffer
            // durations are estimated instead (see |default_duration_| and
            // |estimated_next_frame_duration_| below).
            // NOTE(review): the remaining arguments presumably initialize the
            // same-named members -- confirm in the .cc file.
     42     Track(int track_num,
     43           bool is_video,
     44           base::TimeDelta default_duration,
     45           const LogCB& log_cb);
     46     ~Track();
     47 
            // Returns the track number this Track was constructed for.
     48     int track_num() const { return track_num_; }
     49 
     50     // If a buffer is currently held aside pending duration calculation, returns
     51     // its decode timestamp. Otherwise, returns kInfiniteDuration().
     52     DecodeTimestamp GetReadyUpperBound();
     53 
     54     // Prepares |ready_buffers_| for retrieval. Prior to calling,
     55     // |ready_buffers_| must be empty. Moves all |buffers_| with decode
     56     // timestamp before |before_timestamp| to |ready_buffers_|, preserving their
     57     // order.
     58     void ExtractReadyBuffers(const DecodeTimestamp before_timestamp);
     59 
            // Buffers most recently made available by ExtractReadyBuffers().
     60     const BufferQueue& ready_buffers() const { return ready_buffers_; }
     61 
     62     // If |last_added_buffer_missing_duration_| is set, updates its duration
     63     // relative to |buffer|'s timestamp, and adds it to |buffers_| and unsets
     64     // |last_added_buffer_missing_duration_|. Then, if |buffer| is missing
     65     // duration, saves |buffer| into |last_added_buffer_missing_duration_|, or
     66     // otherwise adds |buffer| to |buffers_|.
            // NOTE(review): the bool result presumably reports whether queueing
            // succeeded (see QueueBuffer()) -- confirm in the .cc file.
     67     bool AddBuffer(const scoped_refptr<StreamParserBuffer>& buffer);
     68 
     69     // If |last_added_buffer_missing_duration_| is set, updates its duration to
     70     // be the non-kNoTimestamp() value of |estimated_next_frame_duration_| or an
     71     // arbitrary default, then adds it to |buffers_| and unsets
     72     // |last_added_buffer_missing_duration_|. (This method helps stream parser
     73     // emit all buffers in a media segment before signaling end of segment.)
     74     void ApplyDurationEstimateIfNeeded();
     75 
     76     // Clears |ready_buffers_| (use ExtractReadyBuffers() to fill it again).
     77     // Leaves as-is |buffers_| and any possibly held-aside buffer that is
     78     // missing duration.
     79     void ClearReadyBuffers();
     80 
     81     // Clears all buffer state, including any possibly held-aside buffer that
     82     // was missing duration, and all contents of |buffers_| and
     83     // |ready_buffers_|.
     84     void Reset();
     85 
     86     // Helper function used to inspect block data to determine if the
     87     // block is a keyframe.
     88     // |data| contains the bytes in the block.
     89     // |size| indicates the number of bytes in |data|.
     90     bool IsKeyframe(const uint8* data, int size) const;
     91 
            // Returns the default duration supplied at construction; see
            // |default_duration_| for the meaning of kNoTimestamp().
     92     base::TimeDelta default_duration() const { return default_duration_; }
     93 
     94    private:
     95     // Helper that sanity-checks |buffer| duration, updates
     96     // |estimated_next_frame_duration_|, and adds |buffer| to |buffers_|.
     97     // Returns false if |buffer| failed sanity check and therefore was not added
     98     // to |buffers_|. Returns true otherwise.
     99     bool QueueBuffer(const scoped_refptr<StreamParserBuffer>& buffer);
    100 
    101     // Helper that calculates the buffer duration to use in
    102     // ApplyDurationEstimateIfNeeded().
    103     base::TimeDelta GetDurationEstimate();
    104 
    105     int track_num_;
    106     bool is_video_;
    107 
    108     // Parsed track buffers, each with duration and in (decode) timestamp order,
    109     // that have not yet been extracted into |ready_buffers_|. Note that up to
    110     // one additional buffer missing duration may be tracked by
    111     // |last_added_buffer_missing_duration_|.
    112     BufferQueue buffers_;
    113     scoped_refptr<StreamParserBuffer> last_added_buffer_missing_duration_;
    114 
    115     // Buffers in (decode) timestamp order that were previously parsed into and
    116     // extracted from |buffers_|. Buffers are moved from |buffers_| to
    117     // |ready_buffers_| by ExtractReadyBuffers() if they are below a specified
    118     // upper bound timestamp. Track users can therefore extract only those
    119     // parsed buffers which are "ready" for emission (all before some maximum
    120     // timestamp).
    121     BufferQueue ready_buffers_;
    122 
    123     // If kNoTimestamp(), then |estimated_next_frame_duration_| will be used.
    124     base::TimeDelta default_duration_;
    125 
    126     // If kNoTimestamp(), then a default value will be used. This estimate is
    127     // the maximum duration seen or derived so far for this track, and is valid
    128     // only if |default_duration_| is kNoTimestamp().
    129     base::TimeDelta estimated_next_frame_duration_;
    130 
    131     LogCB log_cb_;
    132   };
    133 
          // Per-track state for the text tracks.
          // NOTE(review): the int key is presumably the track number looked up by
          // FindTextTrack() -- confirm in the .cc file.
    134   typedef std::map<int, Track> TextTrackMap;
    135 
    136  public:
          // NOTE(review): the audio/video track numbers and default durations
          // presumably configure |audio_| and |video_|, and |text_tracks| presumably
          // populates |text_track_map_| -- confirm in the .cc file.
    137   WebMClusterParser(int64 timecode_scale,
    138                     int audio_track_num,
    139                     base::TimeDelta audio_default_duration,
    140                     int video_track_num,
    141                     base::TimeDelta video_default_duration,
    142                     const WebMTracksParser::TextTracks& text_tracks,
    143                     const std::set<int64>& ignored_tracks,
    144                     const std::string& audio_encryption_key_id,
    145                     const std::string& video_encryption_key_id,
    146                     const LogCB& log_cb);
    147   virtual ~WebMClusterParser();
    148 
    149   // Resets the parser state so it can accept a new cluster.
    150   void Reset();
    151 
    152   // Parses a WebM cluster element in |buf|.
    153   //
    154   // Returns -1 if the parse fails.
    155   // Returns 0 if more data is needed.
    156   // Returns the number of bytes parsed on success.
    157   int Parse(const uint8* buf, int size);
    158 
    159   base::TimeDelta cluster_start_time() const { return cluster_start_time_; }
    160 
    161   // Get the current ready buffers resulting from Parse().
    162   // If the parse reached the end of cluster and the last buffer was held aside
    163   // due to missing duration, the buffer is given an estimated duration and
    164   // included in the result.
    165   // Otherwise, if there is a buffer held aside due to missing duration for
    166   // any of the tracks, no buffers with same or greater (decode) timestamp will
    167   // be included in the buffers.
    168   // The returned deques are cleared by Parse() or Reset() and updated by the
    169   // next calls to Get{Audio,Video}Buffers().
    170   // If no Parse() or Reset() has occurred since the last call to Get{Audio,
    171   // Video,Text}Buffers(), then the previous BufferQueue& is returned again
    172   // without any recalculation.
    173   const BufferQueue& GetAudioBuffers();
    174   const BufferQueue& GetVideoBuffers();
    175 
    176   // Constructs and returns a subset of |text_track_map_| containing only
    177   // tracks with non-empty buffer queues produced by the last Parse() and
    178   // filtered to exclude any buffers that have (decode) timestamp same or
    179   // greater than the lowest (decode) timestamp across all tracks of any buffer
    180   // held aside due to missing duration (unless the end of cluster has been
    181   // reached).
    182   // The returned map is cleared by Parse() or Reset() and updated by the next
    183   // call to GetTextBuffers().
    184   // If no Parse() or Reset() has occurred since the last call to
    185   // GetTextBuffers(), then the previous TextBufferQueueMap& is returned again
    186   // without any recalculation.
    187   const TextBufferQueueMap& GetTextBuffers();
    188 
    189   // Returns true if the last Parse() call stopped at the end of a cluster.
    190   bool cluster_ended() const { return cluster_ended_; }
    191 
    192  private:
    193   // WebMParserClient methods.
    194   virtual WebMParserClient* OnListStart(int id) OVERRIDE;
    195   virtual bool OnListEnd(int id) OVERRIDE;
    196   virtual bool OnUInt(int id, int64 val) OVERRIDE;
    197   virtual bool OnBinary(int id, const uint8* data, int size) OVERRIDE;
    198 
          // Block handling helpers.
          // NOTE(review): ParseBlock() presumably decodes the block header in |buf|
          // and forwards the extracted fields to OnBlock(), which presumably builds
          // a buffer and adds it to the matching Track -- confirm in the .cc file.
    199   bool ParseBlock(bool is_simple_block, const uint8* buf, int size,
    200                   const uint8* additional, int additional_size, int duration,
    201                   int64 discard_padding);
    202   bool OnBlock(bool is_simple_block, int track_num, int timecode, int duration,
    203                int flags, const uint8* data, int size,
    204                const uint8* additional, int additional_size,
    205                int64 discard_padding);
    206 
    207   // Resets the Track objects associated with each text track.
    208   void ResetTextTracks();
    209 
    210   // Clears the ready buffers associated with each text track.
    211   void ClearTextTrackReadyBuffers();
    212 
    213   // Helper method for Get{Audio,Video,Text}Buffers() that recomputes
    214   // |ready_buffer_upper_bound_| and calls ExtractReadyBuffers() on each track.
    215   // If |cluster_ended_| is true, first applies duration estimate if needed for
    216   // |audio_| and |video_| and sets |ready_buffer_upper_bound_| to
    217   // kInfiniteDuration(). Otherwise, sets |ready_buffer_upper_bound_| to the
    218   // minimum upper bound across |audio_| and |video_|. (Text tracks can have no
    219   // buffers missing duration, so they are not involved in calculating the upper
    220   // bound.)
    221   // Parse() or Reset() must be called between calls to UpdateReadyBuffers() to
    222   // clear each track's ready buffers and to reset |ready_buffer_upper_bound_|
    223   // to kNoDecodeTimestamp().
    224   void UpdateReadyBuffers();
    225 
    226   // Search for the indicated track_num among the text tracks.  Returns NULL
    227   // if that track num is not a text track.
    228   Track* FindTextTrack(int track_num);
    229 
    230   double timecode_multiplier_;  // Multiplier used to convert timecodes into
    231                                 // microseconds.
    232   std::set<int64> ignored_tracks_;
    233   std::string audio_encryption_key_id_;
    234   std::string video_encryption_key_id_;
    235 
    236   WebMListParser parser_;
    237 
          // State for the block currently being parsed.
          // NOTE(review): presumably filled in by the On*() callbacks above and
          // consumed by ParseBlock() -- confirm in the .cc file.
    238   int64 last_block_timecode_;
    239   scoped_ptr<uint8[]> block_data_;
    240   int block_data_size_;
    241   int64 block_duration_;
    242   int64 block_add_id_;
    243   scoped_ptr<uint8[]> block_additional_data_;
    244   int block_additional_data_size_;
    245   int64 discard_padding_;
    246   bool discard_padding_set_;
    247 
          // Per-cluster state; |cluster_start_time_| and |cluster_ended_| are exposed
          // through cluster_start_time() and cluster_ended().
    248   int64 cluster_timecode_;
    249   base::TimeDelta cluster_start_time_;
    250   bool cluster_ended_;
    251 
    252   Track audio_;
    253   Track video_;
    254   TextTrackMap text_track_map_;
    255 
    256   // Subset of |text_track_map_| maintained by GetTextBuffers(), and cleared by
    257   // ClearTextTrackReadyBuffers(). Callers of GetTextBuffers() get a const-ref
    258   // to this member.
    259   TextBufferQueueMap text_buffers_map_;
    260 
    261   // Limits the range of buffers returned by Get{Audio,Video,Text}Buffers() to
    262   // this exclusive upper bound. Set to kNoDecodeTimestamp(), meaning not yet
    263   // calculated, by Reset() and Parse(). If kNoDecodeTimestamp(), then
    264   // Get{Audio,Video,Text}Buffers() will calculate it to be the minimum (decode)
    265   // timestamp across all tracks' |last_added_buffer_missing_duration_|, or
    266   // kInfiniteDuration() if no buffers are currently missing duration.
    267   DecodeTimestamp ready_buffer_upper_bound_;
    268 
    269   LogCB log_cb_;
    270 
    271   DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser);
    272 };
    273 
    274 }  // namespace media
    275 
    276 #endif  // MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
    277