Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "media/base/audio_splicer.h"
      6 
      7 #include <cstdlib>
      8 #include <deque>
      9 
     10 #include "base/logging.h"
     11 #include "media/base/audio_buffer.h"
     12 #include "media/base/audio_bus.h"
     13 #include "media/base/audio_decoder_config.h"
     14 #include "media/base/audio_timestamp_helper.h"
     15 #include "media/base/vector_math.h"
     16 
     17 namespace media {
     18 
     19 // Minimum gap size needed before the splicer will take action to
     20 // fill a gap. This avoids periodically inserting and then dropping samples
     21 // when the buffer timestamps are slightly off because of timestamp rounding
     22 // in the source content. Unit is frames.
     23 static const int kMinGapSize = 2;
     24 
     25 // AudioBuffer::TrimStart() is not as accurate as the timestamp helper, so
     26 // manually adjust the duration and timestamp after trimming.
     27 static void AccurateTrimStart(int frames_to_trim,
     28                               const scoped_refptr<AudioBuffer> buffer,
     29                               const AudioTimestampHelper& timestamp_helper) {
     30   buffer->TrimStart(frames_to_trim);
     31   buffer->set_timestamp(timestamp_helper.GetTimestamp());
     32 }
     33 
     34 // Returns an AudioBus whose frame buffer is backed by the provided AudioBuffer.
     35 static scoped_ptr<AudioBus> CreateAudioBufferWrapper(
     36     const scoped_refptr<AudioBuffer>& buffer) {
     37   scoped_ptr<AudioBus> wrapper =
     38       AudioBus::CreateWrapper(buffer->channel_count());
     39   wrapper->set_frames(buffer->frame_count());
     40   for (int ch = 0; ch < buffer->channel_count(); ++ch) {
     41     wrapper->SetChannelData(
     42         ch, reinterpret_cast<float*>(buffer->channel_data()[ch]));
     43   }
     44   return wrapper.Pass();
     45 }
     46 
     47 class AudioStreamSanitizer {
     48  public:
     49   explicit AudioStreamSanitizer(int samples_per_second);
     50   ~AudioStreamSanitizer();
     51 
     52   // Resets the sanitizer state by clearing the output buffers queue, and
     53   // resetting the timestamp helper.
     54   void Reset();
     55 
     56   // Similar to Reset(), but initializes the timestamp helper with the given
     57   // parameters.
     58   void ResetTimestampState(int64 frame_count, base::TimeDelta base_timestamp);
     59 
     60   // Adds a new buffer full of samples or end of stream buffer to the splicer.
     61   // Returns true if the buffer was accepted. False is returned if an error
     62   // occurred.
     63   bool AddInput(const scoped_refptr<AudioBuffer>& input);
     64 
     65   // Returns true if the sanitizer has a buffer to return.
     66   bool HasNextBuffer() const;
     67 
     68   // Removes the next buffer from the output buffer queue and returns it; should
     69   // only be called if HasNextBuffer() returns true.
     70   scoped_refptr<AudioBuffer> GetNextBuffer();
     71 
     72   // Returns the total frame count of all buffers available for output.
     73   int GetFrameCount() const;
     74 
     75   const AudioTimestampHelper& timestamp_helper() {
     76     return output_timestamp_helper_;
     77   }
     78 
     79   // Transfer all buffers into |output|.  Returns false if AddInput() on the
     80   // |output| sanitizer fails for any buffer removed from |this|.
     81   bool DrainInto(AudioStreamSanitizer* output);
     82 
     83  private:
     84   void AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer);
     85 
     86   AudioTimestampHelper output_timestamp_helper_;
     87   bool received_end_of_stream_;
     88 
     89   typedef std::deque<scoped_refptr<AudioBuffer> > BufferQueue;
     90   BufferQueue output_buffers_;
     91 
     92   DISALLOW_ASSIGN(AudioStreamSanitizer);
     93 };
     94 
     95 AudioStreamSanitizer::AudioStreamSanitizer(int samples_per_second)
     96     : output_timestamp_helper_(samples_per_second),
     97       received_end_of_stream_(false) {}
     98 
     99 AudioStreamSanitizer::~AudioStreamSanitizer() {}
    100 
    101 void AudioStreamSanitizer::Reset() {
    102   ResetTimestampState(0, kNoTimestamp());
    103 }
    104 
    105 void AudioStreamSanitizer::ResetTimestampState(int64 frame_count,
    106                                                base::TimeDelta base_timestamp) {
    107   output_buffers_.clear();
    108   received_end_of_stream_ = false;
    109   output_timestamp_helper_.SetBaseTimestamp(base_timestamp);
    110   if (frame_count > 0)
    111     output_timestamp_helper_.AddFrames(frame_count);
    112 }
    113 
    114 bool AudioStreamSanitizer::AddInput(const scoped_refptr<AudioBuffer>& input) {
    115   DCHECK(!received_end_of_stream_ || input->end_of_stream());
    116 
    117   if (input->end_of_stream()) {
    118     output_buffers_.push_back(input);
    119     received_end_of_stream_ = true;
    120     return true;
    121   }
    122 
    123   DCHECK(input->timestamp() != kNoTimestamp());
    124   DCHECK(input->duration() > base::TimeDelta());
    125   DCHECK_GT(input->frame_count(), 0);
    126 
    127   if (output_timestamp_helper_.base_timestamp() == kNoTimestamp())
    128     output_timestamp_helper_.SetBaseTimestamp(input->timestamp());
    129 
    130   if (output_timestamp_helper_.base_timestamp() > input->timestamp()) {
    131     DVLOG(1) << "Input timestamp is before the base timestamp.";
    132     return false;
    133   }
    134 
    135   const base::TimeDelta timestamp = input->timestamp();
    136   const base::TimeDelta expected_timestamp =
    137       output_timestamp_helper_.GetTimestamp();
    138   const base::TimeDelta delta = timestamp - expected_timestamp;
    139 
    140   if (std::abs(delta.InMilliseconds()) >
    141       AudioSplicer::kMaxTimeDeltaInMilliseconds) {
    142     DVLOG(1) << "Timestamp delta too large: " << delta.InMicroseconds() << "us";
    143     return false;
    144   }
    145 
    146   int frames_to_fill = 0;
    147   if (delta != base::TimeDelta())
    148     frames_to_fill = output_timestamp_helper_.GetFramesToTarget(timestamp);
    149 
    150   if (frames_to_fill == 0 || std::abs(frames_to_fill) < kMinGapSize) {
    151     AddOutputBuffer(input);
    152     return true;
    153   }
    154 
    155   if (frames_to_fill > 0) {
    156     DVLOG(1) << "Gap detected @ " << expected_timestamp.InMicroseconds()
    157              << " us: " << delta.InMicroseconds() << " us";
    158 
    159     // Create a buffer with enough silence samples to fill the gap and
    160     // add it to the output buffer.
    161     scoped_refptr<AudioBuffer> gap =
    162         AudioBuffer::CreateEmptyBuffer(input->channel_layout(),
    163                                        input->channel_count(),
    164                                        input->sample_rate(),
    165                                        frames_to_fill,
    166                                        expected_timestamp);
    167     AddOutputBuffer(gap);
    168 
    169     // Add the input buffer now that the gap has been filled.
    170     AddOutputBuffer(input);
    171     return true;
    172   }
    173 
    174   // Overlapping buffers marked as splice frames are handled by AudioSplicer,
    175   // but decoder and demuxer quirks may sometimes produce overlapping samples
    176   // which need to be sanitized.
    177   //
    178   // A crossfade can't be done here because only the current buffer is available
    179   // at this point, not previous buffers.
    180   DVLOG(1) << "Overlap detected @ " << expected_timestamp.InMicroseconds()
    181            << " us: " << -delta.InMicroseconds() << " us";
    182 
    183   const int frames_to_skip = -frames_to_fill;
    184   if (input->frame_count() <= frames_to_skip) {
    185     DVLOG(1) << "Dropping whole buffer";
    186     return true;
    187   }
    188 
    189   // Copy the trailing samples that do not overlap samples already output
    190   // into a new buffer.  Add this new buffer to the output queue.
    191   //
    192   // TODO(acolwell): Implement a cross-fade here so the transition is less
    193   // jarring.
    194   AccurateTrimStart(frames_to_skip, input, output_timestamp_helper_);
    195   AddOutputBuffer(input);
    196   return true;
    197 }
    198 
    199 bool AudioStreamSanitizer::HasNextBuffer() const {
    200   return !output_buffers_.empty();
    201 }
    202 
    203 scoped_refptr<AudioBuffer> AudioStreamSanitizer::GetNextBuffer() {
    204   scoped_refptr<AudioBuffer> ret = output_buffers_.front();
    205   output_buffers_.pop_front();
    206   return ret;
    207 }
    208 
    209 void AudioStreamSanitizer::AddOutputBuffer(
    210     const scoped_refptr<AudioBuffer>& buffer) {
    211   output_timestamp_helper_.AddFrames(buffer->frame_count());
    212   output_buffers_.push_back(buffer);
    213 }
    214 
    215 int AudioStreamSanitizer::GetFrameCount() const {
    216   int frame_count = 0;
    217   for (BufferQueue::const_iterator it = output_buffers_.begin();
    218        it != output_buffers_.end(); ++it) {
    219     frame_count += (*it)->frame_count();
    220   }
    221   return frame_count;
    222 }
    223 
    224 bool AudioStreamSanitizer::DrainInto(AudioStreamSanitizer* output) {
    225   while (HasNextBuffer()) {
    226     if (!output->AddInput(GetNextBuffer()))
    227       return false;
    228   }
    229   return true;
    230 }
    231 
    232 AudioSplicer::AudioSplicer(int samples_per_second)
    233     : max_crossfade_duration_(
    234           base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds)),
    235       splice_timestamp_(kNoTimestamp()),
    236       max_splice_end_timestamp_(kNoTimestamp()),
    237       output_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
    238       pre_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
    239       post_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
    240       have_all_pre_splice_buffers_(false) {}
    241 
    242 AudioSplicer::~AudioSplicer() {}
    243 
    244 void AudioSplicer::Reset() {
    245   output_sanitizer_->Reset();
    246   pre_splice_sanitizer_->Reset();
    247   post_splice_sanitizer_->Reset();
    248   have_all_pre_splice_buffers_ = false;
    249   reset_splice_timestamps();
    250 }
    251 
    252 bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {
    253   // If we're not processing a splice, add the input to the output queue.
    254   if (splice_timestamp_ == kNoTimestamp()) {
    255     DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
    256     DCHECK(!post_splice_sanitizer_->HasNextBuffer());
    257     return output_sanitizer_->AddInput(input);
    258   }
    259 
    260   const AudioTimestampHelper& output_ts_helper =
    261       output_sanitizer_->timestamp_helper();
    262 
    263   if (!have_all_pre_splice_buffers_) {
    264     DCHECK(!input->end_of_stream());
    265 
    266     // If the provided buffer is entirely before the splice point it can also be
    267     // added to the output queue.
    268     if (input->timestamp() + input->duration() < splice_timestamp_) {
    269       DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
    270       return output_sanitizer_->AddInput(input);
    271     }
    272 
    273     // If we've encountered the first pre splice buffer, reset the pre splice
    274     // sanitizer based on |output_sanitizer_|.  This is done so that gaps and
    275     // overlaps between buffers across the sanitizers are accounted for prior
    276     // to calculating crossfade.
    277     if (!pre_splice_sanitizer_->HasNextBuffer()) {
    278       pre_splice_sanitizer_->ResetTimestampState(
    279           output_ts_helper.frame_count(), output_ts_helper.base_timestamp());
    280     }
    281 
    282     return pre_splice_sanitizer_->AddInput(input);
    283   }
    284 
    285   // The first post splice buffer is expected to match |splice_timestamp_|.
    286   if (!post_splice_sanitizer_->HasNextBuffer())
    287     CHECK(splice_timestamp_ == input->timestamp());
    288 
    289   // At this point we have all the fade out preroll buffers from the decoder.
    290   // We now need to wait until we have enough data to perform the crossfade (or
    291   // we receive an end of stream).
    292   if (!post_splice_sanitizer_->AddInput(input))
    293     return false;
    294 
    295   // Ensure |output_sanitizer_| has a valid base timestamp so we can use it for
    296   // timestamp calculations.
    297   if (output_ts_helper.base_timestamp() == kNoTimestamp()) {
    298     output_sanitizer_->ResetTimestampState(
    299         0, pre_splice_sanitizer_->timestamp_helper().base_timestamp());
    300   }
    301 
    302   // If a splice frame was incorrectly marked due to poor demuxed timestamps, we
    303   // may not actually have a splice.  Here we check if any frames exist before
    304   // the splice.  In this case, just transfer all data to the output sanitizer.
    305   if (pre_splice_sanitizer_->GetFrameCount() <=
    306       output_ts_helper.GetFramesToTarget(splice_timestamp_)) {
    307     CHECK(pre_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
    308 
    309     // If the file contains incorrectly muxed timestamps, there may be huge gaps
    310     // between the demuxed and decoded timestamps.
    311     if (!post_splice_sanitizer_->DrainInto(output_sanitizer_.get()))
    312       return false;
    313 
    314     reset_splice_timestamps();
    315     return true;
    316   }
    317 
    318   // Wait until we have enough data to crossfade or end of stream.
    319   if (!input->end_of_stream() &&
    320       input->timestamp() + input->duration() < max_splice_end_timestamp_) {
    321     return true;
    322   }
    323 
    324   scoped_refptr<AudioBuffer> crossfade_buffer;
    325   scoped_ptr<AudioBus> pre_splice =
    326       ExtractCrossfadeFromPreSplice(&crossfade_buffer);
    327 
    328   // Crossfade the pre splice and post splice sections and transfer all relevant
    329   // buffers into |output_sanitizer_|.
    330   CrossfadePostSplice(pre_splice.Pass(), crossfade_buffer);
    331 
    332   // Clear the splice timestamp so new splices can be accepted.
    333   reset_splice_timestamps();
    334   return true;
    335 }
    336 
    337 bool AudioSplicer::HasNextBuffer() const {
    338   return output_sanitizer_->HasNextBuffer();
    339 }
    340 
    341 scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() {
    342   return output_sanitizer_->GetNextBuffer();
    343 }
    344 
    345 void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp) {
    346   if (splice_timestamp == kNoTimestamp()) {
    347     DCHECK(splice_timestamp_ != kNoTimestamp());
    348     DCHECK(!have_all_pre_splice_buffers_);
    349     have_all_pre_splice_buffers_ = true;
    350     return;
    351   }
    352 
    353   if (splice_timestamp_ == splice_timestamp)
    354     return;
    355 
    356   // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to
    357   // handle cases where another splice comes in before we've received 5ms of
    358   // data from the last one.  Leave this as a CHECK for now to figure out if
    359   // this case is possible.
    360   CHECK(splice_timestamp_ == kNoTimestamp());
    361   splice_timestamp_ = splice_timestamp;
    362   max_splice_end_timestamp_ = splice_timestamp_ + max_crossfade_duration_;
    363   pre_splice_sanitizer_->Reset();
    364   post_splice_sanitizer_->Reset();
    365   have_all_pre_splice_buffers_ = false;
    366 }
    367 
    368 scoped_ptr<AudioBus> AudioSplicer::ExtractCrossfadeFromPreSplice(
    369     scoped_refptr<AudioBuffer>* crossfade_buffer) {
    370   DCHECK(crossfade_buffer);
    371   const AudioTimestampHelper& output_ts_helper =
    372       output_sanitizer_->timestamp_helper();
    373 
    374   int frames_before_splice =
    375       output_ts_helper.GetFramesToTarget(splice_timestamp_);
    376 
    377   // Determine crossfade frame count based on available frames in each splicer
    378   // and capping to the maximum crossfade duration.
    379   const int max_crossfade_frame_count =
    380       output_ts_helper.GetFramesToTarget(max_splice_end_timestamp_) -
    381       frames_before_splice;
    382   const int frames_to_crossfade = std::min(
    383       max_crossfade_frame_count,
    384       std::min(pre_splice_sanitizer_->GetFrameCount() - frames_before_splice,
    385                post_splice_sanitizer_->GetFrameCount()));
    386   // There must always be frames to crossfade, otherwise the splice should not
    387   // have been generated.
    388   DCHECK_GT(frames_to_crossfade, 0);
    389 
    390   int frames_read = 0;
    391   scoped_ptr<AudioBus> output_bus;
    392   while (pre_splice_sanitizer_->HasNextBuffer() &&
    393          frames_read < frames_to_crossfade) {
    394     scoped_refptr<AudioBuffer> preroll = pre_splice_sanitizer_->GetNextBuffer();
    395 
    396     // We don't know the channel count until we see the first buffer, so wait
    397     // until the first buffer to allocate the output AudioBus.
    398     if (!output_bus) {
    399       output_bus =
    400           AudioBus::Create(preroll->channel_count(), frames_to_crossfade);
    401       // Allocate output buffer for crossfade.
    402       *crossfade_buffer = AudioBuffer::CreateBuffer(kSampleFormatPlanarF32,
    403                                                     preroll->channel_layout(),
    404                                                     preroll->channel_count(),
    405                                                     preroll->sample_rate(),
    406                                                     frames_to_crossfade);
    407     }
    408 
    409     // There may be enough of a gap introduced during decoding such that an
    410     // entire buffer exists before the splice point.
    411     if (frames_before_splice >= preroll->frame_count()) {
    412       // Adjust the number of frames remaining before the splice.  NOTE: This is
    413       // safe since |pre_splice_sanitizer_| is a continuation of the timeline in
    414       // |output_sanitizer_|.  As such we're guaranteed there are no gaps or
    415       // overlaps in the timeline between the two sanitizers.
    416       frames_before_splice -= preroll->frame_count();
    417       CHECK(output_sanitizer_->AddInput(preroll));
    418       continue;
    419     }
    420 
    421     const int frames_to_read =
    422         std::min(preroll->frame_count() - frames_before_splice,
    423                  output_bus->frames() - frames_read);
    424     preroll->ReadFrames(
    425         frames_to_read, frames_before_splice, frames_read, output_bus.get());
    426     frames_read += frames_to_read;
    427 
    428     // If only part of the buffer was consumed, trim it appropriately and stick
    429     // it into the output queue.
    430     if (frames_before_splice) {
    431       preroll->TrimEnd(preroll->frame_count() - frames_before_splice);
    432       CHECK(output_sanitizer_->AddInput(preroll));
    433       frames_before_splice = 0;
    434     }
    435   }
    436 
    437   // Ensure outputs were properly allocated.  The method should not have been
    438   // called if there is not enough data to crossfade.
    439   // TODO(dalecurtis): Convert to DCHECK() once http://crbug.com/356073 fixed.
    440   CHECK(output_bus);
    441   CHECK(*crossfade_buffer);
    442 
    443   // All necessary buffers have been processed, it's safe to reset.
    444   pre_splice_sanitizer_->Reset();
    445   DCHECK_EQ(output_bus->frames(), frames_read);
    446   DCHECK_EQ(output_ts_helper.GetFramesToTarget(splice_timestamp_), 0);
    447   return output_bus.Pass();
    448 }
    449 
    450 void AudioSplicer::CrossfadePostSplice(
    451     scoped_ptr<AudioBus> pre_splice_bus,
    452     scoped_refptr<AudioBuffer> crossfade_buffer) {
    453   // Use the calculated timestamp and duration to ensure there's no extra gaps
    454   // or overlaps to process when adding the buffer to |output_sanitizer_|.
    455   const AudioTimestampHelper& output_ts_helper =
    456       output_sanitizer_->timestamp_helper();
    457   crossfade_buffer->set_timestamp(output_ts_helper.GetTimestamp());
    458 
    459   // AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap
    460   // our AudioBuffer in one so we can avoid extra data copies.
    461   scoped_ptr<AudioBus> output_bus = CreateAudioBufferWrapper(crossfade_buffer);
    462 
    463   // Extract crossfade section from the |post_splice_sanitizer_|.
    464   int frames_read = 0, frames_to_trim = 0;
    465   scoped_refptr<AudioBuffer> remainder;
    466   while (post_splice_sanitizer_->HasNextBuffer() &&
    467          frames_read < output_bus->frames()) {
    468     scoped_refptr<AudioBuffer> postroll =
    469         post_splice_sanitizer_->GetNextBuffer();
    470     const int frames_to_read =
    471         std::min(postroll->frame_count(), output_bus->frames() - frames_read);
    472     postroll->ReadFrames(frames_to_read, 0, frames_read, output_bus.get());
    473     frames_read += frames_to_read;
    474 
    475     // If only part of the buffer was consumed, save it for after we've added
    476     // the crossfade buffer
    477     if (frames_to_read < postroll->frame_count()) {
    478       DCHECK(!remainder);
    479       remainder.swap(postroll);
    480       frames_to_trim = frames_to_read;
    481     }
    482   }
    483 
    484   DCHECK_EQ(output_bus->frames(), frames_read);
    485 
    486   // Crossfade the audio into |crossfade_buffer|.
    487   for (int ch = 0; ch < output_bus->channels(); ++ch) {
    488     vector_math::Crossfade(pre_splice_bus->channel(ch),
    489                            pre_splice_bus->frames(),
    490                            output_bus->channel(ch));
    491   }
    492 
    493   CHECK(output_sanitizer_->AddInput(crossfade_buffer));
    494   DCHECK_EQ(crossfade_buffer->frame_count(), output_bus->frames());
    495 
    496   if (remainder) {
    497     // Trim off consumed frames.
    498     AccurateTrimStart(frames_to_trim, remainder, output_ts_helper);
    499     CHECK(output_sanitizer_->AddInput(remainder));
    500   }
    501 
    502   // Transfer all remaining buffers out and reset once empty.
    503   CHECK(post_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
    504   post_splice_sanitizer_->Reset();
    505 }
    506 
    507 }  // namespace media
    508