Home | History | Annotate | Download | only in filters
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "media/filters/audio_renderer_algorithm.h"
      6 
      7 #include <algorithm>
      8 #include <cmath>
      9 
     10 #include "base/logging.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "media/base/audio_buffer.h"
     13 #include "media/base/audio_bus.h"
     14 #include "media/base/limits.h"
     15 #include "media/filters/wsola_internals.h"
     16 
     17 namespace media {
     18 
     19 
     20 // Waveform Similarity Overlap-and-add (WSOLA).
     21 //
     22 // One WSOLA iteration
     23 //
     24 // 1) Extract |target_block_| as input frames at indices
     25 //    [|target_block_index_|, |target_block_index_| + |ola_window_size_|).
     26 //    Note that |target_block_| is the "natural" continuation of the output.
     27 //
     28 // 2) Extract |search_block_| as input frames at indices
     29 //    [|search_block_index_|,
     30 //     |search_block_index_| + |num_candidate_blocks_| + |ola_window_size_|).
     31 //
     32 // 3) Find a block within the |search_block_| that is most similar
     33 //    to |target_block_|. Let |optimal_index| be the index of such block and
     34 //    write it to |optimal_block_|.
     35 //
     36 // 4) Update:
     37 //    |optimal_block_| = |transition_window_| * |target_block_| +
     38 //    (1 - |transition_window_|) * |optimal_block_|.
     39 //
     40 // 5) Overlap-and-add |optimal_block_| to the |wsola_output_|.
     41 //
     42 // 6) Update:
     43 //    |target_block_| = |optimal_index| + |ola_window_size_| / 2.
     44 //    |output_index_| = |output_index_| + |ola_window_size_| / 2,
     45 //    |search_block_center_offset_| = |output_index_| * |playback_rate_|, and
     46 //    |search_block_index_| = |search_block_center_offset_| -
     47 //        |search_block_center_offset_|.
     48 
     49 // Max/min supported playback rates for fast/slow audio. Audio outside of these
     50 // ranges are muted.
     51 // Audio at these speeds would sound better under a frequency domain algorithm.
     52 static const float kMinPlaybackRate = 0.5f;
     53 static const float kMaxPlaybackRate = 4.0f;
     54 
     55 // Overlap-and-add window size in milliseconds.
     56 static const int kOlaWindowSizeMs = 20;
     57 
     58 // Size of search interval in milliseconds. The search interval is
     59 // [-delta delta] around |output_index_| * |playback_rate_|. So the search
     60 // interval is 2 * delta.
     61 static const int kWsolaSearchIntervalMs = 30;
     62 
     63 // The maximum size in seconds for the |audio_buffer_|. Arbitrarily determined.
     64 static const int kMaxCapacityInSeconds = 3;
     65 
     66 // The starting size in frames for |audio_buffer_|. Previous usage maintained a
     67 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we
     68 // maintain this number of frames.
     69 static const int kStartingBufferSizeInFrames = 16 * 512;
     70 
     71 COMPILE_ASSERT(kStartingBufferSizeInFrames <
     72                (kMaxCapacityInSeconds * limits::kMinSampleRate),
     73                max_capacity_smaller_than_starting_buffer_size);
     74 
     75 AudioRendererAlgorithm::AudioRendererAlgorithm()
     76     : channels_(0),
     77       samples_per_second_(0),
     78       playback_rate_(0),
     79       muted_(false),
     80       muted_partial_frame_(0),
     81       capacity_(kStartingBufferSizeInFrames),
     82       output_time_(0.0),
     83       search_block_center_offset_(0),
     84       search_block_index_(0),
     85       num_candidate_blocks_(0),
     86       target_block_index_(0),
     87       ola_window_size_(0),
     88       ola_hop_size_(0),
     89       num_complete_frames_(0) {
     90 }
     91 
     92 AudioRendererAlgorithm::~AudioRendererAlgorithm() {}
     93 
     94 void AudioRendererAlgorithm::Initialize(float initial_playback_rate,
     95                                         const AudioParameters& params) {
     96   CHECK(params.IsValid());
     97 
     98   channels_ = params.channels();
     99   samples_per_second_ = params.sample_rate();
    100   SetPlaybackRate(initial_playback_rate);
    101   num_candidate_blocks_ = (kWsolaSearchIntervalMs * samples_per_second_) / 1000;
    102   ola_window_size_ = kOlaWindowSizeMs * samples_per_second_ / 1000;
    103 
    104   // Make sure window size in an even number.
    105   ola_window_size_ += ola_window_size_ & 1;
    106   ola_hop_size_ = ola_window_size_ / 2;
    107 
    108   // |num_candidate_blocks_| / 2 is the offset of the center of the search
    109   // block to the center of the first (left most) candidate block. The offset
    110   // of the center of a candidate block to its left most point is
    111   // |ola_window_size_| / 2 - 1. Note that |ola_window_size_| is even and in
    112   // our convention the center belongs to the left half, so we need to subtract
    113   // one frame to get the correct offset.
    114   //
    115   //                             Search Block
    116   //              <------------------------------------------->
    117   //
    118   //   |ola_window_size_| / 2 - 1
    119   //              <----
    120   //
    121   //             |num_candidate_blocks_| / 2
    122   //                   <----------------
    123   //                                 center
    124   //              X----X----------------X---------------X-----X
    125   //              <---------->                     <---------->
    126   //                Candidate      ...               Candidate
    127   //                   1,          ...         |num_candidate_blocks_|
    128   search_block_center_offset_ = num_candidate_blocks_ / 2 +
    129       (ola_window_size_ / 2 - 1);
    130 
    131   ola_window_.reset(new float[ola_window_size_]);
    132   internal::GetSymmetricHanningWindow(ola_window_size_, ola_window_.get());
    133 
    134   transition_window_.reset(new float[ola_window_size_ * 2]);
    135   internal::GetSymmetricHanningWindow(2 * ola_window_size_,
    136                                       transition_window_.get());
    137 
    138   wsola_output_ = AudioBus::Create(channels_, ola_window_size_ + ola_hop_size_);
    139   wsola_output_->Zero();  // Initialize for overlap-and-add of the first block.
    140 
    141   // Auxiliary containers.
    142   optimal_block_ = AudioBus::Create(channels_, ola_window_size_);
    143   search_block_ = AudioBus::Create(
    144       channels_, num_candidate_blocks_ + (ola_window_size_ - 1));
    145   target_block_ = AudioBus::Create(channels_, ola_window_size_);
    146 }
    147 
    148 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) {
    149   if (playback_rate_ == 0)
    150     return 0;
    151 
    152   DCHECK_EQ(channels_, dest->channels());
    153 
    154   // Optimize the |muted_| case to issue a single clear instead of performing
    155   // the full crossfade and clearing each crossfaded frame.
    156   if (muted_) {
    157     int frames_to_render =
    158         std::min(static_cast<int>(audio_buffer_.frames() / playback_rate_),
    159                  requested_frames);
    160 
    161     // Compute accurate number of frames to actually skip in the source data.
    162     // Includes the leftover partial frame from last request. However, we can
    163     // only skip over complete frames, so a partial frame may remain for next
    164     // time.
    165     muted_partial_frame_ += frames_to_render * playback_rate_;
    166     int seek_frames = static_cast<int>(muted_partial_frame_);
    167     dest->ZeroFrames(frames_to_render);
    168     audio_buffer_.SeekFrames(seek_frames);
    169 
    170     // Determine the partial frame that remains to be skipped for next call. If
    171     // the user switches back to playing, it may be off time by this partial
    172     // frame, which would be undetectable. If they subsequently switch to
    173     // another playback rate that mutes, the code will attempt to line up the
    174     // frames again.
    175     muted_partial_frame_ -= seek_frames;
    176     return frames_to_render;
    177   }
    178 
    179   int slower_step = ceil(ola_window_size_ * playback_rate_);
    180   int faster_step = ceil(ola_window_size_ / playback_rate_);
    181 
    182   // Optimize the most common |playback_rate_| ~= 1 case to use a single copy
    183   // instead of copying frame by frame.
    184   if (ola_window_size_ <= faster_step && slower_step >= ola_window_size_) {
    185     const int frames_to_copy =
    186         std::min(audio_buffer_.frames(), requested_frames);
    187     const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest);
    188     DCHECK_EQ(frames_read, frames_to_copy);
    189     return frames_read;
    190   }
    191 
    192   int rendered_frames = 0;
    193   do {
    194     rendered_frames += WriteCompletedFramesTo(
    195         requested_frames - rendered_frames, rendered_frames, dest);
    196   } while (rendered_frames < requested_frames && RunOneWsolaIteration());
    197   return rendered_frames;
    198 }
    199 
    200 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) {
    201   DCHECK_GE(new_rate, 0);
    202   playback_rate_ = new_rate;
    203   muted_ =
    204       playback_rate_ < kMinPlaybackRate || playback_rate_ > kMaxPlaybackRate;
    205 }
    206 
    207 void AudioRendererAlgorithm::FlushBuffers() {
    208   // Clear the queue of decoded packets (releasing the buffers).
    209   audio_buffer_.Clear();
    210   output_time_ = 0.0;
    211   search_block_index_ = 0;
    212   target_block_index_ = 0;
    213   wsola_output_->Zero();
    214   num_complete_frames_ = 0;
    215 
    216   // Reset |capacity_| so growth triggered by underflows doesn't penalize
    217   // seek time.
    218   capacity_ = kStartingBufferSizeInFrames;
    219 }
    220 
    221 base::TimeDelta AudioRendererAlgorithm::GetTime() {
    222   return audio_buffer_.current_time();
    223 }
    224 
    225 void AudioRendererAlgorithm::EnqueueBuffer(
    226     const scoped_refptr<AudioBuffer>& buffer_in) {
    227   DCHECK(!buffer_in->end_of_stream());
    228   audio_buffer_.Append(buffer_in);
    229 }
    230 
    231 bool AudioRendererAlgorithm::IsQueueFull() {
    232   return audio_buffer_.frames() >= capacity_;
    233 }
    234 
    235 void AudioRendererAlgorithm::IncreaseQueueCapacity() {
    236   int max_capacity = kMaxCapacityInSeconds * samples_per_second_;
    237   DCHECK_LE(capacity_, max_capacity);
    238 
    239   capacity_ = std::min(2 * capacity_, max_capacity);
    240 }
    241 
    242 bool AudioRendererAlgorithm::CanPerformWsola() const {
    243   const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1);
    244   const int frames = audio_buffer_.frames();
    245   return target_block_index_ + ola_window_size_ <= frames &&
    246       search_block_index_ + search_block_size <= frames;
    247 }
    248 
    249 bool AudioRendererAlgorithm::RunOneWsolaIteration() {
    250   if (!CanPerformWsola())
    251     return false;
    252 
    253   GetOptimalBlock();
    254 
    255   // Overlap-and-add.
    256   for (int k = 0; k < channels_; ++k) {
    257     const float* const ch_opt_frame = optimal_block_->channel(k);
    258     float* ch_output = wsola_output_->channel(k) + num_complete_frames_;
    259     for (int n = 0; n < ola_hop_size_; ++n) {
    260       ch_output[n] = ch_output[n] * ola_window_[ola_hop_size_ + n] +
    261           ch_opt_frame[n] * ola_window_[n];
    262     }
    263 
    264     // Copy the second half to the output.
    265     memcpy(&ch_output[ola_hop_size_], &ch_opt_frame[ola_hop_size_],
    266            sizeof(*ch_opt_frame) * ola_hop_size_);
    267   }
    268 
    269   num_complete_frames_ += ola_hop_size_;
    270   UpdateOutputTime(ola_hop_size_);
    271   RemoveOldInputFrames();
    272   return true;
    273 }
    274 
    275 void AudioRendererAlgorithm::UpdateOutputTime(double time_change) {
    276   output_time_ += time_change;
    277   // Center of the search region, in frames.
    278   const int search_block_center_index = static_cast<int>(
    279       output_time_ * playback_rate_ + 0.5);
    280   search_block_index_ = search_block_center_index - search_block_center_offset_;
    281 }
    282 
    283 void AudioRendererAlgorithm::RemoveOldInputFrames() {
    284   const int earliest_used_index = std::min(target_block_index_,
    285                                            search_block_index_);
    286   if (earliest_used_index <= 0)
    287     return;  // Nothing to remove.
    288 
    289   // Remove frames from input and adjust indices accordingly.
    290   audio_buffer_.SeekFrames(earliest_used_index);
    291   target_block_index_ -= earliest_used_index;
    292 
    293   // Adjust output index.
    294   double output_time_change = static_cast<double>(earliest_used_index) /
    295       playback_rate_;
    296   CHECK_GE(output_time_, output_time_change);
    297   UpdateOutputTime(-output_time_change);
    298 }
    299 
    300 int AudioRendererAlgorithm::WriteCompletedFramesTo(
    301     int requested_frames, int dest_offset, AudioBus* dest) {
    302   int rendered_frames = std::min(num_complete_frames_, requested_frames);
    303 
    304   if (rendered_frames == 0)
    305     return 0;  // There is nothing to read from |wsola_output_|, return.
    306 
    307   wsola_output_->CopyPartialFramesTo(0, rendered_frames, dest_offset, dest);
    308 
    309   // Remove the frames which are read.
    310   int frames_to_move = wsola_output_->frames() - rendered_frames;
    311   for (int k = 0; k < channels_; ++k) {
    312     float* ch = wsola_output_->channel(k);
    313     memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move);
    314   }
    315   num_complete_frames_ -= rendered_frames;
    316   return rendered_frames;
    317 }
    318 
    319 bool AudioRendererAlgorithm::TargetIsWithinSearchRegion() const {
    320   const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1);
    321 
    322   return target_block_index_ >= search_block_index_ &&
    323       target_block_index_ + ola_window_size_ <=
    324       search_block_index_ + search_block_size;
    325 }
    326 
    327 void AudioRendererAlgorithm::GetOptimalBlock() {
    328   int optimal_index = 0;
    329 
    330   // An interval around last optimal block which is excluded from the search.
    331   // This is to reduce the buzzy sound. The number 160 is rather arbitrary and
    332   // derived heuristically.
    333   const int kExcludeIntervalLengthFrames = 160;
    334   if (TargetIsWithinSearchRegion()) {
    335     optimal_index = target_block_index_;
    336     PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get());
    337   } else {
    338     PeekAudioWithZeroPrepend(target_block_index_, target_block_.get());
    339     PeekAudioWithZeroPrepend(search_block_index_, search_block_.get());
    340     int last_optimal = target_block_index_ - ola_hop_size_ -
    341         search_block_index_;
    342     internal::Interval exclude_iterval = std::make_pair(
    343         last_optimal - kExcludeIntervalLengthFrames / 2,
    344         last_optimal + kExcludeIntervalLengthFrames / 2);
    345 
    346     // |optimal_index| is in frames and it is relative to the beginning of the
    347     // |search_block_|.
    348     optimal_index = internal::OptimalIndex(
    349         search_block_.get(), target_block_.get(), exclude_iterval);
    350 
    351     // Translate |index| w.r.t. the beginning of |audio_buffer_| and extract the
    352     // optimal block.
    353     optimal_index += search_block_index_;
    354     PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get());
    355 
    356     // Make a transition from target block to the optimal block if different.
    357     // Target block has the best continuation to the current output.
    358     // Optimal block is the most similar block to the target, however, it might
    359     // introduce some discontinuity when over-lap-added. Therefore, we combine
    360     // them for a smoother transition. The length of transition window is twice
    361     // as that of the optimal-block which makes it like a weighting function
    362     // where target-block has higher weight close to zero (weight of 1 at index
    363     // 0) and lower weight close the end.
    364     for (int k = 0; k < channels_; ++k) {
    365       float* ch_opt = optimal_block_->channel(k);
    366       const float* const ch_target = target_block_->channel(k);
    367       for (int n = 0; n < ola_window_size_; ++n) {
    368         ch_opt[n] = ch_opt[n] * transition_window_[n] + ch_target[n] *
    369             transition_window_[ola_window_size_ + n];
    370       }
    371     }
    372   }
    373 
    374   // Next target is one hop ahead of the current optimal.
    375   target_block_index_ = optimal_index + ola_hop_size_;
    376 }
    377 
    378 void AudioRendererAlgorithm::PeekAudioWithZeroPrepend(
    379     int read_offset_frames, AudioBus* dest) {
    380   CHECK_LE(read_offset_frames + dest->frames(), audio_buffer_.frames());
    381 
    382   int write_offset = 0;
    383   int num_frames_to_read = dest->frames();
    384   if (read_offset_frames < 0) {
    385     int num_zero_frames_appended = std::min(-read_offset_frames,
    386                                             num_frames_to_read);
    387     read_offset_frames = 0;
    388     num_frames_to_read -= num_zero_frames_appended;
    389     write_offset = num_zero_frames_appended;
    390     dest->ZeroFrames(num_zero_frames_appended);
    391   }
    392   audio_buffer_.PeekFrames(num_frames_to_read, read_offset_frames,
    393                            write_offset, dest);
    394 }
    395 
    396 }  // namespace media
    397