Home | History | Annotate | Download | only in filters
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of
      6 // this object provides audio data to the object through EnqueueBuffer() and
      7 // requests data from the buffer via FillBuffer(). The owner also sets the
      8 // playback rate, and the AudioRendererAlgorithm will stretch or compress the
      9 // buffered audio as necessary to match the playback rate when fulfilling
     10 // FillBuffer() requests.
     11 //
     12 // This class is *not* thread-safe. Calls to enqueue and retrieve data must be
     13 // locked if called from multiple threads.
     14 //
     15 // AudioRendererAlgorithm uses the Waveform Similarity Overlap and Add (WSOLA)
     16 // algorithm to stretch or compress audio data to meet playback speeds less than
     17 // or greater than the natural playback of the audio stream. The algorithm
     18 // preserves local properties of the audio, therefore, pitch and harmonics
     19 // are preserved. See audio_renderer_algorithm.cc for a more elaborate
     20 // description of the algorithm.
     21 //
     22 // Audio at very low or very high playback rates is muted to preserve quality.
     23 //
     24 
     25 #ifndef MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_
     26 #define MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_
     27 
     28 #include "base/memory/ref_counted.h"
     29 #include "base/memory/scoped_ptr.h"
     30 #include "media/audio/audio_parameters.h"
     31 #include "media/base/audio_buffer.h"
     32 #include "media/base/audio_buffer_queue.h"
     33 
     34 namespace media {
     35 
     36 class AudioBus;
     37 
     38 class MEDIA_EXPORT AudioRendererAlgorithm {
     39  public:
     40   AudioRendererAlgorithm();
     41   ~AudioRendererAlgorithm();
     42 
     43   // Initializes this object with information about the audio stream.
     44   void Initialize(float initial_playback_rate, const AudioParameters& params);
     45 
     46   // Tries to fill |requested_frames| frames into |dest| with possibly scaled
     47   // data from our |audio_buffer_|. Data is scaled based on the playback rate,
     48   // using a variation of the Overlap-Add method to combine sample windows.
     49   //
     50   // Data from |audio_buffer_| is consumed in proportion to the playback rate.
     51   //
     52   // Returns the number of frames copied into |dest|, which may be fewer than
     53   // |requested_frames|.
     54   int FillBuffer(AudioBus* dest, int requested_frames);
     55 
     56   // Clears |audio_buffer_|.
     57   void FlushBuffers();
     58 
     59   // Returns the time of the next byte in our data or kNoTimestamp() if current
     60   // time is unknown.
     61   base::TimeDelta GetTime();
     62 
     63   // Enqueues a buffer. It is called from the owner of the algorithm after a
     64   // read completes.
     65   void EnqueueBuffer(const scoped_refptr<AudioBuffer>& buffer_in);
     66 
     67   float playback_rate() const { return playback_rate_; }
     68   void SetPlaybackRate(float new_rate);
     69 
     70   // Returns true if |audio_buffer_| is at or exceeds capacity.
     71   bool IsQueueFull();
     72 
     73   // Returns the capacity of |audio_buffer_| in frames.
     74   int QueueCapacity() const { return capacity_; }
     75 
     76   // Increase the capacity of |audio_buffer_| if possible.
     77   void IncreaseQueueCapacity();
     78 
     79   // Returns the number of frames left in |audio_buffer_|, which may be larger
     80   // than QueueCapacity() in the event that EnqueueBuffer() delivered more data
     81   // than |audio_buffer_| was intending to hold.
     82   int frames_buffered() { return audio_buffer_.frames(); }
     83 
     84   // Returns the samples per second for this audio stream.
     85   int samples_per_second() const { return samples_per_second_; }
     86 
     87   // Is the sound currently muted?
     88   bool is_muted() const { return muted_; }
     89 
     90  private:
     91   // Within |search_block_|, find the block of data that is most similar to
     92   // |target_block_|, and write it in |optimal_block_|. This method assumes that
     93   // there is enough data to perform a search, i.e. |search_block_| and
     94   // |target_block_| can be extracted from the available frames.
     95   void GetOptimalBlock();
     96 
     97   // Read a maximum of |requested_frames| frames from |wsola_output_|. Returns
     98   // the number of frames actually read.
     99   int WriteCompletedFramesTo(
    100       int requested_frames, int output_offset, AudioBus* dest);
    101 
    102   // Fill |dest| with frames from |audio_buffer_| starting from frame
    103   // |read_offset_frames|. |dest| is expected to have the same number of
    104   // channels as |audio_buffer_|. A negative offset, i.e.
    105   // |read_offset_frames| < 0, is accepted, treating |audio_buffer_| as zero
    106   // for negative indices. This might happen for the first few frames. This
    107   // method assumes there are enough frames to fill |dest|, i.e.
    108   // |read_offset_frames| + |dest->frames()| does not run past the queued data.
    109   void PeekAudioWithZeroPrepend(int read_offset_frames, AudioBus* dest);
    110 
    111   // Run one iteration of WSOLA, if there are sufficient frames. This will
    112   // overlap-and-add one block to |wsola_output_|, hence, |num_complete_frames_|
    113   // is incremented by |ola_hop_size_|.
    114   bool RunOneWsolaIteration();
    115 
    116   // Seek |audio_buffer_| forward to remove frames from input that are not used
    117   // any more. State of the WSOLA will be updated accordingly.
    118   void RemoveOldInputFrames();
    119 
    120   // Update |output_time_| by |time_change|. In turn |search_block_index_| is
    121   // updated.
    122   void UpdateOutputTime(double time_change);
    123 
    124   // Is |target_block_| fully within |search_block_|? If so, we don't need to
    125   // perform the search.
    126   bool TargetIsWithinSearchRegion() const;
    127 
    128   // Do we have enough data to perform one round of WSOLA?
    129   bool CanPerformWsola() const;
    130 
    131   // Number of channels in audio stream.
    132   int channels_;
    133 
    134   // Sample rate of audio stream.
    135   int samples_per_second_;
    136 
    137   // Used by algorithm to scale output.
    138   float playback_rate_;
    139 
    140   // Buffered audio data.
    141   AudioBufferQueue audio_buffer_;
    142 
    143   // True if the audio should be muted.
    144   bool muted_;
    145 
    146   // If muted, keep track of partial frames that should have been skipped over.
    147   double muted_partial_frame_;
    148 
    149   // How many frames to have in the queue before we report the queue is full.
    150   int capacity_;
    151 
    152   // Bookkeeping of the current time of generated audio, in frames. This
    153   // should be updated whenever output samples are generated, regardless of
    154   // whether we push samples out when FillBuffer() is called or we store
    155   // audio in |wsola_output_| for the subsequent calls to FillBuffer().
    156   // Furthermore, if samples from |audio_buffer_| are evicted then this
    157   // member variable should be updated based on |playback_rate_|.
    158   // Note that this member should be updated ONLY by calling UpdateOutputTime(),
    159   // so that |search_block_index_| is updated accordingly.
    160   double output_time_;
    161 
    162   // The offset of the center frame of |search_block_| w.r.t. its first frame.
    163   int search_block_center_offset_;
    164 
    165   // Index of the beginning of the |search_block_|, in frames.
    166   int search_block_index_;
    167 
    168   // Number of candidate blocks to search to find the one most similar to
    169   // |target_block_|.
    170   int num_candidate_blocks_;
    171 
    172   // Index of the beginning of the target block, counted in frames.
    173   int target_block_index_;
    174 
    175   // Overlap-and-add window size in frames.
    176   int ola_window_size_;
    177 
    178   // The hop size of overlap-and-add in frames. This implementation assumes 50%
    179   // overlap-and-add.
    180   int ola_hop_size_;
    181 
    182   // Number of frames in |wsola_output_| for which overlap-and-add is complete
    183   // and which can be copied to output if FillBuffer() is called. It also
    184   // specifies the index where the next WSOLA window has to overlap-and-add.
    185   int num_complete_frames_;
    186 
    187   // This stores a part of the output that is created but couldn't be rendered.
    188   // Output is generated frame-by-frame which at some point might exceed the
    189   // number of requested samples. Furthermore, due to overlap-and-add,
    190   // the last half-window of the output is incomplete, which is stored in this
    191   // buffer.
    192   scoped_ptr<AudioBus> wsola_output_;
    193 
    194   // Overlap-and-add window.
    195   scoped_ptr<float[]> ola_window_;
    196 
    197   // Transition window, used to update |optimal_block_| by a weighted sum of
    198   // |optimal_block_| and |target_block_|.
    199   scoped_ptr<float[]> transition_window_;
    200 
    201   // Auxiliary variables to avoid allocation in every iteration.
    202 
    203   // Stores the optimal block in every iteration. This is the most
    204   // similar block to |target_block_| within |search_block_| and it is
    205   // overlap-and-added to |wsola_output_|.
    206   scoped_ptr<AudioBus> optimal_block_;
    207 
    208   // A block of data that search is performed over to find the |optimal_block_|.
    209   scoped_ptr<AudioBus> search_block_;
    210 
    211   // Stores the target block that WSOLA tries to match. |search_block_| is
    212   // searched for a block (|optimal_block_|) that is most similar to
    213   // |target_block_|.
    214   scoped_ptr<AudioBus> target_block_;
    215 
    216   DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm);
    217 };
    218 
    219 }  // namespace media
    220 
    221 #endif  // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_
    222