1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of 6 // this object provides audio data to the object through EnqueueBuffer() and 7 // requests data from the buffer via FillBuffer(). The owner also sets the 8 // playback rate, and the AudioRendererAlgorithm will stretch or compress the 9 // buffered audio as necessary to match the playback rate when fulfilling 10 // FillBuffer() requests. 11 // 12 // This class is *not* thread-safe. Calls to enqueue and retrieve data must be 13 // locked if called from multiple threads. 14 // 15 // AudioRendererAlgorithm uses the Waveform Similarity Overlap and Add (WSOLA) 16 // algorithm to stretch or compress audio data to meet playback speeds less than 17 // or greater than the natural playback of the audio stream. The algorithm 18 // preserves local properties of the audio, therefore, pitch and harmonics are 19 // are preserved. See audio_renderer_algorith.cc for a more elaborate 20 // description of the algorithm. 21 // 22 // Audio at very low or very high playback rates are muted to preserve quality. 23 // 24 25 #ifndef MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ 26 #define MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ 27 28 #include "base/memory/ref_counted.h" 29 #include "base/memory/scoped_ptr.h" 30 #include "media/audio/audio_parameters.h" 31 #include "media/base/audio_buffer.h" 32 #include "media/base/audio_buffer_queue.h" 33 34 namespace media { 35 36 class AudioBus; 37 38 class MEDIA_EXPORT AudioRendererAlgorithm { 39 public: 40 AudioRendererAlgorithm(); 41 ~AudioRendererAlgorithm(); 42 43 // Initializes this object with information about the audio stream. 44 void Initialize(float initial_playback_rate, const AudioParameters& params); 45 46 // Tries to fill |requested_frames| frames into |dest| with possibly scaled 47 // data from our |audio_buffer_|. Data is scaled based on the playback rate, 48 // using a variation of the Overlap-Add method to combine sample windows. 49 // 50 // Data from |audio_buffer_| is consumed in proportion to the playback rate. 51 // 52 // Returns the number of frames copied into |dest|. May request more reads via 53 // |request_read_cb_| before returning. 54 int FillBuffer(AudioBus* dest, int requested_frames); 55 56 // Clears |audio_buffer_|. 57 void FlushBuffers(); 58 59 // Returns the time of the next byte in our data or kNoTimestamp() if current 60 // time is unknown. 61 base::TimeDelta GetTime(); 62 63 // Enqueues a buffer. It is called from the owner of the algorithm after a 64 // read completes. 65 void EnqueueBuffer(const scoped_refptr<AudioBuffer>& buffer_in); 66 67 float playback_rate() const { return playback_rate_; } 68 void SetPlaybackRate(float new_rate); 69 70 // Returns true if |audio_buffer_| is at or exceeds capacity. 71 bool IsQueueFull(); 72 73 // Returns the capacity of |audio_buffer_| in frames. 74 int QueueCapacity() const { return capacity_; } 75 76 // Increase the capacity of |audio_buffer_| if possible. 77 void IncreaseQueueCapacity(); 78 79 // Returns the number of frames left in |audio_buffer_|, which may be larger 80 // than QueueCapacity() in the event that EnqueueBuffer() delivered more data 81 // than |audio_buffer_| was intending to hold. 82 int frames_buffered() { return audio_buffer_.frames(); } 83 84 // Returns the samples per second for this audio stream. 85 int samples_per_second() { return samples_per_second_; } 86 87 // Is the sound currently muted? 88 bool is_muted() { return muted_; } 89 90 private: 91 // Within |search_block_|, find the block of data that is most similar to 92 // |target_block_|, and write it in |optimal_block_|. This method assumes that 93 // there is enough data to perform a search, i.e. |search_block_| and 94 // |target_block_| can be extracted from the available frames. 95 void GetOptimalBlock(); 96 97 // Read a maximum of |requested_frames| frames from |wsola_output_|. Returns 98 // number of frames actually read. 99 int WriteCompletedFramesTo( 100 int requested_frames, int output_offset, AudioBus* dest); 101 102 // Fill |dest| with frames from |audio_buffer_| starting from frame 103 // |read_offset_frames|. |dest| is expected to have the same number of 104 // channels as |audio_buffer_|. A negative offset, i.e. 105 // |read_offset_frames| < 0, is accepted assuming that |audio_buffer| is zero 106 // for negative indices. This might happen for few first frames. This method 107 // assumes there is enough frames to fill |dest|, i.e. |read_offset_frames| + 108 // |dest->frames()| does not extend to future. 109 void PeekAudioWithZeroPrepend(int read_offset_frames, AudioBus* dest); 110 111 // Run one iteration of WSOLA, if there are sufficient frames. This will 112 // overlap-and-add one block to |wsola_output_|, hence, |num_complete_frames_| 113 // is incremented by |ola_hop_size_|. 114 bool RunOneWsolaIteration(); 115 116 // Seek |audio_buffer_| forward to remove frames from input that are not used 117 // any more. State of the WSOLA will be updated accordingly. 118 void RemoveOldInputFrames(); 119 120 // Update |output_time_| by |time_change|. In turn |search_block_index_| is 121 // updated. 122 void UpdateOutputTime(double time_change); 123 124 // Is |target_block_| fully within |search_block_|? If so, we don't need to 125 // perform the search. 126 bool TargetIsWithinSearchRegion() const; 127 128 // Do we have enough data to perform one round of WSOLA? 129 bool CanPerformWsola() const; 130 131 // Number of channels in audio stream. 132 int channels_; 133 134 // Sample rate of audio stream. 135 int samples_per_second_; 136 137 // Used by algorithm to scale output. 138 float playback_rate_; 139 140 // Buffered audio data. 141 AudioBufferQueue audio_buffer_; 142 143 // True if the audio should be muted. 144 bool muted_; 145 146 // If muted, keep track of partial frames that should have been skipped over. 147 double muted_partial_frame_; 148 149 // How many frames to have in the queue before we report the queue is full. 150 int capacity_; 151 152 // Book keeping of the current time of generated audio, in frames. This 153 // should be appropriately updated when out samples are generated, regardless 154 // of whether we push samples out when FillBuffer() is called or we store 155 // audio in |wsola_output_| for the subsequent calls to FillBuffer(). 156 // Furthermore, if samples from |audio_buffer_| are evicted then this 157 // member variable should be updated based on |playback_rate_|. 158 // Note that this member should be updated ONLY by calling UpdateOutputTime(), 159 // so that |search_block_index_| is update accordingly. 160 double output_time_; 161 162 // The offset of the center frame of |search_block_| w.r.t. its first frame. 163 int search_block_center_offset_; 164 165 // Index of the beginning of the |search_block_|, in frames. 166 int search_block_index_; 167 168 // Number of Blocks to search to find the most similar one to the target 169 // frame. 170 int num_candidate_blocks_; 171 172 // Index of the beginning of the target block, counted in frames. 173 int target_block_index_; 174 175 // Overlap-and-add window size in frames. 176 int ola_window_size_; 177 178 // The hop size of overlap-and-add in frames. This implementation assumes 50% 179 // overlap-and-add. 180 int ola_hop_size_; 181 182 // Number of frames in |wsola_output_| that overlap-and-add is completed for 183 // them and can be copied to output if FillBuffer() is called. It also 184 // specifies the index where the next WSOLA window has to overlap-and-add. 185 int num_complete_frames_; 186 187 // This stores a part of the output that is created but couldn't be rendered. 188 // Output is generated frame-by-frame which at some point might exceed the 189 // number of requested samples. Furthermore, due to overlap-and-add, 190 // the last half-window of the output is incomplete, which is stored in this 191 // buffer. 192 scoped_ptr<AudioBus> wsola_output_; 193 194 // Overlap-and-add window. 195 scoped_ptr<float[]> ola_window_; 196 197 // Transition window, used to update |optimal_block_| by a weighted sum of 198 // |optimal_block_| and |target_block_|. 199 scoped_ptr<float[]> transition_window_; 200 201 // Auxiliary variables to avoid allocation in every iteration. 202 203 // Stores the optimal block in every iteration. This is the most 204 // similar block to |target_block_| within |search_block_| and it is 205 // overlap-and-added to |wsola_output_|. 206 scoped_ptr<AudioBus> optimal_block_; 207 208 // A block of data that search is performed over to find the |optimal_block_|. 209 scoped_ptr<AudioBus> search_block_; 210 211 // Stores the target block, denoted as |target| above. |search_block_| is 212 // searched for a block (|optimal_block_|) that is most similar to 213 // |target_block_|. 214 scoped_ptr<AudioBus> target_block_; 215 216 DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm); 217 }; 218 219 } // namespace media 220 221 #endif // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ 222