1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "media/filters/audio_renderer_algorithm.h" 6 7 #include <algorithm> 8 #include <cmath> 9 10 #include "base/logging.h" 11 #include "base/memory/scoped_ptr.h" 12 #include "media/base/audio_buffer.h" 13 #include "media/base/audio_bus.h" 14 #include "media/base/limits.h" 15 #include "media/filters/wsola_internals.h" 16 17 namespace media { 18 19 20 // Waveform Similarity Overlap-and-add (WSOLA). 21 // 22 // One WSOLA iteration 23 // 24 // 1) Extract |target_block_| as input frames at indices 25 // [|target_block_index_|, |target_block_index_| + |ola_window_size_|). 26 // Note that |target_block_| is the "natural" continuation of the output. 27 // 28 // 2) Extract |search_block_| as input frames at indices 29 // [|search_block_index_|, 30 // |search_block_index_| + |num_candidate_blocks_| + |ola_window_size_|). 31 // 32 // 3) Find a block within the |search_block_| that is most similar 33 // to |target_block_|. Let |optimal_index| be the index of such block and 34 // write it to |optimal_block_|. 35 // 36 // 4) Update: 37 // |optimal_block_| = |transition_window_| * |target_block_| + 38 // (1 - |transition_window_|) * |optimal_block_|. 39 // 40 // 5) Overlap-and-add |optimal_block_| to the |wsola_output_|. 41 // 42 // 6) Update: 43 // |target_block_| = |optimal_index| + |ola_window_size_| / 2. 44 // |output_index_| = |output_index_| + |ola_window_size_| / 2, 45 // |search_block_center_offset_| = |output_index_| * |playback_rate_|, and 46 // |search_block_index_| = |search_block_center_offset_| - 47 // |search_block_center_offset_|. 48 49 // Max/min supported playback rates for fast/slow audio. Audio outside of these 50 // ranges are muted. 51 // Audio at these speeds would sound better under a frequency domain algorithm. 52 static const float kMinPlaybackRate = 0.5f; 53 static const float kMaxPlaybackRate = 4.0f; 54 55 // Overlap-and-add window size in milliseconds. 56 static const int kOlaWindowSizeMs = 20; 57 58 // Size of search interval in milliseconds. The search interval is 59 // [-delta delta] around |output_index_| * |playback_rate_|. So the search 60 // interval is 2 * delta. 61 static const int kWsolaSearchIntervalMs = 30; 62 63 // The maximum size in seconds for the |audio_buffer_|. Arbitrarily determined. 64 static const int kMaxCapacityInSeconds = 3; 65 66 // The starting size in frames for |audio_buffer_|. Previous usage maintained a 67 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we 68 // maintain this number of frames. 69 static const int kStartingBufferSizeInFrames = 16 * 512; 70 71 COMPILE_ASSERT(kStartingBufferSizeInFrames < 72 (kMaxCapacityInSeconds * limits::kMinSampleRate), 73 max_capacity_smaller_than_starting_buffer_size); 74 75 AudioRendererAlgorithm::AudioRendererAlgorithm() 76 : channels_(0), 77 samples_per_second_(0), 78 playback_rate_(0), 79 muted_(false), 80 muted_partial_frame_(0), 81 capacity_(kStartingBufferSizeInFrames), 82 output_time_(0.0), 83 search_block_center_offset_(0), 84 search_block_index_(0), 85 num_candidate_blocks_(0), 86 target_block_index_(0), 87 ola_window_size_(0), 88 ola_hop_size_(0), 89 num_complete_frames_(0) { 90 } 91 92 AudioRendererAlgorithm::~AudioRendererAlgorithm() {} 93 94 void AudioRendererAlgorithm::Initialize(float initial_playback_rate, 95 const AudioParameters& params) { 96 CHECK(params.IsValid()); 97 98 channels_ = params.channels(); 99 samples_per_second_ = params.sample_rate(); 100 SetPlaybackRate(initial_playback_rate); 101 num_candidate_blocks_ = (kWsolaSearchIntervalMs * samples_per_second_) / 1000; 102 ola_window_size_ = kOlaWindowSizeMs * samples_per_second_ / 1000; 103 104 // Make sure window size in an even number. 105 ola_window_size_ += ola_window_size_ & 1; 106 ola_hop_size_ = ola_window_size_ / 2; 107 108 // |num_candidate_blocks_| / 2 is the offset of the center of the search 109 // block to the center of the first (left most) candidate block. The offset 110 // of the center of a candidate block to its left most point is 111 // |ola_window_size_| / 2 - 1. Note that |ola_window_size_| is even and in 112 // our convention the center belongs to the left half, so we need to subtract 113 // one frame to get the correct offset. 114 // 115 // Search Block 116 // <-------------------------------------------> 117 // 118 // |ola_window_size_| / 2 - 1 119 // <---- 120 // 121 // |num_candidate_blocks_| / 2 122 // <---------------- 123 // center 124 // X----X----------------X---------------X-----X 125 // <----------> <----------> 126 // Candidate ... Candidate 127 // 1, ... |num_candidate_blocks_| 128 search_block_center_offset_ = num_candidate_blocks_ / 2 + 129 (ola_window_size_ / 2 - 1); 130 131 ola_window_.reset(new float[ola_window_size_]); 132 internal::GetSymmetricHanningWindow(ola_window_size_, ola_window_.get()); 133 134 transition_window_.reset(new float[ola_window_size_ * 2]); 135 internal::GetSymmetricHanningWindow(2 * ola_window_size_, 136 transition_window_.get()); 137 138 wsola_output_ = AudioBus::Create(channels_, ola_window_size_ + ola_hop_size_); 139 wsola_output_->Zero(); // Initialize for overlap-and-add of the first block. 140 141 // Auxiliary containers. 142 optimal_block_ = AudioBus::Create(channels_, ola_window_size_); 143 search_block_ = AudioBus::Create( 144 channels_, num_candidate_blocks_ + (ola_window_size_ - 1)); 145 target_block_ = AudioBus::Create(channels_, ola_window_size_); 146 } 147 148 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) { 149 if (playback_rate_ == 0) 150 return 0; 151 152 DCHECK_EQ(channels_, dest->channels()); 153 154 // Optimize the |muted_| case to issue a single clear instead of performing 155 // the full crossfade and clearing each crossfaded frame. 156 if (muted_) { 157 int frames_to_render = 158 std::min(static_cast<int>(audio_buffer_.frames() / playback_rate_), 159 requested_frames); 160 161 // Compute accurate number of frames to actually skip in the source data. 162 // Includes the leftover partial frame from last request. However, we can 163 // only skip over complete frames, so a partial frame may remain for next 164 // time. 165 muted_partial_frame_ += frames_to_render * playback_rate_; 166 int seek_frames = static_cast<int>(muted_partial_frame_); 167 dest->ZeroFrames(frames_to_render); 168 audio_buffer_.SeekFrames(seek_frames); 169 170 // Determine the partial frame that remains to be skipped for next call. If 171 // the user switches back to playing, it may be off time by this partial 172 // frame, which would be undetectable. If they subsequently switch to 173 // another playback rate that mutes, the code will attempt to line up the 174 // frames again. 175 muted_partial_frame_ -= seek_frames; 176 return frames_to_render; 177 } 178 179 int slower_step = ceil(ola_window_size_ * playback_rate_); 180 int faster_step = ceil(ola_window_size_ / playback_rate_); 181 182 // Optimize the most common |playback_rate_| ~= 1 case to use a single copy 183 // instead of copying frame by frame. 184 if (ola_window_size_ <= faster_step && slower_step >= ola_window_size_) { 185 const int frames_to_copy = 186 std::min(audio_buffer_.frames(), requested_frames); 187 const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest); 188 DCHECK_EQ(frames_read, frames_to_copy); 189 return frames_read; 190 } 191 192 int rendered_frames = 0; 193 do { 194 rendered_frames += WriteCompletedFramesTo( 195 requested_frames - rendered_frames, rendered_frames, dest); 196 } while (rendered_frames < requested_frames && RunOneWsolaIteration()); 197 return rendered_frames; 198 } 199 200 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) { 201 DCHECK_GE(new_rate, 0); 202 playback_rate_ = new_rate; 203 muted_ = 204 playback_rate_ < kMinPlaybackRate || playback_rate_ > kMaxPlaybackRate; 205 } 206 207 void AudioRendererAlgorithm::FlushBuffers() { 208 // Clear the queue of decoded packets (releasing the buffers). 209 audio_buffer_.Clear(); 210 output_time_ = 0.0; 211 search_block_index_ = 0; 212 target_block_index_ = 0; 213 wsola_output_->Zero(); 214 num_complete_frames_ = 0; 215 216 // Reset |capacity_| so growth triggered by underflows doesn't penalize 217 // seek time. 218 capacity_ = kStartingBufferSizeInFrames; 219 } 220 221 base::TimeDelta AudioRendererAlgorithm::GetTime() { 222 return audio_buffer_.current_time(); 223 } 224 225 void AudioRendererAlgorithm::EnqueueBuffer( 226 const scoped_refptr<AudioBuffer>& buffer_in) { 227 DCHECK(!buffer_in->end_of_stream()); 228 audio_buffer_.Append(buffer_in); 229 } 230 231 bool AudioRendererAlgorithm::IsQueueFull() { 232 return audio_buffer_.frames() >= capacity_; 233 } 234 235 void AudioRendererAlgorithm::IncreaseQueueCapacity() { 236 int max_capacity = kMaxCapacityInSeconds * samples_per_second_; 237 DCHECK_LE(capacity_, max_capacity); 238 239 capacity_ = std::min(2 * capacity_, max_capacity); 240 } 241 242 bool AudioRendererAlgorithm::CanPerformWsola() const { 243 const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1); 244 const int frames = audio_buffer_.frames(); 245 return target_block_index_ + ola_window_size_ <= frames && 246 search_block_index_ + search_block_size <= frames; 247 } 248 249 bool AudioRendererAlgorithm::RunOneWsolaIteration() { 250 if (!CanPerformWsola()) 251 return false; 252 253 GetOptimalBlock(); 254 255 // Overlap-and-add. 256 for (int k = 0; k < channels_; ++k) { 257 const float* const ch_opt_frame = optimal_block_->channel(k); 258 float* ch_output = wsola_output_->channel(k) + num_complete_frames_; 259 for (int n = 0; n < ola_hop_size_; ++n) { 260 ch_output[n] = ch_output[n] * ola_window_[ola_hop_size_ + n] + 261 ch_opt_frame[n] * ola_window_[n]; 262 } 263 264 // Copy the second half to the output. 265 memcpy(&ch_output[ola_hop_size_], &ch_opt_frame[ola_hop_size_], 266 sizeof(*ch_opt_frame) * ola_hop_size_); 267 } 268 269 num_complete_frames_ += ola_hop_size_; 270 UpdateOutputTime(ola_hop_size_); 271 RemoveOldInputFrames(); 272 return true; 273 } 274 275 void AudioRendererAlgorithm::UpdateOutputTime(double time_change) { 276 output_time_ += time_change; 277 // Center of the search region, in frames. 278 const int search_block_center_index = static_cast<int>( 279 output_time_ * playback_rate_ + 0.5); 280 search_block_index_ = search_block_center_index - search_block_center_offset_; 281 } 282 283 void AudioRendererAlgorithm::RemoveOldInputFrames() { 284 const int earliest_used_index = std::min(target_block_index_, 285 search_block_index_); 286 if (earliest_used_index <= 0) 287 return; // Nothing to remove. 288 289 // Remove frames from input and adjust indices accordingly. 290 audio_buffer_.SeekFrames(earliest_used_index); 291 target_block_index_ -= earliest_used_index; 292 293 // Adjust output index. 294 double output_time_change = static_cast<double>(earliest_used_index) / 295 playback_rate_; 296 CHECK_GE(output_time_, output_time_change); 297 UpdateOutputTime(-output_time_change); 298 } 299 300 int AudioRendererAlgorithm::WriteCompletedFramesTo( 301 int requested_frames, int dest_offset, AudioBus* dest) { 302 int rendered_frames = std::min(num_complete_frames_, requested_frames); 303 304 if (rendered_frames == 0) 305 return 0; // There is nothing to read from |wsola_output_|, return. 306 307 wsola_output_->CopyPartialFramesTo(0, rendered_frames, dest_offset, dest); 308 309 // Remove the frames which are read. 310 int frames_to_move = wsola_output_->frames() - rendered_frames; 311 for (int k = 0; k < channels_; ++k) { 312 float* ch = wsola_output_->channel(k); 313 memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move); 314 } 315 num_complete_frames_ -= rendered_frames; 316 return rendered_frames; 317 } 318 319 bool AudioRendererAlgorithm::TargetIsWithinSearchRegion() const { 320 const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1); 321 322 return target_block_index_ >= search_block_index_ && 323 target_block_index_ + ola_window_size_ <= 324 search_block_index_ + search_block_size; 325 } 326 327 void AudioRendererAlgorithm::GetOptimalBlock() { 328 int optimal_index = 0; 329 330 // An interval around last optimal block which is excluded from the search. 331 // This is to reduce the buzzy sound. The number 160 is rather arbitrary and 332 // derived heuristically. 333 const int kExcludeIntervalLengthFrames = 160; 334 if (TargetIsWithinSearchRegion()) { 335 optimal_index = target_block_index_; 336 PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get()); 337 } else { 338 PeekAudioWithZeroPrepend(target_block_index_, target_block_.get()); 339 PeekAudioWithZeroPrepend(search_block_index_, search_block_.get()); 340 int last_optimal = target_block_index_ - ola_hop_size_ - 341 search_block_index_; 342 internal::Interval exclude_iterval = std::make_pair( 343 last_optimal - kExcludeIntervalLengthFrames / 2, 344 last_optimal + kExcludeIntervalLengthFrames / 2); 345 346 // |optimal_index| is in frames and it is relative to the beginning of the 347 // |search_block_|. 348 optimal_index = internal::OptimalIndex( 349 search_block_.get(), target_block_.get(), exclude_iterval); 350 351 // Translate |index| w.r.t. the beginning of |audio_buffer_| and extract the 352 // optimal block. 353 optimal_index += search_block_index_; 354 PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get()); 355 356 // Make a transition from target block to the optimal block if different. 357 // Target block has the best continuation to the current output. 358 // Optimal block is the most similar block to the target, however, it might 359 // introduce some discontinuity when over-lap-added. Therefore, we combine 360 // them for a smoother transition. The length of transition window is twice 361 // as that of the optimal-block which makes it like a weighting function 362 // where target-block has higher weight close to zero (weight of 1 at index 363 // 0) and lower weight close the end. 364 for (int k = 0; k < channels_; ++k) { 365 float* ch_opt = optimal_block_->channel(k); 366 const float* const ch_target = target_block_->channel(k); 367 for (int n = 0; n < ola_window_size_; ++n) { 368 ch_opt[n] = ch_opt[n] * transition_window_[n] + ch_target[n] * 369 transition_window_[ola_window_size_ + n]; 370 } 371 } 372 } 373 374 // Next target is one hop ahead of the current optimal. 375 target_block_index_ = optimal_index + ola_hop_size_; 376 } 377 378 void AudioRendererAlgorithm::PeekAudioWithZeroPrepend( 379 int read_offset_frames, AudioBus* dest) { 380 CHECK_LE(read_offset_frames + dest->frames(), audio_buffer_.frames()); 381 382 int write_offset = 0; 383 int num_frames_to_read = dest->frames(); 384 if (read_offset_frames < 0) { 385 int num_zero_frames_appended = std::min(-read_offset_frames, 386 num_frames_to_read); 387 read_offset_frames = 0; 388 num_frames_to_read -= num_zero_frames_appended; 389 write_offset = num_zero_frames_appended; 390 dest->ZeroFrames(num_zero_frames_appended); 391 } 392 audio_buffer_.PeekFrames(num_frames_to_read, read_offset_frames, 393 write_offset, dest); 394 } 395 396 } // namespace media 397