1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "sola_time_scaler.h" 18 19 #include <math.h> 20 #include <hlogging.h> 21 #include <algorithm> 22 23 #include "ring_buffer.h" 24 25 #define FLAGS_sola_ring_buffer 2.0 26 #define FLAGS_sola_enable_correlation true 27 28 29 namespace video_editing { 30 31 // Returns a cross-correlation score for the specified buffers. 32 int SolaAnalyzer::Correlate(const float* buffer1, const float* buffer2, 33 int num_frames) { 34 CHECK(initialized_); 35 36 int score = 0; 37 num_frames *= num_channels_; 38 while (num_frames-- > 0) { 39 // Increment the score if the sign bits match. 40 score += ((bit_cast<int32>(*buffer1++) ^ bit_cast<int32>(*buffer2++)) >= 0) 41 ? 1 : 0; 42 } 43 return score; 44 } 45 46 // Trivial SolaAnalyzer class to bypass correlation. 47 class SolaBypassAnalyzer : public SolaAnalyzer { 48 public: 49 SolaBypassAnalyzer() { } 50 virtual int Correlate(const float*, const float*, int num_frames) { 51 return num_frames * num_channels_; 52 } 53 }; 54 55 56 // Default constructor. 57 SolaTimeScaler::SolaTimeScaler() 58 : input_buffer_(NULL), output_buffer_(NULL), analyzer_(NULL) { 59 sample_rate_ = 0; 60 num_channels_ = 0; 61 62 draining_ = false; 63 initialized_ = false; 64 } 65 66 SolaTimeScaler::~SolaTimeScaler() { 67 delete input_buffer_; 68 delete output_buffer_; 69 delete analyzer_; 70 } 71 72 // Injects a SolaAnalyzer instance for analyzing signal frames. 73 void SolaTimeScaler::set_analyzer(SolaAnalyzer* analyzer) { 74 MutexLock lock(&mutex_); // lock out processing while updating 75 delete analyzer_; 76 analyzer_ = analyzer; 77 } 78 79 // Initializes a SOLA timescaler. 80 void SolaTimeScaler::Init(double sample_rate, 81 int num_channels, 82 double initial_speed, 83 double window_duration, 84 double overlap_duration) { 85 MutexLock lock(&mutex_); // lock out processing while updating 86 87 sample_rate_ = sample_rate; 88 num_channels_ = num_channels; 89 speed_ = initial_speed; 90 window_duration_ = window_duration; 91 overlap_duration_ = overlap_duration; 92 93 initialized_ = true; 94 GenerateParameters(); 95 Reset(); 96 } 97 98 // Adjusts the rate scaling factor. 99 void SolaTimeScaler::set_speed(double speed) { 100 MutexLock lock(&mutex_); // lock out processing while updating 101 102 speed_ = speed; 103 GenerateParameters(); 104 } 105 106 // Generates processing parameters from the current settings. 107 void SolaTimeScaler::GenerateParameters() { 108 if (speed_ < 0.1) { 109 LOGE("Requested speed %fx limited to 0.1x", speed_); 110 speed_ = 0.1; 111 } else if (speed_ > 8.0) { 112 LOGE("Requested speed %fx limited to 8.0x", speed_); 113 speed_ = 8.0; 114 } 115 116 ratio_ = 1.0 / speed_; 117 118 num_window_frames_ = nearbyint(sample_rate_ * window_duration_); 119 120 // Limit the overlap to half the window size, and round up to an odd number. 121 // Half of overlap window (rounded down) is also a useful number. 122 overlap_duration_ = min(overlap_duration_, window_duration_ / 2.0); 123 num_overlap_frames_ = nearbyint(sample_rate_ * overlap_duration_); 124 num_overlap_frames_ |= 1; 125 half_overlap_frames_ = num_overlap_frames_ >> 1; 126 127 if (speed_ >= 1.) { 128 // For compression (speed up), adjacent input windows overlap in the output. 129 input_window_offset_ = num_window_frames_; 130 target_merge_offset_ = nearbyint(num_window_frames_ * ratio_); 131 } else { 132 // For expansion (slow down), each input window start point overlaps the 133 // previous, and they are placed adjacently in the output 134 // (+/- half the overlap size). 135 input_window_offset_ = nearbyint(num_window_frames_ * speed_); 136 target_merge_offset_ = num_window_frames_; 137 } 138 139 // Make sure we copy enough extra data to be able to perform a 140 // frame correlation over the range of target merge point +/- half overlap, 141 // even when the previous merge point was adjusted backwards a half overlap. 142 max_frames_to_merge_ = max(num_window_frames_, 143 target_merge_offset_ + (2 * num_overlap_frames_)); 144 min_output_to_hold_= 145 max_frames_to_merge_ + num_overlap_frames_ - target_merge_offset_; 146 } 147 148 // The input buffer has one writer and reader. 149 // The output buffer has one reader/updater, and one reader/consumer. 150 static const int kInputReader = 0; 151 static const int kOutputAnalysis = 0; 152 static const int kOutputConsumer = 1; 153 154 void SolaTimeScaler::Reset() { 155 CHECK(initialized_); 156 double duration = max(FLAGS_sola_ring_buffer, 20. * window_duration_); 157 draining_ = false; 158 159 delete input_buffer_; 160 input_buffer_ = new RingBuffer(); 161 input_buffer_->Init(static_cast<int> 162 (sample_rate_ * duration), num_channels_, 1); 163 164 delete output_buffer_; 165 output_buffer_ = new RingBuffer(); 166 output_buffer_->Init(static_cast<int> 167 (sample_rate_ * ratio_ * duration), num_channels_, 2); 168 169 if (analyzer_ == NULL) { 170 if (FLAGS_sola_enable_correlation) { 171 analyzer_ = new SolaAnalyzer(); 172 } else { 173 analyzer_ = new SolaBypassAnalyzer(); 174 } 175 } 176 analyzer_->Init(sample_rate_, num_channels_); 177 } 178 179 // Returns the number of frames that the input buffer can accept. 180 int SolaTimeScaler::input_limit() const { 181 CHECK(initialized_); 182 return input_buffer_->overhead(); 183 } 184 185 // Returns the number of available output frames. 186 int SolaTimeScaler::available() { 187 CHECK(initialized_); 188 189 int available = output_buffer_->available(kOutputConsumer); 190 if (available > min_output_to_hold_) { 191 available -= min_output_to_hold_; 192 } else if (draining_) { 193 Process(); 194 available = output_buffer_->available(kOutputConsumer); 195 if (available > min_output_to_hold_) { 196 available -= min_output_to_hold_; 197 } 198 } else { 199 available = 0; 200 } 201 return available; 202 } 203 204 void SolaTimeScaler::Drain() { 205 CHECK(initialized_); 206 207 draining_ = true; 208 } 209 210 211 // Feeds audio to the timescaler, and processes as much data as possible. 212 int SolaTimeScaler::InjectSamples(float* buffer, int num_frames) { 213 CHECK(initialized_); 214 215 // Do not write more frames than the buffer can accept. 216 num_frames = min(input_limit(), num_frames); 217 if (!num_frames) { 218 return 0; 219 } 220 221 // Copy samples to the input buffer and then process whatever can be consumed. 222 input_buffer_->Write(buffer, num_frames); 223 Process(); 224 return num_frames; 225 } 226 227 // Retrieves audio data from the timescaler. 228 int SolaTimeScaler::RetrieveSamples(float* buffer, int num_frames) { 229 CHECK(initialized_); 230 231 // Do not read more frames than available. 232 num_frames = min(available(), num_frames); 233 if (!num_frames) { 234 return 0; 235 } 236 237 output_buffer_->Copy(kOutputConsumer, buffer, num_frames); 238 output_buffer_->Seek(kOutputConsumer, 239 output_buffer_->Tell(kOutputConsumer) + num_frames); 240 241 return num_frames; 242 } 243 244 // Munges input samples to produce output. 245 bool SolaTimeScaler::Process() { 246 CHECK(initialized_); 247 bool generated_data = false; 248 249 // We can only process data if there is sufficient input available 250 // (or we are draining the latency), and there is sufficient room 251 // for output to be merged. 252 while (((input_buffer_->available(kInputReader) > max_frames_to_merge_) || 253 draining_) && (output_buffer_->overhead() >= max_frames_to_merge_)) { 254 MutexLock lock(&mutex_); // lock out updates while processing each window 255 256 // Determine the number of samples to merge into the output. 257 int input_count = 258 min(input_buffer_->available(kInputReader), max_frames_to_merge_); 259 if (input_count == 0) { 260 break; 261 } 262 // The input reader always points to the next window to process. 263 float* input_pointer = input_buffer_->GetPointer(kInputReader, input_count); 264 265 // The analysis reader always points to the ideal target merge point, 266 // minus half an overlap window (ie, the starting point for correlation). 267 // That means the available data from that point equals the number 268 // of samples that must be cross-faded. 269 int output_merge_cnt = output_buffer_->available(kOutputAnalysis); 270 float* output_pointer = 271 output_buffer_->GetPointer(kOutputAnalysis, output_merge_cnt); 272 273 // If there is not enough data to do a proper correlation, 274 // just merge at the ideal target point. Otherwise, 275 // find the best correlation score, working from the center out. 276 int merge_offset = min(output_merge_cnt, half_overlap_frames_); 277 278 if ((output_merge_cnt >= (2 * num_overlap_frames_)) && 279 (input_count >= num_overlap_frames_)) { 280 int best_offset = merge_offset; 281 int best_score = 0; 282 int score; 283 for (int i = 0; i <= half_overlap_frames_; ++i) { 284 score = analyzer_->Correlate(input_pointer, 285 output_pointer + ((merge_offset + i) * num_channels_), 286 num_overlap_frames_); 287 if (score > best_score) { 288 best_score = score; 289 best_offset = merge_offset + i; 290 if (score == (num_overlap_frames_ * num_channels_)) { 291 break; // It doesn't get better than perfect. 292 } 293 } 294 if (i > 0) { 295 score = analyzer_->Correlate(input_pointer, 296 output_pointer + ((merge_offset - i) * num_channels_), 297 num_overlap_frames_); 298 if (score > best_score) { 299 best_score = score; 300 best_offset = merge_offset - i; 301 if (score == (num_overlap_frames_ * num_channels_)) { 302 break; // It doesn't get better than perfect. 303 } 304 } 305 } 306 } 307 merge_offset = best_offset; 308 } else if ((output_merge_cnt > 0) && !draining_) { 309 LOGE("no correlation performed"); 310 } 311 312 // Crossfade the overlap between input and output, and then 313 // copy in the remaining input. 314 int crossfade_count = max(0, (output_merge_cnt - merge_offset)); 315 crossfade_count = min(crossfade_count, input_count); 316 int remaining_count = input_count - crossfade_count; 317 318 float* merge_pointer = output_pointer + (merge_offset * num_channels_); 319 float flt_count = static_cast<float>(crossfade_count); 320 for (int i = 0; i < crossfade_count; ++i) { 321 // Linear cross-fade, for now. 322 float input_scale = static_cast<float>(i) / flt_count; 323 float output_scale = 1. - input_scale; 324 for (int j = 0; j < num_channels_; ++j) { 325 *merge_pointer = (*merge_pointer * output_scale) + 326 (*input_pointer++ * input_scale); 327 ++merge_pointer; 328 } 329 } 330 // Copy the merged buffer back into the output, if necessary, and 331 // append the rest of the window. 332 output_buffer_->MergeBack(kOutputAnalysis, 333 output_pointer, output_merge_cnt); 334 output_buffer_->Write(input_pointer, remaining_count); 335 336 // Advance the output analysis pointer to the next target merge point, 337 // minus half an overlap window. The target merge point is always 338 // calculated as a delta from the previous ideal target, not the actual 339 // target, to avoid drift. 340 int output_advance = target_merge_offset_; 341 if (output_merge_cnt < half_overlap_frames_) { 342 // On the first window, back up the pointer for the next correlation. 343 // Thereafter, that compensation is preserved. 344 output_advance -= half_overlap_frames_; 345 } 346 347 // Don't advance beyond the available data, when finishing up. 348 if (draining_) { 349 output_advance = 350 min(output_advance, output_buffer_->available(kOutputAnalysis)); 351 } 352 output_buffer_->Seek(kOutputAnalysis, 353 output_buffer_->Tell(kOutputAnalysis) + output_advance); 354 355 // Advance the input pointer beyond the frames that are no longer needed. 356 input_buffer_->Seek(kInputReader, input_buffer_->Tell(kInputReader) + 357 min(input_count, input_window_offset_)); 358 359 if ((crossfade_count + remaining_count) > 0) { 360 generated_data = true; 361 } 362 } // while (more to process) 363 return generated_data; 364 } 365 366 } // namespace video_editing 367