Home | History | Annotate | Download | only in jni
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "sola_time_scaler.h"
     18 
     19 #include <math.h>
     20 #include <hlogging.h>
     21 #include <algorithm>
     22 
     23 #include "ring_buffer.h"
     24 
     25 #define FLAGS_sola_ring_buffer 2.0
     26 #define FLAGS_sola_enable_correlation true
     27 
     28 
     29 namespace video_editing {
     30 
     31 // Returns a cross-correlation score for the specified buffers.
     32 int SolaAnalyzer::Correlate(const float* buffer1, const float* buffer2,
     33                             int num_frames) {
     34   CHECK(initialized_);
     35 
     36   int score = 0;
     37   num_frames *= num_channels_;
     38   while (num_frames-- > 0) {
     39     // Increment the score if the sign bits match.
     40     score += ((bit_cast<int32>(*buffer1++) ^ bit_cast<int32>(*buffer2++)) >= 0)
     41               ? 1 : 0;
     42   }
     43   return score;
     44 }
     45 
     46 // Trivial SolaAnalyzer class to bypass correlation.
     47 class SolaBypassAnalyzer : public SolaAnalyzer {
     48  public:
     49   SolaBypassAnalyzer() { }
     50   virtual int Correlate(const float*, const float*, int num_frames) {
     51     return num_frames * num_channels_;
     52   }
     53 };
     54 
     55 
     56 // Default constructor.
     57 SolaTimeScaler::SolaTimeScaler()
     58     : input_buffer_(NULL), output_buffer_(NULL), analyzer_(NULL) {
     59   sample_rate_ = 0;
     60   num_channels_ = 0;
     61 
     62   draining_ = false;
     63   initialized_ = false;
     64 }
     65 
     66 SolaTimeScaler::~SolaTimeScaler() {
     67   delete input_buffer_;
     68   delete output_buffer_;
     69   delete analyzer_;
     70 }
     71 
     72 // Injects a SolaAnalyzer instance for analyzing signal frames.
     73 void SolaTimeScaler::set_analyzer(SolaAnalyzer* analyzer) {
     74   MutexLock lock(&mutex_);  // lock out processing while updating
     75   delete analyzer_;
     76   analyzer_ = analyzer;
     77 }
     78 
     79 // Initializes a SOLA timescaler.
     80 void SolaTimeScaler::Init(double sample_rate,
     81                           int num_channels,
     82                           double initial_speed,
     83                           double window_duration,
     84                           double overlap_duration) {
     85   MutexLock lock(&mutex_);  // lock out processing while updating
     86 
     87   sample_rate_ = sample_rate;
     88   num_channels_ = num_channels;
     89   speed_ = initial_speed;
     90   window_duration_ = window_duration;
     91   overlap_duration_ = overlap_duration;
     92 
     93   initialized_ = true;
     94   GenerateParameters();
     95   Reset();
     96 }
     97 
     98 // Adjusts the rate scaling factor.
     99 void SolaTimeScaler::set_speed(double speed) {
    100   MutexLock lock(&mutex_);  // lock out processing while updating
    101 
    102   speed_ = speed;
    103   GenerateParameters();
    104 }
    105 
    106 // Generates processing parameters from the current settings.
    107 void SolaTimeScaler::GenerateParameters() {
    108   if (speed_ < 0.1) {
    109     LOGE("Requested speed %fx limited to 0.1x", speed_);
    110     speed_ = 0.1;
    111   } else if (speed_ > 8.0) {
    112     LOGE("Requested speed %fx limited to 8.0x", speed_);
    113     speed_ = 8.0;
    114   }
    115 
    116   ratio_ = 1.0 / speed_;
    117 
    118   num_window_frames_ = nearbyint(sample_rate_ * window_duration_);
    119 
    120   // Limit the overlap to half the window size, and round up to an odd number.
    121   // Half of overlap window (rounded down) is also a useful number.
    122   overlap_duration_ = min(overlap_duration_, window_duration_ / 2.0);
    123   num_overlap_frames_ = nearbyint(sample_rate_ * overlap_duration_);
    124   num_overlap_frames_ |= 1;
    125   half_overlap_frames_ = num_overlap_frames_ >> 1;
    126 
    127   if (speed_ >= 1.) {
    128     // For compression (speed up), adjacent input windows overlap in the output.
    129     input_window_offset_ = num_window_frames_;
    130     target_merge_offset_ = nearbyint(num_window_frames_ * ratio_);
    131   } else {
    132     // For expansion (slow down), each input window start point overlaps the
    133     // previous, and they are placed adjacently in the output
    134     // (+/- half the overlap size).
    135     input_window_offset_ = nearbyint(num_window_frames_ * speed_);
    136     target_merge_offset_ = num_window_frames_;
    137   }
    138 
    139   // Make sure we copy enough extra data to be able to perform a
    140   // frame correlation over the range of target merge point +/- half overlap,
    141   // even when the previous merge point was adjusted backwards a half overlap.
    142   max_frames_to_merge_ = max(num_window_frames_,
    143       target_merge_offset_ + (2 * num_overlap_frames_));
    144   min_output_to_hold_=
    145       max_frames_to_merge_ + num_overlap_frames_ - target_merge_offset_;
    146 }
    147 
    148 // The input buffer has one writer and reader.
    149 // The output buffer has one reader/updater, and one reader/consumer.
    150 static const int kInputReader = 0;
    151 static const int kOutputAnalysis = 0;
    152 static const int kOutputConsumer = 1;
    153 
    154 void SolaTimeScaler::Reset() {
    155   CHECK(initialized_);
    156   double duration = max(FLAGS_sola_ring_buffer, 20. * window_duration_);
    157   draining_ = false;
    158 
    159   delete input_buffer_;
    160   input_buffer_ = new RingBuffer();
    161   input_buffer_->Init(static_cast<int>
    162       (sample_rate_ * duration), num_channels_, 1);
    163 
    164   delete output_buffer_;
    165   output_buffer_ = new RingBuffer();
    166   output_buffer_->Init(static_cast<int>
    167       (sample_rate_ * ratio_ * duration), num_channels_, 2);
    168 
    169   if (analyzer_ == NULL) {
    170     if (FLAGS_sola_enable_correlation) {
    171       analyzer_ = new SolaAnalyzer();
    172     } else {
    173       analyzer_ = new SolaBypassAnalyzer();
    174     }
    175   }
    176   analyzer_->Init(sample_rate_, num_channels_);
    177 }
    178 
    179 // Returns the number of frames that the input buffer can accept.
    180 int SolaTimeScaler::input_limit() const {
    181   CHECK(initialized_);
    182   return input_buffer_->overhead();
    183 }
    184 
    185 // Returns the number of available output frames.
    186 int SolaTimeScaler::available() {
    187   CHECK(initialized_);
    188 
    189   int available = output_buffer_->available(kOutputConsumer);
    190   if (available > min_output_to_hold_) {
    191     available -= min_output_to_hold_;
    192   } else if (draining_) {
    193     Process();
    194     available = output_buffer_->available(kOutputConsumer);
    195     if (available > min_output_to_hold_) {
    196       available -= min_output_to_hold_;
    197     }
    198   } else {
    199     available = 0;
    200   }
    201   return available;
    202 }
    203 
    204 void SolaTimeScaler::Drain() {
    205   CHECK(initialized_);
    206 
    207   draining_ = true;
    208 }
    209 
    210 
    211 // Feeds audio to the timescaler, and processes as much data as possible.
    212 int SolaTimeScaler::InjectSamples(float* buffer, int num_frames) {
    213   CHECK(initialized_);
    214 
    215   // Do not write more frames than the buffer can accept.
    216   num_frames = min(input_limit(), num_frames);
    217   if (!num_frames) {
    218     return 0;
    219   }
    220 
    221   // Copy samples to the input buffer and then process whatever can be consumed.
    222   input_buffer_->Write(buffer, num_frames);
    223   Process();
    224   return num_frames;
    225 }
    226 
    227 // Retrieves audio data from the timescaler.
    228 int SolaTimeScaler::RetrieveSamples(float* buffer, int num_frames) {
    229   CHECK(initialized_);
    230 
    231   // Do not read more frames than available.
    232   num_frames = min(available(), num_frames);
    233   if (!num_frames) {
    234     return 0;
    235   }
    236 
    237   output_buffer_->Copy(kOutputConsumer, buffer, num_frames);
    238   output_buffer_->Seek(kOutputConsumer,
    239                        output_buffer_->Tell(kOutputConsumer) + num_frames);
    240 
    241   return num_frames;
    242 }
    243 
    244 // Munges input samples to produce output.
    245 bool SolaTimeScaler::Process() {
    246   CHECK(initialized_);
    247   bool generated_data = false;
    248 
    249   // We can only process data if there is sufficient input available
    250   // (or we are draining the latency), and there is sufficient room
    251   // for output to be merged.
    252   while (((input_buffer_->available(kInputReader) > max_frames_to_merge_) ||
    253          draining_) && (output_buffer_->overhead() >= max_frames_to_merge_)) {
    254     MutexLock lock(&mutex_);  // lock out updates while processing each window
    255 
    256     // Determine the number of samples to merge into the output.
    257     int input_count =
    258         min(input_buffer_->available(kInputReader), max_frames_to_merge_);
    259     if (input_count == 0) {
    260       break;
    261     }
    262     // The input reader always points to the next window to process.
    263     float* input_pointer = input_buffer_->GetPointer(kInputReader, input_count);
    264 
    265     // The analysis reader always points to the ideal target merge point,
    266     // minus half an overlap window (ie, the starting point for correlation).
    267     // That means the available data from that point equals the number
    268     // of samples that must be cross-faded.
    269     int output_merge_cnt = output_buffer_->available(kOutputAnalysis);
    270     float* output_pointer =
    271         output_buffer_->GetPointer(kOutputAnalysis, output_merge_cnt);
    272 
    273     // If there is not enough data to do a proper correlation,
    274     // just merge at the ideal target point. Otherwise,
    275     // find the best correlation score, working from the center out.
    276     int merge_offset = min(output_merge_cnt, half_overlap_frames_);
    277 
    278     if ((output_merge_cnt >= (2 * num_overlap_frames_)) &&
    279         (input_count >= num_overlap_frames_)) {
    280       int best_offset = merge_offset;
    281       int best_score = 0;
    282       int score;
    283       for (int i = 0; i <= half_overlap_frames_; ++i) {
    284         score = analyzer_->Correlate(input_pointer,
    285             output_pointer + ((merge_offset + i) * num_channels_),
    286             num_overlap_frames_);
    287         if (score > best_score) {
    288           best_score = score;
    289           best_offset = merge_offset + i;
    290           if (score == (num_overlap_frames_ * num_channels_)) {
    291             break;  // It doesn't get better than perfect.
    292           }
    293         }
    294         if (i > 0) {
    295           score = analyzer_->Correlate(input_pointer,
    296               output_pointer + ((merge_offset - i) * num_channels_),
    297               num_overlap_frames_);
    298           if (score > best_score) {
    299             best_score = score;
    300             best_offset = merge_offset - i;
    301             if (score == (num_overlap_frames_ * num_channels_)) {
    302               break;  // It doesn't get better than perfect.
    303             }
    304           }
    305         }
    306       }
    307       merge_offset = best_offset;
    308     } else if ((output_merge_cnt > 0) && !draining_) {
    309       LOGE("no correlation performed");
    310     }
    311 
    312     // Crossfade the overlap between input and output, and then
    313     // copy in the remaining input.
    314     int crossfade_count = max(0, (output_merge_cnt - merge_offset));
    315     crossfade_count = min(crossfade_count, input_count);
    316     int remaining_count = input_count - crossfade_count;
    317 
    318     float* merge_pointer = output_pointer + (merge_offset * num_channels_);
    319     float flt_count = static_cast<float>(crossfade_count);
    320     for (int i = 0; i < crossfade_count; ++i) {
    321       // Linear cross-fade, for now.
    322       float input_scale = static_cast<float>(i) / flt_count;
    323       float output_scale = 1. - input_scale;
    324       for (int j = 0; j < num_channels_; ++j) {
    325         *merge_pointer = (*merge_pointer * output_scale) +
    326                          (*input_pointer++ * input_scale);
    327         ++merge_pointer;
    328       }
    329     }
    330     // Copy the merged buffer back into the output, if necessary, and
    331     // append the rest of the window.
    332     output_buffer_->MergeBack(kOutputAnalysis,
    333                               output_pointer, output_merge_cnt);
    334     output_buffer_->Write(input_pointer, remaining_count);
    335 
    336     // Advance the output analysis pointer to the next target merge point,
    337     // minus half an overlap window.  The target merge point is always
    338     // calculated as a delta from the previous ideal target, not the actual
    339     // target, to avoid drift.
    340     int output_advance = target_merge_offset_;
    341     if (output_merge_cnt < half_overlap_frames_) {
    342       // On the first window, back up the pointer for the next correlation.
    343       // Thereafter, that compensation is preserved.
    344       output_advance -= half_overlap_frames_;
    345     }
    346 
    347     // Don't advance beyond the available data, when finishing up.
    348     if (draining_) {
    349       output_advance =
    350           min(output_advance, output_buffer_->available(kOutputAnalysis));
    351     }
    352     output_buffer_->Seek(kOutputAnalysis,
    353         output_buffer_->Tell(kOutputAnalysis) + output_advance);
    354 
    355     // Advance the input pointer beyond the frames that are no longer needed.
    356     input_buffer_->Seek(kInputReader, input_buffer_->Tell(kInputReader) +
    357                         min(input_count, input_window_offset_));
    358 
    359     if ((crossfade_count + remaining_count) > 0) {
    360       generated_data = true;
    361     }
    362   }  // while (more to process)
    363   return generated_data;
    364 }
    365 
    366 }  // namespace video_editing
    367