/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_processing/audio_buffer.h"

#include <assert.h>
#include <limits>

#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

namespace webrtc {
namespace {

// Samples per channel for a 10 ms frame at each supported sample rate.
enum {
  kSamplesPer8kHzChannel = 80,
  kSamplesPer16kHzChannel = 160,
  kSamplesPer32kHzChannel = 320
};

bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      return false;
    case AudioProcessing::kMonoAndKeyboard:
    case AudioProcessing::kStereoAndKeyboard:
      return true;
  }
  assert(false);
  return false;
}

int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      assert(false);
      return -1;
    case AudioProcessing::kMonoAndKeyboard:
      return 1;
    case AudioProcessing::kStereoAndKeyboard:
      return 2;
  }
  assert(false);
  return -1;
}

void StereoToMono(const float* left, const float* right, float* out,
                  int samples_per_channel) {
  for (int i = 0; i < samples_per_channel; ++i) {
    out[i] = (left[i] + right[i]) / 2;
  }
}

void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
                  int samples_per_channel) {
  for (int i = 0; i < samples_per_channel; ++i) {
    out[i] = (left[i] + right[i]) >> 1;
  }
}

}  // namespace

// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
// broken when someone requests write access to either ChannelBuffer, and
// reestablished when someone requests the outdated ChannelBuffer. It is
// therefore safe to use the return value of ibuf_const() and fbuf_const()
// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
// fbuf() until the next call to any of the other functions.
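//
// A minimal usage sketch (illustrative only; the literal values are arbitrary
// and the float buffer holds values in the int16_t range, not normalized to
// [-1, 1]):
//
//   IFChannelBuffer buf(kSamplesPer16kHzChannel, 2);
//   buf.fbuf()->channel(0)[0] = 1000.f;  // Write via the float view; the
//                                        // int16_t view is now stale.
//   const int16_t* ints = buf.ibuf_const()->channel(0);
//   // ints[0] == 1000; RefreshI() performed the saturating conversion here.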
class IFChannelBuffer {
 public:
  IFChannelBuffer(int samples_per_channel, int num_channels)
      : ivalid_(true),
        ibuf_(samples_per_channel, num_channels),
        fvalid_(true),
        fbuf_(samples_per_channel, num_channels) {}

  ChannelBuffer<int16_t>* ibuf() { return ibuf(false); }
  ChannelBuffer<float>* fbuf() { return fbuf(false); }
  const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); }
  const ChannelBuffer<float>* fbuf_const() { return fbuf(true); }

 private:
  ChannelBuffer<int16_t>* ibuf(bool readonly) {
    RefreshI();
    fvalid_ = readonly;
    return &ibuf_;
  }

  ChannelBuffer<float>* fbuf(bool readonly) {
    RefreshF();
    ivalid_ = readonly;
    return &fbuf_;
  }

  void RefreshF() {
    if (!fvalid_) {
      assert(ivalid_);
      const int16_t* const int_data = ibuf_.data();
      float* const float_data = fbuf_.data();
      const int length = fbuf_.length();
      for (int i = 0; i < length; ++i)
        float_data[i] = int_data[i];
      fvalid_ = true;
    }
  }

  void RefreshI() {
    if (!ivalid_) {
      assert(fvalid_);
      const float* const float_data = fbuf_.data();
      int16_t* const int_data = ibuf_.data();
      const int length = ibuf_.length();
      for (int i = 0; i < length; ++i)
        int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
                                     float_data[i],
                                     std::numeric_limits<int16_t>::min());
      ivalid_ = true;
    }
  }

  bool ivalid_;
  ChannelBuffer<int16_t> ibuf_;
  bool fvalid_;
  ChannelBuffer<float> fbuf_;
};

AudioBuffer::AudioBuffer(int input_samples_per_channel,
                         int num_input_channels,
                         int process_samples_per_channel,
                         int num_process_channels,
                         int output_samples_per_channel)
  : input_samples_per_channel_(input_samples_per_channel),
    num_input_channels_(num_input_channels),
    proc_samples_per_channel_(process_samples_per_channel),
    num_proc_channels_(num_process_channels),
    output_samples_per_channel_(output_samples_per_channel),
    samples_per_split_channel_(proc_samples_per_channel_),
    mixed_low_pass_valid_(false),
    reference_copied_(false),
    activity_(AudioFrame::kVadUnknown),
    keyboard_data_(NULL),
    channels_(new IFChannelBuffer(proc_samples_per_channel_,
                                  num_proc_channels_)) {
  assert(input_samples_per_channel_ > 0);
  assert(proc_samples_per_channel_ > 0);
  assert(output_samples_per_channel_ > 0);
  assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
  assert(num_proc_channels_ <= num_input_channels);

  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
    input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
                                                 num_proc_channels_));
  }

  if (input_samples_per_channel_ != proc_samples_per_channel_ ||
      output_samples_per_channel_ != proc_samples_per_channel_) {
    // Create an intermediate buffer for resampling.
    process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
                                                   num_proc_channels_));
  }

  if (input_samples_per_channel_ != proc_samples_per_channel_) {
    input_resamplers_.reserve(num_proc_channels_);
    for (int i = 0; i < num_proc_channels_; ++i) {
      input_resamplers_.push_back(
          new PushSincResampler(input_samples_per_channel_,
                                proc_samples_per_channel_));
    }
  }

  if (output_samples_per_channel_ != proc_samples_per_channel_) {
    output_resamplers_.reserve(num_proc_channels_);
    for (int i = 0; i < num_proc_channels_; ++i) {
      output_resamplers_.push_back(
          new PushSincResampler(proc_samples_per_channel_,
                                output_samples_per_channel_));
    }
  }

  if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
    samples_per_split_channel_ = kSamplesPer16kHzChannel;
    split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_,
                                                  num_proc_channels_));
    split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_,
                                                   num_proc_channels_));
    filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
  }
}

AudioBuffer::~AudioBuffer() {}

void AudioBuffer::CopyFrom(const float* const* data,
                           int samples_per_channel,
                           AudioProcessing::ChannelLayout layout) {
  assert(samples_per_channel == input_samples_per_channel_);
  assert(ChannelsFromLayout(layout) == num_input_channels_);
  InitForNewData();

  if (HasKeyboardChannel(layout)) {
    keyboard_data_ = data[KeyboardChannelIndex(layout)];
  }

  // Downmix.
  const float* const* data_ptr = data;
  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
    StereoToMono(data[0],
                 data[1],
                 input_buffer_->channel(0),
                 input_samples_per_channel_);
    data_ptr = input_buffer_->channels();
  }

  // Resample.
  if (input_samples_per_channel_ != proc_samples_per_channel_) {
    for (int i = 0; i < num_proc_channels_; ++i) {
      input_resamplers_[i]->Resample(data_ptr[i],
                                     input_samples_per_channel_,
                                     process_buffer_->channel(i),
                                     proc_samples_per_channel_);
    }
    data_ptr = process_buffer_->channels();
  }

  // Convert to int16_t.
  for (int i = 0; i < num_proc_channels_; ++i) {
    ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
                         channels_->ibuf()->channel(i));
  }
}

void AudioBuffer::CopyTo(int samples_per_channel,
                         AudioProcessing::ChannelLayout layout,
                         float* const* data) {
  assert(samples_per_channel == output_samples_per_channel_);
  assert(ChannelsFromLayout(layout) == num_proc_channels_);

  // Convert to float.
  float* const* data_ptr = data;
  if (output_samples_per_channel_ != proc_samples_per_channel_) {
    // Convert into an intermediate buffer for subsequent resampling.
    data_ptr = process_buffer_->channels();
  }
  for (int i = 0; i < num_proc_channels_; ++i) {
    ScaleToFloat(channels_->ibuf()->channel(i),
                 proc_samples_per_channel_,
                 data_ptr[i]);
  }

  // Resample.
  if (output_samples_per_channel_ != proc_samples_per_channel_) {
    for (int i = 0; i < num_proc_channels_; ++i) {
      output_resamplers_[i]->Resample(data_ptr[i],
                                      proc_samples_per_channel_,
                                      data[i],
                                      output_samples_per_channel_);
    }
  }
}

void AudioBuffer::InitForNewData() {
  keyboard_data_ = NULL;
  mixed_low_pass_valid_ = false;
  reference_copied_ = false;
  activity_ = AudioFrame::kVadUnknown;
}

const int16_t* AudioBuffer::data(int channel) const {
  return channels_->ibuf_const()->channel(channel);
}

int16_t* AudioBuffer::data(int channel) {
  mixed_low_pass_valid_ = false;
  return channels_->ibuf()->channel(channel);
}

const float* AudioBuffer::data_f(int channel) const {
  return channels_->fbuf_const()->channel(channel);
}

float* AudioBuffer::data_f(int channel) {
  mixed_low_pass_valid_ = false;
  return channels_->fbuf()->channel(channel);
}

const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
  return split_channels_low_.get()
      ? split_channels_low_->ibuf_const()->channel(channel)
      : data(channel);
}

int16_t* AudioBuffer::low_pass_split_data(int channel) {
  mixed_low_pass_valid_ = false;
  return split_channels_low_.get()
      ? split_channels_low_->ibuf()->channel(channel)
      : data(channel);
}

const float* AudioBuffer::low_pass_split_data_f(int channel) const {
  return split_channels_low_.get()
      ? split_channels_low_->fbuf_const()->channel(channel)
      : data_f(channel);
}

float* AudioBuffer::low_pass_split_data_f(int channel) {
  mixed_low_pass_valid_ = false;
  return split_channels_low_.get()
      ? split_channels_low_->fbuf()->channel(channel)
      : data_f(channel);
}

const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
  return split_channels_high_.get()
      ? split_channels_high_->ibuf_const()->channel(channel)
      : NULL;
}

int16_t* AudioBuffer::high_pass_split_data(int channel) {
  return split_channels_high_.get()
      ? split_channels_high_->ibuf()->channel(channel)
      : NULL;
}

const float* AudioBuffer::high_pass_split_data_f(int channel) const {
  return split_channels_high_.get()
      ? split_channels_high_->fbuf_const()->channel(channel)
      : NULL;
}

float* AudioBuffer::high_pass_split_data_f(int channel) {
  return split_channels_high_.get()
      ? split_channels_high_->fbuf()->channel(channel)
      : NULL;
}

const int16_t* AudioBuffer::mixed_low_pass_data() {
  // Currently only mixing stereo to mono is supported.
  assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);

  if (num_proc_channels_ == 1) {
    return low_pass_split_data(0);
  }

  if (!mixed_low_pass_valid_) {
    if (!mixed_low_pass_channels_.get()) {
      mixed_low_pass_channels_.reset(
          new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
    }
    StereoToMono(low_pass_split_data(0),
                 low_pass_split_data(1),
                 mixed_low_pass_channels_->data(),
                 samples_per_split_channel_);
    mixed_low_pass_valid_ = true;
  }
  return mixed_low_pass_channels_->data();
}

const int16_t* AudioBuffer::low_pass_reference(int channel) const {
  if (!reference_copied_) {
    return NULL;
  }

  return low_pass_reference_channels_->channel(channel);
}

const float* AudioBuffer::keyboard_data() const {
  return keyboard_data_;
}

SplitFilterStates* AudioBuffer::filter_states(int channel) {
  assert(channel >= 0 && channel < num_proc_channels_);
  return &filter_states_[channel];
}

void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
  activity_ = activity;
}

AudioFrame::VADActivity AudioBuffer::activity() const {
  return activity_;
}

int AudioBuffer::num_channels() const {
  return num_proc_channels_;
}

int AudioBuffer::samples_per_channel() const {
  return proc_samples_per_channel_;
}

int AudioBuffer::samples_per_split_channel() const {
  return samples_per_split_channel_;
}

int AudioBuffer::samples_per_keyboard_channel() const {
  // We don't resample the keyboard channel.
  return input_samples_per_channel_;
}

// Deinterleaves the frame's interleaved int16_t data into per-channel
// buffers, downmixing stereo input to mono when only one processing channel
// is configured.
// TODO(andrew): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
  assert(proc_samples_per_channel_ == input_samples_per_channel_);
  assert(frame->num_channels_ == num_input_channels_);
  assert(frame->samples_per_channel_ == proc_samples_per_channel_);
  InitForNewData();
  activity_ = frame->vad_activity_;

  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
    // Downmix directly; no explicit deinterleaving needed.
    int16_t* downmixed = channels_->ibuf()->channel(0);
    for (int i = 0; i < input_samples_per_channel_; ++i) {
      // HACK(ajm): The downmixing in the int16_t path is in practice never
      // called from production code. We do this weird scaling to and from float
      // to satisfy tests checking for bit-exactness with the float path.
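      // In other words: CopyFrom()'s float path averages in floating point
      // and then rounds via ScaleAndRoundToInt16(), while a plain
      // (l + r) >> 1 truncates, so the scale-average-round sequence below is
      // what keeps the two paths bit-exact.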
      float downmix_float = (ScaleToFloat(frame->data_[i * 2]) +
                             ScaleToFloat(frame->data_[i * 2 + 1])) / 2;
      downmixed[i] = ScaleAndRoundToInt16(downmix_float);
    }
  } else {
    assert(num_proc_channels_ == num_input_channels_);
    int16_t* interleaved = frame->data_;
    for (int i = 0; i < num_proc_channels_; ++i) {
      int16_t* deinterleaved = channels_->ibuf()->channel(i);
      int interleaved_idx = i;
      for (int j = 0; j < proc_samples_per_channel_; ++j) {
        deinterleaved[j] = interleaved[interleaved_idx];
        interleaved_idx += num_proc_channels_;
      }
    }
  }
}

// Interleaves the processed per-channel int16_t data back into |frame|.
// Skips the copy when |data_changed| is false, but always propagates the VAD
// activity.
void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
  assert(proc_samples_per_channel_ == output_samples_per_channel_);
  assert(num_proc_channels_ == num_input_channels_);
  assert(frame->num_channels_ == num_proc_channels_);
  assert(frame->samples_per_channel_ == proc_samples_per_channel_);
  frame->vad_activity_ = activity_;

  if (!data_changed) {
    return;
  }

  int16_t* interleaved = frame->data_;
  for (int i = 0; i < num_proc_channels_; i++) {
    int16_t* deinterleaved = channels_->ibuf()->channel(i);
    int interleaved_idx = i;
    for (int j = 0; j < proc_samples_per_channel_; j++) {
      interleaved[interleaved_idx] = deinterleaved[j];
      interleaved_idx += num_proc_channels_;
    }
  }
}

void AudioBuffer::CopyLowPassToReference() {
  reference_copied_ = true;
  if (!low_pass_reference_channels_.get()) {
    low_pass_reference_channels_.reset(
        new ChannelBuffer<int16_t>(samples_per_split_channel_,
                                   num_proc_channels_));
  }
  for (int i = 0; i < num_proc_channels_; i++) {
    low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
  }
}

}  // namespace webrtc