Home | History | Annotate | Download | only in audio_processing
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_processing/audio_buffer.h"
     12 
     13 #include "webrtc/common_audio/include/audio_util.h"
     14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
     15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
     16 #include "webrtc/common_audio/channel_buffer.h"
     17 #include "webrtc/modules/audio_processing/common.h"
     18 
     19 namespace webrtc {
     20 namespace {
     21 
     22 const size_t kSamplesPer16kHzChannel = 160;
     23 const size_t kSamplesPer32kHzChannel = 320;
     24 const size_t kSamplesPer48kHzChannel = 480;
     25 
     26 int KeyboardChannelIndex(const StreamConfig& stream_config) {
     27   if (!stream_config.has_keyboard()) {
     28     assert(false);
     29     return 0;
     30   }
     31 
     32   return stream_config.num_channels();
     33 }
     34 
     35 size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
     36   size_t num_bands = 1;
     37   if (num_frames == kSamplesPer32kHzChannel ||
     38       num_frames == kSamplesPer48kHzChannel) {
     39     num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
     40   }
     41   return num_bands;
     42 }
     43 
     44 }  // namespace
     45 
     46 AudioBuffer::AudioBuffer(size_t input_num_frames,
     47                          size_t num_input_channels,
     48                          size_t process_num_frames,
     49                          size_t num_process_channels,
     50                          size_t output_num_frames)
     51   : input_num_frames_(input_num_frames),
     52     num_input_channels_(num_input_channels),
     53     proc_num_frames_(process_num_frames),
     54     num_proc_channels_(num_process_channels),
     55     output_num_frames_(output_num_frames),
     56     num_channels_(num_process_channels),
     57     num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
     58     num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
     59     mixed_low_pass_valid_(false),
     60     reference_copied_(false),
     61     activity_(AudioFrame::kVadUnknown),
     62     keyboard_data_(NULL),
     63     data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {
     64   assert(input_num_frames_ > 0);
     65   assert(proc_num_frames_ > 0);
     66   assert(output_num_frames_ > 0);
     67   assert(num_input_channels_ > 0);
     68   assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);
     69 
     70   if (input_num_frames_ != proc_num_frames_ ||
     71       output_num_frames_ != proc_num_frames_) {
     72     // Create an intermediate buffer for resampling.
     73     process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,
     74                                                    num_proc_channels_));
     75 
     76     if (input_num_frames_ != proc_num_frames_) {
     77       for (size_t i = 0; i < num_proc_channels_; ++i) {
     78         input_resamplers_.push_back(
     79             new PushSincResampler(input_num_frames_,
     80                                   proc_num_frames_));
     81       }
     82     }
     83 
     84     if (output_num_frames_ != proc_num_frames_) {
     85       for (size_t i = 0; i < num_proc_channels_; ++i) {
     86         output_resamplers_.push_back(
     87             new PushSincResampler(proc_num_frames_,
     88                                   output_num_frames_));
     89       }
     90     }
     91   }
     92 
     93   if (num_bands_ > 1) {
     94     split_data_.reset(new IFChannelBuffer(proc_num_frames_,
     95                                           num_proc_channels_,
     96                                           num_bands_));
     97     splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
     98                                                 num_bands_,
     99                                                 proc_num_frames_));
    100   }
    101 }
    102 
    103 AudioBuffer::~AudioBuffer() {}
    104 
    105 void AudioBuffer::CopyFrom(const float* const* data,
    106                            const StreamConfig& stream_config) {
    107   assert(stream_config.num_frames() == input_num_frames_);
    108   assert(stream_config.num_channels() == num_input_channels_);
    109   InitForNewData();
    110   // Initialized lazily because there's a different condition in
    111   // DeinterleaveFrom.
    112   const bool need_to_downmix =
    113       num_input_channels_ > 1 && num_proc_channels_ == 1;
    114   if (need_to_downmix && !input_buffer_) {
    115     input_buffer_.reset(
    116         new IFChannelBuffer(input_num_frames_, num_proc_channels_));
    117   }
    118 
    119   if (stream_config.has_keyboard()) {
    120     keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
    121   }
    122 
    123   // Downmix.
    124   const float* const* data_ptr = data;
    125   if (need_to_downmix) {
    126     DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
    127                                 input_buffer_->fbuf()->channels()[0]);
    128     data_ptr = input_buffer_->fbuf_const()->channels();
    129   }
    130 
    131   // Resample.
    132   if (input_num_frames_ != proc_num_frames_) {
    133     for (size_t i = 0; i < num_proc_channels_; ++i) {
    134       input_resamplers_[i]->Resample(data_ptr[i],
    135                                      input_num_frames_,
    136                                      process_buffer_->channels()[i],
    137                                      proc_num_frames_);
    138     }
    139     data_ptr = process_buffer_->channels();
    140   }
    141 
    142   // Convert to the S16 range.
    143   for (size_t i = 0; i < num_proc_channels_; ++i) {
    144     FloatToFloatS16(data_ptr[i],
    145                     proc_num_frames_,
    146                     data_->fbuf()->channels()[i]);
    147   }
    148 }
    149 
    150 void AudioBuffer::CopyTo(const StreamConfig& stream_config,
    151                          float* const* data) {
    152   assert(stream_config.num_frames() == output_num_frames_);
    153   assert(stream_config.num_channels() == num_channels_ || num_channels_ == 1);
    154 
    155   // Convert to the float range.
    156   float* const* data_ptr = data;
    157   if (output_num_frames_ != proc_num_frames_) {
    158     // Convert to an intermediate buffer for subsequent resampling.
    159     data_ptr = process_buffer_->channels();
    160   }
    161   for (size_t i = 0; i < num_channels_; ++i) {
    162     FloatS16ToFloat(data_->fbuf()->channels()[i],
    163                     proc_num_frames_,
    164                     data_ptr[i]);
    165   }
    166 
    167   // Resample.
    168   if (output_num_frames_ != proc_num_frames_) {
    169     for (size_t i = 0; i < num_channels_; ++i) {
    170       output_resamplers_[i]->Resample(data_ptr[i],
    171                                       proc_num_frames_,
    172                                       data[i],
    173                                       output_num_frames_);
    174     }
    175   }
    176 
    177   // Upmix.
    178   for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) {
    179     memcpy(data[i], data[0], output_num_frames_ * sizeof(**data));
    180   }
    181 }
    182 
    183 void AudioBuffer::InitForNewData() {
    184   keyboard_data_ = NULL;
    185   mixed_low_pass_valid_ = false;
    186   reference_copied_ = false;
    187   activity_ = AudioFrame::kVadUnknown;
    188   num_channels_ = num_proc_channels_;
    189 }
    190 
    191 const int16_t* const* AudioBuffer::channels_const() const {
    192   return data_->ibuf_const()->channels();
    193 }
    194 
    195 int16_t* const* AudioBuffer::channels() {
    196   mixed_low_pass_valid_ = false;
    197   return data_->ibuf()->channels();
    198 }
    199 
    200 const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const {
    201   return split_data_.get() ?
    202          split_data_->ibuf_const()->bands(channel) :
    203          data_->ibuf_const()->bands(channel);
    204 }
    205 
    206 int16_t* const* AudioBuffer::split_bands(size_t channel) {
    207   mixed_low_pass_valid_ = false;
    208   return split_data_.get() ?
    209          split_data_->ibuf()->bands(channel) :
    210          data_->ibuf()->bands(channel);
    211 }
    212 
    213 const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
    214   if (split_data_.get()) {
    215     return split_data_->ibuf_const()->channels(band);
    216   } else {
    217     return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
    218   }
    219 }
    220 
    221 int16_t* const* AudioBuffer::split_channels(Band band) {
    222   mixed_low_pass_valid_ = false;
    223   if (split_data_.get()) {
    224     return split_data_->ibuf()->channels(band);
    225   } else {
    226     return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
    227   }
    228 }
    229 
    230 ChannelBuffer<int16_t>* AudioBuffer::data() {
    231   mixed_low_pass_valid_ = false;
    232   return data_->ibuf();
    233 }
    234 
    235 const ChannelBuffer<int16_t>* AudioBuffer::data() const {
    236   return data_->ibuf_const();
    237 }
    238 
    239 ChannelBuffer<int16_t>* AudioBuffer::split_data() {
    240   mixed_low_pass_valid_ = false;
    241   return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
    242 }
    243 
    244 const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
    245   return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
    246 }
    247 
    248 const float* const* AudioBuffer::channels_const_f() const {
    249   return data_->fbuf_const()->channels();
    250 }
    251 
    252 float* const* AudioBuffer::channels_f() {
    253   mixed_low_pass_valid_ = false;
    254   return data_->fbuf()->channels();
    255 }
    256 
    257 const float* const* AudioBuffer::split_bands_const_f(size_t channel) const {
    258   return split_data_.get() ?
    259          split_data_->fbuf_const()->bands(channel) :
    260          data_->fbuf_const()->bands(channel);
    261 }
    262 
    263 float* const* AudioBuffer::split_bands_f(size_t channel) {
    264   mixed_low_pass_valid_ = false;
    265   return split_data_.get() ?
    266          split_data_->fbuf()->bands(channel) :
    267          data_->fbuf()->bands(channel);
    268 }
    269 
    270 const float* const* AudioBuffer::split_channels_const_f(Band band) const {
    271   if (split_data_.get()) {
    272     return split_data_->fbuf_const()->channels(band);
    273   } else {
    274     return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
    275   }
    276 }
    277 
    278 float* const* AudioBuffer::split_channels_f(Band band) {
    279   mixed_low_pass_valid_ = false;
    280   if (split_data_.get()) {
    281     return split_data_->fbuf()->channels(band);
    282   } else {
    283     return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
    284   }
    285 }
    286 
    287 ChannelBuffer<float>* AudioBuffer::data_f() {
    288   mixed_low_pass_valid_ = false;
    289   return data_->fbuf();
    290 }
    291 
    292 const ChannelBuffer<float>* AudioBuffer::data_f() const {
    293   return data_->fbuf_const();
    294 }
    295 
    296 ChannelBuffer<float>* AudioBuffer::split_data_f() {
    297   mixed_low_pass_valid_ = false;
    298   return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
    299 }
    300 
    301 const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
    302   return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
    303 }
    304 
    305 const int16_t* AudioBuffer::mixed_low_pass_data() {
    306   if (num_proc_channels_ == 1) {
    307     return split_bands_const(0)[kBand0To8kHz];
    308   }
    309 
    310   if (!mixed_low_pass_valid_) {
    311     if (!mixed_low_pass_channels_.get()) {
    312       mixed_low_pass_channels_.reset(
    313           new ChannelBuffer<int16_t>(num_split_frames_, 1));
    314     }
    315 
    316     DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
    317                                     num_split_frames_, num_channels_,
    318                                     mixed_low_pass_channels_->channels()[0]);
    319     mixed_low_pass_valid_ = true;
    320   }
    321   return mixed_low_pass_channels_->channels()[0];
    322 }
    323 
    324 const int16_t* AudioBuffer::low_pass_reference(int channel) const {
    325   if (!reference_copied_) {
    326     return NULL;
    327   }
    328 
    329   return low_pass_reference_channels_->channels()[channel];
    330 }
    331 
    332 const float* AudioBuffer::keyboard_data() const {
    333   return keyboard_data_;
    334 }
    335 
    336 void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
    337   activity_ = activity;
    338 }
    339 
    340 AudioFrame::VADActivity AudioBuffer::activity() const {
    341   return activity_;
    342 }
    343 
    344 size_t AudioBuffer::num_channels() const {
    345   return num_channels_;
    346 }
    347 
    348 void AudioBuffer::set_num_channels(size_t num_channels) {
    349   num_channels_ = num_channels;
    350 }
    351 
    352 size_t AudioBuffer::num_frames() const {
    353   return proc_num_frames_;
    354 }
    355 
    356 size_t AudioBuffer::num_frames_per_band() const {
    357   return num_split_frames_;
    358 }
    359 
    360 size_t AudioBuffer::num_keyboard_frames() const {
    361   // We don't resample the keyboard channel.
    362   return input_num_frames_;
    363 }
    364 
    365 size_t AudioBuffer::num_bands() const {
    366   return num_bands_;
    367 }
    368 
    369 // The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
    370 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
    371   assert(frame->num_channels_ == num_input_channels_);
    372   assert(frame->samples_per_channel_ == input_num_frames_);
    373   InitForNewData();
    374   // Initialized lazily because there's a different condition in CopyFrom.
    375   if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
    376     input_buffer_.reset(
    377         new IFChannelBuffer(input_num_frames_, num_proc_channels_));
    378   }
    379   activity_ = frame->vad_activity_;
    380 
    381   int16_t* const* deinterleaved;
    382   if (input_num_frames_ == proc_num_frames_) {
    383     deinterleaved = data_->ibuf()->channels();
    384   } else {
    385     deinterleaved = input_buffer_->ibuf()->channels();
    386   }
    387   if (num_proc_channels_ == 1) {
    388     // Downmix and deinterleave simultaneously.
    389     DownmixInterleavedToMono(frame->data_, input_num_frames_,
    390                              num_input_channels_, deinterleaved[0]);
    391   } else {
    392     assert(num_proc_channels_ == num_input_channels_);
    393     Deinterleave(frame->data_,
    394                  input_num_frames_,
    395                  num_proc_channels_,
    396                  deinterleaved);
    397   }
    398 
    399   // Resample.
    400   if (input_num_frames_ != proc_num_frames_) {
    401     for (size_t i = 0; i < num_proc_channels_; ++i) {
    402       input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i],
    403                                      input_num_frames_,
    404                                      data_->fbuf()->channels()[i],
    405                                      proc_num_frames_);
    406     }
    407   }
    408 }
    409 
    410 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) {
    411   frame->vad_activity_ = activity_;
    412   if (!data_changed) {
    413     return;
    414   }
    415 
    416   assert(frame->num_channels_ == num_channels_ || num_channels_ == 1);
    417   assert(frame->samples_per_channel_ == output_num_frames_);
    418 
    419   // Resample if necessary.
    420   IFChannelBuffer* data_ptr = data_.get();
    421   if (proc_num_frames_ != output_num_frames_) {
    422     if (!output_buffer_) {
    423       output_buffer_.reset(
    424           new IFChannelBuffer(output_num_frames_, num_channels_));
    425     }
    426     for (size_t i = 0; i < num_channels_; ++i) {
    427       output_resamplers_[i]->Resample(
    428           data_->fbuf()->channels()[i], proc_num_frames_,
    429           output_buffer_->fbuf()->channels()[i], output_num_frames_);
    430     }
    431     data_ptr = output_buffer_.get();
    432   }
    433 
    434   if (frame->num_channels_ == num_channels_) {
    435     Interleave(data_ptr->ibuf()->channels(), proc_num_frames_, num_channels_,
    436                frame->data_);
    437   } else {
    438     UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], proc_num_frames_,
    439                            frame->num_channels_, frame->data_);
    440   }
    441 }
    442 
    443 void AudioBuffer::CopyLowPassToReference() {
    444   reference_copied_ = true;
    445   if (!low_pass_reference_channels_.get() ||
    446       low_pass_reference_channels_->num_channels() != num_channels_) {
    447     low_pass_reference_channels_.reset(
    448         new ChannelBuffer<int16_t>(num_split_frames_,
    449                                    num_proc_channels_));
    450   }
    451   for (size_t i = 0; i < num_proc_channels_; i++) {
    452     memcpy(low_pass_reference_channels_->channels()[i],
    453            split_bands_const(i)[kBand0To8kHz],
    454            low_pass_reference_channels_->num_frames_per_band() *
    455                sizeof(split_bands_const(i)[kBand0To8kHz][0]));
    456   }
    457 }
    458 
    459 void AudioBuffer::SplitIntoFrequencyBands() {
    460   splitting_filter_->Analysis(data_.get(), split_data_.get());
    461 }
    462 
    463 void AudioBuffer::MergeFrequencyBands() {
    464   splitting_filter_->Synthesis(split_data_.get(), data_.get());
    465 }
    466 
    467 }  // namespace webrtc
    468