Home | History | Annotate | Download | only in audio_processing
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "audio_processing_impl.h"
     12 
     13 #include <assert.h>
     14 
     15 #include "audio_buffer.h"
     16 #include "critical_section_wrapper.h"
     17 #include "echo_cancellation_impl.h"
     18 #include "echo_control_mobile_impl.h"
     19 #include "file_wrapper.h"
     20 #include "high_pass_filter_impl.h"
     21 #include "gain_control_impl.h"
     22 #include "level_estimator_impl.h"
     23 #include "module_common_types.h"
     24 #include "noise_suppression_impl.h"
     25 #include "processing_component.h"
     26 #include "splitting_filter.h"
     27 #include "voice_detection_impl.h"
     28 
     29 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
     30 // Files generated at build-time by the protobuf compiler.
     31 #ifdef WEBRTC_ANDROID
     32 #include "external/webrtc/src/modules/audio_processing/debug.pb.h"
     33 #else
     34 #include "webrtc/audio_processing/debug.pb.h"
     35 #endif
     36 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
     37 
     38 namespace webrtc {
     39 AudioProcessing* AudioProcessing::Create(int id) {
     40   /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
     41              webrtc::kTraceAudioProcessing,
     42              id,
     43              "AudioProcessing::Create()");*/
     44 
     45   AudioProcessingImpl* apm = new AudioProcessingImpl(id);
     46   if (apm->Initialize() != kNoError) {
     47     delete apm;
     48     apm = NULL;
     49   }
     50 
     51   return apm;
     52 }
     53 
     54 void AudioProcessing::Destroy(AudioProcessing* apm) {
     55   delete static_cast<AudioProcessingImpl*>(apm);
     56 }
     57 
     58 AudioProcessingImpl::AudioProcessingImpl(int id)
     59     : id_(id),
     60       echo_cancellation_(NULL),
     61       echo_control_mobile_(NULL),
     62       gain_control_(NULL),
     63       high_pass_filter_(NULL),
     64       level_estimator_(NULL),
     65       noise_suppression_(NULL),
     66       voice_detection_(NULL),
     67       crit_(CriticalSectionWrapper::CreateCriticalSection()),
     68       render_audio_(NULL),
     69       capture_audio_(NULL),
     70 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
     71       debug_file_(FileWrapper::Create()),
     72       event_msg_(new audioproc::Event()),
     73 #endif
     74       sample_rate_hz_(kSampleRate16kHz),
     75       split_sample_rate_hz_(kSampleRate16kHz),
     76       samples_per_channel_(sample_rate_hz_ / 100),
     77       stream_delay_ms_(0),
     78       was_stream_delay_set_(false),
     79       num_reverse_channels_(1),
     80       num_input_channels_(1),
     81       num_output_channels_(1) {
     82 
     83   echo_cancellation_ = new EchoCancellationImpl(this);
     84   component_list_.push_back(echo_cancellation_);
     85 
     86   echo_control_mobile_ = new EchoControlMobileImpl(this);
     87   component_list_.push_back(echo_control_mobile_);
     88 
     89   gain_control_ = new GainControlImpl(this);
     90   component_list_.push_back(gain_control_);
     91 
     92   high_pass_filter_ = new HighPassFilterImpl(this);
     93   component_list_.push_back(high_pass_filter_);
     94 
     95   level_estimator_ = new LevelEstimatorImpl(this);
     96   component_list_.push_back(level_estimator_);
     97 
     98   noise_suppression_ = new NoiseSuppressionImpl(this);
     99   component_list_.push_back(noise_suppression_);
    100 
    101   voice_detection_ = new VoiceDetectionImpl(this);
    102   component_list_.push_back(voice_detection_);
    103 }
    104 
    105 AudioProcessingImpl::~AudioProcessingImpl() {
    106   while (!component_list_.empty()) {
    107     ProcessingComponent* component = component_list_.front();
    108     component->Destroy();
    109     delete component;
    110     component_list_.pop_front();
    111   }
    112 
    113 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
    114   if (debug_file_->Open()) {
    115     debug_file_->CloseFile();
    116   }
    117 #endif
    118 
    119   delete crit_;
    120   crit_ = NULL;
    121 
    122   if (render_audio_) {
    123     delete render_audio_;
    124     render_audio_ = NULL;
    125   }
    126 
    127   if (capture_audio_) {
    128     delete capture_audio_;
    129     capture_audio_ = NULL;
    130   }
    131 }
    132 
    133 CriticalSectionWrapper* AudioProcessingImpl::crit() const {
    134   return crit_;
    135 }
    136 
    137 int AudioProcessingImpl::split_sample_rate_hz() const {
    138   return split_sample_rate_hz_;
    139 }
    140 
    141 int AudioProcessingImpl::Initialize() {
    142   CriticalSectionScoped crit_scoped(*crit_);
    143   return InitializeLocked();
    144 }
    145 
    146 int AudioProcessingImpl::InitializeLocked() {
    147   if (render_audio_ != NULL) {
    148     delete render_audio_;
    149     render_audio_ = NULL;
    150   }
    151 
    152   if (capture_audio_ != NULL) {
    153     delete capture_audio_;
    154     capture_audio_ = NULL;
    155   }
    156 
    157   render_audio_ = new AudioBuffer(num_reverse_channels_,
    158                                   samples_per_channel_);
    159   capture_audio_ = new AudioBuffer(num_input_channels_,
    160                                    samples_per_channel_);
    161 
    162   was_stream_delay_set_ = false;
    163 
    164   // Initialize all components.
    165   std::list<ProcessingComponent*>::iterator it;
    166   for (it = component_list_.begin(); it != component_list_.end(); it++) {
    167     int err = (*it)->Initialize();
    168     if (err != kNoError) {
    169       return err;
    170     }
    171   }
    172 
    173 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
    174   if (debug_file_->Open()) {
    175     int err = WriteInitMessage();
    176     if (err != kNoError) {
    177       return err;
    178     }
    179   }
    180 #endif
    181 
    182   return kNoError;
    183 }
    184 
    185 int AudioProcessingImpl::set_sample_rate_hz(int rate) {
    186   CriticalSectionScoped crit_scoped(*crit_);
    187   if (rate != kSampleRate8kHz &&
    188       rate != kSampleRate16kHz &&
    189       rate != kSampleRate32kHz) {
    190     return kBadParameterError;
    191   }
    192 
    193   sample_rate_hz_ = rate;
    194   samples_per_channel_ = rate / 100;
    195 
    196   if (sample_rate_hz_ == kSampleRate32kHz) {
    197     split_sample_rate_hz_ = kSampleRate16kHz;
    198   } else {
    199     split_sample_rate_hz_ = sample_rate_hz_;
    200   }
    201 
    202   return InitializeLocked();
    203 }
    204 
    205 int AudioProcessingImpl::sample_rate_hz() const {
    206   return sample_rate_hz_;
    207 }
    208 
    209 int AudioProcessingImpl::set_num_reverse_channels(int channels) {
    210   CriticalSectionScoped crit_scoped(*crit_);
    211   // Only stereo supported currently.
    212   if (channels > 2 || channels < 1) {
    213     return kBadParameterError;
    214   }
    215 
    216   num_reverse_channels_ = channels;
    217 
    218   return InitializeLocked();
    219 }
    220 
    221 int AudioProcessingImpl::num_reverse_channels() const {
    222   return num_reverse_channels_;
    223 }
    224 
    225 int AudioProcessingImpl::set_num_channels(
    226     int input_channels,
    227     int output_channels) {
    228   CriticalSectionScoped crit_scoped(*crit_);
    229   if (output_channels > input_channels) {
    230     return kBadParameterError;
    231   }
    232 
    233   // Only stereo supported currently.
    234   if (input_channels > 2 || input_channels < 1) {
    235     return kBadParameterError;
    236   }
    237 
    238   if (output_channels > 2 || output_channels < 1) {
    239     return kBadParameterError;
    240   }
    241 
    242   num_input_channels_ = input_channels;
    243   num_output_channels_ = output_channels;
    244 
    245   return InitializeLocked();
    246 }
    247 
    248 int AudioProcessingImpl::num_input_channels() const {
    249   return num_input_channels_;
    250 }
    251 
    252 int AudioProcessingImpl::num_output_channels() const {
    253   return num_output_channels_;
    254 }
    255 
    256 int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
    257   CriticalSectionScoped crit_scoped(*crit_);
    258   int err = kNoError;
    259 
    260   if (frame == NULL) {
    261     return kNullPointerError;
    262   }
    263 
    264   if (frame->_frequencyInHz != sample_rate_hz_) {
    265     return kBadSampleRateError;
    266   }
    267 
    268   if (frame->_audioChannel != num_input_channels_) {
    269     return kBadNumberChannelsError;
    270   }
    271 
    272   if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    273     return kBadDataLengthError;
    274   }
    275 
    276 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
    277   if (debug_file_->Open()) {
    278     event_msg_->set_type(audioproc::Event::STREAM);
    279     audioproc::Stream* msg = event_msg_->mutable_stream();
    280     const size_t data_size = sizeof(int16_t) *
    281                              frame->_payloadDataLengthInSamples *
    282                              frame->_audioChannel;
    283     msg->set_input_data(frame->_payloadData, data_size);
    284     msg->set_delay(stream_delay_ms_);
    285     msg->set_drift(echo_cancellation_->stream_drift_samples());
    286     msg->set_level(gain_control_->stream_analog_level());
    287   }
    288 #endif
    289 
    290   capture_audio_->DeinterleaveFrom(frame);
    291 
    292   // TODO(ajm): experiment with mixing and AEC placement.
    293   if (num_output_channels_ < num_input_channels_) {
    294     capture_audio_->Mix(num_output_channels_);
    295     frame->_audioChannel = num_output_channels_;
    296   }
    297 
    298   bool data_changed = stream_data_changed();
    299   if (analysis_needed(data_changed)) {
    300     for (int i = 0; i < num_output_channels_; i++) {
    301       // Split into a low and high band.
    302       SplittingFilterAnalysis(capture_audio_->data(i),
    303                               capture_audio_->low_pass_split_data(i),
    304                               capture_audio_->high_pass_split_data(i),
    305                               capture_audio_->analysis_filter_state1(i),
    306                               capture_audio_->analysis_filter_state2(i));
    307     }
    308   }
    309 
    310   err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
    311   if (err != kNoError) {
    312     return err;
    313   }
    314 
    315   err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
    316   if (err != kNoError) {
    317     return err;
    318   }
    319 
    320   err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
    321   if (err != kNoError) {
    322     return err;
    323   }
    324 
    325   if (echo_control_mobile_->is_enabled() &&
    326       noise_suppression_->is_enabled()) {
    327     capture_audio_->CopyLowPassToReference();
    328   }
    329 
    330   err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
    331   if (err != kNoError) {
    332     return err;
    333   }
    334 
    335   err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
    336   if (err != kNoError) {
    337     return err;
    338   }
    339 
    340   err = voice_detection_->ProcessCaptureAudio(capture_audio_);
    341   if (err != kNoError) {
    342     return err;
    343   }
    344 
    345   err = gain_control_->ProcessCaptureAudio(capture_audio_);
    346   if (err != kNoError) {
    347     return err;
    348   }
    349 
    350   if (synthesis_needed(data_changed)) {
    351     for (int i = 0; i < num_output_channels_; i++) {
    352       // Recombine low and high bands.
    353       SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
    354                                capture_audio_->high_pass_split_data(i),
    355                                capture_audio_->data(i),
    356                                capture_audio_->synthesis_filter_state1(i),
    357                                capture_audio_->synthesis_filter_state2(i));
    358     }
    359   }
    360 
    361   // The level estimator operates on the recombined data.
    362   err = level_estimator_->ProcessStream(capture_audio_);
    363   if (err != kNoError) {
    364     return err;
    365   }
    366 
    367   capture_audio_->InterleaveTo(frame, data_changed);
    368 
    369 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
    370   if (debug_file_->Open()) {
    371     audioproc::Stream* msg = event_msg_->mutable_stream();
    372     const size_t data_size = sizeof(int16_t) *
    373                              frame->_payloadDataLengthInSamples *
    374                              frame->_audioChannel;
    375     msg->set_output_data(frame->_payloadData, data_size);
    376     err = WriteMessageToDebugFile();
    377     if (err != kNoError) {
    378       return err;
    379     }
    380   }
    381 #endif
    382 
    383   was_stream_delay_set_ = false;
    384   return kNoError;
    385 }
    386 
    387 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
    388   CriticalSectionScoped crit_scoped(*crit_);
    389   int err = kNoError;
    390 
    391   if (frame == NULL) {
    392     return kNullPointerError;
    393   }
    394 
    395   if (frame->_frequencyInHz != sample_rate_hz_) {
    396     return kBadSampleRateError;
    397   }
    398 
    399   if (frame->_audioChannel != num_reverse_channels_) {
    400     return kBadNumberChannelsError;
    401   }
    402 
    403   if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    404     return kBadDataLengthError;
    405   }
    406 
    407 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
    408   if (debug_file_->Open()) {
    409     event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
    410     audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
    411     const size_t data_size = sizeof(int16_t) *
    412                              frame->_payloadDataLengthInSamples *
    413                              frame->_audioChannel;
    414     msg->set_data(frame->_payloadData, data_size);
    415     err = WriteMessageToDebugFile();
    416     if (err != kNoError) {
    417       return err;
    418     }
    419   }
    420 #endif
    421 
    422   render_audio_->DeinterleaveFrom(frame);
    423 
    424   // TODO(ajm): turn the splitting filter into a component?
    425   if (sample_rate_hz_ == kSampleRate32kHz) {
    426     for (int i = 0; i < num_reverse_channels_; i++) {
    427       // Split into low and high band.
    428       SplittingFilterAnalysis(render_audio_->data(i),
    429                               render_audio_->low_pass_split_data(i),
    430                               render_audio_->high_pass_split_data(i),
    431                               render_audio_->analysis_filter_state1(i),
    432                               render_audio_->analysis_filter_state2(i));
    433     }
    434   }
    435 
    436   // TODO(ajm): warnings possible from components?
    437   err = echo_cancellation_->ProcessRenderAudio(render_audio_);
    438   if (err != kNoError) {
    439     return err;
    440   }
    441 
    442   err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
    443   if (err != kNoError) {
    444     return err;
    445   }
    446 
    447   err = gain_control_->ProcessRenderAudio(render_audio_);
    448   if (err != kNoError) {
    449     return err;
    450   }
    451 
    452   return err;  // TODO(ajm): this is for returning warnings; necessary?
    453 }
    454 
    455 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
    456   was_stream_delay_set_ = true;
    457   if (delay < 0) {
    458     return kBadParameterError;
    459   }
    460 
    461   // TODO(ajm): the max is rather arbitrarily chosen; investigate.
    462   if (delay > 500) {
    463     stream_delay_ms_ = 500;
    464     return kBadStreamParameterWarning;
    465   }
    466 
    467   stream_delay_ms_ = delay;
    468   return kNoError;
    469 }
    470 
    471 int AudioProcessingImpl::stream_delay_ms() const {
    472   return stream_delay_ms_;
    473 }
    474 
    475 bool AudioProcessingImpl::was_stream_delay_set() const {
    476   return was_stream_delay_set_;
    477 }
    478 
    479 int AudioProcessingImpl::StartDebugRecording(
    480     const char filename[AudioProcessing::kMaxFilenameSize]) {
    481   CriticalSectionScoped crit_scoped(*crit_);
    482   assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);
    483 
    484   if (filename == NULL) {
    485     return kNullPointerError;
    486   }
    487 
    488 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
    489   // Stop any ongoing recording.
    490   if (debug_file_->Open()) {
    491     if (debug_file_->CloseFile() == -1) {
    492       return kFileError;
    493     }
    494   }
    495 
    496   if (debug_file_->OpenFile(filename, false) == -1) {
    497     debug_file_->CloseFile();
    498     return kFileError;
    499   }
    500 
    501   int err = WriteInitMessage();
    502   if (err != kNoError) {
    503     return err;
    504   }
    505   return kNoError;
    506 #else
    507   return kUnsupportedFunctionError;
    508 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
    509 }
    510 
    511 int AudioProcessingImpl::StopDebugRecording() {
    512   CriticalSectionScoped crit_scoped(*crit_);
    513 
    514 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
    515   // We just return if recording hasn't started.
    516   if (debug_file_->Open()) {
    517     if (debug_file_->CloseFile() == -1) {
    518       return kFileError;
    519     }
    520   }
    521   return kNoError;
    522 #else
    523   return kUnsupportedFunctionError;
    524 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
    525 }
    526 
    527 EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
    528   return echo_cancellation_;
    529 }
    530 
    531 EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
    532   return echo_control_mobile_;
    533 }
    534 
    535 GainControl* AudioProcessingImpl::gain_control() const {
    536   return gain_control_;
    537 }
    538 
    539 HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
    540   return high_pass_filter_;
    541 }
    542 
    543 LevelEstimator* AudioProcessingImpl::level_estimator() const {
    544   return level_estimator_;
    545 }
    546 
    547 NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
    548   return noise_suppression_;
    549 }
    550 
    551 VoiceDetection* AudioProcessingImpl::voice_detection() const {
    552   return voice_detection_;
    553 }
    554 
    555 WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
    556   CriticalSectionScoped crit_scoped(*crit_);
    557   /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
    558              webrtc::kTraceAudioProcessing,
    559              id_,
    560              "ChangeUniqueId(new id = %d)",
    561              id);*/
    562   id_ = id;
    563 
    564   return kNoError;
    565 }
    566 
    567 bool AudioProcessingImpl::stream_data_changed() const {
    568   int enabled_count = 0;
    569   std::list<ProcessingComponent*>::const_iterator it;
    570   for (it = component_list_.begin(); it != component_list_.end(); it++) {
    571     if ((*it)->is_component_enabled()) {
    572       enabled_count++;
    573     }
    574   }
    575 
    576   // Data is unchanged if no components are enabled, or if only level_estimator_
    577   // or voice_detection_ is enabled.
    578   if (enabled_count == 0) {
    579     return false;
    580   } else if (enabled_count == 1) {
    581     if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
    582       return false;
    583     }
    584   } else if (enabled_count == 2) {
    585     if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
    586       return false;
    587     }
    588   }
    589   return true;
    590 }
    591 
    592 bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const {
    593   return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz);
    594 }
    595 
    596 bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const {
    597   if (!stream_data_changed && !voice_detection_->is_enabled()) {
    598     // Only level_estimator_ is enabled.
    599     return false;
    600   } else if (sample_rate_hz_ == kSampleRate32kHz) {
    601     // Something besides level_estimator_ is enabled, and we have super-wb.
    602     return true;
    603   }
    604   return false;
    605 }
    606 
    607 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
    608 int AudioProcessingImpl::WriteMessageToDebugFile() {
    609   int32_t size = event_msg_->ByteSize();
    610   if (size <= 0) {
    611     return kUnspecifiedError;
    612   }
    613 #if defined(WEBRTC_BIG_ENDIAN)
    614   // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
    615   //            pretty safe in assuming little-endian.
    616 #endif
    617 
    618   if (!event_msg_->SerializeToString(&event_str_)) {
    619     return kUnspecifiedError;
    620   }
    621 
    622   // Write message preceded by its size.
    623   if (!debug_file_->Write(&size, sizeof(int32_t))) {
    624     return kFileError;
    625   }
    626   if (!debug_file_->Write(event_str_.data(), event_str_.length())) {
    627     return kFileError;
    628   }
    629 
    630   event_msg_->Clear();
    631 
    632   return 0;
    633 }
    634 
    635 int AudioProcessingImpl::WriteInitMessage() {
    636   event_msg_->set_type(audioproc::Event::INIT);
    637   audioproc::Init* msg = event_msg_->mutable_init();
    638   msg->set_sample_rate(sample_rate_hz_);
    639   msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz());
    640   msg->set_num_input_channels(num_input_channels_);
    641   msg->set_num_output_channels(num_output_channels_);
    642   msg->set_num_reverse_channels(num_reverse_channels_);
    643 
    644   int err = WriteMessageToDebugFile();
    645   if (err != kNoError) {
    646     return err;
    647   }
    648 
    649   return kNoError;
    650 }
    651 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
    652 }  // namespace webrtc
    653