/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
     10 
     11 #include "audio_processing_impl.h"
     12 
     13 #include <cassert>
     14 
     15 #include "module_common_types.h"
     16 
     17 #include "critical_section_wrapper.h"
     18 #include "file_wrapper.h"
     19 
     20 #include "audio_buffer.h"
     21 #include "echo_cancellation_impl.h"
     22 #include "echo_control_mobile_impl.h"
     23 #include "high_pass_filter_impl.h"
     24 #include "gain_control_impl.h"
     25 #include "level_estimator_impl.h"
     26 #include "noise_suppression_impl.h"
     27 #include "processing_component.h"
     28 #include "splitting_filter.h"
     29 #include "voice_detection_impl.h"
     30 
     31 namespace webrtc {
     32 namespace {
     33 
     34 enum Events {
     35   kInitializeEvent,
     36   kRenderEvent,
     37   kCaptureEvent
     38 };
     39 
     40 const char kMagicNumber[] = "#!vqetrace1.2";
     41 }  // namespace
     42 
     43 AudioProcessing* AudioProcessing::Create(int id) {
     44   /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
     45              webrtc::kTraceAudioProcessing,
     46              id,
     47              "AudioProcessing::Create()");*/
     48 
     49   AudioProcessingImpl* apm = new AudioProcessingImpl(id);
     50   if (apm->Initialize() != kNoError) {
     51     delete apm;
     52     apm = NULL;
     53   }
     54 
     55   return apm;
     56 }
     57 
     58 void AudioProcessing::Destroy(AudioProcessing* apm) {
     59   delete static_cast<AudioProcessingImpl*>(apm);
     60 }
     61 
     62 AudioProcessingImpl::AudioProcessingImpl(int id)
     63     : id_(id),
     64       echo_cancellation_(NULL),
     65       echo_control_mobile_(NULL),
     66       gain_control_(NULL),
     67       high_pass_filter_(NULL),
     68       level_estimator_(NULL),
     69       noise_suppression_(NULL),
     70       voice_detection_(NULL),
     71       debug_file_(FileWrapper::Create()),
     72       crit_(CriticalSectionWrapper::CreateCriticalSection()),
     73       render_audio_(NULL),
     74       capture_audio_(NULL),
     75       sample_rate_hz_(kSampleRate16kHz),
     76       split_sample_rate_hz_(kSampleRate16kHz),
     77       samples_per_channel_(sample_rate_hz_ / 100),
     78       stream_delay_ms_(0),
     79       was_stream_delay_set_(false),
     80       num_render_input_channels_(1),
     81       num_capture_input_channels_(1),
     82       num_capture_output_channels_(1) {
     83 
     84   echo_cancellation_ = new EchoCancellationImpl(this);
     85   component_list_.push_back(echo_cancellation_);
     86 
     87   echo_control_mobile_ = new EchoControlMobileImpl(this);
     88   component_list_.push_back(echo_control_mobile_);
     89 
     90   gain_control_ = new GainControlImpl(this);
     91   component_list_.push_back(gain_control_);
     92 
     93   high_pass_filter_ = new HighPassFilterImpl(this);
     94   component_list_.push_back(high_pass_filter_);
     95 
     96   level_estimator_ = new LevelEstimatorImpl(this);
     97   component_list_.push_back(level_estimator_);
     98 
     99   noise_suppression_ = new NoiseSuppressionImpl(this);
    100   component_list_.push_back(noise_suppression_);
    101 
    102   voice_detection_ = new VoiceDetectionImpl(this);
    103   component_list_.push_back(voice_detection_);
    104 }
    105 
    106 AudioProcessingImpl::~AudioProcessingImpl() {
    107   while (!component_list_.empty()) {
    108     ProcessingComponent* component = component_list_.front();
    109     component->Destroy();
    110     delete component;
    111     component_list_.pop_front();
    112   }
    113 
    114   if (debug_file_->Open()) {
    115     debug_file_->CloseFile();
    116   }
    117   delete debug_file_;
    118   debug_file_ = NULL;
    119 
    120   delete crit_;
    121   crit_ = NULL;
    122 
    123   if (render_audio_ != NULL) {
    124     delete render_audio_;
    125     render_audio_ = NULL;
    126   }
    127 
    128   if (capture_audio_ != NULL) {
    129     delete capture_audio_;
    130     capture_audio_ = NULL;
    131   }
    132 }
    133 
    134 CriticalSectionWrapper* AudioProcessingImpl::crit() const {
    135   return crit_;
    136 }
    137 
    138 int AudioProcessingImpl::split_sample_rate_hz() const {
    139   return split_sample_rate_hz_;
    140 }
    141 
    142 int AudioProcessingImpl::Initialize() {
    143   CriticalSectionScoped crit_scoped(*crit_);
    144   return InitializeLocked();
    145 }
    146 
    147 int AudioProcessingImpl::InitializeLocked() {
    148   if (render_audio_ != NULL) {
    149     delete render_audio_;
    150     render_audio_ = NULL;
    151   }
    152 
    153   if (capture_audio_ != NULL) {
    154     delete capture_audio_;
    155     capture_audio_ = NULL;
    156   }
    157 
    158   render_audio_ = new AudioBuffer(num_render_input_channels_,
    159                                   samples_per_channel_);
    160   capture_audio_ = new AudioBuffer(num_capture_input_channels_,
    161                                    samples_per_channel_);
    162 
    163   was_stream_delay_set_ = false;
    164 
    165   // Initialize all components.
    166   std::list<ProcessingComponent*>::iterator it;
    167   for (it = component_list_.begin(); it != component_list_.end(); it++) {
    168     int err = (*it)->Initialize();
    169     if (err != kNoError) {
    170       return err;
    171     }
    172   }
    173 
    174   return kNoError;
    175 }
    176 
    177 int AudioProcessingImpl::set_sample_rate_hz(int rate) {
    178   CriticalSectionScoped crit_scoped(*crit_);
    179   if (rate != kSampleRate8kHz &&
    180       rate != kSampleRate16kHz &&
    181       rate != kSampleRate32kHz) {
    182     return kBadParameterError;
    183   }
    184 
    185   sample_rate_hz_ = rate;
    186   samples_per_channel_ = rate / 100;
    187 
    188   if (sample_rate_hz_ == kSampleRate32kHz) {
    189     split_sample_rate_hz_ = kSampleRate16kHz;
    190   } else {
    191     split_sample_rate_hz_ = sample_rate_hz_;
    192   }
    193 
    194   return InitializeLocked();
    195 }
    196 
    197 int AudioProcessingImpl::sample_rate_hz() const {
    198   return sample_rate_hz_;
    199 }
    200 
    201 int AudioProcessingImpl::set_num_reverse_channels(int channels) {
    202   CriticalSectionScoped crit_scoped(*crit_);
    203   // Only stereo supported currently.
    204   if (channels > 2 || channels < 1) {
    205     return kBadParameterError;
    206   }
    207 
    208   num_render_input_channels_ = channels;
    209 
    210   return InitializeLocked();
    211 }
    212 
    213 int AudioProcessingImpl::num_reverse_channels() const {
    214   return num_render_input_channels_;
    215 }
    216 
    217 int AudioProcessingImpl::set_num_channels(
    218     int input_channels,
    219     int output_channels) {
    220   CriticalSectionScoped crit_scoped(*crit_);
    221   if (output_channels > input_channels) {
    222     return kBadParameterError;
    223   }
    224 
    225   // Only stereo supported currently.
    226   if (input_channels > 2 || input_channels < 1) {
    227     return kBadParameterError;
    228   }
    229 
    230   if (output_channels > 2 || output_channels < 1) {
    231     return kBadParameterError;
    232   }
    233 
    234   num_capture_input_channels_ = input_channels;
    235   num_capture_output_channels_ = output_channels;
    236 
    237   return InitializeLocked();
    238 }
    239 
    240 int AudioProcessingImpl::num_input_channels() const {
    241   return num_capture_input_channels_;
    242 }
    243 
    244 int AudioProcessingImpl::num_output_channels() const {
    245   return num_capture_output_channels_;
    246 }
    247 
    248 int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
    249   CriticalSectionScoped crit_scoped(*crit_);
    250   int err = kNoError;
    251 
    252   if (frame == NULL) {
    253     return kNullPointerError;
    254   }
    255 
    256   if (frame->_frequencyInHz !=
    257       static_cast<WebRtc_UWord32>(sample_rate_hz_)) {
    258     return kBadSampleRateError;
    259   }
    260 
    261   if (frame->_audioChannel != num_capture_input_channels_) {
    262     return kBadNumberChannelsError;
    263   }
    264 
    265   if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    266     return kBadDataLengthError;
    267   }
    268 
    269   if (debug_file_->Open()) {
    270     WebRtc_UWord8 event = kCaptureEvent;
    271     if (!debug_file_->Write(&event, sizeof(event))) {
    272       return kFileError;
    273     }
    274 
    275     if (!debug_file_->Write(&frame->_frequencyInHz,
    276                                    sizeof(frame->_frequencyInHz))) {
    277       return kFileError;
    278     }
    279 
    280     if (!debug_file_->Write(&frame->_audioChannel,
    281                                    sizeof(frame->_audioChannel))) {
    282       return kFileError;
    283     }
    284 
    285     if (!debug_file_->Write(&frame->_payloadDataLengthInSamples,
    286         sizeof(frame->_payloadDataLengthInSamples))) {
    287       return kFileError;
    288     }
    289 
    290     if (!debug_file_->Write(frame->_payloadData,
    291         sizeof(WebRtc_Word16) * frame->_payloadDataLengthInSamples *
    292         frame->_audioChannel)) {
    293       return kFileError;
    294     }
    295   }
    296 
    297   capture_audio_->DeinterleaveFrom(frame);
    298 
    299   // TODO(ajm): experiment with mixing and AEC placement.
    300   if (num_capture_output_channels_ < num_capture_input_channels_) {
    301     capture_audio_->Mix(num_capture_output_channels_);
    302 
    303     frame->_audioChannel = num_capture_output_channels_;
    304   }
    305 
    306   if (sample_rate_hz_ == kSampleRate32kHz) {
    307     for (int i = 0; i < num_capture_input_channels_; i++) {
    308       // Split into a low and high band.
    309       SplittingFilterAnalysis(capture_audio_->data(i),
    310                               capture_audio_->low_pass_split_data(i),
    311                               capture_audio_->high_pass_split_data(i),
    312                               capture_audio_->analysis_filter_state1(i),
    313                               capture_audio_->analysis_filter_state2(i));
    314     }
    315   }
    316 
    317   err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
    318   if (err != kNoError) {
    319     return err;
    320   }
    321 
    322   err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
    323   if (err != kNoError) {
    324     return err;
    325   }
    326 
    327   err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
    328   if (err != kNoError) {
    329     return err;
    330   }
    331 
    332   if (echo_control_mobile_->is_enabled() &&
    333       noise_suppression_->is_enabled()) {
    334     capture_audio_->CopyLowPassToReference();
    335   }
    336 
    337   err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
    338   if (err != kNoError) {
    339     return err;
    340   }
    341 
    342   err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
    343   if (err != kNoError) {
    344     return err;
    345   }
    346 
    347   err = voice_detection_->ProcessCaptureAudio(capture_audio_);
    348   if (err != kNoError) {
    349     return err;
    350   }
    351 
    352   err = gain_control_->ProcessCaptureAudio(capture_audio_);
    353   if (err != kNoError) {
    354     return err;
    355   }
    356 
    357   //err = level_estimator_->ProcessCaptureAudio(capture_audio_);
    358   //if (err != kNoError) {
    359   //  return err;
    360   //}
    361 
    362   if (sample_rate_hz_ == kSampleRate32kHz) {
    363     for (int i = 0; i < num_capture_output_channels_; i++) {
    364       // Recombine low and high bands.
    365       SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
    366                                capture_audio_->high_pass_split_data(i),
    367                                capture_audio_->data(i),
    368                                capture_audio_->synthesis_filter_state1(i),
    369                                capture_audio_->synthesis_filter_state2(i));
    370     }
    371   }
    372 
    373   capture_audio_->InterleaveTo(frame);
    374 
    375   return kNoError;
    376 }
    377 
    378 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
    379   CriticalSectionScoped crit_scoped(*crit_);
    380   int err = kNoError;
    381 
    382   if (frame == NULL) {
    383     return kNullPointerError;
    384   }
    385 
    386   if (frame->_frequencyInHz !=
    387       static_cast<WebRtc_UWord32>(sample_rate_hz_)) {
    388     return kBadSampleRateError;
    389   }
    390 
    391   if (frame->_audioChannel != num_render_input_channels_) {
    392     return kBadNumberChannelsError;
    393   }
    394 
    395   if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    396     return kBadDataLengthError;
    397   }
    398 
    399   if (debug_file_->Open()) {
    400     WebRtc_UWord8 event = kRenderEvent;
    401     if (!debug_file_->Write(&event, sizeof(event))) {
    402       return kFileError;
    403     }
    404 
    405     if (!debug_file_->Write(&frame->_frequencyInHz,
    406                                    sizeof(frame->_frequencyInHz))) {
    407       return kFileError;
    408     }
    409 
    410     if (!debug_file_->Write(&frame->_audioChannel,
    411                                    sizeof(frame->_audioChannel))) {
    412       return kFileError;
    413     }
    414 
    415     if (!debug_file_->Write(&frame->_payloadDataLengthInSamples,
    416         sizeof(frame->_payloadDataLengthInSamples))) {
    417       return kFileError;
    418     }
    419 
    420     if (!debug_file_->Write(frame->_payloadData,
    421         sizeof(WebRtc_Word16) * frame->_payloadDataLengthInSamples *
    422         frame->_audioChannel)) {
    423       return kFileError;
    424     }
    425   }
    426 
    427   render_audio_->DeinterleaveFrom(frame);
    428 
    429   // TODO(ajm): turn the splitting filter into a component?
    430   if (sample_rate_hz_ == kSampleRate32kHz) {
    431     for (int i = 0; i < num_render_input_channels_; i++) {
    432       // Split into low and high band.
    433       SplittingFilterAnalysis(render_audio_->data(i),
    434                               render_audio_->low_pass_split_data(i),
    435                               render_audio_->high_pass_split_data(i),
    436                               render_audio_->analysis_filter_state1(i),
    437                               render_audio_->analysis_filter_state2(i));
    438     }
    439   }
    440 
    441   // TODO(ajm): warnings possible from components?
    442   err = echo_cancellation_->ProcessRenderAudio(render_audio_);
    443   if (err != kNoError) {
    444     return err;
    445   }
    446 
    447   err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
    448   if (err != kNoError) {
    449     return err;
    450   }
    451 
    452   err = gain_control_->ProcessRenderAudio(render_audio_);
    453   if (err != kNoError) {
    454     return err;
    455   }
    456 
    457   //err = level_estimator_->AnalyzeReverseStream(render_audio_);
    458   //if (err != kNoError) {
    459   //  return err;
    460   //}
    461 
    462   was_stream_delay_set_ = false;
    463   return err;  // TODO(ajm): this is for returning warnings; necessary?
    464 }
    465 
    466 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
    467   was_stream_delay_set_ = true;
    468   if (delay < 0) {
    469     return kBadParameterError;
    470   }
    471 
    472   // TODO(ajm): the max is rather arbitrarily chosen; investigate.
    473   if (delay > 500) {
    474     stream_delay_ms_ = 500;
    475     return kBadStreamParameterWarning;
    476   }
    477 
    478   stream_delay_ms_ = delay;
    479   return kNoError;
    480 }
    481 
    482 int AudioProcessingImpl::stream_delay_ms() const {
    483   return stream_delay_ms_;
    484 }
    485 
    486 bool AudioProcessingImpl::was_stream_delay_set() const {
    487   return was_stream_delay_set_;
    488 }
    489 
    490 int AudioProcessingImpl::StartDebugRecording(
    491     const char filename[AudioProcessing::kMaxFilenameSize]) {
    492   CriticalSectionScoped crit_scoped(*crit_);
    493   assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);
    494 
    495   if (filename == NULL) {
    496     return kNullPointerError;
    497   }
    498 
    499   // Stop any ongoing recording.
    500   if (debug_file_->Open()) {
    501     if (debug_file_->CloseFile() == -1) {
    502       return kFileError;
    503     }
    504   }
    505 
    506   if (debug_file_->OpenFile(filename, false) == -1) {
    507     debug_file_->CloseFile();
    508     return kFileError;
    509   }
    510 
    511   if (debug_file_->WriteText("%s\n", kMagicNumber) == -1) {
    512     debug_file_->CloseFile();
    513     return kFileError;
    514   }
    515 
    516   // TODO(ajm): should we do this? If so, we need the number of channels etc.
    517   // Record the default sample rate.
    518   WebRtc_UWord8 event = kInitializeEvent;
    519   if (!debug_file_->Write(&event, sizeof(event))) {
    520     return kFileError;
    521   }
    522 
    523   if (!debug_file_->Write(&sample_rate_hz_, sizeof(sample_rate_hz_))) {
    524     return kFileError;
    525   }
    526 
    527   return kNoError;
    528 }
    529 
    530 int AudioProcessingImpl::StopDebugRecording() {
    531   CriticalSectionScoped crit_scoped(*crit_);
    532   // We just return if recording hasn't started.
    533   if (debug_file_->Open()) {
    534     if (debug_file_->CloseFile() == -1) {
    535       return kFileError;
    536     }
    537   }
    538 
    539   return kNoError;
    540 }
    541 
    542 EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
    543   return echo_cancellation_;
    544 }
    545 
    546 EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
    547   return echo_control_mobile_;
    548 }
    549 
    550 GainControl* AudioProcessingImpl::gain_control() const {
    551   return gain_control_;
    552 }
    553 
    554 HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
    555   return high_pass_filter_;
    556 }
    557 
    558 LevelEstimator* AudioProcessingImpl::level_estimator() const {
    559   return level_estimator_;
    560 }
    561 
    562 NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
    563   return noise_suppression_;
    564 }
    565 
    566 VoiceDetection* AudioProcessingImpl::voice_detection() const {
    567   return voice_detection_;
    568 }
    569 
    570 WebRtc_Word32 AudioProcessingImpl::Version(WebRtc_Word8* version,
    571     WebRtc_UWord32& bytes_remaining, WebRtc_UWord32& position) const {
    572   if (version == NULL) {
    573     /*WEBRTC_TRACE(webrtc::kTraceError,
    574                webrtc::kTraceAudioProcessing,
    575                -1,
    576                "Null version pointer");*/
    577     return kNullPointerError;
    578   }
    579   memset(&version[position], 0, bytes_remaining);
    580 
    581   WebRtc_Word8 my_version[] = "AudioProcessing 1.0.0";
    582   // Includes null termination.
    583   WebRtc_UWord32 length = static_cast<WebRtc_UWord32>(strlen(my_version));
    584   if (bytes_remaining < length) {
    585     /*WEBRTC_TRACE(webrtc::kTraceError,
    586                webrtc::kTraceAudioProcessing,
    587                -1,
    588                "Buffer of insufficient length");*/
    589     return kBadParameterError;
    590   }
    591   memcpy(&version[position], my_version, length);
    592   bytes_remaining -= length;
    593   position += length;
    594 
    595   std::list<ProcessingComponent*>::const_iterator it;
    596   for (it = component_list_.begin(); it != component_list_.end(); it++) {
    597     char component_version[256];
    598     strcpy(component_version, "\n");
    599     int err = (*it)->get_version(&component_version[1],
    600                                  sizeof(component_version) - 1);
    601     if (err != kNoError) {
    602       return err;
    603     }
    604     if (strncmp(&component_version[1], "\0", 1) == 0) {
    605       // Assume empty if first byte is NULL.
    606       continue;
    607     }
    608 
    609     length = static_cast<WebRtc_UWord32>(strlen(component_version));
    610     if (bytes_remaining < length) {
    611       /*WEBRTC_TRACE(webrtc::kTraceError,
    612                  webrtc::kTraceAudioProcessing,
    613                  -1,
    614                  "Buffer of insufficient length");*/
    615       return kBadParameterError;
    616     }
    617     memcpy(&version[position], component_version, length);
    618     bytes_remaining -= length;
    619     position += length;
    620   }
    621 
    622   return kNoError;
    623 }
    624 
    625 WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
    626   CriticalSectionScoped crit_scoped(*crit_);
    627   /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
    628              webrtc::kTraceAudioProcessing,
    629              id_,
    630              "ChangeUniqueId(new id = %d)",
    631              id);*/
    632   id_ = id;
    633 
    634   return kNoError;
    635 }
    636 }  // namespace webrtc
    637