1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "audio_processing_impl.h" 12 13 #include <assert.h> 14 15 #include "audio_buffer.h" 16 #include "critical_section_wrapper.h" 17 #include "echo_cancellation_impl.h" 18 #include "echo_control_mobile_impl.h" 19 #include "file_wrapper.h" 20 #include "high_pass_filter_impl.h" 21 #include "gain_control_impl.h" 22 #include "level_estimator_impl.h" 23 #include "module_common_types.h" 24 #include "noise_suppression_impl.h" 25 #include "processing_component.h" 26 #include "splitting_filter.h" 27 #include "voice_detection_impl.h" 28 29 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP 30 // Files generated at build-time by the protobuf compiler. 
#ifdef WEBRTC_ANDROID
#include "external/webrtc/src/modules/audio_processing/debug.pb.h"
#else
#include "webrtc/audio_processing/debug.pb.h"
#endif
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

namespace webrtc {

// Factory: constructs the concrete implementation and runs first-time
// initialization. Returns NULL (ownership not transferred) if
// Initialize() fails, so callers never receive a half-constructed APM.
AudioProcessing* AudioProcessing::Create(int id) {
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
               webrtc::kTraceAudioProcessing,
               id,
               "AudioProcessing::Create()");*/

  AudioProcessingImpl* apm = new AudioProcessingImpl(id);
  if (apm->Initialize() != kNoError) {
    delete apm;
    apm = NULL;
  }

  return apm;
}

// Destroys an instance previously returned by Create(). The downcast is
// safe because Create() only ever hands out AudioProcessingImpl objects.
void AudioProcessing::Destroy(AudioProcessing* apm) {
  delete static_cast<AudioProcessingImpl*>(apm);
}

// Constructs every processing component and registers it in
// component_list_, which drives bulk Initialize()/Destroy() later.
// Audio buffers (render_audio_/capture_audio_) are deferred to
// InitializeLocked() since they depend on rate/channel configuration.
AudioProcessingImpl::AudioProcessingImpl(int id)
    : id_(id),
      echo_cancellation_(NULL),
      echo_control_mobile_(NULL),
      gain_control_(NULL),
      high_pass_filter_(NULL),
      level_estimator_(NULL),
      noise_suppression_(NULL),
      voice_detection_(NULL),
      crit_(CriticalSectionWrapper::CreateCriticalSection()),
      render_audio_(NULL),
      capture_audio_(NULL),
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
      debug_file_(FileWrapper::Create()),
      event_msg_(new audioproc::Event()),
#endif
      sample_rate_hz_(kSampleRate16kHz),
      split_sample_rate_hz_(kSampleRate16kHz),
      samples_per_channel_(sample_rate_hz_ / 100),  // 10 ms frames.
      stream_delay_ms_(0),
      was_stream_delay_set_(false),
      num_reverse_channels_(1),
      num_input_channels_(1),
      num_output_channels_(1) {

  echo_cancellation_ = new EchoCancellationImpl(this);
  component_list_.push_back(echo_cancellation_);

  echo_control_mobile_ = new EchoControlMobileImpl(this);
  component_list_.push_back(echo_control_mobile_);

  gain_control_ = new GainControlImpl(this);
  component_list_.push_back(gain_control_);

  high_pass_filter_ = new HighPassFilterImpl(this);
  component_list_.push_back(high_pass_filter_);

  level_estimator_ = new LevelEstimatorImpl(this);
  component_list_.push_back(level_estimator_);

  noise_suppression_ = new NoiseSuppressionImpl(this);
  component_list_.push_back(noise_suppression_);

  voice_detection_ = new VoiceDetectionImpl(this);
  component_list_.push_back(voice_detection_);
}

// Tears down components (Destroy() then delete, mirroring construction),
// closes any open debug dump file, and frees the lock and audio buffers.
AudioProcessingImpl::~AudioProcessingImpl() {
  while (!component_list_.empty()) {
    ProcessingComponent* component = component_list_.front();
    component->Destroy();
    delete component;
    component_list_.pop_front();
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    debug_file_->CloseFile();
  }
#endif

  delete crit_;
  crit_ = NULL;

  if (render_audio_) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }
}

// Exposes the shared lock so individual components can serialize with
// the stream-processing calls.
CriticalSectionWrapper* AudioProcessingImpl::crit() const {
  return crit_;
}

// Rate at which the band-split (low/high) data is processed; equals
// 16 kHz when running super-wideband (32 kHz), else the full rate.
int AudioProcessingImpl::split_sample_rate_hz() const {
  return split_sample_rate_hz_;
}

int AudioProcessingImpl::Initialize() {
  CriticalSectionScoped crit_scoped(*crit_);
  return InitializeLocked();
}

// (Re)initializes state that depends on the current rate/channel
// configuration. Caller must hold crit_. Recreates both audio buffers,
// re-initializes every registered component, and (when dumping) writes a
// fresh INIT event so the recording reflects the new configuration.
int AudioProcessingImpl::InitializeLocked() {
  if (render_audio_ != NULL) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_ != NULL) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }

  render_audio_ = new AudioBuffer(num_reverse_channels_,
                                  samples_per_channel_);
  capture_audio_ = new AudioBuffer(num_input_channels_,
                                   samples_per_channel_);

  was_stream_delay_set_ = false;

  // Initialize all components.
  std::list<ProcessingComponent*>::iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    int err = (*it)->Initialize();
    if (err != kNoError) {
      return err;
    }
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    int err = WriteInitMessage();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  return kNoError;
}

// Sets the sample rate; only 8, 16 and 32 kHz are accepted. At 32 kHz
// the signal is processed in two 16 kHz bands. Triggers a full
// reinitialization on success.
int AudioProcessingImpl::set_sample_rate_hz(int rate) {
  CriticalSectionScoped crit_scoped(*crit_);
  if (rate != kSampleRate8kHz &&
      rate != kSampleRate16kHz &&
      rate != kSampleRate32kHz) {
    return kBadParameterError;
  }

  sample_rate_hz_ = rate;
  samples_per_channel_ = rate / 100;  // 10 ms worth of samples.

  if (sample_rate_hz_ == kSampleRate32kHz) {
    split_sample_rate_hz_ = kSampleRate16kHz;
  } else {
    split_sample_rate_hz_ = sample_rate_hz_;
  }

  return InitializeLocked();
}

int AudioProcessingImpl::sample_rate_hz() const {
  return sample_rate_hz_;
}

// Sets the reverse (render) channel count; mono or stereo only.
// Reinitializes on success.
int AudioProcessingImpl::set_num_reverse_channels(int channels) {
  CriticalSectionScoped crit_scoped(*crit_);
  // Only stereo supported currently.
  if (channels > 2 || channels < 1) {
    return kBadParameterError;
  }

  num_reverse_channels_ = channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_reverse_channels() const {
  return num_reverse_channels_;
}

// Sets capture-side input/output channel counts. Output may not exceed
// input (down-mixing only); both limited to mono or stereo.
// Reinitializes on success.
int AudioProcessingImpl::set_num_channels(
    int input_channels,
    int output_channels) {
  CriticalSectionScoped crit_scoped(*crit_);
  if (output_channels > input_channels) {
    return kBadParameterError;
  }

  // Only stereo supported currently.
  if (input_channels > 2 || input_channels < 1) {
    return kBadParameterError;
  }

  if (output_channels > 2 || output_channels < 1) {
    return kBadParameterError;
  }

  num_input_channels_ = input_channels;
  num_output_channels_ = output_channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_input_channels() const {
  return num_input_channels_;
}

int AudioProcessingImpl::num_output_channels() const {
  return num_output_channels_;
}

// Runs the full capture-side pipeline on one 10 ms frame, in place.
// The frame must match the configured rate, input channel count and
// samples-per-channel. Processing order: optional down-mix, band split
// (super-wb), high-pass filter, AGC analysis, AEC, noise suppression,
// AECM, VAD, AGC, band recombine, then level estimation on the
// recombined signal. When dumping, input and output are both captured
// in a single STREAM event.
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_input_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_input_data(frame->_payloadData, data_size);
    msg->set_delay(stream_delay_ms_);
    msg->set_drift(echo_cancellation_->stream_drift_samples());
    msg->set_level(gain_control_->stream_analog_level());
  }
#endif

  capture_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): experiment with mixing and AEC placement.
  if (num_output_channels_ < num_input_channels_) {
    capture_audio_->Mix(num_output_channels_);
    // The frame now carries the down-mixed channel count.
    frame->_audioChannel = num_output_channels_;
  }

  bool data_changed = stream_data_changed();
  if (analysis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Split into a low and high band.
      SplittingFilterAnalysis(capture_audio_->data(i),
                              capture_audio_->low_pass_split_data(i),
                              capture_audio_->high_pass_split_data(i),
                              capture_audio_->analysis_filter_state1(i),
                              capture_audio_->analysis_filter_state2(i));
    }
  }

  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  // AECM needs a pre-NS copy of the low band as its echo reference.
  if (echo_control_mobile_->is_enabled() &&
      noise_suppression_->is_enabled()) {
    capture_audio_->CopyLowPassToReference();
  }

  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (synthesis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Recombine low and high bands.
      SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
                               capture_audio_->high_pass_split_data(i),
                               capture_audio_->data(i),
                               capture_audio_->synthesis_filter_state1(i),
                               capture_audio_->synthesis_filter_state2(i));
    }
  }

  // The level estimator operates on the recombined data.
  err = level_estimator_->ProcessStream(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  // Only copy back to the frame if some component modified the data.
  capture_audio_->InterleaveTo(frame, data_changed);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_output_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  // The delay must be set anew before each ProcessStream() call.
  was_stream_delay_set_ = false;
  return kNoError;
}

// Feeds one 10 ms render (far-end) frame to the components that need a
// render reference: AEC, AECM and gain control. The frame is only
// analyzed, never modified. Validation mirrors ProcessStream() but
// against the reverse channel count.
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_reverse_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  render_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): turn the splitting filter into a component?
  if (sample_rate_hz_ == kSampleRate32kHz) {
    for (int i = 0; i < num_reverse_channels_; i++) {
      // Split into low and high band.
      SplittingFilterAnalysis(render_audio_->data(i),
                              render_audio_->low_pass_split_data(i),
                              render_audio_->high_pass_split_data(i),
                              render_audio_->analysis_filter_state1(i),
                              render_audio_->analysis_filter_state2(i));
    }
  }

  // TODO(ajm): warnings possible from components?
  err = echo_cancellation_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  return err;  // TODO(ajm): this is for returning warnings; necessary?
}

// Records the render-to-capture delay for the upcoming ProcessStream()
// call. Values above 500 ms are clamped and reported as a warning.
// NOTE(review): was_stream_delay_set_ is set to true even when the
// delay is rejected as negative — looks intentional (the caller did
// attempt to set it), but confirm against was_stream_delay_set() users.
int AudioProcessingImpl::set_stream_delay_ms(int delay) {
  was_stream_delay_set_ = true;
  if (delay < 0) {
    return kBadParameterError;
  }

  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
  if (delay > 500) {
    stream_delay_ms_ = 500;
    return kBadStreamParameterWarning;
  }

  stream_delay_ms_ = delay;
  return kNoError;
}

int AudioProcessingImpl::stream_delay_ms() const {
  return stream_delay_ms_;
}

bool AudioProcessingImpl::was_stream_delay_set() const {
  return was_stream_delay_set_;
}

// Starts dumping debug events (INIT/STREAM/REVERSE_STREAM protobufs) to
// the given file, replacing any recording already in progress. Returns
// kUnsupportedFunctionError when built without the dump feature.
int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  CriticalSectionScoped crit_scoped(*crit_);
  assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);

  if (filename == NULL) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_file_->OpenFile(filename, false) == -1) {
    debug_file_->CloseFile();
    return kFileError;
  }

  // Lead the recording with the current configuration.
  int err = WriteInitMessage();
  if (err != kNoError) {
    return err;
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

int AudioProcessingImpl::StopDebugRecording() {
  CriticalSectionScoped crit_scoped(*crit_);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // We just return if recording hasn't started.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

// Component accessors. Pointers remain owned by this object and are
// valid for its lifetime.
EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
  return echo_cancellation_;
}

EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
  return echo_control_mobile_;
}

GainControl* AudioProcessingImpl::gain_control() const {
  return gain_control_;
}

HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
  return high_pass_filter_;
}

LevelEstimator* AudioProcessingImpl::level_estimator() const {
  return level_estimator_;
}

NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
  return noise_suppression_;
}

VoiceDetection* AudioProcessingImpl::voice_detection() const {
  return voice_detection_;
}

WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
  CriticalSectionScoped crit_scoped(*crit_);
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
               webrtc::kTraceAudioProcessing,
               id_,
               "ChangeUniqueId(new id = %d)",
               id);*/
  id_ = id;

  return kNoError;
}

// Returns true if any enabled component may modify the capture data.
// level_estimator_ and voice_detection_ are analysis-only, so data is
// considered unchanged when they are the only components enabled.
bool AudioProcessingImpl::stream_data_changed() const {
  int enabled_count = 0;
  std::list<ProcessingComponent*>::const_iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    if ((*it)->is_component_enabled()) {
      enabled_count++;
    }
  }

  // Data is unchanged if no components are enabled, or if only level_estimator_
  // or voice_detection_ is enabled.
  if (enabled_count == 0) {
    return false;
  } else if (enabled_count == 1) {
    if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
      return false;
    }
  } else if (enabled_count == 2) {
    if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
      return false;
    }
  }
  return true;
}

// Band recombination is only needed when the data was modified and we
// are running in super-wideband (split) mode.
bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const {
  return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz);
}

// Band splitting is only needed in super-wideband mode, and only when a
// component other than level_estimator_ will consume the split data
// (voice_detection_ reads it even though it never modifies the stream).
bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const {
  if (!stream_data_changed && !voice_detection_->is_enabled()) {
    // Only level_estimator_ is enabled.
    return false;
  } else if (sample_rate_hz_ == kSampleRate32kHz) {
    // Something besides level_estimator_ is enabled, and we have super-wb.
    return true;
  }
  return false;
}

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// Serializes the pending event_msg_ to the debug file as a
// length-prefixed record (int32_t byte count, then the message bytes),
// then clears the message for reuse.
// NOTE(review): returns 0 rather than kNoError on success — the values
// appear to coincide given how callers test "!= kNoError"; confirm.
int AudioProcessingImpl::WriteMessageToDebugFile() {
  int32_t size = event_msg_->ByteSize();
  if (size <= 0) {
    return kUnspecifiedError;
  }
#if defined(WEBRTC_BIG_ENDIAN)
  // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
  // pretty safe in assuming little-endian.
#endif

  if (!event_msg_->SerializeToString(&event_str_)) {
    return kUnspecifiedError;
  }

  // Write message preceded by its size.
  if (!debug_file_->Write(&size, sizeof(int32_t))) {
    return kFileError;
  }
  if (!debug_file_->Write(event_str_.data(), event_str_.length())) {
    return kFileError;
  }

  event_msg_->Clear();

  return 0;
}

// Writes an INIT event capturing the current rate and channel
// configuration, so dump readers can interpret subsequent stream data.
int AudioProcessingImpl::WriteInitMessage() {
  event_msg_->set_type(audioproc::Event::INIT);
  audioproc::Init* msg = event_msg_->mutable_init();
  msg->set_sample_rate(sample_rate_hz_);
  msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz());
  msg->set_num_input_channels(num_input_channels_);
  msg->set_num_output_channels(num_output_channels_);
  msg->set_num_reverse_channels(num_reverse_channels_);

  int err = WriteMessageToDebugFile();
  if (err != kNoError) {
    return err;
  }

  return kNoError;
}
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}  // namespace webrtc