1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "audio_processing_impl.h" 12 13 #include <cassert> 14 15 #include "module_common_types.h" 16 17 #include "critical_section_wrapper.h" 18 #include "file_wrapper.h" 19 20 #include "audio_buffer.h" 21 #include "echo_cancellation_impl.h" 22 #include "echo_control_mobile_impl.h" 23 #include "high_pass_filter_impl.h" 24 #include "gain_control_impl.h" 25 #include "level_estimator_impl.h" 26 #include "noise_suppression_impl.h" 27 #include "processing_component.h" 28 #include "splitting_filter.h" 29 #include "voice_detection_impl.h" 30 31 namespace webrtc { 32 namespace { 33 34 enum Events { 35 kInitializeEvent, 36 kRenderEvent, 37 kCaptureEvent 38 }; 39 40 const char kMagicNumber[] = "#!vqetrace1.2"; 41 } // namespace 42 43 AudioProcessing* AudioProcessing::Create(int id) { 44 /*WEBRTC_TRACE(webrtc::kTraceModuleCall, 45 webrtc::kTraceAudioProcessing, 46 id, 47 "AudioProcessing::Create()");*/ 48 49 AudioProcessingImpl* apm = new AudioProcessingImpl(id); 50 if (apm->Initialize() != kNoError) { 51 delete apm; 52 apm = NULL; 53 } 54 55 return apm; 56 } 57 58 void AudioProcessing::Destroy(AudioProcessing* apm) { 59 delete static_cast<AudioProcessingImpl*>(apm); 60 } 61 62 AudioProcessingImpl::AudioProcessingImpl(int id) 63 : id_(id), 64 echo_cancellation_(NULL), 65 echo_control_mobile_(NULL), 66 gain_control_(NULL), 67 high_pass_filter_(NULL), 68 level_estimator_(NULL), 69 noise_suppression_(NULL), 70 voice_detection_(NULL), 71 debug_file_(FileWrapper::Create()), 72 crit_(CriticalSectionWrapper::CreateCriticalSection()), 73 render_audio_(NULL), 74 capture_audio_(NULL), 75 sample_rate_hz_(kSampleRate16kHz), 76 split_sample_rate_hz_(kSampleRate16kHz), 77 samples_per_channel_(sample_rate_hz_ / 100), 78 stream_delay_ms_(0), 79 was_stream_delay_set_(false), 80 num_render_input_channels_(1), 81 num_capture_input_channels_(1), 82 num_capture_output_channels_(1) { 83 84 echo_cancellation_ = new EchoCancellationImpl(this); 85 component_list_.push_back(echo_cancellation_); 86 87 echo_control_mobile_ = new EchoControlMobileImpl(this); 88 component_list_.push_back(echo_control_mobile_); 89 90 gain_control_ = new GainControlImpl(this); 91 component_list_.push_back(gain_control_); 92 93 high_pass_filter_ = new HighPassFilterImpl(this); 94 component_list_.push_back(high_pass_filter_); 95 96 level_estimator_ = new LevelEstimatorImpl(this); 97 component_list_.push_back(level_estimator_); 98 99 noise_suppression_ = new NoiseSuppressionImpl(this); 100 component_list_.push_back(noise_suppression_); 101 102 voice_detection_ = new VoiceDetectionImpl(this); 103 component_list_.push_back(voice_detection_); 104 } 105 106 AudioProcessingImpl::~AudioProcessingImpl() { 107 while (!component_list_.empty()) { 108 ProcessingComponent* component = component_list_.front(); 109 component->Destroy(); 110 delete component; 111 component_list_.pop_front(); 112 } 113 114 if (debug_file_->Open()) { 115 debug_file_->CloseFile(); 116 } 117 delete debug_file_; 118 debug_file_ = NULL; 119 120 delete crit_; 121 crit_ = NULL; 122 123 if (render_audio_ != NULL) { 124 delete render_audio_; 125 render_audio_ = NULL; 126 } 127 128 if (capture_audio_ != NULL) { 129 delete capture_audio_; 130 capture_audio_ = NULL; 131 } 132 } 133 134 CriticalSectionWrapper* AudioProcessingImpl::crit() const { 135 return crit_; 136 } 137 138 int AudioProcessingImpl::split_sample_rate_hz() const { 139 return split_sample_rate_hz_; 140 } 141 142 int AudioProcessingImpl::Initialize() { 143 CriticalSectionScoped crit_scoped(*crit_); 144 return InitializeLocked(); 145 } 146 147 int AudioProcessingImpl::InitializeLocked() { 148 if (render_audio_ != NULL) { 149 delete render_audio_; 150 render_audio_ = NULL; 151 } 152 153 if (capture_audio_ != NULL) { 154 delete capture_audio_; 155 capture_audio_ = NULL; 156 } 157 158 render_audio_ = new AudioBuffer(num_render_input_channels_, 159 samples_per_channel_); 160 capture_audio_ = new AudioBuffer(num_capture_input_channels_, 161 samples_per_channel_); 162 163 was_stream_delay_set_ = false; 164 165 // Initialize all components. 166 std::list<ProcessingComponent*>::iterator it; 167 for (it = component_list_.begin(); it != component_list_.end(); it++) { 168 int err = (*it)->Initialize(); 169 if (err != kNoError) { 170 return err; 171 } 172 } 173 174 return kNoError; 175 } 176 177 int AudioProcessingImpl::set_sample_rate_hz(int rate) { 178 CriticalSectionScoped crit_scoped(*crit_); 179 if (rate != kSampleRate8kHz && 180 rate != kSampleRate16kHz && 181 rate != kSampleRate32kHz) { 182 return kBadParameterError; 183 } 184 185 sample_rate_hz_ = rate; 186 samples_per_channel_ = rate / 100; 187 188 if (sample_rate_hz_ == kSampleRate32kHz) { 189 split_sample_rate_hz_ = kSampleRate16kHz; 190 } else { 191 split_sample_rate_hz_ = sample_rate_hz_; 192 } 193 194 return InitializeLocked(); 195 } 196 197 int AudioProcessingImpl::sample_rate_hz() const { 198 return sample_rate_hz_; 199 } 200 201 int AudioProcessingImpl::set_num_reverse_channels(int channels) { 202 CriticalSectionScoped crit_scoped(*crit_); 203 // Only stereo supported currently. 204 if (channels > 2 || channels < 1) { 205 return kBadParameterError; 206 } 207 208 num_render_input_channels_ = channels; 209 210 return InitializeLocked(); 211 } 212 213 int AudioProcessingImpl::num_reverse_channels() const { 214 return num_render_input_channels_; 215 } 216 217 int AudioProcessingImpl::set_num_channels( 218 int input_channels, 219 int output_channels) { 220 CriticalSectionScoped crit_scoped(*crit_); 221 if (output_channels > input_channels) { 222 return kBadParameterError; 223 } 224 225 // Only stereo supported currently. 226 if (input_channels > 2 || input_channels < 1) { 227 return kBadParameterError; 228 } 229 230 if (output_channels > 2 || output_channels < 1) { 231 return kBadParameterError; 232 } 233 234 num_capture_input_channels_ = input_channels; 235 num_capture_output_channels_ = output_channels; 236 237 return InitializeLocked(); 238 } 239 240 int AudioProcessingImpl::num_input_channels() const { 241 return num_capture_input_channels_; 242 } 243 244 int AudioProcessingImpl::num_output_channels() const { 245 return num_capture_output_channels_; 246 } 247 248 int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { 249 CriticalSectionScoped crit_scoped(*crit_); 250 int err = kNoError; 251 252 if (frame == NULL) { 253 return kNullPointerError; 254 } 255 256 if (frame->_frequencyInHz != 257 static_cast<WebRtc_UWord32>(sample_rate_hz_)) { 258 return kBadSampleRateError; 259 } 260 261 if (frame->_audioChannel != num_capture_input_channels_) { 262 return kBadNumberChannelsError; 263 } 264 265 if (frame->_payloadDataLengthInSamples != samples_per_channel_) { 266 return kBadDataLengthError; 267 } 268 269 if (debug_file_->Open()) { 270 WebRtc_UWord8 event = kCaptureEvent; 271 if (!debug_file_->Write(&event, sizeof(event))) { 272 return kFileError; 273 } 274 275 if (!debug_file_->Write(&frame->_frequencyInHz, 276 sizeof(frame->_frequencyInHz))) { 277 return kFileError; 278 } 279 280 if (!debug_file_->Write(&frame->_audioChannel, 281 sizeof(frame->_audioChannel))) { 282 return kFileError; 283 } 284 285 if (!debug_file_->Write(&frame->_payloadDataLengthInSamples, 286 sizeof(frame->_payloadDataLengthInSamples))) { 287 return kFileError; 288 } 289 290 if (!debug_file_->Write(frame->_payloadData, 291 sizeof(WebRtc_Word16) * frame->_payloadDataLengthInSamples * 292 frame->_audioChannel)) { 293 return kFileError; 294 } 295 } 296 297 capture_audio_->DeinterleaveFrom(frame); 298 299 // TODO(ajm): experiment with mixing and AEC placement. 300 if (num_capture_output_channels_ < num_capture_input_channels_) { 301 capture_audio_->Mix(num_capture_output_channels_); 302 303 frame->_audioChannel = num_capture_output_channels_; 304 } 305 306 if (sample_rate_hz_ == kSampleRate32kHz) { 307 for (int i = 0; i < num_capture_input_channels_; i++) { 308 // Split into a low and high band. 309 SplittingFilterAnalysis(capture_audio_->data(i), 310 capture_audio_->low_pass_split_data(i), 311 capture_audio_->high_pass_split_data(i), 312 capture_audio_->analysis_filter_state1(i), 313 capture_audio_->analysis_filter_state2(i)); 314 } 315 } 316 317 err = high_pass_filter_->ProcessCaptureAudio(capture_audio_); 318 if (err != kNoError) { 319 return err; 320 } 321 322 err = gain_control_->AnalyzeCaptureAudio(capture_audio_); 323 if (err != kNoError) { 324 return err; 325 } 326 327 err = echo_cancellation_->ProcessCaptureAudio(capture_audio_); 328 if (err != kNoError) { 329 return err; 330 } 331 332 if (echo_control_mobile_->is_enabled() && 333 noise_suppression_->is_enabled()) { 334 capture_audio_->CopyLowPassToReference(); 335 } 336 337 err = noise_suppression_->ProcessCaptureAudio(capture_audio_); 338 if (err != kNoError) { 339 return err; 340 } 341 342 err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_); 343 if (err != kNoError) { 344 return err; 345 } 346 347 err = voice_detection_->ProcessCaptureAudio(capture_audio_); 348 if (err != kNoError) { 349 return err; 350 } 351 352 err = gain_control_->ProcessCaptureAudio(capture_audio_); 353 if (err != kNoError) { 354 return err; 355 } 356 357 //err = level_estimator_->ProcessCaptureAudio(capture_audio_); 358 //if (err != kNoError) { 359 // return err; 360 //} 361 362 if (sample_rate_hz_ == kSampleRate32kHz) { 363 for (int i = 0; i < num_capture_output_channels_; i++) { 364 // Recombine low and high bands. 365 SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i), 366 capture_audio_->high_pass_split_data(i), 367 capture_audio_->data(i), 368 capture_audio_->synthesis_filter_state1(i), 369 capture_audio_->synthesis_filter_state2(i)); 370 } 371 } 372 373 capture_audio_->InterleaveTo(frame); 374 375 return kNoError; 376 } 377 378 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { 379 CriticalSectionScoped crit_scoped(*crit_); 380 int err = kNoError; 381 382 if (frame == NULL) { 383 return kNullPointerError; 384 } 385 386 if (frame->_frequencyInHz != 387 static_cast<WebRtc_UWord32>(sample_rate_hz_)) { 388 return kBadSampleRateError; 389 } 390 391 if (frame->_audioChannel != num_render_input_channels_) { 392 return kBadNumberChannelsError; 393 } 394 395 if (frame->_payloadDataLengthInSamples != samples_per_channel_) { 396 return kBadDataLengthError; 397 } 398 399 if (debug_file_->Open()) { 400 WebRtc_UWord8 event = kRenderEvent; 401 if (!debug_file_->Write(&event, sizeof(event))) { 402 return kFileError; 403 } 404 405 if (!debug_file_->Write(&frame->_frequencyInHz, 406 sizeof(frame->_frequencyInHz))) { 407 return kFileError; 408 } 409 410 if (!debug_file_->Write(&frame->_audioChannel, 411 sizeof(frame->_audioChannel))) { 412 return kFileError; 413 } 414 415 if (!debug_file_->Write(&frame->_payloadDataLengthInSamples, 416 sizeof(frame->_payloadDataLengthInSamples))) { 417 return kFileError; 418 } 419 420 if (!debug_file_->Write(frame->_payloadData, 421 sizeof(WebRtc_Word16) * frame->_payloadDataLengthInSamples * 422 frame->_audioChannel)) { 423 return kFileError; 424 } 425 } 426 427 render_audio_->DeinterleaveFrom(frame); 428 429 // TODO(ajm): turn the splitting filter into a component? 430 if (sample_rate_hz_ == kSampleRate32kHz) { 431 for (int i = 0; i < num_render_input_channels_; i++) { 432 // Split into low and high band. 433 SplittingFilterAnalysis(render_audio_->data(i), 434 render_audio_->low_pass_split_data(i), 435 render_audio_->high_pass_split_data(i), 436 render_audio_->analysis_filter_state1(i), 437 render_audio_->analysis_filter_state2(i)); 438 } 439 } 440 441 // TODO(ajm): warnings possible from components? 442 err = echo_cancellation_->ProcessRenderAudio(render_audio_); 443 if (err != kNoError) { 444 return err; 445 } 446 447 err = echo_control_mobile_->ProcessRenderAudio(render_audio_); 448 if (err != kNoError) { 449 return err; 450 } 451 452 err = gain_control_->ProcessRenderAudio(render_audio_); 453 if (err != kNoError) { 454 return err; 455 } 456 457 //err = level_estimator_->AnalyzeReverseStream(render_audio_); 458 //if (err != kNoError) { 459 // return err; 460 //} 461 462 was_stream_delay_set_ = false; 463 return err; // TODO(ajm): this is for returning warnings; necessary? 464 } 465 466 int AudioProcessingImpl::set_stream_delay_ms(int delay) { 467 was_stream_delay_set_ = true; 468 if (delay < 0) { 469 return kBadParameterError; 470 } 471 472 // TODO(ajm): the max is rather arbitrarily chosen; investigate. 473 if (delay > 500) { 474 stream_delay_ms_ = 500; 475 return kBadStreamParameterWarning; 476 } 477 478 stream_delay_ms_ = delay; 479 return kNoError; 480 } 481 482 int AudioProcessingImpl::stream_delay_ms() const { 483 return stream_delay_ms_; 484 } 485 486 bool AudioProcessingImpl::was_stream_delay_set() const { 487 return was_stream_delay_set_; 488 } 489 490 int AudioProcessingImpl::StartDebugRecording( 491 const char filename[AudioProcessing::kMaxFilenameSize]) { 492 CriticalSectionScoped crit_scoped(*crit_); 493 assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize); 494 495 if (filename == NULL) { 496 return kNullPointerError; 497 } 498 499 // Stop any ongoing recording. 500 if (debug_file_->Open()) { 501 if (debug_file_->CloseFile() == -1) { 502 return kFileError; 503 } 504 } 505 506 if (debug_file_->OpenFile(filename, false) == -1) { 507 debug_file_->CloseFile(); 508 return kFileError; 509 } 510 511 if (debug_file_->WriteText("%s\n", kMagicNumber) == -1) { 512 debug_file_->CloseFile(); 513 return kFileError; 514 } 515 516 // TODO(ajm): should we do this? If so, we need the number of channels etc. 517 // Record the default sample rate. 518 WebRtc_UWord8 event = kInitializeEvent; 519 if (!debug_file_->Write(&event, sizeof(event))) { 520 return kFileError; 521 } 522 523 if (!debug_file_->Write(&sample_rate_hz_, sizeof(sample_rate_hz_))) { 524 return kFileError; 525 } 526 527 return kNoError; 528 } 529 530 int AudioProcessingImpl::StopDebugRecording() { 531 CriticalSectionScoped crit_scoped(*crit_); 532 // We just return if recording hasn't started. 533 if (debug_file_->Open()) { 534 if (debug_file_->CloseFile() == -1) { 535 return kFileError; 536 } 537 } 538 539 return kNoError; 540 } 541 542 EchoCancellation* AudioProcessingImpl::echo_cancellation() const { 543 return echo_cancellation_; 544 } 545 546 EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const { 547 return echo_control_mobile_; 548 } 549 550 GainControl* AudioProcessingImpl::gain_control() const { 551 return gain_control_; 552 } 553 554 HighPassFilter* AudioProcessingImpl::high_pass_filter() const { 555 return high_pass_filter_; 556 } 557 558 LevelEstimator* AudioProcessingImpl::level_estimator() const { 559 return level_estimator_; 560 } 561 562 NoiseSuppression* AudioProcessingImpl::noise_suppression() const { 563 return noise_suppression_; 564 } 565 566 VoiceDetection* AudioProcessingImpl::voice_detection() const { 567 return voice_detection_; 568 } 569 570 WebRtc_Word32 AudioProcessingImpl::Version(WebRtc_Word8* version, 571 WebRtc_UWord32& bytes_remaining, WebRtc_UWord32& position) const { 572 if (version == NULL) { 573 /*WEBRTC_TRACE(webrtc::kTraceError, 574 webrtc::kTraceAudioProcessing, 575 -1, 576 "Null version pointer");*/ 577 return kNullPointerError; 578 } 579 memset(&version[position], 0, bytes_remaining); 580 581 WebRtc_Word8 my_version[] = "AudioProcessing 1.0.0"; 582 // Includes null termination. 583 WebRtc_UWord32 length = static_cast<WebRtc_UWord32>(strlen(my_version)); 584 if (bytes_remaining < length) { 585 /*WEBRTC_TRACE(webrtc::kTraceError, 586 webrtc::kTraceAudioProcessing, 587 -1, 588 "Buffer of insufficient length");*/ 589 return kBadParameterError; 590 } 591 memcpy(&version[position], my_version, length); 592 bytes_remaining -= length; 593 position += length; 594 595 std::list<ProcessingComponent*>::const_iterator it; 596 for (it = component_list_.begin(); it != component_list_.end(); it++) { 597 char component_version[256]; 598 strcpy(component_version, "\n"); 599 int err = (*it)->get_version(&component_version[1], 600 sizeof(component_version) - 1); 601 if (err != kNoError) { 602 return err; 603 } 604 if (strncmp(&component_version[1], "\0", 1) == 0) { 605 // Assume empty if first byte is NULL. 606 continue; 607 } 608 609 length = static_cast<WebRtc_UWord32>(strlen(component_version)); 610 if (bytes_remaining < length) { 611 /*WEBRTC_TRACE(webrtc::kTraceError, 612 webrtc::kTraceAudioProcessing, 613 -1, 614 "Buffer of insufficient length");*/ 615 return kBadParameterError; 616 } 617 memcpy(&version[position], component_version, length); 618 bytes_remaining -= length; 619 position += length; 620 } 621 622 return kNoError; 623 } 624 625 WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) { 626 CriticalSectionScoped crit_scoped(*crit_); 627 /*WEBRTC_TRACE(webrtc::kTraceModuleCall, 628 webrtc::kTraceAudioProcessing, 629 id_, 630 "ChangeUniqueId(new id = %d)", 631 id);*/ 632 id_ = id; 633 634 return kNoError; 635 } 636 } // namespace webrtc 637