/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_processing/audio_processing_impl.h"

#include <assert.h>
#include <algorithm>

#include "webrtc/base/checks.h"
#include "webrtc/base/platform_file.h"
#include "webrtc/base/trace_event.h"
#include "webrtc/common_audio/audio_converter.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
extern "C" {
#include "webrtc/modules/audio_processing/aec/aec_core.h"
}
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
#include "webrtc/modules/audio_processing/common.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#include "webrtc/modules/audio_processing/level_estimator_impl.h"
#include "webrtc/modules/audio_processing/noise_suppression_impl.h"
#include "webrtc/modules/audio_processing/processing_component.h"
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
#include "webrtc/modules/audio_processing/voice_detection_impl.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/system_wrappers/include/file_wrapper.h"
#include "webrtc/system_wrappers/include/logging.h"
#include "webrtc/system_wrappers/include/metrics.h"

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// Files generated at build-time by the protobuf compiler.
#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"
#else
#include "webrtc/audio_processing/debug.pb.h"
#endif
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

#define RETURN_ON_ERR(expr)  \
  do {                       \
    int err = (expr);        \
    if (err != kNoError) {   \
      return err;            \
    }                        \
  } while (0)

namespace webrtc {
namespace {

static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      return false;
    case AudioProcessing::kMonoAndKeyboard:
    case AudioProcessing::kStereoAndKeyboard:
      return true;
  }

  assert(false);
  return false;
}
}  // namespace

// Throughout webrtc, it's assumed that success is represented by zero.
static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");

// This class has two main functionalities:
//
// 1) It is returned instead of the real GainControl after the new AGC has been
//    enabled in order to prevent an outside user from overriding compression
//    settings.
//    It doesn't do anything in its implementation, except for delegating the
//    const methods and Enable calls to the real GainControl, so AGC can still
//    be disabled.
//
// 2) It is injected into AgcManagerDirect and implements volume callbacks for
//    getting and setting the volume level. It just caches this value to be
//    used in VoiceEngine later.
class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
 public:
  explicit GainControlForNewAgc(GainControlImpl* gain_control)
      : real_gain_control_(gain_control), volume_(0) {}

  // GainControl implementation.
  int Enable(bool enable) override {
    return real_gain_control_->Enable(enable);
  }
  bool is_enabled() const override { return real_gain_control_->is_enabled(); }
  int set_stream_analog_level(int level) override {
    volume_ = level;
    return AudioProcessing::kNoError;
  }
  int stream_analog_level() override { return volume_; }
  int set_mode(Mode mode) override { return AudioProcessing::kNoError; }
  Mode mode() const override { return GainControl::kAdaptiveAnalog; }
  int set_target_level_dbfs(int level) override {
    return AudioProcessing::kNoError;
  }
  int target_level_dbfs() const override {
    return real_gain_control_->target_level_dbfs();
  }
  int set_compression_gain_db(int gain) override {
    return AudioProcessing::kNoError;
  }
  int compression_gain_db() const override {
    return real_gain_control_->compression_gain_db();
  }
  int enable_limiter(bool enable) override { return AudioProcessing::kNoError; }
  bool is_limiter_enabled() const override {
    return real_gain_control_->is_limiter_enabled();
  }
  int set_analog_level_limits(int minimum, int maximum) override {
    return AudioProcessing::kNoError;
  }
  int analog_level_minimum() const override {
    return real_gain_control_->analog_level_minimum();
  }
  int analog_level_maximum() const override {
    return real_gain_control_->analog_level_maximum();
  }
  bool stream_is_saturated() const override {
    return real_gain_control_->stream_is_saturated();
  }

  // VolumeCallbacks implementation.
  void SetMicVolume(int volume) override { volume_ = volume; }
  int GetMicVolume() override { return volume_; }

 private:
  GainControl* real_gain_control_;
  int volume_;
};

struct AudioProcessingImpl::ApmPublicSubmodules {
  ApmPublicSubmodules()
      : echo_cancellation(nullptr),
        echo_control_mobile(nullptr),
        gain_control(nullptr) {}
  // Accessed externally of APM without any lock acquired.
  EchoCancellationImpl* echo_cancellation;
  EchoControlMobileImpl* echo_control_mobile;
  GainControlImpl* gain_control;
  rtc::scoped_ptr<HighPassFilterImpl> high_pass_filter;
  rtc::scoped_ptr<LevelEstimatorImpl> level_estimator;
  rtc::scoped_ptr<NoiseSuppressionImpl> noise_suppression;
  rtc::scoped_ptr<VoiceDetectionImpl> voice_detection;
  rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc;

  // Accessed internally from both render and capture.
  rtc::scoped_ptr<TransientSuppressor> transient_suppressor;
  rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer;
};

struct AudioProcessingImpl::ApmPrivateSubmodules {
  explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)
      : beamformer(beamformer) {}
  // Accessed internally from capture or during initialization
  std::list<ProcessingComponent*> component_list;
  rtc::scoped_ptr<Beamformer<float>> beamformer;
  rtc::scoped_ptr<AgcManagerDirect> agc_manager;
};

const int AudioProcessing::kNativeSampleRatesHz[] = {
    AudioProcessing::kSampleRate8kHz,
    AudioProcessing::kSampleRate16kHz,
    AudioProcessing::kSampleRate32kHz,
    AudioProcessing::kSampleRate48kHz};
const size_t AudioProcessing::kNumNativeSampleRates =
    arraysize(AudioProcessing::kNativeSampleRatesHz);
const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
    kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz;

AudioProcessing* AudioProcessing::Create() {
  Config config;
  return Create(config, nullptr);
}

AudioProcessing* AudioProcessing::Create(const Config& config) {
  return Create(config, nullptr);
}

AudioProcessing* AudioProcessing::Create(const Config& config,
                                         Beamformer<float>* beamformer) {
  AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer);
  if (apm->Initialize() != kNoError) {
    delete apm;
    apm = nullptr;
  }

  return apm;
}

AudioProcessingImpl::AudioProcessingImpl(const Config& config)
    : AudioProcessingImpl(config, nullptr) {}

AudioProcessingImpl::AudioProcessingImpl(const Config& config,
                                         Beamformer<float>* beamformer)
    : public_submodules_(new ApmPublicSubmodules()),
      private_submodules_(new ApmPrivateSubmodules(beamformer)),
      constants_(config.Get<ExperimentalAgc>().startup_min_volume,
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
                 false,
#else
                 config.Get<ExperimentalAgc>().enabled,
#endif
                 config.Get<Intelligibility>().enabled),

#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
      capture_(false,
#else
      capture_(config.Get<ExperimentalNs>().enabled,
#endif
               config.Get<Beamforming>().array_geometry,
               config.Get<Beamforming>().target_direction),
      capture_nonlocked_(config.Get<Beamforming>().enabled) {
  {
    rtc::CritScope cs_render(&crit_render_);
    rtc::CritScope cs_capture(&crit_capture_);

    public_submodules_->echo_cancellation =
        new EchoCancellationImpl(this, &crit_render_, &crit_capture_);
    public_submodules_->echo_control_mobile =
        new EchoControlMobileImpl(this, &crit_render_, &crit_capture_);
    public_submodules_->gain_control =
        new GainControlImpl(this, &crit_capture_, &crit_capture_);
    public_submodules_->high_pass_filter.reset(
        new HighPassFilterImpl(&crit_capture_));
    public_submodules_->level_estimator.reset(
        new LevelEstimatorImpl(&crit_capture_));
    public_submodules_->noise_suppression.reset(
        new NoiseSuppressionImpl(&crit_capture_));
    public_submodules_->voice_detection.reset(
        new VoiceDetectionImpl(&crit_capture_));
    public_submodules_->gain_control_for_new_agc.reset(
        new GainControlForNewAgc(public_submodules_->gain_control));

    private_submodules_->component_list.push_back(
        public_submodules_->echo_cancellation);
    private_submodules_->component_list.push_back(
        public_submodules_->echo_control_mobile);
    private_submodules_->component_list.push_back(
        public_submodules_->gain_control);
  }

  SetExtraOptions(config);
}

AudioProcessingImpl::~AudioProcessingImpl() {
  // Depends on gain_control_ and
  // public_submodules_->gain_control_for_new_agc.
  private_submodules_->agc_manager.reset();
  // Depends on gain_control_.
  public_submodules_->gain_control_for_new_agc.reset();
  while (!private_submodules_->component_list.empty()) {
    ProcessingComponent* component =
        private_submodules_->component_list.front();
    component->Destroy();
    delete component;
    private_submodules_->component_list.pop_front();
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.debug_file->CloseFile();
  }
#endif
}

int AudioProcessingImpl::Initialize() {
  // Run in a single-threaded manner during initialization.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  return InitializeLocked();
}

int AudioProcessingImpl::Initialize(int input_sample_rate_hz,
                                    int output_sample_rate_hz,
                                    int reverse_sample_rate_hz,
                                    ChannelLayout input_layout,
                                    ChannelLayout output_layout,
                                    ChannelLayout reverse_layout) {
  const ProcessingConfig processing_config = {
      {{input_sample_rate_hz,
        ChannelsFromLayout(input_layout),
        LayoutHasKeyboard(input_layout)},
       {output_sample_rate_hz,
        ChannelsFromLayout(output_layout),
        LayoutHasKeyboard(output_layout)},
       {reverse_sample_rate_hz,
        ChannelsFromLayout(reverse_layout),
        LayoutHasKeyboard(reverse_layout)},
       {reverse_sample_rate_hz,
        ChannelsFromLayout(reverse_layout),
        LayoutHasKeyboard(reverse_layout)}}};

  return Initialize(processing_config);
}

int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
  // Run in a single-threaded manner during initialization.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  return InitializeLocked(processing_config);
}

int AudioProcessingImpl::MaybeInitializeRender(
    const ProcessingConfig& processing_config) {
  return MaybeInitialize(processing_config);
}

int AudioProcessingImpl::MaybeInitializeCapture(
    const ProcessingConfig& processing_config) {
  return MaybeInitialize(processing_config);
}

// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values (needs to be called while holding the crit_render_
// lock).
int AudioProcessingImpl::MaybeInitialize(
    const ProcessingConfig& processing_config) {
  // Called from both threads. Thread check is therefore not possible.
  if (processing_config == formats_.api_format) {
    return kNoError;
  }

  rtc::CritScope cs_capture(&crit_capture_);
  return InitializeLocked(processing_config);
}

int AudioProcessingImpl::InitializeLocked() {
  const int fwd_audio_buffer_channels =
      capture_nonlocked_.beamformer_enabled
          ? formats_.api_format.input_stream().num_channels()
          : formats_.api_format.output_stream().num_channels();
  const int rev_audio_buffer_out_num_frames =
      formats_.api_format.reverse_output_stream().num_frames() == 0
          ? formats_.rev_proc_format.num_frames()
          : formats_.api_format.reverse_output_stream().num_frames();
  if (formats_.api_format.reverse_input_stream().num_channels() > 0) {
    render_.render_audio.reset(new AudioBuffer(
        formats_.api_format.reverse_input_stream().num_frames(),
        formats_.api_format.reverse_input_stream().num_channels(),
        formats_.rev_proc_format.num_frames(),
        formats_.rev_proc_format.num_channels(),
        rev_audio_buffer_out_num_frames));
    if (rev_conversion_needed()) {
      render_.render_converter = AudioConverter::Create(
          formats_.api_format.reverse_input_stream().num_channels(),
          formats_.api_format.reverse_input_stream().num_frames(),
          formats_.api_format.reverse_output_stream().num_channels(),
          formats_.api_format.reverse_output_stream().num_frames());
    } else {
      render_.render_converter.reset(nullptr);
    }
  } else {
    render_.render_audio.reset(nullptr);
    render_.render_converter.reset(nullptr);
  }
  capture_.capture_audio.reset(
      new AudioBuffer(formats_.api_format.input_stream().num_frames(),
                      formats_.api_format.input_stream().num_channels(),
                      capture_nonlocked_.fwd_proc_format.num_frames(),
                      fwd_audio_buffer_channels,
                      formats_.api_format.output_stream().num_frames()));

  // Initialize all components.
  for (auto item : private_submodules_->component_list) {
    int err = item->Initialize();
    if (err != kNoError) {
      return err;
    }
  }

  InitializeExperimentalAgc();
  InitializeTransient();
  InitializeBeamformer();
  InitializeIntelligibility();
  InitializeHighPassFilter();
  InitializeNoiseSuppression();
  InitializeLevelEstimator();
  InitializeVoiceDetection();

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    int err = WriteInitMessage();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
  for (const auto& stream : config.streams) {
    if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) {
      return kBadSampleRateError;
    }
  }

  const size_t num_in_channels = config.input_stream().num_channels();
  const size_t num_out_channels = config.output_stream().num_channels();

  // Need at least one input channel.
  // Need either one output channel or as many outputs as there are inputs.
  if (num_in_channels == 0 ||
      !(num_out_channels == 1 || num_out_channels == num_in_channels)) {
    return kBadNumberChannelsError;
  }

  if (capture_nonlocked_.beamformer_enabled &&
      num_in_channels != capture_.array_geometry.size()) {
    return kBadNumberChannelsError;
  }

  formats_.api_format = config;

  // We process at the closest native rate >= min(input rate, output rate)...
  const int min_proc_rate =
      std::min(formats_.api_format.input_stream().sample_rate_hz(),
               formats_.api_format.output_stream().sample_rate_hz());
  int fwd_proc_rate;
  for (size_t i = 0; i < kNumNativeSampleRates; ++i) {
    fwd_proc_rate = kNativeSampleRatesHz[i];
    if (fwd_proc_rate >= min_proc_rate) {
      break;
    }
  }
  // ...with one exception.
  if (public_submodules_->echo_control_mobile->is_enabled() &&
      min_proc_rate > kMaxAECMSampleRateHz) {
    fwd_proc_rate = kMaxAECMSampleRateHz;
  }

  capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);

  // We normally process the reverse stream at 16 kHz. Unless...
  int rev_proc_rate = kSampleRate16kHz;
  if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) {
    // ...the forward stream is at 8 kHz.
    rev_proc_rate = kSampleRate8kHz;
  } else {
    if (formats_.api_format.reverse_input_stream().sample_rate_hz() ==
        kSampleRate32kHz) {
      // ...or the input is at 32 kHz, in which case we use the splitting
      // filter rather than the resampler.
      rev_proc_rate = kSampleRate32kHz;
    }
  }

  // Always downmix the reverse stream to mono for analysis. This has been
  // demonstrated to work well for AEC in most practical scenarios.
  formats_.rev_proc_format = StreamConfig(rev_proc_rate, 1);

  if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate32kHz ||
      capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate48kHz) {
    capture_nonlocked_.split_rate = kSampleRate16kHz;
  } else {
    capture_nonlocked_.split_rate =
        capture_nonlocked_.fwd_proc_format.sample_rate_hz();
  }

  return InitializeLocked();
}

void AudioProcessingImpl::SetExtraOptions(const Config& config) {
  // Run in a single-threaded manner when setting the extra options.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  for (auto item : private_submodules_->component_list) {
    item->SetExtraOptions(config);
  }

  if (capture_.transient_suppressor_enabled !=
      config.Get<ExperimentalNs>().enabled) {
    capture_.transient_suppressor_enabled =
        config.Get<ExperimentalNs>().enabled;
    InitializeTransient();
  }

#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
  if (capture_nonlocked_.beamformer_enabled !=
      config.Get<Beamforming>().enabled) {
    capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled;
    if (config.Get<Beamforming>().array_geometry.size() > 1) {
      capture_.array_geometry = config.Get<Beamforming>().array_geometry;
    }
    capture_.target_direction = config.Get<Beamforming>().target_direction;
    InitializeBeamformer();
  }
#endif  // WEBRTC_ANDROID_PLATFORM_BUILD
}

int AudioProcessingImpl::input_sample_rate_hz() const {
  // Accessed from outside APM, hence a lock is needed.
  rtc::CritScope cs(&crit_capture_);
  return formats_.api_format.input_stream().sample_rate_hz();
}

int AudioProcessingImpl::proc_sample_rate_hz() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_nonlocked_.fwd_proc_format.sample_rate_hz();
}

int AudioProcessingImpl::proc_split_sample_rate_hz() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_nonlocked_.split_rate;
}

size_t AudioProcessingImpl::num_reverse_channels() const {
  // Used as callback from submodules, hence locking is not allowed.
  return formats_.rev_proc_format.num_channels();
}

size_t AudioProcessingImpl::num_input_channels() const {
  // Used as callback from submodules, hence locking is not allowed.
  return formats_.api_format.input_stream().num_channels();
}

size_t AudioProcessingImpl::num_proc_channels() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_nonlocked_.beamformer_enabled ? 1 : num_output_channels();
}

size_t AudioProcessingImpl::num_output_channels() const {
  // Used as callback from submodules, hence locking is not allowed.
  return formats_.api_format.output_stream().num_channels();
}

void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
  rtc::CritScope cs(&crit_capture_);
  capture_.output_will_be_muted = muted;
  if (private_submodules_->agc_manager.get()) {
    private_submodules_->agc_manager->SetCaptureMuted(
        capture_.output_will_be_muted);
  }
}

int AudioProcessingImpl::ProcessStream(const float* const* src,
                                       size_t samples_per_channel,
                                       int input_sample_rate_hz,
                                       ChannelLayout input_layout,
                                       int output_sample_rate_hz,
                                       ChannelLayout output_layout,
                                       float* const* dest) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_ChannelLayout");
  StreamConfig input_stream;
  StreamConfig output_stream;
  {
    // Access the formats_.api_format.input_stream beneath the capture lock.
    // The lock must be released as it is later required in the call
    // to ProcessStream(,,,);
    rtc::CritScope cs(&crit_capture_);
    input_stream = formats_.api_format.input_stream();
    output_stream = formats_.api_format.output_stream();
  }

  input_stream.set_sample_rate_hz(input_sample_rate_hz);
  input_stream.set_num_channels(ChannelsFromLayout(input_layout));
  input_stream.set_has_keyboard(LayoutHasKeyboard(input_layout));
  output_stream.set_sample_rate_hz(output_sample_rate_hz);
  output_stream.set_num_channels(ChannelsFromLayout(output_layout));
  output_stream.set_has_keyboard(LayoutHasKeyboard(output_layout));

  if (samples_per_channel != input_stream.num_frames()) {
    return kBadDataLengthError;
  }
  return ProcessStream(src, input_stream, output_stream, dest);
}

int AudioProcessingImpl::ProcessStream(const float* const* src,
                                       const StreamConfig& input_config,
                                       const StreamConfig& output_config,
                                       float* const* dest) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
  ProcessingConfig processing_config;
  {
    // Acquire the capture lock in order to safely call the function
    // that retrieves the render side data. This function accesses apm
    // getters that need the capture lock held when being called.
    rtc::CritScope cs_capture(&crit_capture_);
    public_submodules_->echo_cancellation->ReadQueuedRenderData();
    public_submodules_->echo_control_mobile->ReadQueuedRenderData();
    public_submodules_->gain_control->ReadQueuedRenderData();

    if (!src || !dest) {
      return kNullPointerError;
    }

    processing_config = formats_.api_format;
  }

  processing_config.input_stream() = input_config;
  processing_config.output_stream() = output_config;

  {
    // Do conditional reinitialization.
    rtc::CritScope cs_render(&crit_render_);
    RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
  }
  rtc::CritScope cs_capture(&crit_capture_);
  assert(processing_config.input_stream().num_frames() ==
         formats_.api_format.input_stream().num_frames());

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    RETURN_ON_ERR(WriteConfigMessage(false));

    debug_dump_.capture.event_msg->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t channel_size =
        sizeof(float) * formats_.api_format.input_stream().num_frames();
    for (size_t i = 0; i < formats_.api_format.input_stream().num_channels();
         ++i)
      msg->add_input_channel(src[i], channel_size);
  }
#endif

  capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
  RETURN_ON_ERR(ProcessStreamLocked());
  capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t channel_size =
        sizeof(float) * formats_.api_format.output_stream().num_frames();
    for (size_t i = 0; i < formats_.api_format.output_stream().num_channels();
         ++i)
      msg->add_output_channel(dest[i], channel_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.capture));
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
  {
    // Acquire the capture lock in order to safely call the function
    // that retrieves the render side data. This function accesses apm
    // getters that need the capture lock held when being called.
    // The lock needs to be released as
    // public_submodules_->echo_control_mobile->is_enabled() acquires this lock
    // as well.
    rtc::CritScope cs_capture(&crit_capture_);
    public_submodules_->echo_cancellation->ReadQueuedRenderData();
    public_submodules_->echo_control_mobile->ReadQueuedRenderData();
    public_submodules_->gain_control->ReadQueuedRenderData();
  }

  if (!frame) {
    return kNullPointerError;
  }
  // Must be a native rate.
  if (frame->sample_rate_hz_ != kSampleRate8kHz &&
      frame->sample_rate_hz_ != kSampleRate16kHz &&
      frame->sample_rate_hz_ != kSampleRate32kHz &&
      frame->sample_rate_hz_ != kSampleRate48kHz) {
    return kBadSampleRateError;
  }

  if (public_submodules_->echo_control_mobile->is_enabled() &&
      frame->sample_rate_hz_ > kMaxAECMSampleRateHz) {
    LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
    return kUnsupportedComponentError;
  }

  ProcessingConfig processing_config;
  {
    // Acquire lock for the access of api_format.
    // The lock is released immediately due to the conditional
    // reinitialization.
    rtc::CritScope cs_capture(&crit_capture_);
    // TODO(ajm): The input and output rates and channels are currently
    // constrained to be identical in the int16 interface.
    processing_config = formats_.api_format;
  }
  processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_);
  processing_config.input_stream().set_num_channels(frame->num_channels_);
  processing_config.output_stream().set_sample_rate_hz(frame->sample_rate_hz_);
  processing_config.output_stream().set_num_channels(frame->num_channels_);

  {
    // Do conditional reinitialization.
    rtc::CritScope cs_render(&crit_render_);
    RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
  }
  rtc::CritScope cs_capture(&crit_capture_);
  if (frame->samples_per_channel_ !=
      formats_.api_format.input_stream().num_frames()) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.capture.event_msg->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
    msg->set_input_data(frame->data_, data_size);
  }
#endif

  capture_.capture_audio->DeinterleaveFrom(frame);
  RETURN_ON_ERR(ProcessStreamLocked());
  capture_.capture_audio->InterleaveTo(frame,
                                       output_copy_needed(is_data_processed()));

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
    msg->set_output_data(frame->data_, data_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.capture));
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::ProcessStreamLocked() {
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    msg->set_delay(capture_nonlocked_.stream_delay_ms);
    msg->set_drift(
        public_submodules_->echo_cancellation->stream_drift_samples());
    msg->set_level(gain_control()->stream_analog_level());
    msg->set_keypress(capture_.key_pressed);
  }
#endif

  MaybeUpdateHistograms();

  AudioBuffer* ca = capture_.capture_audio.get();  // For brevity.
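  // Summary of the capture-side chain applied below (the calls themselves are
  // authoritative): experimental-AGC pre-analysis, band splitting,
  // intelligibility analysis, beamforming, high-pass filtering, AGC analysis,
  // NS analysis, AEC, NS, AECM, voice detection, gain control, band merging,
  // transient suppression and, last, level estimation on the recombined data.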

  if (constants_.use_new_agc &&
      public_submodules_->gain_control->is_enabled()) {
    private_submodules_->agc_manager->AnalyzePreProcess(
        ca->channels()[0], ca->num_channels(),
        capture_nonlocked_.fwd_proc_format.num_frames());
  }

  bool data_processed = is_data_processed();
  if (analysis_needed(data_processed)) {
    ca->SplitIntoFrequencyBands();
  }

  if (constants_.intelligibility_enabled) {
    public_submodules_->intelligibility_enhancer->AnalyzeCaptureAudio(
        ca->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,
        ca->num_channels());
  }

  if (capture_nonlocked_.beamformer_enabled) {
    private_submodules_->beamformer->ProcessChunk(*ca->split_data_f(),
                                                  ca->split_data_f());
    ca->set_num_channels(1);
  }

  public_submodules_->high_pass_filter->ProcessCaptureAudio(ca);
  RETURN_ON_ERR(public_submodules_->gain_control->AnalyzeCaptureAudio(ca));
  public_submodules_->noise_suppression->AnalyzeCaptureAudio(ca);
  RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessCaptureAudio(ca));

  if (public_submodules_->echo_control_mobile->is_enabled() &&
      public_submodules_->noise_suppression->is_enabled()) {
    ca->CopyLowPassToReference();
  }
  public_submodules_->noise_suppression->ProcessCaptureAudio(ca);
  RETURN_ON_ERR(
      public_submodules_->echo_control_mobile->ProcessCaptureAudio(ca));
  public_submodules_->voice_detection->ProcessCaptureAudio(ca);

  if (constants_.use_new_agc &&
      public_submodules_->gain_control->is_enabled() &&
      (!capture_nonlocked_.beamformer_enabled ||
       private_submodules_->beamformer->is_target_present())) {
    private_submodules_->agc_manager->Process(
        ca->split_bands_const(0)[kBand0To8kHz], ca->num_frames_per_band(),
        capture_nonlocked_.split_rate);
  }
  RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(ca));

  if (synthesis_needed(data_processed)) {
    ca->MergeFrequencyBands();
  }

  // TODO(aluebs): Investigate if the transient suppression placement should be
  // before or after the AGC.
  if (capture_.transient_suppressor_enabled) {
    float voice_probability =
        private_submodules_->agc_manager.get()
            ? private_submodules_->agc_manager->voice_probability()
            : 1.f;

    public_submodules_->transient_suppressor->Suppress(
        ca->channels_f()[0], ca->num_frames(), ca->num_channels(),
        ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(),
        ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability,
        capture_.key_pressed);
  }

  // The level estimator operates on the recombined data.
  public_submodules_->level_estimator->ProcessStream(ca);

  capture_.was_stream_delay_set = false;
  return kNoError;
}

int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
                                              size_t samples_per_channel,
                                              int rev_sample_rate_hz,
                                              ChannelLayout layout) {
  TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_ChannelLayout");
  rtc::CritScope cs(&crit_render_);
  const StreamConfig reverse_config = {
      rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
  };
  if (samples_per_channel != reverse_config.num_frames()) {
    return kBadDataLengthError;
  }
  return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config);
}

int AudioProcessingImpl::ProcessReverseStream(
    const float* const* src,
    const StreamConfig& reverse_input_config,
    const StreamConfig& reverse_output_config,
    float* const* dest) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
  rtc::CritScope cs(&crit_render_);
  RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, reverse_input_config,
                                           reverse_output_config));
  if (is_rev_processed()) {
    render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
                                 dest);
  } else if (render_check_rev_conversion_needed()) {
    render_.render_converter->Convert(src, reverse_input_config.num_samples(),
                                      dest,
                                      reverse_output_config.num_samples());
  } else {
    CopyAudioIfNeeded(src, reverse_input_config.num_frames(),
                      reverse_input_config.num_channels(), dest);
  }

  return kNoError;
}

int AudioProcessingImpl::AnalyzeReverseStreamLocked(
    const float* const* src,
    const StreamConfig& reverse_input_config,
    const StreamConfig& reverse_output_config) {
  if (src == nullptr) {
    return kNullPointerError;
  }

  if (reverse_input_config.num_channels() == 0) {
    return kBadNumberChannelsError;
  }

  ProcessingConfig processing_config = formats_.api_format;
  processing_config.reverse_input_stream() = reverse_input_config;
  processing_config.reverse_output_stream() = reverse_output_config;

  RETURN_ON_ERR(MaybeInitializeRender(processing_config));
  assert(reverse_input_config.num_frames() ==
         formats_.api_format.reverse_input_stream().num_frames());

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.render.event_msg->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg =
        debug_dump_.render.event_msg->mutable_reverse_stream();
    const size_t channel_size =
        sizeof(float) * formats_.api_format.reverse_input_stream().num_frames();
    for (size_t i = 0;
         i < formats_.api_format.reverse_input_stream().num_channels(); ++i)
      msg->add_channel(src[i], channel_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.render));
  }
#endif

  render_.render_audio->CopyFrom(src,
                                 formats_.api_format.reverse_input_stream());
  return ProcessReverseStreamLocked();
}

int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
  RETURN_ON_ERR(AnalyzeReverseStream(frame));
  rtc::CritScope cs(&crit_render_);
  if (is_rev_processed()) {
    render_.render_audio->InterleaveTo(frame, true);
  }

  return kNoError;
}

int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_AudioFrame");
  rtc::CritScope cs(&crit_render_);
  if (frame == nullptr) {
    return kNullPointerError;
  }
  // Must be a native rate.
  if (frame->sample_rate_hz_ != kSampleRate8kHz &&
      frame->sample_rate_hz_ != kSampleRate16kHz &&
      frame->sample_rate_hz_ != kSampleRate32kHz &&
      frame->sample_rate_hz_ != kSampleRate48kHz) {
    return kBadSampleRateError;
  }
  // This interface does not tolerate different forward and reverse rates.
  if (frame->sample_rate_hz_ !=
      formats_.api_format.input_stream().sample_rate_hz()) {
    return kBadSampleRateError;
  }

  if (frame->num_channels_ <= 0) {
    return kBadNumberChannelsError;
  }

  ProcessingConfig processing_config = formats_.api_format;
  processing_config.reverse_input_stream().set_sample_rate_hz(
      frame->sample_rate_hz_);
  processing_config.reverse_input_stream().set_num_channels(
      frame->num_channels_);
  processing_config.reverse_output_stream().set_sample_rate_hz(
      frame->sample_rate_hz_);
  processing_config.reverse_output_stream().set_num_channels(
      frame->num_channels_);

  RETURN_ON_ERR(MaybeInitializeRender(processing_config));
  if (frame->samples_per_channel_ !=
      formats_.api_format.reverse_input_stream().num_frames()) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.render.event_msg->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg =
        debug_dump_.render.event_msg->mutable_reverse_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
    msg->set_data(frame->data_, data_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.render));
  }
#endif
  render_.render_audio->DeinterleaveFrom(frame);
  return ProcessReverseStreamLocked();
}

int AudioProcessingImpl::ProcessReverseStreamLocked() {
  AudioBuffer* ra = render_.render_audio.get();  // For brevity.
  if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz) {
    ra->SplitIntoFrequencyBands();
  }

  if (constants_.intelligibility_enabled) {
    // Currently run in single-threaded mode when the intelligibility
    // enhancer is activated.
    // TODO(peah): Fix to be properly multi-threaded.
    rtc::CritScope cs(&crit_capture_);
    public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
        ra->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,
        ra->num_channels());
  }

  RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessRenderAudio(ra));
  RETURN_ON_ERR(
      public_submodules_->echo_control_mobile->ProcessRenderAudio(ra));
  if (!constants_.use_new_agc) {
    RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));
  }

  if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz &&
      is_rev_processed()) {
    ra->MergeFrequencyBands();
  }

  return kNoError;
}

int AudioProcessingImpl::set_stream_delay_ms(int delay) {
  rtc::CritScope cs(&crit_capture_);
  Error retval = kNoError;
  capture_.was_stream_delay_set = true;
  delay += capture_.delay_offset_ms;

  if (delay < 0) {
    delay = 0;
    retval = kBadStreamParameterWarning;
  }

  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
  if (delay > 500) {
    delay = 500;
    retval = kBadStreamParameterWarning;
  }

  capture_nonlocked_.stream_delay_ms = delay;
  return retval;
}

int AudioProcessingImpl::stream_delay_ms() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_nonlocked_.stream_delay_ms;
}

bool AudioProcessingImpl::was_stream_delay_set() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_.was_stream_delay_set;
}

void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
  rtc::CritScope cs(&crit_capture_);
  capture_.key_pressed = key_pressed;
}

void AudioProcessingImpl::set_delay_offset_ms(int offset) {
  rtc::CritScope cs(&crit_capture_);
  capture_.delay_offset_ms = offset;
}

int AudioProcessingImpl::delay_offset_ms() const {
  rtc::CritScope cs(&crit_capture_);
  return capture_.delay_offset_ms;
}

int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  static_assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize, "");

  if (filename == nullptr) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_dump_.debug_file->Open()) {
    if (debug_dump_.debug_file->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_dump_.debug_file->OpenFile(filename, false) == -1) {
    debug_dump_.debug_file->CloseFile();
    return kFileError;
  }

  RETURN_ON_ERR(WriteConfigMessage(true));
  RETURN_ON_ERR(WriteInitMessage());
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

int AudioProcessingImpl::StartDebugRecording(FILE* handle) {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);

  if (handle == nullptr) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_dump_.debug_file->Open()) {
    if (debug_dump_.debug_file->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_dump_.debug_file->OpenFromFileHandle(handle, true, false) == -1) {
    return kFileError;
  }

  RETURN_ON_ERR(WriteConfigMessage(true));
  RETURN_ON_ERR(WriteInitMessage());
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

int AudioProcessingImpl::StartDebugRecordingForPlatformFile(
    rtc::PlatformFile handle) {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  FILE* stream = rtc::FdopenPlatformFileForWriting(handle);
  return StartDebugRecording(stream);
}

int AudioProcessingImpl::StopDebugRecording() {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // We just return if recording hasn't started.
  if (debug_dump_.debug_file->Open()) {
    if (debug_dump_.debug_file->CloseFile() == -1) {
      return kFileError;
    }
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->echo_cancellation;
}

EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->echo_control_mobile;
}

GainControl* AudioProcessingImpl::gain_control() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  if (constants_.use_new_agc) {
    return public_submodules_->gain_control_for_new_agc.get();
  }
  return public_submodules_->gain_control;
}

HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->high_pass_filter.get();
}

LevelEstimator* AudioProcessingImpl::level_estimator() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->level_estimator.get();
}

NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->noise_suppression.get();
}

VoiceDetection* AudioProcessingImpl::voice_detection() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->voice_detection.get();
}

bool AudioProcessingImpl::is_data_processed() const {
  if (capture_nonlocked_.beamformer_enabled) {
    return true;
  }

  int enabled_count = 0;
  for (auto item : private_submodules_->component_list) {
    if (item->is_component_enabled()) {
      enabled_count++;
    }
  }
  if (public_submodules_->high_pass_filter->is_enabled()) {
    enabled_count++;
  }
  if (public_submodules_->noise_suppression->is_enabled()) {
    enabled_count++;
  }
  if (public_submodules_->level_estimator->is_enabled()) {
    enabled_count++;
  }
  if (public_submodules_->voice_detection->is_enabled()) {
    enabled_count++;
  }

  // Data is unchanged if no components are enabled, or if only
  // public_submodules_->level_estimator
  // or public_submodules_->voice_detection is enabled.
  if (enabled_count == 0) {
    return false;
  } else if (enabled_count == 1) {
    if (public_submodules_->level_estimator->is_enabled() ||
        public_submodules_->voice_detection->is_enabled()) {
      return false;
    }
  } else if (enabled_count == 2) {
    if (public_submodules_->level_estimator->is_enabled() &&
        public_submodules_->voice_detection->is_enabled()) {
      return false;
    }
  }
  return true;
}

bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
  // Check if we've upmixed or downmixed the audio.
  return ((formats_.api_format.output_stream().num_channels() !=
           formats_.api_format.input_stream().num_channels()) ||
          is_data_processed || capture_.transient_suppressor_enabled);
}

bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
  return (is_data_processed &&
          (capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
               kSampleRate32kHz ||
           capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
               kSampleRate48kHz));
}

bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
  if (!is_data_processed &&
      !public_submodules_->voice_detection->is_enabled() &&
      !capture_.transient_suppressor_enabled) {
    // Only public_submodules_->level_estimator is enabled.
    return false;
  } else if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
                 kSampleRate32kHz ||
             capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
                 kSampleRate48kHz) {
    // Something besides public_submodules_->level_estimator is enabled, and we
    // have super-wb.
    return true;
  }
  return false;
}

bool AudioProcessingImpl::is_rev_processed() const {
  return constants_.intelligibility_enabled &&
         public_submodules_->intelligibility_enhancer->active();
}

bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
  return rev_conversion_needed();
}

bool AudioProcessingImpl::rev_conversion_needed() const {
  return (formats_.api_format.reverse_input_stream() !=
          formats_.api_format.reverse_output_stream());
}

void AudioProcessingImpl::InitializeExperimentalAgc() {
  if (constants_.use_new_agc) {
    if (!private_submodules_->agc_manager.get()) {
      private_submodules_->agc_manager.reset(new AgcManagerDirect(
          public_submodules_->gain_control,
          public_submodules_->gain_control_for_new_agc.get(),
          constants_.agc_startup_min_volume));
    }
    private_submodules_->agc_manager->Initialize();
    private_submodules_->agc_manager->SetCaptureMuted(
        capture_.output_will_be_muted);
  }
}

void AudioProcessingImpl::InitializeTransient() {
  if (capture_.transient_suppressor_enabled) {
    if (!public_submodules_->transient_suppressor.get()) {
      public_submodules_->transient_suppressor.reset(new TransientSuppressor());
    }
    public_submodules_->transient_suppressor->Initialize(
        capture_nonlocked_.fwd_proc_format.sample_rate_hz(),
        capture_nonlocked_.split_rate,
        num_proc_channels());
  }
}

void AudioProcessingImpl::InitializeBeamformer() {
  if (capture_nonlocked_.beamformer_enabled) {
    if (!private_submodules_->beamformer) {
      private_submodules_->beamformer.reset(new NonlinearBeamformer(
          capture_.array_geometry, capture_.target_direction));
    }
    private_submodules_->beamformer->Initialize(kChunkSizeMs,
                                                capture_nonlocked_.split_rate);
  }
}

void AudioProcessingImpl::InitializeIntelligibility() {
  if (constants_.intelligibility_enabled) {
    IntelligibilityEnhancer::Config config;
    config.sample_rate_hz = capture_nonlocked_.split_rate;
    config.num_capture_channels = capture_.capture_audio->num_channels();
    config.num_render_channels = render_.render_audio->num_channels();
    public_submodules_->intelligibility_enhancer.reset(
        new IntelligibilityEnhancer(config));
  }
}

void AudioProcessingImpl::InitializeHighPassFilter() {
  public_submodules_->high_pass_filter->Initialize(num_proc_channels(),
                                                   proc_sample_rate_hz());
}

void AudioProcessingImpl::InitializeNoiseSuppression() {
  public_submodules_->noise_suppression->Initialize(num_proc_channels(),
                                                    proc_sample_rate_hz());
}

void AudioProcessingImpl::InitializeLevelEstimator() {
  public_submodules_->level_estimator->Initialize();
}

void AudioProcessingImpl::InitializeVoiceDetection() {
  public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
}

void AudioProcessingImpl::MaybeUpdateHistograms() {
  static const int kMinDiffDelayMs = 60;

  if (echo_cancellation()->is_enabled()) {
    // Activate delay_jumps_ counters if we know echo_cancellation is running.
    // If a stream has echo we know that the echo_cancellation is in process.
    if (capture_.stream_delay_jumps == -1 &&
        echo_cancellation()->stream_has_echo()) {
      capture_.stream_delay_jumps = 0;
    }
    if (capture_.aec_system_delay_jumps == -1 &&
        echo_cancellation()->stream_has_echo()) {
      capture_.aec_system_delay_jumps = 0;
    }

    // Detect a jump in platform reported system delay and log the difference.
    const int diff_stream_delay_ms =
        capture_nonlocked_.stream_delay_ms - capture_.last_stream_delay_ms;
    if (diff_stream_delay_ms > kMinDiffDelayMs &&
        capture_.last_stream_delay_ms != 0) {
      RTC_HISTOGRAM_COUNTS_SPARSE(
          "WebRTC.Audio.PlatformReportedStreamDelayJump", diff_stream_delay_ms,
          kMinDiffDelayMs, 1000, 100);
      if (capture_.stream_delay_jumps == -1) {
        capture_.stream_delay_jumps = 0;  // Activate counter if needed.
      }
      capture_.stream_delay_jumps++;
    }
    capture_.last_stream_delay_ms = capture_nonlocked_.stream_delay_ms;

    // Detect a jump in AEC system delay and log the difference.
    const int frames_per_ms =
        rtc::CheckedDivExact(capture_nonlocked_.split_rate, 1000);
    const int aec_system_delay_ms =
        WebRtcAec_system_delay(echo_cancellation()->aec_core()) / frames_per_ms;
    const int diff_aec_system_delay_ms =
        aec_system_delay_ms - capture_.last_aec_system_delay_ms;
    if (diff_aec_system_delay_ms > kMinDiffDelayMs &&
        capture_.last_aec_system_delay_ms != 0) {
      RTC_HISTOGRAM_COUNTS_SPARSE("WebRTC.Audio.AecSystemDelayJump",
                                  diff_aec_system_delay_ms, kMinDiffDelayMs,
                                  1000, 100);
      if (capture_.aec_system_delay_jumps == -1) {
        capture_.aec_system_delay_jumps = 0;  // Activate counter if needed.
      }
      capture_.aec_system_delay_jumps++;
    }
    capture_.last_aec_system_delay_ms = aec_system_delay_ms;
  }
}

void AudioProcessingImpl::UpdateHistogramsOnCallEnd() {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);

  if (capture_.stream_delay_jumps > -1) {
    RTC_HISTOGRAM_ENUMERATION_SPARSE(
        "WebRTC.Audio.NumOfPlatformReportedStreamDelayJumps",
        capture_.stream_delay_jumps, 51);
  }
  capture_.stream_delay_jumps = -1;
  capture_.last_stream_delay_ms = 0;

  if (capture_.aec_system_delay_jumps > -1) {
    RTC_HISTOGRAM_ENUMERATION_SPARSE("WebRTC.Audio.NumOfAecSystemDelayJumps",
                                     capture_.aec_system_delay_jumps, 51);
  }
  capture_.aec_system_delay_jumps = -1;
  capture_.last_aec_system_delay_ms = 0;
}

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
int AudioProcessingImpl::WriteMessageToDebugFile(
    FileWrapper* debug_file,
    rtc::CriticalSection* crit_debug,
    ApmDebugDumpThreadState* debug_state) {
  int32_t size = debug_state->event_msg->ByteSize();
  if (size <= 0) {
    return kUnspecifiedError;
  }
#if defined(WEBRTC_ARCH_BIG_ENDIAN)
// TODO(ajm): Use little-endian "on the wire". For the moment, we can be
// pretty safe in assuming little-endian.
#endif

  if (!debug_state->event_msg->SerializeToString(&debug_state->event_str)) {
    return kUnspecifiedError;
  }

  {
    // Ensure atomic writes of the message.
    rtc::CritScope cs_capture(crit_debug);
    // Write message preceded by its size.
    if (!debug_file->Write(&size, sizeof(int32_t))) {
      return kFileError;
    }
    if (!debug_file->Write(debug_state->event_str.data(),
                           debug_state->event_str.length())) {
      return kFileError;
    }
  }

  debug_state->event_msg->Clear();

  return kNoError;
}

int AudioProcessingImpl::WriteInitMessage() {
  debug_dump_.capture.event_msg->set_type(audioproc::Event::INIT);
  audioproc::Init* msg = debug_dump_.capture.event_msg->mutable_init();
  msg->set_sample_rate(formats_.api_format.input_stream().sample_rate_hz());

  msg->set_num_input_channels(static_cast<google::protobuf::int32>(
      formats_.api_format.input_stream().num_channels()));
  msg->set_num_output_channels(static_cast<google::protobuf::int32>(
      formats_.api_format.output_stream().num_channels()));
  msg->set_num_reverse_channels(static_cast<google::protobuf::int32>(
      formats_.api_format.reverse_input_stream().num_channels()));
  msg->set_reverse_sample_rate(
      formats_.api_format.reverse_input_stream().sample_rate_hz());
  msg->set_output_sample_rate(
      formats_.api_format.output_stream().sample_rate_hz());
  // TODO(ekmeyerson): Add reverse output fields to
  // debug_dump_.capture.event_msg.

  RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                        &crit_debug_, &debug_dump_.capture));
  return kNoError;
}

int AudioProcessingImpl::WriteConfigMessage(bool forced) {
  audioproc::Config config;

  config.set_aec_enabled(public_submodules_->echo_cancellation->is_enabled());
  config.set_aec_delay_agnostic_enabled(
      public_submodules_->echo_cancellation->is_delay_agnostic_enabled());
  config.set_aec_drift_compensation_enabled(
      public_submodules_->echo_cancellation->is_drift_compensation_enabled());
  config.set_aec_extended_filter_enabled(
      public_submodules_->echo_cancellation->is_extended_filter_enabled());
  config.set_aec_suppression_level(static_cast<int>(
      public_submodules_->echo_cancellation->suppression_level()));

  config.set_aecm_enabled(
      public_submodules_->echo_control_mobile->is_enabled());
  config.set_aecm_comfort_noise_enabled(
      public_submodules_->echo_control_mobile->is_comfort_noise_enabled());
  config.set_aecm_routing_mode(static_cast<int>(
      public_submodules_->echo_control_mobile->routing_mode()));

  config.set_agc_enabled(public_submodules_->gain_control->is_enabled());
  config.set_agc_mode(
      static_cast<int>(public_submodules_->gain_control->mode()));
  config.set_agc_limiter_enabled(
      public_submodules_->gain_control->is_limiter_enabled());
  config.set_noise_robust_agc_enabled(constants_.use_new_agc);

  config.set_hpf_enabled(public_submodules_->high_pass_filter->is_enabled());

  config.set_ns_enabled(public_submodules_->noise_suppression->is_enabled());
  config.set_ns_level(
      static_cast<int>(public_submodules_->noise_suppression->level()));

  config.set_transient_suppression_enabled(
      capture_.transient_suppressor_enabled);

  std::string serialized_config = config.SerializeAsString();
  if (!forced &&
      debug_dump_.capture.last_serialized_config == serialized_config) {
    return kNoError;
  }

  debug_dump_.capture.last_serialized_config = serialized_config;

  debug_dump_.capture.event_msg->set_type(audioproc::Event::CONFIG);
  debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);

  RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                        &crit_debug_, &debug_dump_.capture));
  return kNoError;
}
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

}  // namespace webrtc
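
// Illustrative usage sketch of the public AudioProcessing interface
// implemented above (comment only, not compiled with this file). It assumes
// the client already fills 10 ms AudioFrames at a native rate;
// render_frame, capture_frame and estimated_delay_ms are placeholder names
// supplied by the caller, not identifiers defined in this module.
//
//   webrtc::AudioProcessing* apm = webrtc::AudioProcessing::Create();
//   apm->echo_cancellation()->Enable(true);
//   apm->noise_suppression()->Enable(true);
//   apm->gain_control()->Enable(true);
//
//   // Per 10 ms chunk: feed the far-end (render) audio, report the
//   // render-to-capture delay, then process the near-end (capture) audio
//   // in place.
//   apm->ProcessReverseStream(&render_frame);
//   apm->set_stream_delay_ms(estimated_delay_ms);
//   apm->ProcessStream(&capture_frame);
//
//   delete apm;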