1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/renderer/media/media_stream_audio_processor.h" 6 7 #include "base/command_line.h" 8 #include "base/debug/trace_event.h" 9 #include "base/metrics/histogram.h" 10 #include "content/public/common/content_switches.h" 11 #include "content/renderer/media/media_stream_audio_processor_options.h" 12 #include "content/renderer/media/rtc_media_constraints.h" 13 #include "content/renderer/media/webrtc_audio_device_impl.h" 14 #include "media/audio/audio_parameters.h" 15 #include "media/base/audio_converter.h" 16 #include "media/base/audio_fifo.h" 17 #include "media/base/channel_layout.h" 18 #include "third_party/WebKit/public/platform/WebMediaConstraints.h" 19 #include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h" 20 #include "third_party/webrtc/modules/audio_processing/typing_detection.h" 21 22 namespace content { 23 24 namespace { 25 26 using webrtc::AudioProcessing; 27 28 #if defined(OS_ANDROID) 29 const int kAudioProcessingSampleRate = 16000; 30 #else 31 const int kAudioProcessingSampleRate = 32000; 32 #endif 33 const int kAudioProcessingNumberOfChannels = 1; 34 const AudioProcessing::ChannelLayout kAudioProcessingChannelLayout = 35 AudioProcessing::kMono; 36 37 const int kMaxNumberOfBuffersInFifo = 2; 38 39 // Used by UMA histograms and entries shouldn't be re-ordered or removed. 40 enum AudioTrackProcessingStates { 41 AUDIO_PROCESSING_ENABLED = 0, 42 AUDIO_PROCESSING_DISABLED, 43 AUDIO_PROCESSING_IN_WEBRTC, 44 AUDIO_PROCESSING_MAX 45 }; 46 47 void RecordProcessingState(AudioTrackProcessingStates state) { 48 UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates", 49 state, AUDIO_PROCESSING_MAX); 50 } 51 52 } // namespace 53 54 class MediaStreamAudioProcessor::MediaStreamAudioConverter 55 : public media::AudioConverter::InputCallback { 56 public: 57 MediaStreamAudioConverter(const media::AudioParameters& source_params, 58 const media::AudioParameters& sink_params) 59 : source_params_(source_params), 60 sink_params_(sink_params), 61 audio_converter_(source_params, sink_params_, false) { 62 // An instance of MediaStreamAudioConverter may be created in the main 63 // render thread and used in the audio thread, for example, the 64 // |MediaStreamAudioProcessor::capture_converter_|. 65 thread_checker_.DetachFromThread(); 66 audio_converter_.AddInput(this); 67 68 // Create and initialize audio fifo and audio bus wrapper. 69 // The size of the FIFO should be at least twice of the source buffer size 70 // or twice of the sink buffer size. Also, FIFO needs to have enough space 71 // to store pre-processed data before passing the data to 72 // webrtc::AudioProcessing, which requires 10ms as packet size. 73 int max_frame_size = std::max(source_params_.frames_per_buffer(), 74 sink_params_.frames_per_buffer()); 75 int buffer_size = std::max( 76 kMaxNumberOfBuffersInFifo * max_frame_size, 77 kMaxNumberOfBuffersInFifo * source_params_.sample_rate() / 100); 78 fifo_.reset(new media::AudioFifo(source_params_.channels(), buffer_size)); 79 80 // TODO(xians): Use CreateWrapper to save one memcpy. 81 audio_wrapper_ = media::AudioBus::Create(sink_params_.channels(), 82 sink_params_.frames_per_buffer()); 83 } 84 85 virtual ~MediaStreamAudioConverter() { 86 audio_converter_.RemoveInput(this); 87 } 88 89 void Push(const media::AudioBus* audio_source) { 90 // Called on the audio thread, which is the capture audio thread for 91 // |MediaStreamAudioProcessor::capture_converter_|, and render audio thread 92 // for |MediaStreamAudioProcessor::render_converter_|. 93 // And it must be the same thread as calling Convert(). 94 DCHECK(thread_checker_.CalledOnValidThread()); 95 fifo_->Push(audio_source); 96 } 97 98 bool Convert(webrtc::AudioFrame* out, bool audio_mirroring) { 99 // Called on the audio thread, which is the capture audio thread for 100 // |MediaStreamAudioProcessor::capture_converter_|, and render audio thread 101 // for |MediaStreamAudioProcessor::render_converter_|. 102 DCHECK(thread_checker_.CalledOnValidThread()); 103 // Return false if there is not enough data in the FIFO, this happens when 104 // fifo_->frames() / source_params_.sample_rate() is less than 105 // sink_params.frames_per_buffer() / sink_params.sample_rate(). 106 if (fifo_->frames() * sink_params_.sample_rate() < 107 sink_params_.frames_per_buffer() * source_params_.sample_rate()) { 108 return false; 109 } 110 111 // Convert data to the output format, this will trigger ProvideInput(). 112 audio_converter_.Convert(audio_wrapper_.get()); 113 DCHECK_EQ(audio_wrapper_->frames(), sink_params_.frames_per_buffer()); 114 115 // Swap channels before interleaving the data if |audio_mirroring| is 116 // set to true. 117 if (audio_mirroring && 118 sink_params_.channel_layout() == media::CHANNEL_LAYOUT_STEREO) { 119 // Swap the first and second channels. 120 audio_wrapper_->SwapChannels(0, 1); 121 } 122 123 // TODO(xians): Figure out a better way to handle the interleaved and 124 // deinterleaved format switching. 125 audio_wrapper_->ToInterleaved(audio_wrapper_->frames(), 126 sink_params_.bits_per_sample() / 8, 127 out->data_); 128 129 out->samples_per_channel_ = sink_params_.frames_per_buffer(); 130 out->sample_rate_hz_ = sink_params_.sample_rate(); 131 out->speech_type_ = webrtc::AudioFrame::kNormalSpeech; 132 out->vad_activity_ = webrtc::AudioFrame::kVadUnknown; 133 out->num_channels_ = sink_params_.channels(); 134 135 return true; 136 } 137 138 const media::AudioParameters& source_parameters() const { 139 return source_params_; 140 } 141 const media::AudioParameters& sink_parameters() const { 142 return sink_params_; 143 } 144 145 private: 146 // AudioConverter::InputCallback implementation. 147 virtual double ProvideInput(media::AudioBus* audio_bus, 148 base::TimeDelta buffer_delay) OVERRIDE { 149 // Called on realtime audio thread. 150 // TODO(xians): Figure out why the first Convert() triggers ProvideInput 151 // two times. 152 if (fifo_->frames() < audio_bus->frames()) 153 return 0; 154 155 fifo_->Consume(audio_bus, 0, audio_bus->frames()); 156 157 // Return 1.0 to indicate no volume scaling on the data. 158 return 1.0; 159 } 160 161 base::ThreadChecker thread_checker_; 162 const media::AudioParameters source_params_; 163 const media::AudioParameters sink_params_; 164 165 // TODO(xians): consider using SincResampler to save some memcpy. 166 // Handles mixing and resampling between input and output parameters. 167 media::AudioConverter audio_converter_; 168 scoped_ptr<media::AudioBus> audio_wrapper_; 169 scoped_ptr<media::AudioFifo> fifo_; 170 }; 171 172 bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() { 173 return !CommandLine::ForCurrentProcess()->HasSwitch( 174 switches::kDisableAudioTrackProcessing); 175 } 176 177 MediaStreamAudioProcessor::MediaStreamAudioProcessor( 178 const blink::WebMediaConstraints& constraints, 179 int effects, 180 WebRtcPlayoutDataSource* playout_data_source) 181 : render_delay_ms_(0), 182 playout_data_source_(playout_data_source), 183 audio_mirroring_(false), 184 typing_detected_(false), 185 stopped_(false) { 186 capture_thread_checker_.DetachFromThread(); 187 render_thread_checker_.DetachFromThread(); 188 InitializeAudioProcessingModule(constraints, effects); 189 if (IsAudioTrackProcessingEnabled()) { 190 aec_dump_message_filter_ = AecDumpMessageFilter::Get(); 191 // In unit tests not creating a message filter, |aec_dump_message_filter_| 192 // will be NULL. We can just ignore that. Other unit tests and browser tests 193 // ensure that we do get the filter when we should. 194 if (aec_dump_message_filter_) 195 aec_dump_message_filter_->AddDelegate(this); 196 } 197 } 198 199 MediaStreamAudioProcessor::~MediaStreamAudioProcessor() { 200 DCHECK(main_thread_checker_.CalledOnValidThread()); 201 Stop(); 202 } 203 204 void MediaStreamAudioProcessor::OnCaptureFormatChanged( 205 const media::AudioParameters& source_params) { 206 DCHECK(main_thread_checker_.CalledOnValidThread()); 207 // There is no need to hold a lock here since the caller guarantees that 208 // there is no more PushCaptureData() and ProcessAndConsumeData() callbacks 209 // on the capture thread. 210 InitializeCaptureConverter(source_params); 211 212 // Reset the |capture_thread_checker_| since the capture data will come from 213 // a new capture thread. 214 capture_thread_checker_.DetachFromThread(); 215 } 216 217 void MediaStreamAudioProcessor::PushCaptureData( 218 const media::AudioBus* audio_source) { 219 DCHECK(capture_thread_checker_.CalledOnValidThread()); 220 DCHECK_EQ(audio_source->channels(), 221 capture_converter_->source_parameters().channels()); 222 DCHECK_EQ(audio_source->frames(), 223 capture_converter_->source_parameters().frames_per_buffer()); 224 225 capture_converter_->Push(audio_source); 226 } 227 228 bool MediaStreamAudioProcessor::ProcessAndConsumeData( 229 base::TimeDelta capture_delay, int volume, bool key_pressed, 230 int* new_volume, int16** out) { 231 DCHECK(capture_thread_checker_.CalledOnValidThread()); 232 TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData"); 233 234 if (!capture_converter_->Convert(&capture_frame_, audio_mirroring_)) 235 return false; 236 237 *new_volume = ProcessData(&capture_frame_, capture_delay, volume, 238 key_pressed); 239 *out = capture_frame_.data_; 240 241 return true; 242 } 243 244 void MediaStreamAudioProcessor::Stop() { 245 DCHECK(main_thread_checker_.CalledOnValidThread()); 246 if (stopped_) 247 return; 248 249 stopped_ = true; 250 251 if (aec_dump_message_filter_) { 252 aec_dump_message_filter_->RemoveDelegate(this); 253 aec_dump_message_filter_ = NULL; 254 } 255 256 if (!audio_processing_.get()) 257 return; 258 259 StopEchoCancellationDump(audio_processing_.get()); 260 261 if (playout_data_source_) { 262 playout_data_source_->RemovePlayoutSink(this); 263 playout_data_source_ = NULL; 264 } 265 } 266 267 const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const { 268 return capture_converter_->source_parameters(); 269 } 270 271 const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const { 272 return capture_converter_->sink_parameters(); 273 } 274 275 void MediaStreamAudioProcessor::OnAecDumpFile( 276 const IPC::PlatformFileForTransit& file_handle) { 277 DCHECK(main_thread_checker_.CalledOnValidThread()); 278 279 base::File file = IPC::PlatformFileForTransitToFile(file_handle); 280 DCHECK(file.IsValid()); 281 282 if (audio_processing_) 283 StartEchoCancellationDump(audio_processing_.get(), file.Pass()); 284 else 285 file.Close(); 286 } 287 288 void MediaStreamAudioProcessor::OnDisableAecDump() { 289 DCHECK(main_thread_checker_.CalledOnValidThread()); 290 if (audio_processing_) 291 StopEchoCancellationDump(audio_processing_.get()); 292 } 293 294 void MediaStreamAudioProcessor::OnIpcClosing() { 295 DCHECK(main_thread_checker_.CalledOnValidThread()); 296 aec_dump_message_filter_ = NULL; 297 } 298 299 void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus, 300 int sample_rate, 301 int audio_delay_milliseconds) { 302 DCHECK(render_thread_checker_.CalledOnValidThread()); 303 DCHECK(audio_processing_->echo_control_mobile()->is_enabled() ^ 304 audio_processing_->echo_cancellation()->is_enabled()); 305 306 TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData"); 307 DCHECK_LT(audio_delay_milliseconds, 308 std::numeric_limits<base::subtle::Atomic32>::max()); 309 base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds); 310 311 InitializeRenderConverterIfNeeded(sample_rate, audio_bus->channels(), 312 audio_bus->frames()); 313 314 render_converter_->Push(audio_bus); 315 while (render_converter_->Convert(&render_frame_, false)) 316 audio_processing_->AnalyzeReverseStream(&render_frame_); 317 } 318 319 void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() { 320 DCHECK(main_thread_checker_.CalledOnValidThread()); 321 // There is no need to hold a lock here since the caller guarantees that 322 // there is no more OnPlayoutData() callback on the render thread. 323 render_thread_checker_.DetachFromThread(); 324 render_converter_.reset(); 325 } 326 327 void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) { 328 stats->typing_noise_detected = 329 (base::subtle::Acquire_Load(&typing_detected_) != false); 330 GetAecStats(audio_processing_.get(), stats); 331 } 332 333 void MediaStreamAudioProcessor::InitializeAudioProcessingModule( 334 const blink::WebMediaConstraints& constraints, int effects) { 335 DCHECK(!audio_processing_); 336 337 MediaAudioConstraints audio_constraints(constraints, effects); 338 339 // Audio mirroring can be enabled even though audio processing is otherwise 340 // disabled. 341 audio_mirroring_ = audio_constraints.GetProperty( 342 MediaAudioConstraints::kGoogAudioMirroring); 343 344 if (!IsAudioTrackProcessingEnabled()) { 345 RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC); 346 return; 347 } 348 349 #if defined(OS_IOS) 350 // On iOS, VPIO provides built-in AGC and AEC. 351 const bool echo_cancellation = false; 352 const bool goog_agc = false; 353 #else 354 const bool echo_cancellation = 355 audio_constraints.GetEchoCancellationProperty(); 356 const bool goog_agc = audio_constraints.GetProperty( 357 MediaAudioConstraints::kGoogAutoGainControl); 358 #endif 359 360 #if defined(OS_IOS) || defined(OS_ANDROID) 361 const bool goog_experimental_aec = false; 362 const bool goog_typing_detection = false; 363 #else 364 const bool goog_experimental_aec = audio_constraints.GetProperty( 365 MediaAudioConstraints::kGoogExperimentalEchoCancellation); 366 const bool goog_typing_detection = audio_constraints.GetProperty( 367 MediaAudioConstraints::kGoogTypingNoiseDetection); 368 #endif 369 370 const bool goog_ns = audio_constraints.GetProperty( 371 MediaAudioConstraints::kGoogNoiseSuppression); 372 const bool goog_experimental_ns = audio_constraints.GetProperty( 373 MediaAudioConstraints::kGoogExperimentalNoiseSuppression); 374 const bool goog_high_pass_filter = audio_constraints.GetProperty( 375 MediaAudioConstraints::kGoogHighpassFilter); 376 377 // Return immediately if no goog constraint is enabled. 378 if (!echo_cancellation && !goog_experimental_aec && !goog_ns && 379 !goog_high_pass_filter && !goog_typing_detection && 380 !goog_agc && !goog_experimental_ns) { 381 RecordProcessingState(AUDIO_PROCESSING_DISABLED); 382 return; 383 } 384 385 // Create and configure the webrtc::AudioProcessing. 386 audio_processing_.reset(webrtc::AudioProcessing::Create()); 387 CHECK_EQ(0, audio_processing_->Initialize(kAudioProcessingSampleRate, 388 kAudioProcessingSampleRate, 389 kAudioProcessingSampleRate, 390 kAudioProcessingChannelLayout, 391 kAudioProcessingChannelLayout, 392 kAudioProcessingChannelLayout)); 393 394 // Enable the audio processing components. 395 if (echo_cancellation) { 396 EnableEchoCancellation(audio_processing_.get()); 397 398 if (goog_experimental_aec) 399 EnableExperimentalEchoCancellation(audio_processing_.get()); 400 401 if (playout_data_source_) 402 playout_data_source_->AddPlayoutSink(this); 403 } 404 405 if (goog_ns) 406 EnableNoiseSuppression(audio_processing_.get()); 407 408 if (goog_experimental_ns) 409 EnableExperimentalNoiseSuppression(audio_processing_.get()); 410 411 if (goog_high_pass_filter) 412 EnableHighPassFilter(audio_processing_.get()); 413 414 if (goog_typing_detection) { 415 // TODO(xians): Remove this |typing_detector_| after the typing suppression 416 // is enabled by default. 417 typing_detector_.reset(new webrtc::TypingDetection()); 418 EnableTypingDetection(audio_processing_.get(), typing_detector_.get()); 419 } 420 421 if (goog_agc) 422 EnableAutomaticGainControl(audio_processing_.get()); 423 424 RecordProcessingState(AUDIO_PROCESSING_ENABLED); 425 } 426 427 void MediaStreamAudioProcessor::InitializeCaptureConverter( 428 const media::AudioParameters& source_params) { 429 DCHECK(main_thread_checker_.CalledOnValidThread()); 430 DCHECK(source_params.IsValid()); 431 432 // Create and initialize audio converter for the source data. 433 // When the webrtc AudioProcessing is enabled, the sink format of the 434 // converter will be the same as the post-processed data format, which is 435 // 32k mono for desktops and 16k mono for Android. When the AudioProcessing 436 // is disabled, the sink format will be the same as the source format. 437 const int sink_sample_rate = audio_processing_ ? 438 kAudioProcessingSampleRate : source_params.sample_rate(); 439 const media::ChannelLayout sink_channel_layout = audio_processing_ ? 440 media::GuessChannelLayout(kAudioProcessingNumberOfChannels) : 441 source_params.channel_layout(); 442 443 // WebRtc AudioProcessing requires 10ms as its packet size. We use this 444 // native size when processing is enabled. While processing is disabled, and 445 // the source is running with a buffer size smaller than 10ms buffer, we use 446 // same buffer size as the incoming format to avoid extra FIFO for WebAudio. 447 int sink_buffer_size = sink_sample_rate / 100; 448 if (!audio_processing_ && 449 source_params.frames_per_buffer() < sink_buffer_size) { 450 sink_buffer_size = source_params.frames_per_buffer(); 451 } 452 453 media::AudioParameters sink_params( 454 media::AudioParameters::AUDIO_PCM_LOW_LATENCY, sink_channel_layout, 455 sink_sample_rate, 16, sink_buffer_size); 456 capture_converter_.reset( 457 new MediaStreamAudioConverter(source_params, sink_params)); 458 } 459 460 void MediaStreamAudioProcessor::InitializeRenderConverterIfNeeded( 461 int sample_rate, int number_of_channels, int frames_per_buffer) { 462 DCHECK(render_thread_checker_.CalledOnValidThread()); 463 // TODO(xians): Figure out if we need to handle the buffer size change. 464 if (render_converter_.get() && 465 render_converter_->source_parameters().sample_rate() == sample_rate && 466 render_converter_->source_parameters().channels() == number_of_channels) { 467 // Do nothing if the |render_converter_| has been setup properly. 468 return; 469 } 470 471 // Create and initialize audio converter for the render data. 472 // webrtc::AudioProcessing accepts the same format as what it uses to process 473 // capture data, which is 32k mono for desktops and 16k mono for Android. 474 media::AudioParameters source_params( 475 media::AudioParameters::AUDIO_PCM_LOW_LATENCY, 476 media::GuessChannelLayout(number_of_channels), sample_rate, 16, 477 frames_per_buffer); 478 media::AudioParameters sink_params( 479 media::AudioParameters::AUDIO_PCM_LOW_LATENCY, 480 media::CHANNEL_LAYOUT_MONO, kAudioProcessingSampleRate, 16, 481 kAudioProcessingSampleRate / 100); 482 render_converter_.reset( 483 new MediaStreamAudioConverter(source_params, sink_params)); 484 render_data_bus_ = media::AudioBus::Create(number_of_channels, 485 frames_per_buffer); 486 } 487 488 int MediaStreamAudioProcessor::ProcessData(webrtc::AudioFrame* audio_frame, 489 base::TimeDelta capture_delay, 490 int volume, 491 bool key_pressed) { 492 DCHECK(capture_thread_checker_.CalledOnValidThread()); 493 if (!audio_processing_) 494 return 0; 495 496 TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData"); 497 DCHECK_EQ(audio_processing_->input_sample_rate_hz(), 498 capture_converter_->sink_parameters().sample_rate()); 499 DCHECK_EQ(audio_processing_->num_input_channels(), 500 capture_converter_->sink_parameters().channels()); 501 DCHECK_EQ(audio_processing_->num_output_channels(), 502 capture_converter_->sink_parameters().channels()); 503 504 base::subtle::Atomic32 render_delay_ms = 505 base::subtle::Acquire_Load(&render_delay_ms_); 506 int64 capture_delay_ms = capture_delay.InMilliseconds(); 507 DCHECK_LT(capture_delay_ms, 508 std::numeric_limits<base::subtle::Atomic32>::max()); 509 int total_delay_ms = capture_delay_ms + render_delay_ms; 510 if (total_delay_ms > 300) { 511 LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms 512 << "ms; render delay: " << render_delay_ms << "ms"; 513 } 514 515 audio_processing_->set_stream_delay_ms(total_delay_ms); 516 517 DCHECK_LE(volume, WebRtcAudioDeviceImpl::kMaxVolumeLevel); 518 webrtc::GainControl* agc = audio_processing_->gain_control(); 519 int err = agc->set_stream_analog_level(volume); 520 DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err; 521 522 audio_processing_->set_stream_key_pressed(key_pressed); 523 524 err = audio_processing_->ProcessStream(audio_frame); 525 DCHECK_EQ(err, 0) << "ProcessStream() error: " << err; 526 527 if (typing_detector_ && 528 audio_frame->vad_activity_ != webrtc::AudioFrame::kVadUnknown) { 529 bool vad_active = 530 (audio_frame->vad_activity_ == webrtc::AudioFrame::kVadActive); 531 bool typing_detected = typing_detector_->Process(key_pressed, vad_active); 532 base::subtle::Release_Store(&typing_detected_, typing_detected); 533 } 534 535 // Return 0 if the volume has not been changed, otherwise return the new 536 // volume. 537 return (agc->stream_analog_level() == volume) ? 538 0 : agc->stream_analog_level(); 539 } 540 541 } // namespace content 542