// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/media_stream_audio_processor.h"

#include <algorithm>
#include <limits>

#include "base/command_line.h"
#include "base/debug/trace_event.h"
#if defined(OS_MACOSX)
#include "base/metrics/field_trial.h"
#endif
#include "base/metrics/histogram.h"
#include "content/public/common/content_switches.h"
#include "content/renderer/media/media_stream_audio_processor_options.h"
#include "content/renderer/media/rtc_media_constraints.h"
#include "content/renderer/media/webrtc_audio_device_impl.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_converter.h"
#include "media/base/audio_fifo.h"
#include "media/base/channel_layout.h"
#include "third_party/WebKit/public/platform/WebMediaConstraints.h"
#include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h"
#include "third_party/webrtc/modules/audio_processing/typing_detection.h"

namespace content {

namespace {

using webrtc::AudioProcessing;

#if defined(OS_ANDROID)
const int kAudioProcessingSampleRate = 16000;
#else
const int kAudioProcessingSampleRate = 32000;
#endif
const int kAudioProcessingNumberOfChannels = 1;

AudioProcessing::ChannelLayout MapLayout(media::ChannelLayout media_layout) {
  switch (media_layout) {
    case media::CHANNEL_LAYOUT_MONO:
      return AudioProcessing::kMono;
    case media::CHANNEL_LAYOUT_STEREO:
      return AudioProcessing::kStereo;
    case media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
      return AudioProcessing::kStereoAndKeyboard;
    default:
      NOTREACHED() << "Layout not supported: " << media_layout;
      return AudioProcessing::kMono;
  }
}

AudioProcessing::ChannelLayout ChannelsToLayout(int num_channels) {
  switch (num_channels) {
    case 1:
      return AudioProcessing::kMono;
    case 2:
      return AudioProcessing::kStereo;
    default:
      NOTREACHED() << "Channels not supported: " << num_channels;
      return AudioProcessing::kMono;
  }
}

// Used by UMA histograms; entries should not be re-ordered or removed.
enum AudioTrackProcessingStates {
  AUDIO_PROCESSING_ENABLED = 0,
  AUDIO_PROCESSING_DISABLED,
  AUDIO_PROCESSING_IN_WEBRTC,
  AUDIO_PROCESSING_MAX
};

void RecordProcessingState(AudioTrackProcessingStates state) {
  UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates",
                            state, AUDIO_PROCESSING_MAX);
}

}  // namespace

// Wraps AudioBus to provide access to the array of channel pointers, since
// this is the type webrtc::AudioProcessing deals in. The array is refreshed
// on every channel_ptrs() call, and remains valid until the underlying
// AudioBus pointers are changed, e.g. through calls to SetChannelData() or
// SwapChannels().
//
// All methods are called on one of the capture or render audio threads
// exclusively.
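//
// For example (illustrative sizes, not mandated by this class): a 10 ms mono
// chunk at 48 kHz would be MediaStreamAudioBus(1, 480), and channel_ptrs()
// then yields the float*[] view that the webrtc::AudioProcessing calls take.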
class MediaStreamAudioBus {
 public:
  MediaStreamAudioBus(int channels, int frames)
      : bus_(media::AudioBus::Create(channels, frames)),
        channel_ptrs_(new float*[channels]) {
    // May be created in the main render thread and used in the audio threads.
    thread_checker_.DetachFromThread();
  }

  media::AudioBus* bus() {
    DCHECK(thread_checker_.CalledOnValidThread());
    return bus_.get();
  }

  float* const* channel_ptrs() {
    DCHECK(thread_checker_.CalledOnValidThread());
    for (int i = 0; i < bus_->channels(); ++i) {
      channel_ptrs_[i] = bus_->channel(i);
    }
    return channel_ptrs_.get();
  }

 private:
  base::ThreadChecker thread_checker_;
  scoped_ptr<media::AudioBus> bus_;
  scoped_ptr<float*[]> channel_ptrs_;
};

// Wraps AudioFifo to provide a cleaner interface to MediaStreamAudioProcessor.
// It avoids the FIFO when the source and destination frame counts match. All
// methods are called on one of the capture or render audio threads
// exclusively.
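//
// Illustrative usage (assumed sizes; any source/destination pair works):
//
//   MediaStreamAudioFifo fifo(2, 440, 480);
//   fifo.Push(source_bus);  // 440 frames per call.
//   MediaStreamAudioBus* chunk;
//   while (fifo.Consume(&chunk)) {
//     // |chunk| holds exactly 480 frames; hand it to the processor.
//   }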
class MediaStreamAudioFifo {
 public:
  MediaStreamAudioFifo(int channels, int source_frames,
                       int destination_frames)
      : source_frames_(source_frames),
        destination_(new MediaStreamAudioBus(channels, destination_frames)),
        data_available_(false) {
    if (source_frames != destination_frames) {
      // Since we require every Push to be followed by as many Consumes as
      // possible, twice the larger of the two is a (probably) loose upper
      // bound on the FIFO size.
      const int fifo_frames = 2 * std::max(source_frames, destination_frames);
      fifo_.reset(new media::AudioFifo(channels, fifo_frames));
    }

    // May be created in the main render thread and used in the audio threads.
    thread_checker_.DetachFromThread();
  }

  void Push(const media::AudioBus* source) {
    DCHECK(thread_checker_.CalledOnValidThread());
    DCHECK_EQ(source->channels(), destination_->bus()->channels());
    DCHECK_EQ(source->frames(), source_frames_);

    if (fifo_) {
      fifo_->Push(source);
    } else {
      source->CopyTo(destination_->bus());
      data_available_ = true;
    }
  }

  // Returns true if a destination-sized chunk of data is available to be
  // consumed, and false otherwise.
  bool Consume(MediaStreamAudioBus** destination) {
    DCHECK(thread_checker_.CalledOnValidThread());

    if (fifo_) {
      if (fifo_->frames() < destination_->bus()->frames())
        return false;

      fifo_->Consume(destination_->bus(), 0, destination_->bus()->frames());
    } else {
      if (!data_available_)
        return false;

      // The data was already copied to |destination_| in this case.
      data_available_ = false;
    }

    *destination = destination_.get();
    return true;
  }

 private:
  base::ThreadChecker thread_checker_;
  const int source_frames_;  // For a DCHECK.
  scoped_ptr<MediaStreamAudioBus> destination_;
  scoped_ptr<media::AudioFifo> fifo_;
  // Only used when the FIFO is disabled.
  bool data_available_;
};

bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() {
  return !CommandLine::ForCurrentProcess()->HasSwitch(
      switches::kDisableAudioTrackProcessing);
}

MediaStreamAudioProcessor::MediaStreamAudioProcessor(
    const blink::WebMediaConstraints& constraints,
    int effects,
    WebRtcPlayoutDataSource* playout_data_source)
    : render_delay_ms_(0),
      playout_data_source_(playout_data_source),
      audio_mirroring_(false),
      typing_detected_(false),
      stopped_(false) {
  capture_thread_checker_.DetachFromThread();
  render_thread_checker_.DetachFromThread();
  InitializeAudioProcessingModule(constraints, effects);
  if (IsAudioTrackProcessingEnabled()) {
    aec_dump_message_filter_ = AecDumpMessageFilter::Get();
    // In unit tests that do not create a message filter,
    // |aec_dump_message_filter_| will be NULL. We can just ignore that. Other
    // unit tests and browser tests ensure that we do get the filter when we
    // should.
    if (aec_dump_message_filter_.get())
      aec_dump_message_filter_->AddDelegate(this);
  }
}

MediaStreamAudioProcessor::~MediaStreamAudioProcessor() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  Stop();
}

void MediaStreamAudioProcessor::OnCaptureFormatChanged(
    const media::AudioParameters& input_format) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more PushCaptureData() or ProcessAndConsumeData() callbacks
  // on the capture thread.
  InitializeCaptureFifo(input_format);

  // Reset the |capture_thread_checker_| since the capture data will come from
  // a new capture thread.
  capture_thread_checker_.DetachFromThread();
}

void MediaStreamAudioProcessor::PushCaptureData(
    const media::AudioBus* audio_source) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());

  capture_fifo_->Push(audio_source);
}
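
// Pulls one processed chunk out of the capture FIFO. Returns false when the
// FIFO does not yet hold a full chunk; on success, |out| points to
// interleaved int16 data in the output format and |new_volume| carries the
// AGC's requested microphone volume (0 when unchanged).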
bool MediaStreamAudioProcessor::ProcessAndConsumeData(
    base::TimeDelta capture_delay, int volume, bool key_pressed,
    int* new_volume, int16** out) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData");

  MediaStreamAudioBus* process_bus;
  if (!capture_fifo_->Consume(&process_bus))
    return false;

  // Use the process bus directly if audio processing is disabled.
  MediaStreamAudioBus* output_bus = process_bus;
  *new_volume = 0;
  if (audio_processing_) {
    output_bus = output_bus_.get();
    *new_volume = ProcessData(process_bus->channel_ptrs(),
                              process_bus->bus()->frames(), capture_delay,
                              volume, key_pressed, output_bus->channel_ptrs());
  }

  // Swap channels before interleaving the data.
  if (audio_mirroring_ &&
      output_format_.channel_layout() == media::CHANNEL_LAYOUT_STEREO) {
    // Swap the first and second channels.
    output_bus->bus()->SwapChannels(0, 1);
  }

  output_bus->bus()->ToInterleaved(output_bus->bus()->frames(),
                                   sizeof(int16),
                                   output_data_.get());
  *out = output_data_.get();

  return true;
}

void MediaStreamAudioProcessor::Stop() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  if (stopped_)
    return;

  stopped_ = true;

  if (aec_dump_message_filter_.get()) {
    aec_dump_message_filter_->RemoveDelegate(this);
    aec_dump_message_filter_ = NULL;
  }

  if (!audio_processing_.get())
    return;

  StopEchoCancellationDump(audio_processing_.get());

  if (playout_data_source_) {
    playout_data_source_->RemovePlayoutSink(this);
    playout_data_source_ = NULL;
  }
}

const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const {
  return input_format_;
}

const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const {
  return output_format_;
}

void MediaStreamAudioProcessor::OnAecDumpFile(
    const IPC::PlatformFileForTransit& file_handle) {
  DCHECK(main_thread_checker_.CalledOnValidThread());

  base::File file = IPC::PlatformFileForTransitToFile(file_handle);
  DCHECK(file.IsValid());

  if (audio_processing_)
    StartEchoCancellationDump(audio_processing_.get(), file.Pass());
  else
    file.Close();
}

void MediaStreamAudioProcessor::OnDisableAecDump() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  if (audio_processing_)
    StopEchoCancellationDump(audio_processing_.get());
}

void MediaStreamAudioProcessor::OnIpcClosing() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  aec_dump_message_filter_ = NULL;
}
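
// Called on the render audio thread. Buffers the playout audio in
// |render_fifo_| and feeds it to the APM's reverse stream in 10 ms chunks,
// so the echo canceller can correlate far-end audio with the capture stream.
// Exactly one of AEC and AECM must be enabled here (see the DCHECK below).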
void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus,
                                              int sample_rate,
                                              int audio_delay_milliseconds) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
  DCHECK(audio_processing_->echo_control_mobile()->is_enabled() ^
         audio_processing_->echo_cancellation()->is_enabled());

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData");
  DCHECK_LT(audio_delay_milliseconds,
            std::numeric_limits<base::subtle::Atomic32>::max());
  base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds);

  InitializeRenderFifoIfNeeded(sample_rate, audio_bus->channels(),
                               audio_bus->frames());

  render_fifo_->Push(audio_bus);
  MediaStreamAudioBus* analysis_bus;
  while (render_fifo_->Consume(&analysis_bus)) {
    audio_processing_->AnalyzeReverseStream(
        analysis_bus->channel_ptrs(),
        analysis_bus->bus()->frames(),
        sample_rate,
        ChannelsToLayout(audio_bus->channels()));
  }
}

void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more OnPlayoutData() callbacks on the render thread.
  render_thread_checker_.DetachFromThread();
  render_fifo_.reset();
}

void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) {
  stats->typing_noise_detected =
      (base::subtle::Acquire_Load(&typing_detected_) != false);
  GetAecStats(audio_processing_.get(), stats);
}

void MediaStreamAudioProcessor::InitializeAudioProcessingModule(
    const blink::WebMediaConstraints& constraints, int effects) {
  DCHECK(!audio_processing_);

  MediaAudioConstraints audio_constraints(constraints, effects);

  // Audio mirroring can be enabled even though audio processing is otherwise
  // disabled.
  audio_mirroring_ = audio_constraints.GetProperty(
      MediaAudioConstraints::kGoogAudioMirroring);

  if (!IsAudioTrackProcessingEnabled()) {
    RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC);
    return;
  }

#if defined(OS_IOS)
  // On iOS, VPIO provides built-in AGC and AEC.
  const bool echo_cancellation = false;
  const bool goog_agc = false;
#else
  const bool echo_cancellation =
      audio_constraints.GetEchoCancellationProperty();
  const bool goog_agc = audio_constraints.GetProperty(
      MediaAudioConstraints::kGoogAutoGainControl);
#endif

#if defined(OS_IOS) || defined(OS_ANDROID)
  const bool goog_experimental_aec = false;
  const bool goog_typing_detection = false;
#else
  const bool goog_experimental_aec = audio_constraints.GetProperty(
      MediaAudioConstraints::kGoogExperimentalEchoCancellation);
  const bool goog_typing_detection = audio_constraints.GetProperty(
      MediaAudioConstraints::kGoogTypingNoiseDetection);
#endif

  const bool goog_ns = audio_constraints.GetProperty(
      MediaAudioConstraints::kGoogNoiseSuppression);
  const bool goog_experimental_ns = audio_constraints.GetProperty(
      MediaAudioConstraints::kGoogExperimentalNoiseSuppression);
  const bool goog_high_pass_filter = audio_constraints.GetProperty(
      MediaAudioConstraints::kGoogHighpassFilter);

  // Return immediately if no goog constraint is enabled.
  if (!echo_cancellation && !goog_experimental_aec && !goog_ns &&
      !goog_high_pass_filter && !goog_typing_detection &&
      !goog_agc && !goog_experimental_ns) {
    RecordProcessingState(AUDIO_PROCESSING_DISABLED);
    return;
  }

  // Experimental options provided at creation.
  webrtc::Config config;
  if (goog_experimental_aec)
    config.Set<webrtc::DelayCorrection>(new webrtc::DelayCorrection(true));
  if (goog_experimental_ns)
    config.Set<webrtc::ExperimentalNs>(new webrtc::ExperimentalNs(true));
#if defined(OS_MACOSX)
  if (base::FieldTrialList::FindFullName("NoReportedDelayOnMac") == "Enabled")
    config.Set<webrtc::ReportedDelay>(new webrtc::ReportedDelay(false));
#endif

  // Create and configure the webrtc::AudioProcessing.
  audio_processing_.reset(webrtc::AudioProcessing::Create(config));

  // Enable the audio processing components.
  if (echo_cancellation) {
    EnableEchoCancellation(audio_processing_.get());

    if (playout_data_source_)
      playout_data_source_->AddPlayoutSink(this);
  }

  if (goog_ns)
    EnableNoiseSuppression(audio_processing_.get());

  if (goog_high_pass_filter)
    EnableHighPassFilter(audio_processing_.get());

  if (goog_typing_detection) {
    // TODO(xians): Remove this |typing_detector_| after the typing suppression
    // is enabled by default.
    typing_detector_.reset(new webrtc::TypingDetection());
    EnableTypingDetection(audio_processing_.get(), typing_detector_.get());
  }

  if (goog_agc)
    EnableAutomaticGainControl(audio_processing_.get());

  RecordProcessingState(AUDIO_PROCESSING_ENABLED);
}
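
// Chooses the capture-side buffering and the output format. For example
// (illustrative, assuming a 44.1 kHz mono source with processing enabled):
// the FIFO rebuffers the source into 441-frame (10 ms) chunks, which
// ProcessStream() then resamples to 320 frames at 32 kHz (160 frames at
// 16 kHz on Android).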
void MediaStreamAudioProcessor::InitializeCaptureFifo(
    const media::AudioParameters& input_format) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  DCHECK(input_format.IsValid());
  input_format_ = input_format;

  // TODO(ajm): For now, we assume fixed parameters for the output when audio
  // processing is enabled, to match the previous behavior. We should either
  // use the input parameters (in which case, audio processing will convert
  // at output) or ideally, have a backchannel from the sink to know what
  // format it would prefer.
  const int output_sample_rate = audio_processing_ ?
      kAudioProcessingSampleRate : input_format.sample_rate();
  const media::ChannelLayout output_channel_layout = audio_processing_ ?
      media::GuessChannelLayout(kAudioProcessingNumberOfChannels) :
      input_format.channel_layout();

  // webrtc::AudioProcessing requires a 10 ms chunk size. We use this native
  // size when processing is enabled. When disabled we use the same size as
  // the source if less than 10 ms.
  //
  // TODO(ajm): This conditional buffer size appears to be assuming knowledge
  // of the sink based on the source parameters. PeerConnection sinks seem to
  // want 10 ms chunks regardless, while WebAudio sinks want less, and we're
  // assuming we can identify WebAudio sinks by the input chunk size. Less
  // fragile would be to have the sink actually tell us how much it wants (as
  // in the above TODO).
  int processing_frames = input_format.sample_rate() / 100;
  int output_frames = output_sample_rate / 100;
  if (!audio_processing_ && input_format.frames_per_buffer() < output_frames) {
    processing_frames = input_format.frames_per_buffer();
    output_frames = processing_frames;
  }

  output_format_ = media::AudioParameters(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      output_channel_layout,
      output_sample_rate,
      16,
      output_frames);

  capture_fifo_.reset(
      new MediaStreamAudioFifo(input_format.channels(),
                               input_format.frames_per_buffer(),
                               processing_frames));

  if (audio_processing_) {
    output_bus_.reset(new MediaStreamAudioBus(output_format_.channels(),
                                              output_frames));
  }
  output_data_.reset(new int16[output_format_.GetBytesPerBuffer() /
                               sizeof(int16)]);
}

void MediaStreamAudioProcessor::InitializeRenderFifoIfNeeded(
    int sample_rate, int number_of_channels, int frames_per_buffer) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
  if (render_fifo_.get() &&
      render_format_.sample_rate() == sample_rate &&
      render_format_.channels() == number_of_channels &&
      render_format_.frames_per_buffer() == frames_per_buffer) {
    // Do nothing if |render_fifo_| has already been set up for this format.
    return;
  }

  render_format_ = media::AudioParameters(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::GuessChannelLayout(number_of_channels),
      sample_rate,
      16,
      frames_per_buffer);

  const int analysis_frames = sample_rate / 100;  // 10 ms chunks.
  render_fifo_.reset(
      new MediaStreamAudioFifo(number_of_channels,
                               frames_per_buffer,
                               analysis_frames));
}
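
// Runs one 10 ms chunk through the APM. The stream delay reported to the APM
// is the sum of the capture delay and the last render delay posted by
// OnPlayoutData(), which the echo canceller uses to line up the two streams.
// Returns the AGC's new microphone volume, or 0 if the volume is unchanged.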
int MediaStreamAudioProcessor::ProcessData(const float* const* process_ptrs,
                                           int process_frames,
                                           base::TimeDelta capture_delay,
                                           int volume,
                                           bool key_pressed,
                                           float* const* output_ptrs) {
  DCHECK(audio_processing_);
  DCHECK(capture_thread_checker_.CalledOnValidThread());

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData");

  base::subtle::Atomic32 render_delay_ms =
      base::subtle::Acquire_Load(&render_delay_ms_);
  int64 capture_delay_ms = capture_delay.InMilliseconds();
  DCHECK_LT(capture_delay_ms,
            std::numeric_limits<base::subtle::Atomic32>::max());
  int total_delay_ms = capture_delay_ms + render_delay_ms;
  if (total_delay_ms > 300) {
    LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms
                 << "ms; render delay: " << render_delay_ms << "ms";
  }

  webrtc::AudioProcessing* ap = audio_processing_.get();
  ap->set_stream_delay_ms(total_delay_ms);

  DCHECK_LE(volume, WebRtcAudioDeviceImpl::kMaxVolumeLevel);
  webrtc::GainControl* agc = ap->gain_control();
  int err = agc->set_stream_analog_level(volume);
  DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err;

  ap->set_stream_key_pressed(key_pressed);

  err = ap->ProcessStream(process_ptrs,
                          process_frames,
                          input_format_.sample_rate(),
                          MapLayout(input_format_.channel_layout()),
                          output_format_.sample_rate(),
                          MapLayout(output_format_.channel_layout()),
                          output_ptrs);
  DCHECK_EQ(err, 0) << "ProcessStream() error: " << err;

  if (typing_detector_) {
    webrtc::VoiceDetection* vad = ap->voice_detection();
    DCHECK(vad->is_enabled());
    bool detected = typing_detector_->Process(key_pressed,
                                              vad->stream_has_voice());
    base::subtle::Release_Store(&typing_detected_, detected);
  }

  // Return 0 if the volume has not been changed, and the new volume otherwise.
  return (agc->stream_analog_level() == volume) ?
      0 : agc->stream_analog_level();
}

}  // namespace content