// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/audio/win/audio_unified_win.h"

#include <Functiondiscoverykeys_devpkey.h>

#include "base/debug/trace_event.h"
#ifndef NDEBUG
#include "base/file_util.h"
#include "base/path_service.h"
#endif
#include "base/time/time.h"
#include "base/win/scoped_com_initializer.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"

using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;
using base::win::ScopedCoMem;

// Smoothing factor in exponential smoothing filter where 0 < alpha < 1.
// Larger values of alpha reduce the level of smoothing.
// See http://en.wikipedia.org/wiki/Exponential_smoothing for details.
static const double kAlpha = 0.1;

// Compute a rate compensation which always attracts us back to a specified
// target level over a period of |kCorrectionTimeSeconds|.
static const double kCorrectionTimeSeconds = 0.1;

#ifndef NDEBUG
// Max number of columns in the output text file |kUnifiedAudioDebugFileName|.
// See LogElementNames enumerator for details on what each column represents.
static const size_t kMaxNumSampleTypes = 4;

static const size_t kMaxNumParams = 2;

// Max number of rows in the output file |kUnifiedAudioDebugFileName|.
// Each row corresponds to one set of sample values for (approximately) the
// same time instant (stored in the first column).
static const size_t kMaxFileSamples = 10000;

// Name of output debug file used for off-line analysis of measurements which
// can be utilized for performance tuning of this class.
static const char kUnifiedAudioDebugFileName[] = "unified_win_debug.txt";

// Name of output debug file used for off-line analysis of measurements.
// This file will contain a list of audio parameters.
static const char kUnifiedAudioParamsFileName[] = "unified_win_params.txt";
#endif

typedef uint32 ChannelConfig;

// Retrieves an integer mask which corresponds to the channel layout the
// audio engine uses for its internal processing/mixing of shared-mode
// streams. This mask indicates which channels are present in the multi-
// channel stream. The least significant bit corresponds with the Front Left
// speaker, the next least significant bit corresponds to the Front Right
// speaker, and so on, continuing in the order defined in KsMedia.h.
// See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
// for more details.
static ChannelConfig GetChannelConfig(EDataFlow data_flow) {
  WAVEFORMATPCMEX format;
  return SUCCEEDED(media::CoreAudioUtil::GetDefaultSharedModeMixFormat(
      data_flow, eConsole, &format)) ?
      static_cast<int>(format.dwChannelMask) : 0;
}

// Use the acquired IAudioClock interface to derive a time stamp of the audio
// sample which is currently playing through the speakers.
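// IAudioClock::GetPosition() reports the device position in units of the
// frequency returned by IAudioClock::GetFrequency(), so the ratio
// position / frequency is the stream position in seconds. For example, a
// reported position of 96000 with a device frequency of 48000 corresponds
// to 2000 milliseconds.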
static double SpeakerStreamPosInMilliseconds(IAudioClock* clock) {
  UINT64 device_frequency = 0, position = 0;
  if (FAILED(clock->GetFrequency(&device_frequency)) ||
      FAILED(clock->GetPosition(&position, NULL))) {
    return 0.0;
  }
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(position) / device_frequency);
}

// Get a time stamp in milliseconds given the number of audio frames in
// |num_frames| using the current sample rate |fs| as scale factor.
// Example: |num_frames| = 960 and |fs| = 48000 => 20 [ms].
static double CurrentStreamPosInMilliseconds(UINT64 num_frames, DWORD fs) {
  return base::Time::kMillisecondsPerSecond *
      (static_cast<double>(num_frames) / fs);
}

// Convert a timestamp in milliseconds to byte units given the audio format
// in |format|.
// Example: |ts_milliseconds| equals 10, sample rate is 48000 and frame size
// is 4 bytes per audio frame => 480 * 4 = 1920 [bytes].
static int MillisecondsToBytes(double ts_milliseconds,
                               const WAVEFORMATPCMEX& format) {
  double seconds = ts_milliseconds / base::Time::kMillisecondsPerSecond;
  return static_cast<int>(seconds * format.Format.nSamplesPerSec *
      format.Format.nBlockAlign + 0.5);
}

// Convert frame count to milliseconds given the audio format in |format|.
static double FrameCountToMilliseconds(int num_frames,
                                       const WAVEFORMATPCMEX& format) {
  return (base::Time::kMillisecondsPerSecond * num_frames) /
      static_cast<double>(format.Format.nSamplesPerSec);
}

namespace media {

WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager,
                                         const AudioParameters& params,
                                         const std::string& input_device_id)
    : creating_thread_id_(base::PlatformThread::CurrentId()),
      manager_(manager),
      params_(params),
      input_channels_(params.input_channels()),
      output_channels_(params.channels()),
      input_device_id_(input_device_id),
      share_mode_(CoreAudioUtil::GetShareMode()),
      opened_(false),
      volume_(1.0),
      output_buffer_size_frames_(0),
      input_buffer_size_frames_(0),
      endpoint_render_buffer_size_frames_(0),
      endpoint_capture_buffer_size_frames_(0),
      num_written_frames_(0),
      total_delay_ms_(0.0),
      total_delay_bytes_(0),
      source_(NULL),
      input_callback_received_(false),
      io_sample_rate_ratio_(1),
      target_fifo_frames_(0),
      average_delta_(0),
      fifo_rate_compensation_(1),
      update_output_delay_(false),
      capture_delay_ms_(0) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::WASAPIUnifiedStream");
  VLOG(1) << "WASAPIUnifiedStream::WASAPIUnifiedStream()";
  DCHECK(manager_);

  VLOG(1) << "Input channels : " << input_channels_;
  VLOG(1) << "Output channels: " << output_channels_;
  VLOG(1) << "Sample rate    : " << params_.sample_rate();
  VLOG(1) << "Buffer size    : " << params.frames_per_buffer();

#ifndef NDEBUG
  input_time_stamps_.reset(new int64[kMaxFileSamples]);
  num_frames_in_fifo_.reset(new int[kMaxFileSamples]);
  resampler_margin_.reset(new int[kMaxFileSamples]);
  fifo_rate_comps_.reset(new double[kMaxFileSamples]);
  num_elements_.reset(new int[kMaxNumSampleTypes]);
  std::fill(num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes, 0);
  input_params_.reset(new int[kMaxNumParams]);
  output_params_.reset(new int[kMaxNumParams]);
#endif

  DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE)
      << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled.";

  // Load the Avrt DLL if not already loaded. Required to support MMCSS.
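  // MMCSS (Multimedia Class Scheduler Service) lets the audio I/O thread
  // request time-sensitive scheduling; see the call to
  // avrt::AvSetMmThreadCharacteristics() in Run().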
  bool avrt_init = avrt::Initialize();
  DCHECK(avrt_init) << "Failed to load the avrt.dll";

  // All events are auto-reset events and non-signaled initially.

  // Create the event which the audio engine will signal each time a buffer
  // has been recorded.
  capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Create the event which will be set in Stop() when streaming shall stop.
  stop_streaming_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
}

WASAPIUnifiedStream::~WASAPIUnifiedStream() {
  VLOG(1) << "WASAPIUnifiedStream::~WASAPIUnifiedStream()";
#ifndef NDEBUG
  base::FilePath data_file_name;
  PathService::Get(base::DIR_EXE, &data_file_name);
  data_file_name = data_file_name.AppendASCII(kUnifiedAudioDebugFileName);
  data_file_ = file_util::OpenFile(data_file_name, "wt");
  DVLOG(1) << ">> Output file " << data_file_name.value() << " is created.";

  size_t n = 0;
  size_t elements_to_write = *std::min_element(
      num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes);
  while (n < elements_to_write) {
    fprintf(data_file_, "%I64d %d %d %10.9f\n",
            input_time_stamps_[n],
            num_frames_in_fifo_[n],
            resampler_margin_[n],
            fifo_rate_comps_[n]);
    ++n;
  }
  file_util::CloseFile(data_file_);

  base::FilePath param_file_name;
  PathService::Get(base::DIR_EXE, &param_file_name);
  param_file_name = param_file_name.AppendASCII(kUnifiedAudioParamsFileName);
  param_file_ = file_util::OpenFile(param_file_name, "wt");
  DVLOG(1) << ">> Output file " << param_file_name.value() << " is created.";
  fprintf(param_file_, "%d %d\n", input_params_[0], input_params_[1]);
  fprintf(param_file_, "%d %d\n", output_params_[0], output_params_[1]);
  file_util::CloseFile(param_file_);
#endif
}

bool WASAPIUnifiedStream::Open() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Open");
  DVLOG(1) << "WASAPIUnifiedStream::Open()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  if (opened_)
    return true;

  AudioParameters hw_output_params;
  HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(
      eRender, eConsole, &hw_output_params);
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred output audio parameters.";
    return false;
  }

  AudioParameters hw_input_params;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    // Query native parameters for the default capture device.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        eCapture, eConsole, &hw_input_params);
  } else {
    // Query native parameters for the capture device given by
    // |input_device_id_|.
    hr = CoreAudioUtil::GetPreferredAudioParameters(
        input_device_id_, &hw_input_params);
  }
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to get preferred input audio parameters.";
    return false;
  }

  // It is currently only possible to open up the output audio device using
  // the native number of channels.
  if (output_channels_ != hw_output_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested output channels.";
    return false;
  }

  // It is currently only possible to open up the input audio device using
  // the native number of channels. If the client asks for a higher channel
  // count, we will do channel upmixing in this class. The most typical
  // example is that the client provides stereo but the hardware can only be
  // opened in mono mode. We will do mono to stereo conversion in this case.
  if (input_channels_ < hw_input_params.channels()) {
    LOG(ERROR) << "Audio device does not support requested input channels.";
    return false;
  } else if (input_channels_ > hw_input_params.channels()) {
    ChannelLayout input_layout =
        GuessChannelLayout(hw_input_params.channels());
    ChannelLayout output_layout = GuessChannelLayout(input_channels_);
    channel_mixer_.reset(new ChannelMixer(input_layout, output_layout));
    DVLOG(1) << "Remixing input channel layout from " << input_layout
             << " to " << output_layout << "; from "
             << hw_input_params.channels() << " channels to "
             << input_channels_;
  }

  if (hw_output_params.sample_rate() != params_.sample_rate()) {
    LOG(ERROR) << "Requested sample-rate: " << params_.sample_rate()
               << " must match the hardware sample-rate: "
               << hw_output_params.sample_rate();
    return false;
  }

  if (hw_output_params.frames_per_buffer() != params_.frames_per_buffer()) {
    LOG(ERROR) << "Requested buffer size: " << params_.frames_per_buffer()
               << " must match the hardware buffer size: "
               << hw_output_params.frames_per_buffer();
    return false;
  }

  // Set up WAVEFORMATPCMEX structures for input and output given the specified
  // audio parameters.
  SetIOFormats(hw_input_params, params_);

  // Create the input and output busses.
  input_bus_ = AudioBus::Create(
      hw_input_params.channels(), input_buffer_size_frames_);
  output_bus_ = AudioBus::Create(params_);

  // One extra bus is needed for the input channel mixing case.
  if (channel_mixer_) {
    DCHECK_LT(hw_input_params.channels(), input_channels_);
    // The size of the |channel_bus_| must be the same as the size of the
    // output bus to ensure that the channel manager can deal with both
    // resampled and non-resampled data as input.
    channel_bus_ = AudioBus::Create(
        input_channels_, params_.frames_per_buffer());
  }

  // Check if a FIFO and resampling are required to match the input rate to
  // the output rate. If so, a special thread loop, optimized for this case,
  // will be used. This mode is also called varispeed mode.
  // Note that we can also use this mode when input and output rates are the
  // same but native buffer sizes differ (can happen if two different audio
  // devices are used). For this case, the resampler uses a target ratio of
  // 1.0 but SetRatio is called to compensate for clock-drift. The FIFO is
  // required to compensate for the difference in buffer sizes.
  // TODO(henrika): we could perhaps improve the performance for the second
  // case here by only using the FIFO and avoiding resampling. Not sure how
  // much that would give and we risk not compensating for clock drift.
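  // For illustration: capturing from a 44100 Hz microphone (441-frame native
  // buffers) while rendering to a 48000 Hz device (480-frame buffers) takes
  // the varispeed path below, whereas matching rates and buffer sizes keep
  // the simpler capture-driven path.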
  if (hw_input_params.sample_rate() != params_.sample_rate() ||
      hw_input_params.frames_per_buffer() != params_.frames_per_buffer()) {
    DoVarispeedInitialization(hw_input_params, params_);
  }

  // Render side (event driven only in varispeed mode):

  ScopedComPtr<IAudioClient> audio_output_client =
      CoreAudioUtil::CreateDefaultClient(eRender, eConsole);
  if (!audio_output_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_output_client,
                                        share_mode_,
                                        &output_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // The |render_event_| will be NULL unless varispeed mode is utilized.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_output_client, &output_format_, render_event_.Get(),
        &endpoint_render_buffer_size_frames_);
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
  }
  if (FAILED(hr))
    return false;

  ScopedComPtr<IAudioRenderClient> audio_render_client =
      CoreAudioUtil::CreateRenderClient(audio_output_client);
  if (!audio_render_client)
    return false;

  // Capture side (always event driven; format depends on whether varispeed
  // is used or not):

  ScopedComPtr<IAudioClient> audio_input_client;
  if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    audio_input_client = CoreAudioUtil::CreateDefaultClient(eCapture, eConsole);
  } else {
    ScopedComPtr<IMMDevice> audio_input_device(
        CoreAudioUtil::CreateDevice(input_device_id_));
    audio_input_client = CoreAudioUtil::CreateClient(audio_input_device);
  }
  if (!audio_input_client)
    return false;

  if (!CoreAudioUtil::IsFormatSupported(audio_input_client,
                                        share_mode_,
                                        &input_format_)) {
    return false;
  }

  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    // Include a valid event handle for event-driven initialization.
    // The input side is always event driven, independent of whether varispeed
    // is used or not.
    hr = CoreAudioUtil::SharedModeInitialize(
        audio_input_client, &input_format_, capture_event_.Get(),
        &endpoint_capture_buffer_size_frames_);
  } else {
    // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
  }
  if (FAILED(hr))
    return false;

  ScopedComPtr<IAudioCaptureClient> audio_capture_client =
      CoreAudioUtil::CreateCaptureClient(audio_input_client);
  if (!audio_capture_client)
    return false;

  // Varispeed mode requires additional preparations.
  if (VarispeedMode())
    ResetVarispeed();

  // Store all valid COM interfaces.
  audio_output_client_ = audio_output_client;
  audio_render_client_ = audio_render_client;
  audio_input_client_ = audio_input_client;
  audio_capture_client_ = audio_capture_client;

  opened_ = true;
  return SUCCEEDED(hr);
}

void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Start");
  DVLOG(1) << "WASAPIUnifiedStream::Start()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  CHECK(callback);
  CHECK(opened_);

  if (audio_io_thread_) {
    CHECK_EQ(callback, source_);
    return;
  }

  source_ = callback;

  if (VarispeedMode()) {
    ResetVarispeed();
    fifo_rate_compensation_ = 1.0;
    average_delta_ = 0.0;
    input_callback_received_ = false;
    update_output_delay_ = false;
  }

  // Create and start the thread that will listen for capture events.
  // We will also listen on render events on the same thread if varispeed
  // mode is utilized.
  audio_io_thread_.reset(
      new base::DelegateSimpleThread(this, "wasapi_io_thread"));
  audio_io_thread_->Start();
  if (!audio_io_thread_->HasBeenStarted()) {
    DLOG(ERROR) << "Failed to start WASAPI IO thread.";
    return;
  }

  // Start input streaming data between the endpoint buffer and the audio
  // engine.
  HRESULT hr = audio_input_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }

  // Ensure that the endpoint buffer is prepared with silence.
  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
            audio_output_client_, audio_render_client_)) {
      DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
      return;
    }
  }
  num_written_frames_ = endpoint_render_buffer_size_frames_;

  // Start output streaming data between the endpoint buffer and the audio
  // engine.
  hr = audio_output_client_->Start();
  if (FAILED(hr)) {
    StopAndJoinThread(hr);
    return;
  }
}

void WASAPIUnifiedStream::Stop() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Stop");
  DVLOG(1) << "WASAPIUnifiedStream::Stop()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  if (!audio_io_thread_)
    return;

  // Stop input audio streaming.
  HRESULT hr = audio_input_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop input streaming: " << std::hex << hr;
  }

  // Stop output audio streaming.
  hr = audio_output_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop output streaming: " << std::hex << hr;
  }

  // Wait until the thread completes and perform cleanup.
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();

  // Ensure that we don't quit the main thread loop immediately next
  // time Start() is called.
  ResetEvent(stop_streaming_event_.Get());

  // Clear source callback, it'll be set again on the next Start() call.
  source_ = NULL;

  // Flush all pending data and reset the audio clock stream position to 0.
  hr = audio_output_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset output streaming: " << std::hex << hr;
  }

  hr = audio_input_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset input streaming: " << std::hex << hr;
  }

  // Extra safety check to ensure that the buffers are cleared.
  // If the buffers are not cleared correctly, the next call to Start()
  // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
  // TODO(henrika): this check is only needed for shared-mode streams.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
  DCHECK_EQ(0u, num_queued_frames);
}

void WASAPIUnifiedStream::Close() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::Close");
  DVLOG(1) << "WASAPIUnifiedStream::Close()";
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  // It is valid to call Close() before calling Open() or Start().
  // It is also valid to call Close() after Start() has been called.
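  // Stop() below is a no-op when no I/O thread exists, so closing a stream
  // that was never started is safe.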
  Stop();

  // Inform the audio manager that we have been closed. This will cause our
  // destruction.
  manager_->ReleaseOutputStream(this);
}

void WASAPIUnifiedStream::SetVolume(double volume) {
  DVLOG(1) << "SetVolume(volume=" << volume << ")";
  if (volume < 0 || volume > 1)
    return;
  volume_ = volume;
}

void WASAPIUnifiedStream::GetVolume(double* volume) {
  DVLOG(1) << "GetVolume()";
  *volume = static_cast<double>(volume_);
}

void WASAPIUnifiedStream::ProvideInput(int frame_delay, AudioBus* audio_bus) {
  // TODO(henrika): utilize frame_delay?
  // A non-zero frame delay means multiple callbacks were necessary to
  // fulfill the requested number of frames.
  if (frame_delay > 0)
    DVLOG(3) << "frame_delay: " << frame_delay;

#ifndef NDEBUG
  resampler_margin_[num_elements_[RESAMPLER_MARGIN]] =
      fifo_->frames() - audio_bus->frames();
  num_elements_[RESAMPLER_MARGIN]++;
#endif

  if (fifo_->frames() < audio_bus->frames()) {
    DLOG(ERROR) << "Not enough data in the FIFO ("
                << fifo_->frames() << " < " << audio_bus->frames() << ")";
    audio_bus->Zero();
    return;
  }

  fifo_->Consume(audio_bus, 0, audio_bus->frames());
}

void WASAPIUnifiedStream::SetIOFormats(const AudioParameters& input_params,
                                       const AudioParameters& output_params) {
  for (int n = 0; n < 2; ++n) {
    const AudioParameters& params = (n == 0) ? input_params : output_params;
    WAVEFORMATPCMEX* xformat = (n == 0) ? &input_format_ : &output_format_;
    WAVEFORMATEX* format = &xformat->Format;

    // Begin with the WAVEFORMATEX structure that specifies the basic format.
    format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    format->nChannels = params.channels();
    format->nSamplesPerSec = params.sample_rate();
    format->wBitsPerSample = params.bits_per_sample();
    format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
    format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
    format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);

    // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
    // Note that we always open up using the native channel layout.
    (*xformat).Samples.wValidBitsPerSample = format->wBitsPerSample;
    (*xformat).dwChannelMask = (n == 0) ?
        GetChannelConfig(eCapture) : GetChannelConfig(eRender);
    (*xformat).SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
  }

  input_buffer_size_frames_ = input_params.frames_per_buffer();
  output_buffer_size_frames_ = output_params.frames_per_buffer();
  VLOG(1) << "#audio frames per input buffer : " << input_buffer_size_frames_;
  VLOG(1) << "#audio frames per output buffer: " << output_buffer_size_frames_;

#ifndef NDEBUG
  input_params_[0] = input_format_.Format.nSamplesPerSec;
  input_params_[1] = input_buffer_size_frames_;
  output_params_[0] = output_format_.Format.nSamplesPerSec;
  output_params_[1] = output_buffer_size_frames_;
#endif
}

void WASAPIUnifiedStream::DoVarispeedInitialization(
    const AudioParameters& input_params, const AudioParameters& output_params) {
  DVLOG(1) << "WASAPIUnifiedStream::DoVarispeedInitialization()";

  // A FIFO is required in this mode for input to output buffering.
  // Note that it will add some latency.
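  // The added latency is roughly target_fifo_frames_ divided by the input
  // sample rate; for example, with a hypothetical safety factor of 2 and a
  // 10 ms native input buffer this amounts to about 20 ms.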
  fifo_.reset(new AudioFifo(input_params.channels(), kFifoSize));
  VLOG(1) << "Using FIFO of size " << fifo_->max_frames()
          << " (#channels=" << input_params.channels() << ")";

  // Create the multi-channel resampler using the initial sample rate ratio.
  // We will call MultiChannelResampler::SetRatio() during runtime to
  // allow arbitrary combinations of input and output devices running off
  // different clocks and using different drivers, with potentially
  // differing sample-rates. Note that the requested block size is given by
  // the native input buffer size |input_buffer_size_frames_|.
  io_sample_rate_ratio_ = input_params.sample_rate() /
      static_cast<double>(output_params.sample_rate());
  DVLOG(2) << "io_sample_rate_ratio: " << io_sample_rate_ratio_;
  resampler_.reset(new MultiChannelResampler(
      input_params.channels(), io_sample_rate_ratio_,
      input_buffer_size_frames_,
      base::Bind(&WASAPIUnifiedStream::ProvideInput, base::Unretained(this))));
  VLOG(1) << "Resampling from " << input_params.sample_rate() << " to "
          << output_params.sample_rate();

  // The optimal number of frames we'd like to keep in the FIFO at all times.
  // The actual size will vary but the goal is to ensure that the average size
  // is given by this value.
  target_fifo_frames_ = kTargetFifoSafetyFactor * input_buffer_size_frames_;
  VLOG(1) << "Target FIFO size: " << target_fifo_frames_;

  // Create the event which the audio engine will signal each time it
  // wants an audio buffer to render.
  render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

  // Allocate memory for a temporary audio bus used to store resampled input
  // audio.
  resampled_bus_ = AudioBus::Create(
      input_params.channels(), output_buffer_size_frames_);

  // Buffer initial silence corresponding to target I/O buffering.
  ResetVarispeed();
}

void WASAPIUnifiedStream::ResetVarispeed() {
  DCHECK(VarispeedMode());

  // Buffer initial silence corresponding to target I/O buffering.
  fifo_->Clear();
  scoped_ptr<AudioBus> silence =
      AudioBus::Create(input_format_.Format.nChannels,
                       target_fifo_frames_);
  silence->Zero();
  fifo_->Push(silence.get());
  resampler_->Flush();
}

void WASAPIUnifiedStream::Run() {
  ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

  // Increase the thread priority.
  audio_io_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

  // Enable MMCSS to ensure that this thread receives prioritized access to
  // CPU resources.
  // TODO(henrika): investigate if it is possible to include these additional
  // settings in SetThreadPriority() as well.
  DWORD task_index = 0;
  HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
                                                      &task_index);
  bool mmcss_is_ok =
      (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
  if (!mmcss_is_ok) {
    // Failed to enable MMCSS on this thread. It is not fatal but can lead
    // to reduced QoS at high load.
    DWORD err = GetLastError();
    LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
  }

  // The IAudioClock interface enables us to monitor a stream's data
  // rate and the current position in the stream. Allocate it before we
  // start spinning.
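  // The clock obtained via IAudioClient::GetService() below is only used by
  // ProcessOutputAudio() to estimate the render-side delay.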
  ScopedComPtr<IAudioClock> audio_output_clock;
  HRESULT hr = audio_output_client_->GetService(
      __uuidof(IAudioClock), audio_output_clock.ReceiveVoid());
  LOG_IF(WARNING, FAILED(hr)) << "Failed to create IAudioClock: "
                              << std::hex << hr;

  bool streaming = true;
  bool error = false;

  HANDLE wait_array[3];
  size_t num_handles = 0;
  wait_array[num_handles++] = stop_streaming_event_;
  wait_array[num_handles++] = capture_event_;
  if (render_event_) {
    // One extra event handle is needed in varispeed mode.
    wait_array[num_handles++] = render_event_;
  }

  // Keep streaming audio until the stop event is signaled.
  // Capture events are always used but render events are only active in
  // varispeed mode.
  while (streaming && !error) {
    // Wait for a close-down event, or a new capture event.
    DWORD wait_result = WaitForMultipleObjects(num_handles,
                                               wait_array,
                                               FALSE,
                                               INFINITE);
    switch (wait_result) {
      case WAIT_OBJECT_0 + 0:
        // |stop_streaming_event_| has been set.
        streaming = false;
        break;
      case WAIT_OBJECT_0 + 1:
        // |capture_event_| has been set.
        if (VarispeedMode()) {
          ProcessInputAudio();
        } else {
          ProcessInputAudio();
          ProcessOutputAudio(audio_output_clock);
        }
        break;
      case WAIT_OBJECT_0 + 2:
        DCHECK(VarispeedMode());
        // |render_event_| has been set.
        ProcessOutputAudio(audio_output_clock);
        break;
      default:
        error = true;
        break;
    }
  }

  if (streaming && error) {
    // Stop audio streaming since something has gone wrong in our main thread
    // loop. Note that we are still in a "started" state, hence a Stop() call
    // is required to join the thread properly.
    audio_input_client_->Stop();
    audio_output_client_->Stop();
    PLOG(ERROR) << "WASAPI streaming failed.";
  }

  // Disable MMCSS.
  if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    PLOG(WARNING) << "Failed to disable MMCSS";
  }
}

void WASAPIUnifiedStream::ProcessInputAudio() {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessInputAudio");

  BYTE* data_ptr = NULL;
  UINT32 num_captured_frames = 0;
  DWORD flags = 0;
  UINT64 device_position = 0;
  UINT64 capture_time_stamp = 0;

  const int bytes_per_sample = input_format_.Format.wBitsPerSample >> 3;

  base::TimeTicks now_tick = base::TimeTicks::HighResNow();

#ifndef NDEBUG
  if (VarispeedMode()) {
    input_time_stamps_[num_elements_[INPUT_TIME_STAMP]] =
        now_tick.ToInternalValue();
    num_elements_[INPUT_TIME_STAMP]++;
  }
#endif

  // Retrieve the amount of data in the capture endpoint buffer.
  // |capture_time_stamp| is the value of the performance counter at the time
  // that the audio endpoint device recorded the device position of the first
  // audio frame in the data packet.
  HRESULT hr = audio_capture_client_->GetBuffer(&data_ptr,
                                                &num_captured_frames,
                                                &flags,
                                                &device_position,
                                                &capture_time_stamp);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to get data from the capture buffer";
    return;
  }

  if (hr == AUDCLNT_S_BUFFER_EMPTY) {
    // The return code is a success code but a new packet is *not* available
    // and none of the output parameters in the GetBuffer() call contains
    // valid values. Best we can do is to deliver silence and avoid setting
    // |input_callback_received_| since this only seems to happen for the
    // initial event(s) on some devices.
    input_bus_->Zero();
  } else {
    // Valid data has been recorded and it is now OK to set the flag which
    // informs the render side that capturing has started.
    input_callback_received_ = true;
  }

  if (num_captured_frames != 0) {
    if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
      // Clear out the capture buffer since silence is reported.
      input_bus_->Zero();
    } else {
      // Store captured data in an audio bus after de-interleaving
      // the data to match the audio bus structure.
      input_bus_->FromInterleaved(
          data_ptr, num_captured_frames, bytes_per_sample);
    }
  }

  hr = audio_capture_client_->ReleaseBuffer(num_captured_frames);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";

  // Buffer input into the FIFO if varispeed mode is used. The render event
  // will drive resampling of this data to match the output side.
  if (VarispeedMode()) {
    int available_frames = fifo_->max_frames() - fifo_->frames();
    if (input_bus_->frames() <= available_frames) {
      fifo_->Push(input_bus_.get());
    }
#ifndef NDEBUG
    num_frames_in_fifo_[num_elements_[NUM_FRAMES_IN_FIFO]] =
        fifo_->frames();
    num_elements_[NUM_FRAMES_IN_FIFO]++;
#endif
  }

  // Save resources by not asking for new delay estimates each time.
  // These estimates are fairly stable and it is perfectly safe to only
  // sample at a rate of ~1Hz.
  // TODO(henrika): we might have to increase the update rate in varispeed
  // mode since the delay variations are higher in this mode.
  if ((now_tick - last_delay_sample_time_).InMilliseconds() >
      kTimeDiffInMillisecondsBetweenDelayMeasurements &&
      input_callback_received_) {
    // Calculate the estimated capture delay, i.e., the latency between
    // the recording time and the time when we are notified about
    // the recorded data. Note that the capture time stamp is given in
    // 100-nanosecond (0.1 microsecond) units.
    base::TimeDelta diff =
        now_tick - base::TimeTicks::FromInternalValue(0.1 * capture_time_stamp);
    capture_delay_ms_ = diff.InMillisecondsF();

    last_delay_sample_time_ = now_tick;
    update_output_delay_ = true;
  }
}

void WASAPIUnifiedStream::ProcessOutputAudio(IAudioClock* audio_output_clock) {
  TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessOutputAudio");

  if (!input_callback_received_) {
    if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
      if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
              audio_output_client_, audio_render_client_))
        DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    }
    return;
  }

  // Rate-adjusted resampling is required in varispeed mode. It means that
  // recorded audio samples will be read from the FIFO, resampled to match the
  // output sample-rate and then stored in |resampled_bus_|.
  if (VarispeedMode()) {
    // Calculate a varispeed rate scalar factor to compensate for drift between
    // input and output. We use the actual number of frames still in the FIFO
    // compared with the ideal value of |target_fifo_frames_|.
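    // For example, with kCorrectionTimeSeconds = 0.1 and a 48000 Hz output,
    // the correction window is 4800 frames; an average FIFO surplus of 48
    // frames then yields a compensation factor of (4800 + 48) / 4800 ~= 1.01,
    // i.e. the resampler consumes input about 1% faster until the FIFO is
    // back at its target level.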
    int delta = fifo_->frames() - target_fifo_frames_;

    // Average |delta| because it can jitter back/forth quite frequently
    // by +/- the hardware buffer-size *if* the input and output callbacks are
    // happening at almost exactly the same time. Also, if the input and output
    // sample-rates are different then |delta| will jitter quite a bit due to
    // the rate conversion happening in the varispeed, plus the jittering of
    // the callbacks. The average value is what's important here.
    // We use an exponential smoothing filter to reduce the variations.
    average_delta_ += kAlpha * (delta - average_delta_);

    // Compute a rate compensation which always attracts us back to the
    // |target_fifo_frames_| over a period of kCorrectionTimeSeconds.
    double correction_time_frames =
        kCorrectionTimeSeconds * output_format_.Format.nSamplesPerSec;
    fifo_rate_compensation_ =
        (correction_time_frames + average_delta_) / correction_time_frames;

#ifndef NDEBUG
    fifo_rate_comps_[num_elements_[RATE_COMPENSATION]] =
        fifo_rate_compensation_;
    num_elements_[RATE_COMPENSATION]++;
#endif

    // Adjust for FIFO drift.
    const double new_ratio = io_sample_rate_ratio_ * fifo_rate_compensation_;
    resampler_->SetRatio(new_ratio);
    // Get resampled input audio from the FIFO where the size is given by the
    // output side.
    resampler_->Resample(resampled_bus_->frames(), resampled_bus_.get());
  }

  // Derive a new total delay estimate if the capture side has set the
  // |update_output_delay_| flag.
  if (update_output_delay_) {
    // Calculate the estimated render delay, i.e., the time difference
    // between the time when data is added to the endpoint buffer and
    // when the data is played out on the actual speaker.
    const double stream_pos = CurrentStreamPosInMilliseconds(
        num_written_frames_ + output_buffer_size_frames_,
        output_format_.Format.nSamplesPerSec);
    const double speaker_pos =
        SpeakerStreamPosInMilliseconds(audio_output_clock);
    const double render_delay_ms = stream_pos - speaker_pos;
    const double fifo_delay_ms = VarispeedMode() ?
        FrameCountToMilliseconds(target_fifo_frames_, input_format_) : 0;

    // Derive the total delay, i.e., the sum of the input and output
    // delays. Also convert the value into byte units. An extra FIFO delay
    // is added for varispeed usage cases.
    total_delay_ms_ = VarispeedMode() ?
        capture_delay_ms_ + render_delay_ms + fifo_delay_ms :
        capture_delay_ms_ + render_delay_ms;
    DVLOG(2) << "total_delay_ms   : " << total_delay_ms_;
    DVLOG(3) << " capture_delay_ms: " << capture_delay_ms_;
    DVLOG(3) << " render_delay_ms : " << render_delay_ms;
    DVLOG(3) << " fifo_delay_ms   : " << fifo_delay_ms;
    total_delay_bytes_ = MillisecondsToBytes(total_delay_ms_, output_format_);

    // Wait for a new signal from the capture side.
    update_output_delay_ = false;
  }

  // Select the source depending on whether varispeed is utilized or not.
  // Also, the source might be the output of a channel mixer if channel mixing
  // is required to match the native input channels to the number of input
  // channels used by the client (given by |input_channels_| in this case).
  AudioBus* input_bus = VarispeedMode() ?
      resampled_bus_.get() : input_bus_.get();
  if (channel_mixer_) {
    DCHECK_EQ(input_bus->frames(), channel_bus_->frames());
    // Most common case is 1 -> 2 channel upmixing.
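    // The ChannelMixer applies a fixed mixing matrix derived from the two
    // layouts; for the mono to stereo case this typically means copying the
    // single input channel into both output channels.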
    channel_mixer_->Transform(input_bus, channel_bus_.get());
    // Use the output from the channel mixer as the new input bus.
    input_bus = channel_bus_.get();
  }

  // Prepare for rendering by calling OnMoreIOData().
  int frames_filled = source_->OnMoreIOData(
      input_bus,
      output_bus_.get(),
      AudioBuffersState(0, total_delay_bytes_));
  DCHECK_EQ(frames_filled, output_bus_->frames());

  // Keep track of the number of rendered frames since we need it for
  // our delay calculations.
  num_written_frames_ += frames_filled;

  // Derive the amount of available space in the endpoint buffer.
  // Avoid a render attempt if there is no room for a captured packet.
  UINT32 num_queued_frames = 0;
  audio_output_client_->GetCurrentPadding(&num_queued_frames);
  if (endpoint_render_buffer_size_frames_ - num_queued_frames <
      output_buffer_size_frames_)
    return;

  // Grab all available space in the rendering endpoint buffer
  // into which the client can write a data packet.
  uint8* audio_data = NULL;
  HRESULT hr = audio_render_client_->GetBuffer(output_buffer_size_frames_,
                                               &audio_data);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to access render buffer";
    return;
  }

  const int bytes_per_sample = output_format_.Format.wBitsPerSample >> 3;

  // Convert the audio bus content to interleaved integer data using
  // |audio_data| as destination.
  output_bus_->Scale(volume_);
  output_bus_->ToInterleaved(
      output_buffer_size_frames_, bytes_per_sample, audio_data);

  // Release the buffer space acquired in the GetBuffer() call.
  hr = audio_render_client_->ReleaseBuffer(output_buffer_size_frames_, 0);
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer";
}

void WASAPIUnifiedStream::HandleError(HRESULT err) {
  CHECK((started() && GetCurrentThreadId() == audio_io_thread_->tid()) ||
        (!started() && GetCurrentThreadId() == creating_thread_id_));
  NOTREACHED() << "Error code: " << std::hex << err;
  if (source_)
    source_->OnError(this);
}

void WASAPIUnifiedStream::StopAndJoinThread(HRESULT err) {
  CHECK(GetCurrentThreadId() == creating_thread_id_);
  DCHECK(audio_io_thread_.get());
  SetEvent(stop_streaming_event_.Get());
  audio_io_thread_->Join();
  audio_io_thread_.reset();
  HandleError(err);
}

}  // namespace media