      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "media/audio/win/audio_unified_win.h"
      6 
      7 #include <Functiondiscoverykeys_devpkey.h>
      8 
      9 #include "base/debug/trace_event.h"
     10 #ifndef NDEBUG
     11 #include "base/file_util.h"
     12 #include "base/path_service.h"
     13 #endif
     14 #include "base/time/time.h"
     15 #include "base/win/scoped_com_initializer.h"
     16 #include "media/audio/win/audio_manager_win.h"
     17 #include "media/audio/win/avrt_wrapper_win.h"
     18 #include "media/audio/win/core_audio_util_win.h"
     19 
     20 using base::win::ScopedComPtr;
     21 using base::win::ScopedCOMInitializer;
     22 using base::win::ScopedCoMem;
     23 
     24 // Smoothing factor in exponential smoothing filter where 0 < alpha < 1.
     25 // Larger values of alpha reduce the level of smoothing.
     26 // See http://en.wikipedia.org/wiki/Exponential_smoothing for details.
     27 static const double kAlpha = 0.1;
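         // Example: with the update avg += kAlpha * (delta - avg), as used in
         // ProcessOutputAudio(), a step change in |delta| reaches roughly 63% of
         // its final value after about 1 / kAlpha = 10 updates.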
     28 
     29 // Compute a rate compensation which always attracts us back to a specified
     30 // target level over a period of |kCorrectionTimeSeconds|.
     31 static const double kCorrectionTimeSeconds = 0.1;
     32 
     33 #ifndef NDEBUG
     34 // Max number of columns in the output text file |kUnifiedAudioDebugFileName|.
     35 // See LogElementNames enumerator for details on what each column represents.
     36 static const size_t kMaxNumSampleTypes = 4;
     37 
     38 static const size_t kMaxNumParams = 2;
     39 
     40 // Max number of rows in the output file |kUnifiedAudioDebugFileName|.
     41 // Each row corresponds to one set of sample values for (approximately) the
     42 // same time instant (stored in the first column).
     43 static const size_t kMaxFileSamples = 10000;
     44 
     45 // Name of output debug file used for off-line analysis of measurements which
     46 // can be utilized for performance tuning of this class.
     47 static const char kUnifiedAudioDebugFileName[] = "unified_win_debug.txt";
     48 
     49 // Name of output debug file used for off-line analysis of measurements.
     50 // This file will contain a list of audio parameters.
     51 static const char kUnifiedAudioParamsFileName[] = "unified_win_params.txt";
     52 #endif
     53 
     54 typedef uint32 ChannelConfig;
     55 
     56 // Retrieves an integer mask which corresponds to the channel layout the
     57 // audio engine uses for its internal processing/mixing of shared-mode
     58 // streams. This mask indicates which channels are present in the multi-
     59 // channel stream. The least significant bit corresponds with the Front Left
     60 // speaker, the next least significant bit corresponds to the Front Right
     61 // speaker, and so on, continuing in the order defined in KsMedia.h.
     62 // See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
     63 // for more details.
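         // For a plain stereo mix format the returned mask is typically 0x3
         // (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT).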
     64 static ChannelConfig GetChannelConfig(EDataFlow data_flow) {
     65   WAVEFORMATPCMEX format;
     66   return SUCCEEDED(media::CoreAudioUtil::GetDefaultSharedModeMixFormat(
     67                    data_flow, eConsole, &format)) ?
      68                    static_cast<ChannelConfig>(format.dwChannelMask) : 0;
     69 }
     70 
     71 // Use the acquired IAudioClock interface to derive a time stamp of the audio
     72 // sample which is currently playing through the speakers.
     73 static double SpeakerStreamPosInMilliseconds(IAudioClock* clock) {
     74   UINT64 device_frequency = 0, position = 0;
     75   if (FAILED(clock->GetFrequency(&device_frequency)) ||
     76       FAILED(clock->GetPosition(&position, NULL))) {
     77     return 0.0;
     78   }
     79   return base::Time::kMillisecondsPerSecond *
     80       (static_cast<double>(position) / device_frequency);
     81 }
     82 
      83 // Get a time stamp in milliseconds given the number of audio frames in
      84 // |num_frames| using the current sample rate |fs| as a scale factor.
     85 // Example: |num_frames| = 960 and |fs| = 48000 => 20 [ms].
     86 static double CurrentStreamPosInMilliseconds(UINT64 num_frames, DWORD fs) {
     87   return base::Time::kMillisecondsPerSecond *
     88       (static_cast<double>(num_frames) / fs);
     89 }
     90 
     91 // Convert a timestamp in milliseconds to byte units given the audio format
     92 // in |format|.
     93 // Example: |ts_milliseconds| equals 10, sample rate is 48000 and frame size
     94 // is 4 bytes per audio frame => 480 * 4 = 1920 [bytes].
     95 static int MillisecondsToBytes(double ts_milliseconds,
     96                                const WAVEFORMATPCMEX& format) {
     97   double seconds = ts_milliseconds / base::Time::kMillisecondsPerSecond;
     98   return static_cast<int>(seconds * format.Format.nSamplesPerSec *
     99       format.Format.nBlockAlign + 0.5);
    100 }
    101 
    102 // Convert frame count to milliseconds given the audio format in |format|.
    103 static double FrameCountToMilliseconds(int num_frames,
    104                                        const WAVEFORMATPCMEX& format) {
    105   return (base::Time::kMillisecondsPerSecond * num_frames) /
    106       static_cast<double>(format.Format.nSamplesPerSec);
    107 }
    108 
    109 namespace media {
    110 
    111 WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager,
    112                                          const AudioParameters& params,
    113                                          const std::string& input_device_id)
    114     : creating_thread_id_(base::PlatformThread::CurrentId()),
    115       manager_(manager),
    116       params_(params),
    117       input_channels_(params.input_channels()),
    118       output_channels_(params.channels()),
    119       input_device_id_(input_device_id),
    120       share_mode_(CoreAudioUtil::GetShareMode()),
    121       opened_(false),
    122       volume_(1.0),
    123       output_buffer_size_frames_(0),
    124       input_buffer_size_frames_(0),
    125       endpoint_render_buffer_size_frames_(0),
    126       endpoint_capture_buffer_size_frames_(0),
    127       num_written_frames_(0),
    128       total_delay_ms_(0.0),
    129       total_delay_bytes_(0),
    130       source_(NULL),
    131       input_callback_received_(false),
    132       io_sample_rate_ratio_(1),
    133       target_fifo_frames_(0),
    134       average_delta_(0),
    135       fifo_rate_compensation_(1),
    136       update_output_delay_(false),
    137       capture_delay_ms_(0) {
    138   TRACE_EVENT0("audio", "WASAPIUnifiedStream::WASAPIUnifiedStream");
    139   VLOG(1) << "WASAPIUnifiedStream::WASAPIUnifiedStream()";
    140   DCHECK(manager_);
    141 
    142   VLOG(1) << "Input channels : " << input_channels_;
    143   VLOG(1) << "Output channels: " << output_channels_;
    144   VLOG(1) << "Sample rate    : " << params_.sample_rate();
    145   VLOG(1) << "Buffer size    : " << params.frames_per_buffer();
    146 
    147 #ifndef NDEBUG
    148   input_time_stamps_.reset(new int64[kMaxFileSamples]);
    149   num_frames_in_fifo_.reset(new int[kMaxFileSamples]);
    150   resampler_margin_.reset(new int[kMaxFileSamples]);
    151   fifo_rate_comps_.reset(new double[kMaxFileSamples]);
    152   num_elements_.reset(new int[kMaxNumSampleTypes]);
    153   std::fill(num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes, 0);
    154   input_params_.reset(new int[kMaxNumParams]);
    155   output_params_.reset(new int[kMaxNumParams]);
    156 #endif
    157 
    158   DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE)
    159       << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled.";
    160 
    161   // Load the Avrt DLL if not already loaded. Required to support MMCSS.
    162   bool avrt_init = avrt::Initialize();
    163   DCHECK(avrt_init) << "Failed to load the avrt.dll";
    164 
    165   // All events are auto-reset events and non-signaled initially.
    166 
    167   // Create the event which the audio engine will signal each time a buffer
    168   // has been recorded.
    169   capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
    170 
     171   // Create the event which will be set in Stop() when streaming shall stop.
    172   stop_streaming_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
    173 }
    174 
    175 WASAPIUnifiedStream::~WASAPIUnifiedStream() {
    176   VLOG(1) << "WASAPIUnifiedStream::~WASAPIUnifiedStream()";
    177 #ifndef NDEBUG
    178   base::FilePath data_file_name;
    179   PathService::Get(base::DIR_EXE, &data_file_name);
    180   data_file_name = data_file_name.AppendASCII(kUnifiedAudioDebugFileName);
    181   data_file_ = file_util::OpenFile(data_file_name, "wt");
    182   DVLOG(1) << ">> Output file " << data_file_name.value() << " is created.";
    183 
    184   size_t n = 0;
    185   size_t elements_to_write = *std::min_element(
    186       num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes);
    187   while (n < elements_to_write) {
    188     fprintf(data_file_, "%I64d %d %d %10.9f\n",
    189         input_time_stamps_[n],
    190         num_frames_in_fifo_[n],
    191         resampler_margin_[n],
    192         fifo_rate_comps_[n]);
    193     ++n;
    194   }
    195   file_util::CloseFile(data_file_);
    196 
    197   base::FilePath param_file_name;
    198   PathService::Get(base::DIR_EXE, &param_file_name);
    199   param_file_name = param_file_name.AppendASCII(kUnifiedAudioParamsFileName);
    200   param_file_ = file_util::OpenFile(param_file_name, "wt");
    201   DVLOG(1) << ">> Output file " << param_file_name.value() << " is created.";
    202   fprintf(param_file_, "%d %d\n", input_params_[0], input_params_[1]);
    203   fprintf(param_file_, "%d %d\n", output_params_[0], output_params_[1]);
    204   file_util::CloseFile(param_file_);
    205 #endif
    206 }
    207 
    208 bool WASAPIUnifiedStream::Open() {
    209   TRACE_EVENT0("audio", "WASAPIUnifiedStream::Open");
    210   DVLOG(1) << "WASAPIUnifiedStream::Open()";
    211   DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
    212   if (opened_)
    213     return true;
    214 
    215   AudioParameters hw_output_params;
    216   HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(
    217       eRender, eConsole, &hw_output_params);
    218   if (FAILED(hr)) {
    219     LOG(ERROR) << "Failed to get preferred output audio parameters.";
    220     return false;
    221   }
    222 
    223   AudioParameters hw_input_params;
    224   if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    225     // Query native parameters for the default capture device.
    226     hr = CoreAudioUtil::GetPreferredAudioParameters(
    227         eCapture, eConsole, &hw_input_params);
    228   } else {
    229     // Query native parameters for the capture device given by
    230     // |input_device_id_|.
    231     hr = CoreAudioUtil::GetPreferredAudioParameters(
    232         input_device_id_, &hw_input_params);
    233   }
    234   if (FAILED(hr)) {
    235     LOG(ERROR) << "Failed to get preferred input audio parameters.";
    236     return false;
    237   }
    238 
    239   // It is currently only possible to open up the output audio device using
    240   // the native number of channels.
    241   if (output_channels_ != hw_output_params.channels()) {
    242     LOG(ERROR) << "Audio device does not support requested output channels.";
    243     return false;
    244   }
    245 
    246   // It is currently only possible to open up the input audio device using
    247   // the native number of channels. If the client asks for a higher channel
    248   // count, we will do channel upmixing in this class. The most typical
    249   // example is that the client provides stereo but the hardware can only be
    250   // opened in mono mode. We will do mono to stereo conversion in this case.
    251   if (input_channels_ < hw_input_params.channels()) {
    252     LOG(ERROR) << "Audio device does not support requested input channels.";
    253     return false;
    254   } else if (input_channels_ > hw_input_params.channels()) {
    255     ChannelLayout input_layout =
    256         GuessChannelLayout(hw_input_params.channels());
    257     ChannelLayout output_layout = GuessChannelLayout(input_channels_);
    258     channel_mixer_.reset(new ChannelMixer(input_layout, output_layout));
    259     DVLOG(1) << "Remixing input channel layout from " << input_layout
    260              << " to " << output_layout << "; from "
    261              << hw_input_params.channels() << " channels to "
    262              << input_channels_;
    263   }
    264 
    265   if (hw_output_params.sample_rate() != params_.sample_rate()) {
    266     LOG(ERROR) << "Requested sample-rate: " << params_.sample_rate()
    267                << " must match the hardware sample-rate: "
    268                << hw_output_params.sample_rate();
    269     return false;
    270   }
    271 
    272   if (hw_output_params.frames_per_buffer() != params_.frames_per_buffer()) {
    273     LOG(ERROR) << "Requested buffer size: " << params_.frames_per_buffer()
    274                << " must match the hardware buffer size: "
    275                << hw_output_params.frames_per_buffer();
    276     return false;
    277   }
    278 
    279   // Set up WAVEFORMATPCMEX structures for input and output given the specified
    280   // audio parameters.
    281   SetIOFormats(hw_input_params, params_);
    282 
    283   // Create the input and output busses.
    284   input_bus_ = AudioBus::Create(
    285       hw_input_params.channels(), input_buffer_size_frames_);
    286   output_bus_ = AudioBus::Create(params_);
    287 
    288   // One extra bus is needed for the input channel mixing case.
    289   if (channel_mixer_) {
    290     DCHECK_LT(hw_input_params.channels(), input_channels_);
    291     // The size of the |channel_bus_| must be the same as the size of the
     292   // output bus to ensure that the channel mixer can deal with both
    293     // resampled and non-resampled data as input.
    294     channel_bus_ = AudioBus::Create(
    295         input_channels_, params_.frames_per_buffer());
    296   }
    297 
     298   // Check if a FIFO and resampling are needed to match the input rate to the
    299   // output rate. If so, a special thread loop, optimized for this case, will
    300   // be used. This mode is also called varispeed mode.
    301   // Note that we can also use this mode when input and output rates are the
    302   // same but native buffer sizes differ (can happen if two different audio
    303   // devices are used). For this case, the resampler uses a target ratio of
    304   // 1.0 but SetRatio is called to compensate for clock-drift. The FIFO is
    305   // required to compensate for the difference in buffer sizes.
    306   // TODO(henrika): we could perhaps improve the performance for the second
     307   // case here by only using the FIFO and avoiding resampling. Not sure how much
     308   // that would gain, and we would risk not compensating for clock drift.
    309   if (hw_input_params.sample_rate() != params_.sample_rate() ||
    310       hw_input_params.frames_per_buffer() != params_.frames_per_buffer()) {
    311     DoVarispeedInitialization(hw_input_params, params_);
    312   }
    313 
    314   // Render side (event driven only in varispeed mode):
    315 
    316   ScopedComPtr<IAudioClient> audio_output_client =
    317       CoreAudioUtil::CreateDefaultClient(eRender, eConsole);
    318   if (!audio_output_client)
    319     return false;
    320 
    321   if (!CoreAudioUtil::IsFormatSupported(audio_output_client,
    322                                         share_mode_,
    323                                         &output_format_)) {
    324     return false;
    325   }
    326 
    327   if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    328     // The |render_event_| will be NULL unless varispeed mode is utilized.
    329     hr = CoreAudioUtil::SharedModeInitialize(
    330         audio_output_client, &output_format_, render_event_.Get(),
    331         &endpoint_render_buffer_size_frames_);
    332   } else {
    333     // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
    334   }
    335   if (FAILED(hr))
    336     return false;
    337 
    338   ScopedComPtr<IAudioRenderClient> audio_render_client =
    339       CoreAudioUtil::CreateRenderClient(audio_output_client);
    340   if (!audio_render_client)
    341     return false;
    342 
    343   // Capture side (always event driven but format depends on varispeed or not):
    344 
    345   ScopedComPtr<IAudioClient> audio_input_client;
    346   if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
    347     audio_input_client = CoreAudioUtil::CreateDefaultClient(eCapture, eConsole);
    348   } else {
    349     ScopedComPtr<IMMDevice> audio_input_device(
    350       CoreAudioUtil::CreateDevice(input_device_id_));
    351     audio_input_client = CoreAudioUtil::CreateClient(audio_input_device);
    352   }
    353   if (!audio_input_client)
    354     return false;
    355 
    356   if (!CoreAudioUtil::IsFormatSupported(audio_input_client,
    357                                         share_mode_,
    358                                         &input_format_)) {
    359     return false;
    360   }
    361 
    362   if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    363     // Include valid event handle for event-driven initialization.
     364   // The input side is always event driven, independent of whether varispeed
     365   // is used or not.
    366     hr = CoreAudioUtil::SharedModeInitialize(
    367         audio_input_client, &input_format_, capture_event_.Get(),
    368         &endpoint_capture_buffer_size_frames_);
    369   } else {
    370     // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
    371   }
    372   if (FAILED(hr))
    373     return false;
    374 
    375   ScopedComPtr<IAudioCaptureClient> audio_capture_client =
    376       CoreAudioUtil::CreateCaptureClient(audio_input_client);
    377   if (!audio_capture_client)
    378     return false;
    379 
    380   // Varispeed mode requires additional preparations.
    381   if (VarispeedMode())
    382     ResetVarispeed();
    383 
    384   // Store all valid COM interfaces.
    385   audio_output_client_ = audio_output_client;
    386   audio_render_client_ = audio_render_client;
    387   audio_input_client_ = audio_input_client;
    388   audio_capture_client_ = audio_capture_client;
    389 
    390   opened_ = true;
    391   return SUCCEEDED(hr);
    392 }
    393 
    394 void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) {
    395   TRACE_EVENT0("audio", "WASAPIUnifiedStream::Start");
    396   DVLOG(1) << "WASAPIUnifiedStream::Start()";
    397   DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
    398   CHECK(callback);
    399   CHECK(opened_);
    400 
    401   if (audio_io_thread_) {
    402     CHECK_EQ(callback, source_);
    403     return;
    404   }
    405 
    406   source_ = callback;
    407 
    408   if (VarispeedMode()) {
    409     ResetVarispeed();
    410     fifo_rate_compensation_ = 1.0;
    411     average_delta_ = 0.0;
    412     input_callback_received_ = false;
    413     update_output_delay_ = false;
    414   }
    415 
    416   // Create and start the thread that will listen for capture events.
    417   // We will also listen on render events on the same thread if varispeed
    418   // mode is utilized.
    419   audio_io_thread_.reset(
    420       new base::DelegateSimpleThread(this, "wasapi_io_thread"));
    421   audio_io_thread_->Start();
    422   if (!audio_io_thread_->HasBeenStarted()) {
    423     DLOG(ERROR) << "Failed to start WASAPI IO thread.";
    424     return;
    425   }
    426 
    427   // Start input streaming data between the endpoint buffer and the audio
    428   // engine.
    429   HRESULT hr = audio_input_client_->Start();
    430   if (FAILED(hr)) {
    431     StopAndJoinThread(hr);
    432     return;
    433   }
    434 
    435   // Ensure that the endpoint buffer is prepared with silence.
    436   if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    437     if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
    438              audio_output_client_, audio_render_client_)) {
    439       DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    440       return;
    441     }
    442   }
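           // The endpoint buffer was just pre-filled with silence, so start the
           // rendered-frame counter at the full endpoint buffer size to keep the
           // delay bookkeeping in ProcessOutputAudio() consistent.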
    443   num_written_frames_ = endpoint_render_buffer_size_frames_;
    444 
    445   // Start output streaming data between the endpoint buffer and the audio
    446   // engine.
    447   hr = audio_output_client_->Start();
    448   if (FAILED(hr)) {
    449     StopAndJoinThread(hr);
    450     return;
    451   }
    452 }
    453 
    454 void WASAPIUnifiedStream::Stop() {
    455   TRACE_EVENT0("audio", "WASAPIUnifiedStream::Stop");
    456   DVLOG(1) << "WASAPIUnifiedStream::Stop()";
    457   DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
    458   if (!audio_io_thread_)
    459     return;
    460 
    461   // Stop input audio streaming.
    462   HRESULT hr = audio_input_client_->Stop();
    463   if (FAILED(hr)) {
    464     DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
    465       << "Failed to stop input streaming: " << std::hex << hr;
    466   }
    467 
    468   // Stop output audio streaming.
    469   hr = audio_output_client_->Stop();
    470   if (FAILED(hr)) {
    471     DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
    472         << "Failed to stop output streaming: " << std::hex << hr;
    473   }
    474 
    475   // Wait until the thread completes and perform cleanup.
    476   SetEvent(stop_streaming_event_.Get());
    477   audio_io_thread_->Join();
    478   audio_io_thread_.reset();
    479 
    480   // Ensure that we don't quit the main thread loop immediately next
    481   // time Start() is called.
    482   ResetEvent(stop_streaming_event_.Get());
    483 
    484   // Clear source callback, it'll be set again on the next Start() call.
    485   source_ = NULL;
    486 
    487   // Flush all pending data and reset the audio clock stream position to 0.
    488   hr = audio_output_client_->Reset();
    489   if (FAILED(hr)) {
    490     DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
    491         << "Failed to reset output streaming: " << std::hex << hr;
    492   }
    493 
     494   hr = audio_input_client_->Reset();
    495   if (FAILED(hr)) {
    496     DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
    497         << "Failed to reset input streaming: " << std::hex << hr;
    498   }
    499 
    500   // Extra safety check to ensure that the buffers are cleared.
    501   // If the buffers are not cleared correctly, the next call to Start()
    502   // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
     503   // TODO(henrika): this check is only needed for shared-mode streams.
    504   UINT32 num_queued_frames = 0;
    505   audio_output_client_->GetCurrentPadding(&num_queued_frames);
    506   DCHECK_EQ(0u, num_queued_frames);
    507 }
    508 
    509 void WASAPIUnifiedStream::Close() {
    510   TRACE_EVENT0("audio", "WASAPIUnifiedStream::Close");
    511   DVLOG(1) << "WASAPIUnifiedStream::Close()";
    512   DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
    513 
     514   // It is valid to call Close() before calling Open() or Start().
    515   // It is also valid to call Close() after Start() has been called.
    516   Stop();
    517 
    518   // Inform the audio manager that we have been closed. This will cause our
    519   // destruction.
    520   manager_->ReleaseOutputStream(this);
    521 }
    522 
    523 void WASAPIUnifiedStream::SetVolume(double volume) {
    524   DVLOG(1) << "SetVolume(volume=" << volume << ")";
    525   if (volume < 0 || volume > 1)
    526     return;
    527   volume_ = volume;
    528 }
    529 
    530 void WASAPIUnifiedStream::GetVolume(double* volume) {
    531   DVLOG(1) << "GetVolume()";
    532   *volume = static_cast<double>(volume_);
    533 }
    534 
    535 
    536 void WASAPIUnifiedStream::ProvideInput(int frame_delay, AudioBus* audio_bus) {
    537   // TODO(henrika): utilize frame_delay?
     538   // A non-zero frame delay means multiple callbacks were necessary to
    539   // fulfill the requested number of frames.
    540   if (frame_delay > 0)
    541     DVLOG(3) << "frame_delay: " << frame_delay;
    542 
    543 #ifndef NDEBUG
    544   resampler_margin_[num_elements_[RESAMPLER_MARGIN]] =
    545     fifo_->frames() - audio_bus->frames();
    546   num_elements_[RESAMPLER_MARGIN]++;
    547 #endif
    548 
    549   if (fifo_->frames() < audio_bus->frames()) {
     550     DLOG(ERROR) << "Not enough data in the FIFO ("
     551                 << fifo_->frames() << " < " << audio_bus->frames() << ")";
    552     audio_bus->Zero();
    553     return;
    554   }
    555 
    556   fifo_->Consume(audio_bus, 0, audio_bus->frames());
    557 }
    558 
    559 void WASAPIUnifiedStream::SetIOFormats(const AudioParameters& input_params,
    560                                        const AudioParameters& output_params) {
    561   for (int n = 0; n < 2; ++n) {
    562     const AudioParameters& params = (n == 0) ? input_params : output_params;
    563     WAVEFORMATPCMEX* xformat = (n == 0) ? &input_format_ : &output_format_;
    564     WAVEFORMATEX* format = &xformat->Format;
    565 
    566     // Begin with the WAVEFORMATEX structure that specifies the basic format.
    567     format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
     568     format->nChannels = params.channels();
    569     format->nSamplesPerSec = params.sample_rate();
    570     format->wBitsPerSample = params.bits_per_sample();
    571     format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
    572     format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
    573     format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
    574 
    575     // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
    576     // Note that we always open up using the native channel layout.
    577     (*xformat).Samples.wValidBitsPerSample = format->wBitsPerSample;
    578     (*xformat).dwChannelMask = (n == 0) ?
    579         GetChannelConfig(eCapture) : GetChannelConfig(eRender);
    580     (*xformat).SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
    581   }
    582 
    583   input_buffer_size_frames_ = input_params.frames_per_buffer();
    584   output_buffer_size_frames_ = output_params.frames_per_buffer();
    585   VLOG(1) << "#audio frames per input buffer : " << input_buffer_size_frames_;
    586   VLOG(1) << "#audio frames per output buffer: " << output_buffer_size_frames_;
    587 
    588 #ifndef NDEBUG
    589   input_params_[0] = input_format_.Format.nSamplesPerSec;
    590   input_params_[1] = input_buffer_size_frames_;
    591   output_params_[0] = output_format_.Format.nSamplesPerSec;
    592   output_params_[1] = output_buffer_size_frames_;
    593 #endif
    594 }
    595 
    596 void WASAPIUnifiedStream::DoVarispeedInitialization(
    597     const AudioParameters& input_params, const AudioParameters& output_params) {
    598   DVLOG(1) << "WASAPIUnifiedStream::DoVarispeedInitialization()";
    599 
    600   // A FIFO is required in this mode for input to output buffering.
    601   // Note that it will add some latency.
    602   fifo_.reset(new AudioFifo(input_params.channels(), kFifoSize));
    603   VLOG(1) << "Using FIFO of size " << fifo_->max_frames()
    604           << " (#channels=" << input_params.channels() << ")";
    605 
    606   // Create the multi channel resampler using the initial sample rate ratio.
    607   // We will call MultiChannelResampler::SetRatio() during runtime to
    608   // allow arbitrary combinations of input and output devices running off
    609   // different clocks and using different drivers, with potentially
    610   // differing sample-rates. Note that the requested block size is given by
    611   // the native input buffer size |input_buffer_size_frames_|.
    612   io_sample_rate_ratio_ = input_params.sample_rate() /
    613       static_cast<double>(output_params.sample_rate());
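           // Example: a 44100 Hz capture device and a 48000 Hz render device give
           // an initial ratio of 44100 / 48000 = 0.91875; SetRatio() then trims
           // this value at runtime to absorb clock drift between the devices.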
    614   DVLOG(2) << "io_sample_rate_ratio: " << io_sample_rate_ratio_;
    615   resampler_.reset(new MultiChannelResampler(
    616       input_params.channels(), io_sample_rate_ratio_, input_buffer_size_frames_,
    617       base::Bind(&WASAPIUnifiedStream::ProvideInput, base::Unretained(this))));
    618   VLOG(1) << "Resampling from " << input_params.sample_rate() << " to "
    619           << output_params.sample_rate();
    620 
    621   // The optimal number of frames we'd like to keep in the FIFO at all times.
    622   // The actual size will vary but the goal is to ensure that the average size
    623   // is given by this value.
    624   target_fifo_frames_ = kTargetFifoSafetyFactor * input_buffer_size_frames_;
    625   VLOG(1) << "Target FIFO size: " <<  target_fifo_frames_;
    626 
    627   // Create the event which the audio engine will signal each time it
    628   // wants an audio buffer to render.
    629   render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
    630 
    631   // Allocate memory for temporary audio bus used to store resampled input
    632   // audio.
    633   resampled_bus_ = AudioBus::Create(
    634       input_params.channels(), output_buffer_size_frames_);
    635 
    636   // Buffer initial silence corresponding to target I/O buffering.
    637   ResetVarispeed();
    638 }
    639 
    640 void WASAPIUnifiedStream::ResetVarispeed() {
    641   DCHECK(VarispeedMode());
    642 
    643   // Buffer initial silence corresponding to target I/O buffering.
    644   fifo_->Clear();
    645   scoped_ptr<AudioBus> silence =
    646       AudioBus::Create(input_format_.Format.nChannels,
    647                        target_fifo_frames_);
    648   silence->Zero();
    649   fifo_->Push(silence.get());
    650   resampler_->Flush();
    651 }
    652 
    653 void WASAPIUnifiedStream::Run() {
    654   ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);
    655 
    656   // Increase the thread priority.
    657   audio_io_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);
    658 
    659   // Enable MMCSS to ensure that this thread receives prioritized access to
    660   // CPU resources.
    661   // TODO(henrika): investigate if it is possible to include these additional
    662   // settings in SetThreadPriority() as well.
    663   DWORD task_index = 0;
    664   HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
    665                                                       &task_index);
    666   bool mmcss_is_ok =
    667       (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
    668   if (!mmcss_is_ok) {
    669     // Failed to enable MMCSS on this thread. It is not fatal but can lead
    670     // to reduced QoS at high load.
    671     DWORD err = GetLastError();
    672     LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
    673   }
    674 
    675   // The IAudioClock interface enables us to monitor a stream's data
    676   // rate and the current position in the stream. Allocate it before we
    677   // start spinning.
    678   ScopedComPtr<IAudioClock> audio_output_clock;
    679   HRESULT hr = audio_output_client_->GetService(
    680       __uuidof(IAudioClock), audio_output_clock.ReceiveVoid());
    681   LOG_IF(WARNING, FAILED(hr)) << "Failed to create IAudioClock: "
    682                               << std::hex << hr;
    683 
    684   bool streaming = true;
    685   bool error = false;
    686 
    687   HANDLE wait_array[3];
    688   size_t num_handles = 0;
    689   wait_array[num_handles++] = stop_streaming_event_;
    690   wait_array[num_handles++] = capture_event_;
    691   if (render_event_) {
    692     // One extra event handle is needed in varispeed mode.
    693     wait_array[num_handles++] = render_event_;
    694   }
    695 
    696   // Keep streaming audio until stop event is signaled.
    697   // Capture events are always used but render events are only active in
    698   // varispeed mode.
    699   while (streaming && !error) {
    700     // Wait for a close-down event, or a new capture event.
    701     DWORD wait_result = WaitForMultipleObjects(num_handles,
    702                                                wait_array,
    703                                                FALSE,
    704                                                INFINITE);
    705     switch (wait_result) {
    706       case WAIT_OBJECT_0 + 0:
    707         // |stop_streaming_event_| has been set.
    708         streaming = false;
    709         break;
    710       case WAIT_OBJECT_0 + 1:
    711         // |capture_event_| has been set
    712         if (VarispeedMode()) {
    713           ProcessInputAudio();
    714         } else {
    715           ProcessInputAudio();
    716           ProcessOutputAudio(audio_output_clock);
    717         }
    718         break;
    719       case WAIT_OBJECT_0 + 2:
    720         DCHECK(VarispeedMode());
    721         // |render_event_| has been set
    722         ProcessOutputAudio(audio_output_clock);
    723         break;
    724       default:
    725         error = true;
    726         break;
    727     }
    728   }
    729 
    730   if (streaming && error) {
    731     // Stop audio streaming since something has gone wrong in our main thread
    732     // loop. Note that, we are still in a "started" state, hence a Stop() call
    733     // is required to join the thread properly.
    734     audio_input_client_->Stop();
    735     audio_output_client_->Stop();
    736     PLOG(ERROR) << "WASAPI streaming failed.";
    737   }
    738 
    739   // Disable MMCSS.
    740   if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    741     PLOG(WARNING) << "Failed to disable MMCSS";
    742   }
    743 }
    744 
    745 void WASAPIUnifiedStream::ProcessInputAudio() {
    746   TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessInputAudio");
    747 
    748   BYTE* data_ptr = NULL;
    749   UINT32 num_captured_frames = 0;
    750   DWORD flags = 0;
    751   UINT64 device_position = 0;
    752   UINT64 capture_time_stamp = 0;
    753 
    754   const int bytes_per_sample = input_format_.Format.wBitsPerSample >> 3;
    755 
    756   base::TimeTicks now_tick = base::TimeTicks::HighResNow();
    757 
    758 #ifndef NDEBUG
    759   if (VarispeedMode()) {
    760     input_time_stamps_[num_elements_[INPUT_TIME_STAMP]] =
    761         now_tick.ToInternalValue();
    762     num_elements_[INPUT_TIME_STAMP]++;
    763   }
    764 #endif
    765 
    766   // Retrieve the amount of data in the capture endpoint buffer.
     767   // |capture_time_stamp| is the value of the performance
    768   // counter at the time that the audio endpoint device recorded
    769   // the device position of the first audio frame in the data packet.
    770   HRESULT hr = audio_capture_client_->GetBuffer(&data_ptr,
    771                                                 &num_captured_frames,
    772                                                 &flags,
    773                                                 &device_position,
    774                                                 &capture_time_stamp);
    775   if (FAILED(hr)) {
    776     DLOG(ERROR) << "Failed to get data from the capture buffer";
    777     return;
    778   }
    779 
    780   if (hr == AUDCLNT_S_BUFFER_EMPTY) {
     781     // The return code is a success code but a new packet is *not* available
    782     // and none of the output parameters in the GetBuffer() call contains valid
    783     // values. Best we can do is to deliver silence and avoid setting
    784     // |input_callback_received_| since this only seems to happen for the
    785     // initial event(s) on some devices.
    786     input_bus_->Zero();
    787   } else {
    788     // Valid data has been recorded and it is now OK to set the flag which
    789     // informs the render side that capturing has started.
    790     input_callback_received_ = true;
    791   }
    792 
    793   if (num_captured_frames != 0) {
    794     if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
    795       // Clear out the capture buffer since silence is reported.
    796       input_bus_->Zero();
    797     } else {
    798       // Store captured data in an audio bus after de-interleaving
    799       // the data to match the audio bus structure.
    800       input_bus_->FromInterleaved(
    801           data_ptr, num_captured_frames, bytes_per_sample);
    802     }
    803   }
    804 
    805   hr = audio_capture_client_->ReleaseBuffer(num_captured_frames);
    806   DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";
    807 
    808   // Buffer input into FIFO if varispeed mode is used. The render event
    809   // will drive resampling of this data to match the output side.
    810   if (VarispeedMode()) {
    811     int available_frames = fifo_->max_frames() - fifo_->frames();
    812     if (input_bus_->frames() <= available_frames) {
    813       fifo_->Push(input_bus_.get());
    814     }
    815 #ifndef NDEBUG
    816     num_frames_in_fifo_[num_elements_[NUM_FRAMES_IN_FIFO]] =
    817         fifo_->frames();
    818     num_elements_[NUM_FRAMES_IN_FIFO]++;
    819 #endif
    820   }
    821 
     822   // Save resources by not asking for new delay estimates each time.
    823   // These estimates are fairly stable and it is perfectly safe to only
    824   // sample at a rate of ~1Hz.
    825   // TODO(henrika): we might have to increase the update rate in varispeed
    826   // mode since the delay variations are higher in this mode.
    827   if ((now_tick - last_delay_sample_time_).InMilliseconds() >
    828       kTimeDiffInMillisecondsBetweenDelayMeasurements &&
    829       input_callback_received_) {
    830     // Calculate the estimated capture delay, i.e., the latency between
     831     // the recording time and the time when we are notified about
    832     // the recorded data. Note that the capture time stamp is given in
    833     // 100-nanosecond (0.1 microseconds) units.
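             // TimeTicks::FromInternalValue() expects microseconds, hence the 0.1
             // factor below which converts from 100-nanosecond units.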
    834     base::TimeDelta diff =
    835       now_tick - base::TimeTicks::FromInternalValue(0.1 * capture_time_stamp);
    836     capture_delay_ms_ = diff.InMillisecondsF();
    837 
    838     last_delay_sample_time_ = now_tick;
    839     update_output_delay_ = true;
    840   }
    841 }
    842 
    843 void WASAPIUnifiedStream::ProcessOutputAudio(IAudioClock* audio_output_clock) {
    844   TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessOutputAudio");
    845 
    846   if (!input_callback_received_) {
    847     if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    848       if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
    849               audio_output_client_, audio_render_client_))
    850         DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
    851     }
    852     return;
    853   }
    854 
    855   // Rate adjusted resampling is required in varispeed mode. It means that
    856   // recorded audio samples will be read from the FIFO, resampled to match the
    857   // output sample-rate and then stored in |resampled_bus_|.
    858   if (VarispeedMode()) {
    859     // Calculate a varispeed rate scalar factor to compensate for drift between
    860     // input and output.  We use the actual number of frames still in the FIFO
    861     // compared with the ideal value of |target_fifo_frames_|.
    862     int delta = fifo_->frames() - target_fifo_frames_;
    863 
    864     // Average |delta| because it can jitter back/forth quite frequently
    865     // by +/- the hardware buffer-size *if* the input and output callbacks are
    866     // happening at almost exactly the same time.  Also, if the input and output
    867     // sample-rates are different then |delta| will jitter quite a bit due to
    868     // the rate conversion happening in the varispeed, plus the jittering of
    869     // the callbacks.  The average value is what's important here.
    870     // We use an exponential smoothing filter to reduce the variations.
    871     average_delta_ += kAlpha * (delta - average_delta_);
    872 
    873     // Compute a rate compensation which always attracts us back to the
    874     // |target_fifo_frames_| over a period of kCorrectionTimeSeconds.
    875     double correction_time_frames =
    876         kCorrectionTimeSeconds * output_format_.Format.nSamplesPerSec;
    877     fifo_rate_compensation_ =
    878         (correction_time_frames + average_delta_) / correction_time_frames;
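             // Example: at 48000 Hz output, correction_time_frames is 4800. If the
             // FIFO holds 480 frames more than the target on average, the factor
             // becomes (4800 + 480) / 4800 = 1.1, draining the excess over ~0.1 s.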
    879 
    880 #ifndef NDEBUG
    881     fifo_rate_comps_[num_elements_[RATE_COMPENSATION]] =
    882         fifo_rate_compensation_;
    883     num_elements_[RATE_COMPENSATION]++;
    884 #endif
    885 
    886     // Adjust for FIFO drift.
    887     const double new_ratio = io_sample_rate_ratio_ * fifo_rate_compensation_;
    888     resampler_->SetRatio(new_ratio);
    889     // Get resampled input audio from FIFO where the size is given by the
    890     // output side.
    891     resampler_->Resample(resampled_bus_->frames(), resampled_bus_.get());
    892   }
    893 
    894   // Derive a new total delay estimate if the capture side has set the
    895   // |update_output_delay_| flag.
    896   if (update_output_delay_) {
    897     // Calculate the estimated render delay, i.e., the time difference
    898     // between the time when data is added to the endpoint buffer and
    899     // when the data is played out on the actual speaker.
    900     const double stream_pos = CurrentStreamPosInMilliseconds(
    901         num_written_frames_ + output_buffer_size_frames_,
    902         output_format_.Format.nSamplesPerSec);
    903     const double speaker_pos =
    904         SpeakerStreamPosInMilliseconds(audio_output_clock);
    905     const double render_delay_ms = stream_pos - speaker_pos;
    906     const double fifo_delay_ms = VarispeedMode() ?
    907       FrameCountToMilliseconds(target_fifo_frames_, input_format_) : 0;
    908 
    909     // Derive the total delay, i.e., the sum of the input and output
    910     // delays. Also convert the value into byte units. An extra FIFO delay
    911     // is added for varispeed usage cases.
    912     total_delay_ms_ = VarispeedMode() ?
    913       capture_delay_ms_ + render_delay_ms + fifo_delay_ms :
    914       capture_delay_ms_ + render_delay_ms;
    915     DVLOG(2) << "total_delay_ms   : " << total_delay_ms_;
    916     DVLOG(3) << " capture_delay_ms: " << capture_delay_ms_;
    917     DVLOG(3) << " render_delay_ms : " << render_delay_ms;
    918     DVLOG(3) << " fifo_delay_ms   : " << fifo_delay_ms;
    919     total_delay_bytes_ = MillisecondsToBytes(total_delay_ms_, output_format_);
    920 
    921     // Wait for new signal from the capture side.
    922     update_output_delay_ = false;
    923   }
    924 
    925   // Select source depending on if varispeed is utilized or not.
    926   // Also, the source might be the output of a channel mixer if channel mixing
    927   // is required to match the native input channels to the number of input
    928   // channels used by the client (given by |input_channels_| in this case).
    929   AudioBus* input_bus = VarispeedMode() ?
    930       resampled_bus_.get() : input_bus_.get();
    931   if (channel_mixer_) {
    932     DCHECK_EQ(input_bus->frames(), channel_bus_->frames());
    933     // Most common case is 1->2 channel upmixing.
    934     channel_mixer_->Transform(input_bus, channel_bus_.get());
    935     // Use the output from the channel mixer as new input bus.
    936     input_bus = channel_bus_.get();
    937   }
    938 
    939   // Prepare for rendering by calling OnMoreIOData().
    940   int frames_filled = source_->OnMoreIOData(
    941       input_bus,
    942       output_bus_.get(),
    943       AudioBuffersState(0, total_delay_bytes_));
    944   DCHECK_EQ(frames_filled, output_bus_->frames());
    945 
    946   // Keep track of number of rendered frames since we need it for
    947   // our delay calculations.
    948   num_written_frames_ += frames_filled;
    949 
     950   // Derive the amount of available space in the endpoint buffer.
    951   // Avoid render attempt if there is no room for a captured packet.
    952   UINT32 num_queued_frames = 0;
    953   audio_output_client_->GetCurrentPadding(&num_queued_frames);
    954   if (endpoint_render_buffer_size_frames_ - num_queued_frames <
    955       output_buffer_size_frames_)
    956     return;
    957 
    958   // Grab all available space in the rendering endpoint buffer
    959   // into which the client can write a data packet.
    960   uint8* audio_data = NULL;
    961   HRESULT hr = audio_render_client_->GetBuffer(output_buffer_size_frames_,
    962                                                &audio_data);
    963   if (FAILED(hr)) {
    964     DLOG(ERROR) << "Failed to access render buffer";
    965     return;
    966   }
    967 
    968   const int bytes_per_sample = output_format_.Format.wBitsPerSample >> 3;
    969 
    970   // Convert the audio bus content to interleaved integer data using
    971   // |audio_data| as destination.
    972   output_bus_->Scale(volume_);
    973   output_bus_->ToInterleaved(
    974       output_buffer_size_frames_, bytes_per_sample, audio_data);
    975 
    976   // Release the buffer space acquired in the GetBuffer() call.
     977   hr = audio_render_client_->ReleaseBuffer(output_buffer_size_frames_, 0);
    978   DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer";
    979 
    980   return;
    981 }
    982 
    983 void WASAPIUnifiedStream::HandleError(HRESULT err) {
    984   CHECK((started() && GetCurrentThreadId() == audio_io_thread_->tid()) ||
    985         (!started() && GetCurrentThreadId() == creating_thread_id_));
    986   NOTREACHED() << "Error code: " << std::hex << err;
    987   if (source_)
    988     source_->OnError(this);
    989 }
    990 
    991 void WASAPIUnifiedStream::StopAndJoinThread(HRESULT err) {
    992   CHECK(GetCurrentThreadId() == creating_thread_id_);
    993   DCHECK(audio_io_thread_.get());
    994   SetEvent(stop_streaming_event_.Get());
    995   audio_io_thread_->Join();
    996   audio_io_thread_.reset();
    997   HandleError(err);
    998 }
    999 
   1000 }  // namespace media
   1001