audio/win/audio_unified_win.h

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
#define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_

#include <Audioclient.h>
#include <MMDeviceAPI.h>

#include <string>

#include "base/compiler_specific.h"
#include "base/gtest_prod_util.h"
#include "base/threading/platform_thread.h"
#include "base/threading/simple_thread.h"
#include "base/win/scoped_co_mem.h"
#include "base/win/scoped_comptr.h"
#include "base/win/scoped_handle.h"
#include "media/audio/audio_io.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_fifo.h"
#include "media/base/channel_mixer.h"
#include "media/base/media_export.h"
#include "media/base/multi_channel_resampler.h"

namespace media {

class AudioManagerWin;

// Implementation of AudioOutputStream for Windows using the Core Audio API
// where both capturing and rendering takes place on the same thread to enable
// audio I/O. This class allows arbitrary combinations of input and output
// devices running off different clocks and using different drivers, with
// potentially differing sample-rates.
//
// It is required to first acquire the native sample rate of the selected
// output device and then use the same rate when creating this object.
// The inner operation depends on the input sample rate which is determined
// during construction. Three different main modes are supported:
//
//  1)  input rate == output rate => input side drives output side directly.
//  2)  input rate != output rate => both sides are driven independently by
//      events and a FIFO plus a resampling unit is used to compensate for
//      differences in sample rates between the two sides.
//  3)  input rate == output rate but native buffer sizes are not identical =>
//      same inner functionality as in (2) to compensate for the differences
//      in buffer sizes and also compensate for any potential clock drift
//      between the two devices.
//
// Mode detection is done at construction and using mode (1) will lead to
// best performance (lower delay and no "varispeed distortion"), i.e., it is
// recommended to use same sample rates for input and output. Mode (2) uses a
// resampler which supports rate adjustments to fine tune for things like
// clock drift and differences in sample rates between different devices.
// Mode (2) - which uses a FIFO and an adjustable multi-channel resampler -
// is also called the varispeed mode and it is used for case (3) as well to
// compensate for the difference in buffer sizes mainly.
// Mode (3) can happen if two different audio devices are used.
// As an example: some devices need a buffer size of 441 @ 44.1kHz and others
// 448 @ 44.1kHz. This is a rare case and will only happen for sample rates
// which are integer multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.).
//
// Implementation notes:
//
//  - Open() can fail if the input and output parameters do not fulfill
//    certain conditions. See source for Open() for more details.
//  - Channel mixing will be performed if the client asks for a larger
//    number of channels than the native audio layer provides.
//    Example: client wants stereo but audio layer provides mono. In this case
//    upmixing from mono to stereo (1->2) will be done.
//
// TODO(henrika):
//
//  - Add support for exclusive mode.
//  - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float
//    as internal sample-value representation.
//  - Perform fine-tuning for non-matching sample rates to reduce latency.
//
class MEDIA_EXPORT WASAPIUnifiedStream
    : public AudioOutputStream,
      public base::DelegateSimpleThread::Delegate {
 public:
  // The ctor takes all the usual parameters, plus |manager| which is the
  // audio manager who is creating this object, and |input_device_id| which
  // identifies the input (capture) device to be opened.
  WASAPIUnifiedStream(AudioManagerWin* manager,
                      const AudioParameters& params,
                      const std::string& input_device_id);

  // The dtor is typically called by the AudioManager only and it is usually
  // triggered by calling AudioOutputStream::Close().
  virtual ~WASAPIUnifiedStream();

  // Implementation of AudioOutputStream.
  virtual bool Open() OVERRIDE;
  virtual void Start(AudioSourceCallback* callback) OVERRIDE;
  virtual void Stop() OVERRIDE;
  virtual void Close() OVERRIDE;
  virtual void SetVolume(double volume) OVERRIDE;
  virtual void GetVolume(double* volume) OVERRIDE;

  // Returns true while the |audio_io_thread_| object exists.
  // NOTE(review): presumably the thread is created in Start() and released
  // in Stop() - confirm against the implementation file.
  bool started() const {
    return audio_io_thread_.get() != NULL;
  }

  // Returns true when both |fifo_| and |resampler_| have been allocated,
  // i.e., when the varispeed mode is active. As described in the class
  // comment, varispeed is used when the input sample rate differs from the
  // output sample rate, and also when the rates match but the native buffer
  // sizes do not.
  bool VarispeedMode() const { return (fifo_ && resampler_); }

 private:
  enum {
    // Time in milliseconds between two successive delay measurements.
    // We save resources by not updating the delay estimates for each capture
    // event (typically 100Hz rate).
    kTimeDiffInMillisecondsBetweenDelayMeasurements = 1000,

    // Max possible FIFO size.
    kFifoSize = 16384,

    // This value was determined empirically for minimum latency while still
    // guarding against FIFO under-runs. The actual target size will be equal
    // to kTargetFifoSafetyFactor * (native input buffer size).
    // TODO(henrika): tune this value for lowest possible latency for all
    // possible sample rate combinations.
    kTargetFifoSafetyFactor = 2
  };

  // Additional initialization required when input and output sample rate
  // differs. Allocates resources for |fifo_|, |resampler_|, |render_event_|,
  // and the |capture_bus_| and configures the |input_format_| structure
  // given the provided input and output audio parameters.
  // NOTE(review): no member named |capture_bus_| is declared in this class;
  // the comment above presumably refers to |input_bus_| - confirm.
  void DoVarispeedInitialization(const AudioParameters& input_params,
                                 const AudioParameters& output_params);

  // Clears varispeed related components such as the FIFO and the resampler.
  void ResetVarispeed();

  // Builds WAVEFORMATEX structures for input and output based on input and
  // output audio parameters.
  void SetIOFormats(const AudioParameters& input_params,
                    const AudioParameters& output_params);

  // DelegateSimpleThread::Delegate implementation.
  virtual void Run() OVERRIDE;

  // MultiChannelResampler::MultiChannelAudioSourceProvider implementation.
  // Callback for providing more data into the resampler.
  // Only used in varispeed mode, i.e., when input rate != output rate.
  // NOTE(review): this class does not derive from a resampler provider
  // interface in this header and the method is not marked OVERRIDE;
  // presumably it is handed to |resampler_| as a bound callback - confirm.
  virtual void ProvideInput(int frame_delay, AudioBus* audio_bus);

  // Issues the OnError() callback to the |source_|.
  void HandleError(HRESULT err);

  // Stops and joins the audio thread in case of an error.
  void StopAndJoinThread(HRESULT err);

  // Converts unique endpoint ID to user-friendly device name.
  std::string GetDeviceName(LPCWSTR device_id) const;

  // Called on the audio IO thread for each capture event.
  // Buffers captured audio into a FIFO if varispeed is used or into an audio
  // bus if input and output sample rates are identical.
  void ProcessInputAudio();

  // Called on the audio IO thread for each render event when varispeed is
  // active or for each capture event when varispeed is not used.
  // In varispeed mode, it triggers a resampling callback, which reads from the
  // FIFO, and calls AudioSourceCallback::OnMoreIOData using the resampled
  // input signal and at the same time asks for data to play out.
  // If input and output rates are the same - instead of reading from the FIFO
  // and doing resampling - we read directly from the audio bus used to store
  // captured data in ProcessInputAudio.
  void ProcessOutputAudio(IAudioClock* audio_output_clock);

  // Contains the thread ID of the creating thread.
  base::PlatformThreadId creating_thread_id_;

  // Our creator, the audio manager needs to be notified when we close.
  AudioManagerWin* manager_;

  // Contains the audio parameter structure provided at construction.
  AudioParameters params_;
  // For convenience, same as in params_.
  int input_channels_;
  int output_channels_;

  // Unique ID of the input device to be opened.
  const std::string input_device_id_;

  // The sharing mode for the streams.
  // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE
  // where AUDCLNT_SHAREMODE_SHARED is the default.
  AUDCLNT_SHAREMODE share_mode_;

  // Rendering and capturing is driven by this thread (no message loop).
  // All OnMoreIOData() callbacks will be called from this thread.
  scoped_ptr<base::DelegateSimpleThread> audio_io_thread_;

  // Contains the desired audio output format which is set up at construction.
  // It is required to first acquire the native sample rate of the selected
  // output device and then use the same rate when creating this object.
  WAVEFORMATPCMEX output_format_;

  // Contains the native audio input format which is set up at construction
  // if varispeed mode is utilized.
  WAVEFORMATPCMEX input_format_;

  // True when successfully opened.
  bool opened_;

  // Volume level from 0 to 1 used for output scaling.
  double volume_;

  // Size in audio frames of each audio packet where an audio packet
  // is defined as the block of data which the destination is expected to
  // receive in each OnMoreIOData() callback.
  size_t output_buffer_size_frames_;

  // Size in audio frames of each audio packet where an audio packet
  // is defined as the block of data which the source is expected to
  // deliver in each OnMoreIOData() callback.
  size_t input_buffer_size_frames_;

  // Length, in audio frames, of the render and capture endpoint buffers.
  uint32 endpoint_render_buffer_size_frames_;
  uint32 endpoint_capture_buffer_size_frames_;

  // Counts the number of audio frames written to the endpoint buffer.
  uint64 num_written_frames_;

  // Time stamp for last delay measurement.
  base::TimeTicks last_delay_sample_time_;

  // Contains the total (sum of render and capture) delay in milliseconds.
  double total_delay_ms_;

  // Contains the total (sum of render and capture and possibly FIFO) delay
  // in bytes. The update frequency is set by a constant called
  // |kTimeDiffInMillisecondsBetweenDelayMeasurements|.
  int total_delay_bytes_;

  // Pointer to the client that will deliver audio samples to be played out.
  AudioSourceCallback* source_;

  // IMMDevice interfaces which represent audio endpoint devices.
  base::win::ScopedComPtr<IMMDevice> endpoint_render_device_;
  base::win::ScopedComPtr<IMMDevice> endpoint_capture_device_;

  // IAudioClient interfaces which enable a client to create and initialize
  // an audio stream between an audio application and the audio engine.
  base::win::ScopedComPtr<IAudioClient> audio_output_client_;
  base::win::ScopedComPtr<IAudioClient> audio_input_client_;

  // The IAudioRenderClient interface enables a client to write output
  // data to a rendering endpoint buffer.
  base::win::ScopedComPtr<IAudioRenderClient> audio_render_client_;

  // The IAudioCaptureClient interface enables a client to read input
  // data from a capturing endpoint buffer.
  base::win::ScopedComPtr<IAudioCaptureClient> audio_capture_client_;

  // The audio engine will signal this event each time a buffer has been
  // recorded.
  base::win::ScopedHandle capture_event_;

  // The audio engine will signal this event each time it needs a new
  // audio buffer to play out.
  // Only utilized in varispeed mode.
  base::win::ScopedHandle render_event_;

  // This event will be signaled when streaming shall stop.
  base::win::ScopedHandle stop_streaming_event_;

  // Container for retrieving data from AudioSourceCallback::OnMoreIOData().
  scoped_ptr<AudioBus> output_bus_;

  // Container for sending data to AudioSourceCallback::OnMoreIOData().
  scoped_ptr<AudioBus> input_bus_;

  // Container for storing output from the channel mixer.
  scoped_ptr<AudioBus> channel_bus_;

  // All members below are only allocated, or used, in varispeed mode:

  // Temporary storage of resampled input audio data.
  scoped_ptr<AudioBus> resampled_bus_;

  // Set to true first time a capture event has been received in varispeed
  // mode.
  bool input_callback_received_;

  // MultiChannelResampler is a multi channel wrapper for SincResampler;
  // allowing high quality sample rate conversion of multiple channels at once.
  scoped_ptr<MultiChannelResampler> resampler_;

  // Resampler I/O ratio.
  double io_sample_rate_ratio_;

  // Used for input to output buffering.
  scoped_ptr<AudioFifo> fifo_;

  // The channel mixer is only created and utilized if number of input channels
  // is larger than the native number of input channels (e.g client wants
  // stereo but the audio device only supports mono).
  scoped_ptr<ChannelMixer> channel_mixer_;

  // The optimal number of frames we'd like to keep in the FIFO at all times.
  int target_fifo_frames_;

  // A running average of the measured delta between actual number of frames
  // in the FIFO versus |target_fifo_frames_|.
  double average_delta_;

  // A varispeed rate scalar which is calculated based on FIFO drift.
  double fifo_rate_compensation_;

  // Set to true when input side signals output side that a new delay
  // estimate is needed.
  bool update_output_delay_;

  // Capture side stores its delay estimate so the sum can be derived in
  // the render side.
  double capture_delay_ms_;

  // TODO(henrika): possibly remove these members once the performance is
  // properly tuned. Only used for off-line debugging.
  // These members only exist in builds where NDEBUG is not defined, so the
  // object layout differs between debug and release builds.
#ifndef NDEBUG
  // Names of the quantities logged for off-line analysis.
  // NOTE(review): presumably used as indices into |num_elements_| - confirm
  // against the implementation file.
  enum LogElementNames {
    INPUT_TIME_STAMP,
    NUM_FRAMES_IN_FIFO,
    RESAMPLER_MARGIN,
    RATE_COMPENSATION
  };

  // Heap-allocated arrays holding the logged debug values.
  scoped_ptr<int64[]> input_time_stamps_;
  scoped_ptr<int[]> num_frames_in_fifo_;
  scoped_ptr<int[]> resampler_margin_;
  scoped_ptr<double[]> fifo_rate_comps_;
  scoped_ptr<int[]> num_elements_;
  scoped_ptr<int[]> input_params_;
  scoped_ptr<int[]> output_params_;

  // Raw C file handles for the debug output files. They are not wrapped in
  // an owning type here, so the implementation must close them explicitly.
  FILE* data_file_;
  FILE* param_file_;
#endif

  DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream);
};

}  // namespace media

#endif  // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_