Home | History | Annotate | Download | only in win
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
      6 #define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
      7 
      8 #include <Audioclient.h>
      9 #include <MMDeviceAPI.h>
     10 
     11 #include <string>
     12 
     13 #include "base/compiler_specific.h"
     14 #include "base/gtest_prod_util.h"
     15 #include "base/threading/platform_thread.h"
     16 #include "base/threading/simple_thread.h"
     17 #include "base/win/scoped_co_mem.h"
     18 #include "base/win/scoped_comptr.h"
     19 #include "base/win/scoped_handle.h"
     20 #include "media/audio/audio_io.h"
     21 #include "media/audio/audio_parameters.h"
     22 #include "media/base/audio_fifo.h"
     23 #include "media/base/channel_mixer.h"
     24 #include "media/base/media_export.h"
     25 #include "media/base/multi_channel_resampler.h"
     26 
     27 namespace media {
     28 
     29 class AudioManagerWin;
     30 
     31 // Implementation of AudioOutputStream for Windows using the Core Audio API
     32 // where both capturing and rendering takes place on the same thread to enable
     33 // audio I/O. This class allows arbitrary combinations of input and output
     34 // devices running off different clocks and using different drivers, with
     35 // potentially differing sample-rates.
     36 //
     37 // It is required to first acquire the native sample rate of the selected
     38 // output device and then use the same rate when creating this object.
     39 // The inner operation depends on the input sample rate which is determined
     40 // during construction. Three different main modes are supported:
     41 //
     42 //  1)  input rate == output rate => input side drives output side directly.
     43 //  2)  input rate != output rate => both sides are driven independently by
     44 //      events and a FIFO plus a resampling unit is used to compensate for
     45 //      differences in sample rates between the two sides.
     46 //  3)  input rate == output rate but native buffer sizes are not identical =>
     47 //      same inner functionality as in (2) to compensate for the differences
     48 //      in buffer sizes and also compensate for any potential clock drift
     49 //      between the two devices.
     50 //
     51 // Mode detection is done at construction and using mode (1) will lead to
     52 // best performance (lower delay and no "varispeed distortion"), i.e., it is
     53 // recommended to use same sample rates for input and output. Mode (2) uses a
     54 // resampler which supports rate adjustments to fine tune for things like
     55 // clock drift and differences in sample rates between different devices.
     56 // Mode (2) - which uses a FIFO and an adjustable multi-channel resampler -
     57 // is also called the varispeed mode and it is used for case (3) as well to
     58 // compensate for the difference in buffer sizes mainly.
     59 // Mode (3) can happen if two different audio devices are used.
     60 // As an example: some devices need a buffer size of 441 @ 44.1kHz and others
     61 // 448 @ 44.1kHz. This is a rare case and will only happen for sample rates
     62 // which are even multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.).
     63 //
     64 // Implementation notes:
     65 //
     66 //  - Open() can fail if the input and output parameters do not fulfill
     67 //    certain conditions. See source for Open() for more details.
     68 //  - Channel mixing will be performed if the client asks for a larger
     69 //    number of channels than the native audio layer provides.
     70 //    Example: client wants stereo but audio layer provides mono. In this case
     71 //    upmixing from mono to stereo (1->2) will be done.
     72 //
     73 // TODO(henrika):
     74 //
     75 //  - Add support for exclusive mode.
     76 //  - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float
     77 //    as internal sample-value representation.
     78 //  - Perform fine-tuning for non-matching sample rates to reduce latency.
     79 //
     80 class MEDIA_EXPORT WASAPIUnifiedStream
     81     : public AudioOutputStream,
     82       public base::DelegateSimpleThread::Delegate {
     83  public:
     84   // The ctor takes all the usual parameters, plus |manager| which is the
     85   // audio manager that is creating this object.
     86   WASAPIUnifiedStream(AudioManagerWin* manager,
     87                       const AudioParameters& params,
     88                       const std::string& input_device_id);
     89 
     90   // The dtor is typically called by the AudioManager only and it is usually
     91   // triggered by calling AudioOutputStream::Close().
     92   virtual ~WASAPIUnifiedStream();
     93 
     94   // Implementation of AudioOutputStream.
     95   virtual bool Open() OVERRIDE;
     96   virtual void Start(AudioSourceCallback* callback) OVERRIDE;
     97   virtual void Stop() OVERRIDE;
     98   virtual void Close() OVERRIDE;
     99   virtual void SetVolume(double volume) OVERRIDE;
    100   virtual void GetVolume(double* volume) OVERRIDE;
    101 
    102   bool started() const {  // True while |audio_io_thread_| is non-NULL.
    103     return audio_io_thread_.get() != NULL;
    104   }
    105 
    106   // Returns true when both |fifo_| and |resampler_| are non-NULL, i.e., when
    107   // the FIFO and the adjustable multi-channel resampler (varispeed) are in use.
    108   bool VarispeedMode() const { return (fifo_ && resampler_); }
    109 
    110  private:
    111   enum {
    112     // Time in milliseconds between two successive delay measurements.
    113     // We save resources by not updating the delay estimates for each capture
    114     // event (typically 100Hz rate).
    115     kTimeDiffInMillisecondsBetweenDelayMeasurements = 1000,
    116 
    117     // Max possible FIFO size.
    118     kFifoSize = 16384,
    119 
    120     // This value was determined empirically for minimum latency while still
    121     // guarding against FIFO under-runs. The actual target size will be equal
    122     // to kTargetFifoSafetyFactor * (native input buffer size).
    123     // TODO(henrika): tune this value for lowest possible latency for all
    124     // possible sample rate combinations.
    125     kTargetFifoSafetyFactor = 2
    126   };
    127 
    128   // Additional initialization required when input and output sample rates
    129   // differ. Allocates resources for |fifo_|, |resampler_|, |render_event_|,
    130   // and the |capture_bus_| (NOTE(review): no such member is declared in this
    131   // class - stale name?) and configures |input_format_| from the parameters.
    132   void DoVarispeedInitialization(const AudioParameters& input_params,
    133                                  const AudioParameters& output_params);
    134 
    135   // Clears varispeed related components such as the FIFO and the resampler.
    136   void ResetVarispeed();
    137 
    138   // Builds WAVEFORMATEX structures for input and output based on input and
    139   // output audio parameters.
    140   void SetIOFormats(const AudioParameters& input_params,
    141                     const AudioParameters& output_params);
    142 
    143   // DelegateSimpleThread::Delegate implementation.
    144   virtual void Run() OVERRIDE;
    145 
    146   // Callback for providing more data into the resampler. Only used in
    147   // varispeed mode. NOTE(review): the class does not derive from a
    148   // MultiChannelResampler provider interface - confirm |virtual| is needed.
    149   virtual void ProvideInput(int frame_delay, AudioBus* audio_bus);
    150 
    151   // Issues the OnError() callback to the |source_|.
    152   void HandleError(HRESULT err);
    153 
    154   // Stops and joins the audio thread in case of an error.
    155   void StopAndJoinThread(HRESULT err);
    156 
    157   // Converts unique endpoint ID to user-friendly device name.
    158   std::string GetDeviceName(LPCWSTR device_id) const;
    159 
    160   // Called on the audio IO thread for each capture event.
    161   // Buffers captured audio into a FIFO if varispeed is used or into an audio
    162   // bus if input and output sample rates are identical.
    163   void ProcessInputAudio();
    164 
    165   // Called on the audio IO thread for each render event when varispeed is
    166   // active or for each capture event when varispeed is not used.
    167   // In varispeed mode, it triggers a resampling callback, which reads from the
    168   // FIFO, and calls AudioSourceCallback::OnMoreIOData using the resampled
    169   // input signal and at the same time asks for data to play out.
    170   // If input and output rates are the same - instead of reading from the FIFO
    171   // and resampling - we read directly from the audio bus used to store
    172   // captured data in ProcessInputAudio.
    173   void ProcessOutputAudio(IAudioClock* audio_output_clock);
    174 
    175   // Contains the thread ID of the creating thread.
    176   base::PlatformThreadId creating_thread_id_;
    177 
    178   // Our creator, the audio manager needs to be notified when we close.
    179   AudioManagerWin* manager_;
    180 
    181   // Contains the audio parameter structure provided at construction.
    182   AudioParameters params_;
    183   // For convenience, same as in params_.
    184   int input_channels_;
    185   int output_channels_;
    186 
    187   // Unique ID of the input device to be opened.
    188   const std::string input_device_id_;
    189 
    190   // The sharing mode for the streams.
    191   // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE
    192   // where AUDCLNT_SHAREMODE_SHARED is the default.
    193   AUDCLNT_SHAREMODE share_mode_;
    194 
    195   // Rendering and capturing are driven by this thread (no message loop).
    196   // All OnMoreIOData() callbacks will be called from this thread.
    197   scoped_ptr<base::DelegateSimpleThread> audio_io_thread_;
    198 
    199   // Contains the desired audio output format which is set up at construction.
    200   // It is required to first acquire the native sample rate of the selected
    201   // output device and then use the same rate when creating this object.
    202   WAVEFORMATPCMEX output_format_;
    203 
    204   // Contains the native audio input format which is set up at construction
    205   // if varispeed mode is utilized.
    206   WAVEFORMATPCMEX input_format_;
    207 
    208   // True when successfully opened.
    209   bool opened_;
    210 
    211   // Volume level from 0 to 1 used for output scaling.
    212   double volume_;
    213 
    214   // Size in audio frames of each audio packet where an audio packet
    215   // is defined as the block of data which the destination is expected to
    216   // receive in each OnMoreIOData() callback.
    217   size_t output_buffer_size_frames_;
    218 
    219   // Size in audio frames of each audio packet where an audio packet
    220   // is defined as the block of data which the source is expected to
    221   // deliver in each OnMoreIOData() callback.
    222   size_t input_buffer_size_frames_;
    223 
    224   // Length of the audio endpoint buffer.
    225   uint32 endpoint_render_buffer_size_frames_;
    226   uint32 endpoint_capture_buffer_size_frames_;
    227 
    228   // Counts the number of audio frames written to the endpoint buffer.
    229   uint64 num_written_frames_;
    230 
    231   // Time stamp for last delay measurement.
    232   base::TimeTicks last_delay_sample_time_;
    233 
    234   // Contains the total (sum of render and capture) delay in milliseconds.
    235   double total_delay_ms_;
    236 
    237   // Contains the total (sum of render and capture and possibly FIFO) delay
    238   // in bytes. The update frequency is set by a constant called
    239   // |kTimeDiffInMillisecondsBetweenDelayMeasurements|.
    240   int total_delay_bytes_;
    241 
    242   // Pointer to the client that will deliver audio samples to be played out.
    243   AudioSourceCallback* source_;
    244 
    245   // IMMDevice interfaces which represents audio endpoint devices.
    246   base::win::ScopedComPtr<IMMDevice> endpoint_render_device_;
    247   base::win::ScopedComPtr<IMMDevice> endpoint_capture_device_;
    248 
    249   // IAudioClient interfaces which enables a client to create and initialize
    250   // an audio stream between an audio application and the audio engine.
    251   base::win::ScopedComPtr<IAudioClient> audio_output_client_;
    252   base::win::ScopedComPtr<IAudioClient> audio_input_client_;
    253 
    254   // The IAudioRenderClient interface enables a client to write output
    255   // data to a rendering endpoint buffer.
    256   base::win::ScopedComPtr<IAudioRenderClient> audio_render_client_;
    257 
    258   // The IAudioCaptureClient interface enables a client to read input
    259   // data from a capturing endpoint buffer.
    260   base::win::ScopedComPtr<IAudioCaptureClient> audio_capture_client_;
    261 
    262   // The audio engine will signal this event each time a buffer has been
    263   // recorded.
    264   base::win::ScopedHandle capture_event_;
    265 
    266   // The audio engine will signal this event each time it needs a new
    267   // audio buffer to play out.
    268   // Only utilized in varispeed mode.
    269   base::win::ScopedHandle render_event_;
    270 
    271   // This event will be signaled when streaming shall stop.
    272   base::win::ScopedHandle stop_streaming_event_;
    273 
    274   // Container for retrieving data from AudioSourceCallback::OnMoreIOData().
    275   scoped_ptr<AudioBus> output_bus_;
    276 
    277   // Container for sending data to AudioSourceCallback::OnMoreIOData().
    278   scoped_ptr<AudioBus> input_bus_;
    279 
    280   // Container for storing output from the channel mixer.
    281   scoped_ptr<AudioBus> channel_bus_;
    282 
    283   // All members below are only allocated, or used, in varispeed mode:
    284 
    285   // Temporary storage of resampled input audio data.
    286   scoped_ptr<AudioBus> resampled_bus_;
    287 
    288   // Set to true first time a capture event has been received in varispeed
    289   // mode.
    290   bool input_callback_received_;
    291 
    292   // MultiChannelResampler is a multi channel wrapper for SincResampler;
    293   // allowing high quality sample rate conversion of multiple channels at once.
    294   scoped_ptr<MultiChannelResampler> resampler_;
    295 
    296   // Resampler I/O ratio.
    297   double io_sample_rate_ratio_;
    298 
    299   // Used for input to output buffering.
    300   scoped_ptr<AudioFifo> fifo_;
    301 
    302   // The channel mixer is only created and utilized if number of input channels
    303   // is larger than the native number of input channels (e.g client wants
    304   // stereo but the audio device only supports mono).
    305   scoped_ptr<ChannelMixer> channel_mixer_;
    306 
    307   // The optimal number of frames we'd like to keep in the FIFO at all times.
    308   int target_fifo_frames_;
    309 
    310   // A running average of the measured delta between actual number of frames
    311   // in the FIFO versus |target_fifo_frames_|.
    312   double average_delta_;
    313 
    314   // A varispeed rate scalar which is calculated based on FIFO drift.
    315   double fifo_rate_compensation_;
    316 
    317   // Set to true when input side signals output side that a new delay
    318   // estimate is needed.
    319   bool update_output_delay_;
    320 
    321   // Capture side stores its delay estimate so the sum can be derived in
    322   // the render side.
    323   double capture_delay_ms_;
    324 
    325   // TODO(henrika): possibly remove these members once the performance is
    326   // properly tuned. Only used for off-line debugging; absent if NDEBUG is set.
    327 #ifndef NDEBUG
    328   enum LogElementNames {
    329     INPUT_TIME_STAMP,
    330     NUM_FRAMES_IN_FIFO,
    331     RESAMPLER_MARGIN,
    332     RATE_COMPENSATION
    333   };
    334 
    335   scoped_ptr<int64[]> input_time_stamps_;
    336   scoped_ptr<int[]> num_frames_in_fifo_;
    337   scoped_ptr<int[]> resampler_margin_;
    338   scoped_ptr<double[]> fifo_rate_comps_;
    339   scoped_ptr<int[]> num_elements_;
    340   scoped_ptr<int[]> input_params_;
    341   scoped_ptr<int[]> output_params_;
    342 
    343   FILE* data_file_;
    344   FILE* param_file_;
    345 #endif  // NDEBUG
    346 
    347   DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream);
    348 };
    349 
    350 }  // namespace media
    351 
    352 #endif  // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
    353