1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_ 6 #define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_ 7 8 #include <Audioclient.h> 9 #include <MMDeviceAPI.h> 10 11 #include <string> 12 13 #include "base/compiler_specific.h" 14 #include "base/gtest_prod_util.h" 15 #include "base/threading/platform_thread.h" 16 #include "base/threading/simple_thread.h" 17 #include "base/win/scoped_co_mem.h" 18 #include "base/win/scoped_comptr.h" 19 #include "base/win/scoped_handle.h" 20 #include "media/audio/audio_io.h" 21 #include "media/audio/audio_parameters.h" 22 #include "media/base/audio_fifo.h" 23 #include "media/base/channel_mixer.h" 24 #include "media/base/media_export.h" 25 #include "media/base/multi_channel_resampler.h" 26 27 namespace media { 28 29 class AudioManagerWin; 30 31 // Implementation of AudioOutputStream for Windows using the Core Audio API 32 // where both capturing and rendering takes place on the same thread to enable 33 // audio I/O. This class allows arbitrary combinations of input and output 34 // devices running off different clocks and using different drivers, with 35 // potentially differing sample-rates. 36 // 37 // It is required to first acquire the native sample rate of the selected 38 // output device and then use the same rate when creating this object. 39 // The inner operation depends on the input sample rate which is determined 40 // during construction. Three different main modes are supported: 41 // 42 // 1) input rate == output rate => input side drives output side directly. 43 // 2) input rate != output rate => both sides are driven independently by 44 // events and a FIFO plus a resampling unit is used to compensate for 45 // differences in sample rates between the two sides. 
//  3) input rate == output rate but native buffer sizes are not identical =>
//     same inner functionality as in (2) to compensate for the differences
//     in buffer sizes and also compensate for any potential clock drift
//     between the two devices.
//
// Mode detection is done at construction and using mode (1) will lead to
// best performance (lower delay and no "varispeed distortion"), i.e., it is
// recommended to use same sample rates for input and output. Mode (2) uses a
// resampler which supports rate adjustments to fine tune for things like
// clock drift and differences in sample rates between different devices.
// Mode (2) - which uses a FIFO and an adjustable multi-channel resampler -
// is also called the varispeed mode and it is used for case (3) as well to
// compensate for the difference in buffer sizes mainly.
// Mode (3) can happen if two different audio devices are used.
// As an example: some devices need a buffer size of 441 @ 44.1kHz and others
// 448 @ 44.1kHz. This is a rare case and will only happen for sample rates
// which are integer multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.).
//
// Implementation notes:
//
//  - Open() can fail if the input and output parameters do not fulfill
//    certain conditions. See source for Open() for more details.
//  - Channel mixing will be performed if the client asks for a larger
//    number of channels than the native audio layer provides.
//    Example: client wants stereo but audio layer provides mono. In this case
//    upmixing from mono to stereo (1->2) will be done.
//
// TODO(henrika):
//
//  - Add support for exclusive mode.
//  - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float
//    as internal sample-value representation.
//  - Perform fine-tuning for non-matching sample rates to reduce latency.
//
class MEDIA_EXPORT WASAPIUnifiedStream
    : public AudioOutputStream,
      public base::DelegateSimpleThread::Delegate {
 public:
  // The ctor takes all the usual parameters, plus |manager| which is the
  // audio manager who is creating this object, and |input_device_id| which
  // is stored in |input_device_id_| as the unique ID of the input device to
  // be opened.
  WASAPIUnifiedStream(AudioManagerWin* manager,
                      const AudioParameters& params,
                      const std::string& input_device_id);

  // The dtor is typically called by the AudioManager only and it is usually
  // triggered by calling AudioOutputStream::Close().
  virtual ~WASAPIUnifiedStream();

  // Implementation of AudioOutputStream.
  virtual bool Open() OVERRIDE;
  virtual void Start(AudioSourceCallback* callback) OVERRIDE;
  virtual void Stop() OVERRIDE;
  virtual void Close() OVERRIDE;
  virtual void SetVolume(double volume) OVERRIDE;
  virtual void GetVolume(double* volume) OVERRIDE;

  // Returns true if |audio_io_thread_| currently holds a thread object.
  bool started() const {
    return audio_io_thread_.get() != NULL;
  }

  // Returns true when both |fifo_| and |resampler_| have been allocated,
  // i.e., when the stream runs in varispeed mode. According to the class
  // comment, varispeed covers differing input/output sample rates as well as
  // identical rates with non-identical native buffer sizes.
  // A FIFO and an adjustable multi-channel resampler are utilized in this
  // mode.
  bool VarispeedMode() const { return (fifo_ && resampler_); }

 private:
  enum {
    // Time in milliseconds between two successive delay measurements.
    // We save resources by not updating the delay estimates for each capture
    // event (typically 100Hz rate).
    kTimeDiffInMillisecondsBetweenDelayMeasurements = 1000,

    // Max possible FIFO size.
    kFifoSize = 16384,

    // This value was determined empirically for minimum latency while still
    // guarding against FIFO under-runs. The actual target size will be equal
    // to kTargetFifoSafetyFactor * (native input buffer size).
    // TODO(henrika): tune this value for lowest possible latency for all
    // possible sample rate combinations.
    kTargetFifoSafetyFactor = 2
  };

  // Additional initialization required when input and output sample rates
  // differ. Allocates resources for |fifo_|, |resampler_|, |render_event_|,
  // and the capture bus and configures the |input_format_| structure
  // given the provided input and output audio parameters.
  // NOTE(review): the original comment named a |capture_bus_| member, which
  // is not declared in this class; presumably |input_bus_| is meant -
  // confirm against the .cc file.
  void DoVarispeedInitialization(const AudioParameters& input_params,
                                 const AudioParameters& output_params);

  // Clears varispeed related components such as the FIFO and the resampler.
  void ResetVarispeed();

  // Builds WAVEFORMATEX structures for input and output based on input and
  // output audio parameters.
  void SetIOFormats(const AudioParameters& input_params,
                    const AudioParameters& output_params);

  // DelegateSimpleThread::Delegate implementation.
  virtual void Run() OVERRIDE;

  // Callback for providing more data into the resampler.
  // Only used in varispeed mode, i.e., when |fifo_| and |resampler_| are
  // allocated (see VarispeedMode()).
  // NOTE(review): the original comment labels this as a
  // MultiChannelResampler::MultiChannelAudioSourceProvider implementation,
  // but this class does not derive from such an interface in this header;
  // presumably the method is bound as a callback for |resampler_| - confirm
  // in the .cc file.
  virtual void ProvideInput(int frame_delay, AudioBus* audio_bus);

  // Issues the OnError() callback to the |source_|.
  void HandleError(HRESULT err);

  // Stops and joins the audio thread in case of an error.
  void StopAndJoinThread(HRESULT err);

  // Converts unique endpoint ID to user-friendly device name.
  std::string GetDeviceName(LPCWSTR device_id) const;

  // Called on the audio IO thread for each capture event.
  // Buffers captured audio into a FIFO if varispeed is used or into an audio
  // bus if input and output sample rates are identical.
  void ProcessInputAudio();

  // Called on the audio IO thread for each render event when varispeed is
  // active or for each capture event when varispeed is not used.
  // In varispeed mode, it triggers a resampling callback, which reads from the
  // FIFO, and calls AudioSourceCallback::OnMoreIOData using the resampled
  // input signal and at the same time asks for data to play out.
  // If input and output rates are the same - instead of reading from the FIFO
  // and doing resampling - we read directly from the audio bus used to store
  // captured data in ProcessInputAudio.
  void ProcessOutputAudio(IAudioClock* audio_output_clock);

  // Contains the thread ID of the creating thread.
  base::PlatformThreadId creating_thread_id_;

  // Our creator, the audio manager needs to be notified when we close.
  AudioManagerWin* manager_;

  // Contains the audio parameter structure provided at construction.
  AudioParameters params_;
  // For convenience, same as in params_.
  int input_channels_;
  int output_channels_;

  // Unique ID of the input device to be opened.
  const std::string input_device_id_;

  // The sharing mode for the streams.
  // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE
  // where AUDCLNT_SHAREMODE_SHARED is the default.
  AUDCLNT_SHAREMODE share_mode_;

  // Rendering and capturing are driven by this thread (no message loop).
  // All OnMoreIOData() callbacks will be called from this thread.
  scoped_ptr<base::DelegateSimpleThread> audio_io_thread_;

  // Contains the desired audio output format which is set up at construction.
  // It is required to first acquire the native sample rate of the selected
  // output device and then use the same rate when creating this object.
  WAVEFORMATPCMEX output_format_;

  // Contains the native audio input format which is set up at construction
  // if varispeed mode is utilized.
  WAVEFORMATPCMEX input_format_;

  // True when successfully opened.
  bool opened_;

  // Volume level from 0 to 1 used for output scaling.
  double volume_;

  // Size in audio frames of each audio packet where an audio packet
  // is defined as the block of data which the destination is expected to
  // receive in each OnMoreIOData() callback.
  size_t output_buffer_size_frames_;

  // Size in audio frames of each audio packet where an audio packet
  // is defined as the block of data which the source is expected to
  // deliver in each OnMoreIOData() callback.
  size_t input_buffer_size_frames_;

  // Length of the audio endpoint buffers (render and capture side), in audio
  // frames as the member names indicate.
  uint32 endpoint_render_buffer_size_frames_;
  uint32 endpoint_capture_buffer_size_frames_;

  // Counts the number of audio frames written to the endpoint buffer.
  uint64 num_written_frames_;

  // Time stamp for last delay measurement.
  base::TimeTicks last_delay_sample_time_;

  // Contains the total (sum of render and capture) delay in milliseconds.
  double total_delay_ms_;

  // Contains the total (sum of render and capture and possibly FIFO) delay
  // in bytes. The update frequency is set by a constant called
  // |kTimeDiffInMillisecondsBetweenDelayMeasurements|.
  int total_delay_bytes_;

  // Pointer to the client that will deliver audio samples to be played out.
  AudioSourceCallback* source_;

  // IMMDevice interfaces which represent audio endpoint devices.
  base::win::ScopedComPtr<IMMDevice> endpoint_render_device_;
  base::win::ScopedComPtr<IMMDevice> endpoint_capture_device_;

  // IAudioClient interfaces which enable a client to create and initialize
  // an audio stream between an audio application and the audio engine.
  base::win::ScopedComPtr<IAudioClient> audio_output_client_;
  base::win::ScopedComPtr<IAudioClient> audio_input_client_;

  // The IAudioRenderClient interface enables a client to write output
  // data to a rendering endpoint buffer.
  base::win::ScopedComPtr<IAudioRenderClient> audio_render_client_;

  // The IAudioCaptureClient interface enables a client to read input
  // data from a capturing endpoint buffer.
  base::win::ScopedComPtr<IAudioCaptureClient> audio_capture_client_;

  // The audio engine will signal this event each time a buffer has been
  // recorded.
  base::win::ScopedHandle capture_event_;

  // The audio engine will signal this event each time it needs a new
  // audio buffer to play out.
  // Only utilized in varispeed mode.
  base::win::ScopedHandle render_event_;

  // This event will be signaled when streaming shall stop.
  base::win::ScopedHandle stop_streaming_event_;

  // Container for retrieving data from AudioSourceCallback::OnMoreIOData().
  scoped_ptr<AudioBus> output_bus_;

  // Container for sending data to AudioSourceCallback::OnMoreIOData().
  scoped_ptr<AudioBus> input_bus_;

  // Container for storing output from the channel mixer.
  scoped_ptr<AudioBus> channel_bus_;

  // All members below are only allocated, or used, in varispeed mode:

  // Temporary storage of resampled input audio data.
  scoped_ptr<AudioBus> resampled_bus_;

  // Set to true first time a capture event has been received in varispeed
  // mode.
  bool input_callback_received_;

  // MultiChannelResampler is a multi channel wrapper for SincResampler;
  // allowing high quality sample rate conversion of multiple channels at once.
  scoped_ptr<MultiChannelResampler> resampler_;

  // Resampler I/O ratio.
  double io_sample_rate_ratio_;

  // Used for input to output buffering.
  scoped_ptr<AudioFifo> fifo_;

  // The channel mixer is only created and utilized if number of input channels
  // is larger than the native number of input channels (e.g client wants
  // stereo but the audio device only supports mono).
  scoped_ptr<ChannelMixer> channel_mixer_;

  // The optimal number of frames we'd like to keep in the FIFO at all times.
  int target_fifo_frames_;

  // A running average of the measured delta between actual number of frames
  // in the FIFO versus |target_fifo_frames_|.
  double average_delta_;

  // A varispeed rate scalar which is calculated based on FIFO drift.
  double fifo_rate_compensation_;

  // Set to true when input side signals output side that a new delay
  // estimate is needed.
  bool update_output_delay_;

  // Capture side stores its delay estimate so the sum can be derived in
  // the render side.
  double capture_delay_ms_;

  // TODO(henrika): possibly remove these members once the performance is
  // properly tuned. Only used for off-line debugging.
  // NOTE: everything inside this block is compiled in only when NDEBUG is not
  // defined, so the set of data members differs between builds with and
  // without NDEBUG.
#ifndef NDEBUG
  // Names for the logged quantities.
  // NOTE(review): presumably used as indices into |num_elements_| - confirm
  // in the .cc file.
  enum LogElementNames {
    INPUT_TIME_STAMP,
    NUM_FRAMES_IN_FIFO,
    RESAMPLER_MARGIN,
    RATE_COMPENSATION
  };

  // Heap-allocated arrays holding the debug log data.
  scoped_ptr<int64[]> input_time_stamps_;
  scoped_ptr<int[]> num_frames_in_fifo_;
  scoped_ptr<int[]> resampler_margin_;
  scoped_ptr<double[]> fifo_rate_comps_;
  scoped_ptr<int[]> num_elements_;
  scoped_ptr<int[]> input_params_;
  scoped_ptr<int[]> output_params_;

  // Raw C file handles for the debug output.
  // NOTE(review): opening/closing of these handles is not visible in this
  // header - verify they are closed in the .cc file.
  FILE* data_file_;
  FILE* param_file_;
#endif

  DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream);
};

}  // namespace media

#endif  // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_