// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
#define CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_

#include <list>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/threading/thread_checker.h"
#include "content/common/content_export.h"
#include "content/renderer/media/webrtc_audio_capturer.h"
#include "content/renderer/media/webrtc_audio_device_not_impl.h"
#include "content/renderer/media/webrtc_audio_renderer.h"
#include "media/base/audio_capturer_source.h"
#include "media/base/audio_renderer_sink.h"

// A WebRtcAudioDeviceImpl instance implements the abstract interface
// webrtc::AudioDeviceModule which makes it possible for a user (e.g. webrtc::
// VoiceEngine) to register this class as an external AudioDeviceModule (ADM).
// Then WebRtcAudioDeviceImpl::SetSessionId() needs to be called to set the
// session id that tells which device to use. The user can then call
// WebRtcAudioDeviceImpl::StartPlayout() and
// WebRtcAudioDeviceImpl::StartRecording() from the render process to initiate
// and start audio rendering and capturing in the browser process. IPC is
// utilized to set up the media streams.
//
// Usage example:
//
//   using namespace webrtc;
//
//   {
//      scoped_refptr<WebRtcAudioDeviceImpl> external_adm;
//      external_adm = new WebRtcAudioDeviceImpl();
//      external_adm->SetSessionId(session_id);
//      VoiceEngine* voe = VoiceEngine::Create();
//      VoEBase* base = VoEBase::GetInterface(voe);
//      base->Init(external_adm);
//      int ch = base->CreateChannel();
//      ...
//      base->StartReceive(ch);
//      base->StartPlayout(ch);
//      base->StartSending(ch);
//      ...
//      <== full-duplex audio session with AGC enabled ==>
//      ...
//      base->DeleteChannel(ch);
//      base->Terminate();
//      base->Release();
//      VoiceEngine::Delete(voe);
//   }
//
// webrtc::VoiceEngine::Init() calls these ADM methods (in this order):
//
//  RegisterAudioCallback(this)
//    webrtc::VoiceEngine is a webrtc::AudioTransport implementation and
//    implements the RecordedDataIsAvailable() and NeedMorePlayData() callbacks.
//
//  Init()
//    Creates and initializes the AudioOutputDevice and AudioInputDevice
//    objects.
//
//  SetAGC(true)
//    Enables the adaptive analog mode of the AGC which ensures that a
//    suitable microphone volume level will be set. This scheme will affect
//    the actual microphone control slider.
//
// AGC overview:
//
// It aims to maintain a constant speech loudness level from the microphone.
// This is done by both controlling the analog microphone gain and applying
// digital gain. The microphone gain on the sound card is slowly
// increased/decreased during speech only. By observing the microphone control
// slider you can see it move when you speak. If you scream, the slider moves
// downwards and then upwards again when you return to normal. It is not
// uncommon that the slider hits the maximum. This means that the maximum
// analog gain is not large enough to give the desired loudness. Nevertheless,
// we can in general still attain the desired loudness. If the microphone
// control slider is moved manually, the gain adaptation restarts and returns
// to roughly the same position as before the change if the circumstances are
// still the same. When the input microphone signal causes saturation, the
// level is decreased dramatically and has to re-adapt towards the old level.
// The adaptation is a slowly varying process and at the beginning of capture
// this is noticed by a slow increase in volume. Smaller changes in microphone
// input level are leveled out by the built-in digital control. For larger
// differences we need to rely on the slow adaptation.
// See http://en.wikipedia.org/wiki/Automatic_gain_control for more details.
//
// AGC implementation details:
//
// The adaptive analog mode of the AGC is always enabled for desktop platforms
// in WebRTC.
//
// Before recording starts, the ADM enables AGC on the AudioInputDevice.
//
// A capture session with AGC is started up as follows (simplified):
//
//                            [renderer]
//                                |
//                     ADM::StartRecording()
//             AudioInputDevice::InitializeOnIOThread()
//           AudioInputHostMsg_CreateStream(..., agc=true)           [IPC]
//                                |
//                       [IPC to the browser]
//                                |
//              AudioInputRendererHost::OnCreateStream()
//              AudioInputController::CreateLowLatency()
//         AudioInputController::DoSetAutomaticGainControl(true)
//            AudioInputStream::SetAutomaticGainControl(true)
//                                |
// AGC is now enabled in the media layer and streaming starts (details omitted).
// The figure below illustrates the AGC scheme which is active in combination
// with the default media flow explained earlier.
//                                |
//                            [browser]
//                                |
//                AudioInputStream::(Capture thread loop)
//  AgcAudioStream<AudioInputStream>::GetAgcVolume() => get latest mic volume
//                 AudioInputData::OnData(..., volume)
//              AudioInputController::OnData(..., volume)
//               AudioInputSyncWriter::Write(..., volume)
//                                |
//      [volume | size | data] is sent to the renderer         [shared memory]
//                                |
//                            [renderer]
//                                |
//          AudioInputDevice::AudioThreadCallback::Process()
//            WebRtcAudioDeviceImpl::Capture(..., volume)
//    AudioTransport::RecordedDataIsAvailable(...,volume, new_volume)
//                                |
// The AGC now uses the current volume input and computes a suitable new
// level given by the |new_volume| output. This value is only non-zero if the
// AGC has made a decision that the microphone level should change.
//                                |
//                      if (new_volume != 0)
//              AudioInputDevice::SetVolume(new_volume)
//              AudioInputHostMsg_SetVolume(new_volume)                 [IPC]
//                                |
//                       [IPC to the browser]
//                                |
//                 AudioInputRendererHost::OnSetVolume()
//                  AudioInputController::SetVolume()
//             AudioInputStream::SetVolume(scaled_volume)
//                                |
// Here we set the new microphone level in the media layer and at the same time
// read the new setting (we might not get exactly what is set).
//                                |
//             AudioInputData::OnData(..., updated_volume)
//           AudioInputController::OnData(..., updated_volume)
//                                |
//                                |
// This process repeats until we stop capturing data. Note that a common
// steady state is that the volume control reaches its max and the new_volume
// value from the AGC is zero. A loud voice input is required to break this
// state and start lowering the level again.
//
// Implementation notes:
//
//  - This class must be created and destroyed on the main render thread and
//    most methods are called on the same thread. However, some methods are
//    also called on a Libjingle worker thread.
RenderData is called on the 168 // AudioOutputDevice thread and CaptureData on the AudioInputDevice thread. 169 // To summarize: this class lives on four different threads. 170 // - The webrtc::AudioDeviceModule is reference counted. 171 // - AGC is only supported in combination with the WASAPI-based audio layer 172 // on Windows, i.e., it is not supported on Windows XP. 173 // - All volume levels required for the AGC scheme are transfered in a 174 // normalized range [0.0, 1.0]. Scaling takes place in both endpoints 175 // (WebRTC client a media layer). This approach ensures that we can avoid 176 // transferring maximum levels between the renderer and the browser. 177 // 178 179 namespace content { 180 181 class WebRtcAudioCapturer; 182 class WebRtcAudioRenderer; 183 184 // TODO(xians): Move the following two interfaces to webrtc so that 185 // libjingle can own references to the renderer and capturer. 186 class WebRtcAudioRendererSource { 187 public: 188 // Callback to get the rendered interleaved data. 189 // TODO(xians): Change uint8* to int16*. 190 virtual void RenderData(uint8* audio_data, 191 int number_of_channels, 192 int number_of_frames, 193 int audio_delay_milliseconds) = 0; 194 195 // Set the format for the capture audio parameters. 196 virtual void SetRenderFormat(const media::AudioParameters& params) = 0; 197 198 // Callback to notify the client that the renderer is going away. 199 virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) = 0; 200 201 protected: 202 virtual ~WebRtcAudioRendererSource() {} 203 }; 204 205 class PeerConnectionAudioSink { 206 public: 207 // Callback to deliver the captured interleaved data. 208 // |channels| contains a vector of WebRtc VoE channels. 209 // |audio_data| is the pointer to the audio data. 210 // |sample_rate| is the sample frequency of audio data. 211 // |number_of_channels| is the number of channels reflecting the order of 212 // surround sound channels. 
213 // |audio_delay_milliseconds| is recording delay value. 214 // |current_volume| is current microphone volume, in range of |0, 255]. 215 // |need_audio_processing| indicates if the audio needs WebRtc AEC/NS/AGC 216 // audio processing. 217 // The return value is the new microphone volume, in the range of |0, 255]. 218 // When the volume does not need to be updated, it returns 0. 219 virtual int OnData(const int16* audio_data, 220 int sample_rate, 221 int number_of_channels, 222 int number_of_frames, 223 const std::vector<int>& channels, 224 int audio_delay_milliseconds, 225 int current_volume, 226 bool need_audio_processing, 227 bool key_pressed) = 0; 228 229 // Set the format for the capture audio parameters. 230 // This is called when the capture format has changed, and it must be called 231 // on the same thread as calling CaptureData(). 232 virtual void OnSetFormat(const media::AudioParameters& params) = 0; 233 234 protected: 235 virtual ~PeerConnectionAudioSink() {} 236 }; 237 238 // Note that this class inherits from webrtc::AudioDeviceModule but due to 239 // the high number of non-implemented methods, we move the cruft over to the 240 // WebRtcAudioDeviceNotImpl. 241 class CONTENT_EXPORT WebRtcAudioDeviceImpl 242 : NON_EXPORTED_BASE(public PeerConnectionAudioSink), 243 NON_EXPORTED_BASE(public WebRtcAudioDeviceNotImpl), 244 NON_EXPORTED_BASE(public WebRtcAudioRendererSource) { 245 public: 246 // The maximum volume value WebRtc uses. 247 static const int kMaxVolumeLevel = 255; 248 249 // Instances of this object are created on the main render thread. 250 WebRtcAudioDeviceImpl(); 251 252 // webrtc::RefCountedModule implementation. 253 // The creator must call AddRef() after construction and use Release() 254 // to release the reference and delete this object. 255 // Called on the main render thread. 256 virtual int32_t AddRef() OVERRIDE; 257 virtual int32_t Release() OVERRIDE; 258 259 // webrtc::AudioDeviceModule implementation. 
260 // All implemented methods are called on the main render thread unless 261 // anything else is stated. 262 263 virtual int32_t RegisterAudioCallback(webrtc::AudioTransport* audio_callback) 264 OVERRIDE; 265 266 virtual int32_t Init() OVERRIDE; 267 virtual int32_t Terminate() OVERRIDE; 268 virtual bool Initialized() const OVERRIDE; 269 270 virtual int32_t PlayoutIsAvailable(bool* available) OVERRIDE; 271 virtual bool PlayoutIsInitialized() const OVERRIDE; 272 virtual int32_t RecordingIsAvailable(bool* available) OVERRIDE; 273 virtual bool RecordingIsInitialized() const OVERRIDE; 274 275 // All Start/Stop methods are called on a libJingle worker thread. 276 virtual int32_t StartPlayout() OVERRIDE; 277 virtual int32_t StopPlayout() OVERRIDE; 278 virtual bool Playing() const OVERRIDE; 279 virtual int32_t StartRecording() OVERRIDE; 280 virtual int32_t StopRecording() OVERRIDE; 281 virtual bool Recording() const OVERRIDE; 282 283 // Called on the AudioInputDevice worker thread. 284 virtual int32_t SetMicrophoneVolume(uint32_t volume) OVERRIDE; 285 286 // TODO(henrika): sort out calling thread once we start using this API. 287 virtual int32_t MicrophoneVolume(uint32_t* volume) const OVERRIDE; 288 289 virtual int32_t MaxMicrophoneVolume(uint32_t* max_volume) const OVERRIDE; 290 virtual int32_t MinMicrophoneVolume(uint32_t* min_volume) const OVERRIDE; 291 virtual int32_t StereoPlayoutIsAvailable(bool* available) const OVERRIDE; 292 virtual int32_t StereoRecordingIsAvailable(bool* available) const OVERRIDE; 293 virtual int32_t PlayoutDelay(uint16_t* delay_ms) const OVERRIDE; 294 virtual int32_t RecordingDelay(uint16_t* delay_ms) const OVERRIDE; 295 virtual int32_t RecordingSampleRate(uint32_t* samples_per_sec) const OVERRIDE; 296 virtual int32_t PlayoutSampleRate(uint32_t* samples_per_sec) const OVERRIDE; 297 298 // Sets the |renderer_|, returns false if |renderer_| already exists. 299 // Called on the main renderer thread. 
300 bool SetAudioRenderer(WebRtcAudioRenderer* renderer); 301 302 // Adds the capturer to the ADM. 303 void AddAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer); 304 305 // Gets the default capturer, which is the capturer in the list with 306 // a valid |device_id|. Microphones are represented by capturers with a valid 307 // |device_id|, since only one microphone is supported today, only one 308 // capturer in the |capturers_| can have a valid |device_id|. 309 scoped_refptr<WebRtcAudioCapturer> GetDefaultCapturer() const; 310 311 const scoped_refptr<WebRtcAudioRenderer>& renderer() const { 312 return renderer_; 313 } 314 int output_buffer_size() const { 315 return output_audio_parameters_.frames_per_buffer(); 316 } 317 int output_channels() const { 318 return output_audio_parameters_.channels(); 319 } 320 int output_sample_rate() const { 321 return output_audio_parameters_.sample_rate(); 322 } 323 324 private: 325 typedef std::list<scoped_refptr<WebRtcAudioCapturer> > CapturerList; 326 327 // Make destructor private to ensure that we can only be deleted by Release(). 328 virtual ~WebRtcAudioDeviceImpl(); 329 330 // PeerConnectionAudioSink implementation. 331 332 // Called on the AudioInputDevice worker thread. 333 virtual int OnData(const int16* audio_data, 334 int sample_rate, 335 int number_of_channels, 336 int number_of_frames, 337 const std::vector<int>& channels, 338 int audio_delay_milliseconds, 339 int current_volume, 340 bool need_audio_processing, 341 bool key_pressed) OVERRIDE; 342 343 // Called on the AudioInputDevice worker thread. 344 virtual void OnSetFormat(const media::AudioParameters& params) OVERRIDE; 345 346 // WebRtcAudioRendererSource implementation. 347 348 // Called on the AudioInputDevice worker thread. 349 virtual void RenderData(uint8* audio_data, 350 int number_of_channels, 351 int number_of_frames, 352 int audio_delay_milliseconds) OVERRIDE; 353 354 // Called on the main render thread. 
355 virtual void SetRenderFormat(const media::AudioParameters& params) OVERRIDE; 356 virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) OVERRIDE; 357 358 // Used to DCHECK that we are called on the correct thread. 359 base::ThreadChecker thread_checker_; 360 361 int ref_count_; 362 363 // List of captures which provides access to the native audio input layer 364 // in the browser process. 365 CapturerList capturers_; 366 367 // Provides access to the audio renderer in the browser process. 368 scoped_refptr<WebRtcAudioRenderer> renderer_; 369 370 // Weak reference to the audio callback. 371 // The webrtc client defines |audio_transport_callback_| by calling 372 // RegisterAudioCallback(). 373 webrtc::AudioTransport* audio_transport_callback_; 374 375 // Cached values of used output audio parameters. Platform dependent. 376 media::AudioParameters output_audio_parameters_; 377 378 // Cached value of the current audio delay on the input/capture side. 379 int input_delay_ms_; 380 381 // Cached value of the current audio delay on the output/renderer side. 382 int output_delay_ms_; 383 384 // Protects |recording_|, |output_delay_ms_|, |input_delay_ms_|, |renderer_| 385 // |recording_| and |microphone_volume_|. 386 mutable base::Lock lock_; 387 388 bool initialized_; 389 bool playing_; 390 bool recording_; 391 392 // Used for histograms of total recording and playout times. 393 base::Time start_capture_time_; 394 base::Time start_render_time_; 395 396 // Stores latest microphone volume received in a CaptureData() callback. 397 // Range is [0, 255]. 398 uint32_t microphone_volume_; 399 400 DISALLOW_COPY_AND_ASSIGN(WebRtcAudioDeviceImpl); 401 }; 402 403 } // namespace content 404 405 #endif // CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_ 406