// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
#define CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_

#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/compiler_specific.h"
#include "base/files/file.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/threading/thread_checker.h"
#include "content/common/content_export.h"
#include "content/renderer/media/webrtc_audio_capturer.h"
#include "content/renderer/media/webrtc_audio_device_not_impl.h"
#include "ipc/ipc_platform_file.h"
#include "media/base/audio_capturer_source.h"
#include "media/base/audio_renderer_sink.h"

// A WebRtcAudioDeviceImpl instance implements the abstract interface
// webrtc::AudioDeviceModule which makes it possible for a user (e.g. webrtc::
// VoiceEngine) to register this class as an external AudioDeviceModule (ADM).
// Then WebRtcAudioDeviceImpl::SetSessionId() needs to be called to set the
// session id that tells which device to use. The user can then call
// WebRtcAudioDeviceImpl::StartPlayout() and
// WebRtcAudioDeviceImpl::StartRecording() from the render process to initiate
// and start audio rendering and capturing in the browser process. IPC is
// utilized to set up the media streams.
//
// Usage example:
//
//  using namespace webrtc;
//
//  {
//    scoped_refptr<WebRtcAudioDeviceImpl> external_adm;
//    external_adm = new WebRtcAudioDeviceImpl();
//    external_adm->SetSessionId(session_id);
//    VoiceEngine* voe = VoiceEngine::Create();
//    VoEBase* base = VoEBase::GetInterface(voe);
//    base->Init(external_adm);
//    int ch = base->CreateChannel();
//    ...
//    base->StartReceive(ch);
//    base->StartPlayout(ch);
//    base->StartSending(ch);
//    ...
//    <== full-duplex audio session with AGC enabled ==>
//    ...
//    base->DeleteChannel(ch);
//    base->Terminate();
//    base->Release();
//    VoiceEngine::Delete(voe);
//  }
//
// webrtc::VoiceEngine::Init() calls these ADM methods (in this order):
//
//  RegisterAudioCallback(this)
//    webrtc::VoiceEngine is a webrtc::AudioTransport implementation and
//    implements the RecordedDataIsAvailable() and NeedMorePlayData() callbacks.
//
//  Init()
//    Creates and initializes the AudioOutputDevice and AudioInputDevice
//    objects.
//
//  SetAGC(true)
//    Enables the adaptive analog mode of the AGC which ensures that a
//    suitable microphone volume level will be set. This scheme will affect
//    the actual microphone control slider.
//
// AGC overview:
//
// It aims to maintain a constant speech loudness level from the microphone.
// This is done by both controlling the analog microphone gain and applying
// digital gain. The microphone gain on the sound card is slowly
// increased/decreased during speech only. By observing the microphone control
// slider you can see it move when you speak. If you scream, the slider moves
// downwards and then upwards again when you return to normal. It is not
// uncommon that the slider hits the maximum. This means that the maximum
// analog gain is not large enough to give the desired loudness. Nevertheless,
// we can in general still attain the desired loudness. If the microphone
// control slider is moved manually, the gain adaptation restarts and returns
// to roughly the same position as before the change if the circumstances are
// still the same. When the input microphone signal causes saturation, the
// level is decreased dramatically and has to re-adapt towards the old level.
// The adaptation is a slowly varying process and at the beginning of capture
// this is noticed by a slow increase in volume. Smaller changes in microphone
// input level are leveled out by the built-in digital control. For larger
// differences we need to rely on the slow adaptation.
// See http://en.wikipedia.org/wiki/Automatic_gain_control for more details.
//
// AGC implementation details:
//
// The adaptive analog mode of the AGC is always enabled for desktop platforms
// in WebRTC.
//
// Before recording starts, the ADM enables AGC on the AudioInputDevice.
//
// A capture session with AGC is started up as follows (simplified):
//
//                           [renderer]
//                               |
//                     ADM::StartRecording()
//           AudioInputDevice::InitializeOnIOThread()
//       AudioInputHostMsg_CreateStream(..., agc=true)        [IPC]
//                               |
//                      [IPC to the browser]
//                               |
//            AudioInputRendererHost::OnCreateStream()
//             AudioInputController::CreateLowLatency()
//       AudioInputController::DoSetAutomaticGainControl(true)
//          AudioInputStream::SetAutomaticGainControl(true)
//                               |
// AGC is now enabled in the media layer and streaming starts (details
// omitted). The figure below illustrates the AGC scheme which is active in
// combination with the default media flow explained earlier.
//                           [browser]
//                               |
//              AudioInputStream::(Capture thread loop)
//   AgcAudioStream<AudioInputStream>::GetAgcVolume() => get latest mic volume
//             AudioInputData::OnData(..., volume)
//           AudioInputController::OnData(..., volume)
//           AudioInputSyncWriter::Write(..., volume)
//                               |
//     [volume | size | data] is sent to the renderer     [shared memory]
//                               |
//                           [renderer]
//                               |
//        AudioInputDevice::AudioThreadCallback::Process()
//          WebRtcAudioDeviceImpl::Capture(..., volume)
//   AudioTransport::RecordedDataIsAvailable(..., volume, new_volume)
//                               |
// The AGC now uses the current volume input and computes a suitable new
// level given by the |new_volume| output. This value is only non-zero if the
// AGC has made a decision that the microphone level should change.
//                               |
//                     if (new_volume != 0)
//           AudioInputDevice::SetVolume(new_volume)
//         AudioInputHostMsg_SetVolume(new_volume)        [IPC]
//                               |
//                      [IPC to the browser]
//                               |
//            AudioInputRendererHost::OnSetVolume()
//             AudioInputController::SetVolume()
//           AudioInputStream::SetVolume(scaled_volume)
//                               |
// Here we set the new microphone level in the media layer and at the same
// time read the new setting (we might not get exactly what is set).
//                               |
//         AudioInputData::OnData(..., updated_volume)
//       AudioInputController::OnData(..., updated_volume)
//                               |
//                               |
// This process repeats until we stop capturing data. Note that a common
// steady state is that the volume control reaches its max and the new_volume
// value from the AGC is zero. A loud voice input is required to break this
// state and start lowering the level again.
//
// Implementation notes:
//
//  - This class must be created and destroyed on the main render thread and
//    most methods are called on the same thread. However, some methods are
//    also called on a Libjingle worker thread.
//    RenderData is called on the AudioOutputDevice thread and CaptureData on
//    the AudioInputDevice thread. To summarize: this class lives on four
//    different threads.
//  - The webrtc::AudioDeviceModule is reference counted.
//  - AGC is only supported in combination with the WASAPI-based audio layer
//    on Windows, i.e., it is not supported on Windows XP.
//  - All volume levels required for the AGC scheme are transferred in a
//    normalized range [0.0, 1.0]. Scaling takes place in both endpoints
//    (WebRTC client and media layer). This approach ensures that we can avoid
//    transferring maximum levels between the renderer and the browser.
//

namespace content {

class WebRtcAudioCapturer;
class WebRtcAudioRenderer;

// TODO(xians): Move the following two interfaces to webrtc so that
// libjingle can own references to the renderer and capturer.
class WebRtcAudioRendererSource {
 public:
  // Callback to get the rendered data. The source fills |audio_bus| with
  // |sample_rate| playout data and reports the stream's current time via
  // |current_time|. Called on the AudioOutputDevice worker thread (see the
  // implementation notes above).
  virtual void RenderData(media::AudioBus* audio_bus,
                          int sample_rate,
                          int audio_delay_milliseconds,
                          base::TimeDelta* current_time) = 0;

  // Callback to notify the client that the renderer is going away.
  virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) = 0;

 protected:
  // Non-virtual-public destructor: implementations are never deleted through
  // this interface pointer.
  virtual ~WebRtcAudioRendererSource() {}
};

class PeerConnectionAudioSink {
 public:
  // Callback to deliver the captured interleaved data.
  // |channels| contains a vector of WebRtc VoE channels.
  // |audio_data| is the pointer to the audio data.
  // |sample_rate| is the sample frequency of audio data.
  // |number_of_channels| is the number of channels reflecting the order of
  // surround sound channels.
  // |audio_delay_milliseconds| is recording delay value.
  // |current_volume| is current microphone volume, in range of [0, 255].
  // |need_audio_processing| indicates if the audio needs WebRtc AEC/NS/AGC
  // audio processing.
  // The return value is the new microphone volume, in the range of [0, 255].
  // When the volume does not need to be updated, it returns 0.
  virtual int OnData(const int16* audio_data,
                     int sample_rate,
                     int number_of_channels,
                     int number_of_frames,
                     const std::vector<int>& channels,
                     int audio_delay_milliseconds,
                     int current_volume,
                     bool need_audio_processing,
                     bool key_pressed) = 0;

  // Set the format for the capture audio parameters.
  // This is called when the capture format has changed, and it must be called
  // on the same thread as calling OnData().
  virtual void OnSetFormat(const media::AudioParameters& params) = 0;

 protected:
  virtual ~PeerConnectionAudioSink() {}
};

// TODO(xians): Merge this interface with WebRtcAudioRendererSource.
// The reason why we could not do it today is that WebRtcAudioRendererSource
// gets the data by pulling, while the data is pushed into
// WebRtcPlayoutDataSource::Sink.
class WebRtcPlayoutDataSource {
 public:
  class Sink {
   public:
    // Callback to get the playout data.
    // Called on the render audio thread.
    virtual void OnPlayoutData(media::AudioBus* audio_bus,
                               int sample_rate,
                               int audio_delay_milliseconds) = 0;

    // Callback to notify the sink that the source has changed.
    // Called on the main render thread.
    virtual void OnPlayoutDataSourceChanged() = 0;

   protected:
    virtual ~Sink() {}
  };

  // Adds/Removes the sink of WebRtcAudioRendererSource to the ADM.
  // These methods are used by the MediaStreamAudioProcessor to get the
  // rendered data for AEC.
  virtual void AddPlayoutSink(Sink* sink) = 0;
  virtual void RemovePlayoutSink(Sink* sink) = 0;

 protected:
  virtual ~WebRtcPlayoutDataSource() {}
};

// Note that this class inherits from webrtc::AudioDeviceModule but due to
// the high number of non-implemented methods, we move the cruft over to the
// WebRtcAudioDeviceNotImpl.
class CONTENT_EXPORT WebRtcAudioDeviceImpl
    : NON_EXPORTED_BASE(public PeerConnectionAudioSink),
      NON_EXPORTED_BASE(public WebRtcAudioDeviceNotImpl),
      NON_EXPORTED_BASE(public WebRtcAudioRendererSource),
      NON_EXPORTED_BASE(public WebRtcPlayoutDataSource) {
 public:
  // The maximum volume value WebRtc uses.
  static const int kMaxVolumeLevel = 255;

  // Instances of this object are created on the main render thread.
  WebRtcAudioDeviceImpl();

  // webrtc::RefCountedModule implementation.
  // The creator must call AddRef() after construction and use Release()
  // to release the reference and delete this object.
  // Called on the main render thread.
  virtual int32_t AddRef() OVERRIDE;
  virtual int32_t Release() OVERRIDE;

  // webrtc::AudioDeviceModule implementation.
  // All implemented methods are called on the main render thread unless
  // anything else is stated.

  virtual int32_t RegisterAudioCallback(webrtc::AudioTransport* audio_callback)
      OVERRIDE;

  virtual int32_t Init() OVERRIDE;
  virtual int32_t Terminate() OVERRIDE;
  virtual bool Initialized() const OVERRIDE;

  virtual int32_t PlayoutIsAvailable(bool* available) OVERRIDE;
  virtual bool PlayoutIsInitialized() const OVERRIDE;
  virtual int32_t RecordingIsAvailable(bool* available) OVERRIDE;
  virtual bool RecordingIsInitialized() const OVERRIDE;

  // All Start/Stop methods are called on a libJingle worker thread.
  virtual int32_t StartPlayout() OVERRIDE;
  virtual int32_t StopPlayout() OVERRIDE;
  virtual bool Playing() const OVERRIDE;
  virtual int32_t StartRecording() OVERRIDE;
  virtual int32_t StopRecording() OVERRIDE;
  virtual bool Recording() const OVERRIDE;

  // Called on the AudioInputDevice worker thread.
  virtual int32_t SetMicrophoneVolume(uint32_t volume) OVERRIDE;

  // TODO(henrika): sort out calling thread once we start using this API.
  virtual int32_t MicrophoneVolume(uint32_t* volume) const OVERRIDE;

  virtual int32_t MaxMicrophoneVolume(uint32_t* max_volume) const OVERRIDE;
  virtual int32_t MinMicrophoneVolume(uint32_t* min_volume) const OVERRIDE;
  virtual int32_t StereoPlayoutIsAvailable(bool* available) const OVERRIDE;
  virtual int32_t StereoRecordingIsAvailable(bool* available) const OVERRIDE;
  virtual int32_t PlayoutDelay(uint16_t* delay_ms) const OVERRIDE;
  virtual int32_t RecordingDelay(uint16_t* delay_ms) const OVERRIDE;
  virtual int32_t RecordingSampleRate(uint32_t* sample_rate) const OVERRIDE;
  virtual int32_t PlayoutSampleRate(uint32_t* sample_rate) const OVERRIDE;

  // Sets the |renderer_|, returns false if |renderer_| already exists.
  // Called on the main renderer thread.
  bool SetAudioRenderer(WebRtcAudioRenderer* renderer);

  // Adds/Removes the capturer to the ADM.
  // TODO(xians): Remove these two methods once the ADM does not need to pass
  // hardware information up to WebRtc.
  void AddAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer);
  void RemoveAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer);

  // Gets the default capturer, which is the last capturer in |capturers_|.
  // The method can be called by both Libjingle thread and main render thread.
  scoped_refptr<WebRtcAudioCapturer> GetDefaultCapturer() const;

  // Gets paired device information of the capture device for the audio
  // renderer. This is used to pass on a session id, sample rate and buffer
  // size to a webrtc audio renderer (either local or remote), so that audio
  // will be rendered to a matching output device.
  // Returns true if the capture device has a paired output device, otherwise
  // false. Note that if there are more than one open capture device the
  // function will not be able to pick an appropriate device and return false.
  bool GetAuthorizedDeviceInfoForAudioRenderer(
      int* session_id, int* output_sample_rate, int* output_buffer_size);

  // Accessor for the current renderer; see SetAudioRenderer().
  const scoped_refptr<WebRtcAudioRenderer>& renderer() const {
    return renderer_;
  }

 private:
  // NOTE(review): std::list and base::Lock are used below, but <list> and
  // "base/synchronization/lock.h" are not included directly by this header;
  // they are apparently picked up transitively (e.g. via
  // webrtc_audio_capturer.h). Consider including them explicitly (IWYU).
  typedef std::list<scoped_refptr<WebRtcAudioCapturer> > CapturerList;
  typedef std::list<WebRtcPlayoutDataSource::Sink*> PlayoutDataSinkList;
  class RenderBuffer;

  // Make destructor private to ensure that we can only be deleted by Release().
  virtual ~WebRtcAudioDeviceImpl();

  // PeerConnectionAudioSink implementation.

  // Called on the AudioInputDevice worker thread.
  virtual int OnData(const int16* audio_data,
                     int sample_rate,
                     int number_of_channels,
                     int number_of_frames,
                     const std::vector<int>& channels,
                     int audio_delay_milliseconds,
                     int current_volume,
                     bool need_audio_processing,
                     bool key_pressed) OVERRIDE;

  // Called on the AudioInputDevice worker thread.
  virtual void OnSetFormat(const media::AudioParameters& params) OVERRIDE;

  // WebRtcAudioRendererSource implementation.

  // Called on the AudioOutputDevice worker thread.
  virtual void RenderData(media::AudioBus* audio_bus,
                          int sample_rate,
                          int audio_delay_milliseconds,
                          base::TimeDelta* current_time) OVERRIDE;

  // Called on the main render thread.
  virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) OVERRIDE;

  // WebRtcPlayoutDataSource implementation.
  virtual void AddPlayoutSink(WebRtcPlayoutDataSource::Sink* sink) OVERRIDE;
  virtual void RemovePlayoutSink(WebRtcPlayoutDataSource::Sink* sink) OVERRIDE;

  // Used to DCHECK that we are called on the correct thread.
  base::ThreadChecker thread_checker_;

  // Manual reference count for this module; incremented/decremented by
  // AddRef()/Release() above.
  int ref_count_;

  // List of capturers which provides access to the native audio input layer
  // in the browser process.
  CapturerList capturers_;

  // Provides access to the audio renderer in the browser process.
  scoped_refptr<WebRtcAudioRenderer> renderer_;

  // A list of raw pointers to WebRtcPlayoutDataSource::Sink objects which want
  // to get the playout data; each sink needs to call RemovePlayoutSink()
  // before it goes away.
  PlayoutDataSinkList playout_sinks_;

  // Weak reference to the audio callback.
  // The webrtc client defines |audio_transport_callback_| by calling
  // RegisterAudioCallback().
  webrtc::AudioTransport* audio_transport_callback_;

  // Cached value of the current audio delay on the input/capture side.
  int input_delay_ms_;

  // Cached value of the current audio delay on the output/renderer side.
  int output_delay_ms_;

  // Protects |recording_|, |output_delay_ms_|, |input_delay_ms_|, |renderer_|
  // and |microphone_volume_|.
  mutable base::Lock lock_;

  // Used to protect the racing of calling OnData() since there can be more
  // than one input stream calling OnData().
  mutable base::Lock capture_callback_lock_;

  bool initialized_;
  bool playing_;
  bool recording_;

  // Stores latest microphone volume received in an OnData() callback.
  // Range is [0, 255].
  uint32_t microphone_volume_;

  // Buffer used for temporary storage during render callback.
  // It is only accessed by the audio render thread.
  std::vector<int16> render_buffer_;

  // Flag to tell if audio processing is enabled in MediaStreamAudioProcessor.
  const bool is_audio_track_processing_enabled_;

  DISALLOW_COPY_AND_ASSIGN(WebRtcAudioDeviceImpl);
};

}  // namespace content

#endif  // CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_