Home | History | Annotate | Download | only in media
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
      6 #define CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
      7 
#include <list>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/threading/thread_checker.h"
#include "content/common/content_export.h"
#include "content/renderer/media/webrtc_audio_capturer.h"
#include "content/renderer/media/webrtc_audio_device_not_impl.h"
#include "content/renderer/media/webrtc_audio_renderer.h"
#include "media/base/audio_capturer_source.h"
#include "media/base/audio_renderer_sink.h"
     23 
     24 // A WebRtcAudioDeviceImpl instance implements the abstract interface
     25 // webrtc::AudioDeviceModule which makes it possible for a user (e.g. webrtc::
     26 // VoiceEngine) to register this class as an external AudioDeviceModule (ADM).
     27 // Then WebRtcAudioDeviceImpl::SetSessionId() needs to be called to set the
     28 // session id that tells which device to use. The user can then call
     29 // WebRtcAudioDeviceImpl::StartPlayout() and
     30 // WebRtcAudioDeviceImpl::StartRecording() from the render process to initiate
     31 // and start audio rendering and capturing in the browser process. IPC is
     32 // utilized to set up the media streams.
     33 //
     34 // Usage example:
     35 //
     36 //   using namespace webrtc;
     37 //
     38 //   {
     39 //      scoped_refptr<WebRtcAudioDeviceImpl> external_adm;
     40 //      external_adm = new WebRtcAudioDeviceImpl();
     41 //      external_adm->SetSessionId(session_id);
     42 //      VoiceEngine* voe = VoiceEngine::Create();
     43 //      VoEBase* base = VoEBase::GetInterface(voe);
     44 //      base->Init(external_adm);
     45 //      int ch = base->CreateChannel();
     46 //      ...
     47 //      base->StartReceive(ch)
     48 //      base->StartPlayout(ch);
     49 //      base->StartSending(ch);
     50 //      ...
     51 //      <== full-duplex audio session with AGC enabled ==>
     52 //      ...
     53 //      base->DeleteChannel(ch);
     54 //      base->Terminate();
     55 //      base->Release();
     56 //      VoiceEngine::Delete(voe);
     57 //   }
     58 //
     59 // webrtc::VoiceEngine::Init() calls these ADM methods (in this order):
     60 //
     61 //  RegisterAudioCallback(this)
//    webrtc::VoiceEngine is a webrtc::AudioTransport implementation and
     63 //    implements the RecordedDataIsAvailable() and NeedMorePlayData() callbacks.
     64 //
     65 //  Init()
     66 //    Creates and initializes the AudioOutputDevice and AudioInputDevice
     67 //    objects.
     68 //
     69 //  SetAGC(true)
     70 //    Enables the adaptive analog mode of the AGC which ensures that a
     71 //    suitable microphone volume level will be set. This scheme will affect
     72 //    the actual microphone control slider.
     73 //
     74 // AGC overview:
     75 //
     76 // It aims to maintain a constant speech loudness level from the microphone.
     77 // This is done by both controlling the analog microphone gain and applying
     78 // digital gain. The microphone gain on the sound card is slowly
     79 // increased/decreased during speech only. By observing the microphone control
     80 // slider you can see it move when you speak. If you scream, the slider moves
     81 // downwards and then upwards again when you return to normal. It is not
     82 // uncommon that the slider hits the maximum. This means that the maximum
     83 // analog gain is not large enough to give the desired loudness. Nevertheless,
     84 // we can in general still attain the desired loudness. If the microphone
     85 // control slider is moved manually, the gain adaptation restarts and returns
     86 // to roughly the same position as before the change if the circumstances are
     87 // still the same. When the input microphone signal causes saturation, the
     88 // level is decreased dramatically and has to re-adapt towards the old level.
     89 // The adaptation is a slowly varying process and at the beginning of capture
     90 // this is noticed by a slow increase in volume. Smaller changes in microphone
// input level are leveled out by the built-in digital control. For larger
     92 // differences we need to rely on the slow adaptation.
     93 // See http://en.wikipedia.org/wiki/Automatic_gain_control for more details.
     94 //
     95 // AGC implementation details:
     96 //
     97 // The adaptive analog mode of the AGC is always enabled for desktop platforms
     98 // in WebRTC.
     99 //
    100 // Before recording starts, the ADM enables AGC on the AudioInputDevice.
    101 //
    102 // A capture session with AGC is started up as follows (simplified):
    103 //
    104 //                            [renderer]
    105 //                                |
    106 //                     ADM::StartRecording()
    107 //             AudioInputDevice::InitializeOnIOThread()
    108 //           AudioInputHostMsg_CreateStream(..., agc=true)               [IPC]
    109 //                                |
    110 //                       [IPC to the browser]
    111 //                                |
    112 //              AudioInputRendererHost::OnCreateStream()
    113 //              AudioInputController::CreateLowLatency()
    114 //         AudioInputController::DoSetAutomaticGainControl(true)
    115 //            AudioInputStream::SetAutomaticGainControl(true)
    116 //                                |
    117 // AGC is now enabled in the media layer and streaming starts (details omitted).
    118 // The figure below illustrates the AGC scheme which is active in combination
    119 // with the default media flow explained earlier.
    120 //                                |
    121 //                            [browser]
    122 //                                |
    123 //                AudioInputStream::(Capture thread loop)
    124 //  AgcAudioStream<AudioInputStream>::GetAgcVolume() => get latest mic volume
    125 //                 AudioInputData::OnData(..., volume)
    126 //              AudioInputController::OnData(..., volume)
    127 //               AudioInputSyncWriter::Write(..., volume)
    128 //                                |
    129 //      [volume | size | data] is sent to the renderer         [shared memory]
    130 //                                |
    131 //                            [renderer]
    132 //                                |
    133 //          AudioInputDevice::AudioThreadCallback::Process()
    134 //            WebRtcAudioDeviceImpl::Capture(..., volume)
    135 //    AudioTransport::RecordedDataIsAvailable(...,volume, new_volume)
    136 //                                |
    137 // The AGC now uses the current volume input and computes a suitable new
// level given by the |new_volume| output. This value is only non-zero if the
// AGC has taken a decision that the microphone level should change.
    140 //                                |
    141 //                      if (new_volume != 0)
    142 //              AudioInputDevice::SetVolume(new_volume)
    143 //              AudioInputHostMsg_SetVolume(new_volume)                  [IPC]
    144 //                                |
    145 //                       [IPC to the browser]
    146 //                                |
    147 //                 AudioInputRendererHost::OnSetVolume()
    148 //                  AudioInputController::SetVolume()
    149 //             AudioInputStream::SetVolume(scaled_volume)
    150 //                                |
    151 // Here we set the new microphone level in the media layer and at the same time
    152 // read the new setting (we might not get exactly what is set).
    153 //                                |
    154 //             AudioInputData::OnData(..., updated_volume)
    155 //           AudioInputController::OnData(..., updated_volume)
    156 //                                |
    157 //                                |
// This process repeats until we stop capturing data. Note that a common
    159 // steady state is that the volume control reaches its max and the new_volume
    160 // value from the AGC is zero. A loud voice input is required to break this
    161 // state and start lowering the level again.
    162 //
    163 // Implementation notes:
    164 //
    165 //  - This class must be created and destroyed on the main render thread and
    166 //    most methods are called on the same thread. However, some methods are
//    also called on a Libjingle worker thread. RenderData is called on the
//    AudioOutputDevice thread and OnData on the AudioInputDevice thread.
    169 //    To summarize: this class lives on four different threads.
    170 //  - The webrtc::AudioDeviceModule is reference counted.
    171 //  - AGC is only supported in combination with the WASAPI-based audio layer
    172 //    on Windows, i.e., it is not supported on Windows XP.
//  - All volume levels required for the AGC scheme are transferred in a
//    normalized range [0.0, 1.0]. Scaling takes place in both endpoints
//    (WebRTC client and media layer). This approach ensures that we can avoid
    176 //    transferring maximum levels between the renderer and the browser.
    177 //
    178 
    179 namespace content {
    180 
// Forward declarations of the capturer and renderer types referenced by the
// declarations below.
class WebRtcAudioCapturer;
class WebRtcAudioRenderer;
    183 
    184 // TODO(xians): Move the following two interfaces to webrtc so that
    185 // libjingle can own references to the renderer and capturer.
    186 class WebRtcAudioRendererSource {
    187  public:
    188   // Callback to get the rendered interleaved data.
    189   // TODO(xians): Change uint8* to int16*.
    190   virtual void RenderData(uint8* audio_data,
    191                           int number_of_channels,
    192                           int number_of_frames,
    193                           int audio_delay_milliseconds) = 0;
    194 
    195   // Set the format for the capture audio parameters.
    196   virtual void SetRenderFormat(const media::AudioParameters& params) = 0;
    197 
    198   // Callback to notify the client that the renderer is going away.
    199   virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) = 0;
    200 
    201  protected:
    202   virtual ~WebRtcAudioRendererSource() {}
    203 };
    204 
    205 class PeerConnectionAudioSink {
    206  public:
    207   // Callback to deliver the captured interleaved data.
    208   // |channels| contains a vector of WebRtc VoE channels.
    209   // |audio_data| is the pointer to the audio data.
    210   // |sample_rate| is the sample frequency of audio data.
    211   // |number_of_channels| is the number of channels reflecting the order of
    212   // surround sound channels.
    213   // |audio_delay_milliseconds| is recording delay value.
    214   // |current_volume| is current microphone volume, in range of |0, 255].
    215   // |need_audio_processing| indicates if the audio needs WebRtc AEC/NS/AGC
    216   // audio processing.
    217   // The return value is the new microphone volume, in the range of |0, 255].
    218   // When the volume does not need to be updated, it returns 0.
    219   virtual int OnData(const int16* audio_data,
    220                      int sample_rate,
    221                      int number_of_channels,
    222                      int number_of_frames,
    223                      const std::vector<int>& channels,
    224                      int audio_delay_milliseconds,
    225                      int current_volume,
    226                      bool need_audio_processing,
    227                      bool key_pressed) = 0;
    228 
    229   // Set the format for the capture audio parameters.
    230   // This is called when the capture format has changed, and it must be called
    231   // on the same thread as calling CaptureData().
    232   virtual void OnSetFormat(const media::AudioParameters& params) = 0;
    233 
    234  protected:
    235  virtual ~PeerConnectionAudioSink() {}
    236 };
    237 
    238 // Note that this class inherits from webrtc::AudioDeviceModule but due to
    239 // the high number of non-implemented methods, we move the cruft over to the
    240 // WebRtcAudioDeviceNotImpl.
    241 class CONTENT_EXPORT WebRtcAudioDeviceImpl
    242     : NON_EXPORTED_BASE(public PeerConnectionAudioSink),
    243       NON_EXPORTED_BASE(public WebRtcAudioDeviceNotImpl),
    244       NON_EXPORTED_BASE(public WebRtcAudioRendererSource) {
    245  public:
    246   // The maximum volume value WebRtc uses.
    247   static const int kMaxVolumeLevel = 255;
    248 
    249   // Instances of this object are created on the main render thread.
    250   WebRtcAudioDeviceImpl();
    251 
    252   // webrtc::RefCountedModule implementation.
    253   // The creator must call AddRef() after construction and use Release()
    254   // to release the reference and delete this object.
    255   // Called on the main render thread.
    256   virtual int32_t AddRef() OVERRIDE;
    257   virtual int32_t Release() OVERRIDE;
    258 
    259   // webrtc::AudioDeviceModule implementation.
    260   // All implemented methods are called on the main render thread unless
    261   // anything else is stated.
    262 
    263   virtual int32_t RegisterAudioCallback(webrtc::AudioTransport* audio_callback)
    264       OVERRIDE;
    265 
    266   virtual int32_t Init() OVERRIDE;
    267   virtual int32_t Terminate() OVERRIDE;
    268   virtual bool Initialized() const OVERRIDE;
    269 
    270   virtual int32_t PlayoutIsAvailable(bool* available) OVERRIDE;
    271   virtual bool PlayoutIsInitialized() const OVERRIDE;
    272   virtual int32_t RecordingIsAvailable(bool* available) OVERRIDE;
    273   virtual bool RecordingIsInitialized() const OVERRIDE;
    274 
    275   // All Start/Stop methods are called on a libJingle worker thread.
    276   virtual int32_t StartPlayout() OVERRIDE;
    277   virtual int32_t StopPlayout() OVERRIDE;
    278   virtual bool Playing() const OVERRIDE;
    279   virtual int32_t StartRecording() OVERRIDE;
    280   virtual int32_t StopRecording() OVERRIDE;
    281   virtual bool Recording() const OVERRIDE;
    282 
    283   // Called on the AudioInputDevice worker thread.
    284   virtual int32_t SetMicrophoneVolume(uint32_t volume) OVERRIDE;
    285 
    286   // TODO(henrika): sort out calling thread once we start using this API.
    287   virtual int32_t MicrophoneVolume(uint32_t* volume) const OVERRIDE;
    288 
    289   virtual int32_t MaxMicrophoneVolume(uint32_t* max_volume) const OVERRIDE;
    290   virtual int32_t MinMicrophoneVolume(uint32_t* min_volume) const OVERRIDE;
    291   virtual int32_t StereoPlayoutIsAvailable(bool* available) const OVERRIDE;
    292   virtual int32_t StereoRecordingIsAvailable(bool* available) const OVERRIDE;
    293   virtual int32_t PlayoutDelay(uint16_t* delay_ms) const OVERRIDE;
    294   virtual int32_t RecordingDelay(uint16_t* delay_ms) const OVERRIDE;
    295   virtual int32_t RecordingSampleRate(uint32_t* samples_per_sec) const OVERRIDE;
    296   virtual int32_t PlayoutSampleRate(uint32_t* samples_per_sec) const OVERRIDE;
    297 
    298   // Sets the |renderer_|, returns false if |renderer_| already exists.
    299   // Called on the main renderer thread.
    300   bool SetAudioRenderer(WebRtcAudioRenderer* renderer);
    301 
    302   // Adds the capturer to the ADM.
    303   void AddAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer);
    304 
    305   // Gets the default capturer, which is the capturer in the list with
    306   // a valid |device_id|. Microphones are represented by capturers with a valid
    307   // |device_id|, since only one microphone is supported today, only one
    308   // capturer in the |capturers_| can have a valid |device_id|.
    309   scoped_refptr<WebRtcAudioCapturer> GetDefaultCapturer() const;
    310 
    311   const scoped_refptr<WebRtcAudioRenderer>& renderer() const {
    312     return renderer_;
    313   }
    314   int output_buffer_size() const {
    315     return output_audio_parameters_.frames_per_buffer();
    316   }
    317   int output_channels() const {
    318     return output_audio_parameters_.channels();
    319   }
    320   int output_sample_rate() const {
    321     return output_audio_parameters_.sample_rate();
    322   }
    323 
    324  private:
    325   typedef std::list<scoped_refptr<WebRtcAudioCapturer> > CapturerList;
    326 
    327   // Make destructor private to ensure that we can only be deleted by Release().
    328   virtual ~WebRtcAudioDeviceImpl();
    329 
    330   // PeerConnectionAudioSink implementation.
    331 
    332   // Called on the AudioInputDevice worker thread.
    333   virtual int OnData(const int16* audio_data,
    334                      int sample_rate,
    335                      int number_of_channels,
    336                      int number_of_frames,
    337                      const std::vector<int>& channels,
    338                      int audio_delay_milliseconds,
    339                      int current_volume,
    340                      bool need_audio_processing,
    341                      bool key_pressed) OVERRIDE;
    342 
    343   // Called on the AudioInputDevice worker thread.
    344   virtual void OnSetFormat(const media::AudioParameters& params) OVERRIDE;
    345 
    346   // WebRtcAudioRendererSource implementation.
    347 
    348   // Called on the AudioInputDevice worker thread.
    349   virtual void RenderData(uint8* audio_data,
    350                           int number_of_channels,
    351                           int number_of_frames,
    352                           int audio_delay_milliseconds) OVERRIDE;
    353 
    354   // Called on the main render thread.
    355   virtual void SetRenderFormat(const media::AudioParameters& params) OVERRIDE;
    356   virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) OVERRIDE;
    357 
    358   // Used to DCHECK that we are called on the correct thread.
    359   base::ThreadChecker thread_checker_;
    360 
    361   int ref_count_;
    362 
    363   // List of captures which provides access to the native audio input layer
    364   // in the browser process.
    365   CapturerList capturers_;
    366 
    367   // Provides access to the audio renderer in the browser process.
    368   scoped_refptr<WebRtcAudioRenderer> renderer_;
    369 
    370   // Weak reference to the audio callback.
    371   // The webrtc client defines |audio_transport_callback_| by calling
    372   // RegisterAudioCallback().
    373   webrtc::AudioTransport* audio_transport_callback_;
    374 
    375   // Cached values of used output audio parameters. Platform dependent.
    376   media::AudioParameters output_audio_parameters_;
    377 
    378   // Cached value of the current audio delay on the input/capture side.
    379   int input_delay_ms_;
    380 
    381   // Cached value of the current audio delay on the output/renderer side.
    382   int output_delay_ms_;
    383 
    384   // Protects |recording_|, |output_delay_ms_|, |input_delay_ms_|, |renderer_|
    385   // |recording_| and |microphone_volume_|.
    386   mutable base::Lock lock_;
    387 
    388   bool initialized_;
    389   bool playing_;
    390   bool recording_;
    391 
    392   // Used for histograms of total recording and playout times.
    393   base::Time start_capture_time_;
    394   base::Time start_render_time_;
    395 
    396   // Stores latest microphone volume received in a CaptureData() callback.
    397   // Range is [0, 255].
    398   uint32_t microphone_volume_;
    399 
    400   DISALLOW_COPY_AND_ASSIGN(WebRtcAudioDeviceImpl);
    401 };
    402 
    403 }  // namespace content
    404 
    405 #endif  // CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
    406