Home | History | Annotate | Download | only in media
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      5 #include "content/renderer/media/webrtc_audio_renderer.h"
      7 #include "base/logging.h"
      8 #include "base/metrics/histogram.h"
      9 #include "base/strings/string_util.h"
     10 #include "base/strings/stringprintf.h"
     11 #include "content/renderer/media/audio_device_factory.h"
     12 #include "content/renderer/media/webrtc_audio_device_impl.h"
     13 #include "content/renderer/media/webrtc_logging.h"
     14 #include "media/audio/audio_output_device.h"
     15 #include "media/audio/audio_parameters.h"
     16 #include "media/audio/sample_rates.h"
     18 #if defined(OS_WIN)
     19 #include "base/win/windows_version.h"
     20 #include "media/audio/win/core_audio_util_win.h"
     21 #endif
     23 namespace content {
     25 namespace {
// Supported hardware sample rates for the output side. Initialize() looks the
// requested rate up in this table and fails when it is not listed.
#if defined(OS_WIN) || defined(OS_MACOSX)
// AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X.  The rates
// listed below add restrictions, and Initialize() will fail if the user
// selects any rate outside this set.
const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidOutputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
// TODO(leozwang): We want to use the native sampling rate on Android to
// achieve low latency; currently 16000 is used to work around audio problems
// on some Android devices.
const int kValidOutputRates[] = {48000, 44100, 16000};
#else
const int kValidOutputRates[] = {44100};
#endif
     45 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
     46 enum AudioFramesPerBuffer {
     47   k160,
     48   k320,
     49   k440,
     50   k480,
     51   k640,
     52   k880,
     53   k960,
     54   k1440,
     55   k1920,
     56   kUnexpectedAudioBufferSize  // Must always be last!
     57 };
     59 // Helper method to convert integral values to their respective enum values
     60 // above, or kUnexpectedAudioBufferSize if no match exists.
     61 // We map 441 to k440 to avoid changes in the XML part for histograms.
     62 // It is still possible to map the histogram result to the actual buffer size.
     63 // See http://crbug.com/243450 for details.
     64 AudioFramesPerBuffer AsAudioFramesPerBuffer(int frames_per_buffer) {
     65   switch (frames_per_buffer) {
     66     case 160: return k160;
     67     case 320: return k320;
     68     case 441: return k440;
     69     case 480: return k480;
     70     case 640: return k640;
     71     case 880: return k880;
     72     case 960: return k960;
     73     case 1440: return k1440;
     74     case 1920: return k1920;
     75   }
     76   return kUnexpectedAudioBufferSize;
     77 }
     79 void AddHistogramFramesPerBuffer(int param) {
     80   AudioFramesPerBuffer afpb = AsAudioFramesPerBuffer(param);
     81   if (afpb != kUnexpectedAudioBufferSize) {
     82     UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
     83                               afpb, kUnexpectedAudioBufferSize);
     84   } else {
     85     // Report unexpected sample rates using a unique histogram name.
     86     UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputFramesPerBufferUnexpected", param);
     87   }
     88 }
     90 // This is a simple wrapper class that's handed out to users of a shared
     91 // WebRtcAudioRenderer instance.  This class maintains the per-user 'playing'
     92 // and 'started' states to avoid problems related to incorrect usage which
     93 // might violate the implementation assumptions inside WebRtcAudioRenderer
     94 // (see the play reference count).
     95 class SharedAudioRenderer : public MediaStreamAudioRenderer {
     96  public:
     97   SharedAudioRenderer(const scoped_refptr<MediaStreamAudioRenderer>& delegate)
     98       : delegate_(delegate), started_(false), playing_(false) {
     99   }
    101  protected:
    102   virtual ~SharedAudioRenderer() {
    103     DCHECK(thread_checker_.CalledOnValidThread());
    104     DVLOG(1) << __FUNCTION__;
    105     Stop();
    106   }
    108   virtual void Start() OVERRIDE {
    109     DCHECK(thread_checker_.CalledOnValidThread());
    110     if (started_)
    111       return;
    112     started_ = true;
    113     delegate_->Start();
    114   }
    116   virtual void Play() OVERRIDE {
    117     DCHECK(thread_checker_.CalledOnValidThread());
    118     DCHECK(started_);
    119     if (playing_)
    120       return;
    121     playing_ = true;
    122     delegate_->Play();
    123   }
    125   virtual void Pause() OVERRIDE {
    126     DCHECK(thread_checker_.CalledOnValidThread());
    127     DCHECK(started_);
    128     if (!playing_)
    129       return;
    130     playing_ = false;
    131     delegate_->Pause();
    132   }
    134   virtual void Stop() OVERRIDE {
    135     DCHECK(thread_checker_.CalledOnValidThread());
    136     if (!started_)
    137       return;
    138     Pause();
    139     started_ = false;
    140     delegate_->Stop();
    141   }
    143   virtual void SetVolume(float volume) OVERRIDE {
    144     DCHECK(thread_checker_.CalledOnValidThread());
    145     return delegate_->SetVolume(volume);
    146   }
    148   virtual base::TimeDelta GetCurrentRenderTime() const OVERRIDE {
    149     DCHECK(thread_checker_.CalledOnValidThread());
    150     return delegate_->GetCurrentRenderTime();
    151   }
    153   virtual bool IsLocalRenderer() const OVERRIDE {
    154     DCHECK(thread_checker_.CalledOnValidThread());
    155     return delegate_->IsLocalRenderer();
    156   }
    158  private:
    159   base::ThreadChecker thread_checker_;
    160   scoped_refptr<MediaStreamAudioRenderer> delegate_;
    161   bool started_;
    162   bool playing_;
    163 };
    165 }  // namespace
    167 WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id,
    168                                          int session_id,
    169                                          int sample_rate,
    170                                          int frames_per_buffer)
    171     : state_(UNINITIALIZED),
    172       source_render_view_id_(source_render_view_id),
    173       session_id_(session_id),
    174       source_(NULL),
    175       play_ref_count_(0),
    176       start_ref_count_(0),
    177       audio_delay_milliseconds_(0),
    178       fifo_delay_milliseconds_(0),
    179       sample_rate_(sample_rate),
    180       frames_per_buffer_(frames_per_buffer) {
    181   WebRtcLogMessage(base::StringPrintf(
    182       "WAR::WAR. source_render_view_id=%d"
    183       ", session_id=%d, sample_rate=%d, frames_per_buffer=%d",
    184       source_render_view_id,
    185       session_id,
    186       sample_rate,
    187       frames_per_buffer));
    188 }
// Expects the renderer to be back in the UNINITIALIZED state (see Stop())
// before it is destroyed, and releases the local audio buffer.
WebRtcAudioRenderer::~WebRtcAudioRenderer() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_EQ(state_, UNINITIALIZED);
  buffer_.reset();
}
    196 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
    197   DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
    198   DCHECK(thread_checker_.CalledOnValidThread());
    199   base::AutoLock auto_lock(lock_);
    200   DCHECK_EQ(state_, UNINITIALIZED);
    201   DCHECK(source);
    202   DCHECK(!sink_.get());
    203   DCHECK(!source_);
    205   // Use stereo output on all platforms.
    206   media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO;
    208   // TODO(tommi,henrika): Maybe we should just change |sample_rate_| to be
    209   // immutable and change its value instead of using a temporary?
    210   int sample_rate = sample_rate_;
    211   DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;
    213   // WebRTC does not yet support higher rates than 96000 on the client side
    214   // and 48000 is the preferred sample rate. Therefore, if 192000 is detected,
    215   // we change the rate to 48000 instead. The consequence is that the native
    216   // layer will be opened up at 192kHz but WebRTC will provide data at 48kHz
    217   // which will then be resampled by the audio converted on the browser side
    218   // to match the native audio layer.
    219   if (sample_rate == 192000) {
    220     DVLOG(1) << "Resampling from 48000 to 192000 is required";
    221     sample_rate = 48000;
    222   }
    223   media::AudioSampleRate asr = media::AsAudioSampleRate(sample_rate);
    224   if (asr != media::kUnexpectedAudioSampleRate) {
    226         "WebRTC.AudioOutputSampleRate", asr, media::kUnexpectedAudioSampleRate);
    227   } else {
    228     UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputSampleRateUnexpected", sample_rate);
    229   }
    231   // Verify that the reported output hardware sample rate is supported
    232   // on the current platform.
    233   if (std::find(&kValidOutputRates[0],
    234                 &kValidOutputRates[0] + arraysize(kValidOutputRates),
    235                 sample_rate) ==
    236                     &kValidOutputRates[arraysize(kValidOutputRates)]) {
    237     DLOG(ERROR) << sample_rate << " is not a supported output rate.";
    238     return false;
    239   }
    241   // Set up audio parameters for the source, i.e., the WebRTC client.
    243   // The WebRTC client only supports multiples of 10ms as buffer size where
    244   // 10ms is preferred for lowest possible delay.
    245   media::AudioParameters source_params;
    246   int buffer_size = (sample_rate / 100);
    247   DVLOG(1) << "Using WebRTC output buffer size: " << buffer_size;
    249   int channels = ChannelLayoutToChannelCount(channel_layout);
    250   source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
    251                       channel_layout, channels, 0,
    252                       sample_rate, 16, buffer_size);
    254   // Set up audio parameters for the sink, i.e., the native audio output stream.
    255   // We strive to open up using native parameters to achieve best possible
    256   // performance and to ensure that no FIFO is needed on the browser side to
    257   // match the client request. Any mismatch between the source and the sink is
    258   // taken care of in this class instead using a pull FIFO.
    260   media::AudioParameters sink_params;
    262   // Use native output siz as default.
    263   buffer_size = frames_per_buffer_;
    264 #if defined(OS_ANDROID)
    265   // TODO(henrika): Keep tuning this scheme and espcicially for low-latency
    266   // cases. Might not be possible to come up with the perfect solution using
    267   // the render side only.
    268   const int frames_per_10ms = (sample_rate / 100);
    269   if (buffer_size < 2 * frames_per_10ms) {
    270     // Examples of low-latency frame sizes and the resulting |buffer_size|:
    271     //  Nexus 7     : 240 audio frames => 2*480 = 960
    272     //  Nexus 10    : 256              => 2*441 = 882
    273     //  Galaxy Nexus: 144              => 2*441 = 882
    274     buffer_size = 2 * frames_per_10ms;
    275     DVLOG(1) << "Low-latency output detected on Android";
    276   }
    277 #endif
    278   DVLOG(1) << "Using sink output buffer size: " << buffer_size;
    280   sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
    281                     channel_layout, channels, 0, sample_rate, 16, buffer_size);
    283   // Create a FIFO if re-buffering is required to match the source input with
    284   // the sink request. The source acts as provider here and the sink as
    285   // consumer.
    286   fifo_delay_milliseconds_ = 0;
    287   if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
    288     DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
    289              << " to " << sink_params.frames_per_buffer();
    290     audio_fifo_.reset(new media::AudioPullFifo(
    291         source_params.channels(),
    292         source_params.frames_per_buffer(),
    293         base::Bind(
    294             &WebRtcAudioRenderer::SourceCallback,
    295             base::Unretained(this))));
    297     if (sink_params.frames_per_buffer() > source_params.frames_per_buffer()) {
    298       int frame_duration_milliseconds = base::Time::kMillisecondsPerSecond /
    299           static_cast<double>(source_params.sample_rate());
    300       fifo_delay_milliseconds_ = (sink_params.frames_per_buffer() -
    301         source_params.frames_per_buffer()) * frame_duration_milliseconds;
    302     }
    303   }
    305   // Allocate local audio buffers based on the parameters above.
    306   // It is assumed that each audio sample contains 16 bits and each
    307   // audio frame contains one or two audio samples depending on the
    308   // number of channels.
    309   buffer_.reset(
    310       new int16[source_params.frames_per_buffer() * source_params.channels()]);
    312   source_ = source;
    313   source->SetRenderFormat(source_params);
    315   // Configure the audio rendering client and start rendering.
    316   sink_ = AudioDeviceFactory::NewOutputDevice(source_render_view_id_);
    318   // TODO(tommi): Rename InitializeUnifiedStream to rather reflect association
    319   // with a session.
    320   DCHECK_GE(session_id_, 0);
    321   sink_->InitializeUnifiedStream(sink_params, this, session_id_);
    323   sink_->Start();
    325   // User must call Play() before any audio can be heard.
    326   state_ = PAUSED;
    328   UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
    329                             source_params.channel_layout(),
    330                             media::CHANNEL_LAYOUT_MAX);
    331   UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
    332                             source_params.frames_per_buffer(),
    333                             kUnexpectedAudioBufferSize);
    334   AddHistogramFramesPerBuffer(source_params.frames_per_buffer());
    336   return true;
    337 }
    339 scoped_refptr<MediaStreamAudioRenderer>
    340 WebRtcAudioRenderer::CreateSharedAudioRendererProxy() {
    341   return new SharedAudioRenderer(this);
    342 }
// Returns true while the start reference count (incremented by Start(),
// decremented by Stop()) is non-zero.
bool WebRtcAudioRenderer::IsStarted() const {
  DCHECK(thread_checker_.CalledOnValidThread());
  return start_ref_count_ != 0;
}
// Only increments the start reference count; the sink itself is started in
// Initialize(). Each call is expected to be balanced by a call to Stop().
void WebRtcAudioRenderer::Start() {
  DVLOG(1) << "WebRtcAudioRenderer::Start()";
  DCHECK(thread_checker_.CalledOnValidThread());
  ++start_ref_count_;
}
    355 void WebRtcAudioRenderer::Play() {
    356   DVLOG(1) << "WebRtcAudioRenderer::Play()";
    357   DCHECK(thread_checker_.CalledOnValidThread());
    358   DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
    359   base::AutoLock auto_lock(lock_);
    360   if (state_ == UNINITIALIZED)
    361     return;
    363   DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
    364   ++play_ref_count_;
    366   if (state_ != PLAYING) {
    367     state_ = PLAYING;
    369     if (audio_fifo_) {
    370       audio_delay_milliseconds_ = 0;
    371       audio_fifo_->Clear();
    372     }
    373   }
    374 }
    376 void WebRtcAudioRenderer::Pause() {
    377   DVLOG(1) << "WebRtcAudioRenderer::Pause()";
    378   DCHECK(thread_checker_.CalledOnValidThread());
    379   DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
    380   base::AutoLock auto_lock(lock_);
    381   if (state_ == UNINITIALIZED)
    382     return;
    384   DCHECK_EQ(state_, PLAYING);
    385   DCHECK_GT(play_ref_count_, 0);
    386   if (!--play_ref_count_)
    387     state_ = PAUSED;
    388 }
    390 void WebRtcAudioRenderer::Stop() {
    391   DVLOG(1) << "WebRtcAudioRenderer::Stop()";
    392   DCHECK(thread_checker_.CalledOnValidThread());
    393   {
    394     base::AutoLock auto_lock(lock_);
    395     if (state_ == UNINITIALIZED)
    396       return;
    398     if (--start_ref_count_)
    399       return;
    401     DVLOG(1) << "Calling RemoveAudioRenderer and Stop().";
    403     source_->RemoveAudioRenderer(this);
    404     source_ = NULL;
    405     state_ = UNINITIALIZED;
    406   }
    408   // Make sure to stop the sink while _not_ holding the lock since the Render()
    409   // callback may currently be executing and try to grab the lock while we're
    410   // stopping the thread on which it runs.
    411   sink_->Stop();
    412 }
    414 void WebRtcAudioRenderer::SetVolume(float volume) {
    415   DCHECK(thread_checker_.CalledOnValidThread());
    416   base::AutoLock auto_lock(lock_);
    417   if (state_ == UNINITIALIZED)
    418     return;
    420   sink_->SetVolume(volume);
    421 }
// Always returns a default-constructed base::TimeDelta; this renderer does
// not track a render time.
base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
  return base::TimeDelta();
}
// Always returns false.
bool WebRtcAudioRenderer::IsLocalRenderer() const {
  return false;
}
    431 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
    432                                 int audio_delay_milliseconds) {
    433   base::AutoLock auto_lock(lock_);
    434   if (!source_)
    435     return 0;
    437   DVLOG(2) << "WebRtcAudioRenderer::Render()";
    438   DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds;
    440   audio_delay_milliseconds_ = audio_delay_milliseconds;
    442   if (audio_fifo_)
    443     audio_fifo_->Consume(audio_bus, audio_bus->frames());
    444   else
    445     SourceCallback(0, audio_bus);
    447   return (state_ == PLAYING) ? audio_bus->frames() : 0;
    448 }
// Render errors are only logged; no recovery is attempted here.
void WebRtcAudioRenderer::OnRenderError() {
  LOG(ERROR) << "OnRenderError()";
}
    455 // Called by AudioPullFifo when more data is necessary.
    456 void WebRtcAudioRenderer::SourceCallback(
    457     int fifo_frame_delay, media::AudioBus* audio_bus) {
    458   DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
    459            << fifo_frame_delay << ", "
    460            << audio_bus->frames() << ")";
    462   int output_delay_milliseconds = audio_delay_milliseconds_;
    463   output_delay_milliseconds += fifo_delay_milliseconds_;
    464   DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds;
    466   // We need to keep render data for the |source_| regardless of |state_|,
    467   // otherwise the data will be buffered up inside |source_|.
    468   source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
    469                       audio_bus->channels(), audio_bus->frames(),
    470                       output_delay_milliseconds);
    472   // Avoid filling up the audio bus if we are not playing; instead
    473   // return here and ensure that the returned value in Render() is 0.
    474   if (state_ != PLAYING) {
    475     audio_bus->Zero();
    476     return;
    477   }
    479   // De-interleave each channel and convert to 32-bit floating-point
    480   // with nominal range -1.0 -> +1.0 to match the callback format.
    481   audio_bus->FromInterleaved(buffer_.get(),
    482                              audio_bus->frames(),
    483                              sizeof(buffer_[0]));
    484 }
    486 }  // namespace content