// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/webrtc_audio_renderer.h"

#include <algorithm>

#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/time/time.h"
#include "content/renderer/media/audio_device_factory.h"
#include "content/renderer/media/webrtc_audio_device_impl.h"
#include "content/renderer/media/webrtc_logging.h"
#include "media/audio/audio_output_device.h"
#include "media/audio/audio_parameters.h"
#include "media/audio/sample_rates.h"

#if defined(OS_WIN)
#include "base/win/windows_version.h"
#include "media/audio/win/core_audio_util_win.h"
#endif

namespace content {

namespace {

// Supported hardware sample rates for the output side.
#if defined(OS_WIN) || defined(OS_MACOSX)
// AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X. The rates
// listed below add restrictions, and Initialize() will fail if the user
// selects any rate outside these ranges.
const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidOutputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
// TODO(leozwang): We want to use the native sampling rate on Android to
// achieve low latency; currently 16000 is used to work around an audio
// problem on some Android devices.
const int kValidOutputRates[] = {48000, 44100, 16000};
#else
const int kValidOutputRates[] = {44100};
#endif

// TODO(xians): Merge the following code into WebRtcAudioCapturer, or remove.
enum AudioFramesPerBuffer {
  k160,
  k320,
  k440,
  k480,
  k640,
  k880,
  k960,
  k1440,
  k1920,
  kUnexpectedAudioBufferSize  // Must always be last!
};

// Helper method to convert integral values to their respective enum values
// above, or kUnexpectedAudioBufferSize if no match exists.
// We map 441 to k440 to avoid changes in the XML part for histograms.
// It is still possible to map the histogram result to the actual buffer size.
// See http://crbug.com/243450 for details.
AudioFramesPerBuffer AsAudioFramesPerBuffer(int frames_per_buffer) {
  switch (frames_per_buffer) {
    case 160: return k160;
    case 320: return k320;
    case 441: return k440;
    case 480: return k480;
    case 640: return k640;
    case 880: return k880;
    case 960: return k960;
    case 1440: return k1440;
    case 1920: return k1920;
  }
  return kUnexpectedAudioBufferSize;
}

void AddHistogramFramesPerBuffer(int param) {
  AudioFramesPerBuffer afpb = AsAudioFramesPerBuffer(param);
  if (afpb != kUnexpectedAudioBufferSize) {
    UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
                              afpb, kUnexpectedAudioBufferSize);
  } else {
    // Report unexpected buffer sizes using a unique histogram name.
    UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputFramesPerBufferUnexpected", param);
  }
}

// This is a simple wrapper class that's handed out to users of a shared
// WebRtcAudioRenderer instance. This class maintains the per-user 'playing'
// and 'started' states to avoid problems related to incorrect usage which
// might violate the implementation assumptions inside WebRtcAudioRenderer
// (see the play reference count).
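// For example, two media players rendering the same remote stream each get
// their own SharedAudioRenderer proxy from CreateSharedAudioRendererProxy();
// the shared WebRtcAudioRenderer then stays in the PLAYING state until both
// proxies have called Pause() (or Stop()).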
class SharedAudioRenderer : public MediaStreamAudioRenderer {
 public:
  explicit SharedAudioRenderer(
      const scoped_refptr<MediaStreamAudioRenderer>& delegate)
      : delegate_(delegate), started_(false), playing_(false) {
  }

 protected:
  virtual ~SharedAudioRenderer() {
    DCHECK(thread_checker_.CalledOnValidThread());
    DVLOG(1) << __FUNCTION__;
    Stop();
  }

  virtual void Start() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    if (started_)
      return;
    started_ = true;
    delegate_->Start();
  }

  virtual void Play() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    DCHECK(started_);
    if (playing_)
      return;
    playing_ = true;
    delegate_->Play();
  }

  virtual void Pause() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    DCHECK(started_);
    if (!playing_)
      return;
    playing_ = false;
    delegate_->Pause();
  }

  virtual void Stop() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    if (!started_)
      return;
    Pause();
    started_ = false;
    delegate_->Stop();
  }

  virtual void SetVolume(float volume) OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    delegate_->SetVolume(volume);
  }

  virtual base::TimeDelta GetCurrentRenderTime() const OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    return delegate_->GetCurrentRenderTime();
  }

  virtual bool IsLocalRenderer() const OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    return delegate_->IsLocalRenderer();
  }

 private:
  base::ThreadChecker thread_checker_;
  scoped_refptr<MediaStreamAudioRenderer> delegate_;
  bool started_;
  bool playing_;
};

}  // namespace

WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id,
                                         int session_id,
                                         int sample_rate,
                                         int frames_per_buffer)
    : state_(UNINITIALIZED),
      source_render_view_id_(source_render_view_id),
      session_id_(session_id),
      source_(NULL),
      play_ref_count_(0),
      start_ref_count_(0),
      audio_delay_milliseconds_(0),
      fifo_delay_milliseconds_(0),
      sample_rate_(sample_rate),
      frames_per_buffer_(frames_per_buffer) {
  WebRtcLogMessage(base::StringPrintf(
      "WAR::WAR. source_render_view_id=%d"
      ", session_id=%d, sample_rate=%d, frames_per_buffer=%d",
      source_render_view_id,
      session_id,
      sample_rate,
      frames_per_buffer));
}

WebRtcAudioRenderer::~WebRtcAudioRenderer() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_EQ(state_, UNINITIALIZED);
  buffer_.reset();
}

bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
  DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
  DCHECK(thread_checker_.CalledOnValidThread());
  base::AutoLock auto_lock(lock_);
  DCHECK_EQ(state_, UNINITIALIZED);
  DCHECK(source);
  DCHECK(!sink_.get());
  DCHECK(!source_);

  // Use stereo output on all platforms.
  media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO;

  // TODO(tommi,henrika): Maybe we should just change |sample_rate_| to be
  // immutable and change its value instead of using a temporary?
  int sample_rate = sample_rate_;
  DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;

  // WebRTC does not yet support rates higher than 96000 on the client side,
  // and 48000 is the preferred sample rate. Therefore, if 192000 is detected,
  // we change the rate to 48000 instead. The consequence is that the native
  // layer will be opened up at 192kHz but WebRTC will provide data at 48kHz,
  // which will then be resampled by the audio converter on the browser side
  // to match the native audio layer.
  if (sample_rate == 192000) {
    DVLOG(1) << "Resampling from 48000 to 192000 is required";
    sample_rate = 48000;
  }
  media::AudioSampleRate asr = media::AsAudioSampleRate(sample_rate);
  if (asr != media::kUnexpectedAudioSampleRate) {
    UMA_HISTOGRAM_ENUMERATION(
        "WebRTC.AudioOutputSampleRate", asr, media::kUnexpectedAudioSampleRate);
  } else {
    UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputSampleRateUnexpected", sample_rate);
  }

  // Verify that the reported output hardware sample rate is supported
  // on the current platform.
  if (std::find(&kValidOutputRates[0],
                &kValidOutputRates[0] + arraysize(kValidOutputRates),
                sample_rate) ==
          &kValidOutputRates[arraysize(kValidOutputRates)]) {
    DLOG(ERROR) << sample_rate << " is not a supported output rate.";
    return false;
  }

  // Set up audio parameters for the source, i.e., the WebRTC client.

  // The WebRTC client only supports buffer sizes that are multiples of 10 ms,
  // where 10 ms gives the lowest possible delay.
  media::AudioParameters source_params;
  int buffer_size = (sample_rate / 100);
  DVLOG(1) << "Using WebRTC output buffer size: " << buffer_size;
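  // For example, at 48 kHz the source is asked for 48000 / 100 = 480 frames
  // (10 ms) per buffer, and at 44.1 kHz for 441 frames.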
  int channels = ChannelLayoutToChannelCount(channel_layout);
  source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
                      channel_layout, channels, 0,
                      sample_rate, 16, buffer_size);

  // Set up audio parameters for the sink, i.e., the native audio output
  // stream. We strive to open up using native parameters to achieve best
  // possible performance and to ensure that no FIFO is needed on the browser
  // side to match the client request. Any mismatch between the source and the
  // sink is instead taken care of in this class using a pull FIFO.

  media::AudioParameters sink_params;

  // Use the native output buffer size as default.
  buffer_size = frames_per_buffer_;
#if defined(OS_ANDROID)
  // TODO(henrika): Keep tuning this scheme, especially for low-latency cases.
  // It might not be possible to come up with the perfect solution using the
  // render side only.
  const int frames_per_10ms = (sample_rate / 100);
  if (buffer_size < 2 * frames_per_10ms) {
    // Examples of low-latency frame sizes and the resulting |buffer_size|:
    // Nexus 7     : 240 audio frames => 2*480 = 960
    // Nexus 10    : 256              => 2*441 = 882
    // Galaxy Nexus: 144              => 2*441 = 882
    buffer_size = 2 * frames_per_10ms;
    DVLOG(1) << "Low-latency output detected on Android";
  }
#endif
  DVLOG(1) << "Using sink output buffer size: " << buffer_size;

  sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
                    channel_layout, channels, 0, sample_rate, 16, buffer_size);

  // Create a FIFO if re-buffering is required to match the source input with
  // the sink request. The source acts as provider here and the sink as
  // consumer.
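  // For example, if the sink asks for 960 frames per buffer while the source
  // delivers 480 (10 ms at 48 kHz), each sink callback pulls from the source
  // twice, and the extra buffered frames add
  // (960 - 480) * 1000 / 48000 = 10 ms of delay.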
  fifo_delay_milliseconds_ = 0;
  if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
    DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
             << " to " << sink_params.frames_per_buffer();
    audio_fifo_.reset(new media::AudioPullFifo(
        source_params.channels(),
        source_params.frames_per_buffer(),
        base::Bind(
            &WebRtcAudioRenderer::SourceCallback,
            base::Unretained(this))));

    if (sink_params.frames_per_buffer() > source_params.frames_per_buffer()) {
      // The FIFO delay is the number of extra buffered frames expressed in
      // milliseconds. Multiply before dividing to avoid truncating the
      // sub-millisecond per-frame duration to zero.
      fifo_delay_milliseconds_ =
          (sink_params.frames_per_buffer() -
           source_params.frames_per_buffer()) *
          base::Time::kMillisecondsPerSecond / source_params.sample_rate();
    }
  }

  // Allocate local audio buffers based on the parameters above.
  // It is assumed that each audio sample contains 16 bits and each
  // audio frame contains one or two audio samples depending on the
  // number of channels.
  buffer_.reset(
      new int16[source_params.frames_per_buffer() * source_params.channels()]);

  source_ = source;
  source->SetRenderFormat(source_params);

  // Configure the audio rendering client and start rendering.
  sink_ = AudioDeviceFactory::NewOutputDevice(source_render_view_id_);

  // TODO(tommi): Rename InitializeUnifiedStream to better reflect its
  // association with a session.
  DCHECK_GE(session_id_, 0);
  sink_->InitializeUnifiedStream(sink_params, this, session_id_);

  sink_->Start();

  // User must call Play() before any audio can be heard.
  state_ = PAUSED;

  UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
                            source_params.channel_layout(),
                            media::CHANNEL_LAYOUT_MAX);
  AddHistogramFramesPerBuffer(source_params.frames_per_buffer());

  return true;
}

scoped_refptr<MediaStreamAudioRenderer>
WebRtcAudioRenderer::CreateSharedAudioRendererProxy() {
  return new SharedAudioRenderer(this);
}

bool WebRtcAudioRenderer::IsStarted() const {
  DCHECK(thread_checker_.CalledOnValidThread());
  return start_ref_count_ != 0;
}

void WebRtcAudioRenderer::Start() {
  DVLOG(1) << "WebRtcAudioRenderer::Start()";
  DCHECK(thread_checker_.CalledOnValidThread());
  ++start_ref_count_;
}

void WebRtcAudioRenderer::Play() {
  DVLOG(1) << "WebRtcAudioRenderer::Play()";
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
  ++play_ref_count_;

  if (state_ != PLAYING) {
    state_ = PLAYING;

    if (audio_fifo_) {
      audio_delay_milliseconds_ = 0;
      audio_fifo_->Clear();
    }
  }
}

void WebRtcAudioRenderer::Pause() {
  DVLOG(1) << "WebRtcAudioRenderer::Pause()";
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  DCHECK_EQ(state_, PLAYING);
  DCHECK_GT(play_ref_count_, 0);
  if (!--play_ref_count_)
    state_ = PAUSED;
}
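// Note that stopping is reference counted via |start_ref_count_|: the source
// is detached and the sink shut down only when the last caller of Start()
// has called Stop().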
"WebRtcAudioRenderer::Stop()"; 392 DCHECK(thread_checker_.CalledOnValidThread()); 393 { 394 base::AutoLock auto_lock(lock_); 395 if (state_ == UNINITIALIZED) 396 return; 397 398 if (--start_ref_count_) 399 return; 400 401 DVLOG(1) << "Calling RemoveAudioRenderer and Stop()."; 402 403 source_->RemoveAudioRenderer(this); 404 source_ = NULL; 405 state_ = UNINITIALIZED; 406 } 407 408 // Make sure to stop the sink while _not_ holding the lock since the Render() 409 // callback may currently be executing and try to grab the lock while we're 410 // stopping the thread on which it runs. 411 sink_->Stop(); 412 } 413 414 void WebRtcAudioRenderer::SetVolume(float volume) { 415 DCHECK(thread_checker_.CalledOnValidThread()); 416 base::AutoLock auto_lock(lock_); 417 if (state_ == UNINITIALIZED) 418 return; 419 420 sink_->SetVolume(volume); 421 } 422 423 base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const { 424 return base::TimeDelta(); 425 } 426 427 bool WebRtcAudioRenderer::IsLocalRenderer() const { 428 return false; 429 } 430 431 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus, 432 int audio_delay_milliseconds) { 433 base::AutoLock auto_lock(lock_); 434 if (!source_) 435 return 0; 436 437 DVLOG(2) << "WebRtcAudioRenderer::Render()"; 438 DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds; 439 440 audio_delay_milliseconds_ = audio_delay_milliseconds; 441 442 if (audio_fifo_) 443 audio_fifo_->Consume(audio_bus, audio_bus->frames()); 444 else 445 SourceCallback(0, audio_bus); 446 447 return (state_ == PLAYING) ? audio_bus->frames() : 0; 448 } 449 450 void WebRtcAudioRenderer::OnRenderError() { 451 NOTIMPLEMENTED(); 452 LOG(ERROR) << "OnRenderError()"; 453 } 454 455 // Called by AudioPullFifo when more data is necessary. 456 void WebRtcAudioRenderer::SourceCallback( 457 int fifo_frame_delay, media::AudioBus* audio_bus) { 458 DVLOG(2) << "WebRtcAudioRenderer::SourceCallback(" 459 << fifo_frame_delay << ", " 460 << audio_bus->frames() << ")"; 461 462 int output_delay_milliseconds = audio_delay_milliseconds_; 463 output_delay_milliseconds += fifo_delay_milliseconds_; 464 DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds; 465 466 // We need to keep render data for the |source_| regardless of |state_|, 467 // otherwise the data will be buffered up inside |source_|. 468 source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()), 469 audio_bus->channels(), audio_bus->frames(), 470 output_delay_milliseconds); 471 472 // Avoid filling up the audio bus if we are not playing; instead 473 // return here and ensure that the returned value in Render() is 0. 474 if (state_ != PLAYING) { 475 audio_bus->Zero(); 476 return; 477 } 478 479 // De-interleave each channel and convert to 32-bit floating-point 480 // with nominal range -1.0 -> +1.0 to match the callback format. 481 audio_bus->FromInterleaved(buffer_.get(), 482 audio_bus->frames(), 483 sizeof(buffer_[0])); 484 } 485 486 } // namespace content 487