// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/filters/ffmpeg_audio_decoder.h"

#include "base/bind.h"
#include "base/callback_helpers.h"
#include "base/location.h"
#include "base/message_loop/message_loop_proxy.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/bind_to_loop.h"
#include "media/base/decoder_buffer.h"
#include "media/base/demuxer.h"
#include "media/base/limits.h"
#include "media/base/pipeline.h"
#include "media/base/sample_format.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/filters/ffmpeg_glue.h"

namespace media {

// Helper structure for managing multiple decoded audio frames per packet.
// A single compressed packet can yield several AudioBuffers; each is queued
// together with the status of the decode that produced it.
struct QueuedAudioBuffer {
  AudioDecoder::Status status;
  scoped_refptr<AudioBuffer> buffer;
};

// Returns true if the decode result was end of stream.
static inline bool IsEndOfStream(int result,
                                 int decoded_size,
                                 const scoped_refptr<DecoderBuffer>& input) {
  // Three conditions to meet to declare end of stream for this decoder:
  // 1. FFmpeg didn't read anything.
  // 2. FFmpeg didn't output anything.
  // 3. An end of stream buffer is received.
  return result == 0 && decoded_size == 0 && input->end_of_stream();
}

// Return the number of channels from the data in |frame|.
static inline int DetermineChannels(AVFrame* frame) {
#if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
  // When use_system_ffmpeg==1, libav's AVFrame doesn't have channels field.
  // Derive the count from the channel layout instead.
  return av_get_channel_layout_nb_channels(frame->channel_layout);
#else
  return frame->channels;
#endif
}

// Called by FFmpeg's allocation routine to allocate a buffer.
Uses 54 // AVCodecContext.opaque to get the object reference in order to call 55 // GetAudioBuffer() to do the actual allocation. 56 static int GetAudioBufferImpl(struct AVCodecContext* s, 57 AVFrame* frame, 58 int flags) { 59 DCHECK(s->codec->capabilities & CODEC_CAP_DR1); 60 DCHECK_EQ(s->codec_type, AVMEDIA_TYPE_AUDIO); 61 FFmpegAudioDecoder* decoder = static_cast<FFmpegAudioDecoder*>(s->opaque); 62 return decoder->GetAudioBuffer(s, frame, flags); 63 } 64 65 // Called by FFmpeg's allocation routine to free a buffer. |opaque| is the 66 // AudioBuffer allocated, so unref it. 67 static void ReleaseAudioBufferImpl(void* opaque, uint8* data) { 68 scoped_refptr<AudioBuffer> buffer; 69 buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque)); 70 } 71 72 FFmpegAudioDecoder::FFmpegAudioDecoder( 73 const scoped_refptr<base::MessageLoopProxy>& message_loop) 74 : message_loop_(message_loop), 75 weak_factory_(this), 76 demuxer_stream_(NULL), 77 bytes_per_channel_(0), 78 channel_layout_(CHANNEL_LAYOUT_NONE), 79 channels_(0), 80 samples_per_second_(0), 81 av_sample_format_(0), 82 last_input_timestamp_(kNoTimestamp()), 83 output_frames_to_drop_(0) { 84 } 85 86 void FFmpegAudioDecoder::Initialize( 87 DemuxerStream* stream, 88 const PipelineStatusCB& status_cb, 89 const StatisticsCB& statistics_cb) { 90 DCHECK(message_loop_->BelongsToCurrentThread()); 91 PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb); 92 93 FFmpegGlue::InitializeFFmpeg(); 94 95 if (demuxer_stream_) { 96 // TODO(scherkus): initialization currently happens more than once in 97 // PipelineIntegrationTest.BasicPlayback. 
98 LOG(ERROR) << "Initialize has already been called."; 99 CHECK(false); 100 } 101 102 weak_this_ = weak_factory_.GetWeakPtr(); 103 demuxer_stream_ = stream; 104 105 if (!ConfigureDecoder()) { 106 status_cb.Run(DECODER_ERROR_NOT_SUPPORTED); 107 return; 108 } 109 110 statistics_cb_ = statistics_cb; 111 initialize_cb.Run(PIPELINE_OK); 112 } 113 114 void FFmpegAudioDecoder::Read(const ReadCB& read_cb) { 115 DCHECK(message_loop_->BelongsToCurrentThread()); 116 DCHECK(!read_cb.is_null()); 117 CHECK(read_cb_.is_null()) << "Overlapping decodes are not supported."; 118 119 read_cb_ = BindToCurrentLoop(read_cb); 120 121 // If we don't have any queued audio from the last packet we decoded, ask for 122 // more data from the demuxer to satisfy this read. 123 if (queued_audio_.empty()) { 124 ReadFromDemuxerStream(); 125 return; 126 } 127 128 base::ResetAndReturn(&read_cb_).Run( 129 queued_audio_.front().status, queued_audio_.front().buffer); 130 queued_audio_.pop_front(); 131 } 132 133 int FFmpegAudioDecoder::bits_per_channel() { 134 DCHECK(message_loop_->BelongsToCurrentThread()); 135 return bytes_per_channel_ * 8; 136 } 137 138 ChannelLayout FFmpegAudioDecoder::channel_layout() { 139 DCHECK(message_loop_->BelongsToCurrentThread()); 140 return channel_layout_; 141 } 142 143 int FFmpegAudioDecoder::samples_per_second() { 144 DCHECK(message_loop_->BelongsToCurrentThread()); 145 return samples_per_second_; 146 } 147 148 void FFmpegAudioDecoder::Reset(const base::Closure& closure) { 149 DCHECK(message_loop_->BelongsToCurrentThread()); 150 base::Closure reset_cb = BindToCurrentLoop(closure); 151 152 avcodec_flush_buffers(codec_context_.get()); 153 ResetTimestampState(); 154 queued_audio_.clear(); 155 reset_cb.Run(); 156 } 157 158 FFmpegAudioDecoder::~FFmpegAudioDecoder() { 159 // TODO(scherkus): should we require Stop() to be called? this might end up 160 // getting called on a random thread due to refcounting. 
161 ReleaseFFmpegResources(); 162 } 163 164 int FFmpegAudioDecoder::GetAudioBuffer(AVCodecContext* codec, 165 AVFrame* frame, 166 int flags) { 167 // Since this routine is called by FFmpeg when a buffer is required for audio 168 // data, use the values supplied by FFmpeg (ignoring the current settings). 169 // RunDecodeLoop() gets to determine if the buffer is useable or not. 170 AVSampleFormat format = static_cast<AVSampleFormat>(frame->format); 171 SampleFormat sample_format = AVSampleFormatToSampleFormat(format); 172 int channels = DetermineChannels(frame); 173 if ((channels <= 0) || (channels >= limits::kMaxChannels)) { 174 DLOG(ERROR) << "Requested number of channels (" << channels 175 << ") exceeds limit."; 176 return AVERROR(EINVAL); 177 } 178 179 int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format); 180 if (frame->nb_samples <= 0) 181 return AVERROR(EINVAL); 182 183 // Determine how big the buffer should be and allocate it. FFmpeg may adjust 184 // how big each channel data is in order to meet the alignment policy, so 185 // we need to take this into consideration. 186 int buffer_size_in_bytes = 187 av_samples_get_buffer_size(&frame->linesize[0], 188 channels, 189 frame->nb_samples, 190 format, 191 AudioBuffer::kChannelAlignment); 192 // Check for errors from av_samples_get_buffer_size(). 193 if (buffer_size_in_bytes < 0) 194 return buffer_size_in_bytes; 195 int frames_required = buffer_size_in_bytes / bytes_per_channel / channels; 196 DCHECK_GE(frames_required, frame->nb_samples); 197 scoped_refptr<AudioBuffer> buffer = 198 AudioBuffer::CreateBuffer(sample_format, channels, frames_required); 199 200 // Initialize the data[] and extended_data[] fields to point into the memory 201 // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved 202 // audio and equal to |channels| for planar audio. 
  int number_of_planes = buffer->channel_data().size();
  if (number_of_planes <= AV_NUM_DATA_POINTERS) {
    DCHECK_EQ(frame->extended_data, frame->data);
    for (int i = 0; i < number_of_planes; ++i)
      frame->data[i] = buffer->channel_data()[i];
  } else {
    // There are more channels than can fit into data[], so allocate
    // extended_data[] and fill appropriately.
    frame->extended_data = static_cast<uint8**>(
        av_malloc(number_of_planes * sizeof(*frame->extended_data)));
    int i = 0;
    for (; i < AV_NUM_DATA_POINTERS; ++i)
      frame->extended_data[i] = frame->data[i] = buffer->channel_data()[i];
    for (; i < number_of_planes; ++i)
      frame->extended_data[i] = buffer->channel_data()[i];
  }

  // Now create an AVBufferRef for the data just allocated. It will own the
  // reference to the AudioBuffer object. The swap() transfers |buffer|'s
  // reference into |opaque| without touching the refcount;
  // ReleaseAudioBufferImpl() re-adopts it when FFmpeg frees the AVBufferRef.
  void* opaque = NULL;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
  frame->buf[0] = av_buffer_create(
      frame->data[0], buffer_size_in_bytes, ReleaseAudioBufferImpl, opaque, 0);
  return 0;
}

// Issues an asynchronous demuxer read; BufferReady() receives the result.
void FFmpegAudioDecoder::ReadFromDemuxerStream() {
  DCHECK(!read_cb_.is_null());
  demuxer_stream_->Read(base::Bind(
      &FFmpegAudioDecoder::BufferReady, weak_this_));
}

// Demuxer read completion: handles abort/config-change, decodes |input|, and
// satisfies the pending read with the first queued result.
void FFmpegAudioDecoder::BufferReady(
    DemuxerStream::Status status,
    const scoped_refptr<DecoderBuffer>& input) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb_.is_null());
  DCHECK(queued_audio_.empty());
  // |input| must be non-NULL exactly when the demuxer reports kOk.
  DCHECK_EQ(status != DemuxerStream::kOk, !input.get()) << status;

  if (status == DemuxerStream::kAborted) {
    DCHECK(!input.get());
    base::ResetAndReturn(&read_cb_).Run(kAborted, NULL);
    return;
  }

  if (status == DemuxerStream::kConfigChanged) {
    DCHECK(!input.get());

    // Send a "end of stream" buffer to the decode loop
    // to output any remaining data still in the decoder.
    RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);

    DVLOG(1) << "Config changed.";

    if (!ConfigureDecoder()) {
      base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
      return;
    }

    ResetTimestampState();

    // If draining the old config produced no output, keep reading; otherwise
    // deliver the first drained buffer now.
    if (queued_audio_.empty()) {
      ReadFromDemuxerStream();
      return;
    }

    base::ResetAndReturn(&read_cb_).Run(
        queued_audio_.front().status, queued_audio_.front().buffer);
    queued_audio_.pop_front();
    return;
  }

  DCHECK_EQ(status, DemuxerStream::kOk);
  DCHECK(input.get());

  // Make sure we are notified if http://crbug.com/49709 returns. Issue also
  // occurs with some damaged files.
  if (!input->end_of_stream() && input->timestamp() == kNoTimestamp() &&
      output_timestamp_helper_->base_timestamp() == kNoTimestamp()) {
    DVLOG(1) << "Received a buffer without timestamps!";
    base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
    return;
  }

  if (!input->end_of_stream()) {
    if (last_input_timestamp_ == kNoTimestamp() &&
        codec_context_->codec_id == AV_CODEC_ID_VORBIS &&
        input->timestamp() < base::TimeDelta()) {
      // Dropping frames for negative timestamps as outlined in section A.2
      // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
      // floor(0.5 + x) rounds the frame count to the nearest integer.
      output_frames_to_drop_ = floor(
          0.5 + -input->timestamp().InSecondsF() * samples_per_second_);
    } else {
      if (last_input_timestamp_ != kNoTimestamp() &&
          input->timestamp() < last_input_timestamp_) {
        const base::TimeDelta diff = input->timestamp() - last_input_timestamp_;
        DLOG(WARNING)
            << "Input timestamps are not monotonically increasing! "
            << " ts " << input->timestamp().InMicroseconds() << " us"
            << " diff " << diff.InMicroseconds() << " us";
      }

      last_input_timestamp_ = input->timestamp();
    }
  }

  RunDecodeLoop(input, false);

  // We exhausted the provided packet, but it wasn't enough for a frame.
  // Ask for more data in order to fulfill this read.
  if (queued_audio_.empty()) {
    ReadFromDemuxerStream();
    return;
  }

  // Execute callback to return the first frame we decoded.
  base::ResetAndReturn(&read_cb_).Run(
      queued_audio_.front().status, queued_audio_.front().buffer);
  queued_audio_.pop_front();
}

// (Re)configures the codec context from the demuxer stream's current audio
// config. Returns false for invalid or encrypted configs, unsupported
// midstream config changes, or codec initialization failure.
bool FFmpegAudioDecoder::ConfigureDecoder() {
  const AudioDecoderConfig& config = demuxer_stream_->audio_decoder_config();

  if (!config.IsValidConfig()) {
    DLOG(ERROR) << "Invalid audio stream -"
                << " codec: " << config.codec()
                << " channel layout: " << config.channel_layout()
                << " bits per channel: " << config.bits_per_channel()
                << " samples per second: " << config.samples_per_second();
    return false;
  }

  if (config.is_encrypted()) {
    DLOG(ERROR) << "Encrypted audio stream not supported";
    return false;
  }

  // A change to sample size, channel layout, or sample rate after the codec
  // has been created is not supported.
  if (codec_context_.get() &&
      (bytes_per_channel_ != config.bytes_per_channel() ||
       channel_layout_ != config.channel_layout() ||
       samples_per_second_ != config.samples_per_second())) {
    DVLOG(1) << "Unsupported config change :";
    DVLOG(1) << "\tbytes_per_channel : " << bytes_per_channel_
             << " -> " << config.bytes_per_channel();
    DVLOG(1) << "\tchannel_layout : " << channel_layout_
             << " -> " << config.channel_layout();
    DVLOG(1) << "\tsample_rate : " << samples_per_second_
             << " -> " << config.samples_per_second();
    return false;
  }

  // Release existing decoder resources if necessary.
  ReleaseFFmpegResources();

  // Initialize AVCodecContext structure.
  codec_context_.reset(avcodec_alloc_context3(NULL));
  AudioDecoderConfigToAVCodecContext(config, codec_context_.get());

  // Route FFmpeg's buffer allocations through GetAudioBuffer() and keep
  // decoded frames refcounted so each AudioBuffer owns its own data.
  codec_context_->opaque = this;
  codec_context_->get_buffer2 = GetAudioBufferImpl;
  codec_context_->refcounted_frames = 1;

  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
    DLOG(ERROR) << "Could not initialize audio decoder: "
                << codec_context_->codec_id;
    return false;
  }

  // Success!
  av_frame_.reset(av_frame_alloc());
  channel_layout_ = config.channel_layout();
  samples_per_second_ = config.samples_per_second();
  output_timestamp_helper_.reset(
      new AudioTimestampHelper(config.samples_per_second()));

  // Store initial values to guard against midstream configuration changes.
  channels_ = codec_context_->channels;
  if (channels_ != ChannelLayoutToChannelCount(channel_layout_)) {
    DLOG(ERROR) << "Audio configuration specified "
                << ChannelLayoutToChannelCount(channel_layout_)
                << " channels, but FFmpeg thinks the file contains "
                << channels_ << " channels";
    return false;
  }
  av_sample_format_ = codec_context_->sample_fmt;
  sample_format_ = AVSampleFormatToSampleFormat(
      static_cast<AVSampleFormat>(av_sample_format_));
  bytes_per_channel_ = SampleFormatToBytesPerChannel(sample_format_);

  return true;
}

void FFmpegAudioDecoder::ReleaseFFmpegResources() {
  codec_context_.reset();
  av_frame_.reset();
}

void FFmpegAudioDecoder::ResetTimestampState() {
  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
  last_input_timestamp_ = kNoTimestamp();
  output_frames_to_drop_ = 0;
}

// Feeds |input| to the decoder, appending every decoded AudioBuffer (or an
// error/end-of-stream entry) to |queued_audio_|. |skip_eos_append| suppresses
// the end-of-stream marker when draining during a config change.
void FFmpegAudioDecoder::RunDecodeLoop(
    const scoped_refptr<DecoderBuffer>& input,
    bool skip_eos_append) {
  AVPacket packet;
  av_init_packet(&packet);
  if (input->end_of_stream()) {
    // A NULL/zero-size packet tells the decoder to flush remaining frames.
    packet.data = NULL;
    packet.size = 0;
  } else {
    packet.data = const_cast<uint8*>(input->data());
    packet.size = input->data_size();
  }

  // Each audio packet may contain several frames, so we must call the decoder
  // until we've exhausted the packet. Regardless of the packet size we always
  // want to hand it to the decoder at least once, otherwise we would end up
  // skipping end of stream packets since they have a size of zero.
  do {
    int frame_decoded = 0;
    // |result| is the number of input bytes consumed, or a negative error.
    int result = avcodec_decode_audio4(
        codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);

    if (result < 0) {
      DCHECK(!input->end_of_stream())
          << "End of stream buffer produced an error! "
          << "This is quite possibly a bug in the audio decoder not handling "
          << "end of stream AVPackets correctly.";

      DLOG(WARNING)
          << "Failed to decode an audio frame with timestamp: "
          << input->timestamp().InMicroseconds() << " us, duration: "
          << input->duration().InMicroseconds() << " us, packet size: "
          << input->data_size() << " bytes";

      break;
    }

    // Update packet size and data pointer in case we need to call the decoder
    // with the remaining bytes from this packet.
    packet.size -= result;
    packet.data += result;

    // Latch the output base timestamp from the first timestamped input.
    if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
        !input->end_of_stream()) {
      DCHECK(input->timestamp() != kNoTimestamp());
      if (output_frames_to_drop_ > 0) {
        // Currently Vorbis is the only codec that causes us to drop samples.
        // If we have to drop samples it always means the timeline starts at 0.
        DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
        output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
      } else {
        output_timestamp_helper_->SetBaseTimestamp(input->timestamp());
      }
    }

    scoped_refptr<AudioBuffer> output;
    int decoded_frames = 0;
    int original_frames = 0;
    int channels = DetermineChannels(av_frame_.get());
    if (frame_decoded) {
      // Reject frames whose parameters no longer match what the decoder was
      // configured with.
      if (av_frame_->sample_rate != samples_per_second_ ||
          channels != channels_ ||
          av_frame_->format != av_sample_format_) {
        DLOG(ERROR) << "Unsupported midstream configuration change!"
                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
                    << samples_per_second_
                    << ", Channels: " << channels << " vs "
                    << channels_
                    << ", Sample Format: " << av_frame_->format << " vs "
                    << av_sample_format_;

        // This is an unrecoverable error, so bail out.
        QueuedAudioBuffer queue_entry = { kDecodeError, NULL };
        queued_audio_.push_back(queue_entry);
        av_frame_unref(av_frame_.get());
        break;
      }

      // Get the AudioBuffer that the data was decoded into (stashed as the
      // AVBufferRef's opaque pointer by GetAudioBuffer()). Adjust the number
      // of frames, in case fewer than requested were actually decoded.
      output = reinterpret_cast<AudioBuffer*>(
          av_buffer_get_opaque(av_frame_->buf[0]));
      DCHECK_EQ(channels_, output->channel_count());
      original_frames = av_frame_->nb_samples;
      int unread_frames = output->frame_count() - original_frames;
      DCHECK_GE(unread_frames, 0);
      if (unread_frames > 0)
        output->TrimEnd(unread_frames);

      // If there are frames to drop, get rid of as many as we can.
      if (output_frames_to_drop_ > 0) {
        int drop = std::min(output->frame_count(), output_frames_to_drop_);
        output->TrimStart(drop);
        output_frames_to_drop_ -= drop;
      }

      decoded_frames = output->frame_count();
      av_frame_unref(av_frame_.get());
    }

    // WARNING: |av_frame_| no longer has valid data at this point.

    if (decoded_frames > 0) {
      // Set the timestamp/duration once all the extra frames have been
      // discarded.
      output->set_timestamp(output_timestamp_helper_->GetTimestamp());
      output->set_duration(
          output_timestamp_helper_->GetFrameDuration(decoded_frames));
      output_timestamp_helper_->AddFrames(decoded_frames);
    } else if (IsEndOfStream(result, original_frames, input) &&
               !skip_eos_append) {
      DCHECK_EQ(packet.size, 0);
      output = AudioBuffer::CreateEOSBuffer();
    } else {
      // In case all the frames in the buffer were dropped.
      output = NULL;
    }

    if (output.get()) {
      QueuedAudioBuffer queue_entry = { kOk, output };
      queued_audio_.push_back(queue_entry);
    }

    // Decoding finished successfully, update statistics.
    if (result > 0) {
      PipelineStatistics statistics;
      statistics.audio_bytes_decoded = result;
      statistics_cb_.Run(statistics);
    }
  } while (packet.size > 0);
}

}  // namespace media