// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/filters/ffmpeg_audio_decoder.h"

#include "base/bind.h"
#include "base/callback_helpers.h"
#include "base/location.h"
#include "base/message_loop/message_loop_proxy.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/bind_to_loop.h"
#include "media/base/decoder_buffer.h"
#include "media/base/demuxer.h"
#include "media/base/limits.h"
#include "media/base/pipeline.h"
#include "media/base/sample_format.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/filters/ffmpeg_glue.h"

namespace media {

// Helper structure for managing multiple decoded audio frames per packet.
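// Read() returns at most one buffer per call, so frames decoded beyond the
// first are queued here until later Read() calls drain them.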
struct QueuedAudioBuffer {
  AudioDecoder::Status status;
  scoped_refptr<AudioBuffer> buffer;
};

// Returns true if the decode result was end of stream.
static inline bool IsEndOfStream(int result,
                                 int decoded_size,
                                 const scoped_refptr<DecoderBuffer>& input) {
  // Three conditions must be met to declare end of stream for this decoder:
  // 1. FFmpeg didn't read anything.
  // 2. FFmpeg didn't output anything.
  // 3. An end of stream buffer is received.
  return result == 0 && decoded_size == 0 && input->end_of_stream();
}

// Return the number of channels from the data in |frame|.
static inline int DetermineChannels(AVFrame* frame) {
#if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
  // When use_system_ffmpeg==1, libav's AVFrame doesn't have a channels field.
  return av_get_channel_layout_nb_channels(frame->channel_layout);
#else
  return frame->channels;
#endif
}

// Called by FFmpeg's allocation routine to allocate a buffer. Uses
// AVCodecContext.opaque to get the object reference in order to call
// GetAudioBuffer() to do the actual allocation.
static int GetAudioBufferImpl(struct AVCodecContext* s,
                              AVFrame* frame,
                              int flags) {
  DCHECK(s->codec->capabilities & CODEC_CAP_DR1);
  DCHECK_EQ(s->codec_type, AVMEDIA_TYPE_AUDIO);
  FFmpegAudioDecoder* decoder = static_cast<FFmpegAudioDecoder*>(s->opaque);
  return decoder->GetAudioBuffer(s, frame, flags);
}

// Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
// AudioBuffer allocated, so unref it.
static void ReleaseAudioBufferImpl(void* opaque, uint8* data) {
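  // Adopt the reference that GetAudioBuffer() stashed in |opaque|; it is
  // dropped when |buffer| goes out of scope.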
  scoped_refptr<AudioBuffer> buffer;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
}

FFmpegAudioDecoder::FFmpegAudioDecoder(
    const scoped_refptr<base::MessageLoopProxy>& message_loop)
    : message_loop_(message_loop),
      weak_factory_(this),
      demuxer_stream_(NULL),
      bytes_per_channel_(0),
      channel_layout_(CHANNEL_LAYOUT_NONE),
      channels_(0),
      samples_per_second_(0),
      av_sample_format_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_frames_to_drop_(0) {
}

void FFmpegAudioDecoder::Initialize(
    DemuxerStream* stream,
    const PipelineStatusCB& status_cb,
    const StatisticsCB& statistics_cb) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb);

  FFmpegGlue::InitializeFFmpeg();

  if (demuxer_stream_) {
    // TODO(scherkus): initialization currently happens more than once in
    // PipelineIntegrationTest.BasicPlayback.
    LOG(ERROR) << "Initialize has already been called.";
    CHECK(false);
  }

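  // Callbacks bound to |weak_this_| are dropped, rather than run, if this
  // decoder has already been destroyed by the time they fire.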
  weak_this_ = weak_factory_.GetWeakPtr();
  demuxer_stream_ = stream;

  if (!ConfigureDecoder()) {
    initialize_cb.Run(DECODER_ERROR_NOT_SUPPORTED);
    return;
  }

  statistics_cb_ = statistics_cb;
  initialize_cb.Run(PIPELINE_OK);
}
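
// A minimal initialization sketch (hypothetical caller; |stream|, OnInitDone()
// and OnStatistics() are assumed names, not part of this file):
//
//   FFmpegAudioDecoder decoder(base::MessageLoopProxy::current());
//   decoder.Initialize(stream,
//                      base::Bind(&OnInitDone),     // PipelineStatusCB
//                      base::Bind(&OnStatistics));  // StatisticsCB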

void FFmpegAudioDecoder::Read(const ReadCB& read_cb) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb.is_null());
  CHECK(read_cb_.is_null()) << "Overlapping decodes are not supported.";

  read_cb_ = BindToCurrentLoop(read_cb);

  // If we don't have any queued audio from the last packet we decoded, ask for
  // more data from the demuxer to satisfy this read.
  if (queued_audio_.empty()) {
    ReadFromDemuxerStream();
    return;
  }

  base::ResetAndReturn(&read_cb_).Run(
      queued_audio_.front().status, queued_audio_.front().buffer);
  queued_audio_.pop_front();
}
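
// A minimal read sketch (hypothetical caller; OnBufferRead() is an assumed
// name). Read() must not be called again until the previous callback fires:
//
//   decoder.Read(base::Bind(&OnBufferRead));  // ReadCB(status, buffer)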

int FFmpegAudioDecoder::bits_per_channel() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return bytes_per_channel_ * 8;
}

ChannelLayout FFmpegAudioDecoder::channel_layout() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return channel_layout_;
}

int FFmpegAudioDecoder::samples_per_second() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return samples_per_second_;
}

void FFmpegAudioDecoder::Reset(const base::Closure& closure) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  base::Closure reset_cb = BindToCurrentLoop(closure);

  avcodec_flush_buffers(codec_context_.get());
  ResetTimestampState();
  queued_audio_.clear();
  reset_cb.Run();
}

FFmpegAudioDecoder::~FFmpegAudioDecoder() {
  // TODO(scherkus): should we require Stop() to be called? this might end up
  // getting called on a random thread due to refcounting.
  ReleaseFFmpegResources();
}

int FFmpegAudioDecoder::GetAudioBuffer(AVCodecContext* codec,
                                       AVFrame* frame,
                                       int flags) {
  // Since this routine is called by FFmpeg when a buffer is required for audio
  // data, use the values supplied by FFmpeg (ignoring the current settings).
  // RunDecodeLoop() gets to determine if the buffer is usable or not.
  AVSampleFormat format = static_cast<AVSampleFormat>(frame->format);
  SampleFormat sample_format = AVSampleFormatToSampleFormat(format);
  int channels = DetermineChannels(frame);
  if ((channels <= 0) || (channels >= limits::kMaxChannels)) {
    DLOG(ERROR) << "Requested number of channels (" << channels
                << ") is out of range.";
    return AVERROR(EINVAL);
  }

  int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
  if (frame->nb_samples <= 0)
    return AVERROR(EINVAL);

  // Determine how big the buffer should be and allocate it. FFmpeg may pad
  // each channel's data to meet its alignment policy, so we need to take
  // this into account.
  int buffer_size_in_bytes =
      av_samples_get_buffer_size(&frame->linesize[0],
                                 channels,
                                 frame->nb_samples,
                                 format,
                                 AudioBuffer::kChannelAlignment);
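  // |frame->linesize[0]| now holds the aligned plane size in bytes (for
  // interleaved formats, the single plane spans all channels).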
  // Check for errors from av_samples_get_buffer_size().
  if (buffer_size_in_bytes < 0)
    return buffer_size_in_bytes;
  int frames_required = buffer_size_in_bytes / bytes_per_channel / channels;
  DCHECK_GE(frames_required, frame->nb_samples);
  scoped_refptr<AudioBuffer> buffer =
      AudioBuffer::CreateBuffer(sample_format, channels, frames_required);

  // Initialize the data[] and extended_data[] fields to point into the memory
  // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
  // audio and equal to |channels| for planar audio.
  int number_of_planes = buffer->channel_data().size();
  if (number_of_planes <= AV_NUM_DATA_POINTERS) {
    DCHECK_EQ(frame->extended_data, frame->data);
    for (int i = 0; i < number_of_planes; ++i)
      frame->data[i] = buffer->channel_data()[i];
  } else {
    // There are more channels than can fit into data[], so allocate
    // extended_data[] and fill appropriately.
    frame->extended_data = static_cast<uint8**>(
        av_malloc(number_of_planes * sizeof(*frame->extended_data)));
    int i = 0;
    for (; i < AV_NUM_DATA_POINTERS; ++i)
      frame->extended_data[i] = frame->data[i] = buffer->channel_data()[i];
    for (; i < number_of_planes; ++i)
      frame->extended_data[i] = buffer->channel_data()[i];
  }

  // Now create an AVBufferRef for the data just allocated. It will own the
  // reference to the AudioBuffer object.
  void* opaque = NULL;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
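  // |buffer| is now NULL; the AudioBuffer reference lives in |opaque| until
  // ReleaseAudioBufferImpl() adopts it.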
  frame->buf[0] = av_buffer_create(
      frame->data[0], buffer_size_in_bytes, ReleaseAudioBufferImpl, opaque, 0);
  return 0;
}

void FFmpegAudioDecoder::ReadFromDemuxerStream() {
  DCHECK(!read_cb_.is_null());
  demuxer_stream_->Read(base::Bind(
      &FFmpegAudioDecoder::BufferReady, weak_this_));
}

void FFmpegAudioDecoder::BufferReady(
    DemuxerStream::Status status,
    const scoped_refptr<DecoderBuffer>& input) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb_.is_null());
  DCHECK(queued_audio_.empty());
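  // A buffer is present if and only if the demuxer reports kOk.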
  DCHECK_EQ(status != DemuxerStream::kOk, !input.get()) << status;

  if (status == DemuxerStream::kAborted) {
    DCHECK(!input.get());
    base::ResetAndReturn(&read_cb_).Run(kAborted, NULL);
    return;
  }

  if (status == DemuxerStream::kConfigChanged) {
    DCHECK(!input.get());

    // Send an "end of stream" buffer to the decode loop
    // to output any remaining data still in the decoder.
    RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);

    DVLOG(1) << "Config changed.";

    if (!ConfigureDecoder()) {
      base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
      return;
    }

    ResetTimestampState();

    if (queued_audio_.empty()) {
      ReadFromDemuxerStream();
      return;
    }

    base::ResetAndReturn(&read_cb_).Run(
        queued_audio_.front().status, queued_audio_.front().buffer);
    queued_audio_.pop_front();
    return;
  }

  DCHECK_EQ(status, DemuxerStream::kOk);
  DCHECK(input.get());

  // Make sure we are notified if http://crbug.com/49709 returns. The issue
  // also occurs with some damaged files.
  if (!input->end_of_stream() && input->timestamp() == kNoTimestamp() &&
      output_timestamp_helper_->base_timestamp() == kNoTimestamp()) {
    DVLOG(1) << "Received a buffer without timestamps!";
    base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
    return;
  }

  if (!input->end_of_stream()) {
    if (last_input_timestamp_ == kNoTimestamp() &&
        codec_context_->codec_id == AV_CODEC_ID_VORBIS &&
        input->timestamp() < base::TimeDelta()) {
      // Dropping frames for negative timestamps as outlined in section A.2
      // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
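      // floor(0.5 + x) rounds the drop count to the nearest whole frame.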
      output_frames_to_drop_ = floor(
          0.5 + -input->timestamp().InSecondsF() * samples_per_second_);
    } else {
      if (last_input_timestamp_ != kNoTimestamp() &&
          input->timestamp() < last_input_timestamp_) {
        const base::TimeDelta diff = input->timestamp() - last_input_timestamp_;
        DLOG(WARNING)
            << "Input timestamps are not monotonically increasing! "
            << " ts " << input->timestamp().InMicroseconds() << " us"
            << " diff " << diff.InMicroseconds() << " us";
      }

      last_input_timestamp_ = input->timestamp();
    }
  }

  RunDecodeLoop(input, false);

  // We exhausted the provided packet, but it wasn't enough for a frame.  Ask
  // for more data in order to fulfill this read.
  if (queued_audio_.empty()) {
    ReadFromDemuxerStream();
    return;
  }

  // Execute callback to return the first frame we decoded.
  base::ResetAndReturn(&read_cb_).Run(
      queued_audio_.front().status, queued_audio_.front().buffer);
  queued_audio_.pop_front();
}

bool FFmpegAudioDecoder::ConfigureDecoder() {
  const AudioDecoderConfig& config = demuxer_stream_->audio_decoder_config();

  if (!config.IsValidConfig()) {
    DLOG(ERROR) << "Invalid audio stream -"
                << " codec: " << config.codec()
                << " channel layout: " << config.channel_layout()
                << " bits per channel: " << config.bits_per_channel()
                << " samples per second: " << config.samples_per_second();
    return false;
  }

  if (config.is_encrypted()) {
    DLOG(ERROR) << "Encrypted audio stream not supported";
    return false;
  }

  if (codec_context_.get() &&
      (bytes_per_channel_ != config.bytes_per_channel() ||
       channel_layout_ != config.channel_layout() ||
       samples_per_second_ != config.samples_per_second())) {
    DVLOG(1) << "Unsupported config change:";
    DVLOG(1) << "\tbytes_per_channel : " << bytes_per_channel_
             << " -> " << config.bytes_per_channel();
    DVLOG(1) << "\tchannel_layout : " << channel_layout_
             << " -> " << config.channel_layout();
    DVLOG(1) << "\tsample_rate : " << samples_per_second_
             << " -> " << config.samples_per_second();
    return false;
  }

  // Release existing decoder resources if necessary.
  ReleaseFFmpegResources();

  // Initialize AVCodecContext structure.
  codec_context_.reset(avcodec_alloc_context3(NULL));
  AudioDecoderConfigToAVCodecContext(config, codec_context_.get());

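  // Route FFmpeg's buffer allocation through GetAudioBuffer(). With
  // |refcounted_frames| set, each AVFrame holds a reference to the AVBufferRef
  // created there, keeping decoded data valid until av_frame_unref().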
  codec_context_->opaque = this;
  codec_context_->get_buffer2 = GetAudioBufferImpl;
  codec_context_->refcounted_frames = 1;

  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
    DLOG(ERROR) << "Could not initialize audio decoder: "
                << codec_context_->codec_id;
    return false;
  }

  // Success!
  av_frame_.reset(av_frame_alloc());
  channel_layout_ = config.channel_layout();
  samples_per_second_ = config.samples_per_second();
  output_timestamp_helper_.reset(
      new AudioTimestampHelper(config.samples_per_second()));

  // Store initial values to guard against midstream configuration changes.
  channels_ = codec_context_->channels;
  if (channels_ != ChannelLayoutToChannelCount(channel_layout_)) {
    DLOG(ERROR) << "Audio configuration specified "
                << ChannelLayoutToChannelCount(channel_layout_)
                << " channels, but FFmpeg thinks the file contains "
                << channels_ << " channels";
    return false;
  }
  av_sample_format_ = codec_context_->sample_fmt;
  sample_format_ = AVSampleFormatToSampleFormat(
      static_cast<AVSampleFormat>(av_sample_format_));
  bytes_per_channel_ = SampleFormatToBytesPerChannel(sample_format_);

  return true;
}

void FFmpegAudioDecoder::ReleaseFFmpegResources() {
  codec_context_.reset();
  av_frame_.reset();
}

void FFmpegAudioDecoder::ResetTimestampState() {
  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
  last_input_timestamp_ = kNoTimestamp();
  output_frames_to_drop_ = 0;
}

void FFmpegAudioDecoder::RunDecodeLoop(
    const scoped_refptr<DecoderBuffer>& input,
    bool skip_eos_append) {
  AVPacket packet;
  av_init_packet(&packet);
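  // For end of stream, hand FFmpeg an empty packet; this puts the decoder
  // into draining mode so it returns any internally buffered frames.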
  if (input->end_of_stream()) {
    packet.data = NULL;
    packet.size = 0;
  } else {
    packet.data = const_cast<uint8*>(input->data());
    packet.size = input->data_size();
  }

  // Each audio packet may contain several frames, so we must call the decoder
  // until we've exhausted the packet.  Regardless of the packet size we always
  // want to hand it to the decoder at least once, otherwise we would end up
  // skipping end of stream packets since they have a size of zero.
  do {
    int frame_decoded = 0;
    int result = avcodec_decode_audio4(
        codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);

    if (result < 0) {
      DCHECK(!input->end_of_stream())
          << "End of stream buffer produced an error! "
          << "This is quite possibly a bug in the audio decoder not handling "
          << "end of stream AVPackets correctly.";

      DLOG(WARNING)
          << "Failed to decode an audio frame with timestamp: "
          << input->timestamp().InMicroseconds() << " us, duration: "
          << input->duration().InMicroseconds() << " us, packet size: "
          << input->data_size() << " bytes";

      break;
    }

    // Update packet size and data pointer in case we need to call the decoder
    // with the remaining bytes from this packet.
    packet.size -= result;
    packet.data += result;

    if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
        !input->end_of_stream()) {
      DCHECK(input->timestamp() != kNoTimestamp());
      if (output_frames_to_drop_ > 0) {
        // Currently Vorbis is the only codec that causes us to drop samples.
        // If we have to drop samples it always means the timeline starts at 0.
        DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
        output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
      } else {
        output_timestamp_helper_->SetBaseTimestamp(input->timestamp());
      }
    }

    scoped_refptr<AudioBuffer> output;
    int decoded_frames = 0;
    int original_frames = 0;
    int channels = DetermineChannels(av_frame_.get());
    if (frame_decoded) {
      if (av_frame_->sample_rate != samples_per_second_ ||
          channels != channels_ ||
          av_frame_->format != av_sample_format_) {
        DLOG(ERROR) << "Unsupported midstream configuration change!"
                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
                    << samples_per_second_
                    << ", Channels: " << channels << " vs "
                    << channels_
                    << ", Sample Format: " << av_frame_->format << " vs "
                    << av_sample_format_;

        // This is an unrecoverable error, so bail out.
        QueuedAudioBuffer queue_entry = { kDecodeError, NULL };
        queued_audio_.push_back(queue_entry);
        av_frame_unref(av_frame_.get());
        break;
      }

      // Get the AudioBuffer that the data was decoded into. Adjust the number
      // of frames, in case fewer than requested were actually decoded.
      output = reinterpret_cast<AudioBuffer*>(
          av_buffer_get_opaque(av_frame_->buf[0]));
      DCHECK_EQ(channels_, output->channel_count());
      original_frames = av_frame_->nb_samples;
      int unread_frames = output->frame_count() - original_frames;
      DCHECK_GE(unread_frames, 0);
      if (unread_frames > 0)
        output->TrimEnd(unread_frames);

      // If there are frames to drop, get rid of as many as we can.
      if (output_frames_to_drop_ > 0) {
        int drop = std::min(output->frame_count(), output_frames_to_drop_);
        output->TrimStart(drop);
        output_frames_to_drop_ -= drop;
      }

      decoded_frames = output->frame_count();
      av_frame_unref(av_frame_.get());
    }

    // WARNING: |av_frame_| no longer has valid data at this point.

    if (decoded_frames > 0) {
      // Set the timestamp/duration once all the extra frames have been
      // discarded.
      output->set_timestamp(output_timestamp_helper_->GetTimestamp());
      output->set_duration(
          output_timestamp_helper_->GetFrameDuration(decoded_frames));
      output_timestamp_helper_->AddFrames(decoded_frames);
    } else if (IsEndOfStream(result, original_frames, input) &&
               !skip_eos_append) {
      DCHECK_EQ(packet.size, 0);
      output = AudioBuffer::CreateEOSBuffer();
    } else {
      // In case all the frames in the buffer were dropped.
      output = NULL;
    }

    if (output.get()) {
      QueuedAudioBuffer queue_entry = { kOk, output };
      queued_audio_.push_back(queue_entry);
    }

    // Decoding finished successfully; update statistics.
    if (result > 0) {
      PipelineStatistics statistics;
      statistics.audio_bytes_decoded = result;
      statistics_cb_.Run(statistics);
    }
  } while (packet.size > 0);
}

}  // namespace media