Home | History | Annotate | Download | only in ppapi
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/logging.h"
     10 #include "media/base/audio_bus.h"
     11 #include "media/base/audio_timestamp_helper.h"
     12 #include "media/base/buffers.h"
     13 #include "media/base/data_buffer.h"
     14 #include "media/base/limits.h"
     15 
     16 // Include FFmpeg header files.
     17 extern "C" {
     18 // Temporarily disable possible loss of data warning.
     19 MSVC_PUSH_DISABLE_WARNING(4244);
     20 #include <libavcodec/avcodec.h>
     21 MSVC_POP_WARNING();
     22 }  // extern "C"
     23 
     24 namespace media {
     25 
// Maximum number of channels with defined layout in src/media.  Used by
// IsValidConfig() to reject configurations we cannot represent.
static const int kMaxChannels = 8;
     28 
     29 static AVCodecID CdmAudioCodecToCodecID(
     30     cdm::AudioDecoderConfig::AudioCodec audio_codec) {
     31   switch (audio_codec) {
     32     case cdm::AudioDecoderConfig::kCodecVorbis:
     33       return AV_CODEC_ID_VORBIS;
     34     case cdm::AudioDecoderConfig::kCodecAac:
     35       return AV_CODEC_ID_AAC;
     36     case cdm::AudioDecoderConfig::kUnknownAudioCodec:
     37     default:
     38       NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec;
     39       return AV_CODEC_ID_NONE;
     40   }
     41 }
     42 
     43 static void CdmAudioDecoderConfigToAVCodecContext(
     44     const cdm::AudioDecoderConfig& config,
     45     AVCodecContext* codec_context) {
     46   codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
     47   codec_context->codec_id = CdmAudioCodecToCodecID(config.codec);
     48 
     49   switch (config.bits_per_channel) {
     50     case 8:
     51       codec_context->sample_fmt = AV_SAMPLE_FMT_U8;
     52       break;
     53     case 16:
     54       codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
     55       break;
     56     case 32:
     57       codec_context->sample_fmt = AV_SAMPLE_FMT_S32;
     58       break;
     59     default:
     60       DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
     61                   "per channel: " << config.bits_per_channel;
     62       codec_context->sample_fmt = AV_SAMPLE_FMT_NONE;
     63   }
     64 
     65   codec_context->channels = config.channel_count;
     66   codec_context->sample_rate = config.samples_per_second;
     67 
     68   if (config.extra_data) {
     69     codec_context->extradata_size = config.extra_data_size;
     70     codec_context->extradata = reinterpret_cast<uint8_t*>(
     71         av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE));
     72     memcpy(codec_context->extradata, config.extra_data,
     73            config.extra_data_size);
     74     memset(codec_context->extradata + config.extra_data_size, '\0',
     75            FF_INPUT_BUFFER_PADDING_SIZE);
     76   } else {
     77     codec_context->extradata = NULL;
     78     codec_context->extradata_size = 0;
     79   }
     80 }
     81 
// Construction only records the |host| pointer and zeroes all decoder state;
// the FFmpeg objects are created lazily in Initialize().
FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Host* host)
    : is_initialized_(false),
      host_(host),
      codec_context_(NULL),
      av_frame_(NULL),
      bits_per_channel_(0),
      samples_per_second_(0),
      channels_(0),
      av_sample_format_(0),
      bytes_per_frame_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_bytes_to_drop_(0) {
}
     95 
// Frees the FFmpeg codec context and frame regardless of whether
// Deinitialize() was called first (ReleaseFFmpegResources() is idempotent).
FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
  ReleaseFFmpegResources();
}
     99 
// Opens an FFmpeg decoder for |config| and caches the stream parameters used
// to detect midstream configuration changes.  Returns false on invalid
// config, double-initialization, or FFmpeg failure; on failure any partially
// created FFmpeg state is reclaimed later by ReleaseFFmpegResources().
bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
  DVLOG(1) << "Initialize()";

  if (!IsValidConfig(config)) {
    LOG(ERROR) << "Initialize(): invalid audio decoder configuration.";
    return false;
  }

  if (is_initialized_) {
    LOG(ERROR) << "Initialize(): Already initialized.";
    return false;
  }

  // Initialize AVCodecContext structure.
  codec_context_ = avcodec_alloc_context3(NULL);
  CdmAudioDecoderConfigToAVCodecContext(config, codec_context_);

  // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
    codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;

  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec || avcodec_open2(codec_context_, codec, NULL) < 0) {
    DLOG(ERROR) << "Could not initialize audio decoder: "
                << codec_context_->codec_id;
    return false;
  }

  // Ensure avcodec_open2() respected our format request.
  // request_sample_fmt is only a hint; the codec may ignore it.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
    DLOG(ERROR) << "Unable to configure a supported sample format: "
                << codec_context_->sample_fmt;
    return false;
  }

  // Some codecs will only output float data, so we need to convert to integer
  // before returning the decoded buffer.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP ||
      codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
    // Preallocate the AudioBus for float conversions.  We can treat interleaved
    // float data as a single planar channel since our output is expected in an
    // interleaved format anyways.
    int channels = codec_context_->channels;
    if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT)
      channels = 1;
    converter_bus_ = AudioBus::CreateWrapper(channels);
  }

  // Success!
  av_frame_ = avcodec_alloc_frame();
  bits_per_channel_ = config.bits_per_channel;
  samples_per_second_ = config.samples_per_second;
  bytes_per_frame_ = codec_context_->channels * bits_per_channel_ / 8;
  output_timestamp_helper_.reset(
      new AudioTimestampHelper(config.samples_per_second));
  // Reserve roughly one second of serialized output to limit reallocations.
  serialized_audio_frames_.reserve(bytes_per_frame_ * samples_per_second_);
  is_initialized_ = true;

  // Store initial values to guard against midstream configuration changes.
  channels_ = codec_context_->channels;
  av_sample_format_ = codec_context_->sample_fmt;

  return true;
}
    164 
// Tears down the FFmpeg decoder and clears timestamp bookkeeping; the object
// may be Initialize()d again afterwards.
void FFmpegCdmAudioDecoder::Deinitialize() {
  DVLOG(1) << "Deinitialize()";
  ReleaseFFmpegResources();
  is_initialized_ = false;
  ResetTimestampState();
}
    171 
// Discards any buffered decoder state (e.g. across a seek) without closing
// the codec.  NOTE(review): assumes Initialize() succeeded — codec_context_
// is dereferenced unconditionally; confirm the host never calls Reset()
// on an uninitialized decoder.
void FFmpegCdmAudioDecoder::Reset() {
  DVLOG(1) << "Reset()";
  avcodec_flush_buffers(codec_context_);
  ResetTimestampState();
}
    177 
    178 // static
    179 bool FFmpegCdmAudioDecoder::IsValidConfig(
    180     const cdm::AudioDecoderConfig& config) {
    181   return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec &&
    182          config.channel_count > 0 &&
    183          config.channel_count <= kMaxChannels &&
    184          config.bits_per_channel > 0 &&
    185          config.bits_per_channel <= limits::kMaxBitsPerSample &&
    186          config.samples_per_second > 0 &&
    187          config.samples_per_second <= limits::kMaxSampleRate;
    188 }
    189 
    190 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
    191     const uint8_t* compressed_buffer,
    192     int32_t compressed_buffer_size,
    193     int64_t input_timestamp,
    194     cdm::AudioFrames* decoded_frames) {
    195   DVLOG(1) << "DecodeBuffer()";
    196   const bool is_end_of_stream = !compressed_buffer;
    197   base::TimeDelta timestamp =
    198       base::TimeDelta::FromMicroseconds(input_timestamp);
    199 
    200   bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
    201   if (!is_end_of_stream) {
    202     if (last_input_timestamp_ == kNoTimestamp()) {
    203       if (is_vorbis && timestamp < base::TimeDelta()) {
    204         // Dropping frames for negative timestamps as outlined in section A.2
    205         // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
    206         int frames_to_drop = floor(
    207             0.5 + -timestamp.InSecondsF() * samples_per_second_);
    208         output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
    209       } else {
    210         last_input_timestamp_ = timestamp;
    211       }
    212     } else if (timestamp != kNoTimestamp()) {
    213       if (timestamp < last_input_timestamp_) {
    214         base::TimeDelta diff = timestamp - last_input_timestamp_;
    215         DVLOG(1) << "Input timestamps are not monotonically increasing! "
    216                  << " ts " << timestamp.InMicroseconds() << " us"
    217                  << " diff " << diff.InMicroseconds() << " us";
    218         return cdm::kDecodeError;
    219       }
    220 
    221       last_input_timestamp_ = timestamp;
    222     }
    223   }
    224 
    225   AVPacket packet;
    226   av_init_packet(&packet);
    227   packet.data = const_cast<uint8_t*>(compressed_buffer);
    228   packet.size = compressed_buffer_size;
    229 
    230   // Each audio packet may contain several frames, so we must call the decoder
    231   // until we've exhausted the packet.  Regardless of the packet size we always
    232   // want to hand it to the decoder at least once, otherwise we would end up
    233   // skipping end of stream packets since they have a size of zero.
    234   do {
    235     // Reset frame to default values.
    236     avcodec_get_frame_defaults(av_frame_);
    237 
    238     int frame_decoded = 0;
    239     int result = avcodec_decode_audio4(
    240         codec_context_, av_frame_, &frame_decoded, &packet);
    241 
    242     if (result < 0) {
    243       DCHECK(!is_end_of_stream)
    244           << "End of stream buffer produced an error! "
    245           << "This is quite possibly a bug in the audio decoder not handling "
    246           << "end of stream AVPackets correctly.";
    247 
    248       DLOG(ERROR)
    249           << "Error decoding an audio frame with timestamp: "
    250           << timestamp.InMicroseconds() << " us, duration: "
    251           << timestamp.InMicroseconds() << " us, packet size: "
    252           << compressed_buffer_size << " bytes";
    253 
    254       return cdm::kDecodeError;
    255     }
    256 
    257     // Update packet size and data pointer in case we need to call the decoder
    258     // with the remaining bytes from this packet.
    259     packet.size -= result;
    260     packet.data += result;
    261 
    262     if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
    263         !is_end_of_stream) {
    264       DCHECK(timestamp != kNoTimestamp());
    265       if (output_bytes_to_drop_ > 0) {
    266         // Currently Vorbis is the only codec that causes us to drop samples.
    267         // If we have to drop samples it always means the timeline starts at 0.
    268         DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
    269         output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
    270       } else {
    271         output_timestamp_helper_->SetBaseTimestamp(timestamp);
    272       }
    273     }
    274 
    275     int decoded_audio_size = 0;
    276     if (frame_decoded) {
    277       if (av_frame_->sample_rate != samples_per_second_ ||
    278           av_frame_->channels != channels_ ||
    279           av_frame_->format != av_sample_format_) {
    280         DLOG(ERROR) << "Unsupported midstream configuration change!"
    281                     << " Sample Rate: " << av_frame_->sample_rate << " vs "
    282                     << samples_per_second_
    283                     << ", Channels: " << av_frame_->channels << " vs "
    284                     << channels_
    285                     << ", Sample Format: " << av_frame_->format << " vs "
    286                     << av_sample_format_;
    287         return cdm::kDecodeError;
    288       }
    289 
    290       decoded_audio_size = av_samples_get_buffer_size(
    291           NULL, codec_context_->channels, av_frame_->nb_samples,
    292           codec_context_->sample_fmt, 1);
    293       // If we're decoding into float, adjust audio size.
    294       if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) {
    295         DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT ||
    296                codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP);
    297         decoded_audio_size *=
    298             static_cast<float>(bits_per_channel_ / 8) / sizeof(float);
    299       }
    300     }
    301 
    302     int start_sample = 0;
    303     if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
    304       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
    305           << "Decoder didn't output full frames";
    306 
    307       int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
    308       start_sample = dropped_size / bytes_per_frame_;
    309       decoded_audio_size -= dropped_size;
    310       output_bytes_to_drop_ -= dropped_size;
    311     }
    312 
    313     scoped_refptr<DataBuffer> output;
    314     if (decoded_audio_size > 0) {
    315       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
    316           << "Decoder didn't output full frames";
    317 
    318       // Convert float data using an AudioBus.
    319       if (converter_bus_) {
    320         // Setup the AudioBus as a wrapper of the AVFrame data and then use
    321         // AudioBus::ToInterleaved() to convert the data as necessary.
    322         int skip_frames = start_sample;
    323         int total_frames = av_frame_->nb_samples;
    324         int frames_to_interleave = decoded_audio_size / bytes_per_frame_;
    325         if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
    326           DCHECK_EQ(converter_bus_->channels(), 1);
    327           total_frames *= codec_context_->channels;
    328           skip_frames *= codec_context_->channels;
    329           frames_to_interleave *= codec_context_->channels;
    330         }
    331 
    332         converter_bus_->set_frames(total_frames);
    333         for (int i = 0; i < converter_bus_->channels(); ++i) {
    334           converter_bus_->SetChannelData(i, reinterpret_cast<float*>(
    335               av_frame_->extended_data[i]));
    336         }
    337 
    338         output = new DataBuffer(decoded_audio_size);
    339         output->set_data_size(decoded_audio_size);
    340 
    341         DCHECK_EQ(frames_to_interleave, converter_bus_->frames() - skip_frames);
    342         converter_bus_->ToInterleavedPartial(
    343             skip_frames, frames_to_interleave, bits_per_channel_ / 8,
    344             output->writable_data());
    345       } else {
    346         output = DataBuffer::CopyFrom(
    347             av_frame_->extended_data[0] + start_sample * bytes_per_frame_,
    348             decoded_audio_size);
    349       }
    350 
    351       base::TimeDelta output_timestamp =
    352           output_timestamp_helper_->GetTimestamp();
    353       output_timestamp_helper_->AddFrames(decoded_audio_size /
    354                                           bytes_per_frame_);
    355 
    356       // Serialize the audio samples into |serialized_audio_frames_|.
    357       SerializeInt64(output_timestamp.InMicroseconds());
    358       SerializeInt64(output->data_size());
    359       serialized_audio_frames_.insert(
    360           serialized_audio_frames_.end(),
    361           output->data(),
    362           output->data() + output->data_size());
    363     }
    364   } while (packet.size > 0);
    365 
    366   if (!serialized_audio_frames_.empty()) {
    367     decoded_frames->SetFrameBuffer(
    368         host_->Allocate(serialized_audio_frames_.size()));
    369     if (!decoded_frames->FrameBuffer()) {
    370       LOG(ERROR) << "DecodeBuffer() cdm::Host::Allocate failed.";
    371       return cdm::kDecodeError;
    372     }
    373     memcpy(decoded_frames->FrameBuffer()->Data(),
    374            &serialized_audio_frames_[0],
    375            serialized_audio_frames_.size());
    376     decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size());
    377     serialized_audio_frames_.clear();
    378 
    379     return cdm::kSuccess;
    380   }
    381 
    382   return cdm::kNeedMoreData;
    383 }
    384 
// Clears timestamp tracking so the next decoded buffer re-establishes the
// output timeline.  NOTE(review): dereferences output_timestamp_helper_,
// which is only created in Initialize() — confirm this is never reached
// before a successful Initialize().
void FFmpegCdmAudioDecoder::ResetTimestampState() {
  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
  last_input_timestamp_ = kNoTimestamp();
  output_bytes_to_drop_ = 0;
}
    390 
// Frees all FFmpeg-owned state.  Safe to call repeatedly: pointers are
// nulled after release, and both branches tolerate never-initialized state.
void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
  DVLOG(1) << "ReleaseFFmpegResources()";

  if (codec_context_) {
    // extradata was allocated with av_malloc() by
    // CdmAudioDecoderConfigToAVCodecContext(), so it is freed here before
    // the context itself.
    av_free(codec_context_->extradata);
    avcodec_close(codec_context_);
    av_free(codec_context_);
    codec_context_ = NULL;
  }
  if (av_frame_) {
    av_free(av_frame_);
    av_frame_ = NULL;
  }
}
    405 
    406 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) {
    407   int previous_size = serialized_audio_frames_.size();
    408   serialized_audio_frames_.resize(previous_size + sizeof(value));
    409   memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value));
    410 }
    411 
    412 }  // namespace media
    413