Home | History | Annotate | Download | only in ppapi
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/logging.h"
     10 #include "media/base/audio_bus.h"
     11 #include "media/base/audio_timestamp_helper.h"
     12 #include "media/base/buffers.h"
     13 #include "media/base/data_buffer.h"
     14 #include "media/base/limits.h"
     15 #include "media/ffmpeg/ffmpeg_common.h"
     16 
     17 // Include FFmpeg header files.
     18 extern "C" {
     19 // Temporarily disable possible loss of data warning.
     20 MSVC_PUSH_DISABLE_WARNING(4244);
     21 #include <libavcodec/avcodec.h>
     22 MSVC_POP_WARNING();
     23 }  // extern "C"
     24 
     25 namespace media {
     26 
// Maximum number of channels with defined layout in src/media.
// IsValidConfig() rejects any configuration whose channel count exceeds this.
static const int kMaxChannels = 8;
     29 
     30 static AVCodecID CdmAudioCodecToCodecID(
     31     cdm::AudioDecoderConfig::AudioCodec audio_codec) {
     32   switch (audio_codec) {
     33     case cdm::AudioDecoderConfig::kCodecVorbis:
     34       return AV_CODEC_ID_VORBIS;
     35     case cdm::AudioDecoderConfig::kCodecAac:
     36       return AV_CODEC_ID_AAC;
     37     case cdm::AudioDecoderConfig::kUnknownAudioCodec:
     38     default:
     39       NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec;
     40       return AV_CODEC_ID_NONE;
     41   }
     42 }
     43 
     44 static void CdmAudioDecoderConfigToAVCodecContext(
     45     const cdm::AudioDecoderConfig& config,
     46     AVCodecContext* codec_context) {
     47   codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
     48   codec_context->codec_id = CdmAudioCodecToCodecID(config.codec);
     49 
     50   switch (config.bits_per_channel) {
     51     case 8:
     52       codec_context->sample_fmt = AV_SAMPLE_FMT_U8;
     53       break;
     54     case 16:
     55       codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
     56       break;
     57     case 32:
     58       codec_context->sample_fmt = AV_SAMPLE_FMT_S32;
     59       break;
     60     default:
     61       DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
     62                   "per channel: " << config.bits_per_channel;
     63       codec_context->sample_fmt = AV_SAMPLE_FMT_NONE;
     64   }
     65 
     66   codec_context->channels = config.channel_count;
     67   codec_context->sample_rate = config.samples_per_second;
     68 
     69   if (config.extra_data) {
     70     codec_context->extradata_size = config.extra_data_size;
     71     codec_context->extradata = reinterpret_cast<uint8_t*>(
     72         av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE));
     73     memcpy(codec_context->extradata, config.extra_data,
     74            config.extra_data_size);
     75     memset(codec_context->extradata + config.extra_data_size, '\0',
     76            FF_INPUT_BUFFER_PADDING_SIZE);
     77   } else {
     78     codec_context->extradata = NULL;
     79     codec_context->extradata_size = 0;
     80   }
     81 }
     82 
     83 static cdm::AudioFormat AVSampleFormatToCdmAudioFormat(
     84     AVSampleFormat sample_format) {
     85   switch (sample_format) {
     86     case AV_SAMPLE_FMT_U8:
     87       return cdm::kAudioFormatU8;
     88     case AV_SAMPLE_FMT_S16:
     89       return cdm::kAudioFormatS16;
     90     case AV_SAMPLE_FMT_S32:
     91       return cdm::kAudioFormatS32;
     92     case AV_SAMPLE_FMT_FLT:
     93       return cdm::kAudioFormatF32;
     94     case AV_SAMPLE_FMT_S16P:
     95       return cdm::kAudioFormatPlanarS16;
     96     case AV_SAMPLE_FMT_FLTP:
     97       return cdm::kAudioFormatPlanarF32;
     98     default:
     99       DVLOG(1) << "Unknown AVSampleFormat: " << sample_format;
    100   }
    101   return cdm::kUnknownAudioFormat;
    102 }
    103 
    104 static void CopySamples(cdm::AudioFormat cdm_format,
    105                         int decoded_audio_size,
    106                         const AVFrame& av_frame,
    107                         uint8_t* output_buffer) {
    108   switch (cdm_format) {
    109     case cdm::kAudioFormatU8:
    110     case cdm::kAudioFormatS16:
    111     case cdm::kAudioFormatS32:
    112     case cdm::kAudioFormatF32:
    113       memcpy(output_buffer, av_frame.data[0], decoded_audio_size);
    114       break;
    115     case cdm::kAudioFormatPlanarS16:
    116     case cdm::kAudioFormatPlanarF32: {
    117       const int decoded_size_per_channel =
    118           decoded_audio_size / av_frame.channels;
    119       for (int i = 0; i < av_frame.channels; ++i) {
    120         memcpy(output_buffer,
    121                av_frame.extended_data[i],
    122                decoded_size_per_channel);
    123         output_buffer += decoded_size_per_channel;
    124       }
    125       break;
    126     }
    127     default:
    128       NOTREACHED() << "Unsupported CDM Audio Format!";
    129       memset(output_buffer, 0, decoded_audio_size);
    130   }
    131 }
    132 
// Creates a decoder in the uninitialized state. |host| is stored as-is and is
// used by DecodeBuffer() to allocate output frame buffers. All stream
// parameters start zeroed and the timestamp state starts at kNoTimestamp();
// Initialize() fills in the real values.
FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(ClearKeyCdmHost* host)
    : is_initialized_(false),
      host_(host),
      samples_per_second_(0),
      channels_(0),
      av_sample_format_(0),
      bytes_per_frame_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_bytes_to_drop_(0) {
}
    143 
// Releases the codec context and the decode frame, if any were created.
FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
  ReleaseFFmpegResources();
}
    147 
    148 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
    149   DVLOG(1) << "Initialize()";
    150   if (!IsValidConfig(config)) {
    151     LOG(ERROR) << "Initialize(): invalid audio decoder configuration.";
    152     return false;
    153   }
    154 
    155   if (is_initialized_) {
    156     LOG(ERROR) << "Initialize(): Already initialized.";
    157     return false;
    158   }
    159 
    160   // Initialize AVCodecContext structure.
    161   codec_context_.reset(avcodec_alloc_context3(NULL));
    162   CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get());
    163 
    164   // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
    165   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
    166     codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
    167 
    168   AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
    169   if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
    170     DLOG(ERROR) << "Could not initialize audio decoder: "
    171                 << codec_context_->codec_id;
    172     return false;
    173   }
    174 
    175   // Ensure avcodec_open2() respected our format request.
    176   if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
    177     DLOG(ERROR) << "Unable to configure a supported sample format: "
    178                 << codec_context_->sample_fmt;
    179     return false;
    180   }
    181 
    182   // Success!
    183   av_frame_.reset(av_frame_alloc());
    184   samples_per_second_ = config.samples_per_second;
    185   bytes_per_frame_ = codec_context_->channels * config.bits_per_channel / 8;
    186   output_timestamp_helper_.reset(
    187       new AudioTimestampHelper(config.samples_per_second));
    188   is_initialized_ = true;
    189 
    190   // Store initial values to guard against midstream configuration changes.
    191   channels_ = codec_context_->channels;
    192   av_sample_format_ = codec_context_->sample_fmt;
    193 
    194   return true;
    195 }
    196 
    197 void FFmpegCdmAudioDecoder::Deinitialize() {
    198   DVLOG(1) << "Deinitialize()";
    199   ReleaseFFmpegResources();
    200   is_initialized_ = false;
    201   ResetTimestampState();
    202 }
    203 
    204 void FFmpegCdmAudioDecoder::Reset() {
    205   DVLOG(1) << "Reset()";
    206   avcodec_flush_buffers(codec_context_.get());
    207   ResetTimestampState();
    208 }
    209 
    210 // static
    211 bool FFmpegCdmAudioDecoder::IsValidConfig(
    212     const cdm::AudioDecoderConfig& config) {
    213   return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec &&
    214          config.channel_count > 0 &&
    215          config.channel_count <= kMaxChannels &&
    216          config.bits_per_channel > 0 &&
    217          config.bits_per_channel <= limits::kMaxBitsPerSample &&
    218          config.samples_per_second > 0 &&
    219          config.samples_per_second <= limits::kMaxSampleRate;
    220 }
    221 
    222 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
    223     const uint8_t* compressed_buffer,
    224     int32_t compressed_buffer_size,
    225     int64_t input_timestamp,
    226     cdm::AudioFrames* decoded_frames) {
    227   DVLOG(1) << "DecodeBuffer()";
    228   const bool is_end_of_stream = !compressed_buffer;
    229   base::TimeDelta timestamp =
    230       base::TimeDelta::FromMicroseconds(input_timestamp);
    231 
    232   bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
    233   if (!is_end_of_stream) {
    234     if (last_input_timestamp_ == kNoTimestamp()) {
    235       if (is_vorbis && timestamp < base::TimeDelta()) {
    236         // Dropping frames for negative timestamps as outlined in section A.2
    237         // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
    238         int frames_to_drop = floor(
    239             0.5 + -timestamp.InSecondsF() * samples_per_second_);
    240         output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
    241       } else {
    242         last_input_timestamp_ = timestamp;
    243       }
    244     } else if (timestamp != kNoTimestamp()) {
    245       if (timestamp < last_input_timestamp_) {
    246         base::TimeDelta diff = timestamp - last_input_timestamp_;
    247         DVLOG(1) << "Input timestamps are not monotonically increasing! "
    248                  << " ts " << timestamp.InMicroseconds() << " us"
    249                  << " diff " << diff.InMicroseconds() << " us";
    250         return cdm::kDecodeError;
    251       }
    252 
    253       last_input_timestamp_ = timestamp;
    254     }
    255   }
    256 
    257   AVPacket packet;
    258   av_init_packet(&packet);
    259   packet.data = const_cast<uint8_t*>(compressed_buffer);
    260   packet.size = compressed_buffer_size;
    261 
    262   // Tell the CDM what AudioFormat we're using.
    263   const cdm::AudioFormat cdm_format = AVSampleFormatToCdmAudioFormat(
    264       static_cast<AVSampleFormat>(av_sample_format_));
    265   DCHECK_NE(cdm_format, cdm::kUnknownAudioFormat);
    266   decoded_frames->SetFormat(cdm_format);
    267 
    268   // Each audio packet may contain several frames, so we must call the decoder
    269   // until we've exhausted the packet.  Regardless of the packet size we always
    270   // want to hand it to the decoder at least once, otherwise we would end up
    271   // skipping end of stream packets since they have a size of zero.
    272   do {
    273     // Reset frame to default values.
    274     avcodec_get_frame_defaults(av_frame_.get());
    275 
    276     int frame_decoded = 0;
    277     int result = avcodec_decode_audio4(
    278         codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);
    279 
    280     if (result < 0) {
    281       DCHECK(!is_end_of_stream)
    282           << "End of stream buffer produced an error! "
    283           << "This is quite possibly a bug in the audio decoder not handling "
    284           << "end of stream AVPackets correctly.";
    285 
    286       DLOG(ERROR)
    287           << "Error decoding an audio frame with timestamp: "
    288           << timestamp.InMicroseconds() << " us, duration: "
    289           << timestamp.InMicroseconds() << " us, packet size: "
    290           << compressed_buffer_size << " bytes";
    291 
    292       return cdm::kDecodeError;
    293     }
    294 
    295     // Update packet size and data pointer in case we need to call the decoder
    296     // with the remaining bytes from this packet.
    297     packet.size -= result;
    298     packet.data += result;
    299 
    300     if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
    301         !is_end_of_stream) {
    302       DCHECK(timestamp != kNoTimestamp());
    303       if (output_bytes_to_drop_ > 0) {
    304         // Currently Vorbis is the only codec that causes us to drop samples.
    305         // If we have to drop samples it always means the timeline starts at 0.
    306         DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
    307         output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
    308       } else {
    309         output_timestamp_helper_->SetBaseTimestamp(timestamp);
    310       }
    311     }
    312 
    313     int decoded_audio_size = 0;
    314     if (frame_decoded) {
    315       if (av_frame_->sample_rate != samples_per_second_ ||
    316           av_frame_->channels != channels_ ||
    317           av_frame_->format != av_sample_format_) {
    318         DLOG(ERROR) << "Unsupported midstream configuration change!"
    319                     << " Sample Rate: " << av_frame_->sample_rate << " vs "
    320                     << samples_per_second_
    321                     << ", Channels: " << av_frame_->channels << " vs "
    322                     << channels_
    323                     << ", Sample Format: " << av_frame_->format << " vs "
    324                     << av_sample_format_;
    325         return cdm::kDecodeError;
    326       }
    327 
    328       decoded_audio_size = av_samples_get_buffer_size(
    329           NULL, codec_context_->channels, av_frame_->nb_samples,
    330           codec_context_->sample_fmt, 1);
    331     }
    332 
    333     if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
    334       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
    335           << "Decoder didn't output full frames";
    336 
    337       int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
    338       decoded_audio_size -= dropped_size;
    339       output_bytes_to_drop_ -= dropped_size;
    340     }
    341 
    342     if (decoded_audio_size > 0) {
    343       DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
    344           << "Decoder didn't output full frames";
    345 
    346       base::TimeDelta output_timestamp =
    347           output_timestamp_helper_->GetTimestamp();
    348       output_timestamp_helper_->AddFrames(decoded_audio_size /
    349                                           bytes_per_frame_);
    350 
    351       // If we've exhausted the packet in the first decode we can write directly
    352       // into the frame buffer instead of a multistep serialization approach.
    353       if (serialized_audio_frames_.empty() && !packet.size) {
    354         const uint32_t buffer_size = decoded_audio_size + sizeof(int64) * 2;
    355         decoded_frames->SetFrameBuffer(host_->Allocate(buffer_size));
    356         if (!decoded_frames->FrameBuffer()) {
    357           LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed.";
    358           return cdm::kDecodeError;
    359         }
    360         decoded_frames->FrameBuffer()->SetSize(buffer_size);
    361         uint8_t* output_buffer = decoded_frames->FrameBuffer()->Data();
    362 
    363         const int64 timestamp = output_timestamp.InMicroseconds();
    364         memcpy(output_buffer, &timestamp, sizeof(timestamp));
    365         output_buffer += sizeof(timestamp);
    366 
    367         const int64 output_size = decoded_audio_size;
    368         memcpy(output_buffer, &output_size, sizeof(output_size));
    369         output_buffer += sizeof(output_size);
    370 
    371         // Copy the samples and return success.
    372         CopySamples(
    373             cdm_format, decoded_audio_size, *av_frame_, output_buffer);
    374         return cdm::kSuccess;
    375       }
    376 
    377       // There are still more frames to decode, so we need to serialize them in
    378       // a secondary buffer since we don't know their sizes ahead of time (which
    379       // is required to allocate the FrameBuffer object).
    380       SerializeInt64(output_timestamp.InMicroseconds());
    381       SerializeInt64(decoded_audio_size);
    382 
    383       const size_t previous_size = serialized_audio_frames_.size();
    384       serialized_audio_frames_.resize(previous_size + decoded_audio_size);
    385       uint8_t* output_buffer = &serialized_audio_frames_[0] + previous_size;
    386       CopySamples(
    387           cdm_format, decoded_audio_size, *av_frame_, output_buffer);
    388     }
    389   } while (packet.size > 0);
    390 
    391   if (!serialized_audio_frames_.empty()) {
    392     decoded_frames->SetFrameBuffer(
    393         host_->Allocate(serialized_audio_frames_.size()));
    394     if (!decoded_frames->FrameBuffer()) {
    395       LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed.";
    396       return cdm::kDecodeError;
    397     }
    398     memcpy(decoded_frames->FrameBuffer()->Data(),
    399            &serialized_audio_frames_[0],
    400            serialized_audio_frames_.size());
    401     decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size());
    402     serialized_audio_frames_.clear();
    403 
    404     return cdm::kSuccess;
    405   }
    406 
    407   return cdm::kNeedMoreData;
    408 }
    409 
    410 void FFmpegCdmAudioDecoder::ResetTimestampState() {
    411   output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
    412   last_input_timestamp_ = kNoTimestamp();
    413   output_bytes_to_drop_ = 0;
    414 }
    415 
// Drops ownership of the codec context and the decode frame. Called from the
// destructor and from Deinitialize().
void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
  DVLOG(1) << "ReleaseFFmpegResources()";

  // The codec context is released before the frame; this order is kept as-is.
  codec_context_.reset();
  av_frame_.reset();
}
    422 
    423 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) {
    424   const size_t previous_size = serialized_audio_frames_.size();
    425   serialized_audio_frames_.resize(previous_size + sizeof(value));
    426   memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value));
    427 }
    428 
    429 }  // namespace media
    430