1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h" 6 7 #include <algorithm> 8 9 #include "base/logging.h" 10 #include "media/base/audio_bus.h" 11 #include "media/base/audio_timestamp_helper.h" 12 #include "media/base/buffers.h" 13 #include "media/base/data_buffer.h" 14 #include "media/base/limits.h" 15 16 // Include FFmpeg header files. 17 extern "C" { 18 // Temporarily disable possible loss of data warning. 19 MSVC_PUSH_DISABLE_WARNING(4244); 20 #include <libavcodec/avcodec.h> 21 MSVC_POP_WARNING(); 22 } // extern "C" 23 24 namespace media { 25 26 // Maximum number of channels with defined layout in src/media. 27 static const int kMaxChannels = 8; 28 29 static AVCodecID CdmAudioCodecToCodecID( 30 cdm::AudioDecoderConfig::AudioCodec audio_codec) { 31 switch (audio_codec) { 32 case cdm::AudioDecoderConfig::kCodecVorbis: 33 return AV_CODEC_ID_VORBIS; 34 case cdm::AudioDecoderConfig::kCodecAac: 35 return AV_CODEC_ID_AAC; 36 case cdm::AudioDecoderConfig::kUnknownAudioCodec: 37 default: 38 NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec; 39 return AV_CODEC_ID_NONE; 40 } 41 } 42 43 static void CdmAudioDecoderConfigToAVCodecContext( 44 const cdm::AudioDecoderConfig& config, 45 AVCodecContext* codec_context) { 46 codec_context->codec_type = AVMEDIA_TYPE_AUDIO; 47 codec_context->codec_id = CdmAudioCodecToCodecID(config.codec); 48 49 switch (config.bits_per_channel) { 50 case 8: 51 codec_context->sample_fmt = AV_SAMPLE_FMT_U8; 52 break; 53 case 16: 54 codec_context->sample_fmt = AV_SAMPLE_FMT_S16; 55 break; 56 case 32: 57 codec_context->sample_fmt = AV_SAMPLE_FMT_S32; 58 break; 59 default: 60 DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits " 61 "per channel: " << config.bits_per_channel; 62 codec_context->sample_fmt = AV_SAMPLE_FMT_NONE; 63 } 64 65 codec_context->channels = config.channel_count; 66 codec_context->sample_rate = config.samples_per_second; 67 68 if (config.extra_data) { 69 codec_context->extradata_size = config.extra_data_size; 70 codec_context->extradata = reinterpret_cast<uint8_t*>( 71 av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE)); 72 memcpy(codec_context->extradata, config.extra_data, 73 config.extra_data_size); 74 memset(codec_context->extradata + config.extra_data_size, '\0', 75 FF_INPUT_BUFFER_PADDING_SIZE); 76 } else { 77 codec_context->extradata = NULL; 78 codec_context->extradata_size = 0; 79 } 80 } 81 82 FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Host* host) 83 : is_initialized_(false), 84 host_(host), 85 codec_context_(NULL), 86 av_frame_(NULL), 87 bits_per_channel_(0), 88 samples_per_second_(0), 89 channels_(0), 90 av_sample_format_(0), 91 bytes_per_frame_(0), 92 last_input_timestamp_(kNoTimestamp()), 93 output_bytes_to_drop_(0) { 94 } 95 96 FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() { 97 ReleaseFFmpegResources(); 98 } 99 100 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) { 101 DVLOG(1) << "Initialize()"; 102 103 if (!IsValidConfig(config)) { 104 LOG(ERROR) << "Initialize(): invalid audio decoder configuration."; 105 return false; 106 } 107 108 if (is_initialized_) { 109 LOG(ERROR) << "Initialize(): Already initialized."; 110 return false; 111 } 112 113 // Initialize AVCodecContext structure. 114 codec_context_ = avcodec_alloc_context3(NULL); 115 CdmAudioDecoderConfigToAVCodecContext(config, codec_context_); 116 117 // MP3 decodes to S16P which we don't support, tell it to use S16 instead. 118 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) 119 codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16; 120 121 AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id); 122 if (!codec || avcodec_open2(codec_context_, codec, NULL) < 0) { 123 DLOG(ERROR) << "Could not initialize audio decoder: " 124 << codec_context_->codec_id; 125 return false; 126 } 127 128 // Ensure avcodec_open2() respected our format request. 129 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) { 130 DLOG(ERROR) << "Unable to configure a supported sample format: " 131 << codec_context_->sample_fmt; 132 return false; 133 } 134 135 // Some codecs will only output float data, so we need to convert to integer 136 // before returning the decoded buffer. 137 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP || 138 codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { 139 // Preallocate the AudioBus for float conversions. We can treat interleaved 140 // float data as a single planar channel since our output is expected in an 141 // interleaved format anyways. 142 int channels = codec_context_->channels; 143 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) 144 channels = 1; 145 converter_bus_ = AudioBus::CreateWrapper(channels); 146 } 147 148 // Success! 149 av_frame_ = avcodec_alloc_frame(); 150 bits_per_channel_ = config.bits_per_channel; 151 samples_per_second_ = config.samples_per_second; 152 bytes_per_frame_ = codec_context_->channels * bits_per_channel_ / 8; 153 output_timestamp_helper_.reset( 154 new AudioTimestampHelper(config.samples_per_second)); 155 serialized_audio_frames_.reserve(bytes_per_frame_ * samples_per_second_); 156 is_initialized_ = true; 157 158 // Store initial values to guard against midstream configuration changes. 159 channels_ = codec_context_->channels; 160 av_sample_format_ = codec_context_->sample_fmt; 161 162 return true; 163 } 164 165 void FFmpegCdmAudioDecoder::Deinitialize() { 166 DVLOG(1) << "Deinitialize()"; 167 ReleaseFFmpegResources(); 168 is_initialized_ = false; 169 ResetTimestampState(); 170 } 171 172 void FFmpegCdmAudioDecoder::Reset() { 173 DVLOG(1) << "Reset()"; 174 avcodec_flush_buffers(codec_context_); 175 ResetTimestampState(); 176 } 177 178 // static 179 bool FFmpegCdmAudioDecoder::IsValidConfig( 180 const cdm::AudioDecoderConfig& config) { 181 return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec && 182 config.channel_count > 0 && 183 config.channel_count <= kMaxChannels && 184 config.bits_per_channel > 0 && 185 config.bits_per_channel <= limits::kMaxBitsPerSample && 186 config.samples_per_second > 0 && 187 config.samples_per_second <= limits::kMaxSampleRate; 188 } 189 190 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer( 191 const uint8_t* compressed_buffer, 192 int32_t compressed_buffer_size, 193 int64_t input_timestamp, 194 cdm::AudioFrames* decoded_frames) { 195 DVLOG(1) << "DecodeBuffer()"; 196 const bool is_end_of_stream = !compressed_buffer; 197 base::TimeDelta timestamp = 198 base::TimeDelta::FromMicroseconds(input_timestamp); 199 200 bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS; 201 if (!is_end_of_stream) { 202 if (last_input_timestamp_ == kNoTimestamp()) { 203 if (is_vorbis && timestamp < base::TimeDelta()) { 204 // Dropping frames for negative timestamps as outlined in section A.2 205 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html 206 int frames_to_drop = floor( 207 0.5 + -timestamp.InSecondsF() * samples_per_second_); 208 output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop; 209 } else { 210 last_input_timestamp_ = timestamp; 211 } 212 } else if (timestamp != kNoTimestamp()) { 213 if (timestamp < last_input_timestamp_) { 214 base::TimeDelta diff = timestamp - last_input_timestamp_; 215 DVLOG(1) << "Input timestamps are not monotonically increasing! " 216 << " ts " << timestamp.InMicroseconds() << " us" 217 << " diff " << diff.InMicroseconds() << " us"; 218 return cdm::kDecodeError; 219 } 220 221 last_input_timestamp_ = timestamp; 222 } 223 } 224 225 AVPacket packet; 226 av_init_packet(&packet); 227 packet.data = const_cast<uint8_t*>(compressed_buffer); 228 packet.size = compressed_buffer_size; 229 230 // Each audio packet may contain several frames, so we must call the decoder 231 // until we've exhausted the packet. Regardless of the packet size we always 232 // want to hand it to the decoder at least once, otherwise we would end up 233 // skipping end of stream packets since they have a size of zero. 234 do { 235 // Reset frame to default values. 236 avcodec_get_frame_defaults(av_frame_); 237 238 int frame_decoded = 0; 239 int result = avcodec_decode_audio4( 240 codec_context_, av_frame_, &frame_decoded, &packet); 241 242 if (result < 0) { 243 DCHECK(!is_end_of_stream) 244 << "End of stream buffer produced an error! " 245 << "This is quite possibly a bug in the audio decoder not handling " 246 << "end of stream AVPackets correctly."; 247 248 DLOG(ERROR) 249 << "Error decoding an audio frame with timestamp: " 250 << timestamp.InMicroseconds() << " us, duration: " 251 << timestamp.InMicroseconds() << " us, packet size: " 252 << compressed_buffer_size << " bytes"; 253 254 return cdm::kDecodeError; 255 } 256 257 // Update packet size and data pointer in case we need to call the decoder 258 // with the remaining bytes from this packet. 259 packet.size -= result; 260 packet.data += result; 261 262 if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() && 263 !is_end_of_stream) { 264 DCHECK(timestamp != kNoTimestamp()); 265 if (output_bytes_to_drop_ > 0) { 266 // Currently Vorbis is the only codec that causes us to drop samples. 267 // If we have to drop samples it always means the timeline starts at 0. 268 DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS); 269 output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta()); 270 } else { 271 output_timestamp_helper_->SetBaseTimestamp(timestamp); 272 } 273 } 274 275 int decoded_audio_size = 0; 276 if (frame_decoded) { 277 if (av_frame_->sample_rate != samples_per_second_ || 278 av_frame_->channels != channels_ || 279 av_frame_->format != av_sample_format_) { 280 DLOG(ERROR) << "Unsupported midstream configuration change!" 281 << " Sample Rate: " << av_frame_->sample_rate << " vs " 282 << samples_per_second_ 283 << ", Channels: " << av_frame_->channels << " vs " 284 << channels_ 285 << ", Sample Format: " << av_frame_->format << " vs " 286 << av_sample_format_; 287 return cdm::kDecodeError; 288 } 289 290 decoded_audio_size = av_samples_get_buffer_size( 291 NULL, codec_context_->channels, av_frame_->nb_samples, 292 codec_context_->sample_fmt, 1); 293 // If we're decoding into float, adjust audio size. 294 if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) { 295 DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT || 296 codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP); 297 decoded_audio_size *= 298 static_cast<float>(bits_per_channel_ / 8) / sizeof(float); 299 } 300 } 301 302 int start_sample = 0; 303 if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) { 304 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) 305 << "Decoder didn't output full frames"; 306 307 int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_); 308 start_sample = dropped_size / bytes_per_frame_; 309 decoded_audio_size -= dropped_size; 310 output_bytes_to_drop_ -= dropped_size; 311 } 312 313 scoped_refptr<DataBuffer> output; 314 if (decoded_audio_size > 0) { 315 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) 316 << "Decoder didn't output full frames"; 317 318 // Convert float data using an AudioBus. 319 if (converter_bus_) { 320 // Setup the AudioBus as a wrapper of the AVFrame data and then use 321 // AudioBus::ToInterleaved() to convert the data as necessary. 322 int skip_frames = start_sample; 323 int total_frames = av_frame_->nb_samples; 324 int frames_to_interleave = decoded_audio_size / bytes_per_frame_; 325 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { 326 DCHECK_EQ(converter_bus_->channels(), 1); 327 total_frames *= codec_context_->channels; 328 skip_frames *= codec_context_->channels; 329 frames_to_interleave *= codec_context_->channels; 330 } 331 332 converter_bus_->set_frames(total_frames); 333 for (int i = 0; i < converter_bus_->channels(); ++i) { 334 converter_bus_->SetChannelData(i, reinterpret_cast<float*>( 335 av_frame_->extended_data[i])); 336 } 337 338 output = new DataBuffer(decoded_audio_size); 339 output->set_data_size(decoded_audio_size); 340 341 DCHECK_EQ(frames_to_interleave, converter_bus_->frames() - skip_frames); 342 converter_bus_->ToInterleavedPartial( 343 skip_frames, frames_to_interleave, bits_per_channel_ / 8, 344 output->writable_data()); 345 } else { 346 output = DataBuffer::CopyFrom( 347 av_frame_->extended_data[0] + start_sample * bytes_per_frame_, 348 decoded_audio_size); 349 } 350 351 base::TimeDelta output_timestamp = 352 output_timestamp_helper_->GetTimestamp(); 353 output_timestamp_helper_->AddFrames(decoded_audio_size / 354 bytes_per_frame_); 355 356 // Serialize the audio samples into |serialized_audio_frames_|. 357 SerializeInt64(output_timestamp.InMicroseconds()); 358 SerializeInt64(output->data_size()); 359 serialized_audio_frames_.insert( 360 serialized_audio_frames_.end(), 361 output->data(), 362 output->data() + output->data_size()); 363 } 364 } while (packet.size > 0); 365 366 if (!serialized_audio_frames_.empty()) { 367 decoded_frames->SetFrameBuffer( 368 host_->Allocate(serialized_audio_frames_.size())); 369 if (!decoded_frames->FrameBuffer()) { 370 LOG(ERROR) << "DecodeBuffer() cdm::Host::Allocate failed."; 371 return cdm::kDecodeError; 372 } 373 memcpy(decoded_frames->FrameBuffer()->Data(), 374 &serialized_audio_frames_[0], 375 serialized_audio_frames_.size()); 376 decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size()); 377 serialized_audio_frames_.clear(); 378 379 return cdm::kSuccess; 380 } 381 382 return cdm::kNeedMoreData; 383 } 384 385 void FFmpegCdmAudioDecoder::ResetTimestampState() { 386 output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp()); 387 last_input_timestamp_ = kNoTimestamp(); 388 output_bytes_to_drop_ = 0; 389 } 390 391 void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() { 392 DVLOG(1) << "ReleaseFFmpegResources()"; 393 394 if (codec_context_) { 395 av_free(codec_context_->extradata); 396 avcodec_close(codec_context_); 397 av_free(codec_context_); 398 codec_context_ = NULL; 399 } 400 if (av_frame_) { 401 av_free(av_frame_); 402 av_frame_ = NULL; 403 } 404 } 405 406 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) { 407 int previous_size = serialized_audio_frames_.size(); 408 serialized_audio_frames_.resize(previous_size + sizeof(value)); 409 memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value)); 410 } 411 412 } // namespace media 413