1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "media/cdm/ppapi/ffmpeg_cdm_audio_decoder.h" 6 7 #include <algorithm> 8 9 #include "base/logging.h" 10 #include "media/base/audio_bus.h" 11 #include "media/base/audio_timestamp_helper.h" 12 #include "media/base/buffers.h" 13 #include "media/base/data_buffer.h" 14 #include "media/base/limits.h" 15 #include "media/ffmpeg/ffmpeg_common.h" 16 17 // Include FFmpeg header files. 18 extern "C" { 19 // Temporarily disable possible loss of data warning. 20 MSVC_PUSH_DISABLE_WARNING(4244); 21 #include <libavcodec/avcodec.h> 22 MSVC_POP_WARNING(); 23 } // extern "C" 24 25 namespace media { 26 27 // Maximum number of channels with defined layout in src/media. 28 static const int kMaxChannels = 8; 29 30 static AVCodecID CdmAudioCodecToCodecID( 31 cdm::AudioDecoderConfig::AudioCodec audio_codec) { 32 switch (audio_codec) { 33 case cdm::AudioDecoderConfig::kCodecVorbis: 34 return AV_CODEC_ID_VORBIS; 35 case cdm::AudioDecoderConfig::kCodecAac: 36 return AV_CODEC_ID_AAC; 37 case cdm::AudioDecoderConfig::kUnknownAudioCodec: 38 default: 39 NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec; 40 return AV_CODEC_ID_NONE; 41 } 42 } 43 44 static void CdmAudioDecoderConfigToAVCodecContext( 45 const cdm::AudioDecoderConfig& config, 46 AVCodecContext* codec_context) { 47 codec_context->codec_type = AVMEDIA_TYPE_AUDIO; 48 codec_context->codec_id = CdmAudioCodecToCodecID(config.codec); 49 50 switch (config.bits_per_channel) { 51 case 8: 52 codec_context->sample_fmt = AV_SAMPLE_FMT_U8; 53 break; 54 case 16: 55 codec_context->sample_fmt = AV_SAMPLE_FMT_S16; 56 break; 57 case 32: 58 codec_context->sample_fmt = AV_SAMPLE_FMT_S32; 59 break; 60 default: 61 DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits " 62 "per channel: " << config.bits_per_channel; 63 codec_context->sample_fmt = AV_SAMPLE_FMT_NONE; 64 } 65 66 codec_context->channels = config.channel_count; 67 codec_context->sample_rate = config.samples_per_second; 68 69 if (config.extra_data) { 70 codec_context->extradata_size = config.extra_data_size; 71 codec_context->extradata = reinterpret_cast<uint8_t*>( 72 av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE)); 73 memcpy(codec_context->extradata, config.extra_data, 74 config.extra_data_size); 75 memset(codec_context->extradata + config.extra_data_size, '\0', 76 FF_INPUT_BUFFER_PADDING_SIZE); 77 } else { 78 codec_context->extradata = NULL; 79 codec_context->extradata_size = 0; 80 } 81 } 82 83 static cdm::AudioFormat AVSampleFormatToCdmAudioFormat( 84 AVSampleFormat sample_format) { 85 switch (sample_format) { 86 case AV_SAMPLE_FMT_U8: 87 return cdm::kAudioFormatU8; 88 case AV_SAMPLE_FMT_S16: 89 return cdm::kAudioFormatS16; 90 case AV_SAMPLE_FMT_S32: 91 return cdm::kAudioFormatS32; 92 case AV_SAMPLE_FMT_FLT: 93 return cdm::kAudioFormatF32; 94 case AV_SAMPLE_FMT_S16P: 95 return cdm::kAudioFormatPlanarS16; 96 case AV_SAMPLE_FMT_FLTP: 97 return cdm::kAudioFormatPlanarF32; 98 default: 99 DVLOG(1) << "Unknown AVSampleFormat: " << sample_format; 100 } 101 return cdm::kUnknownAudioFormat; 102 } 103 104 static void CopySamples(cdm::AudioFormat cdm_format, 105 int decoded_audio_size, 106 const AVFrame& av_frame, 107 uint8_t* output_buffer) { 108 switch (cdm_format) { 109 case cdm::kAudioFormatU8: 110 case cdm::kAudioFormatS16: 111 case cdm::kAudioFormatS32: 112 case cdm::kAudioFormatF32: 113 memcpy(output_buffer, av_frame.data[0], decoded_audio_size); 114 break; 115 case cdm::kAudioFormatPlanarS16: 116 case cdm::kAudioFormatPlanarF32: { 117 const int decoded_size_per_channel = 118 decoded_audio_size / av_frame.channels; 119 for (int i = 0; i < av_frame.channels; ++i) { 120 memcpy(output_buffer, 121 av_frame.extended_data[i], 122 decoded_size_per_channel); 123 output_buffer += decoded_size_per_channel; 124 } 125 break; 126 } 127 default: 128 NOTREACHED() << "Unsupported CDM Audio Format!"; 129 memset(output_buffer, 0, decoded_audio_size); 130 } 131 } 132 133 FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(ClearKeyCdmHost* host) 134 : is_initialized_(false), 135 host_(host), 136 samples_per_second_(0), 137 channels_(0), 138 av_sample_format_(0), 139 bytes_per_frame_(0), 140 last_input_timestamp_(kNoTimestamp()), 141 output_bytes_to_drop_(0) { 142 } 143 144 FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() { 145 ReleaseFFmpegResources(); 146 } 147 148 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) { 149 DVLOG(1) << "Initialize()"; 150 if (!IsValidConfig(config)) { 151 LOG(ERROR) << "Initialize(): invalid audio decoder configuration."; 152 return false; 153 } 154 155 if (is_initialized_) { 156 LOG(ERROR) << "Initialize(): Already initialized."; 157 return false; 158 } 159 160 // Initialize AVCodecContext structure. 161 codec_context_.reset(avcodec_alloc_context3(NULL)); 162 CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get()); 163 164 // MP3 decodes to S16P which we don't support, tell it to use S16 instead. 165 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) 166 codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16; 167 168 AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id); 169 if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) { 170 DLOG(ERROR) << "Could not initialize audio decoder: " 171 << codec_context_->codec_id; 172 return false; 173 } 174 175 // Ensure avcodec_open2() respected our format request. 176 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) { 177 DLOG(ERROR) << "Unable to configure a supported sample format: " 178 << codec_context_->sample_fmt; 179 return false; 180 } 181 182 // Success! 183 av_frame_.reset(av_frame_alloc()); 184 samples_per_second_ = config.samples_per_second; 185 bytes_per_frame_ = codec_context_->channels * config.bits_per_channel / 8; 186 output_timestamp_helper_.reset( 187 new AudioTimestampHelper(config.samples_per_second)); 188 is_initialized_ = true; 189 190 // Store initial values to guard against midstream configuration changes. 191 channels_ = codec_context_->channels; 192 av_sample_format_ = codec_context_->sample_fmt; 193 194 return true; 195 } 196 197 void FFmpegCdmAudioDecoder::Deinitialize() { 198 DVLOG(1) << "Deinitialize()"; 199 ReleaseFFmpegResources(); 200 is_initialized_ = false; 201 ResetTimestampState(); 202 } 203 204 void FFmpegCdmAudioDecoder::Reset() { 205 DVLOG(1) << "Reset()"; 206 avcodec_flush_buffers(codec_context_.get()); 207 ResetTimestampState(); 208 } 209 210 // static 211 bool FFmpegCdmAudioDecoder::IsValidConfig( 212 const cdm::AudioDecoderConfig& config) { 213 return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec && 214 config.channel_count > 0 && 215 config.channel_count <= kMaxChannels && 216 config.bits_per_channel > 0 && 217 config.bits_per_channel <= limits::kMaxBitsPerSample && 218 config.samples_per_second > 0 && 219 config.samples_per_second <= limits::kMaxSampleRate; 220 } 221 222 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer( 223 const uint8_t* compressed_buffer, 224 int32_t compressed_buffer_size, 225 int64_t input_timestamp, 226 cdm::AudioFrames* decoded_frames) { 227 DVLOG(1) << "DecodeBuffer()"; 228 const bool is_end_of_stream = !compressed_buffer; 229 base::TimeDelta timestamp = 230 base::TimeDelta::FromMicroseconds(input_timestamp); 231 232 bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS; 233 if (!is_end_of_stream) { 234 if (last_input_timestamp_ == kNoTimestamp()) { 235 if (is_vorbis && timestamp < base::TimeDelta()) { 236 // Dropping frames for negative timestamps as outlined in section A.2 237 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html 238 int frames_to_drop = floor( 239 0.5 + -timestamp.InSecondsF() * samples_per_second_); 240 output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop; 241 } else { 242 last_input_timestamp_ = timestamp; 243 } 244 } else if (timestamp != kNoTimestamp()) { 245 if (timestamp < last_input_timestamp_) { 246 base::TimeDelta diff = timestamp - last_input_timestamp_; 247 DVLOG(1) << "Input timestamps are not monotonically increasing! " 248 << " ts " << timestamp.InMicroseconds() << " us" 249 << " diff " << diff.InMicroseconds() << " us"; 250 return cdm::kDecodeError; 251 } 252 253 last_input_timestamp_ = timestamp; 254 } 255 } 256 257 AVPacket packet; 258 av_init_packet(&packet); 259 packet.data = const_cast<uint8_t*>(compressed_buffer); 260 packet.size = compressed_buffer_size; 261 262 // Tell the CDM what AudioFormat we're using. 263 const cdm::AudioFormat cdm_format = AVSampleFormatToCdmAudioFormat( 264 static_cast<AVSampleFormat>(av_sample_format_)); 265 DCHECK_NE(cdm_format, cdm::kUnknownAudioFormat); 266 decoded_frames->SetFormat(cdm_format); 267 268 // Each audio packet may contain several frames, so we must call the decoder 269 // until we've exhausted the packet. Regardless of the packet size we always 270 // want to hand it to the decoder at least once, otherwise we would end up 271 // skipping end of stream packets since they have a size of zero. 272 do { 273 // Reset frame to default values. 274 avcodec_get_frame_defaults(av_frame_.get()); 275 276 int frame_decoded = 0; 277 int result = avcodec_decode_audio4( 278 codec_context_.get(), av_frame_.get(), &frame_decoded, &packet); 279 280 if (result < 0) { 281 DCHECK(!is_end_of_stream) 282 << "End of stream buffer produced an error! " 283 << "This is quite possibly a bug in the audio decoder not handling " 284 << "end of stream AVPackets correctly."; 285 286 DLOG(ERROR) 287 << "Error decoding an audio frame with timestamp: " 288 << timestamp.InMicroseconds() << " us, duration: " 289 << timestamp.InMicroseconds() << " us, packet size: " 290 << compressed_buffer_size << " bytes"; 291 292 return cdm::kDecodeError; 293 } 294 295 // Update packet size and data pointer in case we need to call the decoder 296 // with the remaining bytes from this packet. 297 packet.size -= result; 298 packet.data += result; 299 300 if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() && 301 !is_end_of_stream) { 302 DCHECK(timestamp != kNoTimestamp()); 303 if (output_bytes_to_drop_ > 0) { 304 // Currently Vorbis is the only codec that causes us to drop samples. 305 // If we have to drop samples it always means the timeline starts at 0. 306 DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS); 307 output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta()); 308 } else { 309 output_timestamp_helper_->SetBaseTimestamp(timestamp); 310 } 311 } 312 313 int decoded_audio_size = 0; 314 if (frame_decoded) { 315 if (av_frame_->sample_rate != samples_per_second_ || 316 av_frame_->channels != channels_ || 317 av_frame_->format != av_sample_format_) { 318 DLOG(ERROR) << "Unsupported midstream configuration change!" 319 << " Sample Rate: " << av_frame_->sample_rate << " vs " 320 << samples_per_second_ 321 << ", Channels: " << av_frame_->channels << " vs " 322 << channels_ 323 << ", Sample Format: " << av_frame_->format << " vs " 324 << av_sample_format_; 325 return cdm::kDecodeError; 326 } 327 328 decoded_audio_size = av_samples_get_buffer_size( 329 NULL, codec_context_->channels, av_frame_->nb_samples, 330 codec_context_->sample_fmt, 1); 331 } 332 333 if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) { 334 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) 335 << "Decoder didn't output full frames"; 336 337 int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_); 338 decoded_audio_size -= dropped_size; 339 output_bytes_to_drop_ -= dropped_size; 340 } 341 342 if (decoded_audio_size > 0) { 343 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) 344 << "Decoder didn't output full frames"; 345 346 base::TimeDelta output_timestamp = 347 output_timestamp_helper_->GetTimestamp(); 348 output_timestamp_helper_->AddFrames(decoded_audio_size / 349 bytes_per_frame_); 350 351 // If we've exhausted the packet in the first decode we can write directly 352 // into the frame buffer instead of a multistep serialization approach. 353 if (serialized_audio_frames_.empty() && !packet.size) { 354 const uint32_t buffer_size = decoded_audio_size + sizeof(int64) * 2; 355 decoded_frames->SetFrameBuffer(host_->Allocate(buffer_size)); 356 if (!decoded_frames->FrameBuffer()) { 357 LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed."; 358 return cdm::kDecodeError; 359 } 360 decoded_frames->FrameBuffer()->SetSize(buffer_size); 361 uint8_t* output_buffer = decoded_frames->FrameBuffer()->Data(); 362 363 const int64 timestamp = output_timestamp.InMicroseconds(); 364 memcpy(output_buffer, ×tamp, sizeof(timestamp)); 365 output_buffer += sizeof(timestamp); 366 367 const int64 output_size = decoded_audio_size; 368 memcpy(output_buffer, &output_size, sizeof(output_size)); 369 output_buffer += sizeof(output_size); 370 371 // Copy the samples and return success. 372 CopySamples( 373 cdm_format, decoded_audio_size, *av_frame_, output_buffer); 374 return cdm::kSuccess; 375 } 376 377 // There are still more frames to decode, so we need to serialize them in 378 // a secondary buffer since we don't know their sizes ahead of time (which 379 // is required to allocate the FrameBuffer object). 380 SerializeInt64(output_timestamp.InMicroseconds()); 381 SerializeInt64(decoded_audio_size); 382 383 const size_t previous_size = serialized_audio_frames_.size(); 384 serialized_audio_frames_.resize(previous_size + decoded_audio_size); 385 uint8_t* output_buffer = &serialized_audio_frames_[0] + previous_size; 386 CopySamples( 387 cdm_format, decoded_audio_size, *av_frame_, output_buffer); 388 } 389 } while (packet.size > 0); 390 391 if (!serialized_audio_frames_.empty()) { 392 decoded_frames->SetFrameBuffer( 393 host_->Allocate(serialized_audio_frames_.size())); 394 if (!decoded_frames->FrameBuffer()) { 395 LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed."; 396 return cdm::kDecodeError; 397 } 398 memcpy(decoded_frames->FrameBuffer()->Data(), 399 &serialized_audio_frames_[0], 400 serialized_audio_frames_.size()); 401 decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size()); 402 serialized_audio_frames_.clear(); 403 404 return cdm::kSuccess; 405 } 406 407 return cdm::kNeedMoreData; 408 } 409 410 void FFmpegCdmAudioDecoder::ResetTimestampState() { 411 output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp()); 412 last_input_timestamp_ = kNoTimestamp(); 413 output_bytes_to_drop_ = 0; 414 } 415 416 void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() { 417 DVLOG(1) << "ReleaseFFmpegResources()"; 418 419 codec_context_.reset(); 420 av_frame_.reset(); 421 } 422 423 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) { 424 const size_t previous_size = serialized_audio_frames_.size(); 425 serialized_audio_frames_.resize(previous_size + sizeof(value)); 426 memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value)); 427 } 428 429 } // namespace media 430