1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "media/base/audio_splicer.h" 6 7 #include <cstdlib> 8 #include <deque> 9 10 #include "base/logging.h" 11 #include "media/base/audio_buffer.h" 12 #include "media/base/audio_bus.h" 13 #include "media/base/audio_decoder_config.h" 14 #include "media/base/audio_timestamp_helper.h" 15 #include "media/base/vector_math.h" 16 17 namespace media { 18 19 // Minimum gap size needed before the splicer will take action to 20 // fill a gap. This avoids periodically inserting and then dropping samples 21 // when the buffer timestamps are slightly off because of timestamp rounding 22 // in the source content. Unit is frames. 23 static const int kMinGapSize = 2; 24 25 // AudioBuffer::TrimStart() is not as accurate as the timestamp helper, so 26 // manually adjust the duration and timestamp after trimming. 27 static void AccurateTrimStart(int frames_to_trim, 28 const scoped_refptr<AudioBuffer> buffer, 29 const AudioTimestampHelper& timestamp_helper) { 30 buffer->TrimStart(frames_to_trim); 31 buffer->set_timestamp(timestamp_helper.GetTimestamp()); 32 } 33 34 // Returns an AudioBus whose frame buffer is backed by the provided AudioBuffer. 35 static scoped_ptr<AudioBus> CreateAudioBufferWrapper( 36 const scoped_refptr<AudioBuffer>& buffer) { 37 scoped_ptr<AudioBus> wrapper = 38 AudioBus::CreateWrapper(buffer->channel_count()); 39 wrapper->set_frames(buffer->frame_count()); 40 for (int ch = 0; ch < buffer->channel_count(); ++ch) { 41 wrapper->SetChannelData( 42 ch, reinterpret_cast<float*>(buffer->channel_data()[ch])); 43 } 44 return wrapper.Pass(); 45 } 46 47 class AudioStreamSanitizer { 48 public: 49 explicit AudioStreamSanitizer(int samples_per_second); 50 ~AudioStreamSanitizer(); 51 52 // Resets the sanitizer state by clearing the output buffers queue, and 53 // resetting the timestamp helper. 54 void Reset(); 55 56 // Similar to Reset(), but initializes the timestamp helper with the given 57 // parameters. 58 void ResetTimestampState(int64 frame_count, base::TimeDelta base_timestamp); 59 60 // Adds a new buffer full of samples or end of stream buffer to the splicer. 61 // Returns true if the buffer was accepted. False is returned if an error 62 // occurred. 63 bool AddInput(const scoped_refptr<AudioBuffer>& input); 64 65 // Returns true if the sanitizer has a buffer to return. 66 bool HasNextBuffer() const; 67 68 // Removes the next buffer from the output buffer queue and returns it; should 69 // only be called if HasNextBuffer() returns true. 70 scoped_refptr<AudioBuffer> GetNextBuffer(); 71 72 // Returns the total frame count of all buffers available for output. 73 int GetFrameCount() const; 74 75 const AudioTimestampHelper& timestamp_helper() { 76 return output_timestamp_helper_; 77 } 78 79 // Transfer all buffers into |output|. Returns false if AddInput() on the 80 // |output| sanitizer fails for any buffer removed from |this|. 81 bool DrainInto(AudioStreamSanitizer* output); 82 83 private: 84 void AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer); 85 86 AudioTimestampHelper output_timestamp_helper_; 87 bool received_end_of_stream_; 88 89 typedef std::deque<scoped_refptr<AudioBuffer> > BufferQueue; 90 BufferQueue output_buffers_; 91 92 DISALLOW_ASSIGN(AudioStreamSanitizer); 93 }; 94 95 AudioStreamSanitizer::AudioStreamSanitizer(int samples_per_second) 96 : output_timestamp_helper_(samples_per_second), 97 received_end_of_stream_(false) {} 98 99 AudioStreamSanitizer::~AudioStreamSanitizer() {} 100 101 void AudioStreamSanitizer::Reset() { 102 ResetTimestampState(0, kNoTimestamp()); 103 } 104 105 void AudioStreamSanitizer::ResetTimestampState(int64 frame_count, 106 base::TimeDelta base_timestamp) { 107 output_buffers_.clear(); 108 received_end_of_stream_ = false; 109 output_timestamp_helper_.SetBaseTimestamp(base_timestamp); 110 if (frame_count > 0) 111 output_timestamp_helper_.AddFrames(frame_count); 112 } 113 114 bool AudioStreamSanitizer::AddInput(const scoped_refptr<AudioBuffer>& input) { 115 DCHECK(!received_end_of_stream_ || input->end_of_stream()); 116 117 if (input->end_of_stream()) { 118 output_buffers_.push_back(input); 119 received_end_of_stream_ = true; 120 return true; 121 } 122 123 DCHECK(input->timestamp() != kNoTimestamp()); 124 DCHECK(input->duration() > base::TimeDelta()); 125 DCHECK_GT(input->frame_count(), 0); 126 127 if (output_timestamp_helper_.base_timestamp() == kNoTimestamp()) 128 output_timestamp_helper_.SetBaseTimestamp(input->timestamp()); 129 130 if (output_timestamp_helper_.base_timestamp() > input->timestamp()) { 131 DVLOG(1) << "Input timestamp is before the base timestamp."; 132 return false; 133 } 134 135 const base::TimeDelta timestamp = input->timestamp(); 136 const base::TimeDelta expected_timestamp = 137 output_timestamp_helper_.GetTimestamp(); 138 const base::TimeDelta delta = timestamp - expected_timestamp; 139 140 if (std::abs(delta.InMilliseconds()) > 141 AudioSplicer::kMaxTimeDeltaInMilliseconds) { 142 DVLOG(1) << "Timestamp delta too large: " << delta.InMicroseconds() << "us"; 143 return false; 144 } 145 146 int frames_to_fill = 0; 147 if (delta != base::TimeDelta()) 148 frames_to_fill = output_timestamp_helper_.GetFramesToTarget(timestamp); 149 150 if (frames_to_fill == 0 || std::abs(frames_to_fill) < kMinGapSize) { 151 AddOutputBuffer(input); 152 return true; 153 } 154 155 if (frames_to_fill > 0) { 156 DVLOG(1) << "Gap detected @ " << expected_timestamp.InMicroseconds() 157 << " us: " << delta.InMicroseconds() << " us"; 158 159 // Create a buffer with enough silence samples to fill the gap and 160 // add it to the output buffer. 161 scoped_refptr<AudioBuffer> gap = 162 AudioBuffer::CreateEmptyBuffer(input->channel_layout(), 163 input->channel_count(), 164 input->sample_rate(), 165 frames_to_fill, 166 expected_timestamp); 167 AddOutputBuffer(gap); 168 169 // Add the input buffer now that the gap has been filled. 170 AddOutputBuffer(input); 171 return true; 172 } 173 174 // Overlapping buffers marked as splice frames are handled by AudioSplicer, 175 // but decoder and demuxer quirks may sometimes produce overlapping samples 176 // which need to be sanitized. 177 // 178 // A crossfade can't be done here because only the current buffer is available 179 // at this point, not previous buffers. 180 DVLOG(1) << "Overlap detected @ " << expected_timestamp.InMicroseconds() 181 << " us: " << -delta.InMicroseconds() << " us"; 182 183 const int frames_to_skip = -frames_to_fill; 184 if (input->frame_count() <= frames_to_skip) { 185 DVLOG(1) << "Dropping whole buffer"; 186 return true; 187 } 188 189 // Copy the trailing samples that do not overlap samples already output 190 // into a new buffer. Add this new buffer to the output queue. 191 // 192 // TODO(acolwell): Implement a cross-fade here so the transition is less 193 // jarring. 194 AccurateTrimStart(frames_to_skip, input, output_timestamp_helper_); 195 AddOutputBuffer(input); 196 return true; 197 } 198 199 bool AudioStreamSanitizer::HasNextBuffer() const { 200 return !output_buffers_.empty(); 201 } 202 203 scoped_refptr<AudioBuffer> AudioStreamSanitizer::GetNextBuffer() { 204 scoped_refptr<AudioBuffer> ret = output_buffers_.front(); 205 output_buffers_.pop_front(); 206 return ret; 207 } 208 209 void AudioStreamSanitizer::AddOutputBuffer( 210 const scoped_refptr<AudioBuffer>& buffer) { 211 output_timestamp_helper_.AddFrames(buffer->frame_count()); 212 output_buffers_.push_back(buffer); 213 } 214 215 int AudioStreamSanitizer::GetFrameCount() const { 216 int frame_count = 0; 217 for (const auto& buffer : output_buffers_) 218 frame_count += buffer->frame_count(); 219 return frame_count; 220 } 221 222 bool AudioStreamSanitizer::DrainInto(AudioStreamSanitizer* output) { 223 while (HasNextBuffer()) { 224 if (!output->AddInput(GetNextBuffer())) 225 return false; 226 } 227 return true; 228 } 229 230 AudioSplicer::AudioSplicer(int samples_per_second) 231 : max_crossfade_duration_( 232 base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds)), 233 splice_timestamp_(kNoTimestamp()), 234 max_splice_end_timestamp_(kNoTimestamp()), 235 output_sanitizer_(new AudioStreamSanitizer(samples_per_second)), 236 pre_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)), 237 post_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)), 238 have_all_pre_splice_buffers_(false) {} 239 240 AudioSplicer::~AudioSplicer() {} 241 242 void AudioSplicer::Reset() { 243 output_sanitizer_->Reset(); 244 pre_splice_sanitizer_->Reset(); 245 post_splice_sanitizer_->Reset(); 246 have_all_pre_splice_buffers_ = false; 247 reset_splice_timestamps(); 248 } 249 250 bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) { 251 // If we're not processing a splice, add the input to the output queue. 252 if (splice_timestamp_ == kNoTimestamp()) { 253 DCHECK(!pre_splice_sanitizer_->HasNextBuffer()); 254 DCHECK(!post_splice_sanitizer_->HasNextBuffer()); 255 return output_sanitizer_->AddInput(input); 256 } 257 258 const AudioTimestampHelper& output_ts_helper = 259 output_sanitizer_->timestamp_helper(); 260 261 if (!have_all_pre_splice_buffers_) { 262 DCHECK(!input->end_of_stream()); 263 264 // If the provided buffer is entirely before the splice point it can also be 265 // added to the output queue. 266 if (input->timestamp() + input->duration() < splice_timestamp_) { 267 DCHECK(!pre_splice_sanitizer_->HasNextBuffer()); 268 return output_sanitizer_->AddInput(input); 269 } 270 271 // If we've encountered the first pre splice buffer, reset the pre splice 272 // sanitizer based on |output_sanitizer_|. This is done so that gaps and 273 // overlaps between buffers across the sanitizers are accounted for prior 274 // to calculating crossfade. 275 if (!pre_splice_sanitizer_->HasNextBuffer()) { 276 pre_splice_sanitizer_->ResetTimestampState( 277 output_ts_helper.frame_count(), output_ts_helper.base_timestamp()); 278 } 279 280 return pre_splice_sanitizer_->AddInput(input); 281 } 282 283 // The first post splice buffer is expected to match |splice_timestamp_|. 284 if (!post_splice_sanitizer_->HasNextBuffer()) 285 CHECK(splice_timestamp_ == input->timestamp()); 286 287 // At this point we have all the fade out preroll buffers from the decoder. 288 // We now need to wait until we have enough data to perform the crossfade (or 289 // we receive an end of stream). 290 if (!post_splice_sanitizer_->AddInput(input)) 291 return false; 292 293 // Ensure |output_sanitizer_| has a valid base timestamp so we can use it for 294 // timestamp calculations. 295 if (output_ts_helper.base_timestamp() == kNoTimestamp()) { 296 output_sanitizer_->ResetTimestampState( 297 0, pre_splice_sanitizer_->timestamp_helper().base_timestamp()); 298 } 299 300 // If a splice frame was incorrectly marked due to poor demuxed timestamps, we 301 // may not actually have a splice. Here we check if any frames exist before 302 // the splice. In this case, just transfer all data to the output sanitizer. 303 const int frames_before_splice = 304 output_ts_helper.GetFramesToTarget(splice_timestamp_); 305 if (frames_before_splice < 0 || 306 pre_splice_sanitizer_->GetFrameCount() <= frames_before_splice) { 307 CHECK(pre_splice_sanitizer_->DrainInto(output_sanitizer_.get())); 308 309 // If the file contains incorrectly muxed timestamps, there may be huge gaps 310 // between the demuxed and decoded timestamps. 311 if (!post_splice_sanitizer_->DrainInto(output_sanitizer_.get())) 312 return false; 313 314 reset_splice_timestamps(); 315 return true; 316 } 317 318 // Wait until we have enough data to crossfade or end of stream. 319 if (!input->end_of_stream() && 320 input->timestamp() + input->duration() < max_splice_end_timestamp_) { 321 return true; 322 } 323 324 scoped_refptr<AudioBuffer> crossfade_buffer; 325 scoped_ptr<AudioBus> pre_splice = 326 ExtractCrossfadeFromPreSplice(&crossfade_buffer); 327 328 // Crossfade the pre splice and post splice sections and transfer all relevant 329 // buffers into |output_sanitizer_|. 330 CrossfadePostSplice(pre_splice.Pass(), crossfade_buffer); 331 332 // Clear the splice timestamp so new splices can be accepted. 333 reset_splice_timestamps(); 334 return true; 335 } 336 337 bool AudioSplicer::HasNextBuffer() const { 338 return output_sanitizer_->HasNextBuffer(); 339 } 340 341 scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() { 342 return output_sanitizer_->GetNextBuffer(); 343 } 344 345 void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp) { 346 if (splice_timestamp == kNoTimestamp()) { 347 DCHECK(splice_timestamp_ != kNoTimestamp()); 348 DCHECK(!have_all_pre_splice_buffers_); 349 have_all_pre_splice_buffers_ = true; 350 return; 351 } 352 353 if (splice_timestamp_ == splice_timestamp) 354 return; 355 356 // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to 357 // handle cases where another splice comes in before we've received 5ms of 358 // data from the last one. Leave this as a CHECK for now to figure out if 359 // this case is possible. 360 CHECK(splice_timestamp_ == kNoTimestamp()); 361 splice_timestamp_ = splice_timestamp; 362 max_splice_end_timestamp_ = splice_timestamp_ + max_crossfade_duration_; 363 pre_splice_sanitizer_->Reset(); 364 post_splice_sanitizer_->Reset(); 365 have_all_pre_splice_buffers_ = false; 366 } 367 368 scoped_ptr<AudioBus> AudioSplicer::ExtractCrossfadeFromPreSplice( 369 scoped_refptr<AudioBuffer>* crossfade_buffer) { 370 DCHECK(crossfade_buffer); 371 const AudioTimestampHelper& output_ts_helper = 372 output_sanitizer_->timestamp_helper(); 373 374 int frames_before_splice = 375 output_ts_helper.GetFramesToTarget(splice_timestamp_); 376 377 // Determine crossfade frame count based on available frames in each splicer 378 // and capping to the maximum crossfade duration. 379 const int max_crossfade_frame_count = 380 output_ts_helper.GetFramesToTarget(max_splice_end_timestamp_) - 381 frames_before_splice; 382 const int frames_to_crossfade = std::min( 383 max_crossfade_frame_count, 384 std::min(pre_splice_sanitizer_->GetFrameCount() - frames_before_splice, 385 post_splice_sanitizer_->GetFrameCount())); 386 // There must always be frames to crossfade, otherwise the splice should not 387 // have been generated. 388 DCHECK_GT(frames_to_crossfade, 0); 389 390 int frames_read = 0; 391 scoped_ptr<AudioBus> output_bus; 392 while (pre_splice_sanitizer_->HasNextBuffer() && 393 frames_read < frames_to_crossfade) { 394 scoped_refptr<AudioBuffer> preroll = pre_splice_sanitizer_->GetNextBuffer(); 395 396 // We don't know the channel count until we see the first buffer, so wait 397 // until the first buffer to allocate the output AudioBus. 398 if (!output_bus) { 399 output_bus = 400 AudioBus::Create(preroll->channel_count(), frames_to_crossfade); 401 // Allocate output buffer for crossfade. 402 *crossfade_buffer = AudioBuffer::CreateBuffer(kSampleFormatPlanarF32, 403 preroll->channel_layout(), 404 preroll->channel_count(), 405 preroll->sample_rate(), 406 frames_to_crossfade); 407 } 408 409 // There may be enough of a gap introduced during decoding such that an 410 // entire buffer exists before the splice point. 411 if (frames_before_splice >= preroll->frame_count()) { 412 // Adjust the number of frames remaining before the splice. NOTE: This is 413 // safe since |pre_splice_sanitizer_| is a continuation of the timeline in 414 // |output_sanitizer_|. As such we're guaranteed there are no gaps or 415 // overlaps in the timeline between the two sanitizers. 416 frames_before_splice -= preroll->frame_count(); 417 CHECK(output_sanitizer_->AddInput(preroll)); 418 continue; 419 } 420 421 const int frames_to_read = 422 std::min(preroll->frame_count() - frames_before_splice, 423 output_bus->frames() - frames_read); 424 preroll->ReadFrames( 425 frames_to_read, frames_before_splice, frames_read, output_bus.get()); 426 frames_read += frames_to_read; 427 428 // If only part of the buffer was consumed, trim it appropriately and stick 429 // it into the output queue. 430 if (frames_before_splice) { 431 preroll->TrimEnd(preroll->frame_count() - frames_before_splice); 432 CHECK(output_sanitizer_->AddInput(preroll)); 433 frames_before_splice = 0; 434 } 435 } 436 437 // Ensure outputs were properly allocated. The method should not have been 438 // called if there is not enough data to crossfade. 439 // TODO(dalecurtis): Convert to DCHECK() once http://crbug.com/356073 fixed. 440 CHECK(output_bus); 441 CHECK(crossfade_buffer->get()); 442 443 // All necessary buffers have been processed, it's safe to reset. 444 pre_splice_sanitizer_->Reset(); 445 DCHECK_EQ(output_bus->frames(), frames_read); 446 DCHECK_EQ(output_ts_helper.GetFramesToTarget(splice_timestamp_), 0); 447 return output_bus.Pass(); 448 } 449 450 void AudioSplicer::CrossfadePostSplice( 451 scoped_ptr<AudioBus> pre_splice_bus, 452 const scoped_refptr<AudioBuffer>& crossfade_buffer) { 453 // Use the calculated timestamp and duration to ensure there's no extra gaps 454 // or overlaps to process when adding the buffer to |output_sanitizer_|. 455 const AudioTimestampHelper& output_ts_helper = 456 output_sanitizer_->timestamp_helper(); 457 crossfade_buffer->set_timestamp(output_ts_helper.GetTimestamp()); 458 459 // AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap 460 // our AudioBuffer in one so we can avoid extra data copies. 461 scoped_ptr<AudioBus> output_bus = CreateAudioBufferWrapper(crossfade_buffer); 462 463 // Extract crossfade section from the |post_splice_sanitizer_|. 464 int frames_read = 0, frames_to_trim = 0; 465 scoped_refptr<AudioBuffer> remainder; 466 while (post_splice_sanitizer_->HasNextBuffer() && 467 frames_read < output_bus->frames()) { 468 scoped_refptr<AudioBuffer> postroll = 469 post_splice_sanitizer_->GetNextBuffer(); 470 const int frames_to_read = 471 std::min(postroll->frame_count(), output_bus->frames() - frames_read); 472 postroll->ReadFrames(frames_to_read, 0, frames_read, output_bus.get()); 473 frames_read += frames_to_read; 474 475 // If only part of the buffer was consumed, save it for after we've added 476 // the crossfade buffer 477 if (frames_to_read < postroll->frame_count()) { 478 DCHECK(!remainder.get()); 479 remainder.swap(postroll); 480 frames_to_trim = frames_to_read; 481 } 482 } 483 484 DCHECK_EQ(output_bus->frames(), frames_read); 485 486 // Crossfade the audio into |crossfade_buffer|. 487 for (int ch = 0; ch < output_bus->channels(); ++ch) { 488 vector_math::Crossfade(pre_splice_bus->channel(ch), 489 pre_splice_bus->frames(), 490 output_bus->channel(ch)); 491 } 492 493 CHECK(output_sanitizer_->AddInput(crossfade_buffer)); 494 DCHECK_EQ(crossfade_buffer->frame_count(), output_bus->frames()); 495 496 if (remainder.get()) { 497 // Trim off consumed frames. 498 AccurateTrimStart(frames_to_trim, remainder, output_ts_helper); 499 CHECK(output_sanitizer_->AddInput(remainder)); 500 } 501 502 // Transfer all remaining buffers out and reset once empty. 503 CHECK(post_splice_sanitizer_->DrainInto(output_sanitizer_.get())); 504 post_splice_sanitizer_->Reset(); 505 } 506 507 } // namespace media 508