1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_coding/neteq/neteq_impl.h" 12 13 #include <assert.h> 14 #include <memory.h> // memset 15 16 #include <algorithm> 17 18 #include "webrtc/base/checks.h" 19 #include "webrtc/base/logging.h" 20 #include "webrtc/base/safe_conversions.h" 21 #include "webrtc/base/trace_event.h" 22 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 23 #include "webrtc/modules/audio_coding/codecs/audio_decoder.h" 24 #include "webrtc/modules/audio_coding/neteq/accelerate.h" 25 #include "webrtc/modules/audio_coding/neteq/background_noise.h" 26 #include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h" 27 #include "webrtc/modules/audio_coding/neteq/comfort_noise.h" 28 #include "webrtc/modules/audio_coding/neteq/decision_logic.h" 29 #include "webrtc/modules/audio_coding/neteq/decoder_database.h" 30 #include "webrtc/modules/audio_coding/neteq/defines.h" 31 #include "webrtc/modules/audio_coding/neteq/delay_manager.h" 32 #include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h" 33 #include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h" 34 #include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h" 35 #include "webrtc/modules/audio_coding/neteq/expand.h" 36 #include "webrtc/modules/audio_coding/neteq/merge.h" 37 #include "webrtc/modules/audio_coding/neteq/nack.h" 38 #include "webrtc/modules/audio_coding/neteq/normal.h" 39 #include "webrtc/modules/audio_coding/neteq/packet_buffer.h" 40 #include "webrtc/modules/audio_coding/neteq/packet.h" 41 #include "webrtc/modules/audio_coding/neteq/payload_splitter.h" 42 #include "webrtc/modules/audio_coding/neteq/post_decode_vad.h" 43 #include "webrtc/modules/audio_coding/neteq/preemptive_expand.h" 44 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h" 45 #include "webrtc/modules/audio_coding/neteq/timestamp_scaler.h" 46 #include "webrtc/modules/include/module_common_types.h" 47 #include "webrtc/system_wrappers/include/critical_section_wrapper.h" 48 49 // Modify the code to obtain backwards bit-exactness. Once bit-exactness is no 50 // longer required, this #define should be removed (and the code that it 51 // enables). 52 #define LEGACY_BITEXACT 53 54 namespace webrtc { 55 56 NetEqImpl::NetEqImpl(const NetEq::Config& config, 57 BufferLevelFilter* buffer_level_filter, 58 DecoderDatabase* decoder_database, 59 DelayManager* delay_manager, 60 DelayPeakDetector* delay_peak_detector, 61 DtmfBuffer* dtmf_buffer, 62 DtmfToneGenerator* dtmf_tone_generator, 63 PacketBuffer* packet_buffer, 64 PayloadSplitter* payload_splitter, 65 TimestampScaler* timestamp_scaler, 66 AccelerateFactory* accelerate_factory, 67 ExpandFactory* expand_factory, 68 PreemptiveExpandFactory* preemptive_expand_factory, 69 bool create_components) 70 : crit_sect_(CriticalSectionWrapper::CreateCriticalSection()), 71 buffer_level_filter_(buffer_level_filter), 72 decoder_database_(decoder_database), 73 delay_manager_(delay_manager), 74 delay_peak_detector_(delay_peak_detector), 75 dtmf_buffer_(dtmf_buffer), 76 dtmf_tone_generator_(dtmf_tone_generator), 77 packet_buffer_(packet_buffer), 78 payload_splitter_(payload_splitter), 79 timestamp_scaler_(timestamp_scaler), 80 vad_(new PostDecodeVad()), 81 expand_factory_(expand_factory), 82 accelerate_factory_(accelerate_factory), 83 preemptive_expand_factory_(preemptive_expand_factory), 84 last_mode_(kModeNormal), 85 decoded_buffer_length_(kMaxFrameSize), 86 decoded_buffer_(new int16_t[decoded_buffer_length_]), 87 playout_timestamp_(0), 88 new_codec_(false), 89 timestamp_(0), 90 reset_decoder_(false), 91 current_rtp_payload_type_(0xFF), // Invalid RTP payload type. 92 current_cng_rtp_payload_type_(0xFF), // Invalid RTP payload type. 93 ssrc_(0), 94 first_packet_(true), 95 error_code_(0), 96 decoder_error_code_(0), 97 background_noise_mode_(config.background_noise_mode), 98 playout_mode_(config.playout_mode), 99 enable_fast_accelerate_(config.enable_fast_accelerate), 100 nack_enabled_(false) { 101 LOG(LS_INFO) << "NetEq config: " << config.ToString(); 102 int fs = config.sample_rate_hz; 103 if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) { 104 LOG(LS_ERROR) << "Sample rate " << fs << " Hz not supported. " << 105 "Changing to 8000 Hz."; 106 fs = 8000; 107 } 108 fs_hz_ = fs; 109 fs_mult_ = fs / 8000; 110 last_output_sample_rate_hz_ = fs; 111 output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_); 112 decoder_frame_length_ = 3 * output_size_samples_; 113 WebRtcSpl_Init(); 114 if (create_components) { 115 SetSampleRateAndChannels(fs, 1); // Default is 1 channel. 116 } 117 RTC_DCHECK(!vad_->enabled()); 118 if (config.enable_post_decode_vad) { 119 vad_->Enable(); 120 } 121 } 122 123 NetEqImpl::~NetEqImpl() = default; 124 125 int NetEqImpl::InsertPacket(const WebRtcRTPHeader& rtp_header, 126 rtc::ArrayView<const uint8_t> payload, 127 uint32_t receive_timestamp) { 128 TRACE_EVENT0("webrtc", "NetEqImpl::InsertPacket"); 129 CriticalSectionScoped lock(crit_sect_.get()); 130 int error = 131 InsertPacketInternal(rtp_header, payload, receive_timestamp, false); 132 if (error != 0) { 133 error_code_ = error; 134 return kFail; 135 } 136 return kOK; 137 } 138 139 int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& rtp_header, 140 uint32_t receive_timestamp) { 141 CriticalSectionScoped lock(crit_sect_.get()); 142 const uint8_t kSyncPayload[] = { 's', 'y', 'n', 'c' }; 143 int error = 144 InsertPacketInternal(rtp_header, kSyncPayload, receive_timestamp, true); 145 146 if (error != 0) { 147 error_code_ = error; 148 return kFail; 149 } 150 return kOK; 151 } 152 153 int NetEqImpl::GetAudio(size_t max_length, int16_t* output_audio, 154 size_t* samples_per_channel, size_t* num_channels, 155 NetEqOutputType* type) { 156 TRACE_EVENT0("webrtc", "NetEqImpl::GetAudio"); 157 CriticalSectionScoped lock(crit_sect_.get()); 158 int error = GetAudioInternal(max_length, output_audio, samples_per_channel, 159 num_channels); 160 if (error != 0) { 161 error_code_ = error; 162 return kFail; 163 } 164 if (type) { 165 *type = LastOutputType(); 166 } 167 last_output_sample_rate_hz_ = 168 rtc::checked_cast<int>(*samples_per_channel * 100); 169 RTC_DCHECK(last_output_sample_rate_hz_ == 8000 || 170 last_output_sample_rate_hz_ == 16000 || 171 last_output_sample_rate_hz_ == 32000 || 172 last_output_sample_rate_hz_ == 48000) 173 << "Unexpected sample rate " << last_output_sample_rate_hz_; 174 return kOK; 175 } 176 177 int NetEqImpl::RegisterPayloadType(NetEqDecoder codec, 178 const std::string& name, 179 uint8_t rtp_payload_type) { 180 CriticalSectionScoped lock(crit_sect_.get()); 181 LOG(LS_VERBOSE) << "RegisterPayloadType " 182 << static_cast<int>(rtp_payload_type) << " " 183 << static_cast<int>(codec); 184 int ret = decoder_database_->RegisterPayload(rtp_payload_type, codec, name); 185 if (ret != DecoderDatabase::kOK) { 186 switch (ret) { 187 case DecoderDatabase::kInvalidRtpPayloadType: 188 error_code_ = kInvalidRtpPayloadType; 189 break; 190 case DecoderDatabase::kCodecNotSupported: 191 error_code_ = kCodecNotSupported; 192 break; 193 case DecoderDatabase::kDecoderExists: 194 error_code_ = kDecoderExists; 195 break; 196 default: 197 error_code_ = kOtherError; 198 } 199 return kFail; 200 } 201 return kOK; 202 } 203 204 int NetEqImpl::RegisterExternalDecoder(AudioDecoder* decoder, 205 NetEqDecoder codec, 206 const std::string& codec_name, 207 uint8_t rtp_payload_type, 208 int sample_rate_hz) { 209 CriticalSectionScoped lock(crit_sect_.get()); 210 LOG(LS_VERBOSE) << "RegisterExternalDecoder " 211 << static_cast<int>(rtp_payload_type) << " " 212 << static_cast<int>(codec); 213 if (!decoder) { 214 LOG(LS_ERROR) << "Cannot register external decoder with NULL pointer"; 215 assert(false); 216 return kFail; 217 } 218 int ret = decoder_database_->InsertExternal( 219 rtp_payload_type, codec, codec_name, sample_rate_hz, decoder); 220 if (ret != DecoderDatabase::kOK) { 221 switch (ret) { 222 case DecoderDatabase::kInvalidRtpPayloadType: 223 error_code_ = kInvalidRtpPayloadType; 224 break; 225 case DecoderDatabase::kCodecNotSupported: 226 error_code_ = kCodecNotSupported; 227 break; 228 case DecoderDatabase::kDecoderExists: 229 error_code_ = kDecoderExists; 230 break; 231 case DecoderDatabase::kInvalidSampleRate: 232 error_code_ = kInvalidSampleRate; 233 break; 234 case DecoderDatabase::kInvalidPointer: 235 error_code_ = kInvalidPointer; 236 break; 237 default: 238 error_code_ = kOtherError; 239 } 240 return kFail; 241 } 242 return kOK; 243 } 244 245 int NetEqImpl::RemovePayloadType(uint8_t rtp_payload_type) { 246 CriticalSectionScoped lock(crit_sect_.get()); 247 int ret = decoder_database_->Remove(rtp_payload_type); 248 if (ret == DecoderDatabase::kOK) { 249 return kOK; 250 } else if (ret == DecoderDatabase::kDecoderNotFound) { 251 error_code_ = kDecoderNotFound; 252 } else { 253 error_code_ = kOtherError; 254 } 255 return kFail; 256 } 257 258 bool NetEqImpl::SetMinimumDelay(int delay_ms) { 259 CriticalSectionScoped lock(crit_sect_.get()); 260 if (delay_ms >= 0 && delay_ms < 10000) { 261 assert(delay_manager_.get()); 262 return delay_manager_->SetMinimumDelay(delay_ms); 263 } 264 return false; 265 } 266 267 bool NetEqImpl::SetMaximumDelay(int delay_ms) { 268 CriticalSectionScoped lock(crit_sect_.get()); 269 if (delay_ms >= 0 && delay_ms < 10000) { 270 assert(delay_manager_.get()); 271 return delay_manager_->SetMaximumDelay(delay_ms); 272 } 273 return false; 274 } 275 276 int NetEqImpl::LeastRequiredDelayMs() const { 277 CriticalSectionScoped lock(crit_sect_.get()); 278 assert(delay_manager_.get()); 279 return delay_manager_->least_required_delay_ms(); 280 } 281 282 int NetEqImpl::SetTargetDelay() { 283 return kNotImplemented; 284 } 285 286 int NetEqImpl::TargetDelay() { 287 return kNotImplemented; 288 } 289 290 int NetEqImpl::CurrentDelayMs() const { 291 CriticalSectionScoped lock(crit_sect_.get()); 292 if (fs_hz_ == 0) 293 return 0; 294 // Sum up the samples in the packet buffer with the future length of the sync 295 // buffer, and divide the sum by the sample rate. 296 const size_t delay_samples = 297 packet_buffer_->NumSamplesInBuffer(decoder_database_.get(), 298 decoder_frame_length_) + 299 sync_buffer_->FutureLength(); 300 // The division below will truncate. 301 const int delay_ms = 302 static_cast<int>(delay_samples) / rtc::CheckedDivExact(fs_hz_, 1000); 303 return delay_ms; 304 } 305 306 // Deprecated. 307 // TODO(henrik.lundin) Delete. 308 void NetEqImpl::SetPlayoutMode(NetEqPlayoutMode mode) { 309 CriticalSectionScoped lock(crit_sect_.get()); 310 if (mode != playout_mode_) { 311 playout_mode_ = mode; 312 CreateDecisionLogic(); 313 } 314 } 315 316 // Deprecated. 317 // TODO(henrik.lundin) Delete. 318 NetEqPlayoutMode NetEqImpl::PlayoutMode() const { 319 CriticalSectionScoped lock(crit_sect_.get()); 320 return playout_mode_; 321 } 322 323 int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) { 324 CriticalSectionScoped lock(crit_sect_.get()); 325 assert(decoder_database_.get()); 326 const size_t total_samples_in_buffers = 327 packet_buffer_->NumSamplesInBuffer(decoder_database_.get(), 328 decoder_frame_length_) + 329 sync_buffer_->FutureLength(); 330 assert(delay_manager_.get()); 331 assert(decision_logic_.get()); 332 stats_.GetNetworkStatistics(fs_hz_, total_samples_in_buffers, 333 decoder_frame_length_, *delay_manager_.get(), 334 *decision_logic_.get(), stats); 335 return 0; 336 } 337 338 void NetEqImpl::GetRtcpStatistics(RtcpStatistics* stats) { 339 CriticalSectionScoped lock(crit_sect_.get()); 340 if (stats) { 341 rtcp_.GetStatistics(false, stats); 342 } 343 } 344 345 void NetEqImpl::GetRtcpStatisticsNoReset(RtcpStatistics* stats) { 346 CriticalSectionScoped lock(crit_sect_.get()); 347 if (stats) { 348 rtcp_.GetStatistics(true, stats); 349 } 350 } 351 352 void NetEqImpl::EnableVad() { 353 CriticalSectionScoped lock(crit_sect_.get()); 354 assert(vad_.get()); 355 vad_->Enable(); 356 } 357 358 void NetEqImpl::DisableVad() { 359 CriticalSectionScoped lock(crit_sect_.get()); 360 assert(vad_.get()); 361 vad_->Disable(); 362 } 363 364 bool NetEqImpl::GetPlayoutTimestamp(uint32_t* timestamp) { 365 CriticalSectionScoped lock(crit_sect_.get()); 366 if (first_packet_) { 367 // We don't have a valid RTP timestamp until we have decoded our first 368 // RTP packet. 369 return false; 370 } 371 *timestamp = timestamp_scaler_->ToExternal(playout_timestamp_); 372 return true; 373 } 374 375 int NetEqImpl::last_output_sample_rate_hz() const { 376 CriticalSectionScoped lock(crit_sect_.get()); 377 return last_output_sample_rate_hz_; 378 } 379 380 int NetEqImpl::SetTargetNumberOfChannels() { 381 return kNotImplemented; 382 } 383 384 int NetEqImpl::SetTargetSampleRate() { 385 return kNotImplemented; 386 } 387 388 int NetEqImpl::LastError() const { 389 CriticalSectionScoped lock(crit_sect_.get()); 390 return error_code_; 391 } 392 393 int NetEqImpl::LastDecoderError() { 394 CriticalSectionScoped lock(crit_sect_.get()); 395 return decoder_error_code_; 396 } 397 398 void NetEqImpl::FlushBuffers() { 399 CriticalSectionScoped lock(crit_sect_.get()); 400 LOG(LS_VERBOSE) << "FlushBuffers"; 401 packet_buffer_->Flush(); 402 assert(sync_buffer_.get()); 403 assert(expand_.get()); 404 sync_buffer_->Flush(); 405 sync_buffer_->set_next_index(sync_buffer_->next_index() - 406 expand_->overlap_length()); 407 // Set to wait for new codec. 408 first_packet_ = true; 409 } 410 411 void NetEqImpl::PacketBufferStatistics(int* current_num_packets, 412 int* max_num_packets) const { 413 CriticalSectionScoped lock(crit_sect_.get()); 414 packet_buffer_->BufferStat(current_num_packets, max_num_packets); 415 } 416 417 void NetEqImpl::EnableNack(size_t max_nack_list_size) { 418 CriticalSectionScoped lock(crit_sect_.get()); 419 if (!nack_enabled_) { 420 const int kNackThresholdPackets = 2; 421 nack_.reset(Nack::Create(kNackThresholdPackets)); 422 nack_enabled_ = true; 423 nack_->UpdateSampleRate(fs_hz_); 424 } 425 nack_->SetMaxNackListSize(max_nack_list_size); 426 } 427 428 void NetEqImpl::DisableNack() { 429 CriticalSectionScoped lock(crit_sect_.get()); 430 nack_.reset(); 431 nack_enabled_ = false; 432 } 433 434 std::vector<uint16_t> NetEqImpl::GetNackList(int64_t round_trip_time_ms) const { 435 CriticalSectionScoped lock(crit_sect_.get()); 436 if (!nack_enabled_) { 437 return std::vector<uint16_t>(); 438 } 439 RTC_DCHECK(nack_.get()); 440 return nack_->GetNackList(round_trip_time_ms); 441 } 442 443 const SyncBuffer* NetEqImpl::sync_buffer_for_test() const { 444 CriticalSectionScoped lock(crit_sect_.get()); 445 return sync_buffer_.get(); 446 } 447 448 // Methods below this line are private. 449 450 int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header, 451 rtc::ArrayView<const uint8_t> payload, 452 uint32_t receive_timestamp, 453 bool is_sync_packet) { 454 if (payload.empty()) { 455 LOG_F(LS_ERROR) << "payload is empty"; 456 return kInvalidPointer; 457 } 458 // Sanity checks for sync-packets. 459 if (is_sync_packet) { 460 if (decoder_database_->IsDtmf(rtp_header.header.payloadType) || 461 decoder_database_->IsRed(rtp_header.header.payloadType) || 462 decoder_database_->IsComfortNoise(rtp_header.header.payloadType)) { 463 LOG_F(LS_ERROR) << "Sync-packet with an unacceptable payload type " 464 << static_cast<int>(rtp_header.header.payloadType); 465 return kSyncPacketNotAccepted; 466 } 467 if (first_packet_ || 468 rtp_header.header.payloadType != current_rtp_payload_type_ || 469 rtp_header.header.ssrc != ssrc_) { 470 // Even if |current_rtp_payload_type_| is 0xFF, sync-packet isn't 471 // accepted. 472 LOG_F(LS_ERROR) 473 << "Changing codec, SSRC or first packet with sync-packet."; 474 return kSyncPacketNotAccepted; 475 } 476 } 477 PacketList packet_list; 478 RTPHeader main_header; 479 { 480 // Convert to Packet. 481 // Create |packet| within this separate scope, since it should not be used 482 // directly once it's been inserted in the packet list. This way, |packet| 483 // is not defined outside of this block. 484 Packet* packet = new Packet; 485 packet->header.markerBit = false; 486 packet->header.payloadType = rtp_header.header.payloadType; 487 packet->header.sequenceNumber = rtp_header.header.sequenceNumber; 488 packet->header.timestamp = rtp_header.header.timestamp; 489 packet->header.ssrc = rtp_header.header.ssrc; 490 packet->header.numCSRCs = 0; 491 packet->payload_length = payload.size(); 492 packet->primary = true; 493 packet->waiting_time = 0; 494 packet->payload = new uint8_t[packet->payload_length]; 495 packet->sync_packet = is_sync_packet; 496 if (!packet->payload) { 497 LOG_F(LS_ERROR) << "Payload pointer is NULL."; 498 } 499 assert(!payload.empty()); // Already checked above. 500 memcpy(packet->payload, payload.data(), packet->payload_length); 501 // Insert packet in a packet list. 502 packet_list.push_back(packet); 503 // Save main payloads header for later. 504 memcpy(&main_header, &packet->header, sizeof(main_header)); 505 } 506 507 bool update_sample_rate_and_channels = false; 508 // Reinitialize NetEq if it's needed (changed SSRC or first call). 509 if ((main_header.ssrc != ssrc_) || first_packet_) { 510 // Note: |first_packet_| will be cleared further down in this method, once 511 // the packet has been successfully inserted into the packet buffer. 512 513 rtcp_.Init(main_header.sequenceNumber); 514 515 // Flush the packet buffer and DTMF buffer. 516 packet_buffer_->Flush(); 517 dtmf_buffer_->Flush(); 518 519 // Store new SSRC. 520 ssrc_ = main_header.ssrc; 521 522 // Update audio buffer timestamp. 523 sync_buffer_->IncreaseEndTimestamp(main_header.timestamp - timestamp_); 524 525 // Update codecs. 526 timestamp_ = main_header.timestamp; 527 current_rtp_payload_type_ = main_header.payloadType; 528 529 // Reset timestamp scaling. 530 timestamp_scaler_->Reset(); 531 532 // Trigger an update of sampling rate and the number of channels. 533 update_sample_rate_and_channels = true; 534 } 535 536 // Update RTCP statistics, only for regular packets. 537 if (!is_sync_packet) 538 rtcp_.Update(main_header, receive_timestamp); 539 540 // Check for RED payload type, and separate payloads into several packets. 541 if (decoder_database_->IsRed(main_header.payloadType)) { 542 assert(!is_sync_packet); // We had a sanity check for this. 543 if (payload_splitter_->SplitRed(&packet_list) != PayloadSplitter::kOK) { 544 PacketBuffer::DeleteAllPackets(&packet_list); 545 return kRedundancySplitError; 546 } 547 // Only accept a few RED payloads of the same type as the main data, 548 // DTMF events and CNG. 549 payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_); 550 // Update the stored main payload header since the main payload has now 551 // changed. 552 memcpy(&main_header, &packet_list.front()->header, sizeof(main_header)); 553 } 554 555 // Check payload types. 556 if (decoder_database_->CheckPayloadTypes(packet_list) == 557 DecoderDatabase::kDecoderNotFound) { 558 PacketBuffer::DeleteAllPackets(&packet_list); 559 return kUnknownRtpPayloadType; 560 } 561 562 // Scale timestamp to internal domain (only for some codecs). 563 timestamp_scaler_->ToInternal(&packet_list); 564 565 // Process DTMF payloads. Cycle through the list of packets, and pick out any 566 // DTMF payloads found. 567 PacketList::iterator it = packet_list.begin(); 568 while (it != packet_list.end()) { 569 Packet* current_packet = (*it); 570 assert(current_packet); 571 assert(current_packet->payload); 572 if (decoder_database_->IsDtmf(current_packet->header.payloadType)) { 573 assert(!current_packet->sync_packet); // We had a sanity check for this. 574 DtmfEvent event; 575 int ret = DtmfBuffer::ParseEvent( 576 current_packet->header.timestamp, 577 current_packet->payload, 578 current_packet->payload_length, 579 &event); 580 if (ret != DtmfBuffer::kOK) { 581 PacketBuffer::DeleteAllPackets(&packet_list); 582 return kDtmfParsingError; 583 } 584 if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) { 585 PacketBuffer::DeleteAllPackets(&packet_list); 586 return kDtmfInsertError; 587 } 588 // TODO(hlundin): Let the destructor of Packet handle the payload. 589 delete [] current_packet->payload; 590 delete current_packet; 591 it = packet_list.erase(it); 592 } else { 593 ++it; 594 } 595 } 596 597 // Check for FEC in packets, and separate payloads into several packets. 598 int ret = payload_splitter_->SplitFec(&packet_list, decoder_database_.get()); 599 if (ret != PayloadSplitter::kOK) { 600 PacketBuffer::DeleteAllPackets(&packet_list); 601 switch (ret) { 602 case PayloadSplitter::kUnknownPayloadType: 603 return kUnknownRtpPayloadType; 604 default: 605 return kOtherError; 606 } 607 } 608 609 // Split payloads into smaller chunks. This also verifies that all payloads 610 // are of a known payload type. SplitAudio() method is protected against 611 // sync-packets. 612 ret = payload_splitter_->SplitAudio(&packet_list, *decoder_database_); 613 if (ret != PayloadSplitter::kOK) { 614 PacketBuffer::DeleteAllPackets(&packet_list); 615 switch (ret) { 616 case PayloadSplitter::kUnknownPayloadType: 617 return kUnknownRtpPayloadType; 618 case PayloadSplitter::kFrameSplitError: 619 return kFrameSplitError; 620 default: 621 return kOtherError; 622 } 623 } 624 625 // Update bandwidth estimate, if the packet is not sync-packet. 626 if (!packet_list.empty() && !packet_list.front()->sync_packet) { 627 // The list can be empty here if we got nothing but DTMF payloads. 628 AudioDecoder* decoder = 629 decoder_database_->GetDecoder(main_header.payloadType); 630 assert(decoder); // Should always get a valid object, since we have 631 // already checked that the payload types are known. 632 decoder->IncomingPacket(packet_list.front()->payload, 633 packet_list.front()->payload_length, 634 packet_list.front()->header.sequenceNumber, 635 packet_list.front()->header.timestamp, 636 receive_timestamp); 637 } 638 639 if (nack_enabled_) { 640 RTC_DCHECK(nack_); 641 if (update_sample_rate_and_channels) { 642 nack_->Reset(); 643 } 644 nack_->UpdateLastReceivedPacket(packet_list.front()->header.sequenceNumber, 645 packet_list.front()->header.timestamp); 646 } 647 648 // Insert packets in buffer. 649 const size_t buffer_length_before_insert = 650 packet_buffer_->NumPacketsInBuffer(); 651 ret = packet_buffer_->InsertPacketList( 652 &packet_list, 653 *decoder_database_, 654 ¤t_rtp_payload_type_, 655 ¤t_cng_rtp_payload_type_); 656 if (ret == PacketBuffer::kFlushed) { 657 // Reset DSP timestamp etc. if packet buffer flushed. 658 new_codec_ = true; 659 update_sample_rate_and_channels = true; 660 } else if (ret != PacketBuffer::kOK) { 661 PacketBuffer::DeleteAllPackets(&packet_list); 662 return kOtherError; 663 } 664 665 if (first_packet_) { 666 first_packet_ = false; 667 // Update the codec on the next GetAudio call. 668 new_codec_ = true; 669 } 670 671 if (current_rtp_payload_type_ != 0xFF) { 672 const DecoderDatabase::DecoderInfo* dec_info = 673 decoder_database_->GetDecoderInfo(current_rtp_payload_type_); 674 if (!dec_info) { 675 assert(false); // Already checked that the payload type is known. 676 } 677 } 678 679 if (update_sample_rate_and_channels && !packet_buffer_->Empty()) { 680 // We do not use |current_rtp_payload_type_| to |set payload_type|, but 681 // get the next RTP header from |packet_buffer_| to obtain the payload type. 682 // The reason for it is the following corner case. If NetEq receives a 683 // CNG packet with a sample rate different than the current CNG then it 684 // flushes its buffer, assuming send codec must have been changed. However, 685 // payload type of the hypothetically new send codec is not known. 686 const RTPHeader* rtp_header = packet_buffer_->NextRtpHeader(); 687 assert(rtp_header); 688 int payload_type = rtp_header->payloadType; 689 AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type); 690 assert(decoder); // Payloads are already checked to be valid. 691 const DecoderDatabase::DecoderInfo* decoder_info = 692 decoder_database_->GetDecoderInfo(payload_type); 693 assert(decoder_info); 694 if (decoder_info->fs_hz != fs_hz_ || 695 decoder->Channels() != algorithm_buffer_->Channels()) { 696 SetSampleRateAndChannels(decoder_info->fs_hz, decoder->Channels()); 697 } 698 if (nack_enabled_) { 699 RTC_DCHECK(nack_); 700 // Update the sample rate even if the rate is not new, because of Reset(). 701 nack_->UpdateSampleRate(fs_hz_); 702 } 703 } 704 705 // TODO(hlundin): Move this code to DelayManager class. 706 const DecoderDatabase::DecoderInfo* dec_info = 707 decoder_database_->GetDecoderInfo(main_header.payloadType); 708 assert(dec_info); // Already checked that the payload type is known. 709 delay_manager_->LastDecoderType(dec_info->codec_type); 710 if (delay_manager_->last_pack_cng_or_dtmf() == 0) { 711 // Calculate the total speech length carried in each packet. 712 const size_t buffer_length_after_insert = 713 packet_buffer_->NumPacketsInBuffer(); 714 715 if (buffer_length_after_insert > buffer_length_before_insert) { 716 const size_t packet_length_samples = 717 (buffer_length_after_insert - buffer_length_before_insert) * 718 decoder_frame_length_; 719 if (packet_length_samples != decision_logic_->packet_length_samples()) { 720 decision_logic_->set_packet_length_samples(packet_length_samples); 721 delay_manager_->SetPacketAudioLength( 722 rtc::checked_cast<int>((1000 * packet_length_samples) / fs_hz_)); 723 } 724 } 725 726 // Update statistics. 727 if ((int32_t) (main_header.timestamp - timestamp_) >= 0 && 728 !new_codec_) { 729 // Only update statistics if incoming packet is not older than last played 730 // out packet, and if new codec flag is not set. 731 delay_manager_->Update(main_header.sequenceNumber, main_header.timestamp, 732 fs_hz_); 733 } 734 } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) { 735 // This is first "normal" packet after CNG or DTMF. 736 // Reset packet time counter and measure time until next packet, 737 // but don't update statistics. 738 delay_manager_->set_last_pack_cng_or_dtmf(0); 739 delay_manager_->ResetPacketIatCount(); 740 } 741 return 0; 742 } 743 744 int NetEqImpl::GetAudioInternal(size_t max_length, 745 int16_t* output, 746 size_t* samples_per_channel, 747 size_t* num_channels) { 748 PacketList packet_list; 749 DtmfEvent dtmf_event; 750 Operations operation; 751 bool play_dtmf; 752 int return_value = GetDecision(&operation, &packet_list, &dtmf_event, 753 &play_dtmf); 754 if (return_value != 0) { 755 last_mode_ = kModeError; 756 return return_value; 757 } 758 759 AudioDecoder::SpeechType speech_type; 760 int length = 0; 761 int decode_return_value = Decode(&packet_list, &operation, 762 &length, &speech_type); 763 764 assert(vad_.get()); 765 bool sid_frame_available = 766 (operation == kRfc3389Cng && !packet_list.empty()); 767 vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type, 768 sid_frame_available, fs_hz_); 769 770 algorithm_buffer_->Clear(); 771 switch (operation) { 772 case kNormal: { 773 DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf); 774 break; 775 } 776 case kMerge: { 777 DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf); 778 break; 779 } 780 case kExpand: { 781 return_value = DoExpand(play_dtmf); 782 break; 783 } 784 case kAccelerate: 785 case kFastAccelerate: { 786 const bool fast_accelerate = 787 enable_fast_accelerate_ && (operation == kFastAccelerate); 788 return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type, 789 play_dtmf, fast_accelerate); 790 break; 791 } 792 case kPreemptiveExpand: { 793 return_value = DoPreemptiveExpand(decoded_buffer_.get(), length, 794 speech_type, play_dtmf); 795 break; 796 } 797 case kRfc3389Cng: 798 case kRfc3389CngNoPacket: { 799 return_value = DoRfc3389Cng(&packet_list, play_dtmf); 800 break; 801 } 802 case kCodecInternalCng: { 803 // This handles the case when there is no transmission and the decoder 804 // should produce internal comfort noise. 805 // TODO(hlundin): Write test for codec-internal CNG. 806 DoCodecInternalCng(decoded_buffer_.get(), length); 807 break; 808 } 809 case kDtmf: { 810 // TODO(hlundin): Write test for this. 811 return_value = DoDtmf(dtmf_event, &play_dtmf); 812 break; 813 } 814 case kAlternativePlc: { 815 // TODO(hlundin): Write test for this. 816 DoAlternativePlc(false); 817 break; 818 } 819 case kAlternativePlcIncreaseTimestamp: { 820 // TODO(hlundin): Write test for this. 821 DoAlternativePlc(true); 822 break; 823 } 824 case kAudioRepetitionIncreaseTimestamp: { 825 // TODO(hlundin): Write test for this. 826 sync_buffer_->IncreaseEndTimestamp( 827 static_cast<uint32_t>(output_size_samples_)); 828 // Skipping break on purpose. Execution should move on into the 829 // next case. 830 FALLTHROUGH(); 831 } 832 case kAudioRepetition: { 833 // TODO(hlundin): Write test for this. 834 // Copy last |output_size_samples_| from |sync_buffer_| to 835 // |algorithm_buffer|. 836 algorithm_buffer_->PushBackFromIndex( 837 *sync_buffer_, sync_buffer_->Size() - output_size_samples_); 838 expand_->Reset(); 839 break; 840 } 841 case kUndefined: { 842 LOG(LS_ERROR) << "Invalid operation kUndefined."; 843 assert(false); // This should not happen. 844 last_mode_ = kModeError; 845 return kInvalidOperation; 846 } 847 } // End of switch. 848 if (return_value < 0) { 849 return return_value; 850 } 851 852 if (last_mode_ != kModeRfc3389Cng) { 853 comfort_noise_->Reset(); 854 } 855 856 // Copy from |algorithm_buffer| to |sync_buffer_|. 857 sync_buffer_->PushBack(*algorithm_buffer_); 858 859 // Extract data from |sync_buffer_| to |output|. 860 size_t num_output_samples_per_channel = output_size_samples_; 861 size_t num_output_samples = output_size_samples_ * sync_buffer_->Channels(); 862 if (num_output_samples > max_length) { 863 LOG(LS_WARNING) << "Output array is too short. " << max_length << " < " << 864 output_size_samples_ << " * " << sync_buffer_->Channels(); 865 num_output_samples = max_length; 866 num_output_samples_per_channel = max_length / sync_buffer_->Channels(); 867 } 868 const size_t samples_from_sync = 869 sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel, 870 output); 871 *num_channels = sync_buffer_->Channels(); 872 if (sync_buffer_->FutureLength() < expand_->overlap_length()) { 873 // The sync buffer should always contain |overlap_length| samples, but now 874 // too many samples have been extracted. Reinstall the |overlap_length| 875 // lookahead by moving the index. 876 const size_t missing_lookahead_samples = 877 expand_->overlap_length() - sync_buffer_->FutureLength(); 878 RTC_DCHECK_GE(sync_buffer_->next_index(), missing_lookahead_samples); 879 sync_buffer_->set_next_index(sync_buffer_->next_index() - 880 missing_lookahead_samples); 881 } 882 if (samples_from_sync != output_size_samples_) { 883 LOG(LS_ERROR) << "samples_from_sync (" << samples_from_sync 884 << ") != output_size_samples_ (" << output_size_samples_ 885 << ")"; 886 // TODO(minyue): treatment of under-run, filling zeros 887 memset(output, 0, num_output_samples * sizeof(int16_t)); 888 *samples_per_channel = output_size_samples_; 889 return kSampleUnderrun; 890 } 891 *samples_per_channel = output_size_samples_; 892 893 // Should always have overlap samples left in the |sync_buffer_|. 894 RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length()); 895 896 if (play_dtmf) { 897 return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(), output); 898 } 899 900 // Update the background noise parameters if last operation wrote data 901 // straight from the decoder to the |sync_buffer_|. That is, none of the 902 // operations that modify the signal can be followed by a parameter update. 903 if ((last_mode_ == kModeNormal) || 904 (last_mode_ == kModeAccelerateFail) || 905 (last_mode_ == kModePreemptiveExpandFail) || 906 (last_mode_ == kModeRfc3389Cng) || 907 (last_mode_ == kModeCodecInternalCng)) { 908 background_noise_->Update(*sync_buffer_, *vad_.get()); 909 } 910 911 if (operation == kDtmf) { 912 // DTMF data was written the end of |sync_buffer_|. 913 // Update index to end of DTMF data in |sync_buffer_|. 914 sync_buffer_->set_dtmf_index(sync_buffer_->Size()); 915 } 916 917 if (last_mode_ != kModeExpand) { 918 // If last operation was not expand, calculate the |playout_timestamp_| from 919 // the |sync_buffer_|. However, do not update the |playout_timestamp_| if it 920 // would be moved "backwards". 921 uint32_t temp_timestamp = sync_buffer_->end_timestamp() - 922 static_cast<uint32_t>(sync_buffer_->FutureLength()); 923 if (static_cast<int32_t>(temp_timestamp - playout_timestamp_) > 0) { 924 playout_timestamp_ = temp_timestamp; 925 } 926 } else { 927 // Use dead reckoning to estimate the |playout_timestamp_|. 928 playout_timestamp_ += static_cast<uint32_t>(output_size_samples_); 929 } 930 931 if (decode_return_value) return decode_return_value; 932 return return_value; 933 } 934 935 int NetEqImpl::GetDecision(Operations* operation, 936 PacketList* packet_list, 937 DtmfEvent* dtmf_event, 938 bool* play_dtmf) { 939 // Initialize output variables. 940 *play_dtmf = false; 941 *operation = kUndefined; 942 943 // Increment time counters. 944 packet_buffer_->IncrementWaitingTimes(); 945 stats_.IncreaseCounter(output_size_samples_, fs_hz_); 946 947 assert(sync_buffer_.get()); 948 uint32_t end_timestamp = sync_buffer_->end_timestamp(); 949 if (!new_codec_) { 950 const uint32_t five_seconds_samples = 5 * fs_hz_; 951 packet_buffer_->DiscardOldPackets(end_timestamp, five_seconds_samples); 952 } 953 const RTPHeader* header = packet_buffer_->NextRtpHeader(); 954 955 if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) { 956 // Because of timestamp peculiarities, we have to "manually" disallow using 957 // a CNG packet with the same timestamp as the one that was last played. 958 // This can happen when using redundancy and will cause the timing to shift. 959 while (header && decoder_database_->IsComfortNoise(header->payloadType) && 960 (end_timestamp >= header->timestamp || 961 end_timestamp + decision_logic_->generated_noise_samples() > 962 header->timestamp)) { 963 // Don't use this packet, discard it. 964 if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) { 965 assert(false); // Must be ok by design. 966 } 967 // Check buffer again. 968 if (!new_codec_) { 969 packet_buffer_->DiscardOldPackets(end_timestamp, 5 * fs_hz_); 970 } 971 header = packet_buffer_->NextRtpHeader(); 972 } 973 } 974 975 assert(expand_.get()); 976 const int samples_left = static_cast<int>(sync_buffer_->FutureLength() - 977 expand_->overlap_length()); 978 if (last_mode_ == kModeAccelerateSuccess || 979 last_mode_ == kModeAccelerateLowEnergy || 980 last_mode_ == kModePreemptiveExpandSuccess || 981 last_mode_ == kModePreemptiveExpandLowEnergy) { 982 // Subtract (samples_left + output_size_samples_) from sampleMemory. 983 decision_logic_->AddSampleMemory( 984 -(samples_left + rtc::checked_cast<int>(output_size_samples_))); 985 } 986 987 // Check if it is time to play a DTMF event. 988 if (dtmf_buffer_->GetEvent( 989 static_cast<uint32_t>( 990 end_timestamp + decision_logic_->generated_noise_samples()), 991 dtmf_event)) { 992 *play_dtmf = true; 993 } 994 995 // Get instruction. 996 assert(sync_buffer_.get()); 997 assert(expand_.get()); 998 *operation = decision_logic_->GetDecision(*sync_buffer_, 999 *expand_, 1000 decoder_frame_length_, 1001 header, 1002 last_mode_, 1003 *play_dtmf, 1004 &reset_decoder_); 1005 1006 // Check if we already have enough samples in the |sync_buffer_|. If so, 1007 // change decision to normal, unless the decision was merge, accelerate, or 1008 // preemptive expand. 1009 if (samples_left >= rtc::checked_cast<int>(output_size_samples_) && 1010 *operation != kMerge && 1011 *operation != kAccelerate && 1012 *operation != kFastAccelerate && 1013 *operation != kPreemptiveExpand) { 1014 *operation = kNormal; 1015 return 0; 1016 } 1017 1018 decision_logic_->ExpandDecision(*operation); 1019 1020 // Check conditions for reset. 1021 if (new_codec_ || *operation == kUndefined) { 1022 // The only valid reason to get kUndefined is that new_codec_ is set. 1023 assert(new_codec_); 1024 if (*play_dtmf && !header) { 1025 timestamp_ = dtmf_event->timestamp; 1026 } else { 1027 if (!header) { 1028 LOG(LS_ERROR) << "Packet missing where it shouldn't."; 1029 return -1; 1030 } 1031 timestamp_ = header->timestamp; 1032 if (*operation == kRfc3389CngNoPacket 1033 #ifndef LEGACY_BITEXACT 1034 // Without this check, it can happen that a non-CNG packet is sent to 1035 // the CNG decoder as if it was a SID frame. This is clearly a bug, 1036 // but is kept for now to maintain bit-exactness with the test 1037 // vectors. 1038 && decoder_database_->IsComfortNoise(header->payloadType) 1039 #endif 1040 ) { 1041 // Change decision to CNG packet, since we do have a CNG packet, but it 1042 // was considered too early to use. Now, use it anyway. 1043 *operation = kRfc3389Cng; 1044 } else if (*operation != kRfc3389Cng) { 1045 *operation = kNormal; 1046 } 1047 } 1048 // Adjust |sync_buffer_| timestamp before setting |end_timestamp| to the 1049 // new value. 1050 sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp); 1051 end_timestamp = timestamp_; 1052 new_codec_ = false; 1053 decision_logic_->SoftReset(); 1054 buffer_level_filter_->Reset(); 1055 delay_manager_->Reset(); 1056 stats_.ResetMcu(); 1057 } 1058 1059 size_t required_samples = output_size_samples_; 1060 const size_t samples_10_ms = static_cast<size_t>(80 * fs_mult_); 1061 const size_t samples_20_ms = 2 * samples_10_ms; 1062 const size_t samples_30_ms = 3 * samples_10_ms; 1063 1064 switch (*operation) { 1065 case kExpand: { 1066 timestamp_ = end_timestamp; 1067 return 0; 1068 } 1069 case kRfc3389CngNoPacket: 1070 case kCodecInternalCng: { 1071 return 0; 1072 } 1073 case kDtmf: { 1074 // TODO(hlundin): Write test for this. 1075 // Update timestamp. 1076 timestamp_ = end_timestamp; 1077 if (decision_logic_->generated_noise_samples() > 0 && 1078 last_mode_ != kModeDtmf) { 1079 // Make a jump in timestamp due to the recently played comfort noise. 1080 uint32_t timestamp_jump = 1081 static_cast<uint32_t>(decision_logic_->generated_noise_samples()); 1082 sync_buffer_->IncreaseEndTimestamp(timestamp_jump); 1083 timestamp_ += timestamp_jump; 1084 } 1085 decision_logic_->set_generated_noise_samples(0); 1086 return 0; 1087 } 1088 case kAccelerate: 1089 case kFastAccelerate: { 1090 // In order to do an accelerate we need at least 30 ms of audio data. 1091 if (samples_left >= static_cast<int>(samples_30_ms)) { 1092 // Already have enough data, so we do not need to extract any more. 1093 decision_logic_->set_sample_memory(samples_left); 1094 decision_logic_->set_prev_time_scale(true); 1095 return 0; 1096 } else if (samples_left >= static_cast<int>(samples_10_ms) && 1097 decoder_frame_length_ >= samples_30_ms) { 1098 // Avoid decoding more data as it might overflow the playout buffer. 1099 *operation = kNormal; 1100 return 0; 1101 } else if (samples_left < static_cast<int>(samples_20_ms) && 1102 decoder_frame_length_ < samples_30_ms) { 1103 // Build up decoded data by decoding at least 20 ms of audio data. Do 1104 // not perform accelerate yet, but wait until we only need to do one 1105 // decoding. 1106 required_samples = 2 * output_size_samples_; 1107 *operation = kNormal; 1108 } 1109 // If none of the above is true, we have one of two possible situations: 1110 // (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or 1111 // (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms. 1112 // In either case, we move on with the accelerate decision, and decode one 1113 // frame now. 1114 break; 1115 } 1116 case kPreemptiveExpand: { 1117 // In order to do a preemptive expand we need at least 30 ms of decoded 1118 // audio data. 1119 if ((samples_left >= static_cast<int>(samples_30_ms)) || 1120 (samples_left >= static_cast<int>(samples_10_ms) && 1121 decoder_frame_length_ >= samples_30_ms)) { 1122 // Already have enough data, so we do not need to extract any more. 1123 // Or, avoid decoding more data as it might overflow the playout buffer. 1124 // Still try preemptive expand, though. 1125 decision_logic_->set_sample_memory(samples_left); 1126 decision_logic_->set_prev_time_scale(true); 1127 return 0; 1128 } 1129 if (samples_left < static_cast<int>(samples_20_ms) && 1130 decoder_frame_length_ < samples_30_ms) { 1131 // Build up decoded data by decoding at least 20 ms of audio data. 1132 // Still try to perform preemptive expand. 1133 required_samples = 2 * output_size_samples_; 1134 } 1135 // Move on with the preemptive expand decision. 1136 break; 1137 } 1138 case kMerge: { 1139 required_samples = 1140 std::max(merge_->RequiredFutureSamples(), required_samples); 1141 break; 1142 } 1143 default: { 1144 // Do nothing. 1145 } 1146 } 1147 1148 // Get packets from buffer. 1149 int extracted_samples = 0; 1150 if (header && 1151 *operation != kAlternativePlc && 1152 *operation != kAlternativePlcIncreaseTimestamp && 1153 *operation != kAudioRepetition && 1154 *operation != kAudioRepetitionIncreaseTimestamp) { 1155 sync_buffer_->IncreaseEndTimestamp(header->timestamp - end_timestamp); 1156 if (decision_logic_->CngOff()) { 1157 // Adjustment of timestamp only corresponds to an actual packet loss 1158 // if comfort noise is not played. If comfort noise was just played, 1159 // this adjustment of timestamp is only done to get back in sync with the 1160 // stream timestamp; no loss to report. 1161 stats_.LostSamples(header->timestamp - end_timestamp); 1162 } 1163 1164 if (*operation != kRfc3389Cng) { 1165 // We are about to decode and use a non-CNG packet. 1166 decision_logic_->SetCngOff(); 1167 } 1168 // Reset CNG timestamp as a new packet will be delivered. 1169 // (Also if this is a CNG packet, since playedOutTS is updated.) 1170 decision_logic_->set_generated_noise_samples(0); 1171 1172 extracted_samples = ExtractPackets(required_samples, packet_list); 1173 if (extracted_samples < 0) { 1174 return kPacketBufferCorruption; 1175 } 1176 } 1177 1178 if (*operation == kAccelerate || *operation == kFastAccelerate || 1179 *operation == kPreemptiveExpand) { 1180 decision_logic_->set_sample_memory(samples_left + extracted_samples); 1181 decision_logic_->set_prev_time_scale(true); 1182 } 1183 1184 if (*operation == kAccelerate || *operation == kFastAccelerate) { 1185 // Check that we have enough data (30ms) to do accelerate. 1186 if (extracted_samples + samples_left < static_cast<int>(samples_30_ms)) { 1187 // TODO(hlundin): Write test for this. 1188 // Not enough, do normal operation instead. 1189 *operation = kNormal; 1190 } 1191 } 1192 1193 timestamp_ = end_timestamp; 1194 return 0; 1195 } 1196 1197 int NetEqImpl::Decode(PacketList* packet_list, Operations* operation, 1198 int* decoded_length, 1199 AudioDecoder::SpeechType* speech_type) { 1200 *speech_type = AudioDecoder::kSpeech; 1201 1202 // When packet_list is empty, we may be in kCodecInternalCng mode, and for 1203 // that we use current active decoder. 1204 AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); 1205 1206 if (!packet_list->empty()) { 1207 const Packet* packet = packet_list->front(); 1208 uint8_t payload_type = packet->header.payloadType; 1209 if (!decoder_database_->IsComfortNoise(payload_type)) { 1210 decoder = decoder_database_->GetDecoder(payload_type); 1211 assert(decoder); 1212 if (!decoder) { 1213 LOG(LS_WARNING) << "Unknown payload type " 1214 << static_cast<int>(payload_type); 1215 PacketBuffer::DeleteAllPackets(packet_list); 1216 return kDecoderNotFound; 1217 } 1218 bool decoder_changed; 1219 decoder_database_->SetActiveDecoder(payload_type, &decoder_changed); 1220 if (decoder_changed) { 1221 // We have a new decoder. Re-init some values. 1222 const DecoderDatabase::DecoderInfo* decoder_info = decoder_database_ 1223 ->GetDecoderInfo(payload_type); 1224 assert(decoder_info); 1225 if (!decoder_info) { 1226 LOG(LS_WARNING) << "Unknown payload type " 1227 << static_cast<int>(payload_type); 1228 PacketBuffer::DeleteAllPackets(packet_list); 1229 return kDecoderNotFound; 1230 } 1231 // If sampling rate or number of channels has changed, we need to make 1232 // a reset. 1233 if (decoder_info->fs_hz != fs_hz_ || 1234 decoder->Channels() != algorithm_buffer_->Channels()) { 1235 // TODO(tlegrand): Add unittest to cover this event. 1236 SetSampleRateAndChannels(decoder_info->fs_hz, decoder->Channels()); 1237 } 1238 sync_buffer_->set_end_timestamp(timestamp_); 1239 playout_timestamp_ = timestamp_; 1240 } 1241 } 1242 } 1243 1244 if (reset_decoder_) { 1245 // TODO(hlundin): Write test for this. 1246 if (decoder) 1247 decoder->Reset(); 1248 1249 // Reset comfort noise decoder. 1250 AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); 1251 if (cng_decoder) 1252 cng_decoder->Reset(); 1253 1254 reset_decoder_ = false; 1255 } 1256 1257 #ifdef LEGACY_BITEXACT 1258 // Due to a bug in old SignalMCU, it could happen that CNG operation was 1259 // decided, but a speech packet was provided. The speech packet will be used 1260 // to update the comfort noise decoder, as if it was a SID frame, which is 1261 // clearly wrong. 1262 if (*operation == kRfc3389Cng) { 1263 return 0; 1264 } 1265 #endif 1266 1267 *decoded_length = 0; 1268 // Update codec-internal PLC state. 1269 if ((*operation == kMerge) && decoder && decoder->HasDecodePlc()) { 1270 decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]); 1271 } 1272 1273 int return_value; 1274 if (*operation == kCodecInternalCng) { 1275 RTC_DCHECK(packet_list->empty()); 1276 return_value = DecodeCng(decoder, decoded_length, speech_type); 1277 } else { 1278 return_value = DecodeLoop(packet_list, *operation, decoder, 1279 decoded_length, speech_type); 1280 } 1281 1282 if (*decoded_length < 0) { 1283 // Error returned from the decoder. 1284 *decoded_length = 0; 1285 sync_buffer_->IncreaseEndTimestamp( 1286 static_cast<uint32_t>(decoder_frame_length_)); 1287 int error_code = 0; 1288 if (decoder) 1289 error_code = decoder->ErrorCode(); 1290 if (error_code != 0) { 1291 // Got some error code from the decoder. 1292 decoder_error_code_ = error_code; 1293 return_value = kDecoderErrorCode; 1294 LOG(LS_WARNING) << "Decoder returned error code: " << error_code; 1295 } else { 1296 // Decoder does not implement error codes. Return generic error. 1297 return_value = kOtherDecoderError; 1298 LOG(LS_WARNING) << "Decoder error (no error code)"; 1299 } 1300 *operation = kExpand; // Do expansion to get data instead. 1301 } 1302 if (*speech_type != AudioDecoder::kComfortNoise) { 1303 // Don't increment timestamp if codec returned CNG speech type 1304 // since in this case, the we will increment the CNGplayedTS counter. 1305 // Increase with number of samples per channel. 1306 assert(*decoded_length == 0 || 1307 (decoder && decoder->Channels() == sync_buffer_->Channels())); 1308 sync_buffer_->IncreaseEndTimestamp( 1309 *decoded_length / static_cast<int>(sync_buffer_->Channels())); 1310 } 1311 return return_value; 1312 } 1313 1314 int NetEqImpl::DecodeCng(AudioDecoder* decoder, int* decoded_length, 1315 AudioDecoder::SpeechType* speech_type) { 1316 if (!decoder) { 1317 // This happens when active decoder is not defined. 1318 *decoded_length = -1; 1319 return 0; 1320 } 1321 1322 while (*decoded_length < rtc::checked_cast<int>(output_size_samples_)) { 1323 const int length = decoder->Decode( 1324 nullptr, 0, fs_hz_, 1325 (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t), 1326 &decoded_buffer_[*decoded_length], speech_type); 1327 if (length > 0) { 1328 *decoded_length += length; 1329 } else { 1330 // Error. 1331 LOG(LS_WARNING) << "Failed to decode CNG"; 1332 *decoded_length = -1; 1333 break; 1334 } 1335 if (*decoded_length > static_cast<int>(decoded_buffer_length_)) { 1336 // Guard against overflow. 1337 LOG(LS_WARNING) << "Decoded too much CNG."; 1338 return kDecodedTooMuch; 1339 } 1340 } 1341 return 0; 1342 } 1343 1344 int NetEqImpl::DecodeLoop(PacketList* packet_list, const Operations& operation, 1345 AudioDecoder* decoder, int* decoded_length, 1346 AudioDecoder::SpeechType* speech_type) { 1347 Packet* packet = NULL; 1348 if (!packet_list->empty()) { 1349 packet = packet_list->front(); 1350 } 1351 1352 // Do decoding. 1353 while (packet && 1354 !decoder_database_->IsComfortNoise(packet->header.payloadType)) { 1355 assert(decoder); // At this point, we must have a decoder object. 1356 // The number of channels in the |sync_buffer_| should be the same as the 1357 // number decoder channels. 1358 assert(sync_buffer_->Channels() == decoder->Channels()); 1359 assert(decoded_buffer_length_ >= kMaxFrameSize * decoder->Channels()); 1360 assert(operation == kNormal || operation == kAccelerate || 1361 operation == kFastAccelerate || operation == kMerge || 1362 operation == kPreemptiveExpand); 1363 packet_list->pop_front(); 1364 size_t payload_length = packet->payload_length; 1365 int decode_length; 1366 if (packet->sync_packet) { 1367 // Decode to silence with the same frame size as the last decode. 1368 memset(&decoded_buffer_[*decoded_length], 0, 1369 decoder_frame_length_ * decoder->Channels() * 1370 sizeof(decoded_buffer_[0])); 1371 decode_length = rtc::checked_cast<int>(decoder_frame_length_); 1372 } else if (!packet->primary) { 1373 // This is a redundant payload; call the special decoder method. 1374 decode_length = decoder->DecodeRedundant( 1375 packet->payload, packet->payload_length, fs_hz_, 1376 (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t), 1377 &decoded_buffer_[*decoded_length], speech_type); 1378 } else { 1379 decode_length = 1380 decoder->Decode( 1381 packet->payload, packet->payload_length, fs_hz_, 1382 (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t), 1383 &decoded_buffer_[*decoded_length], speech_type); 1384 } 1385 1386 delete[] packet->payload; 1387 delete packet; 1388 packet = NULL; 1389 if (decode_length > 0) { 1390 *decoded_length += decode_length; 1391 // Update |decoder_frame_length_| with number of samples per channel. 1392 decoder_frame_length_ = 1393 static_cast<size_t>(decode_length) / decoder->Channels(); 1394 } else if (decode_length < 0) { 1395 // Error. 1396 LOG(LS_WARNING) << "Decode " << decode_length << " " << payload_length; 1397 *decoded_length = -1; 1398 PacketBuffer::DeleteAllPackets(packet_list); 1399 break; 1400 } 1401 if (*decoded_length > static_cast<int>(decoded_buffer_length_)) { 1402 // Guard against overflow. 1403 LOG(LS_WARNING) << "Decoded too much."; 1404 PacketBuffer::DeleteAllPackets(packet_list); 1405 return kDecodedTooMuch; 1406 } 1407 if (!packet_list->empty()) { 1408 packet = packet_list->front(); 1409 } else { 1410 packet = NULL; 1411 } 1412 } // End of decode loop. 1413 1414 // If the list is not empty at this point, either a decoding error terminated 1415 // the while-loop, or list must hold exactly one CNG packet. 1416 assert(packet_list->empty() || *decoded_length < 0 || 1417 (packet_list->size() == 1 && packet && 1418 decoder_database_->IsComfortNoise(packet->header.payloadType))); 1419 return 0; 1420 } 1421 1422 void NetEqImpl::DoNormal(const int16_t* decoded_buffer, size_t decoded_length, 1423 AudioDecoder::SpeechType speech_type, bool play_dtmf) { 1424 assert(normal_.get()); 1425 assert(mute_factor_array_.get()); 1426 normal_->Process(decoded_buffer, decoded_length, last_mode_, 1427 mute_factor_array_.get(), algorithm_buffer_.get()); 1428 if (decoded_length != 0) { 1429 last_mode_ = kModeNormal; 1430 } 1431 1432 // If last packet was decoded as an inband CNG, set mode to CNG instead. 1433 if ((speech_type == AudioDecoder::kComfortNoise) 1434 || ((last_mode_ == kModeCodecInternalCng) 1435 && (decoded_length == 0))) { 1436 // TODO(hlundin): Remove second part of || statement above. 1437 last_mode_ = kModeCodecInternalCng; 1438 } 1439 1440 if (!play_dtmf) { 1441 dtmf_tone_generator_->Reset(); 1442 } 1443 } 1444 1445 void NetEqImpl::DoMerge(int16_t* decoded_buffer, size_t decoded_length, 1446 AudioDecoder::SpeechType speech_type, bool play_dtmf) { 1447 assert(mute_factor_array_.get()); 1448 assert(merge_.get()); 1449 size_t new_length = merge_->Process(decoded_buffer, decoded_length, 1450 mute_factor_array_.get(), 1451 algorithm_buffer_.get()); 1452 size_t expand_length_correction = new_length - 1453 decoded_length / algorithm_buffer_->Channels(); 1454 1455 // Update in-call and post-call statistics. 1456 if (expand_->MuteFactor(0) == 0) { 1457 // Expand generates only noise. 1458 stats_.ExpandedNoiseSamples(expand_length_correction); 1459 } else { 1460 // Expansion generates more than only noise. 1461 stats_.ExpandedVoiceSamples(expand_length_correction); 1462 } 1463 1464 last_mode_ = kModeMerge; 1465 // If last packet was decoded as an inband CNG, set mode to CNG instead. 1466 if (speech_type == AudioDecoder::kComfortNoise) { 1467 last_mode_ = kModeCodecInternalCng; 1468 } 1469 expand_->Reset(); 1470 if (!play_dtmf) { 1471 dtmf_tone_generator_->Reset(); 1472 } 1473 } 1474 1475 int NetEqImpl::DoExpand(bool play_dtmf) { 1476 while ((sync_buffer_->FutureLength() - expand_->overlap_length()) < 1477 output_size_samples_) { 1478 algorithm_buffer_->Clear(); 1479 int return_value = expand_->Process(algorithm_buffer_.get()); 1480 size_t length = algorithm_buffer_->Size(); 1481 1482 // Update in-call and post-call statistics. 1483 if (expand_->MuteFactor(0) == 0) { 1484 // Expand operation generates only noise. 1485 stats_.ExpandedNoiseSamples(length); 1486 } else { 1487 // Expand operation generates more than only noise. 1488 stats_.ExpandedVoiceSamples(length); 1489 } 1490 1491 last_mode_ = kModeExpand; 1492 1493 if (return_value < 0) { 1494 return return_value; 1495 } 1496 1497 sync_buffer_->PushBack(*algorithm_buffer_); 1498 algorithm_buffer_->Clear(); 1499 } 1500 if (!play_dtmf) { 1501 dtmf_tone_generator_->Reset(); 1502 } 1503 return 0; 1504 } 1505 1506 int NetEqImpl::DoAccelerate(int16_t* decoded_buffer, 1507 size_t decoded_length, 1508 AudioDecoder::SpeechType speech_type, 1509 bool play_dtmf, 1510 bool fast_accelerate) { 1511 const size_t required_samples = 1512 static_cast<size_t>(240 * fs_mult_); // Must have 30 ms. 1513 size_t borrowed_samples_per_channel = 0; 1514 size_t num_channels = algorithm_buffer_->Channels(); 1515 size_t decoded_length_per_channel = decoded_length / num_channels; 1516 if (decoded_length_per_channel < required_samples) { 1517 // Must move data from the |sync_buffer_| in order to get 30 ms. 1518 borrowed_samples_per_channel = static_cast<int>(required_samples - 1519 decoded_length_per_channel); 1520 memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels], 1521 decoded_buffer, 1522 sizeof(int16_t) * decoded_length); 1523 sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel, 1524 decoded_buffer); 1525 decoded_length = required_samples * num_channels; 1526 } 1527 1528 size_t samples_removed; 1529 Accelerate::ReturnCodes return_code = 1530 accelerate_->Process(decoded_buffer, decoded_length, fast_accelerate, 1531 algorithm_buffer_.get(), &samples_removed); 1532 stats_.AcceleratedSamples(samples_removed); 1533 switch (return_code) { 1534 case Accelerate::kSuccess: 1535 last_mode_ = kModeAccelerateSuccess; 1536 break; 1537 case Accelerate::kSuccessLowEnergy: 1538 last_mode_ = kModeAccelerateLowEnergy; 1539 break; 1540 case Accelerate::kNoStretch: 1541 last_mode_ = kModeAccelerateFail; 1542 break; 1543 case Accelerate::kError: 1544 // TODO(hlundin): Map to kModeError instead? 1545 last_mode_ = kModeAccelerateFail; 1546 return kAccelerateError; 1547 } 1548 1549 if (borrowed_samples_per_channel > 0) { 1550 // Copy borrowed samples back to the |sync_buffer_|. 1551 size_t length = algorithm_buffer_->Size(); 1552 if (length < borrowed_samples_per_channel) { 1553 // This destroys the beginning of the buffer, but will not cause any 1554 // problems. 1555 sync_buffer_->ReplaceAtIndex(*algorithm_buffer_, 1556 sync_buffer_->Size() - 1557 borrowed_samples_per_channel); 1558 sync_buffer_->PushFrontZeros(borrowed_samples_per_channel - length); 1559 algorithm_buffer_->PopFront(length); 1560 assert(algorithm_buffer_->Empty()); 1561 } else { 1562 sync_buffer_->ReplaceAtIndex(*algorithm_buffer_, 1563 borrowed_samples_per_channel, 1564 sync_buffer_->Size() - 1565 borrowed_samples_per_channel); 1566 algorithm_buffer_->PopFront(borrowed_samples_per_channel); 1567 } 1568 } 1569 1570 // If last packet was decoded as an inband CNG, set mode to CNG instead. 1571 if (speech_type == AudioDecoder::kComfortNoise) { 1572 last_mode_ = kModeCodecInternalCng; 1573 } 1574 if (!play_dtmf) { 1575 dtmf_tone_generator_->Reset(); 1576 } 1577 expand_->Reset(); 1578 return 0; 1579 } 1580 1581 int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer, 1582 size_t decoded_length, 1583 AudioDecoder::SpeechType speech_type, 1584 bool play_dtmf) { 1585 const size_t required_samples = 1586 static_cast<size_t>(240 * fs_mult_); // Must have 30 ms. 1587 size_t num_channels = algorithm_buffer_->Channels(); 1588 size_t borrowed_samples_per_channel = 0; 1589 size_t old_borrowed_samples_per_channel = 0; 1590 size_t decoded_length_per_channel = decoded_length / num_channels; 1591 if (decoded_length_per_channel < required_samples) { 1592 // Must move data from the |sync_buffer_| in order to get 30 ms. 1593 borrowed_samples_per_channel = 1594 required_samples - decoded_length_per_channel; 1595 // Calculate how many of these were already played out. 1596 old_borrowed_samples_per_channel = 1597 (borrowed_samples_per_channel > sync_buffer_->FutureLength()) ? 1598 (borrowed_samples_per_channel - sync_buffer_->FutureLength()) : 0; 1599 memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels], 1600 decoded_buffer, 1601 sizeof(int16_t) * decoded_length); 1602 sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel, 1603 decoded_buffer); 1604 decoded_length = required_samples * num_channels; 1605 } 1606 1607 size_t samples_added; 1608 PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process( 1609 decoded_buffer, decoded_length, 1610 old_borrowed_samples_per_channel, 1611 algorithm_buffer_.get(), &samples_added); 1612 stats_.PreemptiveExpandedSamples(samples_added); 1613 switch (return_code) { 1614 case PreemptiveExpand::kSuccess: 1615 last_mode_ = kModePreemptiveExpandSuccess; 1616 break; 1617 case PreemptiveExpand::kSuccessLowEnergy: 1618 last_mode_ = kModePreemptiveExpandLowEnergy; 1619 break; 1620 case PreemptiveExpand::kNoStretch: 1621 last_mode_ = kModePreemptiveExpandFail; 1622 break; 1623 case PreemptiveExpand::kError: 1624 // TODO(hlundin): Map to kModeError instead? 1625 last_mode_ = kModePreemptiveExpandFail; 1626 return kPreemptiveExpandError; 1627 } 1628 1629 if (borrowed_samples_per_channel > 0) { 1630 // Copy borrowed samples back to the |sync_buffer_|. 1631 sync_buffer_->ReplaceAtIndex( 1632 *algorithm_buffer_, borrowed_samples_per_channel, 1633 sync_buffer_->Size() - borrowed_samples_per_channel); 1634 algorithm_buffer_->PopFront(borrowed_samples_per_channel); 1635 } 1636 1637 // If last packet was decoded as an inband CNG, set mode to CNG instead. 1638 if (speech_type == AudioDecoder::kComfortNoise) { 1639 last_mode_ = kModeCodecInternalCng; 1640 } 1641 if (!play_dtmf) { 1642 dtmf_tone_generator_->Reset(); 1643 } 1644 expand_->Reset(); 1645 return 0; 1646 } 1647 1648 int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) { 1649 if (!packet_list->empty()) { 1650 // Must have exactly one SID frame at this point. 1651 assert(packet_list->size() == 1); 1652 Packet* packet = packet_list->front(); 1653 packet_list->pop_front(); 1654 if (!decoder_database_->IsComfortNoise(packet->header.payloadType)) { 1655 #ifdef LEGACY_BITEXACT 1656 // This can happen due to a bug in GetDecision. Change the payload type 1657 // to a CNG type, and move on. Note that this means that we are in fact 1658 // sending a non-CNG payload to the comfort noise decoder for decoding. 1659 // Clearly wrong, but will maintain bit-exactness with legacy. 1660 if (fs_hz_ == 8000) { 1661 packet->header.payloadType = 1662 decoder_database_->GetRtpPayloadType(NetEqDecoder::kDecoderCNGnb); 1663 } else if (fs_hz_ == 16000) { 1664 packet->header.payloadType = 1665 decoder_database_->GetRtpPayloadType(NetEqDecoder::kDecoderCNGwb); 1666 } else if (fs_hz_ == 32000) { 1667 packet->header.payloadType = decoder_database_->GetRtpPayloadType( 1668 NetEqDecoder::kDecoderCNGswb32kHz); 1669 } else if (fs_hz_ == 48000) { 1670 packet->header.payloadType = decoder_database_->GetRtpPayloadType( 1671 NetEqDecoder::kDecoderCNGswb48kHz); 1672 } 1673 assert(decoder_database_->IsComfortNoise(packet->header.payloadType)); 1674 #else 1675 LOG(LS_ERROR) << "Trying to decode non-CNG payload as CNG."; 1676 return kOtherError; 1677 #endif 1678 } 1679 // UpdateParameters() deletes |packet|. 1680 if (comfort_noise_->UpdateParameters(packet) == 1681 ComfortNoise::kInternalError) { 1682 algorithm_buffer_->Zeros(output_size_samples_); 1683 return -comfort_noise_->internal_error_code(); 1684 } 1685 } 1686 int cn_return = comfort_noise_->Generate(output_size_samples_, 1687 algorithm_buffer_.get()); 1688 expand_->Reset(); 1689 last_mode_ = kModeRfc3389Cng; 1690 if (!play_dtmf) { 1691 dtmf_tone_generator_->Reset(); 1692 } 1693 if (cn_return == ComfortNoise::kInternalError) { 1694 decoder_error_code_ = comfort_noise_->internal_error_code(); 1695 return kComfortNoiseErrorCode; 1696 } else if (cn_return == ComfortNoise::kUnknownPayloadType) { 1697 return kUnknownRtpPayloadType; 1698 } 1699 return 0; 1700 } 1701 1702 void NetEqImpl::DoCodecInternalCng(const int16_t* decoded_buffer, 1703 size_t decoded_length) { 1704 RTC_DCHECK(normal_.get()); 1705 RTC_DCHECK(mute_factor_array_.get()); 1706 normal_->Process(decoded_buffer, decoded_length, last_mode_, 1707 mute_factor_array_.get(), algorithm_buffer_.get()); 1708 last_mode_ = kModeCodecInternalCng; 1709 expand_->Reset(); 1710 } 1711 1712 int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) { 1713 // This block of the code and the block further down, handling |dtmf_switch| 1714 // are commented out. Otherwise playing out-of-band DTMF would fail in VoE 1715 // test, DtmfTest.ManualSuccessfullySendsOutOfBandTelephoneEvents. This is 1716 // equivalent to |dtmf_switch| always be false. 1717 // 1718 // See http://webrtc-codereview.appspot.com/1195004/ for discussion 1719 // On this issue. This change might cause some glitches at the point of 1720 // switch from audio to DTMF. Issue 1545 is filed to track this. 1721 // 1722 // bool dtmf_switch = false; 1723 // if ((last_mode_ != kModeDtmf) && dtmf_tone_generator_->initialized()) { 1724 // // Special case; see below. 1725 // // We must catch this before calling Generate, since |initialized| is 1726 // // modified in that call. 1727 // dtmf_switch = true; 1728 // } 1729 1730 int dtmf_return_value = 0; 1731 if (!dtmf_tone_generator_->initialized()) { 1732 // Initialize if not already done. 1733 dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, 1734 dtmf_event.volume); 1735 } 1736 1737 if (dtmf_return_value == 0) { 1738 // Generate DTMF signal. 1739 dtmf_return_value = dtmf_tone_generator_->Generate(output_size_samples_, 1740 algorithm_buffer_.get()); 1741 } 1742 1743 if (dtmf_return_value < 0) { 1744 algorithm_buffer_->Zeros(output_size_samples_); 1745 return dtmf_return_value; 1746 } 1747 1748 // if (dtmf_switch) { 1749 // // This is the special case where the previous operation was DTMF 1750 // // overdub, but the current instruction is "regular" DTMF. We must make 1751 // // sure that the DTMF does not have any discontinuities. The first DTMF 1752 // // sample that we generate now must be played out immediately, therefore 1753 // // it must be copied to the speech buffer. 1754 // // TODO(hlundin): This code seems incorrect. (Legacy.) Write test and 1755 // // verify correct operation. 1756 // assert(false); 1757 // // Must generate enough data to replace all of the |sync_buffer_| 1758 // // "future". 1759 // int required_length = sync_buffer_->FutureLength(); 1760 // assert(dtmf_tone_generator_->initialized()); 1761 // dtmf_return_value = dtmf_tone_generator_->Generate(required_length, 1762 // algorithm_buffer_); 1763 // assert((size_t) required_length == algorithm_buffer_->Size()); 1764 // if (dtmf_return_value < 0) { 1765 // algorithm_buffer_->Zeros(output_size_samples_); 1766 // return dtmf_return_value; 1767 // } 1768 // 1769 // // Overwrite the "future" part of the speech buffer with the new DTMF 1770 // // data. 1771 // // TODO(hlundin): It seems that this overwriting has gone lost. 1772 // // Not adapted for multi-channel yet. 1773 // assert(algorithm_buffer_->Channels() == 1); 1774 // if (algorithm_buffer_->Channels() != 1) { 1775 // LOG(LS_WARNING) << "DTMF not supported for more than one channel"; 1776 // return kStereoNotSupported; 1777 // } 1778 // // Shuffle the remaining data to the beginning of algorithm buffer. 1779 // algorithm_buffer_->PopFront(sync_buffer_->FutureLength()); 1780 // } 1781 1782 sync_buffer_->IncreaseEndTimestamp( 1783 static_cast<uint32_t>(output_size_samples_)); 1784 expand_->Reset(); 1785 last_mode_ = kModeDtmf; 1786 1787 // Set to false because the DTMF is already in the algorithm buffer. 1788 *play_dtmf = false; 1789 return 0; 1790 } 1791 1792 void NetEqImpl::DoAlternativePlc(bool increase_timestamp) { 1793 AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); 1794 size_t length; 1795 if (decoder && decoder->HasDecodePlc()) { 1796 // Use the decoder's packet-loss concealment. 1797 // TODO(hlundin): Will probably need a longer buffer for multi-channel. 1798 int16_t decoded_buffer[kMaxFrameSize]; 1799 length = decoder->DecodePlc(1, decoded_buffer); 1800 if (length > 0) 1801 algorithm_buffer_->PushBackInterleaved(decoded_buffer, length); 1802 } else { 1803 // Do simple zero-stuffing. 1804 length = output_size_samples_; 1805 algorithm_buffer_->Zeros(length); 1806 // By not advancing the timestamp, NetEq inserts samples. 1807 stats_.AddZeros(length); 1808 } 1809 if (increase_timestamp) { 1810 sync_buffer_->IncreaseEndTimestamp(static_cast<uint32_t>(length)); 1811 } 1812 expand_->Reset(); 1813 } 1814 1815 int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels, 1816 int16_t* output) const { 1817 size_t out_index = 0; 1818 size_t overdub_length = output_size_samples_; // Default value. 1819 1820 if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) { 1821 // Special operation for transition from "DTMF only" to "DTMF overdub". 1822 out_index = std::min( 1823 sync_buffer_->dtmf_index() - sync_buffer_->next_index(), 1824 output_size_samples_); 1825 overdub_length = output_size_samples_ - out_index; 1826 } 1827 1828 AudioMultiVector dtmf_output(num_channels); 1829 int dtmf_return_value = 0; 1830 if (!dtmf_tone_generator_->initialized()) { 1831 dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, 1832 dtmf_event.volume); 1833 } 1834 if (dtmf_return_value == 0) { 1835 dtmf_return_value = dtmf_tone_generator_->Generate(overdub_length, 1836 &dtmf_output); 1837 assert(overdub_length == dtmf_output.Size()); 1838 } 1839 dtmf_output.ReadInterleaved(overdub_length, &output[out_index]); 1840 return dtmf_return_value < 0 ? dtmf_return_value : 0; 1841 } 1842 1843 int NetEqImpl::ExtractPackets(size_t required_samples, 1844 PacketList* packet_list) { 1845 bool first_packet = true; 1846 uint8_t prev_payload_type = 0; 1847 uint32_t prev_timestamp = 0; 1848 uint16_t prev_sequence_number = 0; 1849 bool next_packet_available = false; 1850 1851 const RTPHeader* header = packet_buffer_->NextRtpHeader(); 1852 assert(header); 1853 if (!header) { 1854 LOG(LS_ERROR) << "Packet buffer unexpectedly empty."; 1855 return -1; 1856 } 1857 uint32_t first_timestamp = header->timestamp; 1858 int extracted_samples = 0; 1859 1860 // Packet extraction loop. 1861 do { 1862 timestamp_ = header->timestamp; 1863 size_t discard_count = 0; 1864 Packet* packet = packet_buffer_->GetNextPacket(&discard_count); 1865 // |header| may be invalid after the |packet_buffer_| operation. 1866 header = NULL; 1867 if (!packet) { 1868 LOG(LS_ERROR) << "Should always be able to extract a packet here"; 1869 assert(false); // Should always be able to extract a packet here. 1870 return -1; 1871 } 1872 stats_.PacketsDiscarded(discard_count); 1873 // Store waiting time in ms; packets->waiting_time is in "output blocks". 1874 stats_.StoreWaitingTime(packet->waiting_time * kOutputSizeMs); 1875 assert(packet->payload_length > 0); 1876 packet_list->push_back(packet); // Store packet in list. 1877 1878 if (first_packet) { 1879 first_packet = false; 1880 if (nack_enabled_) { 1881 RTC_DCHECK(nack_); 1882 // TODO(henrik.lundin): Should we update this for all decoded packets? 1883 nack_->UpdateLastDecodedPacket(packet->header.sequenceNumber, 1884 packet->header.timestamp); 1885 } 1886 prev_sequence_number = packet->header.sequenceNumber; 1887 prev_timestamp = packet->header.timestamp; 1888 prev_payload_type = packet->header.payloadType; 1889 } 1890 1891 // Store number of extracted samples. 1892 int packet_duration = 0; 1893 AudioDecoder* decoder = decoder_database_->GetDecoder( 1894 packet->header.payloadType); 1895 if (decoder) { 1896 if (packet->sync_packet) { 1897 packet_duration = rtc::checked_cast<int>(decoder_frame_length_); 1898 } else { 1899 if (packet->primary) { 1900 packet_duration = decoder->PacketDuration(packet->payload, 1901 packet->payload_length); 1902 } else { 1903 packet_duration = decoder-> 1904 PacketDurationRedundant(packet->payload, packet->payload_length); 1905 stats_.SecondaryDecodedSamples(packet_duration); 1906 } 1907 } 1908 } else { 1909 LOG(LS_WARNING) << "Unknown payload type " 1910 << static_cast<int>(packet->header.payloadType); 1911 assert(false); 1912 } 1913 if (packet_duration <= 0) { 1914 // Decoder did not return a packet duration. Assume that the packet 1915 // contains the same number of samples as the previous one. 1916 packet_duration = rtc::checked_cast<int>(decoder_frame_length_); 1917 } 1918 extracted_samples = packet->header.timestamp - first_timestamp + 1919 packet_duration; 1920 1921 // Check what packet is available next. 1922 header = packet_buffer_->NextRtpHeader(); 1923 next_packet_available = false; 1924 if (header && prev_payload_type == header->payloadType) { 1925 int16_t seq_no_diff = header->sequenceNumber - prev_sequence_number; 1926 size_t ts_diff = header->timestamp - prev_timestamp; 1927 if (seq_no_diff == 1 || 1928 (seq_no_diff == 0 && ts_diff == decoder_frame_length_)) { 1929 // The next sequence number is available, or the next part of a packet 1930 // that was split into pieces upon insertion. 1931 next_packet_available = true; 1932 } 1933 prev_sequence_number = header->sequenceNumber; 1934 } 1935 } while (extracted_samples < rtc::checked_cast<int>(required_samples) && 1936 next_packet_available); 1937 1938 if (extracted_samples > 0) { 1939 // Delete old packets only when we are going to decode something. Otherwise, 1940 // we could end up in the situation where we never decode anything, since 1941 // all incoming packets are considered too old but the buffer will also 1942 // never be flooded and flushed. 1943 packet_buffer_->DiscardAllOldPackets(timestamp_); 1944 } 1945 1946 return extracted_samples; 1947 } 1948 1949 void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) { 1950 // Delete objects and create new ones. 1951 expand_.reset(expand_factory_->Create(background_noise_.get(), 1952 sync_buffer_.get(), &random_vector_, 1953 &stats_, fs_hz, channels)); 1954 merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get())); 1955 } 1956 1957 void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { 1958 LOG(LS_VERBOSE) << "SetSampleRateAndChannels " << fs_hz << " " << channels; 1959 // TODO(hlundin): Change to an enumerator and skip assert. 1960 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000); 1961 assert(channels > 0); 1962 1963 fs_hz_ = fs_hz; 1964 fs_mult_ = fs_hz / 8000; 1965 output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_); 1966 decoder_frame_length_ = 3 * output_size_samples_; // Initialize to 30ms. 1967 1968 last_mode_ = kModeNormal; 1969 1970 // Create a new array of mute factors and set all to 1. 1971 mute_factor_array_.reset(new int16_t[channels]); 1972 for (size_t i = 0; i < channels; ++i) { 1973 mute_factor_array_[i] = 16384; // 1.0 in Q14. 1974 } 1975 1976 AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); 1977 if (cng_decoder) 1978 cng_decoder->Reset(); 1979 1980 // Reinit post-decode VAD with new sample rate. 1981 assert(vad_.get()); // Cannot be NULL here. 1982 vad_->Init(); 1983 1984 // Delete algorithm buffer and create a new one. 1985 algorithm_buffer_.reset(new AudioMultiVector(channels)); 1986 1987 // Delete sync buffer and create a new one. 1988 sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_)); 1989 1990 // Delete BackgroundNoise object and create a new one. 1991 background_noise_.reset(new BackgroundNoise(channels)); 1992 background_noise_->set_mode(background_noise_mode_); 1993 1994 // Reset random vector. 1995 random_vector_.Reset(); 1996 1997 UpdatePlcComponents(fs_hz, channels); 1998 1999 // Move index so that we create a small set of future samples (all 0). 2000 sync_buffer_->set_next_index(sync_buffer_->next_index() - 2001 expand_->overlap_length()); 2002 2003 normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_, 2004 expand_.get())); 2005 accelerate_.reset( 2006 accelerate_factory_->Create(fs_hz, channels, *background_noise_)); 2007 preemptive_expand_.reset(preemptive_expand_factory_->Create( 2008 fs_hz, channels, *background_noise_, expand_->overlap_length())); 2009 2010 // Delete ComfortNoise object and create a new one. 2011 comfort_noise_.reset(new ComfortNoise(fs_hz, decoder_database_.get(), 2012 sync_buffer_.get())); 2013 2014 // Verify that |decoded_buffer_| is long enough. 2015 if (decoded_buffer_length_ < kMaxFrameSize * channels) { 2016 // Reallocate to larger size. 2017 decoded_buffer_length_ = kMaxFrameSize * channels; 2018 decoded_buffer_.reset(new int16_t[decoded_buffer_length_]); 2019 } 2020 2021 // Create DecisionLogic if it is not created yet, then communicate new sample 2022 // rate and output size to DecisionLogic object. 2023 if (!decision_logic_.get()) { 2024 CreateDecisionLogic(); 2025 } 2026 decision_logic_->SetSampleRate(fs_hz_, output_size_samples_); 2027 } 2028 2029 NetEqOutputType NetEqImpl::LastOutputType() { 2030 assert(vad_.get()); 2031 assert(expand_.get()); 2032 if (last_mode_ == kModeCodecInternalCng || last_mode_ == kModeRfc3389Cng) { 2033 return kOutputCNG; 2034 } else if (last_mode_ == kModeExpand && expand_->MuteFactor(0) == 0) { 2035 // Expand mode has faded down to background noise only (very long expand). 2036 return kOutputPLCtoCNG; 2037 } else if (last_mode_ == kModeExpand) { 2038 return kOutputPLC; 2039 } else if (vad_->running() && !vad_->active_speech()) { 2040 return kOutputVADPassive; 2041 } else { 2042 return kOutputNormal; 2043 } 2044 } 2045 2046 void NetEqImpl::CreateDecisionLogic() { 2047 decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_, 2048 playout_mode_, 2049 decoder_database_.get(), 2050 *packet_buffer_.get(), 2051 delay_manager_.get(), 2052 buffer_level_filter_.get())); 2053 } 2054 } // namespace webrtc 2055