1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_coding/neteq/neteq_impl.h" 12 13 #include <assert.h> 14 #include <memory.h> // memset 15 16 #include <algorithm> 17 18 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 19 #include "webrtc/modules/audio_coding/neteq/accelerate.h" 20 #include "webrtc/modules/audio_coding/neteq/background_noise.h" 21 #include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h" 22 #include "webrtc/modules/audio_coding/neteq/comfort_noise.h" 23 #include "webrtc/modules/audio_coding/neteq/decision_logic.h" 24 #include "webrtc/modules/audio_coding/neteq/decoder_database.h" 25 #include "webrtc/modules/audio_coding/neteq/defines.h" 26 #include "webrtc/modules/audio_coding/neteq/delay_manager.h" 27 #include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h" 28 #include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h" 29 #include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h" 30 #include "webrtc/modules/audio_coding/neteq/expand.h" 31 #include "webrtc/modules/audio_coding/neteq/interface/audio_decoder.h" 32 #include "webrtc/modules/audio_coding/neteq/merge.h" 33 #include "webrtc/modules/audio_coding/neteq/normal.h" 34 #include "webrtc/modules/audio_coding/neteq/packet_buffer.h" 35 #include "webrtc/modules/audio_coding/neteq/packet.h" 36 #include "webrtc/modules/audio_coding/neteq/payload_splitter.h" 37 #include "webrtc/modules/audio_coding/neteq/post_decode_vad.h" 38 #include "webrtc/modules/audio_coding/neteq/preemptive_expand.h" 39 #include 
"webrtc/modules/audio_coding/neteq/sync_buffer.h" 40 #include "webrtc/modules/audio_coding/neteq/timestamp_scaler.h" 41 #include "webrtc/modules/interface/module_common_types.h" 42 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h" 43 #include "webrtc/system_wrappers/interface/logging.h" 44 45 // Modify the code to obtain backwards bit-exactness. Once bit-exactness is no 46 // longer required, this #define should be removed (and the code that it 47 // enables). 48 #define LEGACY_BITEXACT 49 50 namespace webrtc { 51 52 NetEqImpl::NetEqImpl(const NetEq::Config& config, 53 BufferLevelFilter* buffer_level_filter, 54 DecoderDatabase* decoder_database, 55 DelayManager* delay_manager, 56 DelayPeakDetector* delay_peak_detector, 57 DtmfBuffer* dtmf_buffer, 58 DtmfToneGenerator* dtmf_tone_generator, 59 PacketBuffer* packet_buffer, 60 PayloadSplitter* payload_splitter, 61 TimestampScaler* timestamp_scaler, 62 AccelerateFactory* accelerate_factory, 63 ExpandFactory* expand_factory, 64 PreemptiveExpandFactory* preemptive_expand_factory, 65 bool create_components) 66 : crit_sect_(CriticalSectionWrapper::CreateCriticalSection()), 67 buffer_level_filter_(buffer_level_filter), 68 decoder_database_(decoder_database), 69 delay_manager_(delay_manager), 70 delay_peak_detector_(delay_peak_detector), 71 dtmf_buffer_(dtmf_buffer), 72 dtmf_tone_generator_(dtmf_tone_generator), 73 packet_buffer_(packet_buffer), 74 payload_splitter_(payload_splitter), 75 timestamp_scaler_(timestamp_scaler), 76 vad_(new PostDecodeVad()), 77 expand_factory_(expand_factory), 78 accelerate_factory_(accelerate_factory), 79 preemptive_expand_factory_(preemptive_expand_factory), 80 last_mode_(kModeNormal), 81 decoded_buffer_length_(kMaxFrameSize), 82 decoded_buffer_(new int16_t[decoded_buffer_length_]), 83 playout_timestamp_(0), 84 new_codec_(false), 85 timestamp_(0), 86 reset_decoder_(false), 87 current_rtp_payload_type_(0xFF), // Invalid RTP payload type. 
88 current_cng_rtp_payload_type_(0xFF), // Invalid RTP payload type. 89 ssrc_(0), 90 first_packet_(true), 91 error_code_(0), 92 decoder_error_code_(0), 93 background_noise_mode_(config.background_noise_mode), 94 decoded_packet_sequence_number_(-1), 95 decoded_packet_timestamp_(0) { 96 int fs = config.sample_rate_hz; 97 if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) { 98 LOG(LS_ERROR) << "Sample rate " << fs << " Hz not supported. " << 99 "Changing to 8000 Hz."; 100 fs = 8000; 101 } 102 LOG(LS_VERBOSE) << "Create NetEqImpl object with fs = " << fs << "."; 103 fs_hz_ = fs; 104 fs_mult_ = fs / 8000; 105 output_size_samples_ = kOutputSizeMs * 8 * fs_mult_; 106 decoder_frame_length_ = 3 * output_size_samples_; 107 WebRtcSpl_Init(); 108 if (create_components) { 109 SetSampleRateAndChannels(fs, 1); // Default is 1 channel. 110 } 111 } 112 113 NetEqImpl::~NetEqImpl() { 114 LOG(LS_INFO) << "Deleting NetEqImpl object."; 115 } 116 117 int NetEqImpl::InsertPacket(const WebRtcRTPHeader& rtp_header, 118 const uint8_t* payload, 119 int length_bytes, 120 uint32_t receive_timestamp) { 121 CriticalSectionScoped lock(crit_sect_.get()); 122 LOG(LS_VERBOSE) << "InsertPacket: ts=" << rtp_header.header.timestamp << 123 ", sn=" << rtp_header.header.sequenceNumber << 124 ", pt=" << static_cast<int>(rtp_header.header.payloadType) << 125 ", ssrc=" << rtp_header.header.ssrc << 126 ", len=" << length_bytes; 127 int error = InsertPacketInternal(rtp_header, payload, length_bytes, 128 receive_timestamp, false); 129 if (error != 0) { 130 LOG_FERR1(LS_WARNING, InsertPacketInternal, error); 131 error_code_ = error; 132 return kFail; 133 } 134 return kOK; 135 } 136 137 int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& rtp_header, 138 uint32_t receive_timestamp) { 139 CriticalSectionScoped lock(crit_sect_.get()); 140 LOG(LS_VERBOSE) << "InsertPacket-Sync: ts=" 141 << rtp_header.header.timestamp << 142 ", sn=" << rtp_header.header.sequenceNumber << 143 ", pt=" << 
static_cast<int>(rtp_header.header.payloadType) << 144 ", ssrc=" << rtp_header.header.ssrc; 145 146 const uint8_t kSyncPayload[] = { 's', 'y', 'n', 'c' }; 147 int error = InsertPacketInternal( 148 rtp_header, kSyncPayload, sizeof(kSyncPayload), receive_timestamp, true); 149 150 if (error != 0) { 151 LOG_FERR1(LS_WARNING, InsertPacketInternal, error); 152 error_code_ = error; 153 return kFail; 154 } 155 return kOK; 156 } 157 158 int NetEqImpl::GetAudio(size_t max_length, int16_t* output_audio, 159 int* samples_per_channel, int* num_channels, 160 NetEqOutputType* type) { 161 CriticalSectionScoped lock(crit_sect_.get()); 162 LOG(LS_VERBOSE) << "GetAudio"; 163 int error = GetAudioInternal(max_length, output_audio, samples_per_channel, 164 num_channels); 165 LOG(LS_VERBOSE) << "Produced " << *samples_per_channel << 166 " samples/channel for " << *num_channels << " channel(s)"; 167 if (error != 0) { 168 LOG_FERR1(LS_WARNING, GetAudioInternal, error); 169 error_code_ = error; 170 return kFail; 171 } 172 if (type) { 173 *type = LastOutputType(); 174 } 175 return kOK; 176 } 177 178 int NetEqImpl::RegisterPayloadType(enum NetEqDecoder codec, 179 uint8_t rtp_payload_type) { 180 CriticalSectionScoped lock(crit_sect_.get()); 181 LOG_API2(static_cast<int>(rtp_payload_type), codec); 182 int ret = decoder_database_->RegisterPayload(rtp_payload_type, codec); 183 if (ret != DecoderDatabase::kOK) { 184 LOG_FERR2(LS_WARNING, RegisterPayload, rtp_payload_type, codec); 185 switch (ret) { 186 case DecoderDatabase::kInvalidRtpPayloadType: 187 error_code_ = kInvalidRtpPayloadType; 188 break; 189 case DecoderDatabase::kCodecNotSupported: 190 error_code_ = kCodecNotSupported; 191 break; 192 case DecoderDatabase::kDecoderExists: 193 error_code_ = kDecoderExists; 194 break; 195 default: 196 error_code_ = kOtherError; 197 } 198 return kFail; 199 } 200 return kOK; 201 } 202 203 int NetEqImpl::RegisterExternalDecoder(AudioDecoder* decoder, 204 enum NetEqDecoder codec, 205 uint8_t 
rtp_payload_type) { 206 CriticalSectionScoped lock(crit_sect_.get()); 207 LOG_API2(static_cast<int>(rtp_payload_type), codec); 208 if (!decoder) { 209 LOG(LS_ERROR) << "Cannot register external decoder with NULL pointer"; 210 assert(false); 211 return kFail; 212 } 213 const int sample_rate_hz = AudioDecoder::CodecSampleRateHz(codec); 214 int ret = decoder_database_->InsertExternal(rtp_payload_type, codec, 215 sample_rate_hz, decoder); 216 if (ret != DecoderDatabase::kOK) { 217 LOG_FERR2(LS_WARNING, InsertExternal, rtp_payload_type, codec); 218 switch (ret) { 219 case DecoderDatabase::kInvalidRtpPayloadType: 220 error_code_ = kInvalidRtpPayloadType; 221 break; 222 case DecoderDatabase::kCodecNotSupported: 223 error_code_ = kCodecNotSupported; 224 break; 225 case DecoderDatabase::kDecoderExists: 226 error_code_ = kDecoderExists; 227 break; 228 case DecoderDatabase::kInvalidSampleRate: 229 error_code_ = kInvalidSampleRate; 230 break; 231 case DecoderDatabase::kInvalidPointer: 232 error_code_ = kInvalidPointer; 233 break; 234 default: 235 error_code_ = kOtherError; 236 } 237 return kFail; 238 } 239 return kOK; 240 } 241 242 int NetEqImpl::RemovePayloadType(uint8_t rtp_payload_type) { 243 CriticalSectionScoped lock(crit_sect_.get()); 244 LOG_API1(static_cast<int>(rtp_payload_type)); 245 int ret = decoder_database_->Remove(rtp_payload_type); 246 if (ret == DecoderDatabase::kOK) { 247 return kOK; 248 } else if (ret == DecoderDatabase::kDecoderNotFound) { 249 error_code_ = kDecoderNotFound; 250 } else { 251 error_code_ = kOtherError; 252 } 253 LOG_FERR1(LS_WARNING, Remove, rtp_payload_type); 254 return kFail; 255 } 256 257 bool NetEqImpl::SetMinimumDelay(int delay_ms) { 258 CriticalSectionScoped lock(crit_sect_.get()); 259 if (delay_ms >= 0 && delay_ms < 10000) { 260 assert(delay_manager_.get()); 261 return delay_manager_->SetMinimumDelay(delay_ms); 262 } 263 return false; 264 } 265 266 bool NetEqImpl::SetMaximumDelay(int delay_ms) { 267 CriticalSectionScoped 
lock(crit_sect_.get()); 268 if (delay_ms >= 0 && delay_ms < 10000) { 269 assert(delay_manager_.get()); 270 return delay_manager_->SetMaximumDelay(delay_ms); 271 } 272 return false; 273 } 274 275 int NetEqImpl::LeastRequiredDelayMs() const { 276 CriticalSectionScoped lock(crit_sect_.get()); 277 assert(delay_manager_.get()); 278 return delay_manager_->least_required_delay_ms(); 279 } 280 281 void NetEqImpl::SetPlayoutMode(NetEqPlayoutMode mode) { 282 CriticalSectionScoped lock(crit_sect_.get()); 283 if (!decision_logic_.get() || mode != decision_logic_->playout_mode()) { 284 // The reset() method calls delete for the old object. 285 CreateDecisionLogic(mode); 286 } 287 } 288 289 NetEqPlayoutMode NetEqImpl::PlayoutMode() const { 290 CriticalSectionScoped lock(crit_sect_.get()); 291 assert(decision_logic_.get()); 292 return decision_logic_->playout_mode(); 293 } 294 295 int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) { 296 CriticalSectionScoped lock(crit_sect_.get()); 297 assert(decoder_database_.get()); 298 const int total_samples_in_buffers = packet_buffer_->NumSamplesInBuffer( 299 decoder_database_.get(), decoder_frame_length_) + 300 static_cast<int>(sync_buffer_->FutureLength()); 301 assert(delay_manager_.get()); 302 assert(decision_logic_.get()); 303 stats_.GetNetworkStatistics(fs_hz_, total_samples_in_buffers, 304 decoder_frame_length_, *delay_manager_.get(), 305 *decision_logic_.get(), stats); 306 return 0; 307 } 308 309 void NetEqImpl::WaitingTimes(std::vector<int>* waiting_times) { 310 CriticalSectionScoped lock(crit_sect_.get()); 311 stats_.WaitingTimes(waiting_times); 312 } 313 314 void NetEqImpl::GetRtcpStatistics(RtcpStatistics* stats) { 315 CriticalSectionScoped lock(crit_sect_.get()); 316 if (stats) { 317 rtcp_.GetStatistics(false, stats); 318 } 319 } 320 321 void NetEqImpl::GetRtcpStatisticsNoReset(RtcpStatistics* stats) { 322 CriticalSectionScoped lock(crit_sect_.get()); 323 if (stats) { 324 rtcp_.GetStatistics(true, stats); 325 } 326 } 
327 328 void NetEqImpl::EnableVad() { 329 CriticalSectionScoped lock(crit_sect_.get()); 330 assert(vad_.get()); 331 vad_->Enable(); 332 } 333 334 void NetEqImpl::DisableVad() { 335 CriticalSectionScoped lock(crit_sect_.get()); 336 assert(vad_.get()); 337 vad_->Disable(); 338 } 339 340 bool NetEqImpl::GetPlayoutTimestamp(uint32_t* timestamp) { 341 CriticalSectionScoped lock(crit_sect_.get()); 342 if (first_packet_) { 343 // We don't have a valid RTP timestamp until we have decoded our first 344 // RTP packet. 345 return false; 346 } 347 *timestamp = timestamp_scaler_->ToExternal(playout_timestamp_); 348 return true; 349 } 350 351 int NetEqImpl::LastError() { 352 CriticalSectionScoped lock(crit_sect_.get()); 353 return error_code_; 354 } 355 356 int NetEqImpl::LastDecoderError() { 357 CriticalSectionScoped lock(crit_sect_.get()); 358 return decoder_error_code_; 359 } 360 361 void NetEqImpl::FlushBuffers() { 362 CriticalSectionScoped lock(crit_sect_.get()); 363 LOG_API0(); 364 packet_buffer_->Flush(); 365 assert(sync_buffer_.get()); 366 assert(expand_.get()); 367 sync_buffer_->Flush(); 368 sync_buffer_->set_next_index(sync_buffer_->next_index() - 369 expand_->overlap_length()); 370 // Set to wait for new codec. 
371 first_packet_ = true; 372 } 373 374 void NetEqImpl::PacketBufferStatistics(int* current_num_packets, 375 int* max_num_packets) const { 376 CriticalSectionScoped lock(crit_sect_.get()); 377 packet_buffer_->BufferStat(current_num_packets, max_num_packets); 378 } 379 380 int NetEqImpl::DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) const { 381 CriticalSectionScoped lock(crit_sect_.get()); 382 if (decoded_packet_sequence_number_ < 0) 383 return -1; 384 *sequence_number = decoded_packet_sequence_number_; 385 *timestamp = decoded_packet_timestamp_; 386 return 0; 387 } 388 389 const SyncBuffer* NetEqImpl::sync_buffer_for_test() const { 390 CriticalSectionScoped lock(crit_sect_.get()); 391 return sync_buffer_.get(); 392 } 393 394 // Methods below this line are private. 395 396 int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header, 397 const uint8_t* payload, 398 int length_bytes, 399 uint32_t receive_timestamp, 400 bool is_sync_packet) { 401 if (!payload) { 402 LOG_F(LS_ERROR) << "payload == NULL"; 403 return kInvalidPointer; 404 } 405 // Sanity checks for sync-packets. 406 if (is_sync_packet) { 407 if (decoder_database_->IsDtmf(rtp_header.header.payloadType) || 408 decoder_database_->IsRed(rtp_header.header.payloadType) || 409 decoder_database_->IsComfortNoise(rtp_header.header.payloadType)) { 410 LOG_F(LS_ERROR) << "Sync-packet with an unacceptable payload type " 411 << rtp_header.header.payloadType; 412 return kSyncPacketNotAccepted; 413 } 414 if (first_packet_ || 415 rtp_header.header.payloadType != current_rtp_payload_type_ || 416 rtp_header.header.ssrc != ssrc_) { 417 // Even if |current_rtp_payload_type_| is 0xFF, sync-packet isn't 418 // accepted. 419 LOG_F(LS_ERROR) << "Changing codec, SSRC or first packet " 420 "with sync-packet."; 421 return kSyncPacketNotAccepted; 422 } 423 } 424 PacketList packet_list; 425 RTPHeader main_header; 426 { 427 // Convert to Packet. 
428 // Create |packet| within this separate scope, since it should not be used 429 // directly once it's been inserted in the packet list. This way, |packet| 430 // is not defined outside of this block. 431 Packet* packet = new Packet; 432 packet->header.markerBit = false; 433 packet->header.payloadType = rtp_header.header.payloadType; 434 packet->header.sequenceNumber = rtp_header.header.sequenceNumber; 435 packet->header.timestamp = rtp_header.header.timestamp; 436 packet->header.ssrc = rtp_header.header.ssrc; 437 packet->header.numCSRCs = 0; 438 packet->payload_length = length_bytes; 439 packet->primary = true; 440 packet->waiting_time = 0; 441 packet->payload = new uint8_t[packet->payload_length]; 442 packet->sync_packet = is_sync_packet; 443 if (!packet->payload) { 444 LOG_F(LS_ERROR) << "Payload pointer is NULL."; 445 } 446 assert(payload); // Already checked above. 447 memcpy(packet->payload, payload, packet->payload_length); 448 // Insert packet in a packet list. 449 packet_list.push_back(packet); 450 // Save main payloads header for later. 451 memcpy(&main_header, &packet->header, sizeof(main_header)); 452 } 453 454 bool update_sample_rate_and_channels = false; 455 // Reinitialize NetEq if it's needed (changed SSRC or first call). 456 if ((main_header.ssrc != ssrc_) || first_packet_) { 457 rtcp_.Init(main_header.sequenceNumber); 458 first_packet_ = false; 459 460 // Flush the packet buffer and DTMF buffer. 461 packet_buffer_->Flush(); 462 dtmf_buffer_->Flush(); 463 464 // Store new SSRC. 465 ssrc_ = main_header.ssrc; 466 467 // Update audio buffer timestamp. 468 sync_buffer_->IncreaseEndTimestamp(main_header.timestamp - timestamp_); 469 470 // Update codecs. 471 timestamp_ = main_header.timestamp; 472 current_rtp_payload_type_ = main_header.payloadType; 473 474 // Set MCU to update codec on next SignalMCU call. 475 new_codec_ = true; 476 477 // Reset timestamp scaling. 
478 timestamp_scaler_->Reset(); 479 480 // Triger an update of sampling rate and the number of channels. 481 update_sample_rate_and_channels = true; 482 } 483 484 // Update RTCP statistics, only for regular packets. 485 if (!is_sync_packet) 486 rtcp_.Update(main_header, receive_timestamp); 487 488 // Check for RED payload type, and separate payloads into several packets. 489 if (decoder_database_->IsRed(main_header.payloadType)) { 490 assert(!is_sync_packet); // We had a sanity check for this. 491 if (payload_splitter_->SplitRed(&packet_list) != PayloadSplitter::kOK) { 492 LOG_FERR1(LS_WARNING, SplitRed, packet_list.size()); 493 PacketBuffer::DeleteAllPackets(&packet_list); 494 return kRedundancySplitError; 495 } 496 // Only accept a few RED payloads of the same type as the main data, 497 // DTMF events and CNG. 498 payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_); 499 // Update the stored main payload header since the main payload has now 500 // changed. 501 memcpy(&main_header, &packet_list.front()->header, sizeof(main_header)); 502 } 503 504 // Check payload types. 505 if (decoder_database_->CheckPayloadTypes(packet_list) == 506 DecoderDatabase::kDecoderNotFound) { 507 LOG_FERR1(LS_WARNING, CheckPayloadTypes, packet_list.size()); 508 PacketBuffer::DeleteAllPackets(&packet_list); 509 return kUnknownRtpPayloadType; 510 } 511 512 // Scale timestamp to internal domain (only for some codecs). 513 timestamp_scaler_->ToInternal(&packet_list); 514 515 // Process DTMF payloads. Cycle through the list of packets, and pick out any 516 // DTMF payloads found. 517 PacketList::iterator it = packet_list.begin(); 518 while (it != packet_list.end()) { 519 Packet* current_packet = (*it); 520 assert(current_packet); 521 assert(current_packet->payload); 522 if (decoder_database_->IsDtmf(current_packet->header.payloadType)) { 523 assert(!current_packet->sync_packet); // We had a sanity check for this. 
524 DtmfEvent event; 525 int ret = DtmfBuffer::ParseEvent( 526 current_packet->header.timestamp, 527 current_packet->payload, 528 current_packet->payload_length, 529 &event); 530 if (ret != DtmfBuffer::kOK) { 531 LOG_FERR2(LS_WARNING, ParseEvent, ret, 532 current_packet->payload_length); 533 PacketBuffer::DeleteAllPackets(&packet_list); 534 return kDtmfParsingError; 535 } 536 if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) { 537 LOG_FERR0(LS_WARNING, InsertEvent); 538 PacketBuffer::DeleteAllPackets(&packet_list); 539 return kDtmfInsertError; 540 } 541 // TODO(hlundin): Let the destructor of Packet handle the payload. 542 delete [] current_packet->payload; 543 delete current_packet; 544 it = packet_list.erase(it); 545 } else { 546 ++it; 547 } 548 } 549 550 // Check for FEC in packets, and separate payloads into several packets. 551 int ret = payload_splitter_->SplitFec(&packet_list, decoder_database_.get()); 552 if (ret != PayloadSplitter::kOK) { 553 LOG_FERR1(LS_WARNING, SplitFec, packet_list.size()); 554 PacketBuffer::DeleteAllPackets(&packet_list); 555 switch (ret) { 556 case PayloadSplitter::kUnknownPayloadType: 557 return kUnknownRtpPayloadType; 558 default: 559 return kOtherError; 560 } 561 } 562 563 // Split payloads into smaller chunks. This also verifies that all payloads 564 // are of a known payload type. SplitAudio() method is protected against 565 // sync-packets. 566 ret = payload_splitter_->SplitAudio(&packet_list, *decoder_database_); 567 if (ret != PayloadSplitter::kOK) { 568 LOG_FERR1(LS_WARNING, SplitAudio, packet_list.size()); 569 PacketBuffer::DeleteAllPackets(&packet_list); 570 switch (ret) { 571 case PayloadSplitter::kUnknownPayloadType: 572 return kUnknownRtpPayloadType; 573 case PayloadSplitter::kFrameSplitError: 574 return kFrameSplitError; 575 default: 576 return kOtherError; 577 } 578 } 579 580 // Update bandwidth estimate, if the packet is not sync-packet. 
581 if (!packet_list.empty() && !packet_list.front()->sync_packet) { 582 // The list can be empty here if we got nothing but DTMF payloads. 583 AudioDecoder* decoder = 584 decoder_database_->GetDecoder(main_header.payloadType); 585 assert(decoder); // Should always get a valid object, since we have 586 // already checked that the payload types are known. 587 decoder->IncomingPacket(packet_list.front()->payload, 588 packet_list.front()->payload_length, 589 packet_list.front()->header.sequenceNumber, 590 packet_list.front()->header.timestamp, 591 receive_timestamp); 592 } 593 594 // Insert packets in buffer. 595 int temp_bufsize = packet_buffer_->NumPacketsInBuffer(); 596 ret = packet_buffer_->InsertPacketList( 597 &packet_list, 598 *decoder_database_, 599 ¤t_rtp_payload_type_, 600 ¤t_cng_rtp_payload_type_); 601 if (ret == PacketBuffer::kFlushed) { 602 // Reset DSP timestamp etc. if packet buffer flushed. 603 new_codec_ = true; 604 update_sample_rate_and_channels = true; 605 LOG_F(LS_WARNING) << "Packet buffer flushed"; 606 } else if (ret != PacketBuffer::kOK) { 607 LOG_FERR1(LS_WARNING, InsertPacketList, packet_list.size()); 608 PacketBuffer::DeleteAllPackets(&packet_list); 609 return kOtherError; 610 } 611 if (current_rtp_payload_type_ != 0xFF) { 612 const DecoderDatabase::DecoderInfo* dec_info = 613 decoder_database_->GetDecoderInfo(current_rtp_payload_type_); 614 if (!dec_info) { 615 assert(false); // Already checked that the payload type is known. 616 } 617 } 618 619 if (update_sample_rate_and_channels && !packet_buffer_->Empty()) { 620 // We do not use |current_rtp_payload_type_| to |set payload_type|, but 621 // get the next RTP header from |packet_buffer_| to obtain the payload type. 622 // The reason for it is the following corner case. If NetEq receives a 623 // CNG packet with a sample rate different than the current CNG then it 624 // flushes its buffer, assuming send codec must have been changed. 
However, 625 // payload type of the hypothetically new send codec is not known. 626 const RTPHeader* rtp_header = packet_buffer_->NextRtpHeader(); 627 assert(rtp_header); 628 int payload_type = rtp_header->payloadType; 629 AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type); 630 assert(decoder); // Payloads are already checked to be valid. 631 const DecoderDatabase::DecoderInfo* decoder_info = 632 decoder_database_->GetDecoderInfo(payload_type); 633 assert(decoder_info); 634 if (decoder_info->fs_hz != fs_hz_ || 635 decoder->channels() != algorithm_buffer_->Channels()) 636 SetSampleRateAndChannels(decoder_info->fs_hz, decoder->channels()); 637 } 638 639 // TODO(hlundin): Move this code to DelayManager class. 640 const DecoderDatabase::DecoderInfo* dec_info = 641 decoder_database_->GetDecoderInfo(main_header.payloadType); 642 assert(dec_info); // Already checked that the payload type is known. 643 delay_manager_->LastDecoderType(dec_info->codec_type); 644 if (delay_manager_->last_pack_cng_or_dtmf() == 0) { 645 // Calculate the total speech length carried in each packet. 646 temp_bufsize = packet_buffer_->NumPacketsInBuffer() - temp_bufsize; 647 temp_bufsize *= decoder_frame_length_; 648 649 if ((temp_bufsize > 0) && 650 (temp_bufsize != decision_logic_->packet_length_samples())) { 651 decision_logic_->set_packet_length_samples(temp_bufsize); 652 delay_manager_->SetPacketAudioLength((1000 * temp_bufsize) / fs_hz_); 653 } 654 655 // Update statistics. 656 if ((int32_t) (main_header.timestamp - timestamp_) >= 0 && 657 !new_codec_) { 658 // Only update statistics if incoming packet is not older than last played 659 // out packet, and if new codec flag is not set. 660 delay_manager_->Update(main_header.sequenceNumber, main_header.timestamp, 661 fs_hz_); 662 } 663 } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) { 664 // This is first "normal" packet after CNG or DTMF. 
665 // Reset packet time counter and measure time until next packet, 666 // but don't update statistics. 667 delay_manager_->set_last_pack_cng_or_dtmf(0); 668 delay_manager_->ResetPacketIatCount(); 669 } 670 return 0; 671 } 672 673 int NetEqImpl::GetAudioInternal(size_t max_length, int16_t* output, 674 int* samples_per_channel, int* num_channels) { 675 PacketList packet_list; 676 DtmfEvent dtmf_event; 677 Operations operation; 678 bool play_dtmf; 679 int return_value = GetDecision(&operation, &packet_list, &dtmf_event, 680 &play_dtmf); 681 if (return_value != 0) { 682 LOG_FERR1(LS_WARNING, GetDecision, return_value); 683 assert(false); 684 last_mode_ = kModeError; 685 return return_value; 686 } 687 LOG(LS_VERBOSE) << "GetDecision returned operation=" << operation << 688 " and " << packet_list.size() << " packet(s)"; 689 690 AudioDecoder::SpeechType speech_type; 691 int length = 0; 692 int decode_return_value = Decode(&packet_list, &operation, 693 &length, &speech_type); 694 695 assert(vad_.get()); 696 bool sid_frame_available = 697 (operation == kRfc3389Cng && !packet_list.empty()); 698 vad_->Update(decoded_buffer_.get(), length, speech_type, 699 sid_frame_available, fs_hz_); 700 701 algorithm_buffer_->Clear(); 702 switch (operation) { 703 case kNormal: { 704 DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf); 705 break; 706 } 707 case kMerge: { 708 DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf); 709 break; 710 } 711 case kExpand: { 712 return_value = DoExpand(play_dtmf); 713 break; 714 } 715 case kAccelerate: { 716 return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type, 717 play_dtmf); 718 break; 719 } 720 case kPreemptiveExpand: { 721 return_value = DoPreemptiveExpand(decoded_buffer_.get(), length, 722 speech_type, play_dtmf); 723 break; 724 } 725 case kRfc3389Cng: 726 case kRfc3389CngNoPacket: { 727 return_value = DoRfc3389Cng(&packet_list, play_dtmf); 728 break; 729 } 730 case kCodecInternalCng: { 731 // This handles 
the case when there is no transmission and the decoder 732 // should produce internal comfort noise. 733 // TODO(hlundin): Write test for codec-internal CNG. 734 DoCodecInternalCng(); 735 break; 736 } 737 case kDtmf: { 738 // TODO(hlundin): Write test for this. 739 return_value = DoDtmf(dtmf_event, &play_dtmf); 740 break; 741 } 742 case kAlternativePlc: { 743 // TODO(hlundin): Write test for this. 744 DoAlternativePlc(false); 745 break; 746 } 747 case kAlternativePlcIncreaseTimestamp: { 748 // TODO(hlundin): Write test for this. 749 DoAlternativePlc(true); 750 break; 751 } 752 case kAudioRepetitionIncreaseTimestamp: { 753 // TODO(hlundin): Write test for this. 754 sync_buffer_->IncreaseEndTimestamp(output_size_samples_); 755 // Skipping break on purpose. Execution should move on into the 756 // next case. 757 } 758 case kAudioRepetition: { 759 // TODO(hlundin): Write test for this. 760 // Copy last |output_size_samples_| from |sync_buffer_| to 761 // |algorithm_buffer|. 762 algorithm_buffer_->PushBackFromIndex( 763 *sync_buffer_, sync_buffer_->Size() - output_size_samples_); 764 expand_->Reset(); 765 break; 766 } 767 case kUndefined: { 768 LOG_F(LS_ERROR) << "Invalid operation kUndefined."; 769 assert(false); // This should not happen. 770 last_mode_ = kModeError; 771 return kInvalidOperation; 772 } 773 } // End of switch. 774 if (return_value < 0) { 775 return return_value; 776 } 777 778 if (last_mode_ != kModeRfc3389Cng) { 779 comfort_noise_->Reset(); 780 } 781 782 // Copy from |algorithm_buffer| to |sync_buffer_|. 783 sync_buffer_->PushBack(*algorithm_buffer_); 784 785 // Extract data from |sync_buffer_| to |output|. 786 size_t num_output_samples_per_channel = output_size_samples_; 787 size_t num_output_samples = output_size_samples_ * sync_buffer_->Channels(); 788 if (num_output_samples > max_length) { 789 LOG(LS_WARNING) << "Output array is too short. 
" << max_length << " < " << 790 output_size_samples_ << " * " << sync_buffer_->Channels(); 791 num_output_samples = max_length; 792 num_output_samples_per_channel = static_cast<int>( 793 max_length / sync_buffer_->Channels()); 794 } 795 int samples_from_sync = static_cast<int>( 796 sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel, 797 output)); 798 *num_channels = static_cast<int>(sync_buffer_->Channels()); 799 LOG(LS_VERBOSE) << "Sync buffer (" << *num_channels << " channel(s)):" << 800 " insert " << algorithm_buffer_->Size() << " samples, extract " << 801 samples_from_sync << " samples"; 802 if (samples_from_sync != output_size_samples_) { 803 LOG_F(LS_ERROR) << "samples_from_sync != output_size_samples_"; 804 // TODO(minyue): treatment of under-run, filling zeros 805 memset(output, 0, num_output_samples * sizeof(int16_t)); 806 *samples_per_channel = output_size_samples_; 807 return kSampleUnderrun; 808 } 809 *samples_per_channel = output_size_samples_; 810 811 // Should always have overlap samples left in the |sync_buffer_|. 812 assert(sync_buffer_->FutureLength() >= expand_->overlap_length()); 813 814 if (play_dtmf) { 815 return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(), output); 816 } 817 818 // Update the background noise parameters if last operation wrote data 819 // straight from the decoder to the |sync_buffer_|. That is, none of the 820 // operations that modify the signal can be followed by a parameter update. 821 if ((last_mode_ == kModeNormal) || 822 (last_mode_ == kModeAccelerateFail) || 823 (last_mode_ == kModePreemptiveExpandFail) || 824 (last_mode_ == kModeRfc3389Cng) || 825 (last_mode_ == kModeCodecInternalCng)) { 826 background_noise_->Update(*sync_buffer_, *vad_.get()); 827 } 828 829 if (operation == kDtmf) { 830 // DTMF data was written the end of |sync_buffer_|. 831 // Update index to end of DTMF data in |sync_buffer_|. 
    sync_buffer_->set_dtmf_index(sync_buffer_->Size());
  }

  if (last_mode_ != kModeExpand) {
    // If last operation was not expand, calculate the |playout_timestamp_| from
    // the |sync_buffer_|. However, do not update the |playout_timestamp_| if it
    // would be moved "backwards".
    uint32_t temp_timestamp = sync_buffer_->end_timestamp() -
        static_cast<uint32_t>(sync_buffer_->FutureLength());
    // The signed cast of the difference makes the comparison work across
    // 32-bit timestamp wrap-around.
    if (static_cast<int32_t>(temp_timestamp - playout_timestamp_) > 0) {
      playout_timestamp_ = temp_timestamp;
    }
  } else {
    // Use dead reckoning to estimate the |playout_timestamp_|.
    playout_timestamp_ += output_size_samples_;
  }

  // A decoder error takes precedence over any other return value.
  if (decode_return_value) return decode_return_value;
  return return_value;
}

// Decides which operation to perform for the next output block and extracts
// the packets needed for it.
//
// Outputs:
//   |operation|   - the chosen operation; may be changed from what
//                   |decision_logic_| returned (see the checks below).
//   |packet_list| - receives the packets extracted from |packet_buffer_|.
//   |dtmf_event|  - filled in by |dtmf_buffer_| when an event is due.
//   |play_dtmf|   - set to true when a DTMF event is due.
//
// Returns 0 on success, -1 if a packet header is missing during a reset, and
// kPacketBufferCorruption if packet extraction fails.
int NetEqImpl::GetDecision(Operations* operation,
                           PacketList* packet_list,
                           DtmfEvent* dtmf_event,
                           bool* play_dtmf) {
  // Initialize output variables.
  *play_dtmf = false;
  *operation = kUndefined;

  // Increment time counters.
  packet_buffer_->IncrementWaitingTimes();
  stats_.IncreaseCounter(output_size_samples_, fs_hz_);

  assert(sync_buffer_.get());
  uint32_t end_timestamp = sync_buffer_->end_timestamp();
  const RTPHeader* header = packet_buffer_->NextRtpHeader();

  if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
    // Because of timestamp peculiarities, we have to "manually" disallow using
    // a CNG packet with the same timestamp as the one that was last played.
    // This can happen when using redundancy and will cause the timing to shift.
    while (header && decoder_database_->IsComfortNoise(header->payloadType) &&
           (end_timestamp >= header->timestamp ||
            end_timestamp + decision_logic_->generated_noise_samples() >
                header->timestamp)) {
      // Don't use this packet, discard it.
      if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) {
        assert(false);  // Must be ok by design.
      }
      // Check buffer again.
      if (!new_codec_) {
        packet_buffer_->DiscardOldPackets(end_timestamp);
      }
      header = packet_buffer_->NextRtpHeader();
    }
  }

  assert(expand_.get());
  // Number of not-yet-played samples in |sync_buffer_|, excluding the overlap
  // region reported by |expand_|.
  const int samples_left = static_cast<int>(sync_buffer_->FutureLength() -
      expand_->overlap_length());
  if (last_mode_ == kModeAccelerateSuccess ||
      last_mode_ == kModeAccelerateLowEnergy ||
      last_mode_ == kModePreemptiveExpandSuccess ||
      last_mode_ == kModePreemptiveExpandLowEnergy) {
    // Subtract (samples_left + output_size_samples_) from sampleMemory.
    decision_logic_->AddSampleMemory(-(samples_left + output_size_samples_));
  }

  // Check if it is time to play a DTMF event.
  if (dtmf_buffer_->GetEvent(end_timestamp +
                             decision_logic_->generated_noise_samples(),
                             dtmf_event)) {
    *play_dtmf = true;
  }

  // Get instruction.
  assert(sync_buffer_.get());
  assert(expand_.get());
  *operation = decision_logic_->GetDecision(*sync_buffer_,
                                            *expand_,
                                            decoder_frame_length_,
                                            header,
                                            last_mode_,
                                            *play_dtmf,
                                            &reset_decoder_);

  // Check if we already have enough samples in the |sync_buffer_|. If so,
  // change decision to normal, unless the decision was merge, accelerate, or
  // preemptive expand.
  if (samples_left >= output_size_samples_ &&
      *operation != kMerge &&
      *operation != kAccelerate &&
      *operation != kPreemptiveExpand) {
    *operation = kNormal;
    return 0;
  }

  decision_logic_->ExpandDecision(*operation);

  // Check conditions for reset.
  if (new_codec_ || *operation == kUndefined) {
    // The only valid reason to get kUndefined is that new_codec_ is set.
    assert(new_codec_);
    if (*play_dtmf && !header) {
      timestamp_ = dtmf_event->timestamp;
    } else {
      assert(header);
      if (!header) {
        LOG_F(LS_ERROR) << "Packet missing where it shouldn't.";
        return -1;
      }
      timestamp_ = header->timestamp;
      if (*operation == kRfc3389CngNoPacket
#ifndef LEGACY_BITEXACT
          // Without this check, it can happen that a non-CNG packet is sent to
          // the CNG decoder as if it was a SID frame. This is clearly a bug,
          // but is kept for now to maintain bit-exactness with the test
          // vectors.
          && decoder_database_->IsComfortNoise(header->payloadType)
#endif
      ) {
        // Change decision to CNG packet, since we do have a CNG packet, but it
        // was considered too early to use. Now, use it anyway.
        *operation = kRfc3389Cng;
      } else if (*operation != kRfc3389Cng) {
        *operation = kNormal;
      }
    }
    // Adjust |sync_buffer_| timestamp before setting |end_timestamp| to the
    // new value.
    sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp);
    end_timestamp = timestamp_;
    new_codec_ = false;
    decision_logic_->SoftReset();
    buffer_level_filter_->Reset();
    delay_manager_->Reset();
    stats_.ResetMcu();
  }

  // By default one output block is requested; the time-scaling and merge
  // cases below may raise this.
  int required_samples = output_size_samples_;
  const int samples_10_ms = 80 * fs_mult_;
  const int samples_20_ms = 2 * samples_10_ms;
  const int samples_30_ms = 3 * samples_10_ms;

  switch (*operation) {
    case kExpand: {
      timestamp_ = end_timestamp;
      return 0;
    }
    case kRfc3389CngNoPacket:
    case kCodecInternalCng: {
      return 0;
    }
    case kDtmf: {
      // TODO(hlundin): Write test for this.
      // Update timestamp.
      timestamp_ = end_timestamp;
      if (decision_logic_->generated_noise_samples() > 0 &&
          last_mode_ != kModeDtmf) {
        // Make a jump in timestamp due to the recently played comfort noise.
        uint32_t timestamp_jump = decision_logic_->generated_noise_samples();
        sync_buffer_->IncreaseEndTimestamp(timestamp_jump);
        timestamp_ += timestamp_jump;
      }
      decision_logic_->set_generated_noise_samples(0);
      return 0;
    }
    case kAccelerate: {
      // In order to do a accelerate we need at least 30 ms of audio data.
      if (samples_left >= samples_30_ms) {
        // Already have enough data, so we do not need to extract any more.
        decision_logic_->set_sample_memory(samples_left);
        decision_logic_->set_prev_time_scale(true);
        return 0;
      } else if (samples_left >= samples_10_ms &&
          decoder_frame_length_ >= samples_30_ms) {
        // Avoid decoding more data as it might overflow the playout buffer.
        *operation = kNormal;
        return 0;
      } else if (samples_left < samples_20_ms &&
          decoder_frame_length_ < samples_30_ms) {
        // Build up decoded data by decoding at least 20 ms of audio data. Do
        // not perform accelerate yet, but wait until we only need to do one
        // decoding.
        required_samples = 2 * output_size_samples_;
        *operation = kNormal;
      }
      // If none of the above is true, we have one of two possible situations:
      // (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or
      // (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms.
      // In either case, we move on with the accelerate decision, and decode one
      // frame now.
      break;
    }
    case kPreemptiveExpand: {
      // In order to do a preemptive expand we need at least 30 ms of decoded
      // audio data.
      if ((samples_left >= samples_30_ms) ||
          (samples_left >= samples_10_ms &&
              decoder_frame_length_ >= samples_30_ms)) {
        // Already have enough data, so we do not need to extract any more.
        // Or, avoid decoding more data as it might overflow the playout buffer.
        // Still try preemptive expand, though.
        decision_logic_->set_sample_memory(samples_left);
        decision_logic_->set_prev_time_scale(true);
        return 0;
      }
      if (samples_left < samples_20_ms &&
          decoder_frame_length_ < samples_30_ms) {
        // Build up decoded data by decoding at least 20 ms of audio data.
        // Still try to perform preemptive expand.
        required_samples = 2 * output_size_samples_;
      }
      // Move on with the preemptive expand decision.
      break;
    }
    case kMerge: {
      required_samples =
          std::max(merge_->RequiredFutureSamples(), required_samples);
      break;
    }
    default: {
      // Do nothing.
    }
  }

  // Get packets from buffer.
  int extracted_samples = 0;
  if (header &&
      *operation != kAlternativePlc &&
      *operation != kAlternativePlcIncreaseTimestamp &&
      *operation != kAudioRepetition &&
      *operation != kAudioRepetitionIncreaseTimestamp) {
    sync_buffer_->IncreaseEndTimestamp(header->timestamp - end_timestamp);
    if (decision_logic_->CngOff()) {
      // Adjustment of timestamp only corresponds to an actual packet loss
      // if comfort noise is not played. If comfort noise was just played,
      // this adjustment of timestamp is only done to get back in sync with the
      // stream timestamp; no loss to report.
      stats_.LostSamples(header->timestamp - end_timestamp);
    }

    if (*operation != kRfc3389Cng) {
      // We are about to decode and use a non-CNG packet.
      decision_logic_->SetCngOff();
    }
    // Reset CNG timestamp as a new packet will be delivered.
    // (Also if this is a CNG packet, since playedOutTS is updated.)
    decision_logic_->set_generated_noise_samples(0);

    extracted_samples = ExtractPackets(required_samples, packet_list);
    if (extracted_samples < 0) {
      LOG_F(LS_WARNING) << "Failed to extract packets from buffer.";
      return kPacketBufferCorruption;
    }
  }

  if (*operation == kAccelerate ||
      *operation == kPreemptiveExpand) {
    decision_logic_->set_sample_memory(samples_left + extracted_samples);
    decision_logic_->set_prev_time_scale(true);
  }

  if (*operation == kAccelerate) {
    // Check that we have enough data (30ms) to do accelerate.
    if (extracted_samples + samples_left < samples_30_ms) {
      // TODO(hlundin): Write test for this.
      // Not enough, do normal operation instead.
      *operation = kNormal;
    }
  }

  timestamp_ = end_timestamp;
  return 0;
}

// Decodes the packets in |packet_list| into |decoded_buffer_|.
//
// Looks up the decoder for the first packet (unless it is a comfort noise
// packet), re-initializes state if the active decoder changed, resets the
// decoders if |reset_decoder_| is set, and then runs DecodeLoop().
//
// Outputs:
//   |operation|      - changed to kExpand if decoding fails.
//   |decoded_length| - total number of decoded samples; set to 0 on decoder
//                      error. Not written when the LEGACY_BITEXACT early
//                      return for kRfc3389Cng is taken.
//   |speech_type|    - initialized to kSpeech, then passed on to the decoder.
//
// Returns the value from DecodeLoop() on success, kDecoderNotFound if no
// decoder (or decoder info) exists for the payload type, and kDecoderErrorCode
// or kOtherDecoderError if decoding fails.
int NetEqImpl::Decode(PacketList* packet_list, Operations* operation,
                      int* decoded_length,
                      AudioDecoder::SpeechType* speech_type) {
  *speech_type = AudioDecoder::kSpeech;
  AudioDecoder* decoder = NULL;
  if (!packet_list->empty()) {
    const Packet* packet = packet_list->front();
    int payload_type = packet->header.payloadType;
    if (!decoder_database_->IsComfortNoise(payload_type)) {
      decoder = decoder_database_->GetDecoder(payload_type);
      assert(decoder);
      if (!decoder) {
        LOG_FERR1(LS_WARNING, GetDecoder, payload_type);
        PacketBuffer::DeleteAllPackets(packet_list);
        return kDecoderNotFound;
      }
      bool decoder_changed;
      decoder_database_->SetActiveDecoder(payload_type, &decoder_changed);
      if (decoder_changed) {
        // We have a new decoder. Re-init some values.
        const DecoderDatabase::DecoderInfo* decoder_info = decoder_database_
            ->GetDecoderInfo(payload_type);
        assert(decoder_info);
        if (!decoder_info) {
          LOG_FERR1(LS_WARNING, GetDecoderInfo, payload_type);
          PacketBuffer::DeleteAllPackets(packet_list);
          return kDecoderNotFound;
        }
        // If sampling rate or number of channels has changed, we need to make
        // a reset.
        if (decoder_info->fs_hz != fs_hz_ ||
            decoder->channels() != algorithm_buffer_->Channels()) {
          // TODO(tlegrand): Add unittest to cover this event.
          SetSampleRateAndChannels(decoder_info->fs_hz, decoder->channels());
        }
        sync_buffer_->set_end_timestamp(timestamp_);
        playout_timestamp_ = timestamp_;
      }
    }
  }

  if (reset_decoder_) {
    // TODO(hlundin): Write test for this.
    // Reset decoder.
    if (decoder) {
      decoder->Init();
    }
    // Reset comfort noise decoder.
    AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
    if (cng_decoder) {
      cng_decoder->Init();
    }
    reset_decoder_ = false;
  }

#ifdef LEGACY_BITEXACT
  // Due to a bug in old SignalMCU, it could happen that CNG operation was
  // decided, but a speech packet was provided. The speech packet will be used
  // to update the comfort noise decoder, as if it was a SID frame, which is
  // clearly wrong.
  if (*operation == kRfc3389Cng) {
    return 0;
  }
#endif

  *decoded_length = 0;
  // Update codec-internal PLC state.
  if ((*operation == kMerge) && decoder && decoder->HasDecodePlc()) {
    decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]);
  }

  int return_value = DecodeLoop(packet_list, operation, decoder,
                                decoded_length, speech_type);

  if (*decoded_length < 0) {
    // Error returned from the decoder.
    *decoded_length = 0;
    sync_buffer_->IncreaseEndTimestamp(decoder_frame_length_);
    int error_code = 0;
    if (decoder)
      error_code = decoder->ErrorCode();
    if (error_code != 0) {
      // Got some error code from the decoder.
      decoder_error_code_ = error_code;
      return_value = kDecoderErrorCode;
    } else {
      // Decoder does not implement error codes. Return generic error.
      return_value = kOtherDecoderError;
    }
    LOG_FERR2(LS_WARNING, DecodeLoop, error_code, packet_list->size());
    *operation = kExpand;  // Do expansion to get data instead.
  }
  if (*speech_type != AudioDecoder::kComfortNoise) {
    // Don't increment timestamp if codec returned CNG speech type
    // since in this case, we will increment the CNGplayedTS counter.
    // Increase with number of samples per channel.
    assert(*decoded_length == 0 ||
           (decoder && decoder->channels() == sync_buffer_->Channels()));
    sync_buffer_->IncreaseEndTimestamp(
        *decoded_length / static_cast<int>(sync_buffer_->Channels()));
  }
  return return_value;
}

// Decodes packets from the front of |packet_list| with |decoder|, appending
// the output to |decoded_buffer_| at offset |*decoded_length|, until the list
// is empty or the next packet is a comfort noise packet.
//
// On a decoder error |*decoded_length| is set to -1 and the remaining packets
// are deleted. Returns 0, or kDecodedTooMuch if more samples were decoded than
// |decoded_buffer_length_|.
int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation,
                          AudioDecoder* decoder, int* decoded_length,
                          AudioDecoder::SpeechType* speech_type) {
  Packet* packet = NULL;
  if (!packet_list->empty()) {
    packet = packet_list->front();
  }
  // Do decoding.
  while (packet &&
      !decoder_database_->IsComfortNoise(packet->header.payloadType)) {
    assert(decoder);  // At this point, we must have a decoder object.
    // The number of channels in the |sync_buffer_| should be the same as the
    // number decoder channels.
1225 assert(sync_buffer_->Channels() == decoder->channels()); 1226 assert(decoded_buffer_length_ >= kMaxFrameSize * decoder->channels()); 1227 assert(*operation == kNormal || *operation == kAccelerate || 1228 *operation == kMerge || *operation == kPreemptiveExpand); 1229 packet_list->pop_front(); 1230 int payload_length = packet->payload_length; 1231 int16_t decode_length; 1232 if (packet->sync_packet) { 1233 // Decode to silence with the same frame size as the last decode. 1234 LOG(LS_VERBOSE) << "Decoding sync-packet: " << 1235 " ts=" << packet->header.timestamp << 1236 ", sn=" << packet->header.sequenceNumber << 1237 ", pt=" << static_cast<int>(packet->header.payloadType) << 1238 ", ssrc=" << packet->header.ssrc << 1239 ", len=" << packet->payload_length; 1240 memset(&decoded_buffer_[*decoded_length], 0, decoder_frame_length_ * 1241 decoder->channels() * sizeof(decoded_buffer_[0])); 1242 decode_length = decoder_frame_length_; 1243 } else if (!packet->primary) { 1244 // This is a redundant payload; call the special decoder method. 
1245 LOG(LS_VERBOSE) << "Decoding packet (redundant):" << 1246 " ts=" << packet->header.timestamp << 1247 ", sn=" << packet->header.sequenceNumber << 1248 ", pt=" << static_cast<int>(packet->header.payloadType) << 1249 ", ssrc=" << packet->header.ssrc << 1250 ", len=" << packet->payload_length; 1251 decode_length = decoder->DecodeRedundant( 1252 packet->payload, packet->payload_length, 1253 &decoded_buffer_[*decoded_length], speech_type); 1254 } else { 1255 LOG(LS_VERBOSE) << "Decoding packet: ts=" << packet->header.timestamp << 1256 ", sn=" << packet->header.sequenceNumber << 1257 ", pt=" << static_cast<int>(packet->header.payloadType) << 1258 ", ssrc=" << packet->header.ssrc << 1259 ", len=" << packet->payload_length; 1260 decode_length = decoder->Decode(packet->payload, 1261 packet->payload_length, 1262 &decoded_buffer_[*decoded_length], 1263 speech_type); 1264 } 1265 1266 delete[] packet->payload; 1267 delete packet; 1268 packet = NULL; 1269 if (decode_length > 0) { 1270 *decoded_length += decode_length; 1271 // Update |decoder_frame_length_| with number of samples per channel. 1272 decoder_frame_length_ = decode_length / 1273 static_cast<int>(decoder->channels()); 1274 LOG(LS_VERBOSE) << "Decoded " << decode_length << " samples (" << 1275 decoder->channels() << " channel(s) -> " << decoder_frame_length_ << 1276 " samples per channel)"; 1277 } else if (decode_length < 0) { 1278 // Error. 1279 LOG_FERR2(LS_WARNING, Decode, decode_length, payload_length); 1280 *decoded_length = -1; 1281 PacketBuffer::DeleteAllPackets(packet_list); 1282 break; 1283 } 1284 if (*decoded_length > static_cast<int>(decoded_buffer_length_)) { 1285 // Guard against overflow. 1286 LOG_F(LS_WARNING) << "Decoded too much."; 1287 PacketBuffer::DeleteAllPackets(packet_list); 1288 return kDecodedTooMuch; 1289 } 1290 if (!packet_list->empty()) { 1291 packet = packet_list->front(); 1292 } else { 1293 packet = NULL; 1294 } 1295 } // End of decode loop. 
1296 1297 // If the list is not empty at this point, either a decoding error terminated 1298 // the while-loop, or list must hold exactly one CNG packet. 1299 assert(packet_list->empty() || *decoded_length < 0 || 1300 (packet_list->size() == 1 && packet && 1301 decoder_database_->IsComfortNoise(packet->header.payloadType))); 1302 return 0; 1303 } 1304 1305 void NetEqImpl::DoNormal(const int16_t* decoded_buffer, size_t decoded_length, 1306 AudioDecoder::SpeechType speech_type, bool play_dtmf) { 1307 assert(normal_.get()); 1308 assert(mute_factor_array_.get()); 1309 normal_->Process(decoded_buffer, decoded_length, last_mode_, 1310 mute_factor_array_.get(), algorithm_buffer_.get()); 1311 if (decoded_length != 0) { 1312 last_mode_ = kModeNormal; 1313 } 1314 1315 // If last packet was decoded as an inband CNG, set mode to CNG instead. 1316 if ((speech_type == AudioDecoder::kComfortNoise) 1317 || ((last_mode_ == kModeCodecInternalCng) 1318 && (decoded_length == 0))) { 1319 // TODO(hlundin): Remove second part of || statement above. 1320 last_mode_ = kModeCodecInternalCng; 1321 } 1322 1323 if (!play_dtmf) { 1324 dtmf_tone_generator_->Reset(); 1325 } 1326 } 1327 1328 void NetEqImpl::DoMerge(int16_t* decoded_buffer, size_t decoded_length, 1329 AudioDecoder::SpeechType speech_type, bool play_dtmf) { 1330 assert(mute_factor_array_.get()); 1331 assert(merge_.get()); 1332 int new_length = merge_->Process(decoded_buffer, decoded_length, 1333 mute_factor_array_.get(), 1334 algorithm_buffer_.get()); 1335 1336 // Update in-call and post-call statistics. 1337 if (expand_->MuteFactor(0) == 0) { 1338 // Expand generates only noise. 1339 stats_.ExpandedNoiseSamples(new_length - static_cast<int>(decoded_length)); 1340 } else { 1341 // Expansion generates more than only noise. 1342 stats_.ExpandedVoiceSamples(new_length - static_cast<int>(decoded_length)); 1343 } 1344 1345 last_mode_ = kModeMerge; 1346 // If last packet was decoded as an inband CNG, set mode to CNG instead. 
1347 if (speech_type == AudioDecoder::kComfortNoise) { 1348 last_mode_ = kModeCodecInternalCng; 1349 } 1350 expand_->Reset(); 1351 if (!play_dtmf) { 1352 dtmf_tone_generator_->Reset(); 1353 } 1354 } 1355 1356 int NetEqImpl::DoExpand(bool play_dtmf) { 1357 while ((sync_buffer_->FutureLength() - expand_->overlap_length()) < 1358 static_cast<size_t>(output_size_samples_)) { 1359 algorithm_buffer_->Clear(); 1360 int return_value = expand_->Process(algorithm_buffer_.get()); 1361 int length = static_cast<int>(algorithm_buffer_->Size()); 1362 1363 // Update in-call and post-call statistics. 1364 if (expand_->MuteFactor(0) == 0) { 1365 // Expand operation generates only noise. 1366 stats_.ExpandedNoiseSamples(length); 1367 } else { 1368 // Expand operation generates more than only noise. 1369 stats_.ExpandedVoiceSamples(length); 1370 } 1371 1372 last_mode_ = kModeExpand; 1373 1374 if (return_value < 0) { 1375 return return_value; 1376 } 1377 1378 sync_buffer_->PushBack(*algorithm_buffer_); 1379 algorithm_buffer_->Clear(); 1380 } 1381 if (!play_dtmf) { 1382 dtmf_tone_generator_->Reset(); 1383 } 1384 return 0; 1385 } 1386 1387 int NetEqImpl::DoAccelerate(int16_t* decoded_buffer, size_t decoded_length, 1388 AudioDecoder::SpeechType speech_type, 1389 bool play_dtmf) { 1390 const size_t required_samples = 240 * fs_mult_; // Must have 30 ms. 1391 size_t borrowed_samples_per_channel = 0; 1392 size_t num_channels = algorithm_buffer_->Channels(); 1393 size_t decoded_length_per_channel = decoded_length / num_channels; 1394 if (decoded_length_per_channel < required_samples) { 1395 // Must move data from the |sync_buffer_| in order to get 30 ms. 
    borrowed_samples_per_channel = static_cast<int>(required_samples -
        decoded_length_per_channel);
    // Shift the decoded data back to make room for the borrowed samples at
    // the start of |decoded_buffer|.
    memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
            decoded_buffer,
            sizeof(int16_t) * decoded_length);
    sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
                                         decoded_buffer);
    decoded_length = required_samples * num_channels;
  }

  int16_t samples_removed;
  Accelerate::ReturnCodes return_code = accelerate_->Process(
      decoded_buffer, decoded_length, algorithm_buffer_.get(),
      &samples_removed);
  stats_.AcceleratedSamples(samples_removed);
  switch (return_code) {
    case Accelerate::kSuccess:
      last_mode_ = kModeAccelerateSuccess;
      break;
    case Accelerate::kSuccessLowEnergy:
      last_mode_ = kModeAccelerateLowEnergy;
      break;
    case Accelerate::kNoStretch:
      last_mode_ = kModeAccelerateFail;
      break;
    case Accelerate::kError:
      // TODO(hlundin): Map to kModeError instead?
      last_mode_ = kModeAccelerateFail;
      return kAccelerateError;
  }

  if (borrowed_samples_per_channel > 0) {
    // Copy borrowed samples back to the |sync_buffer_|.
    size_t length = algorithm_buffer_->Size();
    if (length < borrowed_samples_per_channel) {
      // This destroys the beginning of the buffer, but will not cause any
      // problems.
      sync_buffer_->ReplaceAtIndex(*algorithm_buffer_,
                                   sync_buffer_->Size() -
                                   borrowed_samples_per_channel);
      sync_buffer_->PushFrontZeros(borrowed_samples_per_channel - length);
      algorithm_buffer_->PopFront(length);
      assert(algorithm_buffer_->Empty());
    } else {
      sync_buffer_->ReplaceAtIndex(*algorithm_buffer_,
                                   borrowed_samples_per_channel,
                                   sync_buffer_->Size() -
                                   borrowed_samples_per_channel);
      algorithm_buffer_->PopFront(borrowed_samples_per_channel);
    }
  }

  // If last packet was decoded as an inband CNG, set mode to CNG instead.
  if (speech_type == AudioDecoder::kComfortNoise) {
    last_mode_ = kModeCodecInternalCng;
  }
  if (!play_dtmf) {
    dtmf_tone_generator_->Reset();
  }
  expand_->Reset();
  return 0;
}

// Runs the preemptive expand operation on the decoded data, writing the
// result to |algorithm_buffer_|.
//
// The operation needs 30 ms of audio per channel. If |decoded_buffer| holds
// less, the missing samples are read from the end of |sync_buffer_| and
// placed in front of the decoded data; afterwards the same number of samples
// is written back to the end of |sync_buffer_| from the start of
// |algorithm_buffer_|. |decoded_buffer| must therefore have room for 30 ms of
// interleaved audio.
//
// Returns 0 on success and kPreemptiveExpandError if the operation fails.
// The DTMF tone generator is reset on success unless |play_dtmf| is set.
int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer,
                                  size_t decoded_length,
                                  AudioDecoder::SpeechType speech_type,
                                  bool play_dtmf) {
  const size_t required_samples = 240 * fs_mult_;  // Must have 30 ms.
  size_t num_channels = algorithm_buffer_->Channels();
  int borrowed_samples_per_channel = 0;
  int old_borrowed_samples_per_channel = 0;
  size_t decoded_length_per_channel = decoded_length / num_channels;
  if (decoded_length_per_channel < required_samples) {
    // Must move data from the |sync_buffer_| in order to get 30 ms.
    borrowed_samples_per_channel = static_cast<int>(required_samples -
        decoded_length_per_channel);
    // Calculate how many of these were already played out.
    // The subtraction is done in unsigned arithmetic; the cast to int and the
    // clamp to zero below handle the case where FutureLength() is larger.
    old_borrowed_samples_per_channel = static_cast<int>(
        borrowed_samples_per_channel - sync_buffer_->FutureLength());
    old_borrowed_samples_per_channel = std::max(
        0, old_borrowed_samples_per_channel);
    memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
            decoded_buffer,
            sizeof(int16_t) * decoded_length);
    sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
                                         decoded_buffer);
    decoded_length = required_samples * num_channels;
  }

  int16_t samples_added;
  PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process(
      decoded_buffer, static_cast<int>(decoded_length),
      old_borrowed_samples_per_channel,
      algorithm_buffer_.get(), &samples_added);
  stats_.PreemptiveExpandedSamples(samples_added);
  switch (return_code) {
    case PreemptiveExpand::kSuccess:
      last_mode_ = kModePreemptiveExpandSuccess;
      break;
    case PreemptiveExpand::kSuccessLowEnergy:
      last_mode_ = kModePreemptiveExpandLowEnergy;
      break;
    case PreemptiveExpand::kNoStretch:
      last_mode_ = kModePreemptiveExpandFail;
      break;
    case PreemptiveExpand::kError:
      // TODO(hlundin): Map to kModeError instead?
      last_mode_ = kModePreemptiveExpandFail;
      return kPreemptiveExpandError;
  }

  if (borrowed_samples_per_channel > 0) {
    // Copy borrowed samples back to the |sync_buffer_|.
    sync_buffer_->ReplaceAtIndex(
        *algorithm_buffer_, borrowed_samples_per_channel,
        sync_buffer_->Size() - borrowed_samples_per_channel);
    algorithm_buffer_->PopFront(borrowed_samples_per_channel);
  }

  // If last packet was decoded as an inband CNG, set mode to CNG instead.
  if (speech_type == AudioDecoder::kComfortNoise) {
    last_mode_ = kModeCodecInternalCng;
  }
  if (!play_dtmf) {
    dtmf_tone_generator_->Reset();
  }
  expand_->Reset();
  return 0;
}

// Generates one output block of RFC 3389 comfort noise into
// |algorithm_buffer_|. If |packet_list| holds a packet, it is first used to
// update the comfort noise parameters.
int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) {
  if (!packet_list->empty()) {
    // Must have exactly one SID frame at this point.
    assert(packet_list->size() == 1);
    Packet* packet = packet_list->front();
    packet_list->pop_front();
    if (!decoder_database_->IsComfortNoise(packet->header.payloadType)) {
#ifdef LEGACY_BITEXACT
      // This can happen due to a bug in GetDecision. Change the payload type
      // to a CNG type, and move on. Note that this means that we are in fact
      // sending a non-CNG payload to the comfort noise decoder for decoding.
      // Clearly wrong, but will maintain bit-exactness with legacy.
1538 if (fs_hz_ == 8000) { 1539 packet->header.payloadType = 1540 decoder_database_->GetRtpPayloadType(kDecoderCNGnb); 1541 } else if (fs_hz_ == 16000) { 1542 packet->header.payloadType = 1543 decoder_database_->GetRtpPayloadType(kDecoderCNGwb); 1544 } else if (fs_hz_ == 32000) { 1545 packet->header.payloadType = 1546 decoder_database_->GetRtpPayloadType(kDecoderCNGswb32kHz); 1547 } else if (fs_hz_ == 48000) { 1548 packet->header.payloadType = 1549 decoder_database_->GetRtpPayloadType(kDecoderCNGswb48kHz); 1550 } 1551 assert(decoder_database_->IsComfortNoise(packet->header.payloadType)); 1552 #else 1553 LOG(LS_ERROR) << "Trying to decode non-CNG payload as CNG."; 1554 return kOtherError; 1555 #endif 1556 } 1557 // UpdateParameters() deletes |packet|. 1558 if (comfort_noise_->UpdateParameters(packet) == 1559 ComfortNoise::kInternalError) { 1560 LOG_FERR0(LS_WARNING, UpdateParameters); 1561 algorithm_buffer_->Zeros(output_size_samples_); 1562 return -comfort_noise_->internal_error_code(); 1563 } 1564 } 1565 int cn_return = comfort_noise_->Generate(output_size_samples_, 1566 algorithm_buffer_.get()); 1567 expand_->Reset(); 1568 last_mode_ = kModeRfc3389Cng; 1569 if (!play_dtmf) { 1570 dtmf_tone_generator_->Reset(); 1571 } 1572 if (cn_return == ComfortNoise::kInternalError) { 1573 LOG_FERR1(LS_WARNING, comfort_noise_->Generate, cn_return); 1574 decoder_error_code_ = comfort_noise_->internal_error_code(); 1575 return kComfortNoiseErrorCode; 1576 } else if (cn_return == ComfortNoise::kUnknownPayloadType) { 1577 LOG_FERR1(LS_WARNING, comfort_noise_->Generate, cn_return); 1578 return kUnknownRtpPayloadType; 1579 } 1580 return 0; 1581 } 1582 1583 void NetEqImpl::DoCodecInternalCng() { 1584 int length = 0; 1585 // TODO(hlundin): Will probably need a longer buffer for multi-channel. 
1586 int16_t decoded_buffer[kMaxFrameSize]; 1587 AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); 1588 if (decoder) { 1589 const uint8_t* dummy_payload = NULL; 1590 AudioDecoder::SpeechType speech_type; 1591 length = decoder->Decode(dummy_payload, 0, decoded_buffer, &speech_type); 1592 } 1593 assert(mute_factor_array_.get()); 1594 normal_->Process(decoded_buffer, length, last_mode_, mute_factor_array_.get(), 1595 algorithm_buffer_.get()); 1596 last_mode_ = kModeCodecInternalCng; 1597 expand_->Reset(); 1598 } 1599 1600 int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) { 1601 // This block of the code and the block further down, handling |dtmf_switch| 1602 // are commented out. Otherwise playing out-of-band DTMF would fail in VoE 1603 // test, DtmfTest.ManualSuccessfullySendsOutOfBandTelephoneEvents. This is 1604 // equivalent to |dtmf_switch| always be false. 1605 // 1606 // See http://webrtc-codereview.appspot.com/1195004/ for discussion 1607 // On this issue. This change might cause some glitches at the point of 1608 // switch from audio to DTMF. Issue 1545 is filed to track this. 1609 // 1610 // bool dtmf_switch = false; 1611 // if ((last_mode_ != kModeDtmf) && dtmf_tone_generator_->initialized()) { 1612 // // Special case; see below. 1613 // // We must catch this before calling Generate, since |initialized| is 1614 // // modified in that call. 1615 // dtmf_switch = true; 1616 // } 1617 1618 int dtmf_return_value = 0; 1619 if (!dtmf_tone_generator_->initialized()) { 1620 // Initialize if not already done. 1621 dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, 1622 dtmf_event.volume); 1623 } 1624 1625 if (dtmf_return_value == 0) { 1626 // Generate DTMF signal. 
1627 dtmf_return_value = dtmf_tone_generator_->Generate(output_size_samples_, 1628 algorithm_buffer_.get()); 1629 } 1630 1631 if (dtmf_return_value < 0) { 1632 algorithm_buffer_->Zeros(output_size_samples_); 1633 return dtmf_return_value; 1634 } 1635 1636 // if (dtmf_switch) { 1637 // // This is the special case where the previous operation was DTMF 1638 // // overdub, but the current instruction is "regular" DTMF. We must make 1639 // // sure that the DTMF does not have any discontinuities. The first DTMF 1640 // // sample that we generate now must be played out immediately, therefore 1641 // // it must be copied to the speech buffer. 1642 // // TODO(hlundin): This code seems incorrect. (Legacy.) Write test and 1643 // // verify correct operation. 1644 // assert(false); 1645 // // Must generate enough data to replace all of the |sync_buffer_| 1646 // // "future". 1647 // int required_length = sync_buffer_->FutureLength(); 1648 // assert(dtmf_tone_generator_->initialized()); 1649 // dtmf_return_value = dtmf_tone_generator_->Generate(required_length, 1650 // algorithm_buffer_); 1651 // assert((size_t) required_length == algorithm_buffer_->Size()); 1652 // if (dtmf_return_value < 0) { 1653 // algorithm_buffer_->Zeros(output_size_samples_); 1654 // return dtmf_return_value; 1655 // } 1656 // 1657 // // Overwrite the "future" part of the speech buffer with the new DTMF 1658 // // data. 1659 // // TODO(hlundin): It seems that this overwriting has gone lost. 1660 // // Not adapted for multi-channel yet. 1661 // assert(algorithm_buffer_->Channels() == 1); 1662 // if (algorithm_buffer_->Channels() != 1) { 1663 // LOG(LS_WARNING) << "DTMF not supported for more than one channel"; 1664 // return kStereoNotSupported; 1665 // } 1666 // // Shuffle the remaining data to the beginning of algorithm buffer. 
1667 // algorithm_buffer_->PopFront(sync_buffer_->FutureLength()); 1668 // } 1669 1670 sync_buffer_->IncreaseEndTimestamp(output_size_samples_); 1671 expand_->Reset(); 1672 last_mode_ = kModeDtmf; 1673 1674 // Set to false because the DTMF is already in the algorithm buffer. 1675 *play_dtmf = false; 1676 return 0; 1677 } 1678 1679 void NetEqImpl::DoAlternativePlc(bool increase_timestamp) { 1680 AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); 1681 int length; 1682 if (decoder && decoder->HasDecodePlc()) { 1683 // Use the decoder's packet-loss concealment. 1684 // TODO(hlundin): Will probably need a longer buffer for multi-channel. 1685 int16_t decoded_buffer[kMaxFrameSize]; 1686 length = decoder->DecodePlc(1, decoded_buffer); 1687 if (length > 0) { 1688 algorithm_buffer_->PushBackInterleaved(decoded_buffer, length); 1689 } else { 1690 length = 0; 1691 } 1692 } else { 1693 // Do simple zero-stuffing. 1694 length = output_size_samples_; 1695 algorithm_buffer_->Zeros(length); 1696 // By not advancing the timestamp, NetEq inserts samples. 1697 stats_.AddZeros(length); 1698 } 1699 if (increase_timestamp) { 1700 sync_buffer_->IncreaseEndTimestamp(length); 1701 } 1702 expand_->Reset(); 1703 } 1704 1705 int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels, 1706 int16_t* output) const { 1707 size_t out_index = 0; 1708 int overdub_length = output_size_samples_; // Default value. 1709 1710 if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) { 1711 // Special operation for transition from "DTMF only" to "DTMF overdub". 
1712 out_index = std::min( 1713 sync_buffer_->dtmf_index() - sync_buffer_->next_index(), 1714 static_cast<size_t>(output_size_samples_)); 1715 overdub_length = output_size_samples_ - static_cast<int>(out_index); 1716 } 1717 1718 AudioMultiVector dtmf_output(num_channels); 1719 int dtmf_return_value = 0; 1720 if (!dtmf_tone_generator_->initialized()) { 1721 dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, 1722 dtmf_event.volume); 1723 } 1724 if (dtmf_return_value == 0) { 1725 dtmf_return_value = dtmf_tone_generator_->Generate(overdub_length, 1726 &dtmf_output); 1727 assert((size_t) overdub_length == dtmf_output.Size()); 1728 } 1729 dtmf_output.ReadInterleaved(overdub_length, &output[out_index]); 1730 return dtmf_return_value < 0 ? dtmf_return_value : 0; 1731 } 1732 1733 int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) { 1734 bool first_packet = true; 1735 uint8_t prev_payload_type = 0; 1736 uint32_t prev_timestamp = 0; 1737 uint16_t prev_sequence_number = 0; 1738 bool next_packet_available = false; 1739 1740 const RTPHeader* header = packet_buffer_->NextRtpHeader(); 1741 assert(header); 1742 if (!header) { 1743 return -1; 1744 } 1745 uint32_t first_timestamp = header->timestamp; 1746 int extracted_samples = 0; 1747 1748 // Packet extraction loop. 1749 do { 1750 timestamp_ = header->timestamp; 1751 int discard_count = 0; 1752 Packet* packet = packet_buffer_->GetNextPacket(&discard_count); 1753 // |header| may be invalid after the |packet_buffer_| operation. 1754 header = NULL; 1755 if (!packet) { 1756 LOG_FERR1(LS_ERROR, GetNextPacket, discard_count) << 1757 "Should always be able to extract a packet here"; 1758 assert(false); // Should always be able to extract a packet here. 1759 return -1; 1760 } 1761 stats_.PacketsDiscarded(discard_count); 1762 // Store waiting time in ms; packets->waiting_time is in "output blocks". 
1763 stats_.StoreWaitingTime(packet->waiting_time * kOutputSizeMs); 1764 assert(packet->payload_length > 0); 1765 packet_list->push_back(packet); // Store packet in list. 1766 1767 if (first_packet) { 1768 first_packet = false; 1769 decoded_packet_sequence_number_ = prev_sequence_number = 1770 packet->header.sequenceNumber; 1771 decoded_packet_timestamp_ = prev_timestamp = packet->header.timestamp; 1772 prev_payload_type = packet->header.payloadType; 1773 } 1774 1775 // Store number of extracted samples. 1776 int packet_duration = 0; 1777 AudioDecoder* decoder = decoder_database_->GetDecoder( 1778 packet->header.payloadType); 1779 if (decoder) { 1780 if (packet->sync_packet) { 1781 packet_duration = decoder_frame_length_; 1782 } else { 1783 packet_duration = packet->primary ? 1784 decoder->PacketDuration(packet->payload, packet->payload_length) : 1785 decoder->PacketDurationRedundant(packet->payload, 1786 packet->payload_length); 1787 } 1788 } else { 1789 LOG_FERR1(LS_WARNING, GetDecoder, packet->header.payloadType) << 1790 "Could not find a decoder for a packet about to be extracted."; 1791 assert(false); 1792 } 1793 if (packet_duration <= 0) { 1794 // Decoder did not return a packet duration. Assume that the packet 1795 // contains the same number of samples as the previous one. 1796 packet_duration = decoder_frame_length_; 1797 } 1798 extracted_samples = packet->header.timestamp - first_timestamp + 1799 packet_duration; 1800 1801 // Check what packet is available next. 1802 header = packet_buffer_->NextRtpHeader(); 1803 next_packet_available = false; 1804 if (header && prev_payload_type == header->payloadType) { 1805 int16_t seq_no_diff = header->sequenceNumber - prev_sequence_number; 1806 int32_t ts_diff = header->timestamp - prev_timestamp; 1807 if (seq_no_diff == 1 || 1808 (seq_no_diff == 0 && ts_diff == decoder_frame_length_)) { 1809 // The next sequence number is available, or the next part of a packet 1810 // that was split into pieces upon insertion. 
1811 next_packet_available = true; 1812 } 1813 prev_sequence_number = header->sequenceNumber; 1814 } 1815 } while (extracted_samples < required_samples && next_packet_available); 1816 1817 if (extracted_samples > 0) { 1818 // Delete old packets only when we are going to decode something. Otherwise, 1819 // we could end up in the situation where we never decode anything, since 1820 // all incoming packets are considered too old but the buffer will also 1821 // never be flooded and flushed. 1822 packet_buffer_->DiscardOldPackets(timestamp_); 1823 } 1824 1825 return extracted_samples; 1826 } 1827 1828 void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) { 1829 // Delete objects and create new ones. 1830 expand_.reset(expand_factory_->Create(background_noise_.get(), 1831 sync_buffer_.get(), &random_vector_, 1832 fs_hz, channels)); 1833 merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get())); 1834 } 1835 1836 void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { 1837 LOG_API2(fs_hz, channels); 1838 // TODO(hlundin): Change to an enumerator and skip assert. 1839 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000); 1840 assert(channels > 0); 1841 1842 fs_hz_ = fs_hz; 1843 fs_mult_ = fs_hz / 8000; 1844 output_size_samples_ = kOutputSizeMs * 8 * fs_mult_; 1845 decoder_frame_length_ = 3 * output_size_samples_; // Initialize to 30ms. 1846 1847 last_mode_ = kModeNormal; 1848 1849 // Create a new array of mute factors and set all to 1. 1850 mute_factor_array_.reset(new int16_t[channels]); 1851 for (size_t i = 0; i < channels; ++i) { 1852 mute_factor_array_[i] = 16384; // 1.0 in Q14. 1853 } 1854 1855 // Reset comfort noise decoder, if there is one active. 1856 AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); 1857 if (cng_decoder) { 1858 cng_decoder->Init(); 1859 } 1860 1861 // Reinit post-decode VAD with new sample rate. 1862 assert(vad_.get()); // Cannot be NULL here. 
1863 vad_->Init(); 1864 1865 // Delete algorithm buffer and create a new one. 1866 algorithm_buffer_.reset(new AudioMultiVector(channels)); 1867 1868 // Delete sync buffer and create a new one. 1869 sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_)); 1870 1871 // Delete BackgroundNoise object and create a new one. 1872 background_noise_.reset(new BackgroundNoise(channels)); 1873 background_noise_->set_mode(background_noise_mode_); 1874 1875 // Reset random vector. 1876 random_vector_.Reset(); 1877 1878 UpdatePlcComponents(fs_hz, channels); 1879 1880 // Move index so that we create a small set of future samples (all 0). 1881 sync_buffer_->set_next_index(sync_buffer_->next_index() - 1882 expand_->overlap_length()); 1883 1884 normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_, 1885 expand_.get())); 1886 accelerate_.reset( 1887 accelerate_factory_->Create(fs_hz, channels, *background_noise_)); 1888 preemptive_expand_.reset(preemptive_expand_factory_->Create( 1889 fs_hz, channels, 1890 *background_noise_, 1891 static_cast<int>(expand_->overlap_length()))); 1892 1893 // Delete ComfortNoise object and create a new one. 1894 comfort_noise_.reset(new ComfortNoise(fs_hz, decoder_database_.get(), 1895 sync_buffer_.get())); 1896 1897 // Verify that |decoded_buffer_| is long enough. 1898 if (decoded_buffer_length_ < kMaxFrameSize * channels) { 1899 // Reallocate to larger size. 1900 decoded_buffer_length_ = kMaxFrameSize * channels; 1901 decoded_buffer_.reset(new int16_t[decoded_buffer_length_]); 1902 } 1903 1904 // Create DecisionLogic if it is not created yet, then communicate new sample 1905 // rate and output size to DecisionLogic object. 
1906 if (!decision_logic_.get()) { 1907 CreateDecisionLogic(kPlayoutOn); 1908 } 1909 decision_logic_->SetSampleRate(fs_hz_, output_size_samples_); 1910 } 1911 1912 NetEqOutputType NetEqImpl::LastOutputType() { 1913 assert(vad_.get()); 1914 assert(expand_.get()); 1915 if (last_mode_ == kModeCodecInternalCng || last_mode_ == kModeRfc3389Cng) { 1916 return kOutputCNG; 1917 } else if (last_mode_ == kModeExpand && expand_->MuteFactor(0) == 0) { 1918 // Expand mode has faded down to background noise only (very long expand). 1919 return kOutputPLCtoCNG; 1920 } else if (last_mode_ == kModeExpand) { 1921 return kOutputPLC; 1922 } else if (vad_->running() && !vad_->active_speech()) { 1923 return kOutputVADPassive; 1924 } else { 1925 return kOutputNormal; 1926 } 1927 } 1928 1929 void NetEqImpl::CreateDecisionLogic(NetEqPlayoutMode mode) { 1930 decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_, 1931 mode, 1932 decoder_database_.get(), 1933 *packet_buffer_.get(), 1934 delay_manager_.get(), 1935 buffer_level_filter_.get())); 1936 } 1937 } // namespace webrtc 1938