1 /* 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_coding/acm2/acm_receiver.h" 12 13 #include <stdlib.h> // malloc 14 15 #include <algorithm> // sort 16 #include <vector> 17 18 #include "webrtc/base/checks.h" 19 #include "webrtc/base/format_macros.h" 20 #include "webrtc/base/logging.h" 21 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 22 #include "webrtc/common_types.h" 23 #include "webrtc/modules/audio_coding/codecs/audio_decoder.h" 24 #include "webrtc/modules/audio_coding/acm2/acm_resampler.h" 25 #include "webrtc/modules/audio_coding/acm2/call_statistics.h" 26 #include "webrtc/modules/audio_coding/neteq/include/neteq.h" 27 #include "webrtc/system_wrappers/include/clock.h" 28 #include "webrtc/system_wrappers/include/critical_section_wrapper.h" 29 #include "webrtc/system_wrappers/include/tick_util.h" 30 #include "webrtc/system_wrappers/include/trace.h" 31 32 namespace webrtc { 33 34 namespace acm2 { 35 36 namespace { 37 38 // |vad_activity_| field of |audio_frame| is set to |previous_audio_activity_| 39 // before the call to this function. 40 void SetAudioFrameActivityAndType(bool vad_enabled, 41 NetEqOutputType type, 42 AudioFrame* audio_frame) { 43 if (vad_enabled) { 44 switch (type) { 45 case kOutputNormal: { 46 audio_frame->vad_activity_ = AudioFrame::kVadActive; 47 audio_frame->speech_type_ = AudioFrame::kNormalSpeech; 48 break; 49 } 50 case kOutputVADPassive: { 51 audio_frame->vad_activity_ = AudioFrame::kVadPassive; 52 audio_frame->speech_type_ = AudioFrame::kNormalSpeech; 53 break; 54 } 55 case kOutputCNG: { 56 audio_frame->vad_activity_ = AudioFrame::kVadPassive; 57 audio_frame->speech_type_ = AudioFrame::kCNG; 58 break; 59 } 60 case kOutputPLC: { 61 // Don't change |audio_frame->vad_activity_|, it should be the same as 62 // |previous_audio_activity_|. 63 audio_frame->speech_type_ = AudioFrame::kPLC; 64 break; 65 } 66 case kOutputPLCtoCNG: { 67 audio_frame->vad_activity_ = AudioFrame::kVadPassive; 68 audio_frame->speech_type_ = AudioFrame::kPLCCNG; 69 break; 70 } 71 default: 72 assert(false); 73 } 74 } else { 75 // Always return kVadUnknown when receive VAD is inactive 76 audio_frame->vad_activity_ = AudioFrame::kVadUnknown; 77 switch (type) { 78 case kOutputNormal: { 79 audio_frame->speech_type_ = AudioFrame::kNormalSpeech; 80 break; 81 } 82 case kOutputCNG: { 83 audio_frame->speech_type_ = AudioFrame::kCNG; 84 break; 85 } 86 case kOutputPLC: { 87 audio_frame->speech_type_ = AudioFrame::kPLC; 88 break; 89 } 90 case kOutputPLCtoCNG: { 91 audio_frame->speech_type_ = AudioFrame::kPLCCNG; 92 break; 93 } 94 case kOutputVADPassive: { 95 // Normally, we should no get any VAD decision if post-decoding VAD is 96 // not active. However, if post-decoding VAD has been active then 97 // disabled, we might be here for couple of frames. 98 audio_frame->speech_type_ = AudioFrame::kNormalSpeech; 99 LOG(WARNING) << "Post-decoding VAD is disabled but output is " 100 << "labeled VAD-passive"; 101 break; 102 } 103 default: 104 assert(false); 105 } 106 } 107 } 108 109 // Is the given codec a CNG codec? 110 // TODO(kwiberg): Move to RentACodec. 111 bool IsCng(int codec_id) { 112 auto i = RentACodec::CodecIdFromIndex(codec_id); 113 return (i && (*i == RentACodec::CodecId::kCNNB || 114 *i == RentACodec::CodecId::kCNWB || 115 *i == RentACodec::CodecId::kCNSWB || 116 *i == RentACodec::CodecId::kCNFB)); 117 } 118 119 } // namespace 120 121 AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config) 122 : crit_sect_(CriticalSectionWrapper::CreateCriticalSection()), 123 id_(config.id), 124 last_audio_decoder_(nullptr), 125 previous_audio_activity_(AudioFrame::kVadPassive), 126 audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]), 127 last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]), 128 neteq_(NetEq::Create(config.neteq_config)), 129 vad_enabled_(config.neteq_config.enable_post_decode_vad), 130 clock_(config.clock), 131 resampled_last_output_frame_(true) { 132 assert(clock_); 133 memset(audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples); 134 memset(last_audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples); 135 } 136 137 AcmReceiver::~AcmReceiver() { 138 delete neteq_; 139 } 140 141 int AcmReceiver::SetMinimumDelay(int delay_ms) { 142 if (neteq_->SetMinimumDelay(delay_ms)) 143 return 0; 144 LOG(LERROR) << "AcmReceiver::SetExtraDelay " << delay_ms; 145 return -1; 146 } 147 148 int AcmReceiver::SetMaximumDelay(int delay_ms) { 149 if (neteq_->SetMaximumDelay(delay_ms)) 150 return 0; 151 LOG(LERROR) << "AcmReceiver::SetExtraDelay " << delay_ms; 152 return -1; 153 } 154 155 int AcmReceiver::LeastRequiredDelayMs() const { 156 return neteq_->LeastRequiredDelayMs(); 157 } 158 159 rtc::Optional<int> AcmReceiver::last_packet_sample_rate_hz() const { 160 CriticalSectionScoped lock(crit_sect_.get()); 161 return last_packet_sample_rate_hz_; 162 } 163 164 int AcmReceiver::last_output_sample_rate_hz() const { 165 return neteq_->last_output_sample_rate_hz(); 166 } 167 168 int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header, 169 rtc::ArrayView<const uint8_t> incoming_payload) { 170 uint32_t receive_timestamp = 0; 171 const RTPHeader* header = &rtp_header.header; // Just a shorthand. 172 173 { 174 CriticalSectionScoped lock(crit_sect_.get()); 175 176 const Decoder* decoder = RtpHeaderToDecoder(*header, incoming_payload[0]); 177 if (!decoder) { 178 LOG_F(LS_ERROR) << "Payload-type " 179 << static_cast<int>(header->payloadType) 180 << " is not registered."; 181 return -1; 182 } 183 const int sample_rate_hz = [&decoder] { 184 const auto ci = RentACodec::CodecIdFromIndex(decoder->acm_codec_id); 185 return ci ? RentACodec::CodecInstById(*ci)->plfreq : -1; 186 }(); 187 receive_timestamp = NowInTimestamp(sample_rate_hz); 188 189 // If this is a CNG while the audio codec is not mono, skip pushing in 190 // packets into NetEq. 191 if (IsCng(decoder->acm_codec_id) && last_audio_decoder_ && 192 last_audio_decoder_->channels > 1) 193 return 0; 194 if (!IsCng(decoder->acm_codec_id) && 195 decoder->acm_codec_id != 196 *RentACodec::CodecIndexFromId(RentACodec::CodecId::kAVT)) { 197 last_audio_decoder_ = decoder; 198 last_packet_sample_rate_hz_ = rtc::Optional<int>(decoder->sample_rate_hz); 199 } 200 201 } // |crit_sect_| is released. 202 203 if (neteq_->InsertPacket(rtp_header, incoming_payload, receive_timestamp) < 204 0) { 205 LOG(LERROR) << "AcmReceiver::InsertPacket " 206 << static_cast<int>(header->payloadType) 207 << " Failed to insert packet"; 208 return -1; 209 } 210 return 0; 211 } 212 213 int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) { 214 enum NetEqOutputType type; 215 size_t samples_per_channel; 216 size_t num_channels; 217 218 // Accessing members, take the lock. 219 CriticalSectionScoped lock(crit_sect_.get()); 220 221 // Always write the output to |audio_buffer_| first. 222 if (neteq_->GetAudio(AudioFrame::kMaxDataSizeSamples, 223 audio_buffer_.get(), 224 &samples_per_channel, 225 &num_channels, 226 &type) != NetEq::kOK) { 227 LOG(LERROR) << "AcmReceiver::GetAudio - NetEq Failed."; 228 return -1; 229 } 230 231 const int current_sample_rate_hz = neteq_->last_output_sample_rate_hz(); 232 233 // Update if resampling is required. 234 const bool need_resampling = 235 (desired_freq_hz != -1) && (current_sample_rate_hz != desired_freq_hz); 236 237 if (need_resampling && !resampled_last_output_frame_) { 238 // Prime the resampler with the last frame. 239 int16_t temp_output[AudioFrame::kMaxDataSizeSamples]; 240 int samples_per_channel_int = resampler_.Resample10Msec( 241 last_audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz, 242 num_channels, AudioFrame::kMaxDataSizeSamples, temp_output); 243 if (samples_per_channel_int < 0) { 244 LOG(LERROR) << "AcmReceiver::GetAudio - " 245 "Resampling last_audio_buffer_ failed."; 246 return -1; 247 } 248 samples_per_channel = static_cast<size_t>(samples_per_channel_int); 249 } 250 251 // The audio in |audio_buffer_| is tansferred to |audio_frame_| below, either 252 // through resampling, or through straight memcpy. 253 // TODO(henrik.lundin) Glitches in the output may appear if the output rate 254 // from NetEq changes. See WebRTC issue 3923. 255 if (need_resampling) { 256 int samples_per_channel_int = resampler_.Resample10Msec( 257 audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz, 258 num_channels, AudioFrame::kMaxDataSizeSamples, audio_frame->data_); 259 if (samples_per_channel_int < 0) { 260 LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed."; 261 return -1; 262 } 263 samples_per_channel = static_cast<size_t>(samples_per_channel_int); 264 resampled_last_output_frame_ = true; 265 } else { 266 resampled_last_output_frame_ = false; 267 // We might end up here ONLY if codec is changed. 268 memcpy(audio_frame->data_, 269 audio_buffer_.get(), 270 samples_per_channel * num_channels * sizeof(int16_t)); 271 } 272 273 // Swap buffers, so that the current audio is stored in |last_audio_buffer_| 274 // for next time. 275 audio_buffer_.swap(last_audio_buffer_); 276 277 audio_frame->num_channels_ = num_channels; 278 audio_frame->samples_per_channel_ = samples_per_channel; 279 audio_frame->sample_rate_hz_ = static_cast<int>(samples_per_channel * 100); 280 281 // Should set |vad_activity| before calling SetAudioFrameActivityAndType(). 282 audio_frame->vad_activity_ = previous_audio_activity_; 283 SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame); 284 previous_audio_activity_ = audio_frame->vad_activity_; 285 call_stats_.DecodedByNetEq(audio_frame->speech_type_); 286 287 // Computes the RTP timestamp of the first sample in |audio_frame| from 288 // |GetPlayoutTimestamp|, which is the timestamp of the last sample of 289 // |audio_frame|. 290 uint32_t playout_timestamp = 0; 291 if (GetPlayoutTimestamp(&playout_timestamp)) { 292 audio_frame->timestamp_ = playout_timestamp - 293 static_cast<uint32_t>(audio_frame->samples_per_channel_); 294 } else { 295 // Remain 0 until we have a valid |playout_timestamp|. 296 audio_frame->timestamp_ = 0; 297 } 298 299 return 0; 300 } 301 302 int32_t AcmReceiver::AddCodec(int acm_codec_id, 303 uint8_t payload_type, 304 size_t channels, 305 int sample_rate_hz, 306 AudioDecoder* audio_decoder, 307 const std::string& name) { 308 const auto neteq_decoder = [acm_codec_id, channels]() -> NetEqDecoder { 309 if (acm_codec_id == -1) 310 return NetEqDecoder::kDecoderArbitrary; // External decoder. 311 const rtc::Optional<RentACodec::CodecId> cid = 312 RentACodec::CodecIdFromIndex(acm_codec_id); 313 RTC_DCHECK(cid) << "Invalid codec index: " << acm_codec_id; 314 const rtc::Optional<NetEqDecoder> ned = 315 RentACodec::NetEqDecoderFromCodecId(*cid, channels); 316 RTC_DCHECK(ned) << "Invalid codec ID: " << static_cast<int>(*cid); 317 return *ned; 318 }(); 319 320 CriticalSectionScoped lock(crit_sect_.get()); 321 322 // The corresponding NetEq decoder ID. 323 // If this codec has been registered before. 324 auto it = decoders_.find(payload_type); 325 if (it != decoders_.end()) { 326 const Decoder& decoder = it->second; 327 if (acm_codec_id != -1 && decoder.acm_codec_id == acm_codec_id && 328 decoder.channels == channels && 329 decoder.sample_rate_hz == sample_rate_hz) { 330 // Re-registering the same codec. Do nothing and return. 331 return 0; 332 } 333 334 // Changing codec. First unregister the old codec, then register the new 335 // one. 336 if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) { 337 LOG(LERROR) << "Cannot remove payload " << static_cast<int>(payload_type); 338 return -1; 339 } 340 341 decoders_.erase(it); 342 } 343 344 int ret_val; 345 if (!audio_decoder) { 346 ret_val = neteq_->RegisterPayloadType(neteq_decoder, name, payload_type); 347 } else { 348 ret_val = neteq_->RegisterExternalDecoder( 349 audio_decoder, neteq_decoder, name, payload_type, sample_rate_hz); 350 } 351 if (ret_val != NetEq::kOK) { 352 LOG(LERROR) << "AcmReceiver::AddCodec " << acm_codec_id 353 << static_cast<int>(payload_type) 354 << " channels: " << channels; 355 return -1; 356 } 357 358 Decoder decoder; 359 decoder.acm_codec_id = acm_codec_id; 360 decoder.payload_type = payload_type; 361 decoder.channels = channels; 362 decoder.sample_rate_hz = sample_rate_hz; 363 decoders_[payload_type] = decoder; 364 return 0; 365 } 366 367 void AcmReceiver::EnableVad() { 368 neteq_->EnableVad(); 369 CriticalSectionScoped lock(crit_sect_.get()); 370 vad_enabled_ = true; 371 } 372 373 void AcmReceiver::DisableVad() { 374 neteq_->DisableVad(); 375 CriticalSectionScoped lock(crit_sect_.get()); 376 vad_enabled_ = false; 377 } 378 379 void AcmReceiver::FlushBuffers() { 380 neteq_->FlushBuffers(); 381 } 382 383 // If failed in removing one of the codecs, this method continues to remove as 384 // many as it can. 385 int AcmReceiver::RemoveAllCodecs() { 386 int ret_val = 0; 387 CriticalSectionScoped lock(crit_sect_.get()); 388 for (auto it = decoders_.begin(); it != decoders_.end(); ) { 389 auto cur = it; 390 ++it; // it will be valid even if we erase cur 391 if (neteq_->RemovePayloadType(cur->second.payload_type) == 0) { 392 decoders_.erase(cur); 393 } else { 394 LOG_F(LS_ERROR) << "Cannot remove payload " 395 << static_cast<int>(cur->second.payload_type); 396 ret_val = -1; 397 } 398 } 399 400 // No codec is registered, invalidate last audio decoder. 401 last_audio_decoder_ = nullptr; 402 last_packet_sample_rate_hz_ = rtc::Optional<int>(); 403 return ret_val; 404 } 405 406 int AcmReceiver::RemoveCodec(uint8_t payload_type) { 407 CriticalSectionScoped lock(crit_sect_.get()); 408 auto it = decoders_.find(payload_type); 409 if (it == decoders_.end()) { // Such a payload-type is not registered. 410 return 0; 411 } 412 if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) { 413 LOG(LERROR) << "AcmReceiver::RemoveCodec" << static_cast<int>(payload_type); 414 return -1; 415 } 416 if (last_audio_decoder_ == &it->second) { 417 last_audio_decoder_ = nullptr; 418 last_packet_sample_rate_hz_ = rtc::Optional<int>(); 419 } 420 decoders_.erase(it); 421 return 0; 422 } 423 424 void AcmReceiver::set_id(int id) { 425 CriticalSectionScoped lock(crit_sect_.get()); 426 id_ = id; 427 } 428 429 bool AcmReceiver::GetPlayoutTimestamp(uint32_t* timestamp) { 430 return neteq_->GetPlayoutTimestamp(timestamp); 431 } 432 433 int AcmReceiver::LastAudioCodec(CodecInst* codec) const { 434 CriticalSectionScoped lock(crit_sect_.get()); 435 if (!last_audio_decoder_) { 436 return -1; 437 } 438 *codec = *RentACodec::CodecInstById( 439 *RentACodec::CodecIdFromIndex(last_audio_decoder_->acm_codec_id)); 440 codec->pltype = last_audio_decoder_->payload_type; 441 codec->channels = last_audio_decoder_->channels; 442 codec->plfreq = last_audio_decoder_->sample_rate_hz; 443 return 0; 444 } 445 446 void AcmReceiver::GetNetworkStatistics(NetworkStatistics* acm_stat) { 447 NetEqNetworkStatistics neteq_stat; 448 // NetEq function always returns zero, so we don't check the return value. 449 neteq_->NetworkStatistics(&neteq_stat); 450 451 acm_stat->currentBufferSize = neteq_stat.current_buffer_size_ms; 452 acm_stat->preferredBufferSize = neteq_stat.preferred_buffer_size_ms; 453 acm_stat->jitterPeaksFound = neteq_stat.jitter_peaks_found ? true : false; 454 acm_stat->currentPacketLossRate = neteq_stat.packet_loss_rate; 455 acm_stat->currentDiscardRate = neteq_stat.packet_discard_rate; 456 acm_stat->currentExpandRate = neteq_stat.expand_rate; 457 acm_stat->currentSpeechExpandRate = neteq_stat.speech_expand_rate; 458 acm_stat->currentPreemptiveRate = neteq_stat.preemptive_rate; 459 acm_stat->currentAccelerateRate = neteq_stat.accelerate_rate; 460 acm_stat->currentSecondaryDecodedRate = neteq_stat.secondary_decoded_rate; 461 acm_stat->clockDriftPPM = neteq_stat.clockdrift_ppm; 462 acm_stat->addedSamples = neteq_stat.added_zero_samples; 463 acm_stat->meanWaitingTimeMs = neteq_stat.mean_waiting_time_ms; 464 acm_stat->medianWaitingTimeMs = neteq_stat.median_waiting_time_ms; 465 acm_stat->minWaitingTimeMs = neteq_stat.min_waiting_time_ms; 466 acm_stat->maxWaitingTimeMs = neteq_stat.max_waiting_time_ms; 467 } 468 469 int AcmReceiver::DecoderByPayloadType(uint8_t payload_type, 470 CodecInst* codec) const { 471 CriticalSectionScoped lock(crit_sect_.get()); 472 auto it = decoders_.find(payload_type); 473 if (it == decoders_.end()) { 474 LOG(LERROR) << "AcmReceiver::DecoderByPayloadType " 475 << static_cast<int>(payload_type); 476 return -1; 477 } 478 const Decoder& decoder = it->second; 479 *codec = *RentACodec::CodecInstById( 480 *RentACodec::CodecIdFromIndex(decoder.acm_codec_id)); 481 codec->pltype = decoder.payload_type; 482 codec->channels = decoder.channels; 483 codec->plfreq = decoder.sample_rate_hz; 484 return 0; 485 } 486 487 int AcmReceiver::EnableNack(size_t max_nack_list_size) { 488 neteq_->EnableNack(max_nack_list_size); 489 return 0; 490 } 491 492 void AcmReceiver::DisableNack() { 493 neteq_->DisableNack(); 494 } 495 496 std::vector<uint16_t> AcmReceiver::GetNackList( 497 int64_t round_trip_time_ms) const { 498 return neteq_->GetNackList(round_trip_time_ms); 499 } 500 501 void AcmReceiver::ResetInitialDelay() { 502 neteq_->SetMinimumDelay(0); 503 // TODO(turajs): Should NetEq Buffer be flushed? 504 } 505 506 const AcmReceiver::Decoder* AcmReceiver::RtpHeaderToDecoder( 507 const RTPHeader& rtp_header, 508 uint8_t payload_type) const { 509 auto it = decoders_.find(rtp_header.payloadType); 510 const auto red_index = 511 RentACodec::CodecIndexFromId(RentACodec::CodecId::kRED); 512 if (red_index && // This ensures that RED is defined in WebRTC. 513 it != decoders_.end() && it->second.acm_codec_id == *red_index) { 514 // This is a RED packet, get the payload of the audio codec. 515 it = decoders_.find(payload_type & 0x7F); 516 } 517 518 // Check if the payload is registered. 519 return it != decoders_.end() ? &it->second : nullptr; 520 } 521 522 uint32_t AcmReceiver::NowInTimestamp(int decoder_sampling_rate) const { 523 // Down-cast the time to (32-6)-bit since we only care about 524 // the least significant bits. (32-6) bits cover 2^(32-6) = 67108864 ms. 525 // We masked 6 most significant bits of 32-bit so there is no overflow in 526 // the conversion from milliseconds to timestamp. 527 const uint32_t now_in_ms = static_cast<uint32_t>( 528 clock_->TimeInMilliseconds() & 0x03ffffff); 529 return static_cast<uint32_t>( 530 (decoder_sampling_rate / 1000) * now_in_ms); 531 } 532 533 void AcmReceiver::GetDecodingCallStatistics( 534 AudioDecodingCallStats* stats) const { 535 CriticalSectionScoped lock(crit_sect_.get()); 536 *stats = call_stats_.GetDecodingStatistics(); 537 } 538 539 } // namespace acm2 540 541 } // namespace webrtc 542