Home | History | Annotate | Download | only in acm2
      1 /*
      2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_coding/acm2/acm_receiver.h"
     12 
     13 #include <stdlib.h>  // malloc
     14 
     15 #include <algorithm>  // sort
     16 #include <vector>
     17 
     18 #include "webrtc/base/checks.h"
     19 #include "webrtc/base/format_macros.h"
     20 #include "webrtc/base/logging.h"
     21 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
     22 #include "webrtc/common_types.h"
     23 #include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
     24 #include "webrtc/modules/audio_coding/acm2/acm_resampler.h"
     25 #include "webrtc/modules/audio_coding/acm2/call_statistics.h"
     26 #include "webrtc/modules/audio_coding/neteq/include/neteq.h"
     27 #include "webrtc/system_wrappers/include/clock.h"
     28 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"
     29 #include "webrtc/system_wrappers/include/tick_util.h"
     30 #include "webrtc/system_wrappers/include/trace.h"
     31 
     32 namespace webrtc {
     33 
     34 namespace acm2 {
     35 
     36 namespace {
     37 
     38 // |vad_activity_| field of |audio_frame| is set to |previous_audio_activity_|
     39 // before the call to this function.
     40 void SetAudioFrameActivityAndType(bool vad_enabled,
     41                                   NetEqOutputType type,
     42                                   AudioFrame* audio_frame) {
     43   if (vad_enabled) {
     44     switch (type) {
     45       case kOutputNormal: {
     46         audio_frame->vad_activity_ = AudioFrame::kVadActive;
     47         audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
     48         break;
     49       }
     50       case kOutputVADPassive: {
     51         audio_frame->vad_activity_ = AudioFrame::kVadPassive;
     52         audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
     53         break;
     54       }
     55       case kOutputCNG: {
     56         audio_frame->vad_activity_ = AudioFrame::kVadPassive;
     57         audio_frame->speech_type_ = AudioFrame::kCNG;
     58         break;
     59       }
     60       case kOutputPLC: {
     61         // Don't change |audio_frame->vad_activity_|, it should be the same as
     62         // |previous_audio_activity_|.
     63         audio_frame->speech_type_ = AudioFrame::kPLC;
     64         break;
     65       }
     66       case kOutputPLCtoCNG: {
     67         audio_frame->vad_activity_ = AudioFrame::kVadPassive;
     68         audio_frame->speech_type_ = AudioFrame::kPLCCNG;
     69         break;
     70       }
     71       default:
     72         assert(false);
     73     }
     74   } else {
     75     // Always return kVadUnknown when receive VAD is inactive
     76     audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
     77     switch (type) {
     78       case kOutputNormal: {
     79         audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
     80         break;
     81       }
     82       case kOutputCNG: {
     83         audio_frame->speech_type_ = AudioFrame::kCNG;
     84         break;
     85       }
     86       case kOutputPLC: {
     87         audio_frame->speech_type_ = AudioFrame::kPLC;
     88         break;
     89       }
     90       case kOutputPLCtoCNG: {
     91         audio_frame->speech_type_ = AudioFrame::kPLCCNG;
     92         break;
     93       }
     94       case kOutputVADPassive: {
     95         // Normally, we should no get any VAD decision if post-decoding VAD is
     96         // not active. However, if post-decoding VAD has been active then
     97         // disabled, we might be here for couple of frames.
     98         audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
     99         LOG(WARNING) << "Post-decoding VAD is disabled but output is "
    100             << "labeled VAD-passive";
    101         break;
    102       }
    103       default:
    104         assert(false);
    105     }
    106   }
    107 }
    108 
    109 // Is the given codec a CNG codec?
    110 // TODO(kwiberg): Move to RentACodec.
    111 bool IsCng(int codec_id) {
    112   auto i = RentACodec::CodecIdFromIndex(codec_id);
    113   return (i && (*i == RentACodec::CodecId::kCNNB ||
    114                 *i == RentACodec::CodecId::kCNWB ||
    115                 *i == RentACodec::CodecId::kCNSWB ||
    116                 *i == RentACodec::CodecId::kCNFB));
    117 }
    118 
    119 }  // namespace
    120 
    121 AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
    122     : crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
    123       id_(config.id),
    124       last_audio_decoder_(nullptr),
    125       previous_audio_activity_(AudioFrame::kVadPassive),
    126       audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
    127       last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
    128       neteq_(NetEq::Create(config.neteq_config)),
    129       vad_enabled_(config.neteq_config.enable_post_decode_vad),
    130       clock_(config.clock),
    131       resampled_last_output_frame_(true) {
    132   assert(clock_);
    133   memset(audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
    134   memset(last_audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
    135 }
    136 
    137 AcmReceiver::~AcmReceiver() {
    138   delete neteq_;
    139 }
    140 
    141 int AcmReceiver::SetMinimumDelay(int delay_ms) {
    142   if (neteq_->SetMinimumDelay(delay_ms))
    143     return 0;
    144   LOG(LERROR) << "AcmReceiver::SetExtraDelay " << delay_ms;
    145   return -1;
    146 }
    147 
    148 int AcmReceiver::SetMaximumDelay(int delay_ms) {
    149   if (neteq_->SetMaximumDelay(delay_ms))
    150     return 0;
    151   LOG(LERROR) << "AcmReceiver::SetExtraDelay " << delay_ms;
    152   return -1;
    153 }
    154 
    155 int AcmReceiver::LeastRequiredDelayMs() const {
    156   return neteq_->LeastRequiredDelayMs();
    157 }
    158 
    159 rtc::Optional<int> AcmReceiver::last_packet_sample_rate_hz() const {
    160   CriticalSectionScoped lock(crit_sect_.get());
    161   return last_packet_sample_rate_hz_;
    162 }
    163 
    164 int AcmReceiver::last_output_sample_rate_hz() const {
    165   return neteq_->last_output_sample_rate_hz();
    166 }
    167 
    168 int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header,
    169                               rtc::ArrayView<const uint8_t> incoming_payload) {
    170   uint32_t receive_timestamp = 0;
    171   const RTPHeader* header = &rtp_header.header;  // Just a shorthand.
    172 
    173   {
    174     CriticalSectionScoped lock(crit_sect_.get());
    175 
    176     const Decoder* decoder = RtpHeaderToDecoder(*header, incoming_payload[0]);
    177     if (!decoder) {
    178       LOG_F(LS_ERROR) << "Payload-type "
    179                       << static_cast<int>(header->payloadType)
    180                       << " is not registered.";
    181       return -1;
    182     }
    183     const int sample_rate_hz = [&decoder] {
    184       const auto ci = RentACodec::CodecIdFromIndex(decoder->acm_codec_id);
    185       return ci ? RentACodec::CodecInstById(*ci)->plfreq : -1;
    186     }();
    187     receive_timestamp = NowInTimestamp(sample_rate_hz);
    188 
    189     // If this is a CNG while the audio codec is not mono, skip pushing in
    190     // packets into NetEq.
    191     if (IsCng(decoder->acm_codec_id) && last_audio_decoder_ &&
    192         last_audio_decoder_->channels > 1)
    193         return 0;
    194     if (!IsCng(decoder->acm_codec_id) &&
    195         decoder->acm_codec_id !=
    196             *RentACodec::CodecIndexFromId(RentACodec::CodecId::kAVT)) {
    197       last_audio_decoder_ = decoder;
    198       last_packet_sample_rate_hz_ = rtc::Optional<int>(decoder->sample_rate_hz);
    199     }
    200 
    201   }  // |crit_sect_| is released.
    202 
    203   if (neteq_->InsertPacket(rtp_header, incoming_payload, receive_timestamp) <
    204       0) {
    205     LOG(LERROR) << "AcmReceiver::InsertPacket "
    206                 << static_cast<int>(header->payloadType)
    207                 << " Failed to insert packet";
    208     return -1;
    209   }
    210   return 0;
    211 }
    212 
    213 int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
    214   enum NetEqOutputType type;
    215   size_t samples_per_channel;
    216   size_t num_channels;
    217 
    218   // Accessing members, take the lock.
    219   CriticalSectionScoped lock(crit_sect_.get());
    220 
    221   // Always write the output to |audio_buffer_| first.
    222   if (neteq_->GetAudio(AudioFrame::kMaxDataSizeSamples,
    223                        audio_buffer_.get(),
    224                        &samples_per_channel,
    225                        &num_channels,
    226                        &type) != NetEq::kOK) {
    227     LOG(LERROR) << "AcmReceiver::GetAudio - NetEq Failed.";
    228     return -1;
    229   }
    230 
    231   const int current_sample_rate_hz = neteq_->last_output_sample_rate_hz();
    232 
    233   // Update if resampling is required.
    234   const bool need_resampling =
    235       (desired_freq_hz != -1) && (current_sample_rate_hz != desired_freq_hz);
    236 
    237   if (need_resampling && !resampled_last_output_frame_) {
    238     // Prime the resampler with the last frame.
    239     int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
    240     int samples_per_channel_int = resampler_.Resample10Msec(
    241         last_audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
    242         num_channels, AudioFrame::kMaxDataSizeSamples, temp_output);
    243     if (samples_per_channel_int < 0) {
    244       LOG(LERROR) << "AcmReceiver::GetAudio - "
    245                      "Resampling last_audio_buffer_ failed.";
    246       return -1;
    247     }
    248     samples_per_channel = static_cast<size_t>(samples_per_channel_int);
    249   }
    250 
    251   // The audio in |audio_buffer_| is tansferred to |audio_frame_| below, either
    252   // through resampling, or through straight memcpy.
    253   // TODO(henrik.lundin) Glitches in the output may appear if the output rate
    254   // from NetEq changes. See WebRTC issue 3923.
    255   if (need_resampling) {
    256     int samples_per_channel_int = resampler_.Resample10Msec(
    257         audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
    258         num_channels, AudioFrame::kMaxDataSizeSamples, audio_frame->data_);
    259     if (samples_per_channel_int < 0) {
    260       LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
    261       return -1;
    262     }
    263     samples_per_channel = static_cast<size_t>(samples_per_channel_int);
    264     resampled_last_output_frame_ = true;
    265   } else {
    266     resampled_last_output_frame_ = false;
    267     // We might end up here ONLY if codec is changed.
    268     memcpy(audio_frame->data_,
    269            audio_buffer_.get(),
    270            samples_per_channel * num_channels * sizeof(int16_t));
    271   }
    272 
    273   // Swap buffers, so that the current audio is stored in |last_audio_buffer_|
    274   // for next time.
    275   audio_buffer_.swap(last_audio_buffer_);
    276 
    277   audio_frame->num_channels_ = num_channels;
    278   audio_frame->samples_per_channel_ = samples_per_channel;
    279   audio_frame->sample_rate_hz_ = static_cast<int>(samples_per_channel * 100);
    280 
    281   // Should set |vad_activity| before calling SetAudioFrameActivityAndType().
    282   audio_frame->vad_activity_ = previous_audio_activity_;
    283   SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame);
    284   previous_audio_activity_ = audio_frame->vad_activity_;
    285   call_stats_.DecodedByNetEq(audio_frame->speech_type_);
    286 
    287   // Computes the RTP timestamp of the first sample in |audio_frame| from
    288   // |GetPlayoutTimestamp|, which is the timestamp of the last sample of
    289   // |audio_frame|.
    290   uint32_t playout_timestamp = 0;
    291   if (GetPlayoutTimestamp(&playout_timestamp)) {
    292     audio_frame->timestamp_ = playout_timestamp -
    293         static_cast<uint32_t>(audio_frame->samples_per_channel_);
    294   } else {
    295     // Remain 0 until we have a valid |playout_timestamp|.
    296     audio_frame->timestamp_ = 0;
    297   }
    298 
    299   return 0;
    300 }
    301 
    302 int32_t AcmReceiver::AddCodec(int acm_codec_id,
    303                               uint8_t payload_type,
    304                               size_t channels,
    305                               int sample_rate_hz,
    306                               AudioDecoder* audio_decoder,
    307                               const std::string& name) {
    308   const auto neteq_decoder = [acm_codec_id, channels]() -> NetEqDecoder {
    309     if (acm_codec_id == -1)
    310       return NetEqDecoder::kDecoderArbitrary;  // External decoder.
    311     const rtc::Optional<RentACodec::CodecId> cid =
    312         RentACodec::CodecIdFromIndex(acm_codec_id);
    313     RTC_DCHECK(cid) << "Invalid codec index: " << acm_codec_id;
    314     const rtc::Optional<NetEqDecoder> ned =
    315         RentACodec::NetEqDecoderFromCodecId(*cid, channels);
    316     RTC_DCHECK(ned) << "Invalid codec ID: " << static_cast<int>(*cid);
    317     return *ned;
    318   }();
    319 
    320   CriticalSectionScoped lock(crit_sect_.get());
    321 
    322   // The corresponding NetEq decoder ID.
    323   // If this codec has been registered before.
    324   auto it = decoders_.find(payload_type);
    325   if (it != decoders_.end()) {
    326     const Decoder& decoder = it->second;
    327     if (acm_codec_id != -1 && decoder.acm_codec_id == acm_codec_id &&
    328         decoder.channels == channels &&
    329         decoder.sample_rate_hz == sample_rate_hz) {
    330       // Re-registering the same codec. Do nothing and return.
    331       return 0;
    332     }
    333 
    334     // Changing codec. First unregister the old codec, then register the new
    335     // one.
    336     if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) {
    337       LOG(LERROR) << "Cannot remove payload " << static_cast<int>(payload_type);
    338       return -1;
    339     }
    340 
    341     decoders_.erase(it);
    342   }
    343 
    344   int ret_val;
    345   if (!audio_decoder) {
    346     ret_val = neteq_->RegisterPayloadType(neteq_decoder, name, payload_type);
    347   } else {
    348     ret_val = neteq_->RegisterExternalDecoder(
    349         audio_decoder, neteq_decoder, name, payload_type, sample_rate_hz);
    350   }
    351   if (ret_val != NetEq::kOK) {
    352     LOG(LERROR) << "AcmReceiver::AddCodec " << acm_codec_id
    353                 << static_cast<int>(payload_type)
    354                 << " channels: " << channels;
    355     return -1;
    356   }
    357 
    358   Decoder decoder;
    359   decoder.acm_codec_id = acm_codec_id;
    360   decoder.payload_type = payload_type;
    361   decoder.channels = channels;
    362   decoder.sample_rate_hz = sample_rate_hz;
    363   decoders_[payload_type] = decoder;
    364   return 0;
    365 }
    366 
    367 void AcmReceiver::EnableVad() {
    368   neteq_->EnableVad();
    369   CriticalSectionScoped lock(crit_sect_.get());
    370   vad_enabled_ = true;
    371 }
    372 
    373 void AcmReceiver::DisableVad() {
    374   neteq_->DisableVad();
    375   CriticalSectionScoped lock(crit_sect_.get());
    376   vad_enabled_ = false;
    377 }
    378 
    379 void AcmReceiver::FlushBuffers() {
    380   neteq_->FlushBuffers();
    381 }
    382 
    383 // If failed in removing one of the codecs, this method continues to remove as
    384 // many as it can.
    385 int AcmReceiver::RemoveAllCodecs() {
    386   int ret_val = 0;
    387   CriticalSectionScoped lock(crit_sect_.get());
    388   for (auto it = decoders_.begin(); it != decoders_.end(); ) {
    389     auto cur = it;
    390     ++it;  // it will be valid even if we erase cur
    391     if (neteq_->RemovePayloadType(cur->second.payload_type) == 0) {
    392       decoders_.erase(cur);
    393     } else {
    394       LOG_F(LS_ERROR) << "Cannot remove payload "
    395                       << static_cast<int>(cur->second.payload_type);
    396       ret_val = -1;
    397     }
    398   }
    399 
    400   // No codec is registered, invalidate last audio decoder.
    401   last_audio_decoder_ = nullptr;
    402   last_packet_sample_rate_hz_ = rtc::Optional<int>();
    403   return ret_val;
    404 }
    405 
    406 int AcmReceiver::RemoveCodec(uint8_t payload_type) {
    407   CriticalSectionScoped lock(crit_sect_.get());
    408   auto it = decoders_.find(payload_type);
    409   if (it == decoders_.end()) {  // Such a payload-type is not registered.
    410     return 0;
    411   }
    412   if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) {
    413     LOG(LERROR) << "AcmReceiver::RemoveCodec" << static_cast<int>(payload_type);
    414     return -1;
    415   }
    416   if (last_audio_decoder_ == &it->second) {
    417     last_audio_decoder_ = nullptr;
    418     last_packet_sample_rate_hz_ = rtc::Optional<int>();
    419   }
    420   decoders_.erase(it);
    421   return 0;
    422 }
    423 
    424 void AcmReceiver::set_id(int id) {
    425   CriticalSectionScoped lock(crit_sect_.get());
    426   id_ = id;
    427 }
    428 
    429 bool AcmReceiver::GetPlayoutTimestamp(uint32_t* timestamp) {
    430   return neteq_->GetPlayoutTimestamp(timestamp);
    431 }
    432 
    433 int AcmReceiver::LastAudioCodec(CodecInst* codec) const {
    434   CriticalSectionScoped lock(crit_sect_.get());
    435   if (!last_audio_decoder_) {
    436     return -1;
    437   }
    438   *codec = *RentACodec::CodecInstById(
    439       *RentACodec::CodecIdFromIndex(last_audio_decoder_->acm_codec_id));
    440   codec->pltype = last_audio_decoder_->payload_type;
    441   codec->channels = last_audio_decoder_->channels;
    442   codec->plfreq = last_audio_decoder_->sample_rate_hz;
    443   return 0;
    444 }
    445 
    446 void AcmReceiver::GetNetworkStatistics(NetworkStatistics* acm_stat) {
    447   NetEqNetworkStatistics neteq_stat;
    448   // NetEq function always returns zero, so we don't check the return value.
    449   neteq_->NetworkStatistics(&neteq_stat);
    450 
    451   acm_stat->currentBufferSize = neteq_stat.current_buffer_size_ms;
    452   acm_stat->preferredBufferSize = neteq_stat.preferred_buffer_size_ms;
    453   acm_stat->jitterPeaksFound = neteq_stat.jitter_peaks_found ? true : false;
    454   acm_stat->currentPacketLossRate = neteq_stat.packet_loss_rate;
    455   acm_stat->currentDiscardRate = neteq_stat.packet_discard_rate;
    456   acm_stat->currentExpandRate = neteq_stat.expand_rate;
    457   acm_stat->currentSpeechExpandRate = neteq_stat.speech_expand_rate;
    458   acm_stat->currentPreemptiveRate = neteq_stat.preemptive_rate;
    459   acm_stat->currentAccelerateRate = neteq_stat.accelerate_rate;
    460   acm_stat->currentSecondaryDecodedRate = neteq_stat.secondary_decoded_rate;
    461   acm_stat->clockDriftPPM = neteq_stat.clockdrift_ppm;
    462   acm_stat->addedSamples = neteq_stat.added_zero_samples;
    463   acm_stat->meanWaitingTimeMs = neteq_stat.mean_waiting_time_ms;
    464   acm_stat->medianWaitingTimeMs = neteq_stat.median_waiting_time_ms;
    465   acm_stat->minWaitingTimeMs = neteq_stat.min_waiting_time_ms;
    466   acm_stat->maxWaitingTimeMs = neteq_stat.max_waiting_time_ms;
    467 }
    468 
    469 int AcmReceiver::DecoderByPayloadType(uint8_t payload_type,
    470                                       CodecInst* codec) const {
    471   CriticalSectionScoped lock(crit_sect_.get());
    472   auto it = decoders_.find(payload_type);
    473   if (it == decoders_.end()) {
    474     LOG(LERROR) << "AcmReceiver::DecoderByPayloadType "
    475                 << static_cast<int>(payload_type);
    476     return -1;
    477   }
    478   const Decoder& decoder = it->second;
    479   *codec = *RentACodec::CodecInstById(
    480       *RentACodec::CodecIdFromIndex(decoder.acm_codec_id));
    481   codec->pltype = decoder.payload_type;
    482   codec->channels = decoder.channels;
    483   codec->plfreq = decoder.sample_rate_hz;
    484   return 0;
    485 }
    486 
    487 int AcmReceiver::EnableNack(size_t max_nack_list_size) {
    488   neteq_->EnableNack(max_nack_list_size);
    489   return 0;
    490 }
    491 
    492 void AcmReceiver::DisableNack() {
    493   neteq_->DisableNack();
    494 }
    495 
    496 std::vector<uint16_t> AcmReceiver::GetNackList(
    497     int64_t round_trip_time_ms) const {
    498   return neteq_->GetNackList(round_trip_time_ms);
    499 }
    500 
    501 void AcmReceiver::ResetInitialDelay() {
    502   neteq_->SetMinimumDelay(0);
    503   // TODO(turajs): Should NetEq Buffer be flushed?
    504 }
    505 
    506 const AcmReceiver::Decoder* AcmReceiver::RtpHeaderToDecoder(
    507     const RTPHeader& rtp_header,
    508     uint8_t payload_type) const {
    509   auto it = decoders_.find(rtp_header.payloadType);
    510   const auto red_index =
    511       RentACodec::CodecIndexFromId(RentACodec::CodecId::kRED);
    512   if (red_index &&  // This ensures that RED is defined in WebRTC.
    513       it != decoders_.end() && it->second.acm_codec_id == *red_index) {
    514     // This is a RED packet, get the payload of the audio codec.
    515     it = decoders_.find(payload_type & 0x7F);
    516   }
    517 
    518   // Check if the payload is registered.
    519   return it != decoders_.end() ? &it->second : nullptr;
    520 }
    521 
    522 uint32_t AcmReceiver::NowInTimestamp(int decoder_sampling_rate) const {
    523   // Down-cast the time to (32-6)-bit since we only care about
    524   // the least significant bits. (32-6) bits cover 2^(32-6) = 67108864 ms.
    525   // We masked 6 most significant bits of 32-bit so there is no overflow in
    526   // the conversion from milliseconds to timestamp.
    527   const uint32_t now_in_ms = static_cast<uint32_t>(
    528       clock_->TimeInMilliseconds() & 0x03ffffff);
    529   return static_cast<uint32_t>(
    530       (decoder_sampling_rate / 1000) * now_in_ms);
    531 }
    532 
    533 void AcmReceiver::GetDecodingCallStatistics(
    534     AudioDecodingCallStats* stats) const {
    535   CriticalSectionScoped lock(crit_sect_.get());
    536   *stats = call_stats_.GetDecodingStatistics();
    537 }
    538 
    539 }  // namespace acm2
    540 
    541 }  // namespace webrtc
    542