Home | History | Annotate | Download | only in vad
      1 /*
      2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
     12 
     13 #include <algorithm>
     14 
     15 #include "webrtc/base/checks.h"
     16 
     17 namespace webrtc {
     18 namespace {
     19 
     20 const size_t kMaxLength = 320;
     21 const size_t kNumChannels = 1;
     22 
     23 const double kDefaultVoiceValue = 1.0;
     24 const double kNeutralProbability = 0.5;
     25 const double kLowProbability = 0.01;
     26 
     27 }  // namespace
     28 
     29 VoiceActivityDetector::VoiceActivityDetector()
     30     : last_voice_probability_(kDefaultVoiceValue),
     31       standalone_vad_(StandaloneVad::Create()) {
     32 }
     33 
     34 // Because ISAC has a different chunk length, it updates
     35 // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
     36 // Otherwise it clears them.
     37 void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
     38                                          size_t length,
     39                                          int sample_rate_hz) {
     40   RTC_DCHECK_EQ(static_cast<int>(length), sample_rate_hz / 100);
     41   RTC_DCHECK_LE(length, kMaxLength);
     42   // Resample to the required rate.
     43   const int16_t* resampled_ptr = audio;
     44   if (sample_rate_hz != kSampleRateHz) {
     45     RTC_CHECK_EQ(
     46         resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
     47         0);
     48     resampler_.Push(audio, length, resampled_, kLength10Ms, length);
     49     resampled_ptr = resampled_;
     50   }
     51   RTC_DCHECK_EQ(length, kLength10Ms);
     52 
     53   // Each chunk needs to be passed into |standalone_vad_|, because internally it
     54   // buffers the audio and processes it all at once when GetActivity() is
     55   // called.
     56   RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
     57 
     58   audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
     59 
     60   chunkwise_voice_probabilities_.resize(features_.num_frames);
     61   chunkwise_rms_.resize(features_.num_frames);
     62   std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
     63             chunkwise_rms_.begin());
     64   if (features_.num_frames > 0) {
     65     if (features_.silence) {
     66       // The other features are invalid, so set the voice probabilities to an
     67       // arbitrary low value.
     68       std::fill(chunkwise_voice_probabilities_.begin(),
     69                 chunkwise_voice_probabilities_.end(), kLowProbability);
     70     } else {
     71       std::fill(chunkwise_voice_probabilities_.begin(),
     72                 chunkwise_voice_probabilities_.end(), kNeutralProbability);
     73       RTC_CHECK_GE(
     74           standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
     75                                        chunkwise_voice_probabilities_.size()),
     76           0);
     77       RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
     78                        features_, &chunkwise_voice_probabilities_[0]),
     79                    0);
     80     }
     81     last_voice_probability_ = chunkwise_voice_probabilities_.back();
     82   }
     83 }
     84 
     85 }  // namespace webrtc
     86