1 /* 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" 12 13 #include <algorithm> 14 15 #include "webrtc/base/checks.h" 16 17 namespace webrtc { 18 namespace { 19 20 const size_t kMaxLength = 320; 21 const size_t kNumChannels = 1; 22 23 const double kDefaultVoiceValue = 1.0; 24 const double kNeutralProbability = 0.5; 25 const double kLowProbability = 0.01; 26 27 } // namespace 28 29 VoiceActivityDetector::VoiceActivityDetector() 30 : last_voice_probability_(kDefaultVoiceValue), 31 standalone_vad_(StandaloneVad::Create()) { 32 } 33 34 // Because ISAC has a different chunk length, it updates 35 // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data. 36 // Otherwise it clears them. 37 void VoiceActivityDetector::ProcessChunk(const int16_t* audio, 38 size_t length, 39 int sample_rate_hz) { 40 RTC_DCHECK_EQ(static_cast<int>(length), sample_rate_hz / 100); 41 RTC_DCHECK_LE(length, kMaxLength); 42 // Resample to the required rate. 43 const int16_t* resampled_ptr = audio; 44 if (sample_rate_hz != kSampleRateHz) { 45 RTC_CHECK_EQ( 46 resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), 47 0); 48 resampler_.Push(audio, length, resampled_, kLength10Ms, length); 49 resampled_ptr = resampled_; 50 } 51 RTC_DCHECK_EQ(length, kLength10Ms); 52 53 // Each chunk needs to be passed into |standalone_vad_|, because internally it 54 // buffers the audio and processes it all at once when GetActivity() is 55 // called. 56 RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); 57 58 audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); 59 60 chunkwise_voice_probabilities_.resize(features_.num_frames); 61 chunkwise_rms_.resize(features_.num_frames); 62 std::copy(features_.rms, features_.rms + chunkwise_rms_.size(), 63 chunkwise_rms_.begin()); 64 if (features_.num_frames > 0) { 65 if (features_.silence) { 66 // The other features are invalid, so set the voice probabilities to an 67 // arbitrary low value. 68 std::fill(chunkwise_voice_probabilities_.begin(), 69 chunkwise_voice_probabilities_.end(), kLowProbability); 70 } else { 71 std::fill(chunkwise_voice_probabilities_.begin(), 72 chunkwise_voice_probabilities_.end(), kNeutralProbability); 73 RTC_CHECK_GE( 74 standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0], 75 chunkwise_voice_probabilities_.size()), 76 0); 77 RTC_CHECK_GE(pitch_based_vad_.VoicingProbability( 78 features_, &chunkwise_voice_probabilities_[0]), 79 0); 80 } 81 last_voice_probability_ = chunkwise_voice_probabilities_.back(); 82 } 83 } 84 85 } // namespace webrtc 86