1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" 12 13 #include <assert.h> 14 15 #include "webrtc/common_audio/vad/include/webrtc_vad.h" 16 #include "webrtc/modules/audio_processing/audio_buffer.h" 17 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h" 18 19 namespace webrtc { 20 21 typedef VadInst Handle; 22 23 namespace { 24 int MapSetting(VoiceDetection::Likelihood likelihood) { 25 switch (likelihood) { 26 case VoiceDetection::kVeryLowLikelihood: 27 return 3; 28 case VoiceDetection::kLowLikelihood: 29 return 2; 30 case VoiceDetection::kModerateLikelihood: 31 return 1; 32 case VoiceDetection::kHighLikelihood: 33 return 0; 34 } 35 assert(false); 36 return -1; 37 } 38 } // namespace 39 40 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm, 41 CriticalSectionWrapper* crit) 42 : ProcessingComponent(), 43 apm_(apm), 44 crit_(crit), 45 stream_has_voice_(false), 46 using_external_vad_(false), 47 likelihood_(kLowLikelihood), 48 frame_size_ms_(10), 49 frame_size_samples_(0) {} 50 51 VoiceDetectionImpl::~VoiceDetectionImpl() {} 52 53 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { 54 if (!is_component_enabled()) { 55 return apm_->kNoError; 56 } 57 58 if (using_external_vad_) { 59 using_external_vad_ = false; 60 return apm_->kNoError; 61 } 62 assert(audio->samples_per_split_channel() <= 160); 63 64 const int16_t* mixed_data = audio->low_pass_split_data(0); 65 if (audio->num_channels() > 1) { 66 audio->CopyAndMixLowPass(1); 67 mixed_data = audio->mixed_low_pass_data(0); 68 } 69 70 // TODO(ajm): concatenate data in frame buffer here. 71 72 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), 73 apm_->proc_split_sample_rate_hz(), 74 mixed_data, 75 frame_size_samples_); 76 if (vad_ret == 0) { 77 stream_has_voice_ = false; 78 audio->set_activity(AudioFrame::kVadPassive); 79 } else if (vad_ret == 1) { 80 stream_has_voice_ = true; 81 audio->set_activity(AudioFrame::kVadActive); 82 } else { 83 return apm_->kUnspecifiedError; 84 } 85 86 return apm_->kNoError; 87 } 88 89 int VoiceDetectionImpl::Enable(bool enable) { 90 CriticalSectionScoped crit_scoped(crit_); 91 return EnableComponent(enable); 92 } 93 94 bool VoiceDetectionImpl::is_enabled() const { 95 return is_component_enabled(); 96 } 97 98 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { 99 using_external_vad_ = true; 100 stream_has_voice_ = has_voice; 101 return apm_->kNoError; 102 } 103 104 bool VoiceDetectionImpl::stream_has_voice() const { 105 // TODO(ajm): enable this assertion? 106 //assert(using_external_vad_ || is_component_enabled()); 107 return stream_has_voice_; 108 } 109 110 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { 111 CriticalSectionScoped crit_scoped(crit_); 112 if (MapSetting(likelihood) == -1) { 113 return apm_->kBadParameterError; 114 } 115 116 likelihood_ = likelihood; 117 return Configure(); 118 } 119 120 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { 121 return likelihood_; 122 } 123 124 int VoiceDetectionImpl::set_frame_size_ms(int size) { 125 CriticalSectionScoped crit_scoped(crit_); 126 assert(size == 10); // TODO(ajm): remove when supported. 127 if (size != 10 && 128 size != 20 && 129 size != 30) { 130 return apm_->kBadParameterError; 131 } 132 133 frame_size_ms_ = size; 134 135 return Initialize(); 136 } 137 138 int VoiceDetectionImpl::frame_size_ms() const { 139 return frame_size_ms_; 140 } 141 142 int VoiceDetectionImpl::Initialize() { 143 int err = ProcessingComponent::Initialize(); 144 if (err != apm_->kNoError || !is_component_enabled()) { 145 return err; 146 } 147 148 using_external_vad_ = false; 149 frame_size_samples_ = frame_size_ms_ * 150 apm_->proc_split_sample_rate_hz() / 1000; 151 // TODO(ajm): intialize frame buffer here. 152 153 return apm_->kNoError; 154 } 155 156 void* VoiceDetectionImpl::CreateHandle() const { 157 Handle* handle = NULL; 158 if (WebRtcVad_Create(&handle) != apm_->kNoError) { 159 handle = NULL; 160 } else { 161 assert(handle != NULL); 162 } 163 164 return handle; 165 } 166 167 void VoiceDetectionImpl::DestroyHandle(void* handle) const { 168 WebRtcVad_Free(static_cast<Handle*>(handle)); 169 } 170 171 int VoiceDetectionImpl::InitializeHandle(void* handle) const { 172 return WebRtcVad_Init(static_cast<Handle*>(handle)); 173 } 174 175 int VoiceDetectionImpl::ConfigureHandle(void* handle) const { 176 return WebRtcVad_set_mode(static_cast<Handle*>(handle), 177 MapSetting(likelihood_)); 178 } 179 180 int VoiceDetectionImpl::num_handles_required() const { 181 return 1; 182 } 183 184 int VoiceDetectionImpl::GetHandleError(void* handle) const { 185 // The VAD has no get_error() function. 186 assert(handle != NULL); 187 return apm_->kUnspecifiedError; 188 } 189 } // namespace webrtc 190