1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "voice_detection_impl.h" 12 13 #include <cassert> 14 15 #include "critical_section_wrapper.h" 16 #include "webrtc_vad.h" 17 18 #include "audio_processing_impl.h" 19 #include "audio_buffer.h" 20 21 namespace webrtc { 22 23 typedef VadInst Handle; 24 25 namespace { 26 WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) { 27 switch (likelihood) { 28 case VoiceDetection::kVeryLowLikelihood: 29 return 3; 30 break; 31 case VoiceDetection::kLowLikelihood: 32 return 2; 33 break; 34 case VoiceDetection::kModerateLikelihood: 35 return 1; 36 break; 37 case VoiceDetection::kHighLikelihood: 38 return 0; 39 break; 40 default: 41 return -1; 42 } 43 } 44 } // namespace 45 46 47 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm) 48 : ProcessingComponent(apm), 49 apm_(apm), 50 stream_has_voice_(false), 51 using_external_vad_(false), 52 likelihood_(kLowLikelihood), 53 frame_size_ms_(10), 54 frame_size_samples_(0) {} 55 56 VoiceDetectionImpl::~VoiceDetectionImpl() {} 57 58 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { 59 if (!is_component_enabled()) { 60 return apm_->kNoError; 61 } 62 63 if (using_external_vad_) { 64 using_external_vad_ = false; 65 return apm_->kNoError; 66 } 67 assert(audio->samples_per_split_channel() <= 160); 68 69 WebRtc_Word16* mixed_data = audio->low_pass_split_data(0); 70 if (audio->num_channels() > 1) { 71 audio->CopyAndMixLowPass(1); 72 mixed_data = audio->mixed_low_pass_data(0); 73 } 74 75 // TODO(ajm): concatenate data in frame buffer here. 76 77 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), 78 apm_->split_sample_rate_hz(), 79 mixed_data, 80 frame_size_samples_); 81 if (vad_ret == 0) { 82 stream_has_voice_ = false; 83 audio->set_activity(AudioFrame::kVadPassive); 84 } else if (vad_ret == 1) { 85 stream_has_voice_ = true; 86 audio->set_activity(AudioFrame::kVadActive); 87 } else { 88 return apm_->kUnspecifiedError; 89 } 90 91 return apm_->kNoError; 92 } 93 94 int VoiceDetectionImpl::Enable(bool enable) { 95 CriticalSectionScoped crit_scoped(*apm_->crit()); 96 return EnableComponent(enable); 97 } 98 99 bool VoiceDetectionImpl::is_enabled() const { 100 return is_component_enabled(); 101 } 102 103 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { 104 using_external_vad_ = true; 105 stream_has_voice_ = has_voice; 106 return apm_->kNoError; 107 } 108 109 bool VoiceDetectionImpl::stream_has_voice() const { 110 // TODO(ajm): enable this assertion? 111 //assert(using_external_vad_ || is_component_enabled()); 112 return stream_has_voice_; 113 } 114 115 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { 116 CriticalSectionScoped crit_scoped(*apm_->crit()); 117 if (MapSetting(likelihood) == -1) { 118 return apm_->kBadParameterError; 119 } 120 121 likelihood_ = likelihood; 122 return Configure(); 123 } 124 125 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { 126 return likelihood_; 127 } 128 129 int VoiceDetectionImpl::set_frame_size_ms(int size) { 130 CriticalSectionScoped crit_scoped(*apm_->crit()); 131 assert(size == 10); // TODO(ajm): remove when supported. 132 if (size != 10 && 133 size != 20 && 134 size != 30) { 135 return apm_->kBadParameterError; 136 } 137 138 frame_size_ms_ = size; 139 140 return Initialize(); 141 } 142 143 int VoiceDetectionImpl::frame_size_ms() const { 144 return frame_size_ms_; 145 } 146 147 int VoiceDetectionImpl::Initialize() { 148 int err = ProcessingComponent::Initialize(); 149 if (err != apm_->kNoError || !is_component_enabled()) { 150 return err; 151 } 152 153 using_external_vad_ = false; 154 frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000); 155 // TODO(ajm): intialize frame buffer here. 156 157 return apm_->kNoError; 158 } 159 160 int VoiceDetectionImpl::get_version(char* version, 161 int version_len_bytes) const { 162 if (WebRtcVad_get_version(version, version_len_bytes) != 0) { 163 return apm_->kBadParameterError; 164 } 165 166 return apm_->kNoError; 167 } 168 169 void* VoiceDetectionImpl::CreateHandle() const { 170 Handle* handle = NULL; 171 if (WebRtcVad_Create(&handle) != apm_->kNoError) { 172 handle = NULL; 173 } else { 174 assert(handle != NULL); 175 } 176 177 return handle; 178 } 179 180 int VoiceDetectionImpl::DestroyHandle(void* handle) const { 181 return WebRtcVad_Free(static_cast<Handle*>(handle)); 182 } 183 184 int VoiceDetectionImpl::InitializeHandle(void* handle) const { 185 return WebRtcVad_Init(static_cast<Handle*>(handle)); 186 } 187 188 int VoiceDetectionImpl::ConfigureHandle(void* handle) const { 189 return WebRtcVad_set_mode(static_cast<Handle*>(handle), 190 MapSetting(likelihood_)); 191 } 192 193 int VoiceDetectionImpl::num_handles_required() const { 194 return 1; 195 } 196 197 int VoiceDetectionImpl::GetHandleError(void* handle) const { 198 // The VAD has no get_error() function. 199 assert(handle != NULL); 200 return apm_->kUnspecifiedError; 201 } 202 } // namespace webrtc 203