1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" 12 13 #include <assert.h> 14 15 #include "webrtc/common_audio/vad/include/webrtc_vad.h" 16 #include "webrtc/modules/audio_processing/audio_buffer.h" 17 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h" 18 19 namespace webrtc { 20 21 typedef VadInst Handle; 22 23 namespace { 24 int MapSetting(VoiceDetection::Likelihood likelihood) { 25 switch (likelihood) { 26 case VoiceDetection::kVeryLowLikelihood: 27 return 3; 28 case VoiceDetection::kLowLikelihood: 29 return 2; 30 case VoiceDetection::kModerateLikelihood: 31 return 1; 32 case VoiceDetection::kHighLikelihood: 33 return 0; 34 } 35 assert(false); 36 return -1; 37 } 38 } // namespace 39 40 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm, 41 CriticalSectionWrapper* crit) 42 : ProcessingComponent(), 43 apm_(apm), 44 crit_(crit), 45 stream_has_voice_(false), 46 using_external_vad_(false), 47 likelihood_(kLowLikelihood), 48 frame_size_ms_(10), 49 frame_size_samples_(0) {} 50 51 VoiceDetectionImpl::~VoiceDetectionImpl() {} 52 53 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { 54 if (!is_component_enabled()) { 55 return apm_->kNoError; 56 } 57 58 if (using_external_vad_) { 59 using_external_vad_ = false; 60 return apm_->kNoError; 61 } 62 assert(audio->samples_per_split_channel() <= 160); 63 64 // TODO(ajm): concatenate data in frame buffer here. 65 66 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), 67 apm_->proc_split_sample_rate_hz(), 68 audio->mixed_low_pass_data(), 69 frame_size_samples_); 70 if (vad_ret == 0) { 71 stream_has_voice_ = false; 72 audio->set_activity(AudioFrame::kVadPassive); 73 } else if (vad_ret == 1) { 74 stream_has_voice_ = true; 75 audio->set_activity(AudioFrame::kVadActive); 76 } else { 77 return apm_->kUnspecifiedError; 78 } 79 80 return apm_->kNoError; 81 } 82 83 int VoiceDetectionImpl::Enable(bool enable) { 84 CriticalSectionScoped crit_scoped(crit_); 85 return EnableComponent(enable); 86 } 87 88 bool VoiceDetectionImpl::is_enabled() const { 89 return is_component_enabled(); 90 } 91 92 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { 93 using_external_vad_ = true; 94 stream_has_voice_ = has_voice; 95 return apm_->kNoError; 96 } 97 98 bool VoiceDetectionImpl::stream_has_voice() const { 99 // TODO(ajm): enable this assertion? 100 //assert(using_external_vad_ || is_component_enabled()); 101 return stream_has_voice_; 102 } 103 104 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { 105 CriticalSectionScoped crit_scoped(crit_); 106 if (MapSetting(likelihood) == -1) { 107 return apm_->kBadParameterError; 108 } 109 110 likelihood_ = likelihood; 111 return Configure(); 112 } 113 114 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { 115 return likelihood_; 116 } 117 118 int VoiceDetectionImpl::set_frame_size_ms(int size) { 119 CriticalSectionScoped crit_scoped(crit_); 120 assert(size == 10); // TODO(ajm): remove when supported. 121 if (size != 10 && 122 size != 20 && 123 size != 30) { 124 return apm_->kBadParameterError; 125 } 126 127 frame_size_ms_ = size; 128 129 return Initialize(); 130 } 131 132 int VoiceDetectionImpl::frame_size_ms() const { 133 return frame_size_ms_; 134 } 135 136 int VoiceDetectionImpl::Initialize() { 137 int err = ProcessingComponent::Initialize(); 138 if (err != apm_->kNoError || !is_component_enabled()) { 139 return err; 140 } 141 142 using_external_vad_ = false; 143 frame_size_samples_ = frame_size_ms_ * 144 apm_->proc_split_sample_rate_hz() / 1000; 145 // TODO(ajm): intialize frame buffer here. 146 147 return apm_->kNoError; 148 } 149 150 void* VoiceDetectionImpl::CreateHandle() const { 151 Handle* handle = NULL; 152 if (WebRtcVad_Create(&handle) != apm_->kNoError) { 153 handle = NULL; 154 } else { 155 assert(handle != NULL); 156 } 157 158 return handle; 159 } 160 161 void VoiceDetectionImpl::DestroyHandle(void* handle) const { 162 WebRtcVad_Free(static_cast<Handle*>(handle)); 163 } 164 165 int VoiceDetectionImpl::InitializeHandle(void* handle) const { 166 return WebRtcVad_Init(static_cast<Handle*>(handle)); 167 } 168 169 int VoiceDetectionImpl::ConfigureHandle(void* handle) const { 170 return WebRtcVad_set_mode(static_cast<Handle*>(handle), 171 MapSetting(likelihood_)); 172 } 173 174 int VoiceDetectionImpl::num_handles_required() const { 175 return 1; 176 } 177 178 int VoiceDetectionImpl::GetHandleError(void* handle) const { 179 // The VAD has no get_error() function. 180 assert(handle != NULL); 181 return apm_->kUnspecifiedError; 182 } 183 } // namespace webrtc 184