Home | History | Annotate | Download | only in audio_processing
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
     12 
     13 #include <assert.h>
     14 
     15 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
     16 #include "webrtc/modules/audio_processing/audio_buffer.h"
     17 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
     18 
     19 namespace webrtc {
     20 
// Alias for the VAD instance type; the void* handles managed by the
// component are cast to Handle* before being passed to the WebRtcVad_* calls.
typedef VadInst Handle;
     22 
     23 namespace {
     24 int MapSetting(VoiceDetection::Likelihood likelihood) {
     25   switch (likelihood) {
     26     case VoiceDetection::kVeryLowLikelihood:
     27       return 3;
     28     case VoiceDetection::kLowLikelihood:
     29       return 2;
     30     case VoiceDetection::kModerateLikelihood:
     31       return 1;
     32     case VoiceDetection::kHighLikelihood:
     33       return 0;
     34   }
     35   assert(false);
     36   return -1;
     37 }
     38 }  // namespace
     39 
     40 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
     41                                        CriticalSectionWrapper* crit)
     42   : ProcessingComponent(),
     43     apm_(apm),
     44     crit_(crit),
     45     stream_has_voice_(false),
     46     using_external_vad_(false),
     47     likelihood_(kLowLikelihood),
     48     frame_size_ms_(10),
     49     frame_size_samples_(0) {}
     50 
     51 VoiceDetectionImpl::~VoiceDetectionImpl() {}
     52 
     53 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
     54   if (!is_component_enabled()) {
     55     return apm_->kNoError;
     56   }
     57 
     58   if (using_external_vad_) {
     59     using_external_vad_ = false;
     60     return apm_->kNoError;
     61   }
     62   assert(audio->samples_per_split_channel() <= 160);
     63 
     64   const int16_t* mixed_data = audio->low_pass_split_data(0);
     65   if (audio->num_channels() > 1) {
     66     audio->CopyAndMixLowPass(1);
     67     mixed_data = audio->mixed_low_pass_data(0);
     68   }
     69 
     70   // TODO(ajm): concatenate data in frame buffer here.
     71 
     72   int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
     73                                   apm_->proc_split_sample_rate_hz(),
     74                                   mixed_data,
     75                                   frame_size_samples_);
     76   if (vad_ret == 0) {
     77     stream_has_voice_ = false;
     78     audio->set_activity(AudioFrame::kVadPassive);
     79   } else if (vad_ret == 1) {
     80     stream_has_voice_ = true;
     81     audio->set_activity(AudioFrame::kVadActive);
     82   } else {
     83     return apm_->kUnspecifiedError;
     84   }
     85 
     86   return apm_->kNoError;
     87 }
     88 
     89 int VoiceDetectionImpl::Enable(bool enable) {
     90   CriticalSectionScoped crit_scoped(crit_);
     91   return EnableComponent(enable);
     92 }
     93 
     94 bool VoiceDetectionImpl::is_enabled() const {
     95   return is_component_enabled();
     96 }
     97 
     98 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
     99   using_external_vad_ = true;
    100   stream_has_voice_ = has_voice;
    101   return apm_->kNoError;
    102 }
    103 
    104 bool VoiceDetectionImpl::stream_has_voice() const {
    105   // TODO(ajm): enable this assertion?
    106   //assert(using_external_vad_ || is_component_enabled());
    107   return stream_has_voice_;
    108 }
    109 
    110 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
    111   CriticalSectionScoped crit_scoped(crit_);
    112   if (MapSetting(likelihood) == -1) {
    113     return apm_->kBadParameterError;
    114   }
    115 
    116   likelihood_ = likelihood;
    117   return Configure();
    118 }
    119 
    120 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
    121   return likelihood_;
    122 }
    123 
    124 int VoiceDetectionImpl::set_frame_size_ms(int size) {
    125   CriticalSectionScoped crit_scoped(crit_);
    126   assert(size == 10); // TODO(ajm): remove when supported.
    127   if (size != 10 &&
    128       size != 20 &&
    129       size != 30) {
    130     return apm_->kBadParameterError;
    131   }
    132 
    133   frame_size_ms_ = size;
    134 
    135   return Initialize();
    136 }
    137 
    138 int VoiceDetectionImpl::frame_size_ms() const {
    139   return frame_size_ms_;
    140 }
    141 
    142 int VoiceDetectionImpl::Initialize() {
    143   int err = ProcessingComponent::Initialize();
    144   if (err != apm_->kNoError || !is_component_enabled()) {
    145     return err;
    146   }
    147 
    148   using_external_vad_ = false;
    149   frame_size_samples_ = frame_size_ms_ *
    150       apm_->proc_split_sample_rate_hz() / 1000;
    151   // TODO(ajm): intialize frame buffer here.
    152 
    153   return apm_->kNoError;
    154 }
    155 
    156 void* VoiceDetectionImpl::CreateHandle() const {
    157   Handle* handle = NULL;
    158   if (WebRtcVad_Create(&handle) != apm_->kNoError) {
    159     handle = NULL;
    160   } else {
    161     assert(handle != NULL);
    162   }
    163 
    164   return handle;
    165 }
    166 
    167 void VoiceDetectionImpl::DestroyHandle(void* handle) const {
    168   WebRtcVad_Free(static_cast<Handle*>(handle));
    169 }
    170 
    171 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
    172   return WebRtcVad_Init(static_cast<Handle*>(handle));
    173 }
    174 
    175 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
    176   return WebRtcVad_set_mode(static_cast<Handle*>(handle),
    177                             MapSetting(likelihood_));
    178 }
    179 
    180 int VoiceDetectionImpl::num_handles_required() const {
    181   return 1;
    182 }
    183 
    184 int VoiceDetectionImpl::GetHandleError(void* handle) const {
    185   // The VAD has no get_error() function.
    186   assert(handle != NULL);
    187   return apm_->kUnspecifiedError;
    188 }
    189 }  // namespace webrtc
    190