Home | History | Annotate | Download | only in audio_processing
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
     12 
     13 #include <assert.h>
     14 
     15 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
     16 #include "webrtc/modules/audio_processing/audio_buffer.h"
     17 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
     18 
     19 namespace webrtc {
     20 
     21 typedef VadInst Handle;
     22 
     23 namespace {
     24 int MapSetting(VoiceDetection::Likelihood likelihood) {
     25   switch (likelihood) {
     26     case VoiceDetection::kVeryLowLikelihood:
     27       return 3;
     28     case VoiceDetection::kLowLikelihood:
     29       return 2;
     30     case VoiceDetection::kModerateLikelihood:
     31       return 1;
     32     case VoiceDetection::kHighLikelihood:
     33       return 0;
     34   }
     35   assert(false);
     36   return -1;
     37 }
     38 }  // namespace
     39 
     40 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
     41                                        CriticalSectionWrapper* crit)
     42   : ProcessingComponent(),
     43     apm_(apm),
     44     crit_(crit),
     45     stream_has_voice_(false),
     46     using_external_vad_(false),
     47     likelihood_(kLowLikelihood),
     48     frame_size_ms_(10),
     49     frame_size_samples_(0) {}
     50 
     51 VoiceDetectionImpl::~VoiceDetectionImpl() {}
     52 
     53 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
     54   if (!is_component_enabled()) {
     55     return apm_->kNoError;
     56   }
     57 
     58   if (using_external_vad_) {
     59     using_external_vad_ = false;
     60     return apm_->kNoError;
     61   }
     62   assert(audio->samples_per_split_channel() <= 160);
     63 
     64   // TODO(ajm): concatenate data in frame buffer here.
     65 
     66   int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
     67                                   apm_->proc_split_sample_rate_hz(),
     68                                   audio->mixed_low_pass_data(),
     69                                   frame_size_samples_);
     70   if (vad_ret == 0) {
     71     stream_has_voice_ = false;
     72     audio->set_activity(AudioFrame::kVadPassive);
     73   } else if (vad_ret == 1) {
     74     stream_has_voice_ = true;
     75     audio->set_activity(AudioFrame::kVadActive);
     76   } else {
     77     return apm_->kUnspecifiedError;
     78   }
     79 
     80   return apm_->kNoError;
     81 }
     82 
     83 int VoiceDetectionImpl::Enable(bool enable) {
     84   CriticalSectionScoped crit_scoped(crit_);
     85   return EnableComponent(enable);
     86 }
     87 
     88 bool VoiceDetectionImpl::is_enabled() const {
     89   return is_component_enabled();
     90 }
     91 
     92 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
     93   using_external_vad_ = true;
     94   stream_has_voice_ = has_voice;
     95   return apm_->kNoError;
     96 }
     97 
     98 bool VoiceDetectionImpl::stream_has_voice() const {
     99   // TODO(ajm): enable this assertion?
    100   //assert(using_external_vad_ || is_component_enabled());
    101   return stream_has_voice_;
    102 }
    103 
    104 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
    105   CriticalSectionScoped crit_scoped(crit_);
    106   if (MapSetting(likelihood) == -1) {
    107     return apm_->kBadParameterError;
    108   }
    109 
    110   likelihood_ = likelihood;
    111   return Configure();
    112 }
    113 
    114 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
    115   return likelihood_;
    116 }
    117 
    118 int VoiceDetectionImpl::set_frame_size_ms(int size) {
    119   CriticalSectionScoped crit_scoped(crit_);
    120   assert(size == 10); // TODO(ajm): remove when supported.
    121   if (size != 10 &&
    122       size != 20 &&
    123       size != 30) {
    124     return apm_->kBadParameterError;
    125   }
    126 
    127   frame_size_ms_ = size;
    128 
    129   return Initialize();
    130 }
    131 
    132 int VoiceDetectionImpl::frame_size_ms() const {
    133   return frame_size_ms_;
    134 }
    135 
    136 int VoiceDetectionImpl::Initialize() {
    137   int err = ProcessingComponent::Initialize();
    138   if (err != apm_->kNoError || !is_component_enabled()) {
    139     return err;
    140   }
    141 
    142   using_external_vad_ = false;
    143   frame_size_samples_ = frame_size_ms_ *
    144       apm_->proc_split_sample_rate_hz() / 1000;
    145   // TODO(ajm): intialize frame buffer here.
    146 
    147   return apm_->kNoError;
    148 }
    149 
    150 void* VoiceDetectionImpl::CreateHandle() const {
    151   Handle* handle = NULL;
    152   if (WebRtcVad_Create(&handle) != apm_->kNoError) {
    153     handle = NULL;
    154   } else {
    155     assert(handle != NULL);
    156   }
    157 
    158   return handle;
    159 }
    160 
    161 void VoiceDetectionImpl::DestroyHandle(void* handle) const {
    162   WebRtcVad_Free(static_cast<Handle*>(handle));
    163 }
    164 
    165 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
    166   return WebRtcVad_Init(static_cast<Handle*>(handle));
    167 }
    168 
    169 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
    170   return WebRtcVad_set_mode(static_cast<Handle*>(handle),
    171                             MapSetting(likelihood_));
    172 }
    173 
    174 int VoiceDetectionImpl::num_handles_required() const {
    175   return 1;
    176 }
    177 
    178 int VoiceDetectionImpl::GetHandleError(void* handle) const {
    179   // The VAD has no get_error() function.
    180   assert(handle != NULL);
    181   return apm_->kUnspecifiedError;
    182 }
    183 }  // namespace webrtc
    184