Home | History | Annotate | Download | only in audio_processing
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "voice_detection_impl.h"
     12 
     13 #include <cassert>
     14 
     15 #include "critical_section_wrapper.h"
     16 #include "webrtc_vad.h"
     17 
     18 #include "audio_processing_impl.h"
     19 #include "audio_buffer.h"
     20 
     21 namespace webrtc {
     22 
     23 typedef VadInst Handle;
     24 
     25 namespace {
     26 WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) {
     27   switch (likelihood) {
     28     case VoiceDetection::kVeryLowLikelihood:
     29       return 3;
     30       break;
     31     case VoiceDetection::kLowLikelihood:
     32       return 2;
     33       break;
     34     case VoiceDetection::kModerateLikelihood:
     35       return 1;
     36       break;
     37     case VoiceDetection::kHighLikelihood:
     38       return 0;
     39       break;
     40     default:
     41       return -1;
     42   }
     43 }
     44 }  // namespace
     45 
     46 
     47 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
     48   : ProcessingComponent(apm),
     49     apm_(apm),
     50     stream_has_voice_(false),
     51     using_external_vad_(false),
     52     likelihood_(kLowLikelihood),
     53     frame_size_ms_(10),
     54     frame_size_samples_(0) {}
     55 
     56 VoiceDetectionImpl::~VoiceDetectionImpl() {}
     57 
     58 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
     59   if (!is_component_enabled()) {
     60     return apm_->kNoError;
     61   }
     62 
     63   if (using_external_vad_) {
     64     using_external_vad_ = false;
     65     return apm_->kNoError;
     66   }
     67   assert(audio->samples_per_split_channel() <= 160);
     68 
     69   WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
     70   if (audio->num_channels() > 1) {
     71     audio->CopyAndMixLowPass(1);
     72     mixed_data = audio->mixed_low_pass_data(0);
     73   }
     74 
     75   // TODO(ajm): concatenate data in frame buffer here.
     76 
     77   int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
     78                                   apm_->split_sample_rate_hz(),
     79                                   mixed_data,
     80                                   frame_size_samples_);
     81   if (vad_ret == 0) {
     82     stream_has_voice_ = false;
     83     audio->set_activity(AudioFrame::kVadPassive);
     84   } else if (vad_ret == 1) {
     85     stream_has_voice_ = true;
     86     audio->set_activity(AudioFrame::kVadActive);
     87   } else {
     88     return apm_->kUnspecifiedError;
     89   }
     90 
     91   return apm_->kNoError;
     92 }
     93 
     94 int VoiceDetectionImpl::Enable(bool enable) {
     95   CriticalSectionScoped crit_scoped(*apm_->crit());
     96   return EnableComponent(enable);
     97 }
     98 
     99 bool VoiceDetectionImpl::is_enabled() const {
    100   return is_component_enabled();
    101 }
    102 
    103 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
    104   using_external_vad_ = true;
    105   stream_has_voice_ = has_voice;
    106   return apm_->kNoError;
    107 }
    108 
    109 bool VoiceDetectionImpl::stream_has_voice() const {
    110   // TODO(ajm): enable this assertion?
    111   //assert(using_external_vad_ || is_component_enabled());
    112   return stream_has_voice_;
    113 }
    114 
    115 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
    116   CriticalSectionScoped crit_scoped(*apm_->crit());
    117   if (MapSetting(likelihood) == -1) {
    118     return apm_->kBadParameterError;
    119   }
    120 
    121   likelihood_ = likelihood;
    122   return Configure();
    123 }
    124 
    125 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
    126   return likelihood_;
    127 }
    128 
    129 int VoiceDetectionImpl::set_frame_size_ms(int size) {
    130   CriticalSectionScoped crit_scoped(*apm_->crit());
    131   assert(size == 10); // TODO(ajm): remove when supported.
    132   if (size != 10 &&
    133       size != 20 &&
    134       size != 30) {
    135     return apm_->kBadParameterError;
    136   }
    137 
    138   frame_size_ms_ = size;
    139 
    140   return Initialize();
    141 }
    142 
    143 int VoiceDetectionImpl::frame_size_ms() const {
    144   return frame_size_ms_;
    145 }
    146 
    147 int VoiceDetectionImpl::Initialize() {
    148   int err = ProcessingComponent::Initialize();
    149   if (err != apm_->kNoError || !is_component_enabled()) {
    150     return err;
    151   }
    152 
    153   using_external_vad_ = false;
    154   frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
    155   // TODO(ajm): intialize frame buffer here.
    156 
    157   return apm_->kNoError;
    158 }
    159 
    160 int VoiceDetectionImpl::get_version(char* version,
    161                                     int version_len_bytes) const {
    162   if (WebRtcVad_get_version(version, version_len_bytes) != 0) {
    163     return apm_->kBadParameterError;
    164   }
    165 
    166   return apm_->kNoError;
    167 }
    168 
    169 void* VoiceDetectionImpl::CreateHandle() const {
    170   Handle* handle = NULL;
    171   if (WebRtcVad_Create(&handle) != apm_->kNoError) {
    172     handle = NULL;
    173   } else {
    174     assert(handle != NULL);
    175   }
    176 
    177   return handle;
    178 }
    179 
    180 int VoiceDetectionImpl::DestroyHandle(void* handle) const {
    181   return WebRtcVad_Free(static_cast<Handle*>(handle));
    182 }
    183 
    184 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
    185   return WebRtcVad_Init(static_cast<Handle*>(handle));
    186 }
    187 
    188 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
    189   return WebRtcVad_set_mode(static_cast<Handle*>(handle),
    190                             MapSetting(likelihood_));
    191 }
    192 
    193 int VoiceDetectionImpl::num_handles_required() const {
    194   return 1;
    195 }
    196 
    197 int VoiceDetectionImpl::GetHandleError(void* handle) const {
    198   // The VAD has no get_error() function.
    199   assert(handle != NULL);
    200   return apm_->kUnspecifiedError;
    201 }
    202 }  // namespace webrtc
    203