Home | History | Annotate | Download | only in audio_processing
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
     12 
     13 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
     14 #include "webrtc/modules/audio_processing/audio_buffer.h"
     15 
     16 namespace webrtc {
     17 class VoiceDetectionImpl::Vad {
     18  public:
     19   Vad() {
     20     state_ = WebRtcVad_Create();
     21     RTC_CHECK(state_);
     22     int error = WebRtcVad_Init(state_);
     23     RTC_DCHECK_EQ(0, error);
     24   }
     25   ~Vad() {
     26     WebRtcVad_Free(state_);
     27   }
     28   VadInst* state() { return state_; }
     29  private:
     30   VadInst* state_ = nullptr;
     31   RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
     32 };
     33 
     34 VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
     35     : crit_(crit) {
     36   RTC_DCHECK(crit);
     37 }
     38 
     39 VoiceDetectionImpl::~VoiceDetectionImpl() {}
     40 
     41 void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
     42   rtc::CritScope cs(crit_);
     43   sample_rate_hz_ = sample_rate_hz;
     44   rtc::scoped_ptr<Vad> new_vad;
     45   if (enabled_) {
     46     new_vad.reset(new Vad());
     47   }
     48   vad_.swap(new_vad);
     49   using_external_vad_ = false;
     50   frame_size_samples_ =
     51       static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
     52   set_likelihood(likelihood_);
     53 }
     54 
     55 void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
     56   rtc::CritScope cs(crit_);
     57   if (!enabled_) {
     58     return;
     59   }
     60   if (using_external_vad_) {
     61     using_external_vad_ = false;
     62     return;
     63   }
     64 
     65   RTC_DCHECK_GE(160u, audio->num_frames_per_band());
     66   // TODO(ajm): concatenate data in frame buffer here.
     67   int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
     68                                   audio->mixed_low_pass_data(),
     69                                   frame_size_samples_);
     70   if (vad_ret == 0) {
     71     stream_has_voice_ = false;
     72     audio->set_activity(AudioFrame::kVadPassive);
     73   } else if (vad_ret == 1) {
     74     stream_has_voice_ = true;
     75     audio->set_activity(AudioFrame::kVadActive);
     76   } else {
     77     RTC_NOTREACHED();
     78   }
     79 }
     80 
     81 int VoiceDetectionImpl::Enable(bool enable) {
     82   rtc::CritScope cs(crit_);
     83   if (enabled_ != enable) {
     84     enabled_ = enable;
     85     Initialize(sample_rate_hz_);
     86   }
     87   return AudioProcessing::kNoError;
     88 }
     89 
     90 bool VoiceDetectionImpl::is_enabled() const {
     91   rtc::CritScope cs(crit_);
     92   return enabled_;
     93 }
     94 
     95 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
     96   rtc::CritScope cs(crit_);
     97   using_external_vad_ = true;
     98   stream_has_voice_ = has_voice;
     99   return AudioProcessing::kNoError;
    100 }
    101 
    102 bool VoiceDetectionImpl::stream_has_voice() const {
    103   rtc::CritScope cs(crit_);
    104   // TODO(ajm): enable this assertion?
    105   //assert(using_external_vad_ || is_component_enabled());
    106   return stream_has_voice_;
    107 }
    108 
    109 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
    110   rtc::CritScope cs(crit_);
    111   likelihood_ = likelihood;
    112   if (enabled_) {
    113     int mode = 2;
    114     switch (likelihood) {
    115       case VoiceDetection::kVeryLowLikelihood:
    116         mode = 3;
    117         break;
    118       case VoiceDetection::kLowLikelihood:
    119         mode = 2;
    120         break;
    121       case VoiceDetection::kModerateLikelihood:
    122         mode = 1;
    123         break;
    124       case VoiceDetection::kHighLikelihood:
    125         mode = 0;
    126         break;
    127       default:
    128         RTC_NOTREACHED();
    129         break;
    130     }
    131     int error = WebRtcVad_set_mode(vad_->state(), mode);
    132     RTC_DCHECK_EQ(0, error);
    133   }
    134   return AudioProcessing::kNoError;
    135 }
    136 
    137 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
    138   rtc::CritScope cs(crit_);
    139   return likelihood_;
    140 }
    141 
    142 int VoiceDetectionImpl::set_frame_size_ms(int size) {
    143   rtc::CritScope cs(crit_);
    144   RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
    145   frame_size_ms_ = size;
    146   Initialize(sample_rate_hz_);
    147   return AudioProcessing::kNoError;
    148 }
    149 
    150 int VoiceDetectionImpl::frame_size_ms() const {
    151   rtc::CritScope cs(crit_);
    152   return frame_size_ms_;
    153 }
    154 }  // namespace webrtc
    155