/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_

#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/vad/common.h"
#include "webrtc/typedefs.h"

namespace webrtc {

class AudioFrame;
class PoleZeroFilter;

// Holds the buffers, constants and helper handles used to turn incoming
// 16-bit audio into an AudioFeatures record. ExtractFeatures() is the only
// public operation; the private helpers and members below are its working
// state.
//
// NOTE(review): only declarations are visible in this header. Descriptions of
// what the helpers compute are derived from their names and from the comments
// that were already here -- confirm against the implementation file.
class VadAudioProc {
 public:
  // Forward declare iSAC structs. Only pointers to them are stored in this
  // class (see the scoped_ptr members at the bottom), so their definitions are
  // not needed here.
  struct PitchAnalysisStruct;
  struct PreFiltBankstr;

  VadAudioProc();
  ~VadAudioProc();

  // Consumes |length| samples starting at |audio_frame| and reports results
  // through |audio_features|.
  // NOTE(review): the return value is presumably kNoError (0) on success and
  // some other value on failure; how much audio must be accumulated before
  // |audio_features| is populated is decided by the implementation -- confirm.
  int ExtractFeatures(const int16_t* audio_frame,
                      size_t length,
                      AudioFeatures* audio_features);

  // Size of the DFT. The private work-area lengths kIpLength and kWLength are
  // both derived from it (half of this value).
  static const size_t kDftSize = 512;

 private:
  // Private analysis helpers. Each one writes into a caller-supplied double
  // array whose capacity is passed alongside it.
  // NOTE(review): exact outputs and preconditions are defined in the
  // implementation file, not in this header.
  void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
  void SubframeCorrelation(double* corr,
                           size_t length_corr,
                           size_t subframe_index);
  void GetLpcPolynomials(double* lpc, size_t length_lpc);
  void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
  void Rms(double* rms, size_t length_rms);
  void ResetBuffer();

  // To compute the spectral peaks we perform LPC analysis to get the spectral
  // envelope. Every 30 ms we compute 3 spectral peaks, and therefore run 3 LPC
  // analyses. LPC is computed over 15 ms of windowed audio, so for every 10 ms
  // sub-frame we need 5 ms of past signal to create the input of the LPC
  // analysis. kSampleRateHz / 200 is the number of samples in those 5 ms
  // (kSampleRateHz is declared outside this header).
  static const size_t kNumPastSignalSamples =
      static_cast<size_t>(kSampleRateHz / 200);

  // TODO(turajs): maybe define this at a higher level (maybe as an enum) so
  // that all the code recognizes it as "no-error."
  static const int kNoError = 0;

  // A processing block is made of three 10 ms sub-frames.
  static const size_t kNum10msSubframes = 3;
  // Samples in one 10 ms sub-frame (sample rate / 100).
  static const size_t kNumSubframeSamples =
      static_cast<size_t>(kSampleRateHz / 100);
  static const size_t kNumSamplesToProcess =
      kNum10msSubframes *
      kNumSubframeSamples;  // Samples in 30 ms @ given sampling rate.
  // Past-signal samples plus one 30 ms block; this is the capacity of
  // |audio_buffer_|.
  static const size_t kBufferLength =
      kNumPastSignalSamples + kNumSamplesToProcess;
  // Lengths of |ip_| and |w_fft_|: half the DFT size (kDftSize >> 1 == 256).
  static const size_t kIpLength = kDftSize >> 1;
  static const size_t kWLength = kDftSize >> 1;

  // NOTE(review): presumably the order of the LPC polynomials produced by
  // GetLpcPolynomials() -- it is not referenced elsewhere in this header.
  static const size_t kLpcOrder = 16;

  // NOTE(review): presumably scratch/work areas handed to the FFT routine
  // (the names and the kDftSize-derived lengths suggest so) -- confirm in the
  // implementation file.
  size_t ip_[kIpLength];
  float w_fft_[kWLength];

  // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
  float audio_buffer_[kBufferLength];
  // NOTE(review): presumably the number of valid samples currently held in
  // |audio_buffer_| -- confirm.
  size_t num_buffer_samples_;

  // NOTE(review): presumably pitch gain (log domain) and pitch lag carried
  // over from the previously analysed frame -- confirm.
  double log_old_gain_;
  double old_lag_;

  // Owned helper objects. The first two point at the iSAC structs forward
  // declared above; the third at the forward-declared PoleZeroFilter.
  rtc::scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
  rtc::scoped_ptr<PreFiltBankstr> pre_filter_handle_;
  rtc::scoped_ptr<PoleZeroFilter> high_pass_filter_;
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_