Home | History | Annotate | Download | only in vad
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
     12 #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
     13 
     14 #include "webrtc/base/scoped_ptr.h"
     15 #include "webrtc/modules/audio_processing/vad/common.h"
     16 #include "webrtc/typedefs.h"
     17 
     18 namespace webrtc {
     19 
     20 class AudioFrame;  // NOTE(review): not referenced by any other line visible in this header.
     21 class PoleZeroFilter;  // Pointee type of VadAudioProc::high_pass_filter_.
     22 
     23 class VadAudioProc {
          // Takes 16-bit audio samples and fills in an AudioFeatures struct (see
          // ExtractFeatures). Judging by the private helpers declared below, the
          // processing involves pitch analysis, LPC-based spectral peaks and RMS;
          // the definitions live outside this header.
          // NOTE(review): kSampleRateHz and AudioFeatures are not declared in this
          // header; presumably they come from vad/common.h -- confirm.
     24  public:
     25   // Forward declare iSAC structs.
     26   struct PitchAnalysisStruct;
     27   struct PreFiltBankstr;
     28 
     29   VadAudioProc();
     30   ~VadAudioProc();
     31 
          // Processes the samples in |audio_frame| and writes the result to
          // |audio_features|. |length| is presumably the number of samples in
          // |audio_frame| -- confirm against the implementation.
          // Returns an int status; kNoError (0, declared below) is presumably the
          // success value -- confirm against the implementation.
     32   int ExtractFeatures(const int16_t* audio_frame,
     33                       size_t length,
     34                       AudioFeatures* audio_features);
     35 
          // DFT size. kIpLength and kWLength below are each half of this value.
     36   static const size_t kDftSize = 512;
     37 
     38  private:
          // Internal helpers. Each takes an output array together with a size_t
          // argument; presumably the size is the capacity of that array -- confirm
          // in the implementation.
     39   void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
     40   void SubframeCorrelation(double* corr,
     41                            size_t length_corr,
     42                            size_t subframe_index);
     43   void GetLpcPolynomials(double* lpc, size_t length_lpc);
     44   void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
     45   void Rms(double* rms, size_t length_rms);
          // Presumably resets audio_buffer_ / num_buffer_samples_ -- confirm.
     46   void ResetBuffer();
     47 
     48   // To compute spectral peaks we perform LPC analysis to get the spectral
     49   // envelope. For every 30 ms we compute 3 spectral peaks, therefore 3 LPC
     50   // analyses. LPC is computed over 15 ms of windowed audio. For every 10 ms
     51   // sub-frame we need 5 ms of past signal to create the input of LPC analysis.
          // kSampleRateHz / 200 is the number of samples in 5 ms.
     52   static const size_t kNumPastSignalSamples =
     53       static_cast<size_t>(kSampleRateHz / 200);
     54 
     55   // TODO(turajs): consider defining this at a higher level (maybe as an enum)
     56   // so that all the code recognizes it as "no-error."
     57   static const int kNoError = 0;
     58 
          // A 30 ms block is handled as 3 sub-frames; kSampleRateHz / 100 is the
          // number of samples in one 10 ms sub-frame.
     59   static const size_t kNum10msSubframes = 3;
     60   static const size_t kNumSubframeSamples =
     61       static_cast<size_t>(kSampleRateHz / 100);
     62   static const size_t kNumSamplesToProcess =
     63       kNum10msSubframes *
     64       kNumSubframeSamples;  // Samples in 30 ms @ given sampling rate.
          // Past-signal samples (5 ms) plus the samples to process (30 ms).
     65   static const size_t kBufferLength =
     66       kNumPastSignalSamples + kNumSamplesToProcess;
          // Sizes of ip_ and w_fft_ below; both are kDftSize / 2 (256).
     67   static const size_t kIpLength = kDftSize >> 1;
     68   static const size_t kWLength = kDftSize >> 1;
     69 
          // Presumably the order of the LPC analysis (usage is outside this header).
     70   static const size_t kLpcOrder = 16;
     71 
          // Arrays sized from kDftSize. Presumably work/table storage for the DFT
          // routine -- confirm in the implementation.
     72   size_t ip_[kIpLength];
     73   float w_fft_[kWLength];
     74 
     75   // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
     76   float audio_buffer_[kBufferLength];
          // Presumably the number of valid samples currently held in audio_buffer_.
     77   size_t num_buffer_samples_;
     78 
          // Presumably pitch gain (log domain) and lag carried over from the previous
          // call -- confirm in the implementation.
     79   double log_old_gain_;
     80   double old_lag_;
     81 
          // Owned helper objects. Their types are only forward-declared in this
          // header, so the full definitions must be visible where the constructor
          // and destructor of this class are defined.
     82   rtc::scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
     83   rtc::scoped_ptr<PreFiltBankstr> pre_filter_handle_;
     84   rtc::scoped_ptr<PoleZeroFilter> high_pass_filter_;
     85 };
     86 
     87 }  // namespace webrtc
     88 
     89 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
     90