Home | History | Annotate | Download | only in transient
      1 /*
      2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
     12 #define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
     13 
     14 #include <deque>
     15 #include <set>
     16 
     17 #include "webrtc/base/scoped_ptr.h"
     18 #include "webrtc/test/testsupport/gtest_prod_util.h"
     19 #include "webrtc/typedefs.h"
     20 
     21 namespace webrtc {
     22 
     23 class TransientDetector;
     24 
     25 // Detects transients in an audio stream and suppresses them using a simple
     26 // restoration algorithm that attenuates unexpected spikes in the spectrum.
     27 class TransientSuppressor {
     28  public:
     29   TransientSuppressor();
     30   ~TransientSuppressor();
     31 
          // Sets up the suppressor from |sample_rate_hz|, |detector_rate_hz| and
          // |num_channels|.
          // NOTE(review): presumably this must be called before Suppress(), with
          // |detector_rate_hz| being the sample rate of |detection_data|. The return
          // convention is not stated in this header (likely 0 on success and -1
          // otherwise, as for Suppress()) - confirm against the .cc file.
     32   int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);
     33 
     34   // Processes a |data| chunk, and returns it with keystrokes suppressed from
     35   // it. The float format is assumed to be int16 ranged. If there is more than
     36   // one channel, the chunks are concatenated one after the other in |data|.
     37   // |data_length| must be equal to |data_length_|.
     38   // |num_channels| must be equal to |num_channels_|.
     39   // A sub-band, ideally the higher, can be used as |detection_data|. If it is
     40   // NULL, |data| is used for the detection too. The |detection_data| is always
     41   // assumed mono.
     42   // If a reference signal (e.g. keyboard microphone) is available, it can be
     43   // passed in as |reference_data|. It is assumed mono and must have the same
     44   // length as |data|. NULL is accepted if unavailable.
     45   // This suppressor performs better if voice information is available.
     46   // |voice_probability| is the probability of voice being present in this chunk
     47   // of audio. If voice information is not available, |voice_probability| must
     48   // always be set to 1.
     49   // |key_pressed| determines if a key was pressed on this audio chunk.
     50   // Returns 0 on success and -1 otherwise.
     51   int Suppress(float* data,
     52                size_t data_length,
     53                int num_channels,
     54                const float* detection_data,
     55                size_t detection_length,
     56                const float* reference_data,
     57                size_t reference_length,
     58                float voice_probability,
     59                bool key_pressed);
     60 
     61  private:
          // Gives the named test case access to the private members below.
     62   FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
     63                            TypingDetectionLogicWorksAsExpectedForMono);
          // Private overload of Suppress() that works on raw buffers.
          // NOTE(review): presumably handles a single channel, reading |in_ptr| and
          // |spectral_mean| and writing the result to |out_ptr| - confirm in the .cc.
     64   void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
     65 
          // State-update helpers driven by the per-chunk |key_pressed| and
          // |voice_probability| inputs.
          // NOTE(review): presumably these maintain the keypress counters and the
          // hard/soft restoration choice declared further down - confirm in the .cc.
     66   void UpdateKeypress(bool key_pressed);
     67   void UpdateRestoration(float voice_probability);
     68 
          // NOTE(review): presumably feeds a new |data| chunk into the internal
          // input/output buffers - confirm in the .cc.
     69   void UpdateBuffers(float* data);
     70 
          // The two restoration variants; both operate on |spectral_mean|.
          // NOTE(review): which one runs is presumably selected by
          // |use_hard_restoration_| - confirm in the .cc.
     71   void HardRestoration(float* spectral_mean);
     72   void SoftRestoration(float* spectral_mean);
     73 
          // Detector instance owned by this object (only forward-declared in this
          // header).
     74   rtc::scoped_ptr<TransientDetector> detector_;
     75 
          // Value that the |data_length| argument of Suppress() must match.
     76   size_t data_length_;
          // NOTE(review): presumably the expected |detection_length| - confirm.
     77   size_t detection_length_;
          // NOTE(review): the three sizes below are presumably derived from the
          // sample rate in Initialize() and describe the FFT analysis frame - confirm.
     78   size_t analysis_length_;
     79   size_t buffer_delay_;
     80   size_t complex_analysis_length_;
          // Value that the |num_channels| argument of Suppress() must match.
     81   int num_channels_;
     82   // Input buffer where the original samples are stored.
     83   rtc::scoped_ptr<float[]> in_buffer_;
     84   rtc::scoped_ptr<float[]> detection_buffer_;
     85   // Output buffer where the restored samples are stored.
     86   rtc::scoped_ptr<float[]> out_buffer_;
     87 
     88   // Arrays for fft.
     89   rtc::scoped_ptr<size_t[]> ip_;
     90   rtc::scoped_ptr<float[]> wfft_;
     91 
     92   rtc::scoped_ptr<float[]> spectral_mean_;
     93 
     94   // Stores the data for the fft.
     95   rtc::scoped_ptr<float[]> fft_buffer_;
     96 
     97   rtc::scoped_ptr<float[]> magnitudes_;
     98 
          // Raw pointer, unlike the scoped_ptr-held buffers above.
          // NOTE(review): presumably points at a window table that this class does
          // not own - confirm in the .cc.
     99   const float* window_;
    100 
    101   rtc::scoped_ptr<float[]> mean_factor_;
    102 
    103   float detector_smoothed_;
    104 
          // Keypress/typing-detection state.
          // NOTE(review): presumably updated by UpdateKeypress() - confirm.
    105   int keypress_counter_;
    106   int chunks_since_keypress_;
    107   bool detection_enabled_;
    108   bool suppression_enabled_;
    109 
          // Restoration-mode state.
          // NOTE(review): presumably updated by UpdateRestoration() - confirm.
    110   bool use_hard_restoration_;
    111   int chunks_since_voice_change_;
    112 
          // NOTE(review): presumably the state of a pseudo-random generator used
          // during restoration - confirm.
    113   uint32_t seed_;
    114 
          // NOTE(review): presumably true when |reference_data| is being used for
          // detection - confirm.
    115   bool using_reference_;
    116 };
    117 
    118 }  // namespace webrtc
    119 
    120 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
    121