Home | History | Annotate | Download | only in transient
      1 /*
      2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
     12 
     13 #include <stdlib.h>
     14 #include <stdio.h>
     15 #include <string>
     16 
     17 #include "gflags/gflags.h"
     18 #include "testing/gtest/include/gtest/gtest.h"
     19 #include "webrtc/base/scoped_ptr.h"
     20 #include "webrtc/common_audio/include/audio_util.h"
     21 #include "webrtc/modules/audio_processing/agc/agc.h"
     22 #include "webrtc/modules/include/module_common_types.h"
     23 #include "webrtc/test/testsupport/fileutils.h"
     24 #include "webrtc/typedefs.h"
     25 
// Command-line flags naming the PCM files the tool reads.
//
// Path of the main input signal. void_main() asserts that this file can be
// opened, so it is effectively required.
DEFINE_string(in_file_name, "", "PCM file that contains the signal.");
// Path of the detection signal. When left empty (the default) void_main()
// does not open a detection file and passes a NULL detection buffer on.
DEFINE_string(detection_file_name,
              "",
              "PCM file that contains the detection signal.");
// Path of the reference signal. When left empty (the default) void_main()
// does not open a reference file and passes a NULL reference buffer on.
DEFINE_string(reference_file_name,
              "",
              "PCM file that contains the reference signal.");
     33 
     34 static bool ValidatePositiveInt(const char* flagname, int32_t value) {
     35   if (value <= 0) {
     36     printf("%s must be a positive integer.\n", flagname);
     37     return false;
     38   }
     39   return true;
     40 }
// Numeric command-line flags. Each *_dummy constant below exists only so that
// the RegisterFlagValidator() call in its initializer is executed; the stored
// bool itself is not read anywhere in this file.
//
// Length of one processing chunk in milliseconds (default 10).
// ValidatePositiveInt is registered as its validator.
DEFINE_int32(chunk_size_ms,
             10,
             "Time between each chunk of samples in milliseconds.");
static const bool chunk_size_ms_dummy =
    google::RegisterFlagValidator(&FLAGS_chunk_size_ms, &ValidatePositiveInt);

// Sample rate of the input (and reference) signal in Hz (default 16000).
// ValidatePositiveInt is registered as its validator.
DEFINE_int32(sample_rate_hz,
             16000,
             "Sampling frequency of the signal in Hertz.");
static const bool sample_rate_hz_dummy =
    google::RegisterFlagValidator(&FLAGS_sample_rate_hz, &ValidatePositiveInt);
// Sample rate of the detection signal in Hz. The default of 0 makes
// void_main() fall back to sample_rate_hz. No validator is registered for
// this flag.
DEFINE_int32(detection_rate_hz,
             0,
             "Sampling frequency of the detection signal in Hertz.");

// Number of interleaved channels in the input file (default 1).
// ValidatePositiveInt is registered as its validator.
DEFINE_int32(num_channels, 1, "Number of channels.");
static const bool num_channels_dummy =
    google::RegisterFlagValidator(&FLAGS_num_channels, &ValidatePositiveInt);
     59 
     60 namespace webrtc {
     61 
// Usage text handed to google::SetUsageMessage() in main().
// NOTE(review): the text refers to a "voice_threshold" setting, but no flag
// with that name is defined among the DEFINE_* declarations visible in this
// file -- confirm whether the wording is stale before relying on it.
const char kUsage[] =
    "\nDetects and suppresses transients from file.\n\n"
    "This application loads the signal from the in_file_name with a specific\n"
    "num_channels and sample_rate_hz, the detection signal from the\n"
    "detection_file_name with a specific detection_rate_hz, and the reference\n"
    "signal from the reference_file_name with sample_rate_hz, divides them\n"
    "into chunk_size_ms blocks, computes its voice value and depending on the\n"
    "voice_threshold does the respective restoration. You can always get the\n"
    "all-voiced or all-unvoiced cases by setting the voice_threshold to 0 or\n"
    "1 respectively.\n\n";
     72 
     73 // Read next buffers from the test files (signed 16-bit host-endian PCM
     74 // format). audio_buffer has int16 samples, detection_buffer has float samples
     75 // with range [-32768,32767], and reference_buffer has float samples with range
     76 // [-1,1]. Return true iff all the buffers were filled completely.
     77 bool ReadBuffers(FILE* in_file,
     78                  size_t audio_buffer_size,
     79                  int num_channels,
     80                  int16_t* audio_buffer,
     81                  FILE* detection_file,
     82                  size_t detection_buffer_size,
     83                  float* detection_buffer,
     84                  FILE* reference_file,
     85                  float* reference_buffer) {
     86   rtc::scoped_ptr<int16_t[]> tmpbuf;
     87   int16_t* read_ptr = audio_buffer;
     88   if (num_channels > 1) {
     89     tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
     90     read_ptr = tmpbuf.get();
     91   }
     92   if (fread(read_ptr,
     93             sizeof(*read_ptr),
     94             num_channels * audio_buffer_size,
     95             in_file) != num_channels * audio_buffer_size) {
     96     return false;
     97   }
     98   // De-interleave.
     99   if (num_channels > 1) {
    100     for (int i = 0; i < num_channels; ++i) {
    101       for (size_t j = 0; j < audio_buffer_size; ++j) {
    102         audio_buffer[i * audio_buffer_size + j] =
    103             read_ptr[i + j * num_channels];
    104       }
    105     }
    106   }
    107   if (detection_file) {
    108     rtc::scoped_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
    109     if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
    110               detection_file) != detection_buffer_size)
    111       return false;
    112     for (size_t i = 0; i < detection_buffer_size; ++i)
    113       detection_buffer[i] = ibuf[i];
    114   }
    115   if (reference_file) {
    116     rtc::scoped_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
    117     if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file)
    118         != audio_buffer_size)
    119       return false;
    120     S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
    121   }
    122   return true;
    123 }
    124 
    125 // Write a number of samples to an open signed 16-bit host-endian PCM file.
    126 static void WritePCM(FILE* f,
    127                      size_t num_samples,
    128                      int num_channels,
    129                      const float* buffer) {
    130   rtc::scoped_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
    131   // Interleave.
    132   for (int i = 0; i < num_channels; ++i) {
    133     for (size_t j = 0; j < num_samples; ++j) {
    134       ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
    135     }
    136   }
    137   fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
    138 }
    139 
    140 // This application tests the transient suppression by providing a processed
    141 // PCM file, which has to be listened to in order to evaluate the
    142 // performance.
    143 // It gets an audio file, and its voice gain information, and the suppressor
    144 // process it giving the output file "suppressed_keystrokes.pcm".
    145 void void_main() {
    146   // TODO(aluebs): Remove all FileWrappers.
    147   // Prepare the input file.
    148   FILE* in_file = fopen(FLAGS_in_file_name.c_str(), "rb");
    149   ASSERT_TRUE(in_file != NULL);
    150 
    151   // Prepare the detection file.
    152   FILE* detection_file = NULL;
    153   if (!FLAGS_detection_file_name.empty()) {
    154     detection_file = fopen(FLAGS_detection_file_name.c_str(), "rb");
    155   }
    156 
    157   // Prepare the reference file.
    158   FILE* reference_file = NULL;
    159   if (!FLAGS_reference_file_name.empty()) {
    160     reference_file = fopen(FLAGS_reference_file_name.c_str(), "rb");
    161   }
    162 
    163   // Prepare the output file.
    164   std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
    165   FILE* out_file = fopen(out_file_name.c_str(), "wb");
    166   ASSERT_TRUE(out_file != NULL);
    167 
    168   int detection_rate_hz = FLAGS_detection_rate_hz;
    169   if (detection_rate_hz == 0) {
    170     detection_rate_hz = FLAGS_sample_rate_hz;
    171   }
    172 
    173   Agc agc;
    174 
    175   TransientSuppressor suppressor;
    176   suppressor.Initialize(
    177       FLAGS_sample_rate_hz, detection_rate_hz, FLAGS_num_channels);
    178 
    179   const size_t audio_buffer_size =
    180       FLAGS_chunk_size_ms * FLAGS_sample_rate_hz / 1000;
    181   const size_t detection_buffer_size =
    182       FLAGS_chunk_size_ms * detection_rate_hz / 1000;
    183 
    184   // int16 and float variants of the same data.
    185   rtc::scoped_ptr<int16_t[]> audio_buffer_i(
    186       new int16_t[FLAGS_num_channels * audio_buffer_size]);
    187   rtc::scoped_ptr<float[]> audio_buffer_f(
    188       new float[FLAGS_num_channels * audio_buffer_size]);
    189 
    190   rtc::scoped_ptr<float[]> detection_buffer, reference_buffer;
    191 
    192   if (detection_file)
    193     detection_buffer.reset(new float[detection_buffer_size]);
    194   if (reference_file)
    195     reference_buffer.reset(new float[audio_buffer_size]);
    196 
    197   while (ReadBuffers(in_file,
    198                      audio_buffer_size,
    199                      FLAGS_num_channels,
    200                      audio_buffer_i.get(),
    201                      detection_file,
    202                      detection_buffer_size,
    203                      detection_buffer.get(),
    204                      reference_file,
    205                      reference_buffer.get())) {
    206     ASSERT_EQ(0,
    207               agc.Process(audio_buffer_i.get(),
    208                           static_cast<int>(audio_buffer_size),
    209                           FLAGS_sample_rate_hz))
    210         << "The AGC could not process the frame";
    211 
    212     for (size_t i = 0; i < FLAGS_num_channels * audio_buffer_size; ++i) {
    213       audio_buffer_f[i] = audio_buffer_i[i];
    214     }
    215 
    216     ASSERT_EQ(0,
    217               suppressor.Suppress(audio_buffer_f.get(),
    218                                   audio_buffer_size,
    219                                   FLAGS_num_channels,
    220                                   detection_buffer.get(),
    221                                   detection_buffer_size,
    222                                   reference_buffer.get(),
    223                                   audio_buffer_size,
    224                                   agc.voice_probability(),
    225                                   true))
    226         << "The transient suppressor could not suppress the frame";
    227 
    228     // Write result to out file.
    229     WritePCM(
    230         out_file, audio_buffer_size, FLAGS_num_channels, audio_buffer_f.get());
    231   }
    232 
    233   fclose(in_file);
    234   if (detection_file) {
    235     fclose(detection_file);
    236   }
    237   if (reference_file) {
    238     fclose(reference_file);
    239   }
    240   fclose(out_file);
    241 }
    242 
    243 }  // namespace webrtc
    244 
    245 int main(int argc, char* argv[]) {
    246   google::SetUsageMessage(webrtc::kUsage);
    247   google::ParseCommandLineFlags(&argc, &argv, true);
    248   webrtc::void_main();
    249   return 0;
    250 }
    251