1 /* 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h" 12 13 #include <stdlib.h> 14 #include <stdio.h> 15 #include <string> 16 17 #include "gflags/gflags.h" 18 #include "testing/gtest/include/gtest/gtest.h" 19 #include "webrtc/base/scoped_ptr.h" 20 #include "webrtc/common_audio/include/audio_util.h" 21 #include "webrtc/modules/audio_processing/agc/agc.h" 22 #include "webrtc/modules/include/module_common_types.h" 23 #include "webrtc/test/testsupport/fileutils.h" 24 #include "webrtc/typedefs.h" 25 26 DEFINE_string(in_file_name, "", "PCM file that contains the signal."); 27 DEFINE_string(detection_file_name, 28 "", 29 "PCM file that contains the detection signal."); 30 DEFINE_string(reference_file_name, 31 "", 32 "PCM file that contains the reference signal."); 33 34 static bool ValidatePositiveInt(const char* flagname, int32_t value) { 35 if (value <= 0) { 36 printf("%s must be a positive integer.\n", flagname); 37 return false; 38 } 39 return true; 40 } 41 DEFINE_int32(chunk_size_ms, 42 10, 43 "Time between each chunk of samples in milliseconds."); 44 static const bool chunk_size_ms_dummy = 45 google::RegisterFlagValidator(&FLAGS_chunk_size_ms, &ValidatePositiveInt); 46 47 DEFINE_int32(sample_rate_hz, 48 16000, 49 "Sampling frequency of the signal in Hertz."); 50 static const bool sample_rate_hz_dummy = 51 google::RegisterFlagValidator(&FLAGS_sample_rate_hz, &ValidatePositiveInt); 52 DEFINE_int32(detection_rate_hz, 53 0, 54 "Sampling frequency of the detection signal in Hertz."); 55 56 DEFINE_int32(num_channels, 1, "Number of channels."); 57 static const bool num_channels_dummy = 58 google::RegisterFlagValidator(&FLAGS_num_channels, &ValidatePositiveInt); 59 60 namespace webrtc { 61 62 const char kUsage[] = 63 "\nDetects and suppresses transients from file.\n\n" 64 "This application loads the signal from the in_file_name with a specific\n" 65 "num_channels and sample_rate_hz, the detection signal from the\n" 66 "detection_file_name with a specific detection_rate_hz, and the reference\n" 67 "signal from the reference_file_name with sample_rate_hz, divides them\n" 68 "into chunk_size_ms blocks, computes its voice value and depending on the\n" 69 "voice_threshold does the respective restoration. You can always get the\n" 70 "all-voiced or all-unvoiced cases by setting the voice_threshold to 0 or\n" 71 "1 respectively.\n\n"; 72 73 // Read next buffers from the test files (signed 16-bit host-endian PCM 74 // format). audio_buffer has int16 samples, detection_buffer has float samples 75 // with range [-32768,32767], and reference_buffer has float samples with range 76 // [-1,1]. Return true iff all the buffers were filled completely. 77 bool ReadBuffers(FILE* in_file, 78 size_t audio_buffer_size, 79 int num_channels, 80 int16_t* audio_buffer, 81 FILE* detection_file, 82 size_t detection_buffer_size, 83 float* detection_buffer, 84 FILE* reference_file, 85 float* reference_buffer) { 86 rtc::scoped_ptr<int16_t[]> tmpbuf; 87 int16_t* read_ptr = audio_buffer; 88 if (num_channels > 1) { 89 tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]); 90 read_ptr = tmpbuf.get(); 91 } 92 if (fread(read_ptr, 93 sizeof(*read_ptr), 94 num_channels * audio_buffer_size, 95 in_file) != num_channels * audio_buffer_size) { 96 return false; 97 } 98 // De-interleave. 99 if (num_channels > 1) { 100 for (int i = 0; i < num_channels; ++i) { 101 for (size_t j = 0; j < audio_buffer_size; ++j) { 102 audio_buffer[i * audio_buffer_size + j] = 103 read_ptr[i + j * num_channels]; 104 } 105 } 106 } 107 if (detection_file) { 108 rtc::scoped_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]); 109 if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size, 110 detection_file) != detection_buffer_size) 111 return false; 112 for (size_t i = 0; i < detection_buffer_size; ++i) 113 detection_buffer[i] = ibuf[i]; 114 } 115 if (reference_file) { 116 rtc::scoped_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]); 117 if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file) 118 != audio_buffer_size) 119 return false; 120 S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer); 121 } 122 return true; 123 } 124 125 // Write a number of samples to an open signed 16-bit host-endian PCM file. 126 static void WritePCM(FILE* f, 127 size_t num_samples, 128 int num_channels, 129 const float* buffer) { 130 rtc::scoped_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]); 131 // Interleave. 132 for (int i = 0; i < num_channels; ++i) { 133 for (size_t j = 0; j < num_samples; ++j) { 134 ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]); 135 } 136 } 137 fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f); 138 } 139 140 // This application tests the transient suppression by providing a processed 141 // PCM file, which has to be listened to in order to evaluate the 142 // performance. 143 // It gets an audio file, and its voice gain information, and the suppressor 144 // process it giving the output file "suppressed_keystrokes.pcm". 145 void void_main() { 146 // TODO(aluebs): Remove all FileWrappers. 147 // Prepare the input file. 148 FILE* in_file = fopen(FLAGS_in_file_name.c_str(), "rb"); 149 ASSERT_TRUE(in_file != NULL); 150 151 // Prepare the detection file. 152 FILE* detection_file = NULL; 153 if (!FLAGS_detection_file_name.empty()) { 154 detection_file = fopen(FLAGS_detection_file_name.c_str(), "rb"); 155 } 156 157 // Prepare the reference file. 158 FILE* reference_file = NULL; 159 if (!FLAGS_reference_file_name.empty()) { 160 reference_file = fopen(FLAGS_reference_file_name.c_str(), "rb"); 161 } 162 163 // Prepare the output file. 164 std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm"; 165 FILE* out_file = fopen(out_file_name.c_str(), "wb"); 166 ASSERT_TRUE(out_file != NULL); 167 168 int detection_rate_hz = FLAGS_detection_rate_hz; 169 if (detection_rate_hz == 0) { 170 detection_rate_hz = FLAGS_sample_rate_hz; 171 } 172 173 Agc agc; 174 175 TransientSuppressor suppressor; 176 suppressor.Initialize( 177 FLAGS_sample_rate_hz, detection_rate_hz, FLAGS_num_channels); 178 179 const size_t audio_buffer_size = 180 FLAGS_chunk_size_ms * FLAGS_sample_rate_hz / 1000; 181 const size_t detection_buffer_size = 182 FLAGS_chunk_size_ms * detection_rate_hz / 1000; 183 184 // int16 and float variants of the same data. 185 rtc::scoped_ptr<int16_t[]> audio_buffer_i( 186 new int16_t[FLAGS_num_channels * audio_buffer_size]); 187 rtc::scoped_ptr<float[]> audio_buffer_f( 188 new float[FLAGS_num_channels * audio_buffer_size]); 189 190 rtc::scoped_ptr<float[]> detection_buffer, reference_buffer; 191 192 if (detection_file) 193 detection_buffer.reset(new float[detection_buffer_size]); 194 if (reference_file) 195 reference_buffer.reset(new float[audio_buffer_size]); 196 197 while (ReadBuffers(in_file, 198 audio_buffer_size, 199 FLAGS_num_channels, 200 audio_buffer_i.get(), 201 detection_file, 202 detection_buffer_size, 203 detection_buffer.get(), 204 reference_file, 205 reference_buffer.get())) { 206 ASSERT_EQ(0, 207 agc.Process(audio_buffer_i.get(), 208 static_cast<int>(audio_buffer_size), 209 FLAGS_sample_rate_hz)) 210 << "The AGC could not process the frame"; 211 212 for (size_t i = 0; i < FLAGS_num_channels * audio_buffer_size; ++i) { 213 audio_buffer_f[i] = audio_buffer_i[i]; 214 } 215 216 ASSERT_EQ(0, 217 suppressor.Suppress(audio_buffer_f.get(), 218 audio_buffer_size, 219 FLAGS_num_channels, 220 detection_buffer.get(), 221 detection_buffer_size, 222 reference_buffer.get(), 223 audio_buffer_size, 224 agc.voice_probability(), 225 true)) 226 << "The transient suppressor could not suppress the frame"; 227 228 // Write result to out file. 229 WritePCM( 230 out_file, audio_buffer_size, FLAGS_num_channels, audio_buffer_f.get()); 231 } 232 233 fclose(in_file); 234 if (detection_file) { 235 fclose(detection_file); 236 } 237 if (reference_file) { 238 fclose(reference_file); 239 } 240 fclose(out_file); 241 } 242 243 } // namespace webrtc 244 245 int main(int argc, char* argv[]) { 246 google::SetUsageMessage(webrtc::kUsage); 247 google::ParseCommandLineFlags(&argc, &argv, true); 248 webrtc::void_main(); 249 return 0; 250 } 251