1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <math.h> 12 #include <stdio.h> 13 #include <string.h> 14 #ifdef WEBRTC_ANDROID 15 #include <sys/stat.h> 16 #endif 17 18 #include <algorithm> 19 20 #include "webrtc/base/format_macros.h" 21 #include "webrtc/base/scoped_ptr.h" 22 #include "webrtc/common.h" 23 #include "webrtc/modules/audio_processing/include/audio_processing.h" 24 #include "webrtc/modules/audio_processing/test/protobuf_utils.h" 25 #include "webrtc/modules/audio_processing/test/test_utils.h" 26 #include "webrtc/modules/include/module_common_types.h" 27 #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" 28 #include "webrtc/system_wrappers/include/tick_util.h" 29 #include "webrtc/test/testsupport/fileutils.h" 30 #include "webrtc/test/testsupport/perf_test.h" 31 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD 32 #include "gtest/gtest.h" 33 #include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" 34 #else 35 #include "testing/gtest/include/gtest/gtest.h" 36 #include "webrtc/audio_processing/debug.pb.h" 37 #endif 38 39 namespace webrtc { 40 41 using webrtc::audioproc::Event; 42 using webrtc::audioproc::Init; 43 using webrtc::audioproc::ReverseStream; 44 using webrtc::audioproc::Stream; 45 46 namespace { 47 48 void PrintStat(const AudioProcessing::Statistic& stat) { 49 printf("%d, %d, %d\n", stat.average, 50 stat.maximum, 51 stat.minimum); 52 } 53 54 void usage() { 55 printf( 56 "Usage: process_test [options] [-pb PROTOBUF_FILE]\n" 57 " [-ir REVERSE_FILE] [-i PRIMARY_FILE] [-o OUT_FILE]\n"); 58 printf( 59 "process_test is a test application for AudioProcessing.\n\n" 60 "When a protobuf debug file is available, specify it with -pb. Alternately,\n" 61 "when -ir or -i is used, the specified files will be processed directly in\n" 62 "a simulation mode. Otherwise the full set of legacy test files is expected\n" 63 "to be present in the working directory. OUT_FILE should be specified\n" 64 "without extension to support both raw and wav output.\n\n"); 65 printf("Options\n"); 66 printf("General configuration (only used for the simulation mode):\n"); 67 printf(" -fs SAMPLE_RATE_HZ\n"); 68 printf(" -ch CHANNELS_IN CHANNELS_OUT\n"); 69 printf(" -rch REVERSE_CHANNELS\n"); 70 printf("\n"); 71 printf("Component configuration:\n"); 72 printf( 73 "All components are disabled by default. Each block below begins with a\n" 74 "flag to enable the component with default settings. The subsequent flags\n" 75 "in the block are used to provide configuration settings.\n"); 76 printf("\n -aec Echo cancellation\n"); 77 printf(" --drift_compensation\n"); 78 printf(" --no_drift_compensation\n"); 79 printf(" --no_echo_metrics\n"); 80 printf(" --no_delay_logging\n"); 81 printf(" --aec_suppression_level LEVEL [0 - 2]\n"); 82 printf(" --extended_filter\n"); 83 printf(" --no_reported_delay\n"); 84 printf("\n -aecm Echo control mobile\n"); 85 printf(" --aecm_echo_path_in_file FILE\n"); 86 printf(" --aecm_echo_path_out_file FILE\n"); 87 printf(" --no_comfort_noise\n"); 88 printf(" --routing_mode MODE [0 - 4]\n"); 89 printf("\n -agc Gain control\n"); 90 printf(" --analog\n"); 91 printf(" --adaptive_digital\n"); 92 printf(" --fixed_digital\n"); 93 printf(" --target_level LEVEL\n"); 94 printf(" --compression_gain GAIN\n"); 95 printf(" --limiter\n"); 96 printf(" --no_limiter\n"); 97 printf("\n -hpf High pass filter\n"); 98 printf("\n -ns Noise suppression\n"); 99 printf(" --ns_low\n"); 100 printf(" --ns_moderate\n"); 101 printf(" --ns_high\n"); 102 printf(" --ns_very_high\n"); 103 printf(" --ns_prob_file FILE\n"); 104 printf("\n -vad Voice activity detection\n"); 105 printf(" --vad_out_file FILE\n"); 106 printf("\n -expns Experimental noise suppression\n"); 107 printf("\n Level metrics (enabled by default)\n"); 108 printf(" --no_level_metrics\n"); 109 printf("\n"); 110 printf("Modifiers:\n"); 111 printf(" --noasm Disable SSE optimization.\n"); 112 printf(" --add_delay DELAY Add DELAY ms to input value.\n"); 113 printf(" --delay DELAY Override input delay with DELAY ms.\n"); 114 printf(" --perf Measure performance.\n"); 115 printf(" --quiet Suppress text output.\n"); 116 printf(" --no_progress Suppress progress.\n"); 117 printf(" --raw_output Raw output instead of WAV file.\n"); 118 printf(" --debug_file FILE Dump a debug recording.\n"); 119 } 120 121 static float MicLevel2Gain(int level) { 122 return pow(10.0f, ((level - 127.0f) / 128.0f * 40.0f) / 20.0f); 123 } 124 125 static void SimulateMic(int mic_level, AudioFrame* frame) { 126 mic_level = std::min(std::max(mic_level, 0), 255); 127 float mic_gain = MicLevel2Gain(mic_level); 128 int num_samples = frame->samples_per_channel_ * frame->num_channels_; 129 float v; 130 for (int n = 0; n < num_samples; n++) { 131 v = floor(frame->data_[n] * mic_gain + 0.5); 132 v = std::max(std::min(32767.0f, v), -32768.0f); 133 frame->data_[n] = static_cast<int16_t>(v); 134 } 135 } 136 137 // void function for gtest. 138 void void_main(int argc, char* argv[]) { 139 if (argc > 1 && strcmp(argv[1], "--help") == 0) { 140 usage(); 141 return; 142 } 143 144 if (argc < 2) { 145 printf("Did you mean to run without arguments?\n"); 146 printf("Try `process_test --help' for more information.\n\n"); 147 } 148 149 rtc::scoped_ptr<AudioProcessing> apm(AudioProcessing::Create()); 150 ASSERT_TRUE(apm.get() != NULL); 151 152 const char* pb_filename = NULL; 153 const char* far_filename = NULL; 154 const char* near_filename = NULL; 155 std::string out_filename; 156 const char* vad_out_filename = NULL; 157 const char* ns_prob_filename = NULL; 158 const char* aecm_echo_path_in_filename = NULL; 159 const char* aecm_echo_path_out_filename = NULL; 160 161 int32_t sample_rate_hz = 16000; 162 163 size_t num_capture_input_channels = 1; 164 size_t num_capture_output_channels = 1; 165 size_t num_render_channels = 1; 166 167 int samples_per_channel = sample_rate_hz / 100; 168 169 bool simulating = false; 170 bool perf_testing = false; 171 bool verbose = true; 172 bool progress = true; 173 bool raw_output = false; 174 int extra_delay_ms = 0; 175 int override_delay_ms = 0; 176 Config config; 177 178 ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true)); 179 for (int i = 1; i < argc; i++) { 180 if (strcmp(argv[i], "-pb") == 0) { 181 i++; 182 ASSERT_LT(i, argc) << "Specify protobuf filename after -pb"; 183 pb_filename = argv[i]; 184 185 } else if (strcmp(argv[i], "-ir") == 0) { 186 i++; 187 ASSERT_LT(i, argc) << "Specify filename after -ir"; 188 far_filename = argv[i]; 189 simulating = true; 190 191 } else if (strcmp(argv[i], "-i") == 0) { 192 i++; 193 ASSERT_LT(i, argc) << "Specify filename after -i"; 194 near_filename = argv[i]; 195 simulating = true; 196 197 } else if (strcmp(argv[i], "-o") == 0) { 198 i++; 199 ASSERT_LT(i, argc) << "Specify filename without extension after -o"; 200 out_filename = argv[i]; 201 202 } else if (strcmp(argv[i], "-fs") == 0) { 203 i++; 204 ASSERT_LT(i, argc) << "Specify sample rate after -fs"; 205 ASSERT_EQ(1, sscanf(argv[i], "%d", &sample_rate_hz)); 206 samples_per_channel = sample_rate_hz / 100; 207 208 } else if (strcmp(argv[i], "-ch") == 0) { 209 i++; 210 ASSERT_LT(i + 1, argc) << "Specify number of channels after -ch"; 211 ASSERT_EQ(1, sscanf(argv[i], "%" PRIuS, &num_capture_input_channels)); 212 i++; 213 ASSERT_EQ(1, sscanf(argv[i], "%" PRIuS, &num_capture_output_channels)); 214 215 } else if (strcmp(argv[i], "-rch") == 0) { 216 i++; 217 ASSERT_LT(i, argc) << "Specify number of channels after -rch"; 218 ASSERT_EQ(1, sscanf(argv[i], "%" PRIuS, &num_render_channels)); 219 220 } else if (strcmp(argv[i], "-aec") == 0) { 221 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 222 ASSERT_EQ(apm->kNoError, 223 apm->echo_cancellation()->enable_metrics(true)); 224 ASSERT_EQ(apm->kNoError, 225 apm->echo_cancellation()->enable_delay_logging(true)); 226 227 } else if (strcmp(argv[i], "--drift_compensation") == 0) { 228 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 229 // TODO(ajm): this is enabled in the VQE test app by default. Investigate 230 // why it can give better performance despite passing zeros. 231 ASSERT_EQ(apm->kNoError, 232 apm->echo_cancellation()->enable_drift_compensation(true)); 233 } else if (strcmp(argv[i], "--no_drift_compensation") == 0) { 234 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 235 ASSERT_EQ(apm->kNoError, 236 apm->echo_cancellation()->enable_drift_compensation(false)); 237 238 } else if (strcmp(argv[i], "--no_echo_metrics") == 0) { 239 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 240 ASSERT_EQ(apm->kNoError, 241 apm->echo_cancellation()->enable_metrics(false)); 242 243 } else if (strcmp(argv[i], "--no_delay_logging") == 0) { 244 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 245 ASSERT_EQ(apm->kNoError, 246 apm->echo_cancellation()->enable_delay_logging(false)); 247 248 } else if (strcmp(argv[i], "--no_level_metrics") == 0) { 249 ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(false)); 250 251 } else if (strcmp(argv[i], "--aec_suppression_level") == 0) { 252 i++; 253 ASSERT_LT(i, argc) << "Specify level after --aec_suppression_level"; 254 int suppression_level; 255 ASSERT_EQ(1, sscanf(argv[i], "%d", &suppression_level)); 256 ASSERT_EQ(apm->kNoError, 257 apm->echo_cancellation()->set_suppression_level( 258 static_cast<webrtc::EchoCancellation::SuppressionLevel>( 259 suppression_level))); 260 261 } else if (strcmp(argv[i], "--extended_filter") == 0) { 262 config.Set<ExtendedFilter>(new ExtendedFilter(true)); 263 264 } else if (strcmp(argv[i], "--no_reported_delay") == 0) { 265 config.Set<DelayAgnostic>(new DelayAgnostic(true)); 266 267 } else if (strcmp(argv[i], "--delay_agnostic") == 0) { 268 config.Set<DelayAgnostic>(new DelayAgnostic(true)); 269 270 } else if (strcmp(argv[i], "-aecm") == 0) { 271 ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true)); 272 273 } else if (strcmp(argv[i], "--aecm_echo_path_in_file") == 0) { 274 i++; 275 ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_in_file"; 276 aecm_echo_path_in_filename = argv[i]; 277 278 } else if (strcmp(argv[i], "--aecm_echo_path_out_file") == 0) { 279 i++; 280 ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_out_file"; 281 aecm_echo_path_out_filename = argv[i]; 282 283 } else if (strcmp(argv[i], "--no_comfort_noise") == 0) { 284 ASSERT_EQ(apm->kNoError, 285 apm->echo_control_mobile()->enable_comfort_noise(false)); 286 287 } else if (strcmp(argv[i], "--routing_mode") == 0) { 288 i++; 289 ASSERT_LT(i, argc) << "Specify mode after --routing_mode"; 290 int routing_mode; 291 ASSERT_EQ(1, sscanf(argv[i], "%d", &routing_mode)); 292 ASSERT_EQ(apm->kNoError, 293 apm->echo_control_mobile()->set_routing_mode( 294 static_cast<webrtc::EchoControlMobile::RoutingMode>( 295 routing_mode))); 296 297 } else if (strcmp(argv[i], "-agc") == 0) { 298 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 299 300 } else if (strcmp(argv[i], "--analog") == 0) { 301 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 302 ASSERT_EQ(apm->kNoError, 303 apm->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); 304 305 } else if (strcmp(argv[i], "--adaptive_digital") == 0) { 306 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 307 ASSERT_EQ(apm->kNoError, 308 apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); 309 310 } else if (strcmp(argv[i], "--fixed_digital") == 0) { 311 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 312 ASSERT_EQ(apm->kNoError, 313 apm->gain_control()->set_mode(GainControl::kFixedDigital)); 314 315 } else if (strcmp(argv[i], "--target_level") == 0) { 316 i++; 317 int level; 318 ASSERT_EQ(1, sscanf(argv[i], "%d", &level)); 319 320 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 321 ASSERT_EQ(apm->kNoError, 322 apm->gain_control()->set_target_level_dbfs(level)); 323 324 } else if (strcmp(argv[i], "--compression_gain") == 0) { 325 i++; 326 int gain; 327 ASSERT_EQ(1, sscanf(argv[i], "%d", &gain)); 328 329 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 330 ASSERT_EQ(apm->kNoError, 331 apm->gain_control()->set_compression_gain_db(gain)); 332 333 } else if (strcmp(argv[i], "--limiter") == 0) { 334 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 335 ASSERT_EQ(apm->kNoError, 336 apm->gain_control()->enable_limiter(true)); 337 338 } else if (strcmp(argv[i], "--no_limiter") == 0) { 339 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 340 ASSERT_EQ(apm->kNoError, 341 apm->gain_control()->enable_limiter(false)); 342 343 } else if (strcmp(argv[i], "-hpf") == 0) { 344 ASSERT_EQ(apm->kNoError, apm->high_pass_filter()->Enable(true)); 345 346 } else if (strcmp(argv[i], "-ns") == 0) { 347 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 348 349 } else if (strcmp(argv[i], "--ns_low") == 0) { 350 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 351 ASSERT_EQ(apm->kNoError, 352 apm->noise_suppression()->set_level(NoiseSuppression::kLow)); 353 354 } else if (strcmp(argv[i], "--ns_moderate") == 0) { 355 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 356 ASSERT_EQ(apm->kNoError, 357 apm->noise_suppression()->set_level(NoiseSuppression::kModerate)); 358 359 } else if (strcmp(argv[i], "--ns_high") == 0) { 360 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 361 ASSERT_EQ(apm->kNoError, 362 apm->noise_suppression()->set_level(NoiseSuppression::kHigh)); 363 364 } else if (strcmp(argv[i], "--ns_very_high") == 0) { 365 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 366 ASSERT_EQ(apm->kNoError, 367 apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh)); 368 369 } else if (strcmp(argv[i], "--ns_prob_file") == 0) { 370 i++; 371 ASSERT_LT(i, argc) << "Specify filename after --ns_prob_file"; 372 ns_prob_filename = argv[i]; 373 374 } else if (strcmp(argv[i], "-vad") == 0) { 375 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 376 377 } else if (strcmp(argv[i], "--vad_very_low") == 0) { 378 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 379 ASSERT_EQ(apm->kNoError, 380 apm->voice_detection()->set_likelihood( 381 VoiceDetection::kVeryLowLikelihood)); 382 383 } else if (strcmp(argv[i], "--vad_low") == 0) { 384 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 385 ASSERT_EQ(apm->kNoError, 386 apm->voice_detection()->set_likelihood( 387 VoiceDetection::kLowLikelihood)); 388 389 } else if (strcmp(argv[i], "--vad_moderate") == 0) { 390 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 391 ASSERT_EQ(apm->kNoError, 392 apm->voice_detection()->set_likelihood( 393 VoiceDetection::kModerateLikelihood)); 394 395 } else if (strcmp(argv[i], "--vad_high") == 0) { 396 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 397 ASSERT_EQ(apm->kNoError, 398 apm->voice_detection()->set_likelihood( 399 VoiceDetection::kHighLikelihood)); 400 401 } else if (strcmp(argv[i], "--vad_out_file") == 0) { 402 i++; 403 ASSERT_LT(i, argc) << "Specify filename after --vad_out_file"; 404 vad_out_filename = argv[i]; 405 406 } else if (strcmp(argv[i], "-expns") == 0) { 407 config.Set<ExperimentalNs>(new ExperimentalNs(true)); 408 409 } else if (strcmp(argv[i], "--noasm") == 0) { 410 WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM; 411 // We need to reinitialize here if components have already been enabled. 412 ASSERT_EQ(apm->kNoError, apm->Initialize()); 413 414 } else if (strcmp(argv[i], "--add_delay") == 0) { 415 i++; 416 ASSERT_EQ(1, sscanf(argv[i], "%d", &extra_delay_ms)); 417 418 } else if (strcmp(argv[i], "--delay") == 0) { 419 i++; 420 ASSERT_EQ(1, sscanf(argv[i], "%d", &override_delay_ms)); 421 422 } else if (strcmp(argv[i], "--perf") == 0) { 423 perf_testing = true; 424 425 } else if (strcmp(argv[i], "--quiet") == 0) { 426 verbose = false; 427 progress = false; 428 429 } else if (strcmp(argv[i], "--no_progress") == 0) { 430 progress = false; 431 432 } else if (strcmp(argv[i], "--raw_output") == 0) { 433 raw_output = true; 434 435 } else if (strcmp(argv[i], "--debug_file") == 0) { 436 i++; 437 ASSERT_LT(i, argc) << "Specify filename after --debug_file"; 438 ASSERT_EQ(apm->kNoError, apm->StartDebugRecording(argv[i])); 439 } else { 440 FAIL() << "Unrecognized argument " << argv[i]; 441 } 442 } 443 apm->SetExtraOptions(config); 444 445 // If we're reading a protobuf file, ensure a simulation hasn't also 446 // been requested (which makes no sense...) 447 ASSERT_FALSE(pb_filename && simulating); 448 449 if (verbose) { 450 printf("Sample rate: %d Hz\n", sample_rate_hz); 451 printf("Primary channels: %" PRIuS " (in), %" PRIuS " (out)\n", 452 num_capture_input_channels, 453 num_capture_output_channels); 454 printf("Reverse channels: %" PRIuS "\n", num_render_channels); 455 } 456 457 const std::string out_path = webrtc::test::OutputPath(); 458 const char far_file_default[] = "apm_far.pcm"; 459 const char near_file_default[] = "apm_near.pcm"; 460 const char event_filename[] = "apm_event.dat"; 461 const char delay_filename[] = "apm_delay.dat"; 462 const char drift_filename[] = "apm_drift.dat"; 463 const std::string vad_file_default = out_path + "vad_out.dat"; 464 const std::string ns_prob_file_default = out_path + "ns_prob.dat"; 465 466 if (!simulating) { 467 far_filename = far_file_default; 468 near_filename = near_file_default; 469 } 470 471 if (out_filename.size() == 0) { 472 out_filename = out_path + "out"; 473 } 474 475 if (!vad_out_filename) { 476 vad_out_filename = vad_file_default.c_str(); 477 } 478 479 if (!ns_prob_filename) { 480 ns_prob_filename = ns_prob_file_default.c_str(); 481 } 482 483 FILE* pb_file = NULL; 484 FILE* far_file = NULL; 485 FILE* near_file = NULL; 486 FILE* event_file = NULL; 487 FILE* delay_file = NULL; 488 FILE* drift_file = NULL; 489 FILE* vad_out_file = NULL; 490 FILE* ns_prob_file = NULL; 491 FILE* aecm_echo_path_in_file = NULL; 492 FILE* aecm_echo_path_out_file = NULL; 493 494 rtc::scoped_ptr<WavWriter> output_wav_file; 495 rtc::scoped_ptr<RawFile> output_raw_file; 496 497 if (pb_filename) { 498 pb_file = OpenFile(pb_filename, "rb"); 499 } else { 500 if (far_filename) { 501 far_file = OpenFile(far_filename, "rb"); 502 } 503 504 near_file = OpenFile(near_filename, "rb"); 505 if (!simulating) { 506 event_file = OpenFile(event_filename, "rb"); 507 delay_file = OpenFile(delay_filename, "rb"); 508 drift_file = OpenFile(drift_filename, "rb"); 509 } 510 } 511 512 int near_size_bytes = 0; 513 if (pb_file) { 514 struct stat st; 515 stat(pb_filename, &st); 516 // Crude estimate, but should be good enough. 517 near_size_bytes = st.st_size / 3; 518 } else { 519 struct stat st; 520 stat(near_filename, &st); 521 near_size_bytes = st.st_size; 522 } 523 524 if (apm->voice_detection()->is_enabled()) { 525 vad_out_file = OpenFile(vad_out_filename, "wb"); 526 } 527 528 if (apm->noise_suppression()->is_enabled()) { 529 ns_prob_file = OpenFile(ns_prob_filename, "wb"); 530 } 531 532 if (aecm_echo_path_in_filename != NULL) { 533 aecm_echo_path_in_file = OpenFile(aecm_echo_path_in_filename, "rb"); 534 535 const size_t path_size = 536 apm->echo_control_mobile()->echo_path_size_bytes(); 537 rtc::scoped_ptr<char[]> echo_path(new char[path_size]); 538 ASSERT_EQ(path_size, fread(echo_path.get(), 539 sizeof(char), 540 path_size, 541 aecm_echo_path_in_file)); 542 EXPECT_EQ(apm->kNoError, 543 apm->echo_control_mobile()->SetEchoPath(echo_path.get(), 544 path_size)); 545 fclose(aecm_echo_path_in_file); 546 aecm_echo_path_in_file = NULL; 547 } 548 549 if (aecm_echo_path_out_filename != NULL) { 550 aecm_echo_path_out_file = OpenFile(aecm_echo_path_out_filename, "wb"); 551 } 552 553 size_t read_count = 0; 554 int reverse_count = 0; 555 int primary_count = 0; 556 int near_read_bytes = 0; 557 TickInterval acc_ticks; 558 559 AudioFrame far_frame; 560 AudioFrame near_frame; 561 562 int delay_ms = 0; 563 int drift_samples = 0; 564 int capture_level = 127; 565 int8_t stream_has_voice = 0; 566 float ns_speech_prob = 0.0f; 567 568 TickTime t0 = TickTime::Now(); 569 TickTime t1 = t0; 570 int64_t max_time_us = 0; 571 int64_t max_time_reverse_us = 0; 572 int64_t min_time_us = 1e6; 573 int64_t min_time_reverse_us = 1e6; 574 575 // TODO(ajm): Ideally we would refactor this block into separate functions, 576 // but for now we want to share the variables. 577 if (pb_file) { 578 Event event_msg; 579 rtc::scoped_ptr<ChannelBuffer<float> > reverse_cb; 580 rtc::scoped_ptr<ChannelBuffer<float> > primary_cb; 581 int output_sample_rate = 32000; 582 AudioProcessing::ChannelLayout output_layout = AudioProcessing::kMono; 583 while (ReadMessageFromFile(pb_file, &event_msg)) { 584 std::ostringstream trace_stream; 585 trace_stream << "Processed frames: " << reverse_count << " (reverse), " 586 << primary_count << " (primary)"; 587 SCOPED_TRACE(trace_stream.str()); 588 589 if (event_msg.type() == Event::INIT) { 590 ASSERT_TRUE(event_msg.has_init()); 591 const Init msg = event_msg.init(); 592 593 ASSERT_TRUE(msg.has_sample_rate()); 594 ASSERT_TRUE(msg.has_num_input_channels()); 595 ASSERT_TRUE(msg.has_num_output_channels()); 596 ASSERT_TRUE(msg.has_num_reverse_channels()); 597 int reverse_sample_rate = msg.sample_rate(); 598 if (msg.has_reverse_sample_rate()) { 599 reverse_sample_rate = msg.reverse_sample_rate(); 600 } 601 output_sample_rate = msg.sample_rate(); 602 if (msg.has_output_sample_rate()) { 603 output_sample_rate = msg.output_sample_rate(); 604 } 605 output_layout = 606 LayoutFromChannels(static_cast<size_t>(msg.num_output_channels())); 607 ASSERT_EQ(kNoErr, 608 apm->Initialize( 609 msg.sample_rate(), 610 output_sample_rate, 611 reverse_sample_rate, 612 LayoutFromChannels( 613 static_cast<size_t>(msg.num_input_channels())), 614 output_layout, 615 LayoutFromChannels( 616 static_cast<size_t>(msg.num_reverse_channels())))); 617 618 samples_per_channel = msg.sample_rate() / 100; 619 far_frame.sample_rate_hz_ = reverse_sample_rate; 620 far_frame.samples_per_channel_ = reverse_sample_rate / 100; 621 far_frame.num_channels_ = msg.num_reverse_channels(); 622 near_frame.sample_rate_hz_ = msg.sample_rate(); 623 near_frame.samples_per_channel_ = samples_per_channel; 624 near_frame.num_channels_ = msg.num_input_channels(); 625 reverse_cb.reset(new ChannelBuffer<float>( 626 far_frame.samples_per_channel_, 627 msg.num_reverse_channels())); 628 primary_cb.reset(new ChannelBuffer<float>(samples_per_channel, 629 msg.num_input_channels())); 630 631 if (verbose) { 632 printf("Init at frame: %d (primary), %d (reverse)\n", 633 primary_count, reverse_count); 634 printf(" Primary rates: %d Hz (in), %d Hz (out)\n", 635 msg.sample_rate(), output_sample_rate); 636 printf(" Primary channels: %d (in), %d (out)\n", 637 msg.num_input_channels(), 638 msg.num_output_channels()); 639 printf(" Reverse rate: %d\n", reverse_sample_rate); 640 printf(" Reverse channels: %d\n", msg.num_reverse_channels()); 641 } 642 643 if (!raw_output) { 644 // The WAV file needs to be reset every time, because it can't change 645 // its sample rate or number of channels. 646 output_wav_file.reset(new WavWriter( 647 out_filename + ".wav", output_sample_rate, 648 static_cast<size_t>(msg.num_output_channels()))); 649 } 650 651 } else if (event_msg.type() == Event::REVERSE_STREAM) { 652 ASSERT_TRUE(event_msg.has_reverse_stream()); 653 ReverseStream msg = event_msg.reverse_stream(); 654 reverse_count++; 655 656 ASSERT_TRUE(msg.has_data() ^ (msg.channel_size() > 0)); 657 if (msg.has_data()) { 658 ASSERT_EQ(sizeof(int16_t) * far_frame.samples_per_channel_ * 659 far_frame.num_channels_, msg.data().size()); 660 memcpy(far_frame.data_, msg.data().data(), msg.data().size()); 661 } else { 662 for (int i = 0; i < msg.channel_size(); ++i) { 663 memcpy(reverse_cb->channels()[i], 664 msg.channel(i).data(), 665 reverse_cb->num_frames() * 666 sizeof(reverse_cb->channels()[i][0])); 667 } 668 } 669 670 if (perf_testing) { 671 t0 = TickTime::Now(); 672 } 673 674 if (msg.has_data()) { 675 ASSERT_EQ(apm->kNoError, 676 apm->AnalyzeReverseStream(&far_frame)); 677 } else { 678 ASSERT_EQ(apm->kNoError, 679 apm->AnalyzeReverseStream( 680 reverse_cb->channels(), 681 far_frame.samples_per_channel_, 682 far_frame.sample_rate_hz_, 683 LayoutFromChannels(far_frame.num_channels_))); 684 } 685 686 if (perf_testing) { 687 t1 = TickTime::Now(); 688 TickInterval tick_diff = t1 - t0; 689 acc_ticks += tick_diff; 690 if (tick_diff.Microseconds() > max_time_reverse_us) { 691 max_time_reverse_us = tick_diff.Microseconds(); 692 } 693 if (tick_diff.Microseconds() < min_time_reverse_us) { 694 min_time_reverse_us = tick_diff.Microseconds(); 695 } 696 } 697 698 } else if (event_msg.type() == Event::STREAM) { 699 ASSERT_TRUE(event_msg.has_stream()); 700 const Stream msg = event_msg.stream(); 701 primary_count++; 702 703 // ProcessStream could have changed this for the output frame. 704 near_frame.num_channels_ = apm->num_input_channels(); 705 706 ASSERT_TRUE(msg.has_input_data() ^ (msg.input_channel_size() > 0)); 707 if (msg.has_input_data()) { 708 ASSERT_EQ(sizeof(int16_t) * samples_per_channel * 709 near_frame.num_channels_, msg.input_data().size()); 710 memcpy(near_frame.data_, 711 msg.input_data().data(), 712 msg.input_data().size()); 713 near_read_bytes += msg.input_data().size(); 714 } else { 715 for (int i = 0; i < msg.input_channel_size(); ++i) { 716 memcpy(primary_cb->channels()[i], 717 msg.input_channel(i).data(), 718 primary_cb->num_frames() * 719 sizeof(primary_cb->channels()[i][0])); 720 near_read_bytes += msg.input_channel(i).size(); 721 } 722 } 723 724 if (progress && primary_count % 100 == 0) { 725 near_read_bytes = std::min(near_read_bytes, near_size_bytes); 726 printf("%.0f%% complete\r", 727 (near_read_bytes * 100.0) / near_size_bytes); 728 fflush(stdout); 729 } 730 731 if (perf_testing) { 732 t0 = TickTime::Now(); 733 } 734 735 ASSERT_EQ(apm->kNoError, 736 apm->gain_control()->set_stream_analog_level(msg.level())); 737 delay_ms = msg.delay() + extra_delay_ms; 738 if (override_delay_ms) { 739 delay_ms = override_delay_ms; 740 } 741 ASSERT_EQ(apm->kNoError, 742 apm->set_stream_delay_ms(delay_ms)); 743 apm->echo_cancellation()->set_stream_drift_samples(msg.drift()); 744 745 if (msg.has_keypress()) { 746 apm->set_stream_key_pressed(msg.keypress()); 747 } else { 748 apm->set_stream_key_pressed(true); 749 } 750 751 int err = apm->kNoError; 752 if (msg.has_input_data()) { 753 err = apm->ProcessStream(&near_frame); 754 ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels()); 755 } else { 756 err = apm->ProcessStream( 757 primary_cb->channels(), 758 near_frame.samples_per_channel_, 759 near_frame.sample_rate_hz_, 760 LayoutFromChannels(near_frame.num_channels_), 761 output_sample_rate, 762 output_layout, 763 primary_cb->channels()); 764 } 765 766 if (err == apm->kBadStreamParameterWarning) { 767 printf("Bad parameter warning. %s\n", trace_stream.str().c_str()); 768 } 769 ASSERT_TRUE(err == apm->kNoError || 770 err == apm->kBadStreamParameterWarning); 771 772 stream_has_voice = 773 static_cast<int8_t>(apm->voice_detection()->stream_has_voice()); 774 if (vad_out_file != NULL) { 775 ASSERT_EQ(1u, fwrite(&stream_has_voice, 776 sizeof(stream_has_voice), 777 1, 778 vad_out_file)); 779 } 780 781 if (ns_prob_file != NULL) { 782 ns_speech_prob = apm->noise_suppression()->speech_probability(); 783 ASSERT_EQ(1u, fwrite(&ns_speech_prob, 784 sizeof(ns_speech_prob), 785 1, 786 ns_prob_file)); 787 } 788 789 if (perf_testing) { 790 t1 = TickTime::Now(); 791 TickInterval tick_diff = t1 - t0; 792 acc_ticks += tick_diff; 793 if (tick_diff.Microseconds() > max_time_us) { 794 max_time_us = tick_diff.Microseconds(); 795 } 796 if (tick_diff.Microseconds() < min_time_us) { 797 min_time_us = tick_diff.Microseconds(); 798 } 799 } 800 801 const size_t samples_per_channel = output_sample_rate / 100; 802 if (msg.has_input_data()) { 803 if (raw_output && !output_raw_file) { 804 output_raw_file.reset(new RawFile(out_filename + ".pcm")); 805 } 806 WriteIntData(near_frame.data_, 807 apm->num_output_channels() * samples_per_channel, 808 output_wav_file.get(), 809 output_raw_file.get()); 810 } else { 811 if (raw_output && !output_raw_file) { 812 output_raw_file.reset(new RawFile(out_filename + ".float")); 813 } 814 WriteFloatData(primary_cb->channels(), 815 samples_per_channel, 816 apm->num_output_channels(), 817 output_wav_file.get(), 818 output_raw_file.get()); 819 } 820 } 821 } 822 823 ASSERT_TRUE(feof(pb_file)); 824 825 } else { 826 enum Events { 827 kInitializeEvent, 828 kRenderEvent, 829 kCaptureEvent, 830 kResetEventDeprecated 831 }; 832 int16_t event = 0; 833 while (simulating || feof(event_file) == 0) { 834 std::ostringstream trace_stream; 835 trace_stream << "Processed frames: " << reverse_count << " (reverse), " 836 << primary_count << " (primary)"; 837 SCOPED_TRACE(trace_stream.str()); 838 839 if (simulating) { 840 if (far_file == NULL) { 841 event = kCaptureEvent; 842 } else { 843 if (event == kRenderEvent) { 844 event = kCaptureEvent; 845 } else { 846 event = kRenderEvent; 847 } 848 } 849 } else { 850 read_count = fread(&event, sizeof(event), 1, event_file); 851 if (read_count != 1) { 852 break; 853 } 854 } 855 856 far_frame.sample_rate_hz_ = sample_rate_hz; 857 far_frame.samples_per_channel_ = samples_per_channel; 858 far_frame.num_channels_ = num_render_channels; 859 near_frame.sample_rate_hz_ = sample_rate_hz; 860 near_frame.samples_per_channel_ = samples_per_channel; 861 862 if (event == kInitializeEvent || event == kResetEventDeprecated) { 863 ASSERT_EQ(1u, 864 fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file)); 865 samples_per_channel = sample_rate_hz / 100; 866 867 int32_t unused_device_sample_rate_hz; 868 ASSERT_EQ(1u, 869 fread(&unused_device_sample_rate_hz, 870 sizeof(unused_device_sample_rate_hz), 871 1, 872 event_file)); 873 874 ASSERT_EQ(kNoErr, apm->Initialize( 875 sample_rate_hz, 876 sample_rate_hz, 877 sample_rate_hz, 878 LayoutFromChannels(num_capture_input_channels), 879 LayoutFromChannels(num_capture_output_channels), 880 LayoutFromChannels(num_render_channels))); 881 882 far_frame.sample_rate_hz_ = sample_rate_hz; 883 far_frame.samples_per_channel_ = samples_per_channel; 884 far_frame.num_channels_ = num_render_channels; 885 near_frame.sample_rate_hz_ = sample_rate_hz; 886 near_frame.samples_per_channel_ = samples_per_channel; 887 888 if (!raw_output) { 889 // The WAV file needs to be reset every time, because it can't change 890 // it's sample rate or number of channels. 891 output_wav_file.reset(new WavWriter(out_filename + ".wav", 892 sample_rate_hz, 893 num_capture_output_channels)); 894 } 895 896 if (verbose) { 897 printf("Init at frame: %d (primary), %d (reverse)\n", 898 primary_count, reverse_count); 899 printf(" Sample rate: %d Hz\n", sample_rate_hz); 900 } 901 902 } else if (event == kRenderEvent) { 903 reverse_count++; 904 905 size_t size = samples_per_channel * num_render_channels; 906 read_count = fread(far_frame.data_, 907 sizeof(int16_t), 908 size, 909 far_file); 910 911 if (simulating) { 912 if (read_count != size) { 913 // Read an equal amount from the near file to avoid errors due to 914 // not reaching end-of-file. 915 EXPECT_EQ(0, fseek(near_file, read_count * sizeof(int16_t), 916 SEEK_CUR)); 917 break; // This is expected. 918 } 919 } else { 920 ASSERT_EQ(size, read_count); 921 } 922 923 if (perf_testing) { 924 t0 = TickTime::Now(); 925 } 926 927 ASSERT_EQ(apm->kNoError, 928 apm->AnalyzeReverseStream(&far_frame)); 929 930 if (perf_testing) { 931 t1 = TickTime::Now(); 932 TickInterval tick_diff = t1 - t0; 933 acc_ticks += tick_diff; 934 if (tick_diff.Microseconds() > max_time_reverse_us) { 935 max_time_reverse_us = tick_diff.Microseconds(); 936 } 937 if (tick_diff.Microseconds() < min_time_reverse_us) { 938 min_time_reverse_us = tick_diff.Microseconds(); 939 } 940 } 941 942 } else if (event == kCaptureEvent) { 943 primary_count++; 944 near_frame.num_channels_ = num_capture_input_channels; 945 946 size_t size = samples_per_channel * num_capture_input_channels; 947 read_count = fread(near_frame.data_, 948 sizeof(int16_t), 949 size, 950 near_file); 951 952 near_read_bytes += read_count * sizeof(int16_t); 953 if (progress && primary_count % 100 == 0) { 954 printf("%.0f%% complete\r", 955 (near_read_bytes * 100.0) / near_size_bytes); 956 fflush(stdout); 957 } 958 if (simulating) { 959 if (read_count != size) { 960 break; // This is expected. 961 } 962 963 delay_ms = 0; 964 drift_samples = 0; 965 } else { 966 ASSERT_EQ(size, read_count); 967 968 // TODO(ajm): sizeof(delay_ms) for current files? 969 ASSERT_EQ(1u, 970 fread(&delay_ms, 2, 1, delay_file)); 971 ASSERT_EQ(1u, 972 fread(&drift_samples, sizeof(drift_samples), 1, drift_file)); 973 } 974 975 if (apm->gain_control()->is_enabled() && 976 apm->gain_control()->mode() == GainControl::kAdaptiveAnalog) { 977 SimulateMic(capture_level, &near_frame); 978 } 979 980 if (perf_testing) { 981 t0 = TickTime::Now(); 982 } 983 984 const int capture_level_in = capture_level; 985 ASSERT_EQ(apm->kNoError, 986 apm->gain_control()->set_stream_analog_level(capture_level)); 987 delay_ms += extra_delay_ms; 988 if (override_delay_ms) { 989 delay_ms = override_delay_ms; 990 } 991 ASSERT_EQ(apm->kNoError, 992 apm->set_stream_delay_ms(delay_ms)); 993 apm->echo_cancellation()->set_stream_drift_samples(drift_samples); 994 995 apm->set_stream_key_pressed(true); 996 997 int err = apm->ProcessStream(&near_frame); 998 if (err == apm->kBadStreamParameterWarning) { 999 printf("Bad parameter warning. %s\n", trace_stream.str().c_str()); 1000 } 1001 ASSERT_TRUE(err == apm->kNoError || 1002 err == apm->kBadStreamParameterWarning); 1003 ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels()); 1004 1005 capture_level = apm->gain_control()->stream_analog_level(); 1006 1007 stream_has_voice = 1008 static_cast<int8_t>(apm->voice_detection()->stream_has_voice()); 1009 if (vad_out_file != NULL) { 1010 ASSERT_EQ(1u, fwrite(&stream_has_voice, 1011 sizeof(stream_has_voice), 1012 1, 1013 vad_out_file)); 1014 } 1015 1016 if (ns_prob_file != NULL) { 1017 ns_speech_prob = apm->noise_suppression()->speech_probability(); 1018 ASSERT_EQ(1u, fwrite(&ns_speech_prob, 1019 sizeof(ns_speech_prob), 1020 1, 1021 ns_prob_file)); 1022 } 1023 1024 if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) { 1025 ASSERT_EQ(capture_level_in, capture_level); 1026 } 1027 1028 if (perf_testing) { 1029 t1 = TickTime::Now(); 1030 TickInterval tick_diff = t1 - t0; 1031 acc_ticks += tick_diff; 1032 if (tick_diff.Microseconds() > max_time_us) { 1033 max_time_us = tick_diff.Microseconds(); 1034 } 1035 if (tick_diff.Microseconds() < min_time_us) { 1036 min_time_us = tick_diff.Microseconds(); 1037 } 1038 } 1039 1040 if (raw_output && !output_raw_file) { 1041 output_raw_file.reset(new RawFile(out_filename + ".pcm")); 1042 } 1043 if (!raw_output && !output_wav_file) { 1044 output_wav_file.reset(new WavWriter(out_filename + ".wav", 1045 sample_rate_hz, 1046 num_capture_output_channels)); 1047 } 1048 WriteIntData(near_frame.data_, 1049 size, 1050 output_wav_file.get(), 1051 output_raw_file.get()); 1052 } else { 1053 FAIL() << "Event " << event << " is unrecognized"; 1054 } 1055 } 1056 } 1057 if (progress) { 1058 printf("100%% complete\r"); 1059 } 1060 1061 if (aecm_echo_path_out_file != NULL) { 1062 const size_t path_size = 1063 apm->echo_control_mobile()->echo_path_size_bytes(); 1064 rtc::scoped_ptr<char[]> echo_path(new char[path_size]); 1065 apm->echo_control_mobile()->GetEchoPath(echo_path.get(), path_size); 1066 ASSERT_EQ(path_size, fwrite(echo_path.get(), 1067 sizeof(char), 1068 path_size, 1069 aecm_echo_path_out_file)); 1070 fclose(aecm_echo_path_out_file); 1071 aecm_echo_path_out_file = NULL; 1072 } 1073 1074 if (verbose) { 1075 printf("\nProcessed frames: %d (primary), %d (reverse)\n", 1076 primary_count, reverse_count); 1077 1078 if (apm->level_estimator()->is_enabled()) { 1079 printf("\n--Level metrics--\n"); 1080 printf("RMS: %d dBFS\n", -apm->level_estimator()->RMS()); 1081 } 1082 if (apm->echo_cancellation()->are_metrics_enabled()) { 1083 EchoCancellation::Metrics metrics; 1084 apm->echo_cancellation()->GetMetrics(&metrics); 1085 printf("\n--Echo metrics--\n"); 1086 printf("(avg, max, min)\n"); 1087 printf("ERL: "); 1088 PrintStat(metrics.echo_return_loss); 1089 printf("ERLE: "); 1090 PrintStat(metrics.echo_return_loss_enhancement); 1091 printf("ANLP: "); 1092 PrintStat(metrics.a_nlp); 1093 } 1094 if (apm->echo_cancellation()->is_delay_logging_enabled()) { 1095 int median = 0; 1096 int std = 0; 1097 float fraction_poor_delays = 0; 1098 apm->echo_cancellation()->GetDelayMetrics(&median, &std, 1099 &fraction_poor_delays); 1100 printf("\n--Delay metrics--\n"); 1101 printf("Median: %3d\n", median); 1102 printf("Standard deviation: %3d\n", std); 1103 printf("Poor delay values: %3.1f%%\n", fraction_poor_delays * 100); 1104 } 1105 } 1106 1107 if (!pb_file) { 1108 int8_t temp_int8; 1109 if (far_file) { 1110 read_count = fread(&temp_int8, sizeof(temp_int8), 1, far_file); 1111 EXPECT_NE(0, feof(far_file)) << "Far-end file not fully processed"; 1112 } 1113 1114 read_count = fread(&temp_int8, sizeof(temp_int8), 1, near_file); 1115 EXPECT_NE(0, feof(near_file)) << "Near-end file not fully processed"; 1116 1117 if (!simulating) { 1118 read_count = fread(&temp_int8, sizeof(temp_int8), 1, event_file); 1119 EXPECT_NE(0, feof(event_file)) << "Event file not fully processed"; 1120 read_count = fread(&temp_int8, sizeof(temp_int8), 1, delay_file); 1121 EXPECT_NE(0, feof(delay_file)) << "Delay file not fully processed"; 1122 read_count = fread(&temp_int8, sizeof(temp_int8), 1, drift_file); 1123 EXPECT_NE(0, feof(drift_file)) << "Drift file not fully processed"; 1124 } 1125 } 1126 1127 if (perf_testing) { 1128 if (primary_count > 0) { 1129 int64_t exec_time = acc_ticks.Milliseconds(); 1130 printf("\nTotal time: %.3f s, file time: %.2f s\n", 1131 exec_time * 0.001, primary_count * 0.01); 1132 printf("Time per frame: %.3f ms (average), %.3f ms (max)," 1133 " %.3f ms (min)\n", 1134 (exec_time * 1.0) / primary_count, 1135 (max_time_us + max_time_reverse_us) / 1000.0, 1136 (min_time_us + min_time_reverse_us) / 1000.0); 1137 // Record the results with Perf test tools. 1138 webrtc::test::PrintResult("audioproc", "", "time_per_10ms_frame", 1139 (exec_time * 1000) / primary_count, "us", false); 1140 } else { 1141 printf("Warning: no capture frames\n"); 1142 } 1143 } 1144 } 1145 1146 } // namespace 1147 } // namespace webrtc 1148 1149 int main(int argc, char* argv[]) { 1150 webrtc::void_main(argc, argv); 1151 1152 // Optional, but removes memory leak noise from Valgrind. 1153 google::protobuf::ShutdownProtobufLibrary(); 1154 return 0; 1155 } 1156