1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <math.h> 12 #include <stdio.h> 13 #include <string.h> 14 #ifdef WEBRTC_ANDROID 15 #include <sys/stat.h> 16 #endif 17 18 #include <algorithm> 19 20 #include "webrtc/common.h" 21 #include "webrtc/modules/audio_processing/include/audio_processing.h" 22 #include "webrtc/modules/audio_processing/test/test_utils.h" 23 #include "webrtc/modules/interface/module_common_types.h" 24 #include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" 25 #include "webrtc/system_wrappers/interface/scoped_ptr.h" 26 #include "webrtc/system_wrappers/interface/tick_util.h" 27 #include "webrtc/test/testsupport/fileutils.h" 28 #include "webrtc/test/testsupport/perf_test.h" 29 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD 30 #include "gtest/gtest.h" 31 #include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" 32 #else 33 #include "testing/gtest/include/gtest/gtest.h" 34 #include "webrtc/audio_processing/debug.pb.h" 35 #endif 36 37 namespace webrtc { 38 39 using webrtc::audioproc::Event; 40 using webrtc::audioproc::Init; 41 using webrtc::audioproc::ReverseStream; 42 using webrtc::audioproc::Stream; 43 44 namespace { 45 46 void PrintStat(const AudioProcessing::Statistic& stat) { 47 printf("%d, %d, %d\n", stat.average, 48 stat.maximum, 49 stat.minimum); 50 } 51 52 void usage() { 53 printf( 54 "Usage: process_test [options] [-pb PROTOBUF_FILE]\n" 55 " [-ir REVERSE_FILE] [-i PRIMARY_FILE] [-o OUT_FILE]\n"); 56 printf( 57 "process_test is a test application for AudioProcessing.\n\n" 58 "When a protobuf debug file is available, specify it with -pb. Alternately,\n" 59 "when -ir or -i is used, the specified files will be processed directly in\n" 60 "a simulation mode. Otherwise the full set of legacy test files is expected\n" 61 "to be present in the working directory. OUT_FILE should be specified\n" 62 "without extension to support both raw and wav output.\n\n"); 63 printf("Options\n"); 64 printf("General configuration (only used for the simulation mode):\n"); 65 printf(" -fs SAMPLE_RATE_HZ\n"); 66 printf(" -ch CHANNELS_IN CHANNELS_OUT\n"); 67 printf(" -rch REVERSE_CHANNELS\n"); 68 printf("\n"); 69 printf("Component configuration:\n"); 70 printf( 71 "All components are disabled by default. Each block below begins with a\n" 72 "flag to enable the component with default settings. The subsequent flags\n" 73 "in the block are used to provide configuration settings.\n"); 74 printf("\n -aec Echo cancellation\n"); 75 printf(" --drift_compensation\n"); 76 printf(" --no_drift_compensation\n"); 77 printf(" --no_echo_metrics\n"); 78 printf(" --no_delay_logging\n"); 79 printf(" --aec_suppression_level LEVEL [0 - 2]\n"); 80 printf(" --extended_filter\n"); 81 printf(" --no_reported_delay\n"); 82 printf("\n -aecm Echo control mobile\n"); 83 printf(" --aecm_echo_path_in_file FILE\n"); 84 printf(" --aecm_echo_path_out_file FILE\n"); 85 printf(" --no_comfort_noise\n"); 86 printf(" --routing_mode MODE [0 - 4]\n"); 87 printf("\n -agc Gain control\n"); 88 printf(" --analog\n"); 89 printf(" --adaptive_digital\n"); 90 printf(" --fixed_digital\n"); 91 printf(" --target_level LEVEL\n"); 92 printf(" --compression_gain GAIN\n"); 93 printf(" --limiter\n"); 94 printf(" --no_limiter\n"); 95 printf("\n -hpf High pass filter\n"); 96 printf("\n -ns Noise suppression\n"); 97 printf(" --ns_low\n"); 98 printf(" --ns_moderate\n"); 99 printf(" --ns_high\n"); 100 printf(" --ns_very_high\n"); 101 printf(" --ns_prob_file FILE\n"); 102 printf("\n -vad Voice activity detection\n"); 103 printf(" --vad_out_file FILE\n"); 104 printf("\n -expns Experimental noise suppression\n"); 105 printf("\n Level metrics (enabled by default)\n"); 106 printf(" --no_level_metrics\n"); 107 printf("\n"); 108 printf("Modifiers:\n"); 109 printf(" --noasm Disable SSE optimization.\n"); 110 printf(" --add_delay DELAY Add DELAY ms to input value.\n"); 111 printf(" --delay DELAY Override input delay with DELAY ms.\n"); 112 printf(" --perf Measure performance.\n"); 113 printf(" --quiet Suppress text output.\n"); 114 printf(" --no_progress Suppress progress.\n"); 115 printf(" --raw_output Raw output instead of WAV file.\n"); 116 printf(" --debug_file FILE Dump a debug recording.\n"); 117 } 118 119 static float MicLevel2Gain(int level) { 120 return pow(10.0f, ((level - 127.0f) / 128.0f * 40.0f) / 20.0f); 121 } 122 123 static void SimulateMic(int mic_level, AudioFrame* frame) { 124 mic_level = std::min(std::max(mic_level, 0), 255); 125 float mic_gain = MicLevel2Gain(mic_level); 126 int num_samples = frame->samples_per_channel_ * frame->num_channels_; 127 float v; 128 for (int n = 0; n < num_samples; n++) { 129 v = floor(frame->data_[n] * mic_gain + 0.5); 130 v = std::max(std::min(32767.0f, v), -32768.0f); 131 frame->data_[n] = static_cast<int16_t>(v); 132 } 133 } 134 135 // void function for gtest. 136 void void_main(int argc, char* argv[]) { 137 if (argc > 1 && strcmp(argv[1], "--help") == 0) { 138 usage(); 139 return; 140 } 141 142 if (argc < 2) { 143 printf("Did you mean to run without arguments?\n"); 144 printf("Try `process_test --help' for more information.\n\n"); 145 } 146 147 scoped_ptr<AudioProcessing> apm(AudioProcessing::Create()); 148 ASSERT_TRUE(apm.get() != NULL); 149 150 const char* pb_filename = NULL; 151 const char* far_filename = NULL; 152 const char* near_filename = NULL; 153 std::string out_filename; 154 const char* vad_out_filename = NULL; 155 const char* ns_prob_filename = NULL; 156 const char* aecm_echo_path_in_filename = NULL; 157 const char* aecm_echo_path_out_filename = NULL; 158 159 int32_t sample_rate_hz = 16000; 160 161 int num_capture_input_channels = 1; 162 int num_capture_output_channels = 1; 163 int num_render_channels = 1; 164 165 int samples_per_channel = sample_rate_hz / 100; 166 167 bool simulating = false; 168 bool perf_testing = false; 169 bool verbose = true; 170 bool progress = true; 171 bool raw_output = false; 172 int extra_delay_ms = 0; 173 int override_delay_ms = 0; 174 175 ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true)); 176 for (int i = 1; i < argc; i++) { 177 if (strcmp(argv[i], "-pb") == 0) { 178 i++; 179 ASSERT_LT(i, argc) << "Specify protobuf filename after -pb"; 180 pb_filename = argv[i]; 181 182 } else if (strcmp(argv[i], "-ir") == 0) { 183 i++; 184 ASSERT_LT(i, argc) << "Specify filename after -ir"; 185 far_filename = argv[i]; 186 simulating = true; 187 188 } else if (strcmp(argv[i], "-i") == 0) { 189 i++; 190 ASSERT_LT(i, argc) << "Specify filename after -i"; 191 near_filename = argv[i]; 192 simulating = true; 193 194 } else if (strcmp(argv[i], "-o") == 0) { 195 i++; 196 ASSERT_LT(i, argc) << "Specify filename without extension after -o"; 197 out_filename = argv[i]; 198 199 } else if (strcmp(argv[i], "-fs") == 0) { 200 i++; 201 ASSERT_LT(i, argc) << "Specify sample rate after -fs"; 202 ASSERT_EQ(1, sscanf(argv[i], "%d", &sample_rate_hz)); 203 samples_per_channel = sample_rate_hz / 100; 204 205 } else if (strcmp(argv[i], "-ch") == 0) { 206 i++; 207 ASSERT_LT(i + 1, argc) << "Specify number of channels after -ch"; 208 ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_input_channels)); 209 i++; 210 ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_output_channels)); 211 212 } else if (strcmp(argv[i], "-rch") == 0) { 213 i++; 214 ASSERT_LT(i, argc) << "Specify number of channels after -rch"; 215 ASSERT_EQ(1, sscanf(argv[i], "%d", &num_render_channels)); 216 217 } else if (strcmp(argv[i], "-aec") == 0) { 218 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 219 ASSERT_EQ(apm->kNoError, 220 apm->echo_cancellation()->enable_metrics(true)); 221 ASSERT_EQ(apm->kNoError, 222 apm->echo_cancellation()->enable_delay_logging(true)); 223 224 } else if (strcmp(argv[i], "--drift_compensation") == 0) { 225 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 226 // TODO(ajm): this is enabled in the VQE test app by default. Investigate 227 // why it can give better performance despite passing zeros. 228 ASSERT_EQ(apm->kNoError, 229 apm->echo_cancellation()->enable_drift_compensation(true)); 230 } else if (strcmp(argv[i], "--no_drift_compensation") == 0) { 231 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 232 ASSERT_EQ(apm->kNoError, 233 apm->echo_cancellation()->enable_drift_compensation(false)); 234 235 } else if (strcmp(argv[i], "--no_echo_metrics") == 0) { 236 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 237 ASSERT_EQ(apm->kNoError, 238 apm->echo_cancellation()->enable_metrics(false)); 239 240 } else if (strcmp(argv[i], "--no_delay_logging") == 0) { 241 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 242 ASSERT_EQ(apm->kNoError, 243 apm->echo_cancellation()->enable_delay_logging(false)); 244 245 } else if (strcmp(argv[i], "--no_level_metrics") == 0) { 246 ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(false)); 247 248 } else if (strcmp(argv[i], "--aec_suppression_level") == 0) { 249 i++; 250 ASSERT_LT(i, argc) << "Specify level after --aec_suppression_level"; 251 int suppression_level; 252 ASSERT_EQ(1, sscanf(argv[i], "%d", &suppression_level)); 253 ASSERT_EQ(apm->kNoError, 254 apm->echo_cancellation()->set_suppression_level( 255 static_cast<webrtc::EchoCancellation::SuppressionLevel>( 256 suppression_level))); 257 258 } else if (strcmp(argv[i], "--extended_filter") == 0) { 259 Config config; 260 config.Set<DelayCorrection>(new DelayCorrection(true)); 261 apm->SetExtraOptions(config); 262 263 } else if (strcmp(argv[i], "--no_reported_delay") == 0) { 264 Config config; 265 config.Set<ReportedDelay>(new ReportedDelay(false)); 266 apm->SetExtraOptions(config); 267 268 } else if (strcmp(argv[i], "-aecm") == 0) { 269 ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true)); 270 271 } else if (strcmp(argv[i], "--aecm_echo_path_in_file") == 0) { 272 i++; 273 ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_in_file"; 274 aecm_echo_path_in_filename = argv[i]; 275 276 } else if (strcmp(argv[i], "--aecm_echo_path_out_file") == 0) { 277 i++; 278 ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_out_file"; 279 aecm_echo_path_out_filename = argv[i]; 280 281 } else if (strcmp(argv[i], "--no_comfort_noise") == 0) { 282 ASSERT_EQ(apm->kNoError, 283 apm->echo_control_mobile()->enable_comfort_noise(false)); 284 285 } else if (strcmp(argv[i], "--routing_mode") == 0) { 286 i++; 287 ASSERT_LT(i, argc) << "Specify mode after --routing_mode"; 288 int routing_mode; 289 ASSERT_EQ(1, sscanf(argv[i], "%d", &routing_mode)); 290 ASSERT_EQ(apm->kNoError, 291 apm->echo_control_mobile()->set_routing_mode( 292 static_cast<webrtc::EchoControlMobile::RoutingMode>( 293 routing_mode))); 294 295 } else if (strcmp(argv[i], "-agc") == 0) { 296 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 297 298 } else if (strcmp(argv[i], "--analog") == 0) { 299 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 300 ASSERT_EQ(apm->kNoError, 301 apm->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); 302 303 } else if (strcmp(argv[i], "--adaptive_digital") == 0) { 304 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 305 ASSERT_EQ(apm->kNoError, 306 apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); 307 308 } else if (strcmp(argv[i], "--fixed_digital") == 0) { 309 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 310 ASSERT_EQ(apm->kNoError, 311 apm->gain_control()->set_mode(GainControl::kFixedDigital)); 312 313 } else if (strcmp(argv[i], "--target_level") == 0) { 314 i++; 315 int level; 316 ASSERT_EQ(1, sscanf(argv[i], "%d", &level)); 317 318 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 319 ASSERT_EQ(apm->kNoError, 320 apm->gain_control()->set_target_level_dbfs(level)); 321 322 } else if (strcmp(argv[i], "--compression_gain") == 0) { 323 i++; 324 int gain; 325 ASSERT_EQ(1, sscanf(argv[i], "%d", &gain)); 326 327 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 328 ASSERT_EQ(apm->kNoError, 329 apm->gain_control()->set_compression_gain_db(gain)); 330 331 } else if (strcmp(argv[i], "--limiter") == 0) { 332 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 333 ASSERT_EQ(apm->kNoError, 334 apm->gain_control()->enable_limiter(true)); 335 336 } else if (strcmp(argv[i], "--no_limiter") == 0) { 337 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 338 ASSERT_EQ(apm->kNoError, 339 apm->gain_control()->enable_limiter(false)); 340 341 } else if (strcmp(argv[i], "-hpf") == 0) { 342 ASSERT_EQ(apm->kNoError, apm->high_pass_filter()->Enable(true)); 343 344 } else if (strcmp(argv[i], "-ns") == 0) { 345 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 346 347 } else if (strcmp(argv[i], "--ns_low") == 0) { 348 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 349 ASSERT_EQ(apm->kNoError, 350 apm->noise_suppression()->set_level(NoiseSuppression::kLow)); 351 352 } else if (strcmp(argv[i], "--ns_moderate") == 0) { 353 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 354 ASSERT_EQ(apm->kNoError, 355 apm->noise_suppression()->set_level(NoiseSuppression::kModerate)); 356 357 } else if (strcmp(argv[i], "--ns_high") == 0) { 358 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 359 ASSERT_EQ(apm->kNoError, 360 apm->noise_suppression()->set_level(NoiseSuppression::kHigh)); 361 362 } else if (strcmp(argv[i], "--ns_very_high") == 0) { 363 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 364 ASSERT_EQ(apm->kNoError, 365 apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh)); 366 367 } else if (strcmp(argv[i], "--ns_prob_file") == 0) { 368 i++; 369 ASSERT_LT(i, argc) << "Specify filename after --ns_prob_file"; 370 ns_prob_filename = argv[i]; 371 372 } else if (strcmp(argv[i], "-vad") == 0) { 373 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 374 375 } else if (strcmp(argv[i], "--vad_very_low") == 0) { 376 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 377 ASSERT_EQ(apm->kNoError, 378 apm->voice_detection()->set_likelihood( 379 VoiceDetection::kVeryLowLikelihood)); 380 381 } else if (strcmp(argv[i], "--vad_low") == 0) { 382 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 383 ASSERT_EQ(apm->kNoError, 384 apm->voice_detection()->set_likelihood( 385 VoiceDetection::kLowLikelihood)); 386 387 } else if (strcmp(argv[i], "--vad_moderate") == 0) { 388 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 389 ASSERT_EQ(apm->kNoError, 390 apm->voice_detection()->set_likelihood( 391 VoiceDetection::kModerateLikelihood)); 392 393 } else if (strcmp(argv[i], "--vad_high") == 0) { 394 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 395 ASSERT_EQ(apm->kNoError, 396 apm->voice_detection()->set_likelihood( 397 VoiceDetection::kHighLikelihood)); 398 399 } else if (strcmp(argv[i], "--vad_out_file") == 0) { 400 i++; 401 ASSERT_LT(i, argc) << "Specify filename after --vad_out_file"; 402 vad_out_filename = argv[i]; 403 404 } else if (strcmp(argv[i], "-expns") == 0) { 405 Config config; 406 config.Set<ExperimentalNs>(new ExperimentalNs(true)); 407 apm->SetExtraOptions(config); 408 409 } else if (strcmp(argv[i], "--noasm") == 0) { 410 WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM; 411 // We need to reinitialize here if components have already been enabled. 412 ASSERT_EQ(apm->kNoError, apm->Initialize()); 413 414 } else if (strcmp(argv[i], "--add_delay") == 0) { 415 i++; 416 ASSERT_EQ(1, sscanf(argv[i], "%d", &extra_delay_ms)); 417 418 } else if (strcmp(argv[i], "--delay") == 0) { 419 i++; 420 ASSERT_EQ(1, sscanf(argv[i], "%d", &override_delay_ms)); 421 422 } else if (strcmp(argv[i], "--perf") == 0) { 423 perf_testing = true; 424 425 } else if (strcmp(argv[i], "--quiet") == 0) { 426 verbose = false; 427 progress = false; 428 429 } else if (strcmp(argv[i], "--no_progress") == 0) { 430 progress = false; 431 432 } else if (strcmp(argv[i], "--raw_output") == 0) { 433 raw_output = true; 434 435 } else if (strcmp(argv[i], "--debug_file") == 0) { 436 i++; 437 ASSERT_LT(i, argc) << "Specify filename after --debug_file"; 438 ASSERT_EQ(apm->kNoError, apm->StartDebugRecording(argv[i])); 439 } else { 440 FAIL() << "Unrecognized argument " << argv[i]; 441 } 442 } 443 // If we're reading a protobuf file, ensure a simulation hasn't also 444 // been requested (which makes no sense...) 445 ASSERT_FALSE(pb_filename && simulating); 446 447 if (verbose) { 448 printf("Sample rate: %d Hz\n", sample_rate_hz); 449 printf("Primary channels: %d (in), %d (out)\n", 450 num_capture_input_channels, 451 num_capture_output_channels); 452 printf("Reverse channels: %d \n", num_render_channels); 453 } 454 455 const std::string out_path = webrtc::test::OutputPath(); 456 const char far_file_default[] = "apm_far.pcm"; 457 const char near_file_default[] = "apm_near.pcm"; 458 const char event_filename[] = "apm_event.dat"; 459 const char delay_filename[] = "apm_delay.dat"; 460 const char drift_filename[] = "apm_drift.dat"; 461 const std::string vad_file_default = out_path + "vad_out.dat"; 462 const std::string ns_prob_file_default = out_path + "ns_prob.dat"; 463 464 if (!simulating) { 465 far_filename = far_file_default; 466 near_filename = near_file_default; 467 } 468 469 if (out_filename.size() == 0) { 470 out_filename = out_path + "out"; 471 } 472 473 if (!vad_out_filename) { 474 vad_out_filename = vad_file_default.c_str(); 475 } 476 477 if (!ns_prob_filename) { 478 ns_prob_filename = ns_prob_file_default.c_str(); 479 } 480 481 FILE* pb_file = NULL; 482 FILE* far_file = NULL; 483 FILE* near_file = NULL; 484 FILE* event_file = NULL; 485 FILE* delay_file = NULL; 486 FILE* drift_file = NULL; 487 FILE* vad_out_file = NULL; 488 FILE* ns_prob_file = NULL; 489 FILE* aecm_echo_path_in_file = NULL; 490 FILE* aecm_echo_path_out_file = NULL; 491 492 scoped_ptr<WavFile> output_wav_file; 493 scoped_ptr<RawFile> output_raw_file; 494 495 if (pb_filename) { 496 pb_file = OpenFile(pb_filename, "rb"); 497 } else { 498 if (far_filename) { 499 far_file = OpenFile(far_filename, "rb"); 500 } 501 502 near_file = OpenFile(near_filename, "rb"); 503 if (!simulating) { 504 event_file = OpenFile(event_filename, "rb"); 505 delay_file = OpenFile(delay_filename, "rb"); 506 drift_file = OpenFile(drift_filename, "rb"); 507 } 508 } 509 510 int near_size_bytes = 0; 511 if (pb_file) { 512 struct stat st; 513 stat(pb_filename, &st); 514 // Crude estimate, but should be good enough. 515 near_size_bytes = st.st_size / 3; 516 } else { 517 struct stat st; 518 stat(near_filename, &st); 519 near_size_bytes = st.st_size; 520 } 521 522 if (apm->voice_detection()->is_enabled()) { 523 vad_out_file = OpenFile(vad_out_filename, "wb"); 524 } 525 526 if (apm->noise_suppression()->is_enabled()) { 527 ns_prob_file = OpenFile(ns_prob_filename, "wb"); 528 } 529 530 if (aecm_echo_path_in_filename != NULL) { 531 aecm_echo_path_in_file = OpenFile(aecm_echo_path_in_filename, "rb"); 532 533 const size_t path_size = 534 apm->echo_control_mobile()->echo_path_size_bytes(); 535 scoped_ptr<char[]> echo_path(new char[path_size]); 536 ASSERT_EQ(path_size, fread(echo_path.get(), 537 sizeof(char), 538 path_size, 539 aecm_echo_path_in_file)); 540 EXPECT_EQ(apm->kNoError, 541 apm->echo_control_mobile()->SetEchoPath(echo_path.get(), 542 path_size)); 543 fclose(aecm_echo_path_in_file); 544 aecm_echo_path_in_file = NULL; 545 } 546 547 if (aecm_echo_path_out_filename != NULL) { 548 aecm_echo_path_out_file = OpenFile(aecm_echo_path_out_filename, "wb"); 549 } 550 551 size_t read_count = 0; 552 int reverse_count = 0; 553 int primary_count = 0; 554 int near_read_bytes = 0; 555 TickInterval acc_ticks; 556 557 AudioFrame far_frame; 558 AudioFrame near_frame; 559 560 int delay_ms = 0; 561 int drift_samples = 0; 562 int capture_level = 127; 563 int8_t stream_has_voice = 0; 564 float ns_speech_prob = 0.0f; 565 566 TickTime t0 = TickTime::Now(); 567 TickTime t1 = t0; 568 int64_t max_time_us = 0; 569 int64_t max_time_reverse_us = 0; 570 int64_t min_time_us = 1e6; 571 int64_t min_time_reverse_us = 1e6; 572 573 // TODO(ajm): Ideally we would refactor this block into separate functions, 574 // but for now we want to share the variables. 575 if (pb_file) { 576 Event event_msg; 577 scoped_ptr<ChannelBuffer<float> > reverse_cb; 578 scoped_ptr<ChannelBuffer<float> > primary_cb; 579 int output_sample_rate = 32000; 580 AudioProcessing::ChannelLayout output_layout = AudioProcessing::kMono; 581 while (ReadMessageFromFile(pb_file, &event_msg)) { 582 std::ostringstream trace_stream; 583 trace_stream << "Processed frames: " << reverse_count << " (reverse), " 584 << primary_count << " (primary)"; 585 SCOPED_TRACE(trace_stream.str()); 586 587 if (event_msg.type() == Event::INIT) { 588 ASSERT_TRUE(event_msg.has_init()); 589 const Init msg = event_msg.init(); 590 591 ASSERT_TRUE(msg.has_sample_rate()); 592 ASSERT_TRUE(msg.has_num_input_channels()); 593 ASSERT_TRUE(msg.has_num_output_channels()); 594 ASSERT_TRUE(msg.has_num_reverse_channels()); 595 int reverse_sample_rate = msg.sample_rate(); 596 if (msg.has_reverse_sample_rate()) { 597 reverse_sample_rate = msg.reverse_sample_rate(); 598 } 599 output_sample_rate = msg.sample_rate(); 600 if (msg.has_output_sample_rate()) { 601 output_sample_rate = msg.output_sample_rate(); 602 } 603 output_layout = LayoutFromChannels(msg.num_output_channels()); 604 ASSERT_EQ(kNoErr, apm->Initialize( 605 msg.sample_rate(), 606 output_sample_rate, 607 reverse_sample_rate, 608 LayoutFromChannels(msg.num_input_channels()), 609 output_layout, 610 LayoutFromChannels(msg.num_reverse_channels()))); 611 612 samples_per_channel = msg.sample_rate() / 100; 613 far_frame.sample_rate_hz_ = msg.sample_rate(); 614 far_frame.samples_per_channel_ = reverse_sample_rate / 100; 615 far_frame.num_channels_ = msg.num_reverse_channels(); 616 near_frame.sample_rate_hz_ = msg.sample_rate(); 617 near_frame.samples_per_channel_ = samples_per_channel; 618 near_frame.num_channels_ = msg.num_input_channels(); 619 reverse_cb.reset(new ChannelBuffer<float>( 620 far_frame.samples_per_channel_, 621 msg.num_reverse_channels())); 622 primary_cb.reset(new ChannelBuffer<float>(samples_per_channel, 623 msg.num_input_channels())); 624 625 if (verbose) { 626 printf("Init at frame: %d (primary), %d (reverse)\n", 627 primary_count, reverse_count); 628 printf(" Primary rates: %d Hz (in), %d Hz (out)\n", 629 msg.sample_rate(), output_sample_rate); 630 printf(" Primary channels: %d (in), %d (out)\n", 631 msg.num_input_channels(), 632 msg.num_output_channels()); 633 printf(" Reverse rate: %d\n", reverse_sample_rate); 634 printf(" Reverse channels: %d\n", msg.num_reverse_channels()); 635 } 636 637 if (!raw_output) { 638 // The WAV file needs to be reset every time, because it cant change 639 // it's sample rate or number of channels. 640 output_wav_file.reset(new WavFile(out_filename + ".wav", 641 output_sample_rate, 642 msg.num_output_channels())); 643 } 644 645 } else if (event_msg.type() == Event::REVERSE_STREAM) { 646 ASSERT_TRUE(event_msg.has_reverse_stream()); 647 ReverseStream msg = event_msg.reverse_stream(); 648 reverse_count++; 649 650 ASSERT_TRUE(msg.has_data() ^ (msg.channel_size() > 0)); 651 if (msg.has_data()) { 652 ASSERT_EQ(sizeof(int16_t) * far_frame.samples_per_channel_ * 653 far_frame.num_channels_, msg.data().size()); 654 memcpy(far_frame.data_, msg.data().data(), msg.data().size()); 655 } else { 656 for (int i = 0; i < msg.channel_size(); ++i) { 657 reverse_cb->CopyFrom(msg.channel(i).data(), i); 658 } 659 } 660 661 if (perf_testing) { 662 t0 = TickTime::Now(); 663 } 664 665 if (msg.has_data()) { 666 ASSERT_EQ(apm->kNoError, 667 apm->AnalyzeReverseStream(&far_frame)); 668 } else { 669 ASSERT_EQ(apm->kNoError, 670 apm->AnalyzeReverseStream( 671 reverse_cb->channels(), 672 far_frame.samples_per_channel_, 673 far_frame.sample_rate_hz_, 674 LayoutFromChannels(far_frame.num_channels_))); 675 } 676 677 if (perf_testing) { 678 t1 = TickTime::Now(); 679 TickInterval tick_diff = t1 - t0; 680 acc_ticks += tick_diff; 681 if (tick_diff.Microseconds() > max_time_reverse_us) { 682 max_time_reverse_us = tick_diff.Microseconds(); 683 } 684 if (tick_diff.Microseconds() < min_time_reverse_us) { 685 min_time_reverse_us = tick_diff.Microseconds(); 686 } 687 } 688 689 } else if (event_msg.type() == Event::STREAM) { 690 ASSERT_TRUE(event_msg.has_stream()); 691 const Stream msg = event_msg.stream(); 692 primary_count++; 693 694 // ProcessStream could have changed this for the output frame. 695 near_frame.num_channels_ = apm->num_input_channels(); 696 697 ASSERT_TRUE(msg.has_input_data() ^ (msg.input_channel_size() > 0)); 698 if (msg.has_input_data()) { 699 ASSERT_EQ(sizeof(int16_t) * samples_per_channel * 700 near_frame.num_channels_, msg.input_data().size()); 701 memcpy(near_frame.data_, 702 msg.input_data().data(), 703 msg.input_data().size()); 704 near_read_bytes += msg.input_data().size(); 705 } else { 706 for (int i = 0; i < msg.input_channel_size(); ++i) { 707 primary_cb->CopyFrom(msg.input_channel(i).data(), i); 708 near_read_bytes += msg.input_channel(i).size(); 709 } 710 } 711 712 if (progress && primary_count % 100 == 0) { 713 near_read_bytes = std::min(near_read_bytes, near_size_bytes); 714 printf("%.0f%% complete\r", 715 (near_read_bytes * 100.0) / near_size_bytes); 716 fflush(stdout); 717 } 718 719 if (perf_testing) { 720 t0 = TickTime::Now(); 721 } 722 723 ASSERT_EQ(apm->kNoError, 724 apm->gain_control()->set_stream_analog_level(msg.level())); 725 delay_ms = msg.delay() + extra_delay_ms; 726 if (override_delay_ms) { 727 delay_ms = override_delay_ms; 728 } 729 ASSERT_EQ(apm->kNoError, 730 apm->set_stream_delay_ms(delay_ms)); 731 apm->echo_cancellation()->set_stream_drift_samples(msg.drift()); 732 733 if (msg.has_keypress()) { 734 apm->set_stream_key_pressed(msg.keypress()); 735 } else { 736 apm->set_stream_key_pressed(true); 737 } 738 739 int err = apm->kNoError; 740 if (msg.has_input_data()) { 741 err = apm->ProcessStream(&near_frame); 742 ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels()); 743 } else { 744 err = apm->ProcessStream( 745 primary_cb->channels(), 746 near_frame.samples_per_channel_, 747 near_frame.sample_rate_hz_, 748 LayoutFromChannels(near_frame.num_channels_), 749 output_sample_rate, 750 output_layout, 751 primary_cb->channels()); 752 } 753 754 if (err == apm->kBadStreamParameterWarning) { 755 printf("Bad parameter warning. %s\n", trace_stream.str().c_str()); 756 } 757 ASSERT_TRUE(err == apm->kNoError || 758 err == apm->kBadStreamParameterWarning); 759 760 stream_has_voice = 761 static_cast<int8_t>(apm->voice_detection()->stream_has_voice()); 762 if (vad_out_file != NULL) { 763 ASSERT_EQ(1u, fwrite(&stream_has_voice, 764 sizeof(stream_has_voice), 765 1, 766 vad_out_file)); 767 } 768 769 if (ns_prob_file != NULL) { 770 ns_speech_prob = apm->noise_suppression()->speech_probability(); 771 ASSERT_EQ(1u, fwrite(&ns_speech_prob, 772 sizeof(ns_speech_prob), 773 1, 774 ns_prob_file)); 775 } 776 777 if (perf_testing) { 778 t1 = TickTime::Now(); 779 TickInterval tick_diff = t1 - t0; 780 acc_ticks += tick_diff; 781 if (tick_diff.Microseconds() > max_time_us) { 782 max_time_us = tick_diff.Microseconds(); 783 } 784 if (tick_diff.Microseconds() < min_time_us) { 785 min_time_us = tick_diff.Microseconds(); 786 } 787 } 788 789 const size_t samples_per_channel = output_sample_rate / 100; 790 if (msg.has_input_data()) { 791 if (raw_output && !output_raw_file) { 792 output_raw_file.reset(new RawFile(out_filename + ".pcm")); 793 } 794 WriteIntData(near_frame.data_, 795 apm->num_output_channels() * samples_per_channel, 796 output_wav_file.get(), 797 output_raw_file.get()); 798 } else { 799 if (raw_output && !output_raw_file) { 800 output_raw_file.reset(new RawFile(out_filename + ".float")); 801 } 802 WriteFloatData(primary_cb->channels(), 803 samples_per_channel, 804 apm->num_output_channels(), 805 output_wav_file.get(), 806 output_raw_file.get()); 807 } 808 } 809 } 810 811 ASSERT_TRUE(feof(pb_file)); 812 813 } else { 814 enum Events { 815 kInitializeEvent, 816 kRenderEvent, 817 kCaptureEvent, 818 kResetEventDeprecated 819 }; 820 int16_t event = 0; 821 while (simulating || feof(event_file) == 0) { 822 std::ostringstream trace_stream; 823 trace_stream << "Processed frames: " << reverse_count << " (reverse), " 824 << primary_count << " (primary)"; 825 SCOPED_TRACE(trace_stream.str()); 826 827 if (simulating) { 828 if (far_file == NULL) { 829 event = kCaptureEvent; 830 } else { 831 if (event == kRenderEvent) { 832 event = kCaptureEvent; 833 } else { 834 event = kRenderEvent; 835 } 836 } 837 } else { 838 read_count = fread(&event, sizeof(event), 1, event_file); 839 if (read_count != 1) { 840 break; 841 } 842 } 843 844 far_frame.sample_rate_hz_ = sample_rate_hz; 845 far_frame.samples_per_channel_ = samples_per_channel; 846 far_frame.num_channels_ = num_render_channels; 847 near_frame.sample_rate_hz_ = sample_rate_hz; 848 near_frame.samples_per_channel_ = samples_per_channel; 849 850 if (event == kInitializeEvent || event == kResetEventDeprecated) { 851 ASSERT_EQ(1u, 852 fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file)); 853 samples_per_channel = sample_rate_hz / 100; 854 855 int32_t unused_device_sample_rate_hz; 856 ASSERT_EQ(1u, 857 fread(&unused_device_sample_rate_hz, 858 sizeof(unused_device_sample_rate_hz), 859 1, 860 event_file)); 861 862 ASSERT_EQ(kNoErr, apm->Initialize( 863 sample_rate_hz, 864 sample_rate_hz, 865 sample_rate_hz, 866 LayoutFromChannels(num_capture_input_channels), 867 LayoutFromChannels(num_capture_output_channels), 868 LayoutFromChannels(num_render_channels))); 869 870 far_frame.sample_rate_hz_ = sample_rate_hz; 871 far_frame.samples_per_channel_ = samples_per_channel; 872 far_frame.num_channels_ = num_render_channels; 873 near_frame.sample_rate_hz_ = sample_rate_hz; 874 near_frame.samples_per_channel_ = samples_per_channel; 875 876 if (!raw_output) { 877 // The WAV file needs to be reset every time, because it can't change 878 // it's sample rate or number of channels. 879 output_wav_file.reset(new WavFile(out_filename + ".wav", 880 sample_rate_hz, 881 num_capture_output_channels)); 882 } 883 884 if (verbose) { 885 printf("Init at frame: %d (primary), %d (reverse)\n", 886 primary_count, reverse_count); 887 printf(" Sample rate: %d Hz\n", sample_rate_hz); 888 } 889 890 } else if (event == kRenderEvent) { 891 reverse_count++; 892 893 size_t size = samples_per_channel * num_render_channels; 894 read_count = fread(far_frame.data_, 895 sizeof(int16_t), 896 size, 897 far_file); 898 899 if (simulating) { 900 if (read_count != size) { 901 // Read an equal amount from the near file to avoid errors due to 902 // not reaching end-of-file. 903 EXPECT_EQ(0, fseek(near_file, read_count * sizeof(int16_t), 904 SEEK_CUR)); 905 break; // This is expected. 906 } 907 } else { 908 ASSERT_EQ(size, read_count); 909 } 910 911 if (perf_testing) { 912 t0 = TickTime::Now(); 913 } 914 915 ASSERT_EQ(apm->kNoError, 916 apm->AnalyzeReverseStream(&far_frame)); 917 918 if (perf_testing) { 919 t1 = TickTime::Now(); 920 TickInterval tick_diff = t1 - t0; 921 acc_ticks += tick_diff; 922 if (tick_diff.Microseconds() > max_time_reverse_us) { 923 max_time_reverse_us = tick_diff.Microseconds(); 924 } 925 if (tick_diff.Microseconds() < min_time_reverse_us) { 926 min_time_reverse_us = tick_diff.Microseconds(); 927 } 928 } 929 930 } else if (event == kCaptureEvent) { 931 primary_count++; 932 near_frame.num_channels_ = num_capture_input_channels; 933 934 size_t size = samples_per_channel * num_capture_input_channels; 935 read_count = fread(near_frame.data_, 936 sizeof(int16_t), 937 size, 938 near_file); 939 940 near_read_bytes += read_count * sizeof(int16_t); 941 if (progress && primary_count % 100 == 0) { 942 printf("%.0f%% complete\r", 943 (near_read_bytes * 100.0) / near_size_bytes); 944 fflush(stdout); 945 } 946 if (simulating) { 947 if (read_count != size) { 948 break; // This is expected. 949 } 950 951 delay_ms = 0; 952 drift_samples = 0; 953 } else { 954 ASSERT_EQ(size, read_count); 955 956 // TODO(ajm): sizeof(delay_ms) for current files? 957 ASSERT_EQ(1u, 958 fread(&delay_ms, 2, 1, delay_file)); 959 ASSERT_EQ(1u, 960 fread(&drift_samples, sizeof(drift_samples), 1, drift_file)); 961 } 962 963 if (apm->gain_control()->is_enabled() && 964 apm->gain_control()->mode() == GainControl::kAdaptiveAnalog) { 965 SimulateMic(capture_level, &near_frame); 966 } 967 968 if (perf_testing) { 969 t0 = TickTime::Now(); 970 } 971 972 const int capture_level_in = capture_level; 973 ASSERT_EQ(apm->kNoError, 974 apm->gain_control()->set_stream_analog_level(capture_level)); 975 delay_ms += extra_delay_ms; 976 if (override_delay_ms) { 977 delay_ms = override_delay_ms; 978 } 979 ASSERT_EQ(apm->kNoError, 980 apm->set_stream_delay_ms(delay_ms)); 981 apm->echo_cancellation()->set_stream_drift_samples(drift_samples); 982 983 apm->set_stream_key_pressed(true); 984 985 int err = apm->ProcessStream(&near_frame); 986 if (err == apm->kBadStreamParameterWarning) { 987 printf("Bad parameter warning. %s\n", trace_stream.str().c_str()); 988 } 989 ASSERT_TRUE(err == apm->kNoError || 990 err == apm->kBadStreamParameterWarning); 991 ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels()); 992 993 capture_level = apm->gain_control()->stream_analog_level(); 994 995 stream_has_voice = 996 static_cast<int8_t>(apm->voice_detection()->stream_has_voice()); 997 if (vad_out_file != NULL) { 998 ASSERT_EQ(1u, fwrite(&stream_has_voice, 999 sizeof(stream_has_voice), 1000 1, 1001 vad_out_file)); 1002 } 1003 1004 if (ns_prob_file != NULL) { 1005 ns_speech_prob = apm->noise_suppression()->speech_probability(); 1006 ASSERT_EQ(1u, fwrite(&ns_speech_prob, 1007 sizeof(ns_speech_prob), 1008 1, 1009 ns_prob_file)); 1010 } 1011 1012 if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) { 1013 ASSERT_EQ(capture_level_in, capture_level); 1014 } 1015 1016 if (perf_testing) { 1017 t1 = TickTime::Now(); 1018 TickInterval tick_diff = t1 - t0; 1019 acc_ticks += tick_diff; 1020 if (tick_diff.Microseconds() > max_time_us) { 1021 max_time_us = tick_diff.Microseconds(); 1022 } 1023 if (tick_diff.Microseconds() < min_time_us) { 1024 min_time_us = tick_diff.Microseconds(); 1025 } 1026 } 1027 1028 if (raw_output && !output_raw_file) { 1029 output_raw_file.reset(new RawFile(out_filename + ".pcm")); 1030 } 1031 if (!raw_output && !output_wav_file) { 1032 output_wav_file.reset(new WavFile(out_filename + ".wav", 1033 sample_rate_hz, 1034 num_capture_output_channels)); 1035 } 1036 WriteIntData(near_frame.data_, 1037 size, 1038 output_wav_file.get(), 1039 output_raw_file.get()); 1040 } 1041 else { 1042 FAIL() << "Event " << event << " is unrecognized"; 1043 } 1044 } 1045 } 1046 printf("100%% complete\r"); 1047 1048 if (aecm_echo_path_out_file != NULL) { 1049 const size_t path_size = 1050 apm->echo_control_mobile()->echo_path_size_bytes(); 1051 scoped_ptr<char[]> echo_path(new char[path_size]); 1052 apm->echo_control_mobile()->GetEchoPath(echo_path.get(), path_size); 1053 ASSERT_EQ(path_size, fwrite(echo_path.get(), 1054 sizeof(char), 1055 path_size, 1056 aecm_echo_path_out_file)); 1057 fclose(aecm_echo_path_out_file); 1058 aecm_echo_path_out_file = NULL; 1059 } 1060 1061 if (verbose) { 1062 printf("\nProcessed frames: %d (primary), %d (reverse)\n", 1063 primary_count, reverse_count); 1064 1065 if (apm->level_estimator()->is_enabled()) { 1066 printf("\n--Level metrics--\n"); 1067 printf("RMS: %d dBFS\n", -apm->level_estimator()->RMS()); 1068 } 1069 if (apm->echo_cancellation()->are_metrics_enabled()) { 1070 EchoCancellation::Metrics metrics; 1071 apm->echo_cancellation()->GetMetrics(&metrics); 1072 printf("\n--Echo metrics--\n"); 1073 printf("(avg, max, min)\n"); 1074 printf("ERL: "); 1075 PrintStat(metrics.echo_return_loss); 1076 printf("ERLE: "); 1077 PrintStat(metrics.echo_return_loss_enhancement); 1078 printf("ANLP: "); 1079 PrintStat(metrics.a_nlp); 1080 } 1081 if (apm->echo_cancellation()->is_delay_logging_enabled()) { 1082 int median = 0; 1083 int std = 0; 1084 apm->echo_cancellation()->GetDelayMetrics(&median, &std); 1085 printf("\n--Delay metrics--\n"); 1086 printf("Median: %3d\n", median); 1087 printf("Standard deviation: %3d\n", std); 1088 } 1089 } 1090 1091 if (!pb_file) { 1092 int8_t temp_int8; 1093 if (far_file) { 1094 read_count = fread(&temp_int8, sizeof(temp_int8), 1, far_file); 1095 EXPECT_NE(0, feof(far_file)) << "Far-end file not fully processed"; 1096 } 1097 1098 read_count = fread(&temp_int8, sizeof(temp_int8), 1, near_file); 1099 EXPECT_NE(0, feof(near_file)) << "Near-end file not fully processed"; 1100 1101 if (!simulating) { 1102 read_count = fread(&temp_int8, sizeof(temp_int8), 1, event_file); 1103 EXPECT_NE(0, feof(event_file)) << "Event file not fully processed"; 1104 read_count = fread(&temp_int8, sizeof(temp_int8), 1, delay_file); 1105 EXPECT_NE(0, feof(delay_file)) << "Delay file not fully processed"; 1106 read_count = fread(&temp_int8, sizeof(temp_int8), 1, drift_file); 1107 EXPECT_NE(0, feof(drift_file)) << "Drift file not fully processed"; 1108 } 1109 } 1110 1111 if (perf_testing) { 1112 if (primary_count > 0) { 1113 int64_t exec_time = acc_ticks.Milliseconds(); 1114 printf("\nTotal time: %.3f s, file time: %.2f s\n", 1115 exec_time * 0.001, primary_count * 0.01); 1116 printf("Time per frame: %.3f ms (average), %.3f ms (max)," 1117 " %.3f ms (min)\n", 1118 (exec_time * 1.0) / primary_count, 1119 (max_time_us + max_time_reverse_us) / 1000.0, 1120 (min_time_us + min_time_reverse_us) / 1000.0); 1121 // Record the results with Perf test tools. 1122 webrtc::test::PrintResult("audioproc", "", "time_per_10ms_frame", 1123 (exec_time * 1000) / primary_count, "us", false); 1124 } else { 1125 printf("Warning: no capture frames\n"); 1126 } 1127 } 1128 } 1129 1130 } // namespace 1131 } // namespace webrtc 1132 1133 int main(int argc, char* argv[]) 1134 { 1135 webrtc::void_main(argc, argv); 1136 1137 // Optional, but removes memory leak noise from Valgrind. 1138 google::protobuf::ShutdownProtobufLibrary(); 1139 return 0; 1140 } 1141