1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <stdio.h> 12 #include <string.h> 13 #ifdef WEBRTC_ANDROID 14 #include <sys/stat.h> 15 #endif 16 17 #include "tick_util.h" 18 #include "gtest/gtest.h" 19 #include "module_common_types.h" 20 21 #include "audio_processing.h" 22 23 #include "cpu_features_wrapper.h" 24 25 using webrtc::AudioFrame; 26 using webrtc::TickInterval; 27 using webrtc::TickTime; 28 29 using webrtc::AudioProcessing; 30 using webrtc::GainControl; 31 using webrtc::NoiseSuppression; 32 33 void usage() { 34 printf( 35 "Usage: process_test [options] [-ir REVERSE_FILE] [-i PRIMARY_FILE]\n"); 36 printf( 37 " [-o OUT_FILE]\n"); 38 printf( 39 "process_test is a test application for AudioProcessing.\n\n" 40 "When -ir or -i is specified the files will be processed directly in a\n" 41 "simulation mode. Otherwise the full set of test files is expected to be\n" 42 "present in the working directory.\n"); 43 printf("\n"); 44 printf("Options\n"); 45 printf("General configuration:\n"); 46 printf(" -fs SAMPLE_RATE_HZ\n"); 47 printf(" -ch CHANNELS_IN CHANNELS_OUT\n"); 48 printf(" -rch REVERSE_CHANNELS\n"); 49 printf("\n"); 50 printf("Component configuration:\n"); 51 printf( 52 "All components are disabled by default. Each block below begins with a\n" 53 "flag to enable the component with default settings. The subsequent flags\n" 54 "in the block are used to provide configuration settings.\n"); 55 printf("\n -aec Echo cancellation\n"); 56 printf(" --drift_compensation\n"); 57 printf(" --no_drift_compensation\n"); 58 printf("\n -aecm Echo control mobile\n"); 59 printf("\n -agc Gain control\n"); 60 printf(" --analog\n"); 61 printf(" --adaptive_digital\n"); 62 printf(" --fixed_digital\n"); 63 printf(" --target_level LEVEL\n"); 64 printf(" --compression_gain GAIN\n"); 65 printf(" --limiter\n"); 66 printf(" --no_limiter\n"); 67 printf("\n -hpf High pass filter\n"); 68 printf("\n -ns Noise suppression\n"); 69 printf(" --ns_low\n"); 70 printf(" --ns_moderate\n"); 71 printf(" --ns_high\n"); 72 printf(" --ns_very_high\n"); 73 printf("\n -vad Voice activity detection\n"); 74 printf(" --vad_out_file FILE"); 75 printf("\n"); 76 printf("Modifiers:\n"); 77 printf(" --perf Measure performance.\n"); 78 printf(" --quiet Suppress text output.\n"); 79 printf(" --no_progress Suppress progress.\n"); 80 printf(" --version Print version information and exit.\n"); 81 } 82 83 // void function for gtest. 84 void void_main(int argc, char* argv[]) { 85 if (argc > 1 && strcmp(argv[1], "--help") == 0) { 86 usage(); 87 return; 88 } 89 90 if (argc < 2) { 91 printf("Did you mean to run without arguments?\n"); 92 printf("Try `process_test --help' for more information.\n\n"); 93 } 94 95 AudioProcessing* apm = AudioProcessing::Create(0); 96 ASSERT_TRUE(apm != NULL); 97 98 WebRtc_Word8 version[1024]; 99 WebRtc_UWord32 version_bytes_remaining = sizeof(version); 100 WebRtc_UWord32 version_position = 0; 101 102 const char* far_filename = NULL; 103 const char* near_filename = NULL; 104 const char* out_filename = NULL; 105 const char* vad_out_filename = NULL; 106 107 int32_t sample_rate_hz = 16000; 108 int32_t device_sample_rate_hz = 16000; 109 110 int num_capture_input_channels = 1; 111 int num_capture_output_channels = 1; 112 int num_render_channels = 1; 113 114 int samples_per_channel = sample_rate_hz / 100; 115 116 bool simulating = false; 117 bool perf_testing = false; 118 bool verbose = true; 119 bool progress = true; 120 //bool interleaved = true; 121 122 for (int i = 1; i < argc; i++) { 123 if (strcmp(argv[i], "-ir") == 0) { 124 i++; 125 ASSERT_LT(i, argc) << "Specify filename after -ir"; 126 far_filename = argv[i]; 127 simulating = true; 128 129 } else if (strcmp(argv[i], "-i") == 0) { 130 i++; 131 ASSERT_LT(i, argc) << "Specify filename after -i"; 132 near_filename = argv[i]; 133 simulating = true; 134 135 } else if (strcmp(argv[i], "-o") == 0) { 136 i++; 137 ASSERT_LT(i, argc) << "Specify filename after -o"; 138 out_filename = argv[i]; 139 140 } else if (strcmp(argv[i], "-fs") == 0) { 141 i++; 142 ASSERT_LT(i, argc) << "Specify sample rate after -fs"; 143 ASSERT_EQ(1, sscanf(argv[i], "%d", &sample_rate_hz)); 144 samples_per_channel = sample_rate_hz / 100; 145 146 ASSERT_EQ(apm->kNoError, 147 apm->set_sample_rate_hz(sample_rate_hz)); 148 149 } else if (strcmp(argv[i], "-ch") == 0) { 150 i++; 151 ASSERT_LT(i + 1, argc) << "Specify number of channels after -ch"; 152 ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_input_channels)); 153 i++; 154 ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_output_channels)); 155 156 ASSERT_EQ(apm->kNoError, 157 apm->set_num_channels(num_capture_input_channels, 158 num_capture_output_channels)); 159 160 } else if (strcmp(argv[i], "-rch") == 0) { 161 i++; 162 ASSERT_LT(i, argc) << "Specify number of channels after -rch"; 163 ASSERT_EQ(1, sscanf(argv[i], "%d", &num_render_channels)); 164 165 ASSERT_EQ(apm->kNoError, 166 apm->set_num_reverse_channels(num_render_channels)); 167 168 } else if (strcmp(argv[i], "-aec") == 0) { 169 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 170 171 } else if (strcmp(argv[i], "-noasm") == 0) { 172 WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM; 173 174 } else if (strcmp(argv[i], "--drift_compensation") == 0) { 175 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 176 // TODO(ajm): this is enabled in the VQE test app by default. Investigate 177 // why it can give better performance despite passing zeros. 178 ASSERT_EQ(apm->kNoError, 179 apm->echo_cancellation()->enable_drift_compensation(true)); 180 } else if (strcmp(argv[i], "--no_drift_compensation") == 0) { 181 ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); 182 ASSERT_EQ(apm->kNoError, 183 apm->echo_cancellation()->enable_drift_compensation(false)); 184 185 } else if (strcmp(argv[i], "-aecm") == 0) { 186 ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true)); 187 188 } else if (strcmp(argv[i], "-agc") == 0) { 189 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 190 191 } else if (strcmp(argv[i], "--analog") == 0) { 192 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 193 ASSERT_EQ(apm->kNoError, 194 apm->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); 195 196 } else if (strcmp(argv[i], "--adaptive_digital") == 0) { 197 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 198 ASSERT_EQ(apm->kNoError, 199 apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); 200 201 } else if (strcmp(argv[i], "--fixed_digital") == 0) { 202 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 203 ASSERT_EQ(apm->kNoError, 204 apm->gain_control()->set_mode(GainControl::kFixedDigital)); 205 206 } else if (strcmp(argv[i], "--target_level") == 0) { 207 i++; 208 int level; 209 ASSERT_EQ(1, sscanf(argv[i], "%d", &level)); 210 211 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 212 ASSERT_EQ(apm->kNoError, 213 apm->gain_control()->set_target_level_dbfs(level)); 214 215 } else if (strcmp(argv[i], "--compression_gain") == 0) { 216 i++; 217 int gain; 218 ASSERT_EQ(1, sscanf(argv[i], "%d", &gain)); 219 220 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 221 ASSERT_EQ(apm->kNoError, 222 apm->gain_control()->set_compression_gain_db(gain)); 223 224 } else if (strcmp(argv[i], "--limiter") == 0) { 225 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 226 ASSERT_EQ(apm->kNoError, 227 apm->gain_control()->enable_limiter(true)); 228 229 } else if (strcmp(argv[i], "--no_limiter") == 0) { 230 ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); 231 ASSERT_EQ(apm->kNoError, 232 apm->gain_control()->enable_limiter(false)); 233 234 } else if (strcmp(argv[i], "-hpf") == 0) { 235 ASSERT_EQ(apm->kNoError, apm->high_pass_filter()->Enable(true)); 236 237 } else if (strcmp(argv[i], "-ns") == 0) { 238 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 239 240 } else if (strcmp(argv[i], "--ns_low") == 0) { 241 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 242 ASSERT_EQ(apm->kNoError, 243 apm->noise_suppression()->set_level(NoiseSuppression::kLow)); 244 245 } else if (strcmp(argv[i], "--ns_moderate") == 0) { 246 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 247 ASSERT_EQ(apm->kNoError, 248 apm->noise_suppression()->set_level(NoiseSuppression::kModerate)); 249 250 } else if (strcmp(argv[i], "--ns_high") == 0) { 251 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 252 ASSERT_EQ(apm->kNoError, 253 apm->noise_suppression()->set_level(NoiseSuppression::kHigh)); 254 255 } else if (strcmp(argv[i], "--ns_very_high") == 0) { 256 ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); 257 ASSERT_EQ(apm->kNoError, 258 apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh)); 259 260 } else if (strcmp(argv[i], "-vad") == 0) { 261 ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); 262 263 } else if (strcmp(argv[i], "--vad_out_file") == 0) { 264 i++; 265 ASSERT_LT(i, argc) << "Specify filename after --vad_out_file"; 266 vad_out_filename = argv[i]; 267 268 } else if (strcmp(argv[i], "--perf") == 0) { 269 perf_testing = true; 270 271 } else if (strcmp(argv[i], "--quiet") == 0) { 272 verbose = false; 273 progress = false; 274 275 } else if (strcmp(argv[i], "--no_progress") == 0) { 276 progress = false; 277 278 } else if (strcmp(argv[i], "--version") == 0) { 279 ASSERT_EQ(apm->kNoError, apm->Version(version, 280 version_bytes_remaining, 281 version_position)); 282 printf("%s\n", version); 283 return; 284 285 } else { 286 FAIL() << "Unrecognized argument " << argv[i]; 287 } 288 } 289 290 if (verbose) { 291 printf("Sample rate: %d Hz\n", sample_rate_hz); 292 printf("Primary channels: %d (in), %d (out)\n", 293 num_capture_input_channels, 294 num_capture_output_channels); 295 printf("Reverse channels: %d \n", num_render_channels); 296 } 297 298 const char far_file_default[] = "apm_far.pcm"; 299 const char near_file_default[] = "apm_near.pcm"; 300 const char out_file_default[] = "out.pcm"; 301 const char event_filename[] = "apm_event.dat"; 302 const char delay_filename[] = "apm_delay.dat"; 303 const char drift_filename[] = "apm_drift.dat"; 304 const char vad_file_default[] = "vad_out.dat"; 305 306 if (!simulating) { 307 far_filename = far_file_default; 308 near_filename = near_file_default; 309 } 310 311 if (out_filename == NULL) { 312 out_filename = out_file_default; 313 } 314 315 if (vad_out_filename == NULL) { 316 vad_out_filename = vad_file_default; 317 } 318 319 FILE* far_file = NULL; 320 FILE* near_file = NULL; 321 FILE* out_file = NULL; 322 FILE* event_file = NULL; 323 FILE* delay_file = NULL; 324 FILE* drift_file = NULL; 325 FILE* vad_out_file = NULL; 326 327 if (far_filename != NULL) { 328 far_file = fopen(far_filename, "rb"); 329 ASSERT_TRUE(NULL != far_file) << "Unable to open far-end audio file " 330 << far_filename; 331 } 332 333 near_file = fopen(near_filename, "rb"); 334 ASSERT_TRUE(NULL != near_file) << "Unable to open near-end audio file " 335 << near_filename; 336 struct stat st; 337 stat(near_filename, &st); 338 int near_size_samples = st.st_size / sizeof(int16_t); 339 340 out_file = fopen(out_filename, "wb"); 341 ASSERT_TRUE(NULL != out_file) << "Unable to open output audio file " 342 << out_filename; 343 344 if (!simulating) { 345 event_file = fopen(event_filename, "rb"); 346 ASSERT_TRUE(NULL != event_file) << "Unable to open event file " 347 << event_filename; 348 349 delay_file = fopen(delay_filename, "rb"); 350 ASSERT_TRUE(NULL != delay_file) << "Unable to open buffer file " 351 << delay_filename; 352 353 drift_file = fopen(drift_filename, "rb"); 354 ASSERT_TRUE(NULL != drift_file) << "Unable to open drift file " 355 << drift_filename; 356 } 357 358 if (apm->voice_detection()->is_enabled()) { 359 vad_out_file = fopen(vad_out_filename, "wb"); 360 ASSERT_TRUE(NULL != vad_out_file) << "Unable to open VAD output file " 361 << vad_out_file; 362 } 363 364 enum Events { 365 kInitializeEvent, 366 kRenderEvent, 367 kCaptureEvent, 368 kResetEventDeprecated 369 }; 370 int16_t event = 0; 371 size_t read_count = 0; 372 int reverse_count = 0; 373 int primary_count = 0; 374 int near_read_samples = 0; 375 TickInterval acc_ticks; 376 377 AudioFrame far_frame; 378 far_frame._frequencyInHz = sample_rate_hz; 379 380 AudioFrame near_frame; 381 near_frame._frequencyInHz = sample_rate_hz; 382 383 int delay_ms = 0; 384 int drift_samples = 0; 385 int capture_level = 127; 386 int8_t stream_has_voice = 0; 387 388 TickTime t0 = TickTime::Now(); 389 TickTime t1 = t0; 390 WebRtc_Word64 max_time_us = 0; 391 WebRtc_Word64 max_time_reverse_us = 0; 392 WebRtc_Word64 min_time_us = 1e6; 393 WebRtc_Word64 min_time_reverse_us = 1e6; 394 395 while (simulating || feof(event_file) == 0) { 396 std::ostringstream trace_stream; 397 trace_stream << "Processed frames: " << reverse_count << " (reverse), " 398 << primary_count << " (primary)"; 399 SCOPED_TRACE(trace_stream.str()); 400 401 402 if (simulating) { 403 if (far_file == NULL) { 404 event = kCaptureEvent; 405 } else { 406 if (event == kRenderEvent) { 407 event = kCaptureEvent; 408 } else { 409 event = kRenderEvent; 410 } 411 } 412 } else { 413 read_count = fread(&event, sizeof(event), 1, event_file); 414 if (read_count != 1) { 415 break; 416 } 417 //if (fread(&event, sizeof(event), 1, event_file) != 1) { 418 // break; // This is expected. 419 //} 420 } 421 422 if (event == kInitializeEvent || event == kResetEventDeprecated) { 423 ASSERT_EQ(1u, 424 fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file)); 425 samples_per_channel = sample_rate_hz / 100; 426 427 ASSERT_EQ(1u, 428 fread(&device_sample_rate_hz, 429 sizeof(device_sample_rate_hz), 430 1, 431 event_file)); 432 433 ASSERT_EQ(apm->kNoError, 434 apm->set_sample_rate_hz(sample_rate_hz)); 435 436 ASSERT_EQ(apm->kNoError, 437 apm->echo_cancellation()->set_device_sample_rate_hz( 438 device_sample_rate_hz)); 439 440 far_frame._frequencyInHz = sample_rate_hz; 441 near_frame._frequencyInHz = sample_rate_hz; 442 443 if (verbose) { 444 printf("Init at frame: %d (primary), %d (reverse)\n", 445 primary_count, reverse_count); 446 printf(" Sample rate: %d Hz\n", sample_rate_hz); 447 } 448 449 } else if (event == kRenderEvent) { 450 reverse_count++; 451 far_frame._audioChannel = num_render_channels; 452 far_frame._payloadDataLengthInSamples = 453 num_render_channels * samples_per_channel; 454 455 read_count = fread(far_frame._payloadData, 456 sizeof(WebRtc_Word16), 457 far_frame._payloadDataLengthInSamples, 458 far_file); 459 460 if (simulating) { 461 if (read_count != far_frame._payloadDataLengthInSamples) { 462 break; // This is expected. 463 } 464 } else { 465 ASSERT_EQ(read_count, 466 far_frame._payloadDataLengthInSamples); 467 } 468 469 if (perf_testing) { 470 t0 = TickTime::Now(); 471 } 472 473 ASSERT_EQ(apm->kNoError, 474 apm->AnalyzeReverseStream(&far_frame)); 475 476 if (perf_testing) { 477 t1 = TickTime::Now(); 478 TickInterval tick_diff = t1 - t0; 479 acc_ticks += tick_diff; 480 if (tick_diff.Microseconds() > max_time_reverse_us) { 481 max_time_reverse_us = tick_diff.Microseconds(); 482 } 483 if (tick_diff.Microseconds() < min_time_reverse_us) { 484 min_time_reverse_us = tick_diff.Microseconds(); 485 } 486 } 487 488 } else if (event == kCaptureEvent) { 489 primary_count++; 490 near_frame._audioChannel = num_capture_input_channels; 491 near_frame._payloadDataLengthInSamples = 492 num_capture_input_channels * samples_per_channel; 493 494 read_count = fread(near_frame._payloadData, 495 sizeof(WebRtc_Word16), 496 near_frame._payloadDataLengthInSamples, 497 near_file); 498 499 near_read_samples += read_count; 500 if (progress && primary_count % 100 == 0) { 501 printf("%.0f%% complete\r", 502 (near_read_samples * 100.0) / near_size_samples); 503 fflush(stdout); 504 } 505 if (simulating) { 506 if (read_count != near_frame._payloadDataLengthInSamples) { 507 break; // This is expected. 508 } 509 510 delay_ms = 0; 511 drift_samples = 0; 512 } else { 513 ASSERT_EQ(read_count, 514 near_frame._payloadDataLengthInSamples); 515 516 // TODO(ajm): sizeof(delay_ms) for current files? 517 ASSERT_EQ(1u, 518 fread(&delay_ms, 2, 1, delay_file)); 519 ASSERT_EQ(1u, 520 fread(&drift_samples, sizeof(drift_samples), 1, drift_file)); 521 } 522 523 if (perf_testing) { 524 t0 = TickTime::Now(); 525 } 526 527 // TODO(ajm): fake an analog gain while simulating. 528 529 int capture_level_in = capture_level; 530 ASSERT_EQ(apm->kNoError, 531 apm->gain_control()->set_stream_analog_level(capture_level)); 532 ASSERT_EQ(apm->kNoError, 533 apm->set_stream_delay_ms(delay_ms)); 534 ASSERT_EQ(apm->kNoError, 535 apm->echo_cancellation()->set_stream_drift_samples(drift_samples)); 536 537 int err = apm->ProcessStream(&near_frame); 538 if (err == apm->kBadStreamParameterWarning) { 539 printf("Bad parameter warning. %s\n", trace_stream.str().c_str()); 540 } 541 ASSERT_TRUE(err == apm->kNoError || 542 err == apm->kBadStreamParameterWarning); 543 544 capture_level = apm->gain_control()->stream_analog_level(); 545 546 stream_has_voice = 547 static_cast<int8_t>(apm->voice_detection()->stream_has_voice()); 548 if (vad_out_file != NULL) { 549 ASSERT_EQ(1u, fwrite(&stream_has_voice, 550 sizeof(stream_has_voice), 551 1, 552 vad_out_file)); 553 } 554 555 if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) { 556 ASSERT_EQ(capture_level_in, capture_level); 557 } 558 559 if (perf_testing) { 560 t1 = TickTime::Now(); 561 TickInterval tick_diff = t1 - t0; 562 acc_ticks += tick_diff; 563 if (tick_diff.Microseconds() > max_time_us) { 564 max_time_us = tick_diff.Microseconds(); 565 } 566 if (tick_diff.Microseconds() < min_time_us) { 567 min_time_us = tick_diff.Microseconds(); 568 } 569 } 570 571 ASSERT_EQ(near_frame._payloadDataLengthInSamples, 572 fwrite(near_frame._payloadData, 573 sizeof(WebRtc_Word16), 574 near_frame._payloadDataLengthInSamples, 575 out_file)); 576 } 577 else { 578 FAIL() << "Event " << event << " is unrecognized"; 579 } 580 } 581 582 if (verbose) { 583 printf("\nProcessed frames: %d (primary), %d (reverse)\n", 584 primary_count, reverse_count); 585 } 586 587 int8_t temp_int8; 588 if (far_file != NULL) { 589 read_count = fread(&temp_int8, sizeof(temp_int8), 1, far_file); 590 EXPECT_NE(0, feof(far_file)) << "Far-end file not fully processed"; 591 } 592 read_count = fread(&temp_int8, sizeof(temp_int8), 1, near_file); 593 EXPECT_NE(0, feof(near_file)) << "Near-end file not fully processed"; 594 595 if (!simulating) { 596 read_count = fread(&temp_int8, sizeof(temp_int8), 1, event_file); 597 EXPECT_NE(0, feof(event_file)) << "Event file not fully processed"; 598 read_count = fread(&temp_int8, sizeof(temp_int8), 1, delay_file); 599 EXPECT_NE(0, feof(delay_file)) << "Delay file not fully processed"; 600 read_count = fread(&temp_int8, sizeof(temp_int8), 1, drift_file); 601 EXPECT_NE(0, feof(drift_file)) << "Drift file not fully processed"; 602 } 603 604 if (perf_testing) { 605 if (primary_count > 0) { 606 WebRtc_Word64 exec_time = acc_ticks.Milliseconds(); 607 printf("\nTotal time: %.3f s, file time: %.2f s\n", 608 exec_time * 0.001, primary_count * 0.01); 609 printf("Time per frame: %.3f ms (average), %.3f ms (max)," 610 " %.3f ms (min)\n", 611 (exec_time * 1.0) / primary_count, 612 (max_time_us + max_time_reverse_us) / 1000.0, 613 (min_time_us + min_time_reverse_us) / 1000.0); 614 } else { 615 printf("Warning: no capture frames\n"); 616 } 617 } 618 619 AudioProcessing::Destroy(apm); 620 apm = NULL; 621 } 622 623 int main(int argc, char* argv[]) 624 { 625 void_main(argc, argv); 626 627 return 0; 628 } 629