Home | History | Annotate | Download | only in vad
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <stddef.h>  // size_t
     12 #include <stdlib.h>
     13 
     14 #include "gtest/gtest.h"
     15 #include "typedefs.h"
     16 #include "webrtc_vad.h"
     17 
     18 // TODO(bjornv): Move the internal unit tests to separate files.
     19 extern "C" {
     20 #include "vad_core.h"
     21 #include "vad_gmm.h"
     22 #include "vad_sp.h"
     23 }
     24 
     25 namespace webrtc {
     26 namespace {
     27 const int16_t kModes[] = { 0, 1, 2, 3 };
     28 const size_t kModesSize = sizeof(kModes) / sizeof(*kModes);
     29 
     30 // Rates we support.
     31 const int16_t kRates[] = { 8000, 12000, 16000, 24000, 32000 };
     32 const size_t kRatesSize = sizeof(kRates) / sizeof(*kRates);
     33 // Frame lengths we support.
     34 const int16_t kMaxFrameLength = 960;
     35 const int16_t kFrameLengths[] = { 80, 120, 160, 240, 320, 480, 640,
     36     kMaxFrameLength };
     37 const size_t kFrameLengthsSize = sizeof(kFrameLengths) / sizeof(*kFrameLengths);
     38 
     39 // Returns true if the rate and frame length combination is valid.
     40 bool ValidRatesAndFrameLengths(int16_t rate, int16_t frame_length) {
     41   if (rate == 8000) {
     42     if (frame_length == 80 || frame_length == 160 || frame_length == 240) {
     43       return true;
     44     }
     45     return false;
     46   } else if (rate == 16000) {
     47     if (frame_length == 160 || frame_length == 320 || frame_length == 480) {
     48       return true;
     49     }
     50     return false;
     51   }
     52   if (rate == 32000) {
     53     if (frame_length == 320 || frame_length == 640 || frame_length == 960) {
     54       return true;
     55     }
     56     return false;
     57   }
     58 
     59   return false;
     60 }
     61 
     62 class VadTest : public ::testing::Test {
     63  protected:
     64   VadTest();
     65   virtual void SetUp();
     66   virtual void TearDown();
     67 };
     68 
     69 VadTest::VadTest() {
     70 }
     71 
     72 void VadTest::SetUp() {
     73 }
     74 
     75 void VadTest::TearDown() {
     76 }
     77 
     78 TEST_F(VadTest, ApiTest) {
     79   // This API test runs through the APIs for all possible valid and invalid
     80   // combinations.
     81 
     82   VadInst* handle = NULL;
     83   int16_t zeros[kMaxFrameLength] = { 0 };
     84 
     85   // Construct a speech signal that will trigger the VAD in all modes. It is
     86   // known that (i * i) will wrap around, but that doesn't matter in this case.
     87   int16_t speech[kMaxFrameLength];
     88   for (int16_t i = 0; i < kMaxFrameLength; i++) {
     89     speech[i] = (i * i);
     90   }
     91 
     92   // WebRtcVad_get_version() tests
     93   char version[32];
     94   EXPECT_EQ(-1, WebRtcVad_get_version(NULL, sizeof(version)));
     95   EXPECT_EQ(-1, WebRtcVad_get_version(version, 1));
     96   EXPECT_EQ(0, WebRtcVad_get_version(version, sizeof(version)));
     97 
     98   // Null instance tests
     99   EXPECT_EQ(-1, WebRtcVad_Create(NULL));
    100   EXPECT_EQ(-1, WebRtcVad_Init(NULL));
    101   EXPECT_EQ(-1, WebRtcVad_Assign(NULL, NULL));
    102   EXPECT_EQ(-1, WebRtcVad_Free(NULL));
    103   EXPECT_EQ(-1, WebRtcVad_set_mode(NULL, kModes[0]));
    104   EXPECT_EQ(-1, WebRtcVad_Process(NULL, kRates[0], speech, kFrameLengths[0]));
    105 
    106   // WebRtcVad_AssignSize tests
    107   int handle_size_bytes = 0;
    108   EXPECT_EQ(0, WebRtcVad_AssignSize(&handle_size_bytes));
    109   EXPECT_EQ(576, handle_size_bytes);
    110 
    111   // WebRtcVad_Assign tests
    112   void* tmp_handle = malloc(handle_size_bytes);
    113   EXPECT_EQ(-1, WebRtcVad_Assign(&handle, NULL));
    114   EXPECT_EQ(0, WebRtcVad_Assign(&handle, tmp_handle));
    115   EXPECT_EQ(handle, tmp_handle);
    116   free(tmp_handle);
    117 
    118   // WebRtcVad_Create()
    119   ASSERT_EQ(0, WebRtcVad_Create(&handle));
    120 
    121   // Not initialized tests
    122   EXPECT_EQ(-1, WebRtcVad_Process(handle, kRates[0], speech, kFrameLengths[0]));
    123   EXPECT_EQ(-1, WebRtcVad_set_mode(handle, kModes[0]));
    124 
    125   // WebRtcVad_Init() test
    126   ASSERT_EQ(0, WebRtcVad_Init(handle));
    127 
    128   // WebRtcVad_set_mode() invalid modes tests
    129   EXPECT_EQ(-1, WebRtcVad_set_mode(handle, kModes[0] - 1));
    130   EXPECT_EQ(-1, WebRtcVad_set_mode(handle, kModes[kModesSize - 1] + 1));
    131 
    132   // WebRtcVad_Process() tests
    133   // NULL speech pointer
    134   EXPECT_EQ(-1, WebRtcVad_Process(handle, kRates[0], NULL, kFrameLengths[0]));
    135   // Invalid sampling rate
    136   EXPECT_EQ(-1, WebRtcVad_Process(handle, 9999, speech, kFrameLengths[0]));
    137   // All zeros as input should work
    138   EXPECT_EQ(0, WebRtcVad_Process(handle, kRates[0], zeros, kFrameLengths[0]));
    139   for (size_t k = 0; k < kModesSize; k++) {
    140     // Test valid modes
    141     EXPECT_EQ(0, WebRtcVad_set_mode(handle, kModes[k]));
    142     // Loop through sampling rate and frame length combinations
    143     for (size_t i = 0; i < kRatesSize; i++) {
    144       for (size_t j = 0; j < kFrameLengthsSize; j++) {
    145         if (ValidRatesAndFrameLengths(kRates[i], kFrameLengths[j])) {
    146           EXPECT_EQ(1, WebRtcVad_Process(handle,
    147                                          kRates[i],
    148                                          speech,
    149                                          kFrameLengths[j]));
    150         } else {
    151           EXPECT_EQ(-1, WebRtcVad_Process(handle,
    152                                           kRates[i],
    153                                           speech,
    154                                           kFrameLengths[j]));
    155         }
    156       }
    157     }
    158   }
    159 
    160   EXPECT_EQ(0, WebRtcVad_Free(handle));
    161 }
    162 
    163 TEST_F(VadTest, GMMTests) {
    164   int16_t delta = 0;
    165   // Input value at mean.
    166   EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(0, 0, 128, &delta));
    167   EXPECT_EQ(0, delta);
    168   EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(16, 128, 128, &delta));
    169   EXPECT_EQ(0, delta);
    170   EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(-16, -128, 128, &delta));
    171   EXPECT_EQ(0, delta);
    172 
    173   // Largest possible input to give non-zero probability.
    174   EXPECT_EQ(1024, WebRtcVad_GaussianProbability(59, 0, 128, &delta));
    175   EXPECT_EQ(7552, delta);
    176   EXPECT_EQ(1024, WebRtcVad_GaussianProbability(75, 128, 128, &delta));
    177   EXPECT_EQ(7552, delta);
    178   EXPECT_EQ(1024, WebRtcVad_GaussianProbability(-75, -128, 128, &delta));
    179   EXPECT_EQ(-7552, delta);
    180 
    181   // Too large input, should give zero probability.
    182   EXPECT_EQ(0, WebRtcVad_GaussianProbability(105, 0, 128, &delta));
    183   EXPECT_EQ(13440, delta);
    184 }
    185 
    186 TEST_F(VadTest, SPTests) {
    187   VadInstT* handle = (VadInstT*) malloc(sizeof(VadInstT));
    188   int16_t zeros[kMaxFrameLength] = { 0 };
    189   int32_t state[2] = { 0 };
    190   int16_t data_in[kMaxFrameLength];
    191   int16_t data_out[kMaxFrameLength];
    192 
    193   const int16_t kReferenceMin[32] = {
    194       1600, 720, 509, 512, 532, 552, 570, 588,
    195       606, 624, 642, 659, 675, 691, 707, 723,
    196       1600, 544, 502, 522, 542, 561, 579, 597,
    197       615, 633, 651, 667, 683, 699, 715, 731
    198   };
    199 
    200   // Construct a speech signal that will trigger the VAD in all modes. It is
    201   // known that (i * i) will wrap around, but that doesn't matter in this case.
    202   for (int16_t i = 0; i < kMaxFrameLength; ++i) {
    203     data_in[i] = (i * i);
    204   }
    205   // Input values all zeros, expect all zeros out.
    206   WebRtcVad_Downsampling(zeros, data_out, state, (int) kMaxFrameLength);
    207   EXPECT_EQ(0, state[0]);
    208   EXPECT_EQ(0, state[1]);
    209   for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) {
    210     EXPECT_EQ(0, data_out[i]);
    211   }
    212   // Make a simple non-zero data test.
    213   WebRtcVad_Downsampling(data_in, data_out, state, (int) kMaxFrameLength);
    214   EXPECT_EQ(207, state[0]);
    215   EXPECT_EQ(2270, state[1]);
    216 
    217   ASSERT_EQ(0, WebRtcVad_InitCore(handle, 0));
    218   for (int16_t i = 0; i < 16; ++i) {
    219     int16_t value = 500 * (i + 1);
    220     for (int j = 0; j < NUM_CHANNELS; ++j) {
    221       // Use values both above and below initialized value.
    222       EXPECT_EQ(kReferenceMin[i], WebRtcVad_FindMinimum(handle, value, j));
    223       EXPECT_EQ(kReferenceMin[i + 16], WebRtcVad_FindMinimum(handle, 12000, j));
    224     }
    225     handle->frame_counter++;
    226   }
    227 
    228   free(handle);
    229 }
    230 
    231 // TODO(bjornv): Add a process test, run on file.
    232 
    233 }  // namespace
    234 }  // namespace webrtc
    235