Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // The MATLAB test data were generated using GenerateTestData.m.
     17 
     18 #include "tensorflow/core/kernels/spectrogram.h"
     19 
     20 #include <complex>
     21 #include <vector>
     22 
     23 #include "tensorflow/core/kernels/spectrogram_test_utils.h"
     24 #include "tensorflow/core/lib/core/status_test_util.h"
     25 #include "tensorflow/core/lib/io/path.h"
     26 #include "tensorflow/core/platform/test.h"
     27 #include "tensorflow/core/platform/types.h"
     28 
     29 namespace tensorflow {
     30 
     31 using ::std::complex;
     32 
     33 const char kInputFilename[] =
     34     "core/kernels/spectrogram_test_data/short_test_segment.wav";
     35 
     36 const char kExpectedFilename[] =
     37     "core/kernels/spectrogram_test_data/short_test_segment_spectrogram.csv.bin";
     38 const int kDataVectorLength = 257;
     39 const int kNumberOfFramesInTestData = 178;
     40 
     41 const char kExpectedNonPowerOfTwoFilename[] =
     42     "core/kernels/spectrogram_test_data/"
     43     "short_test_segment_spectrogram_400_200.csv.bin";
     44 const int kNonPowerOfTwoDataVectorLength = 257;
     45 const int kNumberOfFramesInNonPowerOfTwoTestData = 228;
     46 
     47 TEST(SpectrogramTest, TooLittleDataYieldsNoFrames) {
     48   Spectrogram sgram;
     49   sgram.Initialize(400, 200);
     50   std::vector<double> input;
     51   // Generate 44 samples of audio.
     52   SineWave(44100, 1000.0, 0.001, &input);
     53   EXPECT_EQ(44, input.size());
     54   std::vector<std::vector<complex<double>>> output;
     55   sgram.ComputeComplexSpectrogram(input, &output);
     56   EXPECT_EQ(0, output.size());
     57 }
     58 
     59 TEST(SpectrogramTest, StepSizeSmallerThanWindow) {
     60   Spectrogram sgram;
     61   EXPECT_TRUE(sgram.Initialize(400, 200));
     62   std::vector<double> input;
     63   // Generate 661 samples of audio.
     64   SineWave(44100, 1000.0, 0.015, &input);
     65   EXPECT_EQ(661, input.size());
     66   std::vector<std::vector<complex<double>>> output;
     67   sgram.ComputeComplexSpectrogram(input, &output);
     68   EXPECT_EQ(2, output.size());
     69 }
     70 
     71 TEST(SpectrogramTest, StepSizeBiggerThanWindow) {
     72   Spectrogram sgram;
     73   EXPECT_TRUE(sgram.Initialize(200, 400));
     74   std::vector<double> input;
     75   // Generate 882 samples of audio.
     76   SineWave(44100, 1000.0, 0.02, &input);
     77   EXPECT_EQ(882, input.size());
     78   std::vector<std::vector<complex<double>>> output;
     79   sgram.ComputeComplexSpectrogram(input, &output);
     80   EXPECT_EQ(2, output.size());
     81 }
     82 
     83 TEST(SpectrogramTest, StepSizeBiggerThanWindow2) {
     84   Spectrogram sgram;
     85   EXPECT_TRUE(sgram.Initialize(200, 400));
     86   std::vector<double> input;
     87   // Generate more than 600 but fewer than 800 samples of audio.
     88   SineWave(44100, 1000.0, 0.016, &input);
     89   EXPECT_GT(input.size(), 600);
     90   EXPECT_LT(input.size(), 800);
     91   std::vector<std::vector<complex<double>>> output;
     92   sgram.ComputeComplexSpectrogram(input, &output);
     93   EXPECT_EQ(2, output.size());
     94 }
     95 
     96 TEST(SpectrogramTest,
     97      MultipleCallsToComputeComplexSpectrogramMayYieldDifferentNumbersOfFrames) {
     98   // Repeatedly pass inputs with "extra" samples beyond complete windows
     99   // and check that the excess points cumulate to eventually cause an
    100   // extra output frame.
    101   Spectrogram sgram;
    102   sgram.Initialize(200, 400);
    103   std::vector<double> input;
    104   // Generate 882 samples of audio.
    105   SineWave(44100, 1000.0, 0.02, &input);
    106   EXPECT_EQ(882, input.size());
    107   std::vector<std::vector<complex<double>>> output;
    108   const std::vector<int> expected_output_sizes = {
    109       2,  // One pass of input leaves 82 samples buffered after two steps of
    110           // 400.
    111       2,  // Passing in 882 samples again will now leave 164 samples buffered.
    112       3,  // Third time gives 246 extra samples, triggering an extra output
    113           // frame.
    114   };
    115   for (int expected_output_size : expected_output_sizes) {
    116     sgram.ComputeComplexSpectrogram(input, &output);
    117     EXPECT_EQ(expected_output_size, output.size());
    118   }
    119 }
    120 
    121 TEST(SpectrogramTest, CumulatingExcessInputsForOverlappingFrames) {
    122   // Input frames that don't fit into whole windows are cumulated even when
    123   // the windows have overlap (similar to
    124   // MultipleCallsToComputeComplexSpectrogramMayYieldDifferentNumbersOfFrames
    125   // but with window size/hop size swapped).
    126   Spectrogram sgram;
    127   sgram.Initialize(400, 200);
    128   std::vector<double> input;
    129   // Generate 882 samples of audio.
    130   SineWave(44100, 1000.0, 0.02, &input);
    131   EXPECT_EQ(882, input.size());
    132   std::vector<std::vector<complex<double>>> output;
    133   const std::vector<int> expected_output_sizes = {
    134       3,  // Windows 0..400, 200..600, 400..800 with 82 samples buffered.
    135       4,  // 1764 frames input; outputs from 600, 800, 1000, 1200..1600.
    136       5,  // 2646 frames in; outputs from 1400, 1600, 1800, 2000, 2200..2600.
    137   };
    138   for (int expected_output_size : expected_output_sizes) {
    139     sgram.ComputeComplexSpectrogram(input, &output);
    140     EXPECT_EQ(expected_output_size, output.size());
    141   }
    142 }
    143 
    144 TEST(SpectrogramTest, StepSizeEqualToWindowWorks) {
    145   Spectrogram sgram;
    146   sgram.Initialize(200, 200);
    147   std::vector<double> input;
    148   // Generate 2205 samples of audio.
    149   SineWave(44100, 1000.0, 0.05, &input);
    150   EXPECT_EQ(2205, input.size());
    151   std::vector<std::vector<complex<double>>> output;
    152   sgram.ComputeComplexSpectrogram(input, &output);
    153   EXPECT_EQ(11, output.size());
    154 }
    155 
    156 template <class ExpectedSample, class ActualSample>
    157 void CompareComplexData(
    158     const std::vector<std::vector<complex<ExpectedSample>>>& expected,
    159     const std::vector<std::vector<complex<ActualSample>>>& actual,
    160     double tolerance) {
    161   ASSERT_EQ(actual.size(), expected.size());
    162   for (int i = 0; i < expected.size(); ++i) {
    163     ASSERT_EQ(expected[i].size(), actual[i].size());
    164     for (int j = 0; j < expected[i].size(); ++j) {
    165       ASSERT_NEAR(real(expected[i][j]), real(actual[i][j]), tolerance)
    166           << ": where i=" << i << " and j=" << j << ".";
    167       ASSERT_NEAR(imag(expected[i][j]), imag(actual[i][j]), tolerance)
    168           << ": where i=" << i << " and j=" << j << ".";
    169     }
    170   }
    171 }
    172 
    173 template <class Sample>
    174 double GetMaximumAbsolute(const std::vector<std::vector<Sample>>& spectrogram) {
    175   double max_absolute = 0.0;
    176   for (int i = 0; i < spectrogram.size(); ++i) {
    177     for (int j = 0; j < spectrogram[i].size(); ++j) {
    178       double absolute_value = std::abs(spectrogram[i][j]);
    179       if (absolute_value > max_absolute) {
    180         max_absolute = absolute_value;
    181       }
    182     }
    183   }
    184   return max_absolute;
    185 }
    186 
    187 template <class ExpectedSample, class ActualSample>
    188 void CompareMagnitudeData(
    189     const std::vector<std::vector<complex<ExpectedSample>>>&
    190         expected_complex_output,
    191     const std::vector<std::vector<ActualSample>>& actual_squared_magnitude,
    192     double tolerance) {
    193   ASSERT_EQ(actual_squared_magnitude.size(), expected_complex_output.size());
    194   for (int i = 0; i < expected_complex_output.size(); ++i) {
    195     ASSERT_EQ(expected_complex_output[i].size(),
    196               actual_squared_magnitude[i].size());
    197     for (int j = 0; j < expected_complex_output[i].size(); ++j) {
    198       ASSERT_NEAR(norm(expected_complex_output[i][j]),
    199                   actual_squared_magnitude[i][j], tolerance)
    200           << ": where i=" << i << " and j=" << j << ".";
    201     }
    202   }
    203 }
    204 
    205 TEST(SpectrogramTest, ReInitializationWorks) {
    206   Spectrogram sgram;
    207   sgram.Initialize(512, 256);
    208   std::vector<double> input;
    209   CHECK(ReadWaveFileToVector(
    210       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kInputFilename),
    211       &input));
    212   std::vector<std::vector<complex<double>>> first_output;
    213   std::vector<std::vector<complex<double>>> second_output;
    214   sgram.Initialize(512, 256);
    215   sgram.ComputeComplexSpectrogram(input, &first_output);
    216   // Re-Initialize it.
    217   sgram.Initialize(512, 256);
    218   sgram.ComputeComplexSpectrogram(input, &second_output);
    219   // Verify identical outputs.
    220   ASSERT_EQ(first_output.size(), second_output.size());
    221   int slice_size = first_output[0].size();
    222   for (int i = 0; i < first_output.size(); ++i) {
    223     ASSERT_EQ(slice_size, first_output[i].size());
    224     ASSERT_EQ(slice_size, second_output[i].size());
    225     for (int j = 0; j < slice_size; ++j) {
    226       ASSERT_EQ(first_output[i][j], second_output[i][j]);
    227     }
    228   }
    229 }
    230 
    231 TEST(SpectrogramTest, ComputedComplexDataAgreeWithMatlab) {
    232   const int kInputDataLength = 45870;
    233   Spectrogram sgram;
    234   sgram.Initialize(512, 256);
    235   std::vector<double> input;
    236   CHECK(ReadWaveFileToVector(
    237       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kInputFilename),
    238       &input));
    239   EXPECT_EQ(kInputDataLength, input.size());
    240   std::vector<std::vector<complex<double>>> expected_output;
    241   ASSERT_TRUE(ReadRawFloatFileToComplexVector(
    242       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kExpectedFilename),
    243       kDataVectorLength, &expected_output));
    244   EXPECT_EQ(kNumberOfFramesInTestData, expected_output.size());
    245   EXPECT_EQ(kDataVectorLength, expected_output[0].size());
    246   std::vector<std::vector<complex<double>>> output;
    247   sgram.ComputeComplexSpectrogram(input, &output);
    248   CompareComplexData(expected_output, output, 1e-5);
    249 }
    250 
    251 TEST(SpectrogramTest, ComputedFloatComplexDataAgreeWithMatlab) {
    252   const int kInputDataLength = 45870;
    253   Spectrogram sgram;
    254   sgram.Initialize(512, 256);
    255   std::vector<double> double_input;
    256   CHECK(ReadWaveFileToVector(
    257       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kInputFilename),
    258       &double_input));
    259   std::vector<float> input;
    260   input.assign(double_input.begin(), double_input.end());
    261   EXPECT_EQ(kInputDataLength, input.size());
    262   std::vector<std::vector<complex<double>>> expected_output;
    263   ASSERT_TRUE(ReadRawFloatFileToComplexVector(
    264       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kExpectedFilename),
    265       kDataVectorLength, &expected_output));
    266   EXPECT_EQ(kNumberOfFramesInTestData, expected_output.size());
    267   EXPECT_EQ(kDataVectorLength, expected_output[0].size());
    268   std::vector<std::vector<complex<float>>> output;
    269   sgram.ComputeComplexSpectrogram(input, &output);
    270   CompareComplexData(expected_output, output, 1e-4);
    271 }
    272 
    273 TEST(SpectrogramTest, ComputedSquaredMagnitudeDataAgreeWithMatlab) {
    274   const int kInputDataLength = 45870;
    275   Spectrogram sgram;
    276   sgram.Initialize(512, 256);
    277   std::vector<double> input;
    278   CHECK(ReadWaveFileToVector(
    279       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kInputFilename),
    280       &input));
    281   EXPECT_EQ(kInputDataLength, input.size());
    282   std::vector<std::vector<complex<double>>> expected_output;
    283   ASSERT_TRUE(ReadRawFloatFileToComplexVector(
    284       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kExpectedFilename),
    285       kDataVectorLength, &expected_output));
    286   EXPECT_EQ(kNumberOfFramesInTestData, expected_output.size());
    287   EXPECT_EQ(kDataVectorLength, expected_output[0].size());
    288   std::vector<std::vector<double>> output;
    289   sgram.ComputeSquaredMagnitudeSpectrogram(input, &output);
    290   CompareMagnitudeData(expected_output, output, 1e-3);
    291 }
    292 
    293 TEST(SpectrogramTest, ComputedFloatSquaredMagnitudeDataAgreeWithMatlab) {
    294   const int kInputDataLength = 45870;
    295   Spectrogram sgram;
    296   sgram.Initialize(512, 256);
    297   std::vector<double> double_input;
    298   CHECK(ReadWaveFileToVector(
    299       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kInputFilename),
    300       &double_input));
    301   EXPECT_EQ(kInputDataLength, double_input.size());
    302   std::vector<float> input;
    303   input.assign(double_input.begin(), double_input.end());
    304   std::vector<std::vector<complex<double>>> expected_output;
    305   ASSERT_TRUE(ReadRawFloatFileToComplexVector(
    306       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kExpectedFilename),
    307       kDataVectorLength, &expected_output));
    308   EXPECT_EQ(kNumberOfFramesInTestData, expected_output.size());
    309   EXPECT_EQ(kDataVectorLength, expected_output[0].size());
    310   std::vector<std::vector<float>> output;
    311   sgram.ComputeSquaredMagnitudeSpectrogram(input, &output);
    312   double max_absolute = GetMaximumAbsolute(output);
    313   EXPECT_GT(max_absolute, 2300.0);  // Verify that we have some big numbers.
    314   // Squaring increases dynamic range; max square is about 2300,
    315   // so 2e-4 is about 7 decimal digits; not bad for a float.
    316   CompareMagnitudeData(expected_output, output, 2e-4);
    317 }
    318 
    319 TEST(SpectrogramTest, ComputedNonPowerOfTwoComplexDataAgreeWithMatlab) {
    320   const int kInputDataLength = 45870;
    321   Spectrogram sgram;
    322   sgram.Initialize(400, 200);
    323   std::vector<double> input;
    324   CHECK(ReadWaveFileToVector(
    325       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(), kInputFilename),
    326       &input));
    327   EXPECT_EQ(kInputDataLength, input.size());
    328   std::vector<std::vector<complex<double>>> expected_output;
    329   ASSERT_TRUE(ReadRawFloatFileToComplexVector(
    330       tensorflow::io::JoinPath(testing::TensorFlowSrcRoot(),
    331                                kExpectedNonPowerOfTwoFilename),
    332       kNonPowerOfTwoDataVectorLength, &expected_output));
    333   EXPECT_EQ(kNumberOfFramesInNonPowerOfTwoTestData, expected_output.size());
    334   EXPECT_EQ(kNonPowerOfTwoDataVectorLength, expected_output[0].size());
    335   std::vector<std::vector<complex<double>>> output;
    336   sgram.ComputeComplexSpectrogram(input, &output);
    337   CompareComplexData(expected_output, output, 1e-5);
    338 }
    339 
    340 }  // namespace tensorflow
    341