Home | History | Annotate | Download | only in endpointer
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "content/browser/speech/audio_buffer.h"
      6 #include "content/browser/speech/endpointer/endpointer.h"
      7 #include "testing/gtest/include/gtest/gtest.h"
      8 
      9 namespace {
     10 const int kFrameRate = 50;  // 20 ms long frames for AMR encoding.
     11 const int kSampleRate = 8000;  // 8 k samples per second for AMR encoding.
     12 
     13 // At 8 sample per second a 20 ms frame is 160 samples, which corrsponds
     14 // to the AMR codec.
     15 const int kFrameSize = kSampleRate / kFrameRate;  // 160 samples.
     16 COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size);
     17 }
     18 
     19 namespace content {
     20 
     21 class FrameProcessor {
     22  public:
     23   // Process a single frame of test audio samples.
     24   virtual EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) = 0;
     25 };
     26 
     27 void RunEndpointerEventsTest(FrameProcessor* processor) {
     28   int16 samples[kFrameSize];
     29 
     30   // We will create a white noise signal of 150 frames. The frames from 50 to
     31   // 100 will have more power, and the endpointer should fire on those frames.
     32   const int kNumFrames = 150;
     33 
     34   // Create a random sequence of samples.
     35   srand(1);
     36   float gain = 0.0;
     37   int64 time = 0;
     38   for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) {
     39     // The frames from 50 to 100 will have more power, and the endpointer
     40     // should detect those frames as speech.
     41     if ((frame_count >= 50) && (frame_count < 100)) {
     42       gain = 2000.0;
     43     } else {
     44       gain = 1.0;
     45     }
     46     // Create random samples.
     47     for (int i = 0; i < kFrameSize; ++i) {
     48       float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) /
     49           static_cast<float>(RAND_MAX);
     50       samples[i] = static_cast<int16>(gain * randNum);
     51     }
     52 
     53     EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize);
     54     time += static_cast<int64>(kFrameSize * (1e6 / kSampleRate));
     55 
     56     // Log the status.
     57     if (20 == frame_count)
     58       EXPECT_EQ(EP_PRE_SPEECH, ep_status);
     59     if (70 == frame_count)
     60       EXPECT_EQ(EP_SPEECH_PRESENT, ep_status);
     61     if (120 == frame_count)
     62       EXPECT_EQ(EP_PRE_SPEECH, ep_status);
     63   }
     64 }
     65 
     66 // This test instantiates and initializes a stand alone endpointer module.
     67 // The test creates FrameData objects with random noise and sends them
     68 // to the endpointer module. The energy of the first 50 frames is low,
     69 // followed by 50 high energy frames, and another 50 low energy frames.
     70 // We test that the correct start and end frames were detected.
     71 class EnergyEndpointerFrameProcessor : public FrameProcessor {
     72  public:
     73   explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer)
     74       : endpointer_(endpointer) {}
     75 
     76   virtual EpStatus ProcessFrame(int64 time,
     77                                 int16* samples,
     78                                 int frame_size) OVERRIDE {
     79     endpointer_->ProcessAudioFrame(time, samples, kFrameSize, NULL);
     80     int64 ep_time;
     81     return endpointer_->Status(&ep_time);
     82   }
     83 
     84  private:
     85   EnergyEndpointer* endpointer_;
     86 };
     87 
     88 TEST(EndpointerTest, TestEnergyEndpointerEvents) {
     89   // Initialize endpointer and configure it. We specify the parameters
     90   // here for a 20ms window, and a 20ms step size, which corrsponds to
     91   // the narrow band AMR codec.
     92   EnergyEndpointerParams ep_config;
     93   ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
     94   ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
     95   ep_config.set_endpoint_margin(0.2f);
     96   ep_config.set_onset_window(0.15f);
     97   ep_config.set_speech_on_window(0.4f);
     98   ep_config.set_offset_window(0.15f);
     99   ep_config.set_onset_detect_dur(0.09f);
    100   ep_config.set_onset_confirm_dur(0.075f);
    101   ep_config.set_on_maintain_dur(0.10f);
    102   ep_config.set_offset_confirm_dur(0.12f);
    103   ep_config.set_decision_threshold(100.0f);
    104   EnergyEndpointer endpointer;
    105   endpointer.Init(ep_config);
    106 
    107   endpointer.StartSession();
    108 
    109   EnergyEndpointerFrameProcessor frame_processor(&endpointer);
    110   RunEndpointerEventsTest(&frame_processor);
    111 
    112   endpointer.EndSession();
    113 };
    114 
    115 // Test endpointer wrapper class.
    116 class EndpointerFrameProcessor : public FrameProcessor {
    117  public:
    118   explicit EndpointerFrameProcessor(Endpointer* endpointer)
    119       : endpointer_(endpointer) {}
    120 
    121   virtual EpStatus ProcessFrame(int64 time,
    122                                 int16* samples,
    123                                 int frame_size) OVERRIDE {
    124     scoped_refptr<AudioChunk> frame(
    125         new AudioChunk(reinterpret_cast<uint8*>(samples), kFrameSize * 2, 2));
    126     endpointer_->ProcessAudio(*frame.get(), NULL);
    127     int64 ep_time;
    128     return endpointer_->Status(&ep_time);
    129   }
    130 
    131  private:
    132   Endpointer* endpointer_;
    133 };
    134 
    135 TEST(EndpointerTest, TestEmbeddedEndpointerEvents) {
    136   const int kSampleRate = 8000;  // 8 k samples per second for AMR encoding.
    137 
    138   Endpointer endpointer(kSampleRate);
    139   const int64 kMillisecondsPerMicrosecond = 1000;
    140   const int64 short_timeout = 300 * kMillisecondsPerMicrosecond;
    141   endpointer.set_speech_input_possibly_complete_silence_length(short_timeout);
    142   const int64 long_timeout = 500 * kMillisecondsPerMicrosecond;
    143   endpointer.set_speech_input_complete_silence_length(long_timeout);
    144   endpointer.StartSession();
    145 
    146   EndpointerFrameProcessor frame_processor(&endpointer);
    147   RunEndpointerEventsTest(&frame_processor);
    148 
    149   endpointer.EndSession();
    150 }
    151 
    152 }  // namespace content
    153