1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/browser/speech/audio_buffer.h" 6 #include "content/browser/speech/endpointer/endpointer.h" 7 #include "testing/gtest/include/gtest/gtest.h" 8 9 namespace { 10 const int kFrameRate = 50; // 20 ms long frames for AMR encoding. 11 const int kSampleRate = 8000; // 8 k samples per second for AMR encoding. 12 13 // At 8 sample per second a 20 ms frame is 160 samples, which corrsponds 14 // to the AMR codec. 15 const int kFrameSize = kSampleRate / kFrameRate; // 160 samples. 16 COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size); 17 } 18 19 namespace content { 20 21 class FrameProcessor { 22 public: 23 // Process a single frame of test audio samples. 24 virtual EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) = 0; 25 }; 26 27 void RunEndpointerEventsTest(FrameProcessor* processor) { 28 int16 samples[kFrameSize]; 29 30 // We will create a white noise signal of 150 frames. The frames from 50 to 31 // 100 will have more power, and the endpointer should fire on those frames. 32 const int kNumFrames = 150; 33 34 // Create a random sequence of samples. 35 srand(1); 36 float gain = 0.0; 37 int64 time = 0; 38 for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) { 39 // The frames from 50 to 100 will have more power, and the endpointer 40 // should detect those frames as speech. 41 if ((frame_count >= 50) && (frame_count < 100)) { 42 gain = 2000.0; 43 } else { 44 gain = 1.0; 45 } 46 // Create random samples. 47 for (int i = 0; i < kFrameSize; ++i) { 48 float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) / 49 static_cast<float>(RAND_MAX); 50 samples[i] = static_cast<int16>(gain * randNum); 51 } 52 53 EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize); 54 time += static_cast<int64>(kFrameSize * (1e6 / kSampleRate)); 55 56 // Log the status. 57 if (20 == frame_count) 58 EXPECT_EQ(EP_PRE_SPEECH, ep_status); 59 if (70 == frame_count) 60 EXPECT_EQ(EP_SPEECH_PRESENT, ep_status); 61 if (120 == frame_count) 62 EXPECT_EQ(EP_PRE_SPEECH, ep_status); 63 } 64 } 65 66 // This test instantiates and initializes a stand alone endpointer module. 67 // The test creates FrameData objects with random noise and send them 68 // to the endointer module. The energy of the first 50 frames is low, 69 // followed by 500 high energy frames, and another 50 low energy frames. 70 // We test that the correct start and end frames were detected. 71 class EnergyEndpointerFrameProcessor : public FrameProcessor { 72 public: 73 explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer) 74 : endpointer_(endpointer) {} 75 76 virtual EpStatus ProcessFrame(int64 time, 77 int16* samples, 78 int frame_size) OVERRIDE { 79 endpointer_->ProcessAudioFrame(time, samples, kFrameSize, NULL); 80 int64 ep_time; 81 return endpointer_->Status(&ep_time); 82 } 83 84 private: 85 EnergyEndpointer* endpointer_; 86 }; 87 88 TEST(EndpointerTest, TestEnergyEndpointerEvents) { 89 // Initialize endpointer and configure it. We specify the parameters 90 // here for a 20ms window, and a 20ms step size, which corrsponds to 91 // the narrow band AMR codec. 92 EnergyEndpointerParams ep_config; 93 ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate)); 94 ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate)); 95 ep_config.set_endpoint_margin(0.2f); 96 ep_config.set_onset_window(0.15f); 97 ep_config.set_speech_on_window(0.4f); 98 ep_config.set_offset_window(0.15f); 99 ep_config.set_onset_detect_dur(0.09f); 100 ep_config.set_onset_confirm_dur(0.075f); 101 ep_config.set_on_maintain_dur(0.10f); 102 ep_config.set_offset_confirm_dur(0.12f); 103 ep_config.set_decision_threshold(100.0f); 104 EnergyEndpointer endpointer; 105 endpointer.Init(ep_config); 106 107 endpointer.StartSession(); 108 109 EnergyEndpointerFrameProcessor frame_processor(&endpointer); 110 RunEndpointerEventsTest(&frame_processor); 111 112 endpointer.EndSession(); 113 }; 114 115 // Test endpointer wrapper class. 116 class EndpointerFrameProcessor : public FrameProcessor { 117 public: 118 explicit EndpointerFrameProcessor(Endpointer* endpointer) 119 : endpointer_(endpointer) {} 120 121 virtual EpStatus ProcessFrame(int64 time, 122 int16* samples, 123 int frame_size) OVERRIDE { 124 scoped_refptr<AudioChunk> frame( 125 new AudioChunk(reinterpret_cast<uint8*>(samples), kFrameSize * 2, 2)); 126 endpointer_->ProcessAudio(*frame.get(), NULL); 127 int64 ep_time; 128 return endpointer_->Status(&ep_time); 129 } 130 131 private: 132 Endpointer* endpointer_; 133 }; 134 135 TEST(EndpointerTest, TestEmbeddedEndpointerEvents) { 136 const int kSampleRate = 8000; // 8 k samples per second for AMR encoding. 137 138 Endpointer endpointer(kSampleRate); 139 const int64 kMillisecondsPerMicrosecond = 1000; 140 const int64 short_timeout = 300 * kMillisecondsPerMicrosecond; 141 endpointer.set_speech_input_possibly_complete_silence_length(short_timeout); 142 const int64 long_timeout = 500 * kMillisecondsPerMicrosecond; 143 endpointer.set_speech_input_complete_silence_length(long_timeout); 144 endpointer.StartSession(); 145 146 EndpointerFrameProcessor frame_processor(&endpointer); 147 RunEndpointerEventsTest(&frame_processor); 148 149 endpointer.EndSession(); 150 } 151 152 } // namespace content 153