// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
#define CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_

#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/threading/non_thread_safe.h"
#include "content/browser/speech/audio_encoder.h"
#include "content/browser/speech/chunked_byte_buffer.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/common/content_export.h"
#include "content/public/common/speech_recognition_error.h"
#include "net/url_request/url_fetcher_delegate.h"

namespace net {
class URLRequestContextGetter;
}

namespace content {

class AudioChunk;
struct SpeechRecognitionError;
struct SpeechRecognitionResult;

// Implements a SpeechRecognitionEngine supporting continuous recognition by
// means of interaction with the Google streaming speech recognition
// webservice.
// In more detail, this class establishes two HTTP(S) connections with the
// webservice for each session, herein called "upstream" and "downstream".
// Audio chunks are sent on the upstream by means of a chunked HTTP POST upload.
// Recognition results are retrieved in a full-duplex fashion (i.e. while
// pushing audio on the upstream) on the downstream by means of a chunked
// HTTP GET request. Pairing between the two streams is handled through a
// randomly generated key, unique for each request, which is passed in the
// &pair= arg to both stream request URLs.
// In the case of a regular session, the upstream is closed when the audio
// capture ends (notified through an |AudioChunksEnded| call) and the
// downstream waits for a corresponding server closure (some late results may
// still arrive after the upstream has been closed).
// Both streams are guaranteed to be closed when an |EndRecognition| call is
// issued.
//
// The class derives from base::NonThreadSafe.
// NOTE(review): presumably all calls must therefore happen on a single
// thread -- confirm against the .cc and the callers.
class CONTENT_EXPORT GoogleStreamingRemoteEngine
    : public NON_EXPORTED_BASE(SpeechRecognitionEngine),
      public net::URLFetcherDelegate,
      public NON_EXPORTED_BASE(base::NonThreadSafe) {
 public:
  // Duration of each audio packet (the "Ms" suffix indicates milliseconds).
  static const int kAudioPacketIntervalMs;

  // IDs passed to URLFetcher::Create(). Used for testing.
  static const int kUpstreamUrlFetcherIdForTesting;
  static const int kDownstreamUrlFetcherIdForTesting;

  // |context| is the URL request context getter for this engine.
  // NOTE(review): presumably retained in |url_context_| and used for both the
  // upstream and the downstream fetchers -- confirm in the .cc.
  explicit GoogleStreamingRemoteEngine(net::URLRequestContextGetter* context);
  virtual ~GoogleStreamingRemoteEngine();

  // SpeechRecognitionEngine methods.
  virtual void SetConfig(const SpeechRecognitionEngineConfig& config) OVERRIDE;
  virtual void StartRecognition() OVERRIDE;
  virtual void EndRecognition() OVERRIDE;
  virtual void TakeAudioChunk(const AudioChunk& data) OVERRIDE;
  virtual void AudioChunksEnded() OVERRIDE;
  virtual bool IsRecognitionPending() const OVERRIDE;
  virtual int GetDesiredAudioChunkDurationMs() const OVERRIDE;

  // net::URLFetcherDelegate methods.
  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
  virtual void OnURLFetchDownloadProgress(const net::URLFetcher* source,
                                          int64 current, int64 total) OVERRIDE;

 private:
  // Response status codes from the speech recognition webservice.
  static const int kWebserviceStatusNoError;
  static const int kWebserviceStatusErrorNoMatch;

  // Data types for the internal Finite State Machine (FSM).

  // States of the recognizer FSM; the current one is kept in |state_|.
  enum FSMState {
    STATE_IDLE = 0,
    STATE_BOTH_STREAMS_CONNECTED,
    STATE_WAITING_DOWNSTREAM_RESULTS,
    // Alias of the last real state; must be updated when a state is appended.
    STATE_MAX_VALUE = STATE_WAITING_DOWNSTREAM_RESULTS
  };

  // Events that can be pushed into the recognizer FSM through DispatchEvent().
  enum FSMEvent {
    EVENT_END_RECOGNITION = 0,
    EVENT_START_RECOGNITION,
    EVENT_AUDIO_CHUNK,
    EVENT_AUDIO_CHUNKS_ENDED,
    EVENT_UPSTREAM_ERROR,
    EVENT_DOWNSTREAM_ERROR,
    EVENT_DOWNSTREAM_RESPONSE,
    EVENT_DOWNSTREAM_CLOSED,
    // Alias of the last real event; must be updated when an event is appended.
    EVENT_MAX_VALUE = EVENT_DOWNSTREAM_CLOSED
  };

  // An event together with its optional payload, as handed to the FSM.
  // Instances are non-copyable (see DISALLOW_COPY_AND_ASSIGN below).
  struct FSMEventArgs {
    explicit FSMEventArgs(FSMEvent event_value);
    ~FSMEventArgs();

    // The event being dispatched.
    FSMEvent event;

    // In case of EVENT_AUDIO_CHUNK, holds the chunk pushed by |TakeAudioChunk|.
    scoped_refptr<const AudioChunk> audio_data;

    // In case of EVENT_DOWNSTREAM_RESPONSE, holds the current chunk bytes.
    scoped_ptr<std::vector<uint8> > response;

   private:
    DISALLOW_COPY_AND_ASSIGN(FSMEventArgs);
  };

  // Invoked by both upstream and downstream URLFetcher callbacks to handle
  // new chunk data, connection closed or error notifications.
  void DispatchHTTPResponse(const net::URLFetcher* source,
                            bool end_of_response);

  // Entry point for pushing any new external event into the recognizer FSM.
  void DispatchEvent(const FSMEventArgs& event_args);

  // Defines the behavior of the recognizer FSM, selecting the appropriate
  // transition according to the current state and event.
  FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& event_args);

  // The methods below handle transitions of the recognizer FSM. Each one
  // returns an FSMState.
  // NOTE(review): presumably the state the FSM moves to once the transition
  // has run -- confirm in the .cc.
  FSMState ConnectBothStreams(const FSMEventArgs& event_args);
  FSMState TransmitAudioUpstream(const FSMEventArgs& event_args);
  FSMState ProcessDownstreamResponse(const FSMEventArgs& event_args);
  FSMState RaiseNoMatchErrorIfGotNoResults(const FSMEventArgs& event_args);
  FSMState CloseUpstreamAndWaitForResults(const FSMEventArgs& event_args);
  FSMState CloseDownstream(const FSMEventArgs& event_args);
  FSMState AbortSilently(const FSMEventArgs& event_args);
  FSMState AbortWithError(const FSMEventArgs& event_args);
  // Unlike the other handlers, takes the error code to report instead of the
  // event arguments.
  FSMState Abort(SpeechRecognitionErrorCode error);
  FSMState DoNothing(const FSMEventArgs& event_args);
  // NOTE(review): presumably the handler for state/event pairs that are never
  // expected to occur -- confirm in the .cc.
  FSMState NotFeasible(const FSMEventArgs& event_args);

  std::string GetAcceptedLanguages() const;
  // NOTE(review): presumably produces the random per-request key passed in the
  // &pair= URL argument described in the class comment -- confirm in the .cc.
  std::string GenerateRequestKey() const;

  SpeechRecognitionEngineConfig config_;
  scoped_ptr<net::URLFetcher> upstream_fetcher_;
  scoped_ptr<net::URLFetcher> downstream_fetcher_;
  scoped_refptr<net::URLRequestContextGetter> url_context_;
  scoped_ptr<AudioEncoder> encoder_;
  ChunkedByteBuffer chunked_byte_buffer_;
  size_t previous_response_length_;
  bool got_last_definitive_result_;
  // NOTE(review): presumably guards against re-entrant DispatchEvent() calls
  // -- confirm in the .cc.
  bool is_dispatching_event_;
  // Current state of the recognizer FSM.
  FSMState state_;

  DISALLOW_COPY_AND_ASSIGN(GoogleStreamingRemoteEngine);
};

}  // namespace content

#endif  // CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_