1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ 7 8 #include <map> 9 #include <string> 10 11 #include "base/basictypes.h" 12 #include "base/callback.h" 13 #include "base/compiler_specific.h" 14 #include "base/memory/weak_ptr.h" 15 #include "content/browser/renderer_host/media/media_stream_requester.h" 16 #include "content/public/browser/speech_recognition_event_listener.h" 17 #include "content/public/browser/speech_recognition_manager.h" 18 #include "content/public/browser/speech_recognition_session_config.h" 19 #include "content/public/browser/speech_recognition_session_context.h" 20 #include "content/public/common/speech_recognition_error.h" 21 22 namespace media { 23 class AudioManager; 24 } 25 26 namespace content { 27 class BrowserMainLoop; 28 class MediaStreamManager; 29 class MediaStreamUIProxy; 30 class SpeechRecognitionManagerDelegate; 31 class SpeechRecognizer; 32 33 // This is the manager for speech recognition. It is a single instance in 34 // the browser process and can serve several requests. Each recognition request 35 // corresponds to a session, initiated via |CreateSession|. 36 // 37 // In any moment, the manager has a single session known as the primary session, 38 // |primary_session_id_|. 39 // This is the session that is capturing audio, waiting for user permission, 40 // etc. There may also be other, non-primary, sessions living in parallel that 41 // are waiting for results but not recording audio. 42 // 43 // The SpeechRecognitionManager has the following responsibilities: 44 // - Handles requests received from various render views and makes sure only 45 // one of them accesses the audio device at any given time. 46 // - Handles the instantiation of SpeechRecognitionEngine objects when 47 // requested by SpeechRecognitionSessions. 48 // - Relays recognition results/status/error events of each session to the 49 // corresponding listener (demuxing on the base of their session_id). 50 // - Relays also recognition results/status/error events of every session to 51 // the catch-all snoop listener (optionally) provided by the delegate. 52 class CONTENT_EXPORT SpeechRecognitionManagerImpl : 53 public NON_EXPORTED_BASE(SpeechRecognitionManager), 54 public SpeechRecognitionEventListener { 55 public: 56 // Returns the current SpeechRecognitionManagerImpl or NULL if the call is 57 // issued when it is not created yet or destroyed (by BrowserMainLoop). 58 static SpeechRecognitionManagerImpl* GetInstance(); 59 60 // SpeechRecognitionManager implementation. 61 virtual int CreateSession( 62 const SpeechRecognitionSessionConfig& config) OVERRIDE; 63 virtual void StartSession(int session_id) OVERRIDE; 64 virtual void AbortSession(int session_id) OVERRIDE; 65 virtual void AbortAllSessionsForRenderProcess(int render_process_id) OVERRIDE; 66 virtual void AbortAllSessionsForRenderView(int render_process_id, 67 int render_view_id) OVERRIDE; 68 virtual void StopAudioCaptureForSession(int session_id) OVERRIDE; 69 virtual const SpeechRecognitionSessionConfig& GetSessionConfig( 70 int session_id) const OVERRIDE; 71 virtual SpeechRecognitionSessionContext GetSessionContext( 72 int session_id) const OVERRIDE; 73 virtual int GetSession(int render_process_id, 74 int render_view_id, 75 int request_id) const OVERRIDE; 76 virtual bool HasAudioInputDevices() OVERRIDE; 77 virtual base::string16 GetAudioInputDeviceModel() OVERRIDE; 78 virtual void ShowAudioInputSettings() OVERRIDE; 79 80 // SpeechRecognitionEventListener methods. 81 virtual void OnRecognitionStart(int session_id) OVERRIDE; 82 virtual void OnAudioStart(int session_id) OVERRIDE; 83 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; 84 virtual void OnSoundStart(int session_id) OVERRIDE; 85 virtual void OnSoundEnd(int session_id) OVERRIDE; 86 virtual void OnAudioEnd(int session_id) OVERRIDE; 87 virtual void OnRecognitionEnd(int session_id) OVERRIDE; 88 virtual void OnRecognitionResults( 89 int session_id, const SpeechRecognitionResults& result) OVERRIDE; 90 virtual void OnRecognitionError( 91 int session_id, const SpeechRecognitionError& error) OVERRIDE; 92 virtual void OnAudioLevelsChange(int session_id, float volume, 93 float noise_volume) OVERRIDE; 94 95 SpeechRecognitionManagerDelegate* delegate() const { return delegate_.get(); } 96 97 protected: 98 // BrowserMainLoop is the only one allowed to istantiate and free us. 99 friend class BrowserMainLoop; 100 // Needed for dtor. 101 friend struct base::DefaultDeleter<SpeechRecognitionManagerImpl>; 102 SpeechRecognitionManagerImpl(media::AudioManager* audio_manager, 103 MediaStreamManager* media_stream_manager); 104 virtual ~SpeechRecognitionManagerImpl(); 105 106 private: 107 // Data types for the internal Finite State Machine (FSM). 108 enum FSMState { 109 SESSION_STATE_IDLE = 0, 110 SESSION_STATE_CAPTURING_AUDIO, 111 SESSION_STATE_WAITING_FOR_RESULT, 112 SESSION_STATE_MAX_VALUE = SESSION_STATE_WAITING_FOR_RESULT 113 }; 114 115 enum FSMEvent { 116 EVENT_ABORT = 0, 117 EVENT_START, 118 EVENT_STOP_CAPTURE, 119 EVENT_AUDIO_ENDED, 120 EVENT_RECOGNITION_ENDED, 121 EVENT_MAX_VALUE = EVENT_RECOGNITION_ENDED 122 }; 123 124 struct Session { 125 Session(); 126 ~Session(); 127 128 int id; 129 bool abort_requested; 130 bool listener_is_active; 131 SpeechRecognitionSessionConfig config; 132 SpeechRecognitionSessionContext context; 133 scoped_refptr<SpeechRecognizer> recognizer; 134 scoped_ptr<MediaStreamUIProxy> ui; 135 }; 136 137 // Callback issued by the SpeechRecognitionManagerDelegate for reporting 138 // asynchronously the result of the CheckRecognitionIsAllowed call. 139 void RecognitionAllowedCallback(int session_id, 140 bool ask_user, 141 bool is_allowed); 142 143 // Callback to get back the result of a media request. |devices| is an array 144 // of devices approved to be used for the request, |devices| is empty if the 145 // users deny the request. 146 void MediaRequestPermissionCallback(int session_id, 147 const MediaStreamDevices& devices, 148 scoped_ptr<MediaStreamUIProxy> stream_ui); 149 150 // Entry point for pushing any external event into the session handling FSM. 151 void DispatchEvent(int session_id, FSMEvent event); 152 153 // Defines the behavior of the session handling FSM, selecting the appropriate 154 // transition according to the session, its current state and the event. 155 void ExecuteTransitionAndGetNextState(Session* session, 156 FSMState session_state, 157 FSMEvent event); 158 159 // Retrieves the state of the session, enquiring directly the recognizer. 160 FSMState GetSessionState(int session_id) const; 161 162 // The methods below handle transitions of the session handling FSM. 163 void SessionStart(const Session& session); 164 void SessionAbort(const Session& session); 165 void SessionStopAudioCapture(const Session& session); 166 void ResetCapturingSessionId(const Session& session); 167 void SessionDelete(Session* session); 168 void NotFeasible(const Session& session, FSMEvent event); 169 170 bool SessionExists(int session_id) const; 171 Session* GetSession(int session_id) const; 172 SpeechRecognitionEventListener* GetListener(int session_id) const; 173 SpeechRecognitionEventListener* GetDelegateListener() const; 174 int GetNextSessionID(); 175 176 media::AudioManager* audio_manager_; 177 MediaStreamManager* media_stream_manager_; 178 typedef std::map<int, Session*> SessionsTable; 179 SessionsTable sessions_; 180 int primary_session_id_; 181 int last_session_id_; 182 bool is_dispatching_event_; 183 scoped_ptr<SpeechRecognitionManagerDelegate> delegate_; 184 185 // Used for posting asynchronous tasks (on the IO thread) without worrying 186 // about this class being destroyed in the meanwhile (due to browser shutdown) 187 // since tasks pending on a destroyed WeakPtr are automatically discarded. 188 base::WeakPtrFactory<SpeechRecognitionManagerImpl> weak_factory_; 189 }; 190 191 } // namespace content 192 193 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ 194