Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
      6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
      7 
      8 #include <map>
      9 #include <string>
     10 
     11 #include "base/basictypes.h"
     12 #include "base/callback.h"
     13 #include "base/compiler_specific.h"
     14 #include "base/memory/weak_ptr.h"
     15 #include "content/browser/renderer_host/media/media_stream_requester.h"
     16 #include "content/public/browser/speech_recognition_event_listener.h"
     17 #include "content/public/browser/speech_recognition_manager.h"
     18 #include "content/public/browser/speech_recognition_session_config.h"
     19 #include "content/public/browser/speech_recognition_session_context.h"
     20 #include "content/public/common/speech_recognition_error.h"
     21 
     22 namespace media {
     23 class AudioManager;
     24 }
     25 
     26 namespace content {
     27 class BrowserMainLoop;
     28 class MediaStreamManager;
     29 class MediaStreamUIProxy;
     30 class SpeechRecognitionManagerDelegate;
     31 class SpeechRecognizer;
     32 
     33 // This is the manager for speech recognition. It is a single instance in
     34 // the browser process and can serve several requests. Each recognition request
     35 // corresponds to a session, initiated via |CreateSession|.
     36 //
     37 // At any moment, the manager has a single session known as the primary session,
     38 // |primary_session_id_|.
     39 // This is the session that is capturing audio, waiting for user permission,
     40 // etc. There may also be other, non-primary, sessions living in parallel that
     41 // are waiting for results but not recording audio.
     42 //
     43 // The SpeechRecognitionManager has the following responsibilities:
     44 //  - Handles requests received from various render views and makes sure only
     45 //    one of them accesses the audio device at any given time.
     46 //  - Handles the instantiation of SpeechRecognitionEngine objects when
     47 //    requested by SpeechRecognitionSessions.
     48 //  - Relays recognition results/status/error events of each session to the
     49 //    corresponding listener (demuxing on the basis of their session_id).
     50 //  - Also relays recognition results/status/error events of every session to
     51 //    the catch-all snoop listener (optionally) provided by the delegate.
     52 class CONTENT_EXPORT SpeechRecognitionManagerImpl :
     53     public NON_EXPORTED_BASE(SpeechRecognitionManager),
     54     public SpeechRecognitionEventListener {
     55  public:
     56   // Returns the current SpeechRecognitionManagerImpl, or NULL if it has not
     57   // been created yet or has already been destroyed (by BrowserMainLoop).
     58   static SpeechRecognitionManagerImpl* GetInstance();
     59 
     60   // SpeechRecognitionManager implementation.
     61   virtual int CreateSession(
     62       const SpeechRecognitionSessionConfig& config) OVERRIDE;
     63   virtual void StartSession(int session_id) OVERRIDE;
     64   virtual void AbortSession(int session_id) OVERRIDE;
     65   virtual void AbortAllSessionsForRenderProcess(int render_process_id) OVERRIDE;
     66   virtual void AbortAllSessionsForRenderView(int render_process_id,
     67                                              int render_view_id) OVERRIDE;
     68   virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
     69   virtual const SpeechRecognitionSessionConfig& GetSessionConfig(
     70       int session_id) const OVERRIDE;
     71   virtual SpeechRecognitionSessionContext GetSessionContext(
     72       int session_id) const OVERRIDE;
     73   virtual int GetSession(int render_process_id,
     74                          int render_view_id,
     75                          int request_id) const OVERRIDE;
     76   virtual bool HasAudioInputDevices() OVERRIDE;
     77   virtual base::string16 GetAudioInputDeviceModel() OVERRIDE;
     78   virtual void ShowAudioInputSettings() OVERRIDE;
     79 
     80   // SpeechRecognitionEventListener methods.
     81   virtual void OnRecognitionStart(int session_id) OVERRIDE;
     82   virtual void OnAudioStart(int session_id) OVERRIDE;
     83   virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;
     84   virtual void OnSoundStart(int session_id) OVERRIDE;
     85   virtual void OnSoundEnd(int session_id) OVERRIDE;
     86   virtual void OnAudioEnd(int session_id) OVERRIDE;
     87   virtual void OnRecognitionEnd(int session_id) OVERRIDE;
     88   virtual void OnRecognitionResults(
     89       int session_id, const SpeechRecognitionResults& result) OVERRIDE;
     90   virtual void OnRecognitionError(
     91       int session_id, const SpeechRecognitionError& error) OVERRIDE;
     92   virtual void OnAudioLevelsChange(int session_id, float volume,
     93                                    float noise_volume) OVERRIDE;
     94 
     95   SpeechRecognitionManagerDelegate* delegate() const { return delegate_.get(); }
     96 
     97  protected:
     98   // BrowserMainLoop is the only one allowed to instantiate and free us.
     99   friend class BrowserMainLoop;
    100   // Lets base::DefaultDeleter invoke the protected destructor below.
    101   friend struct base::DefaultDeleter<SpeechRecognitionManagerImpl>;
    102   SpeechRecognitionManagerImpl(media::AudioManager* audio_manager,
    103                                MediaStreamManager* media_stream_manager);
    104   virtual ~SpeechRecognitionManagerImpl();
    105 
    106  private:
    107   // Data types for the internal Finite State Machine (FSM).
    108   enum FSMState {
    109     SESSION_STATE_IDLE = 0,
    110     SESSION_STATE_CAPTURING_AUDIO,
    111     SESSION_STATE_WAITING_FOR_RESULT,
    112     SESSION_STATE_MAX_VALUE = SESSION_STATE_WAITING_FOR_RESULT  // Last state.
    113   };
    114 
    115   enum FSMEvent {
    116     EVENT_ABORT = 0,
    117     EVENT_START,
    118     EVENT_STOP_CAPTURE,
    119     EVENT_AUDIO_ENDED,
    120     EVENT_RECOGNITION_ENDED,
    121     EVENT_MAX_VALUE = EVENT_RECOGNITION_ENDED  // Last event.
    122   };
    123 
    124   struct Session {
    125     Session();
    126     ~Session();
    127 
    128     int id;
    129     bool abort_requested;
    130     bool listener_is_active;
    131     SpeechRecognitionSessionConfig config;
    132     SpeechRecognitionSessionContext context;
    133     scoped_refptr<SpeechRecognizer> recognizer;
    134     scoped_ptr<MediaStreamUIProxy> ui;
    135   };
    136 
    137   // Callback issued by the SpeechRecognitionManagerDelegate to asynchronously
    138   // report the result of the CheckRecognitionIsAllowed call.
    139   void RecognitionAllowedCallback(int session_id,
    140                                   bool ask_user,
    141                                   bool is_allowed);
    142 
    143   // Callback to get back the result of a media request. |devices| is an array
    144   // of devices approved to be used for the request; it is empty if the user
    145   // denies the request.
    146   void MediaRequestPermissionCallback(int session_id,
    147                                       const MediaStreamDevices& devices,
    148                                       scoped_ptr<MediaStreamUIProxy> stream_ui);
    149 
    150   // Entry point for pushing any external event into the session handling FSM.
    151   void DispatchEvent(int session_id, FSMEvent event);
    152 
    153   // Defines the behavior of the session handling FSM, selecting the appropriate
    154   // transition from the session, its state and the event. NB: returns void.
    155   void ExecuteTransitionAndGetNextState(Session* session,
    156                                         FSMState session_state,
    157                                         FSMEvent event);
    158 
    159   // Retrieves the state of the session by querying the recognizer directly.
    160   FSMState GetSessionState(int session_id) const;
    161 
    162   // The methods below handle transitions of the session handling FSM.
    163   void SessionStart(const Session& session);
    164   void SessionAbort(const Session& session);
    165   void SessionStopAudioCapture(const Session& session);
    166   void ResetCapturingSessionId(const Session& session);
    167   void SessionDelete(Session* session);
    168   void NotFeasible(const Session& session, FSMEvent event);
    169 
    170   bool SessionExists(int session_id) const;
    171   Session* GetSession(int session_id) const;
    172   SpeechRecognitionEventListener* GetListener(int session_id) const;
    173   SpeechRecognitionEventListener* GetDelegateListener() const;
    174   int GetNextSessionID();
    175 
    176   media::AudioManager* audio_manager_;
    177   MediaStreamManager* media_stream_manager_;
    178   typedef std::map<int, Session*> SessionsTable;
    179   SessionsTable sessions_;
    180   int primary_session_id_;
    181   int last_session_id_;
    182   bool is_dispatching_event_;
    183   scoped_ptr<SpeechRecognitionManagerDelegate> delegate_;
    184 
    185   // Used for posting asynchronous tasks (on the IO thread) without worrying
    186   // about this class being destroyed in the meantime (due to browser shutdown)
    187   // since tasks pending on a destroyed WeakPtr are automatically discarded.
    188   base::WeakPtrFactory<SpeechRecognitionManagerImpl> weak_factory_;
    189 };
    190 
    191 }  // namespace content
    192 
    193 #endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
    194