Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
      6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
      7 
      8 #include <queue>
      9 #include <set>
     10 #include <string>
     11 #include <vector>
     12 
     13 #include "base/memory/scoped_ptr.h"
     14 #include "base/memory/singleton.h"
     15 #include "base/memory/weak_ptr.h"
     16 #include "url/gurl.h"
     17 
     18 class Utterance;
     19 class TtsPlatformImpl;
     20 
     21 namespace base {
     22 class Value;
     23 }
     24 
     25 namespace content {
     26 class BrowserContext;
     27 }
     28 
     29 // Events sent back from the TTS engine indicating the progress.
     30 enum TtsEventType {
     31   TTS_EVENT_START,
     32   TTS_EVENT_END,
     33   TTS_EVENT_WORD,
     34   TTS_EVENT_SENTENCE,
     35   TTS_EVENT_MARKER,
     36   TTS_EVENT_INTERRUPTED,
     37   TTS_EVENT_CANCELLED,
     38   TTS_EVENT_ERROR,
     39   TTS_EVENT_PAUSE,
     40   TTS_EVENT_RESUME
     41 };
     42 
     43 enum TtsGenderType {
     44   TTS_GENDER_NONE,
     45   TTS_GENDER_MALE,
     46   TTS_GENDER_FEMALE
     47 };
     48 
// Returns true if |event_type| is one of the event types that indicates an
// utterance is finished and can be destroyed. Only the declaration lives in
// this header; which event types count as final is decided by the
// out-of-line definition.
bool IsFinalTtsEventType(TtsEventType event_type);
     52 
// The continuous parameters that apply to a given utterance.
struct UtteranceContinuousParameters {
  // Declared here and defined elsewhere; any default values for the fields
  // below come from that definition and are not visible in this header.
  UtteranceContinuousParameters();

  // NOTE(review): units and valid ranges for these values are not stated in
  // this header -- confirm against the code that consumes them.
  double rate;
  double pitch;
  double volume;
};
     61 
// Information about one voice.
struct VoiceData {
  // Constructor and destructor are declared here and defined elsewhere.
  VoiceData();
  ~VoiceData();

  // Name, language and gender of the voice.
  // NOTE(review): the expected format of |lang| is not stated in this
  // header -- confirm before parsing it.
  std::string name;
  std::string lang;
  TtsGenderType gender;

  // NOTE(review): presumably the ID of the extension that implements this
  // voice when |native| is false -- confirm.
  std::string extension_id;

  // NOTE(review): presumably the set of event types this voice is able to
  // send -- confirm against the code that fills this in.
  std::set<TtsEventType> events;

  // If true, the synthesis engine is a remote network resource.
  // It may be higher latency and may incur bandwidth costs.
  bool remote;

  // If true, this is implemented by this platform's subclass of
  // TtsPlatformImpl. If false, this is implemented by an extension.
  bool native;
  // NOTE(review): presumably only meaningful when |native| is true -- confirm.
  std::string native_voice_identifier;
};
     82 
// Interface that delegates TTS requests to user-installed extensions.
// Every operation is pure virtual. TtsController accepts and returns an
// instance through SetTtsEngineDelegate() / GetTtsEngineDelegate().
class TtsEngineDelegate {
 public:
  virtual ~TtsEngineDelegate() {}

  // Return a list of all available voices registered.
  // NOTE(review): whether |out_voices| is appended to or overwritten is not
  // stated here -- confirm against the implementation.
  virtual void GetVoices(content::BrowserContext* browser_context,
                         std::vector<VoiceData>* out_voices) = 0;

  // Speak the given utterance by sending an event to the given TTS engine.
  // |voice| describes the voice to speak with.
  virtual void Speak(Utterance* utterance, const VoiceData& voice) = 0;

  // Stop speaking the given utterance by sending an event to the target
  // associated with this utterance.
  virtual void Stop(Utterance* utterance) = 0;

  // Pause in the middle of speaking this utterance.
  virtual void Pause(Utterance* utterance) = 0;

  // Resume speaking this utterance.
  virtual void Resume(Utterance* utterance) = 0;

  // Load the built-in component extension for ChromeOS.
  // NOTE(review): the meaning of the bool result is not documented in this
  // header -- confirm against the implementation.
  virtual bool LoadBuiltInTtsExtension(
      content::BrowserContext* browser_context) = 0;
};
    109 
// Class that wants to receive events on utterances. An Utterance holds its
// delegate as a base::WeakPtr (see Utterance::set_event_delegate).
class UtteranceEventDelegate {
 public:
  virtual ~UtteranceEventDelegate() {}

  // Receives one event for |utterance|.
  //   event_type:    the kind of event being reported.
  //   char_index:    NOTE(review): presumably the character position in the
  //                  utterance text that the event refers to -- confirm.
  //   error_message: NOTE(review): presumably only meaningful for
  //                  TTS_EVENT_ERROR -- confirm.
  virtual void OnTtsEvent(Utterance* utterance,
                          TtsEventType event_type,
                          int char_index,
                          const std::string& error_message) = 0;
};
    119 
// Class that wants to be notified when the set of
// voices has changed. Instances are registered and unregistered through
// TtsController::AddVoicesChangedDelegate() and
// TtsController::RemoveVoicesChangedDelegate().
class VoicesChangedDelegate {
 public:
  virtual ~VoicesChangedDelegate() {}

  // Notification callback; takes no arguments, so the receiver has to query
  // the current voices itself if it needs them.
  virtual void OnVoicesChanged() = 0;
};
    127 
    128 // One speech utterance.
    129 class Utterance {
    130  public:
    131   // Construct an utterance given a profile and a completion task to call
    132   // when the utterance is done speaking. Before speaking this utterance,
    133   // its other parameters like text, rate, pitch, etc. should all be set.
    134   explicit Utterance(content::BrowserContext* browser_context);
    135   ~Utterance();
    136 
    137   // Sends an event to the delegate. If the event type is TTS_EVENT_END
    138   // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
    139   // uses the last good value.
    140   void OnTtsEvent(TtsEventType event_type,
    141                   int char_index,
    142                   const std::string& error_message);
    143 
    144   // Finish an utterance without sending an event to the delegate.
    145   void Finish();
    146 
    147   // Getters and setters for the text to speak and other speech options.
    148   void set_text(const std::string& text) { text_ = text; }
    149   const std::string& text() const { return text_; }
    150 
    151   void set_options(const base::Value* options);
    152   const base::Value* options() const { return options_.get(); }
    153 
    154   void set_src_extension_id(const std::string& src_extension_id) {
    155     src_extension_id_ = src_extension_id;
    156   }
    157   const std::string& src_extension_id() { return src_extension_id_; }
    158 
    159   void set_src_id(int src_id) { src_id_ = src_id; }
    160   int src_id() { return src_id_; }
    161 
    162   void set_src_url(const GURL& src_url) { src_url_ = src_url; }
    163   const GURL& src_url() { return src_url_; }
    164 
    165   void set_voice_name(const std::string& voice_name) {
    166     voice_name_ = voice_name;
    167   }
    168   const std::string& voice_name() const { return voice_name_; }
    169 
    170   void set_lang(const std::string& lang) {
    171     lang_ = lang;
    172   }
    173   const std::string& lang() const { return lang_; }
    174 
    175   void set_gender(TtsGenderType gender) {
    176     gender_ = gender;
    177   }
    178   TtsGenderType gender() const { return gender_; }
    179 
    180   void set_continuous_parameters(const UtteranceContinuousParameters& params) {
    181     continuous_parameters_ = params;
    182   }
    183   const UtteranceContinuousParameters& continuous_parameters() {
    184     return continuous_parameters_;
    185   }
    186 
    187   void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
    188   bool can_enqueue() const { return can_enqueue_; }
    189 
    190   void set_required_event_types(const std::set<TtsEventType>& types) {
    191     required_event_types_ = types;
    192   }
    193   const std::set<TtsEventType>& required_event_types() const {
    194     return required_event_types_;
    195   }
    196 
    197   void set_desired_event_types(const std::set<TtsEventType>& types) {
    198     desired_event_types_ = types;
    199   }
    200   const std::set<TtsEventType>& desired_event_types() const {
    201     return desired_event_types_;
    202   }
    203 
    204   const std::string& extension_id() const { return extension_id_; }
    205   void set_extension_id(const std::string& extension_id) {
    206     extension_id_ = extension_id;
    207   }
    208 
    209   UtteranceEventDelegate* event_delegate() const {
    210     return event_delegate_.get();
    211   }
    212   void set_event_delegate(
    213       base::WeakPtr<UtteranceEventDelegate> event_delegate) {
    214     event_delegate_ = event_delegate;
    215   }
    216 
    217   // Getters and setters for internal state.
    218   content::BrowserContext* browser_context() const { return browser_context_; }
    219   int id() const { return id_; }
    220   bool finished() const { return finished_; }
    221 
    222  private:
    223   // The BrowserContext that initiated this utterance.
    224   content::BrowserContext* browser_context_;
    225 
    226   // The extension ID of the extension providing TTS for this utterance, or
    227   // empty if native TTS is being used.
    228   std::string extension_id_;
    229 
    230   // The unique ID of this utterance, used to associate callback functions
    231   // with utterances.
    232   int id_;
    233 
    234   // The id of the next utterance, so we can associate requests with
    235   // responses.
    236   static int next_utterance_id_;
    237 
    238   // The text to speak.
    239   std::string text_;
    240 
    241   // The full options arg passed to tts.speak, which may include fields
    242   // other than the ones we explicitly parse, below.
    243   scoped_ptr<base::Value> options_;
    244 
    245   // The extension ID of the extension that called speak() and should
    246   // receive events.
    247   std::string src_extension_id_;
    248 
    249   // The source extension's ID of this utterance, so that it can associate
    250   // events with the appropriate callback.
    251   int src_id_;
    252 
    253   // The URL of the page where the source extension called speak.
    254   GURL src_url_;
    255 
    256   // The delegate to be called when an utterance event is fired.
    257   base::WeakPtr<UtteranceEventDelegate> event_delegate_;
    258 
    259   // The parsed options.
    260   std::string voice_name_;
    261   std::string lang_;
    262   TtsGenderType gender_;
    263   UtteranceContinuousParameters continuous_parameters_;
    264   bool can_enqueue_;
    265   std::set<TtsEventType> required_event_types_;
    266   std::set<TtsEventType> desired_event_types_;
    267 
    268   // The index of the current char being spoken.
    269   int char_index_;
    270 
    271   // True if this utterance received an event indicating it's done.
    272   bool finished_;
    273 };
    274 
// Singleton class that manages text-to-speech for the TTS and TTS engine
// extension APIs, maintaining a queue of pending utterances and keeping
// track of all state.
//
// This header only declares the abstract interface: every operation below is
// pure virtual and the concrete implementation is declared elsewhere.
class TtsController {
 public:
  // Get the single instance of this class.
  static TtsController* GetInstance();

  // Returns true if we're currently speaking an utterance.
  virtual bool IsSpeaking() = 0;

  // Speak the given utterance. If the utterance's can_enqueue flag is true
  // and another utterance is in progress, adds it to the end of the queue.
  // Otherwise, interrupts any current utterance and speaks this one
  // immediately.
  // NOTE(review): this header does not state who owns |utterance| after the
  // call -- confirm against the implementation.
  virtual void SpeakOrEnqueue(Utterance* utterance) = 0;

  // Stop all utterances and flush the queue. Implies leaving pause mode
  // as well.
  virtual void Stop() = 0;

  // Pause the speech queue. Some engines may support pausing in the middle
  // of an utterance.
  virtual void Pause() = 0;

  // Resume speaking.
  virtual void Resume() = 0;

  // Handle events received from the speech engine. Events are forwarded to
  // the callback function, and in addition, completion and error events
  // trigger finishing the current utterance and starting the next one, if
  // any. |utterance_id| identifies the utterance by its Utterance::id().
  virtual void OnTtsEvent(int utterance_id,
                          TtsEventType event_type,
                          int char_index,
                          const std::string& error_message) = 0;

  // Return a list of all available voices, including the native voice,
  // if supported, and all voices registered by extensions.
  virtual void GetVoices(content::BrowserContext* browser_context,
                         std::vector<VoiceData>* out_voices) = 0;

  // Called by the extension system or platform implementation when the
  // list of voices may have changed and should be re-queried.
  virtual void VoicesChanged() = 0;

  // Add a delegate that wants to be notified when the set of voices changes.
  // NOTE(review): a raw pointer is taken, so the delegate presumably must be
  // removed before it is destroyed -- confirm.
  virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;

  // Remove delegate that wants to be notified when the set of voices changes.
  virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;

  // Set the delegate that processes TTS requests with user-installed
  // extensions.
  virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0;

  // Get the delegate that processes TTS requests with user-installed
  // extensions.
  virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0;

  // For unit testing.
  virtual void SetPlatformImpl(TtsPlatformImpl* platform_impl) = 0;
  virtual int QueueSize() = 0;

 protected:
  // Protected so that code outside the class hierarchy cannot delete the
  // controller through a TtsController pointer.
  virtual ~TtsController() {}
};
    342 
    343 #endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
    344