Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
      6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
      7 
      8 #include <queue>
      9 #include <set>
     10 #include <string>
     11 #include <vector>
     12 
     13 #include "base/memory/scoped_ptr.h"
     14 #include "base/memory/singleton.h"
     15 #include "url/gurl.h"
     16 
     17 class Utterance;
     18 class TtsPlatformImpl;
     19 class Profile;
     20 
     21 namespace base {
     22 class Value;
     23 }
     24 
     25 // Events sent back from the TTS engine indicating the progress.
     26 enum TtsEventType {
     27   TTS_EVENT_START,
     28   TTS_EVENT_END,
     29   TTS_EVENT_WORD,
     30   TTS_EVENT_SENTENCE,
     31   TTS_EVENT_MARKER,
     32   TTS_EVENT_INTERRUPTED,
     33   TTS_EVENT_CANCELLED,
     34   TTS_EVENT_ERROR,
     35   TTS_EVENT_PAUSE,
     36   TTS_EVENT_RESUME
     37 };
     38 
     39 enum TtsGenderType {
     40   TTS_GENDER_NONE,
     41   TTS_GENDER_MALE,
     42   TTS_GENDER_FEMALE
     43 };
     44 
// Returns true if |event_type| is one of the event types that indicate an
// utterance is finished and can be destroyed.
// This is a declaration only: which event types count as final is decided by
// the out-of-line definition, not by this header.
bool IsFinalTtsEventType(TtsEventType event_type);
     48 
// The continuous parameters that apply to a given utterance.
struct UtteranceContinuousParameters {
  // Defined out of line.
  // NOTE(review): presumably assigns default values to the three fields
  // below -- confirm against the definition.
  UtteranceContinuousParameters();

  // Speech rate, pitch and volume for the utterance. Units and valid ranges
  // are not specified in this header.
  double rate;
  double pitch;
  double volume;
};
     57 
// Information about one voice.
struct VoiceData {
  // Constructor and destructor are defined out of line.
  VoiceData();
  ~VoiceData();

  // Name of the voice.
  std::string name;
  // Language of the voice.
  // NOTE(review): the expected string format is not stated here -- confirm.
  std::string lang;
  // Gender of the voice.
  TtsGenderType gender;
  // NOTE(review): presumably the ID of the extension that implements this
  // voice when |native| is false -- confirm.
  std::string extension_id;
  // NOTE(review): presumably the set of event types this voice is able to
  // send -- confirm.
  std::set<TtsEventType> events;

  // If true, this is implemented by this platform's subclass of
  // TtsPlatformImpl. If false, this is implemented by an extension.
  bool native;
  // NOTE(review): presumably identifies the voice to the platform
  // implementation when |native| is true -- confirm.
  std::string native_voice_identifier;
};
     74 
// Class that wants to receive events on utterances.
// Pure interface: implementers override OnTtsEvent(). An Utterance holds its
// delegate through set_event_delegate() / event_delegate().
class UtteranceEventDelegate {
 public:
  virtual ~UtteranceEventDelegate() {}

  // Called with an event concerning |utterance|.
  //   event_type    - which TtsEventType occurred.
  //   char_index    - character index associated with the event.
  //                   NOTE(review): presumably the index of the character
  //                   currently being spoken -- confirm.
  //   error_message - NOTE(review): presumably only meaningful for
  //                   TTS_EVENT_ERROR -- confirm.
  virtual void OnTtsEvent(Utterance* utterance,
                          TtsEventType event_type,
                          int char_index,
                          const std::string& error_message) = 0;
};
     84 
// Class that wants to be notified when the set of
// voices has changed.
// Pure interface: implementers override OnVoicesChanged(). Delegates are
// registered and unregistered through
// TtsController::AddVoicesChangedDelegate() and
// TtsController::RemoveVoicesChangedDelegate().
class VoicesChangedDelegate {
 public:
  virtual ~VoicesChangedDelegate() {}

  // Notification that the set of voices has changed. Carries no arguments.
  virtual void OnVoicesChanged() = 0;
};
     92 
     93 // One speech utterance.
     94 class Utterance {
     95  public:
     96   // Construct an utterance given a profile and a completion task to call
     97   // when the utterance is done speaking. Before speaking this utterance,
     98   // its other parameters like text, rate, pitch, etc. should all be set.
     99   explicit Utterance(Profile* profile);
    100   ~Utterance();
    101 
    102   // Sends an event to the delegate. If the event type is TTS_EVENT_END
    103   // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
    104   // uses the last good value.
    105   void OnTtsEvent(TtsEventType event_type,
    106                   int char_index,
    107                   const std::string& error_message);
    108 
    109   // Finish an utterance without sending an event to the delegate.
    110   void Finish();
    111 
    112   // Getters and setters for the text to speak and other speech options.
    113   void set_text(const std::string& text) { text_ = text; }
    114   const std::string& text() const { return text_; }
    115 
    116   void set_options(const base::Value* options);
    117   const base::Value* options() const { return options_.get(); }
    118 
    119   void set_src_extension_id(const std::string& src_extension_id) {
    120     src_extension_id_ = src_extension_id;
    121   }
    122   const std::string& src_extension_id() { return src_extension_id_; }
    123 
    124   void set_src_id(int src_id) { src_id_ = src_id; }
    125   int src_id() { return src_id_; }
    126 
    127   void set_src_url(const GURL& src_url) { src_url_ = src_url; }
    128   const GURL& src_url() { return src_url_; }
    129 
    130   void set_voice_name(const std::string& voice_name) {
    131     voice_name_ = voice_name;
    132   }
    133   const std::string& voice_name() const { return voice_name_; }
    134 
    135   void set_lang(const std::string& lang) {
    136     lang_ = lang;
    137   }
    138   const std::string& lang() const { return lang_; }
    139 
    140   void set_gender(TtsGenderType gender) {
    141     gender_ = gender;
    142   }
    143   TtsGenderType gender() const { return gender_; }
    144 
    145   void set_continuous_parameters(const UtteranceContinuousParameters& params) {
    146     continuous_parameters_ = params;
    147   }
    148   const UtteranceContinuousParameters& continuous_parameters() {
    149     return continuous_parameters_;
    150   }
    151 
    152   void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
    153   bool can_enqueue() const { return can_enqueue_; }
    154 
    155   void set_required_event_types(const std::set<TtsEventType>& types) {
    156     required_event_types_ = types;
    157   }
    158   const std::set<TtsEventType>& required_event_types() const {
    159     return required_event_types_;
    160   }
    161 
    162   void set_desired_event_types(const std::set<TtsEventType>& types) {
    163     desired_event_types_ = types;
    164   }
    165   const std::set<TtsEventType>& desired_event_types() const {
    166     return desired_event_types_;
    167   }
    168 
    169   const std::string& extension_id() const { return extension_id_; }
    170   void set_extension_id(const std::string& extension_id) {
    171     extension_id_ = extension_id;
    172   }
    173 
    174   UtteranceEventDelegate* event_delegate() const { return event_delegate_; }
    175   void set_event_delegate(UtteranceEventDelegate* event_delegate) {
    176     event_delegate_ = event_delegate;
    177   }
    178 
    179   // Getters and setters for internal state.
    180   Profile* profile() const { return profile_; }
    181   int id() const { return id_; }
    182   bool finished() const { return finished_; }
    183 
    184  private:
    185   // The profile that initiated this utterance.
    186   Profile* profile_;
    187 
    188   // The extension ID of the extension providing TTS for this utterance, or
    189   // empty if native TTS is being used.
    190   std::string extension_id_;
    191 
    192   // The unique ID of this utterance, used to associate callback functions
    193   // with utterances.
    194   int id_;
    195 
    196   // The id of the next utterance, so we can associate requests with
    197   // responses.
    198   static int next_utterance_id_;
    199 
    200   // The text to speak.
    201   std::string text_;
    202 
    203   // The full options arg passed to tts.speak, which may include fields
    204   // other than the ones we explicitly parse, below.
    205   scoped_ptr<base::Value> options_;
    206 
    207   // The extension ID of the extension that called speak() and should
    208   // receive events.
    209   std::string src_extension_id_;
    210 
    211   // The source extension's ID of this utterance, so that it can associate
    212   // events with the appropriate callback.
    213   int src_id_;
    214 
    215   // The URL of the page where the source extension called speak.
    216   GURL src_url_;
    217 
    218   // The delegate to be called when an utterance event is fired.
    219   // Weak reference; it will be cleared after we fire a "final" event
    220   // (as determined by IsFinalTtsEventType).
    221   UtteranceEventDelegate* event_delegate_;
    222 
    223   // The parsed options.
    224   std::string voice_name_;
    225   std::string lang_;
    226   TtsGenderType gender_;
    227   UtteranceContinuousParameters continuous_parameters_;
    228   bool can_enqueue_;
    229   std::set<TtsEventType> required_event_types_;
    230   std::set<TtsEventType> desired_event_types_;
    231 
    232   // The index of the current char being spoken.
    233   int char_index_;
    234 
    235   // True if this utterance received an event indicating it's done.
    236   bool finished_;
    237 };
    238 
// Singleton class that manages text-to-speech for the TTS and TTS engine
// extension APIs, maintaining a queue of pending utterances and keeping
// track of all state.
// The constructor and destructor are not public; callers obtain the
// instance through GetInstance().
class TtsController {
 public:
  // Get the single instance of this class.
  static TtsController* GetInstance();

  // Returns true if we're currently speaking an utterance.
  bool IsSpeaking();

  // Speak the given utterance. If the utterance's can_enqueue flag is true
  // and another utterance is in progress, adds it to the end of the queue.
  // Otherwise, interrupts any current utterance and speaks this one
  // immediately.
  // NOTE(review): |utterance| is passed as a raw pointer; presumably the
  // controller takes ownership of it, as documented for SpeakNow() --
  // confirm against the implementation.
  void SpeakOrEnqueue(Utterance* utterance);

  // Stop all utterances and flush the queue. Implies leaving pause mode
  // as well.
  void Stop();

  // Pause the speech queue. Some engines may support pausing in the middle
  // of an utterance.
  void Pause();

  // Resume speaking.
  void Resume();

  // Handle events received from the speech engine. Events are forwarded to
  // the callback function, and in addition, completion and error events
  // trigger finishing the current utterance and starting the next one, if
  // any.
  // |utterance_id| identifies the utterance the event belongs to (see
  // Utterance::id()).
  void OnTtsEvent(int utterance_id,
                  TtsEventType event_type,
                  int char_index,
                  const std::string& error_message);

  // Return a list of all available voices, including the native voice,
  // if supported, and all voices registered by extensions.
  // The result is delivered through the |out_voices| out-parameter.
  void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices);

  // Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it
  // finishes loading the built-in TTS component extension.
  void RetrySpeakingQueuedUtterances();

  // Called by the extension system or platform implementation when the
  // list of voices may have changed and should be re-queried.
  void VoicesChanged();

  // Add a delegate that wants to be notified when the set of voices changes.
  void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate);

  // Remove delegate that wants to be notified when the set of voices changes.
  void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate);

  // For unit testing.
  void SetPlatformImpl(TtsPlatformImpl* platform_impl);
  int QueueSize();

 protected:
  TtsController();
  virtual ~TtsController();

 private:
  // Get the platform TTS implementation (or injected mock).
  TtsPlatformImpl* GetPlatformImpl();

  // Start speaking the given utterance. Will either take ownership of
  // |utterance| or delete it if there's an error. Declared void, so no
  // success/failure status is returned to the caller.
  void SpeakNow(Utterance* utterance);

  // Clear the utterance queue. If send_events is true, will send
  // TTS_EVENT_CANCELLED events on each one.
  void ClearUtteranceQueue(bool send_events);

  // Finalize and delete the current utterance.
  void FinishCurrentUtterance();

  // Start speaking the next utterance in the queue.
  void SpeakNextUtterance();

  // Given an utterance and a vector of voices, return the
  // index of the voice that best matches the utterance.
  int GetMatchingVoice(const Utterance* utterance,
                       std::vector<VoiceData>& voices);

  // Lets the singleton traits reach the non-public constructor/destructor.
  friend struct DefaultSingletonTraits<TtsController>;

  // The current utterance being spoken.
  Utterance* current_utterance_;

  // Whether the queue is paused or not.
  bool paused_;

  // A queue of utterances to speak after the current one finishes.
  std::queue<Utterance*> utterance_queue_;

  // A set of delegates that want to be notified when the voices change.
  std::set<VoicesChangedDelegate*> voices_changed_delegates_;

  // A pointer to the platform implementation of text-to-speech, for
  // dependency injection.
  TtsPlatformImpl* platform_impl_;

  DISALLOW_COPY_AND_ASSIGN(TtsController);
};
    345 
    346 #endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
    347