1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ 6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ 7 8 #include <queue> 9 #include <set> 10 #include <string> 11 #include <vector> 12 13 #include "base/memory/scoped_ptr.h" 14 #include "base/memory/singleton.h" 15 #include "base/memory/weak_ptr.h" 16 #include "url/gurl.h" 17 18 class Utterance; 19 class TtsPlatformImpl; 20 21 namespace base { 22 class Value; 23 } 24 25 namespace content { 26 class BrowserContext; 27 } 28 29 // Events sent back from the TTS engine indicating the progress. 30 enum TtsEventType { 31 TTS_EVENT_START, 32 TTS_EVENT_END, 33 TTS_EVENT_WORD, 34 TTS_EVENT_SENTENCE, 35 TTS_EVENT_MARKER, 36 TTS_EVENT_INTERRUPTED, 37 TTS_EVENT_CANCELLED, 38 TTS_EVENT_ERROR, 39 TTS_EVENT_PAUSE, 40 TTS_EVENT_RESUME 41 }; 42 43 enum TtsGenderType { 44 TTS_GENDER_NONE, 45 TTS_GENDER_MALE, 46 TTS_GENDER_FEMALE 47 }; 48 49 // Returns true if this event type is one that indicates an utterance 50 // is finished and can be destroyed. 51 bool IsFinalTtsEventType(TtsEventType event_type); 52 53 // The continuous parameters that apply to a given utterance. 54 struct UtteranceContinuousParameters { 55 UtteranceContinuousParameters(); 56 57 double rate; 58 double pitch; 59 double volume; 60 }; 61 62 // Information about one voice. 63 struct VoiceData { 64 VoiceData(); 65 ~VoiceData(); 66 67 std::string name; 68 std::string lang; 69 TtsGenderType gender; 70 std::string extension_id; 71 std::set<TtsEventType> events; 72 73 // If true, the synthesis engine is a remote network resource. 74 // It may be higher latency and may incur bandwidth costs. 75 bool remote; 76 77 // If true, this is implemented by this platform's subclass of 78 // TtsPlatformImpl. If false, this is implemented by an extension. 79 bool native; 80 std::string native_voice_identifier; 81 }; 82 83 // Interface that delegates TTS requests to user-installed extensions. 84 class TtsEngineDelegate { 85 public: 86 virtual ~TtsEngineDelegate() {} 87 88 // Return a list of all available voices registered. 89 virtual void GetVoices(content::BrowserContext* browser_context, 90 std::vector<VoiceData>* out_voices) = 0; 91 92 // Speak the given utterance by sending an event to the given TTS engine. 93 virtual void Speak(Utterance* utterance, const VoiceData& voice) = 0; 94 95 // Stop speaking the given utterance by sending an event to the target 96 // associated with this utterance. 97 virtual void Stop(Utterance* utterance) = 0; 98 99 // Pause in the middle of speaking this utterance. 100 virtual void Pause(Utterance* utterance) = 0; 101 102 // Resume speaking this utterance. 103 virtual void Resume(Utterance* utterance) = 0; 104 105 // Load the built-in component extension for ChromeOS. 106 virtual bool LoadBuiltInTtsExtension( 107 content::BrowserContext* browser_context) = 0; 108 }; 109 110 // Class that wants to receive events on utterances. 111 class UtteranceEventDelegate { 112 public: 113 virtual ~UtteranceEventDelegate() {} 114 virtual void OnTtsEvent(Utterance* utterance, 115 TtsEventType event_type, 116 int char_index, 117 const std::string& error_message) = 0; 118 }; 119 120 // Class that wants to be notified when the set of 121 // voices has changed. 122 class VoicesChangedDelegate { 123 public: 124 virtual ~VoicesChangedDelegate() {} 125 virtual void OnVoicesChanged() = 0; 126 }; 127 128 // One speech utterance. 129 class Utterance { 130 public: 131 // Construct an utterance given a profile and a completion task to call 132 // when the utterance is done speaking. Before speaking this utterance, 133 // its other parameters like text, rate, pitch, etc. should all be set. 134 explicit Utterance(content::BrowserContext* browser_context); 135 ~Utterance(); 136 137 // Sends an event to the delegate. If the event type is TTS_EVENT_END 138 // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1, 139 // uses the last good value. 140 void OnTtsEvent(TtsEventType event_type, 141 int char_index, 142 const std::string& error_message); 143 144 // Finish an utterance without sending an event to the delegate. 145 void Finish(); 146 147 // Getters and setters for the text to speak and other speech options. 148 void set_text(const std::string& text) { text_ = text; } 149 const std::string& text() const { return text_; } 150 151 void set_options(const base::Value* options); 152 const base::Value* options() const { return options_.get(); } 153 154 void set_src_extension_id(const std::string& src_extension_id) { 155 src_extension_id_ = src_extension_id; 156 } 157 const std::string& src_extension_id() { return src_extension_id_; } 158 159 void set_src_id(int src_id) { src_id_ = src_id; } 160 int src_id() { return src_id_; } 161 162 void set_src_url(const GURL& src_url) { src_url_ = src_url; } 163 const GURL& src_url() { return src_url_; } 164 165 void set_voice_name(const std::string& voice_name) { 166 voice_name_ = voice_name; 167 } 168 const std::string& voice_name() const { return voice_name_; } 169 170 void set_lang(const std::string& lang) { 171 lang_ = lang; 172 } 173 const std::string& lang() const { return lang_; } 174 175 void set_gender(TtsGenderType gender) { 176 gender_ = gender; 177 } 178 TtsGenderType gender() const { return gender_; } 179 180 void set_continuous_parameters(const UtteranceContinuousParameters& params) { 181 continuous_parameters_ = params; 182 } 183 const UtteranceContinuousParameters& continuous_parameters() { 184 return continuous_parameters_; 185 } 186 187 void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; } 188 bool can_enqueue() const { return can_enqueue_; } 189 190 void set_required_event_types(const std::set<TtsEventType>& types) { 191 required_event_types_ = types; 192 } 193 const std::set<TtsEventType>& required_event_types() const { 194 return required_event_types_; 195 } 196 197 void set_desired_event_types(const std::set<TtsEventType>& types) { 198 desired_event_types_ = types; 199 } 200 const std::set<TtsEventType>& desired_event_types() const { 201 return desired_event_types_; 202 } 203 204 const std::string& extension_id() const { return extension_id_; } 205 void set_extension_id(const std::string& extension_id) { 206 extension_id_ = extension_id; 207 } 208 209 UtteranceEventDelegate* event_delegate() const { 210 return event_delegate_.get(); 211 } 212 void set_event_delegate( 213 base::WeakPtr<UtteranceEventDelegate> event_delegate) { 214 event_delegate_ = event_delegate; 215 } 216 217 // Getters and setters for internal state. 218 content::BrowserContext* browser_context() const { return browser_context_; } 219 int id() const { return id_; } 220 bool finished() const { return finished_; } 221 222 private: 223 // The BrowserContext that initiated this utterance. 224 content::BrowserContext* browser_context_; 225 226 // The extension ID of the extension providing TTS for this utterance, or 227 // empty if native TTS is being used. 228 std::string extension_id_; 229 230 // The unique ID of this utterance, used to associate callback functions 231 // with utterances. 232 int id_; 233 234 // The id of the next utterance, so we can associate requests with 235 // responses. 236 static int next_utterance_id_; 237 238 // The text to speak. 239 std::string text_; 240 241 // The full options arg passed to tts.speak, which may include fields 242 // other than the ones we explicitly parse, below. 243 scoped_ptr<base::Value> options_; 244 245 // The extension ID of the extension that called speak() and should 246 // receive events. 247 std::string src_extension_id_; 248 249 // The source extension's ID of this utterance, so that it can associate 250 // events with the appropriate callback. 251 int src_id_; 252 253 // The URL of the page where the source extension called speak. 254 GURL src_url_; 255 256 // The delegate to be called when an utterance event is fired. 257 base::WeakPtr<UtteranceEventDelegate> event_delegate_; 258 259 // The parsed options. 260 std::string voice_name_; 261 std::string lang_; 262 TtsGenderType gender_; 263 UtteranceContinuousParameters continuous_parameters_; 264 bool can_enqueue_; 265 std::set<TtsEventType> required_event_types_; 266 std::set<TtsEventType> desired_event_types_; 267 268 // The index of the current char being spoken. 269 int char_index_; 270 271 // True if this utterance received an event indicating it's done. 272 bool finished_; 273 }; 274 275 // Singleton class that manages text-to-speech for the TTS and TTS engine 276 // extension APIs, maintaining a queue of pending utterances and keeping 277 // track of all state. 278 class TtsController { 279 public: 280 // Get the single instance of this class. 281 static TtsController* GetInstance(); 282 283 // Returns true if we're currently speaking an utterance. 284 virtual bool IsSpeaking() = 0; 285 286 // Speak the given utterance. If the utterance's can_enqueue flag is true 287 // and another utterance is in progress, adds it to the end of the queue. 288 // Otherwise, interrupts any current utterance and speaks this one 289 // immediately. 290 virtual void SpeakOrEnqueue(Utterance* utterance) = 0; 291 292 // Stop all utterances and flush the queue. Implies leaving pause mode 293 // as well. 294 virtual void Stop() = 0; 295 296 // Pause the speech queue. Some engines may support pausing in the middle 297 // of an utterance. 298 virtual void Pause() = 0; 299 300 // Resume speaking. 301 virtual void Resume() = 0; 302 303 // Handle events received from the speech engine. Events are forwarded to 304 // the callback function, and in addition, completion and error events 305 // trigger finishing the current utterance and starting the next one, if 306 // any. 307 virtual void OnTtsEvent(int utterance_id, 308 TtsEventType event_type, 309 int char_index, 310 const std::string& error_message) = 0; 311 312 // Return a list of all available voices, including the native voice, 313 // if supported, and all voices registered by extensions. 314 virtual void GetVoices(content::BrowserContext* browser_context, 315 std::vector<VoiceData>* out_voices) = 0; 316 317 // Called by the extension system or platform implementation when the 318 // list of voices may have changed and should be re-queried. 319 virtual void VoicesChanged() = 0; 320 321 // Add a delegate that wants to be notified when the set of voices changes. 322 virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0; 323 324 // Remove delegate that wants to be notified when the set of voices changes. 325 virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0; 326 327 // Set the delegate that processes TTS requests with user-installed 328 // extensions. 329 virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0; 330 331 // Get the delegate that processes TTS requests with user-installed 332 // extensions. 333 virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0; 334 335 // For unit testing. 336 virtual void SetPlatformImpl(TtsPlatformImpl* platform_impl) = 0; 337 virtual int QueueSize() = 0; 338 339 protected: 340 virtual ~TtsController() {} 341 }; 342 343 #endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ 344