1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ 6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ 7 8 #include <queue> 9 #include <set> 10 #include <string> 11 #include <vector> 12 13 #include "base/memory/scoped_ptr.h" 14 #include "base/memory/singleton.h" 15 #include "url/gurl.h" 16 17 class Utterance; 18 class TtsPlatformImpl; 19 class Profile; 20 21 namespace base { 22 class Value; 23 } 24 25 // Events sent back from the TTS engine indicating the progress. 26 enum TtsEventType { 27 TTS_EVENT_START, 28 TTS_EVENT_END, 29 TTS_EVENT_WORD, 30 TTS_EVENT_SENTENCE, 31 TTS_EVENT_MARKER, 32 TTS_EVENT_INTERRUPTED, 33 TTS_EVENT_CANCELLED, 34 TTS_EVENT_ERROR, 35 TTS_EVENT_PAUSE, 36 TTS_EVENT_RESUME 37 }; 38 39 enum TtsGenderType { 40 TTS_GENDER_NONE, 41 TTS_GENDER_MALE, 42 TTS_GENDER_FEMALE 43 }; 44 45 // Returns true if this event type is one that indicates an utterance 46 // is finished and can be destroyed. 47 bool IsFinalTtsEventType(TtsEventType event_type); 48 49 // The continuous parameters that apply to a given utterance. 50 struct UtteranceContinuousParameters { 51 UtteranceContinuousParameters(); 52 53 double rate; 54 double pitch; 55 double volume; 56 }; 57 58 // Information about one voice. 59 struct VoiceData { 60 VoiceData(); 61 ~VoiceData(); 62 63 std::string name; 64 std::string lang; 65 TtsGenderType gender; 66 std::string extension_id; 67 std::set<TtsEventType> events; 68 69 // If true, this is implemented by this platform's subclass of 70 // TtsPlatformImpl. If false, this is implemented by an extension. 71 bool native; 72 std::string native_voice_identifier; 73 }; 74 75 // Class that wants to receive events on utterances. 76 class UtteranceEventDelegate { 77 public: 78 virtual ~UtteranceEventDelegate() {} 79 virtual void OnTtsEvent(Utterance* utterance, 80 TtsEventType event_type, 81 int char_index, 82 const std::string& error_message) = 0; 83 }; 84 85 // Class that wants to be notified when the set of 86 // voices has changed. 87 class VoicesChangedDelegate { 88 public: 89 virtual ~VoicesChangedDelegate() {} 90 virtual void OnVoicesChanged() = 0; 91 }; 92 93 // One speech utterance. 94 class Utterance { 95 public: 96 // Construct an utterance given a profile and a completion task to call 97 // when the utterance is done speaking. Before speaking this utterance, 98 // its other parameters like text, rate, pitch, etc. should all be set. 99 explicit Utterance(Profile* profile); 100 ~Utterance(); 101 102 // Sends an event to the delegate. If the event type is TTS_EVENT_END 103 // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1, 104 // uses the last good value. 105 void OnTtsEvent(TtsEventType event_type, 106 int char_index, 107 const std::string& error_message); 108 109 // Finish an utterance without sending an event to the delegate. 110 void Finish(); 111 112 // Getters and setters for the text to speak and other speech options. 113 void set_text(const std::string& text) { text_ = text; } 114 const std::string& text() const { return text_; } 115 116 void set_options(const base::Value* options); 117 const base::Value* options() const { return options_.get(); } 118 119 void set_src_extension_id(const std::string& src_extension_id) { 120 src_extension_id_ = src_extension_id; 121 } 122 const std::string& src_extension_id() { return src_extension_id_; } 123 124 void set_src_id(int src_id) { src_id_ = src_id; } 125 int src_id() { return src_id_; } 126 127 void set_src_url(const GURL& src_url) { src_url_ = src_url; } 128 const GURL& src_url() { return src_url_; } 129 130 void set_voice_name(const std::string& voice_name) { 131 voice_name_ = voice_name; 132 } 133 const std::string& voice_name() const { return voice_name_; } 134 135 void set_lang(const std::string& lang) { 136 lang_ = lang; 137 } 138 const std::string& lang() const { return lang_; } 139 140 void set_gender(TtsGenderType gender) { 141 gender_ = gender; 142 } 143 TtsGenderType gender() const { return gender_; } 144 145 void set_continuous_parameters(const UtteranceContinuousParameters& params) { 146 continuous_parameters_ = params; 147 } 148 const UtteranceContinuousParameters& continuous_parameters() { 149 return continuous_parameters_; 150 } 151 152 void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; } 153 bool can_enqueue() const { return can_enqueue_; } 154 155 void set_required_event_types(const std::set<TtsEventType>& types) { 156 required_event_types_ = types; 157 } 158 const std::set<TtsEventType>& required_event_types() const { 159 return required_event_types_; 160 } 161 162 void set_desired_event_types(const std::set<TtsEventType>& types) { 163 desired_event_types_ = types; 164 } 165 const std::set<TtsEventType>& desired_event_types() const { 166 return desired_event_types_; 167 } 168 169 const std::string& extension_id() const { return extension_id_; } 170 void set_extension_id(const std::string& extension_id) { 171 extension_id_ = extension_id; 172 } 173 174 UtteranceEventDelegate* event_delegate() const { return event_delegate_; } 175 void set_event_delegate(UtteranceEventDelegate* event_delegate) { 176 event_delegate_ = event_delegate; 177 } 178 179 // Getters and setters for internal state. 180 Profile* profile() const { return profile_; } 181 int id() const { return id_; } 182 bool finished() const { return finished_; } 183 184 private: 185 // The profile that initiated this utterance. 186 Profile* profile_; 187 188 // The extension ID of the extension providing TTS for this utterance, or 189 // empty if native TTS is being used. 190 std::string extension_id_; 191 192 // The unique ID of this utterance, used to associate callback functions 193 // with utterances. 194 int id_; 195 196 // The id of the next utterance, so we can associate requests with 197 // responses. 198 static int next_utterance_id_; 199 200 // The text to speak. 201 std::string text_; 202 203 // The full options arg passed to tts.speak, which may include fields 204 // other than the ones we explicitly parse, below. 205 scoped_ptr<base::Value> options_; 206 207 // The extension ID of the extension that called speak() and should 208 // receive events. 209 std::string src_extension_id_; 210 211 // The source extension's ID of this utterance, so that it can associate 212 // events with the appropriate callback. 213 int src_id_; 214 215 // The URL of the page where the source extension called speak. 216 GURL src_url_; 217 218 // The delegate to be called when an utterance event is fired. 219 // Weak reference; it will be cleared after we fire a "final" event 220 // (as determined by IsFinalTtsEventType). 221 UtteranceEventDelegate* event_delegate_; 222 223 // The parsed options. 224 std::string voice_name_; 225 std::string lang_; 226 TtsGenderType gender_; 227 UtteranceContinuousParameters continuous_parameters_; 228 bool can_enqueue_; 229 std::set<TtsEventType> required_event_types_; 230 std::set<TtsEventType> desired_event_types_; 231 232 // The index of the current char being spoken. 233 int char_index_; 234 235 // True if this utterance received an event indicating it's done. 236 bool finished_; 237 }; 238 239 // Singleton class that manages text-to-speech for the TTS and TTS engine 240 // extension APIs, maintaining a queue of pending utterances and keeping 241 // track of all state. 242 class TtsController { 243 public: 244 // Get the single instance of this class. 245 static TtsController* GetInstance(); 246 247 // Returns true if we're currently speaking an utterance. 248 bool IsSpeaking(); 249 250 // Speak the given utterance. If the utterance's can_enqueue flag is true 251 // and another utterance is in progress, adds it to the end of the queue. 252 // Otherwise, interrupts any current utterance and speaks this one 253 // immediately. 254 void SpeakOrEnqueue(Utterance* utterance); 255 256 // Stop all utterances and flush the queue. Implies leaving pause mode 257 // as well. 258 void Stop(); 259 260 // Pause the speech queue. Some engines may support pausing in the middle 261 // of an utterance. 262 void Pause(); 263 264 // Resume speaking. 265 void Resume(); 266 267 // Handle events received from the speech engine. Events are forwarded to 268 // the callback function, and in addition, completion and error events 269 // trigger finishing the current utterance and starting the next one, if 270 // any. 271 void OnTtsEvent(int utterance_id, 272 TtsEventType event_type, 273 int char_index, 274 const std::string& error_message); 275 276 // Return a list of all available voices, including the native voice, 277 // if supported, and all voices registered by extensions. 278 void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices); 279 280 // Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it 281 // finishes loading the built-in TTS component extension. 282 void RetrySpeakingQueuedUtterances(); 283 284 // Called by the extension system or platform implementation when the 285 // list of voices may have changed and should be re-queried. 286 void VoicesChanged(); 287 288 // Add a delegate that wants to be notified when the set of voices changes. 289 void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate); 290 291 // Remove delegate that wants to be notified when the set of voices changes. 292 void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate); 293 294 // For unit testing. 295 void SetPlatformImpl(TtsPlatformImpl* platform_impl); 296 int QueueSize(); 297 298 protected: 299 TtsController(); 300 virtual ~TtsController(); 301 302 private: 303 // Get the platform TTS implementation (or injected mock). 304 TtsPlatformImpl* GetPlatformImpl(); 305 306 // Start speaking the given utterance. Will either take ownership of 307 // |utterance| or delete it if there's an error. Returns true on success. 308 void SpeakNow(Utterance* utterance); 309 310 // Clear the utterance queue. If send_events is true, will send 311 // TTS_EVENT_CANCELLED events on each one. 312 void ClearUtteranceQueue(bool send_events); 313 314 // Finalize and delete the current utterance. 315 void FinishCurrentUtterance(); 316 317 // Start speaking the next utterance in the queue. 318 void SpeakNextUtterance(); 319 320 // Given an utterance and a vector of voices, return the 321 // index of the voice that best matches the utterance. 322 int GetMatchingVoice(const Utterance* utterance, 323 std::vector<VoiceData>& voices); 324 325 friend struct DefaultSingletonTraits<TtsController>; 326 327 // The current utterance being spoken. 328 Utterance* current_utterance_; 329 330 // Whether the queue is paused or not. 331 bool paused_; 332 333 // A queue of utterances to speak after the current one finishes. 334 std::queue<Utterance*> utterance_queue_; 335 336 // A set of delegates that want to be notified when the voices change. 337 std::set<VoicesChangedDelegate*> voices_changed_delegates_; 338 339 // A pointer to the platform implementation of text-to-speech, for 340 // dependency injection. 341 TtsPlatformImpl* platform_impl_; 342 343 DISALLOW_COPY_AND_ASSIGN(TtsController); 344 }; 345 346 #endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ 347