1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_EXTENSIONS_EXTENSION_TTS_API_H_ 6 #define CHROME_BROWSER_EXTENSIONS_EXTENSION_TTS_API_H_ 7 8 #include <queue> 9 #include <string> 10 11 #include "base/memory/singleton.h" 12 #include "base/task.h" 13 #include "chrome/browser/extensions/extension_function.h" 14 #include "chrome/browser/extensions/extension_tts_api_util.h" 15 16 // Abstract class that defines the native platform TTS interface. 17 class ExtensionTtsPlatformImpl { 18 public: 19 static ExtensionTtsPlatformImpl* GetInstance(); 20 21 // Speak the given utterance with the given parameters if possible, 22 // and return true on success. Utterance will always be nonempty. 23 // If the user does not specify the other values, then locale and gender 24 // will be empty strings, and rate, pitch, and volume will be -1.0. 25 // 26 // The ExtensionTtsController will only try to speak one utterance at 27 // a time. If it wants to interrupt speech, it will always call Stop 28 // before speaking again, otherwise it will wait until IsSpeaking 29 // returns false before calling Speak again. 30 virtual bool Speak( 31 const std::string& utterance, 32 const std::string& locale, 33 const std::string& gender, 34 double rate, 35 double pitch, 36 double volume) = 0; 37 38 // Stop speaking immediately and return true on success. 39 virtual bool StopSpeaking() = 0; 40 41 // Return true if the synthesis engine is currently speaking. 42 virtual bool IsSpeaking() = 0; 43 44 virtual std::string error(); 45 virtual void clear_error(); 46 virtual void set_error(const std::string& error); 47 48 protected: 49 ExtensionTtsPlatformImpl() {} 50 virtual ~ExtensionTtsPlatformImpl() {} 51 52 std::string error_; 53 54 DISALLOW_COPY_AND_ASSIGN(ExtensionTtsPlatformImpl); 55 }; 56 57 // One speech utterance. 58 class Utterance { 59 public: 60 // Construct an utterance given a profile, the text to speak, 61 // the options passed to tts.speak, and a completion task to call 62 // when the utterance is done speaking. 63 Utterance(Profile* profile, 64 const std::string& text, 65 DictionaryValue* options, 66 Task* completion_task); 67 ~Utterance(); 68 69 // Calls the completion task and then destroys itself. 70 void FinishAndDestroy(); 71 72 void set_error(const std::string& error) { error_ = error; } 73 void set_extension_id(const std::string& extension_id) { 74 extension_id_ = extension_id; 75 } 76 77 // Accessors 78 Profile* profile() { return profile_; } 79 const std::string& extension_id() { return extension_id_; } 80 int id() { return id_; } 81 const std::string& text() { return text_; } 82 const Value* options() { return options_.get(); } 83 const std::string& voice_name() { return voice_name_; } 84 const std::string& locale() { return locale_; } 85 const std::string& gender() { return gender_; } 86 double rate() { return rate_; } 87 double pitch() { return pitch_; } 88 double volume() { return volume_; } 89 bool can_enqueue() { return can_enqueue_; } 90 const std::string& error() { return error_; } 91 92 private: 93 // The profile that initiated this utterance. 94 Profile* profile_; 95 96 // The extension ID of the extension providing TTS for this utterance, or 97 // empty if native TTS is being used. 98 std::string extension_id_; 99 100 // The unique ID of this utterance, used to associate callback functions 101 // with utterances. 102 int id_; 103 104 // The id of the next utterance, so we can associate requests with 105 // responses. 106 static int next_utterance_id_; 107 108 // The text to speak. 109 std::string text_; 110 111 // The full options arg passed to tts.speak, which may include fields 112 // other than the ones we explicitly parse, below. 113 scoped_ptr<Value> options_; 114 115 // The parsed options. 116 std::string voice_name_; 117 std::string locale_; 118 std::string gender_; 119 double rate_; 120 double pitch_; 121 double volume_; 122 bool can_enqueue_; 123 124 // The error string to pass to the completion task. Will be empty if 125 // no error occurred. 126 std::string error_; 127 128 // The method to call when this utterance has completed speaking. 129 Task* completion_task_; 130 }; 131 132 // Singleton class that manages text-to-speech. 133 class ExtensionTtsController { 134 public: 135 // Get the single instance of this class. 136 static ExtensionTtsController* GetInstance(); 137 138 // Returns true if we're currently speaking an utterance. 139 bool IsSpeaking() const; 140 141 // Speak the given utterance. If the utterance's can_enqueue flag is true 142 // and another utterance is in progress, adds it to the end of the queue. 143 // Otherwise, interrupts any current utterance and speaks this one 144 // immediately. 145 void SpeakOrEnqueue(Utterance* utterance); 146 147 // Stop all utterances and flush the queue. 148 void Stop(); 149 150 // Called when an extension finishes speaking an utterance. 151 void OnSpeechFinished(int request_id, const std::string& error_message); 152 153 // For unit testing. 154 void SetPlatformImpl(ExtensionTtsPlatformImpl* platform_impl); 155 156 private: 157 ExtensionTtsController(); 158 virtual ~ExtensionTtsController(); 159 160 // Get the platform TTS implementation (or injected mock). 161 ExtensionTtsPlatformImpl* GetPlatformImpl(); 162 163 // Start speaking the given utterance. Will either take ownership of 164 // |utterance| or delete it if there's an error. 165 void SpeakNow(Utterance* utterance); 166 167 // Called periodically when speech is ongoing. Checks to see if the 168 // underlying platform speech system has finished the current utterance, 169 // and if so finishes it and pops the next utterance off the queue. 170 void CheckSpeechStatus(); 171 172 // Clear the utterance queue. 173 void ClearUtteranceQueue(); 174 175 // Finalize and delete the current utterance. 176 void FinishCurrentUtterance(); 177 178 // Start speaking the next utterance in the queue. 179 void SpeakNextUtterance(); 180 181 // Return the id string of the first extension with tts_voices in its 182 // manifest that matches the speech parameters of this utterance, 183 // or the empty string if none is found. 184 std::string GetMatchingExtensionId(Utterance* utterance); 185 186 ScopedRunnableMethodFactory<ExtensionTtsController> method_factory_; 187 friend struct DefaultSingletonTraits<ExtensionTtsController>; 188 189 // The current utterance being spoken. 190 Utterance* current_utterance_; 191 192 // A queue of utterances to speak after the current one finishes. 193 std::queue<Utterance*> utterance_queue_; 194 195 // A pointer to the platform implementation of text-to-speech, for 196 // dependency injection. 197 ExtensionTtsPlatformImpl* platform_impl_; 198 199 DISALLOW_COPY_AND_ASSIGN(ExtensionTtsController); 200 }; 201 202 // 203 // Extension API function definitions 204 // 205 206 class ExtensionTtsSpeakFunction : public AsyncExtensionFunction { 207 private: 208 ~ExtensionTtsSpeakFunction() {} 209 virtual bool RunImpl(); 210 void SpeechFinished(); 211 Utterance* utterance_; 212 DECLARE_EXTENSION_FUNCTION_NAME("experimental.tts.speak") 213 }; 214 215 class ExtensionTtsStopSpeakingFunction : public SyncExtensionFunction { 216 private: 217 ~ExtensionTtsStopSpeakingFunction() {} 218 virtual bool RunImpl(); 219 DECLARE_EXTENSION_FUNCTION_NAME("experimental.tts.stop") 220 }; 221 222 class ExtensionTtsIsSpeakingFunction : public SyncExtensionFunction { 223 private: 224 ~ExtensionTtsIsSpeakingFunction() {} 225 virtual bool RunImpl(); 226 DECLARE_EXTENSION_FUNCTION_NAME("experimental.tts.isSpeaking") 227 }; 228 229 class ExtensionTtsSpeakCompletedFunction : public SyncExtensionFunction { 230 private: 231 ~ExtensionTtsSpeakCompletedFunction() {} 232 virtual bool RunImpl(); 233 DECLARE_EXTENSION_FUNCTION_NAME("experimental.tts.speakCompleted") 234 }; 235 236 #endif // CHROME_BROWSER_EXTENSIONS_EXTENSION_TTS_API_H_ 237