1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/speech/tts_controller.h" 6 7 #include <string> 8 #include <vector> 9 10 #include "base/float_util.h" 11 #include "base/values.h" 12 #include "chrome/browser/extensions/extension_system.h" 13 #include "chrome/browser/profiles/profile.h" 14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h" 15 #include "chrome/browser/speech/extension_api/tts_extension_api.h" 16 #include "chrome/browser/speech/tts_platform.h" 17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h" 18 #include "extensions/common/extension.h" 19 20 namespace { 21 // A value to be used to indicate that there is no char index available. 22 const int kInvalidCharIndex = -1; 23 24 // Given a language/region code of the form 'fr-FR', returns just the basic 25 // language portion, e.g. 'fr'. 26 std::string TrimLanguageCode(std::string lang) { 27 if (lang.size() >= 5 && lang[2] == '-') 28 return lang.substr(0, 2); 29 else 30 return lang; 31 } 32 33 } // namespace 34 35 bool IsFinalTtsEventType(TtsEventType event_type) { 36 return (event_type == TTS_EVENT_END || 37 event_type == TTS_EVENT_INTERRUPTED || 38 event_type == TTS_EVENT_CANCELLED || 39 event_type == TTS_EVENT_ERROR); 40 } 41 42 // 43 // UtteranceContinuousParameters 44 // 45 46 47 UtteranceContinuousParameters::UtteranceContinuousParameters() 48 : rate(-1), 49 pitch(-1), 50 volume(-1) {} 51 52 53 // 54 // VoiceData 55 // 56 57 58 VoiceData::VoiceData() 59 : gender(TTS_GENDER_NONE), 60 remote(false), 61 native(false) {} 62 63 VoiceData::~VoiceData() {} 64 65 66 // 67 // Utterance 68 // 69 70 // static 71 int Utterance::next_utterance_id_ = 0; 72 73 Utterance::Utterance(Profile* profile) 74 : profile_(profile), 75 id_(next_utterance_id_++), 76 src_id_(-1), 77 gender_(TTS_GENDER_NONE), 78 can_enqueue_(false), 79 char_index_(0), 80 finished_(false) { 81 options_.reset(new DictionaryValue()); 82 } 83 84 Utterance::~Utterance() { 85 DCHECK(finished_); 86 } 87 88 void Utterance::OnTtsEvent(TtsEventType event_type, 89 int char_index, 90 const std::string& error_message) { 91 if (char_index >= 0) 92 char_index_ = char_index; 93 if (IsFinalTtsEventType(event_type)) 94 finished_ = true; 95 96 if (event_delegate_) 97 event_delegate_->OnTtsEvent(this, event_type, char_index, error_message); 98 if (finished_) 99 event_delegate_.reset(); 100 } 101 102 void Utterance::Finish() { 103 finished_ = true; 104 } 105 106 void Utterance::set_options(const Value* options) { 107 options_.reset(options->DeepCopy()); 108 } 109 110 // 111 // TtsController 112 // 113 114 // static 115 TtsController* TtsController::GetInstance() { 116 return Singleton<TtsController>::get(); 117 } 118 119 TtsController::TtsController() 120 : current_utterance_(NULL), 121 paused_(false), 122 platform_impl_(NULL) { 123 } 124 125 TtsController::~TtsController() { 126 if (current_utterance_) { 127 current_utterance_->Finish(); 128 delete current_utterance_; 129 } 130 131 // Clear any queued utterances too. 132 ClearUtteranceQueue(false); // Don't sent events. 133 } 134 135 void TtsController::SpeakOrEnqueue(Utterance* utterance) { 136 // If we're paused and we get an utterance that can't be queued, 137 // flush the queue but stay in the paused state. 138 if (paused_ && !utterance->can_enqueue()) { 139 Stop(); 140 paused_ = true; 141 return; 142 } 143 144 if (paused_ || (IsSpeaking() && utterance->can_enqueue())) { 145 utterance_queue_.push(utterance); 146 } else { 147 Stop(); 148 SpeakNow(utterance); 149 } 150 } 151 152 void TtsController::SpeakNow(Utterance* utterance) { 153 // Get all available voices and try to find a matching voice. 154 std::vector<VoiceData> voices; 155 GetVoices(utterance->profile(), &voices); 156 int index = GetMatchingVoice(utterance, voices); 157 158 // Select the matching voice, but if none was found, initialize an 159 // empty VoiceData with native = true, which will give the native 160 // speech synthesizer a chance to try to synthesize the utterance 161 // anyway. 162 VoiceData voice; 163 if (index >= 0 && index < static_cast<int>(voices.size())) 164 voice = voices[index]; 165 else 166 voice.native = true; 167 168 if (!voice.native) { 169 #if !defined(OS_ANDROID) 170 DCHECK(!voice.extension_id.empty()); 171 current_utterance_ = utterance; 172 utterance->set_extension_id(voice.extension_id); 173 ExtensionTtsEngineSpeak(utterance, voice); 174 bool sends_end_event = 175 voice.events.find(TTS_EVENT_END) != voice.events.end(); 176 if (!sends_end_event) { 177 utterance->Finish(); 178 delete utterance; 179 current_utterance_ = NULL; 180 SpeakNextUtterance(); 181 } 182 #endif 183 } else { 184 // It's possible for certain platforms to send start events immediately 185 // during |speak|. 186 current_utterance_ = utterance; 187 GetPlatformImpl()->clear_error(); 188 bool success = GetPlatformImpl()->Speak( 189 utterance->id(), 190 utterance->text(), 191 utterance->lang(), 192 voice, 193 utterance->continuous_parameters()); 194 if (!success) 195 current_utterance_ = NULL; 196 197 // If the native voice wasn't able to process this speech, see if 198 // the browser has built-in TTS that isn't loaded yet. 199 if (!success && 200 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) { 201 utterance_queue_.push(utterance); 202 return; 203 } 204 205 if (!success) { 206 utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex, 207 GetPlatformImpl()->error()); 208 delete utterance; 209 return; 210 } 211 } 212 } 213 214 void TtsController::Stop() { 215 paused_ = false; 216 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 217 #if !defined(OS_ANDROID) 218 ExtensionTtsEngineStop(current_utterance_); 219 #endif 220 } else { 221 GetPlatformImpl()->clear_error(); 222 GetPlatformImpl()->StopSpeaking(); 223 } 224 225 if (current_utterance_) 226 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 227 std::string()); 228 FinishCurrentUtterance(); 229 ClearUtteranceQueue(true); // Send events. 230 } 231 232 void TtsController::Pause() { 233 paused_ = true; 234 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 235 #if !defined(OS_ANDROID) 236 ExtensionTtsEnginePause(current_utterance_); 237 #endif 238 } else if (current_utterance_) { 239 GetPlatformImpl()->clear_error(); 240 GetPlatformImpl()->Pause(); 241 } 242 } 243 244 void TtsController::Resume() { 245 paused_ = false; 246 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 247 #if !defined(OS_ANDROID) 248 ExtensionTtsEngineResume(current_utterance_); 249 #endif 250 } else if (current_utterance_) { 251 GetPlatformImpl()->clear_error(); 252 GetPlatformImpl()->Resume(); 253 } else { 254 SpeakNextUtterance(); 255 } 256 } 257 258 void TtsController::OnTtsEvent(int utterance_id, 259 TtsEventType event_type, 260 int char_index, 261 const std::string& error_message) { 262 // We may sometimes receive completion callbacks "late", after we've 263 // already finished the utterance (for example because another utterance 264 // interrupted or we got a call to Stop). This is normal and we can 265 // safely just ignore these events. 266 if (!current_utterance_ || utterance_id != current_utterance_->id()) { 267 return; 268 } 269 current_utterance_->OnTtsEvent(event_type, char_index, error_message); 270 if (current_utterance_->finished()) { 271 FinishCurrentUtterance(); 272 SpeakNextUtterance(); 273 } 274 } 275 276 void TtsController::GetVoices(Profile* profile, 277 std::vector<VoiceData>* out_voices) { 278 #if !defined(OS_ANDROID) 279 if (profile) 280 GetExtensionVoices(profile, out_voices); 281 #endif 282 283 TtsPlatformImpl* platform_impl = GetPlatformImpl(); 284 if (platform_impl && platform_impl->PlatformImplAvailable()) 285 platform_impl->GetVoices(out_voices); 286 } 287 288 bool TtsController::IsSpeaking() { 289 return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking(); 290 } 291 292 void TtsController::FinishCurrentUtterance() { 293 if (current_utterance_) { 294 if (!current_utterance_->finished()) 295 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 296 std::string()); 297 delete current_utterance_; 298 current_utterance_ = NULL; 299 } 300 } 301 302 void TtsController::SpeakNextUtterance() { 303 if (paused_) 304 return; 305 306 // Start speaking the next utterance in the queue. Keep trying in case 307 // one fails but there are still more in the queue to try. 308 while (!utterance_queue_.empty() && !current_utterance_) { 309 Utterance* utterance = utterance_queue_.front(); 310 utterance_queue_.pop(); 311 SpeakNow(utterance); 312 } 313 } 314 315 void TtsController::RetrySpeakingQueuedUtterances() { 316 if (current_utterance_ == NULL && !utterance_queue_.empty()) 317 SpeakNextUtterance(); 318 } 319 320 void TtsController::ClearUtteranceQueue(bool send_events) { 321 while (!utterance_queue_.empty()) { 322 Utterance* utterance = utterance_queue_.front(); 323 utterance_queue_.pop(); 324 if (send_events) 325 utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, 326 std::string()); 327 else 328 utterance->Finish(); 329 delete utterance; 330 } 331 } 332 333 void TtsController::SetPlatformImpl( 334 TtsPlatformImpl* platform_impl) { 335 platform_impl_ = platform_impl; 336 } 337 338 int TtsController::QueueSize() { 339 return static_cast<int>(utterance_queue_.size()); 340 } 341 342 TtsPlatformImpl* TtsController::GetPlatformImpl() { 343 if (!platform_impl_) 344 platform_impl_ = TtsPlatformImpl::GetInstance(); 345 return platform_impl_; 346 } 347 348 int TtsController::GetMatchingVoice( 349 const Utterance* utterance, std::vector<VoiceData>& voices) { 350 // Make two passes: the first time, do strict language matching 351 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix 352 // language matching ('fr-FR' matches 'fr' and 'fr-CA') 353 for (int pass = 0; pass < 2; ++pass) { 354 for (size_t i = 0; i < voices.size(); ++i) { 355 const VoiceData& voice = voices[i]; 356 357 if (!utterance->extension_id().empty() && 358 utterance->extension_id() != voice.extension_id) { 359 continue; 360 } 361 362 if (!voice.name.empty() && 363 !utterance->voice_name().empty() && 364 voice.name != utterance->voice_name()) { 365 continue; 366 } 367 if (!voice.lang.empty() && !utterance->lang().empty()) { 368 std::string voice_lang = voice.lang; 369 std::string utterance_lang = utterance->lang(); 370 if (pass == 1) { 371 voice_lang = TrimLanguageCode(voice_lang); 372 utterance_lang = TrimLanguageCode(utterance_lang); 373 } 374 if (voice_lang != utterance_lang) { 375 continue; 376 } 377 } 378 if (voice.gender != TTS_GENDER_NONE && 379 utterance->gender() != TTS_GENDER_NONE && 380 voice.gender != utterance->gender()) { 381 continue; 382 } 383 384 if (utterance->required_event_types().size() > 0) { 385 bool has_all_required_event_types = true; 386 for (std::set<TtsEventType>::const_iterator iter = 387 utterance->required_event_types().begin(); 388 iter != utterance->required_event_types().end(); 389 ++iter) { 390 if (voice.events.find(*iter) == voice.events.end()) { 391 has_all_required_event_types = false; 392 break; 393 } 394 } 395 if (!has_all_required_event_types) 396 continue; 397 } 398 399 return static_cast<int>(i); 400 } 401 } 402 403 return -1; 404 } 405 406 void TtsController::VoicesChanged() { 407 for (std::set<VoicesChangedDelegate*>::iterator iter = 408 voices_changed_delegates_.begin(); 409 iter != voices_changed_delegates_.end(); ++iter) { 410 (*iter)->OnVoicesChanged(); 411 } 412 } 413 414 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) { 415 voices_changed_delegates_.insert(delegate); 416 } 417 418 void TtsController::RemoveVoicesChangedDelegate( 419 VoicesChangedDelegate* delegate) { 420 voices_changed_delegates_.erase(delegate); 421 } 422