1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/speech/tts_controller_impl.h" 6 7 #include <string> 8 #include <vector> 9 10 #include "base/float_util.h" 11 #include "base/values.h" 12 #include "chrome/browser/browser_process.h" 13 #include "chrome/browser/speech/tts_platform.h" 14 15 namespace { 16 // A value to be used to indicate that there is no char index available. 17 const int kInvalidCharIndex = -1; 18 19 // Given a language/region code of the form 'fr-FR', returns just the basic 20 // language portion, e.g. 'fr'. 21 std::string TrimLanguageCode(std::string lang) { 22 if (lang.size() >= 5 && lang[2] == '-') 23 return lang.substr(0, 2); 24 else 25 return lang; 26 } 27 28 } // namespace 29 30 bool IsFinalTtsEventType(TtsEventType event_type) { 31 return (event_type == TTS_EVENT_END || 32 event_type == TTS_EVENT_INTERRUPTED || 33 event_type == TTS_EVENT_CANCELLED || 34 event_type == TTS_EVENT_ERROR); 35 } 36 37 // 38 // UtteranceContinuousParameters 39 // 40 41 42 UtteranceContinuousParameters::UtteranceContinuousParameters() 43 : rate(-1), 44 pitch(-1), 45 volume(-1) {} 46 47 48 // 49 // VoiceData 50 // 51 52 53 VoiceData::VoiceData() 54 : gender(TTS_GENDER_NONE), 55 remote(false), 56 native(false) {} 57 58 VoiceData::~VoiceData() {} 59 60 61 // 62 // Utterance 63 // 64 65 // static 66 int Utterance::next_utterance_id_ = 0; 67 68 Utterance::Utterance(content::BrowserContext* browser_context) 69 : browser_context_(browser_context), 70 id_(next_utterance_id_++), 71 src_id_(-1), 72 gender_(TTS_GENDER_NONE), 73 can_enqueue_(false), 74 char_index_(0), 75 finished_(false) { 76 options_.reset(new base::DictionaryValue()); 77 } 78 79 Utterance::~Utterance() { 80 DCHECK(finished_); 81 } 82 83 void Utterance::OnTtsEvent(TtsEventType event_type, 84 int char_index, 85 const std::string& error_message) { 86 if (char_index >= 0) 87 char_index_ = char_index; 88 if (IsFinalTtsEventType(event_type)) 89 finished_ = true; 90 91 if (event_delegate_) 92 event_delegate_->OnTtsEvent(this, event_type, char_index, error_message); 93 if (finished_) 94 event_delegate_.reset(); 95 } 96 97 void Utterance::Finish() { 98 finished_ = true; 99 } 100 101 void Utterance::set_options(const base::Value* options) { 102 options_.reset(options->DeepCopy()); 103 } 104 105 TtsController* TtsController::GetInstance() { 106 return TtsControllerImpl::GetInstance(); 107 } 108 109 // 110 // TtsControllerImpl 111 // 112 113 // static 114 TtsControllerImpl* TtsControllerImpl::GetInstance() { 115 return Singleton<TtsControllerImpl>::get(); 116 } 117 118 TtsControllerImpl::TtsControllerImpl() 119 : current_utterance_(NULL), 120 paused_(false), 121 platform_impl_(NULL), 122 tts_engine_delegate_(NULL) { 123 } 124 125 TtsControllerImpl::~TtsControllerImpl() { 126 if (current_utterance_) { 127 current_utterance_->Finish(); 128 delete current_utterance_; 129 } 130 131 // Clear any queued utterances too. 132 ClearUtteranceQueue(false); // Don't sent events. 133 } 134 135 void TtsControllerImpl::SpeakOrEnqueue(Utterance* utterance) { 136 // If we're paused and we get an utterance that can't be queued, 137 // flush the queue but stay in the paused state. 138 if (paused_ && !utterance->can_enqueue()) { 139 Stop(); 140 paused_ = true; 141 delete utterance; 142 return; 143 } 144 145 if (paused_ || (IsSpeaking() && utterance->can_enqueue())) { 146 utterance_queue_.push(utterance); 147 } else { 148 Stop(); 149 SpeakNow(utterance); 150 } 151 } 152 153 void TtsControllerImpl::SpeakNow(Utterance* utterance) { 154 // Ensure we have all built-in voices loaded. This is a no-op if already 155 // loaded. 156 bool loaded_built_in = 157 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context()); 158 159 // Get all available voices and try to find a matching voice. 160 std::vector<VoiceData> voices; 161 GetVoices(utterance->browser_context(), &voices); 162 int index = GetMatchingVoice(utterance, voices); 163 164 VoiceData voice; 165 if (index != -1) { 166 // Select the matching voice. 167 voice = voices[index]; 168 } else { 169 // However, if no match was found on a platform without native tts voices, 170 // attempt to get a voice based only on the current locale without respect 171 // to any supplied voice names. 172 std::vector<VoiceData> native_voices; 173 174 if (GetPlatformImpl()->PlatformImplAvailable()) 175 GetPlatformImpl()->GetVoices(&native_voices); 176 177 if (native_voices.empty() && !voices.empty()) { 178 // TODO(dtseng): Notify extension caller of an error. 179 utterance->set_voice_name(""); 180 // TODO(gaochun): Replace the global variable g_browser_process with 181 // GetContentClient()->browser() to eliminate the dependency of browser 182 // once TTS implementation was moved to content. 183 utterance->set_lang(g_browser_process->GetApplicationLocale()); 184 index = GetMatchingVoice(utterance, voices); 185 186 // If even that fails, just take the first available voice. 187 if (index == -1) 188 index = 0; 189 voice = voices[index]; 190 } else { 191 // Otherwise, simply give native voices a chance to handle this utterance. 192 voice.native = true; 193 } 194 } 195 196 GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice); 197 198 if (!voice.native) { 199 #if !defined(OS_ANDROID) 200 DCHECK(!voice.extension_id.empty()); 201 current_utterance_ = utterance; 202 utterance->set_extension_id(voice.extension_id); 203 if (tts_engine_delegate_) 204 tts_engine_delegate_->Speak(utterance, voice); 205 bool sends_end_event = 206 voice.events.find(TTS_EVENT_END) != voice.events.end(); 207 if (!sends_end_event) { 208 utterance->Finish(); 209 delete utterance; 210 current_utterance_ = NULL; 211 SpeakNextUtterance(); 212 } 213 #endif 214 } else { 215 // It's possible for certain platforms to send start events immediately 216 // during |speak|. 217 current_utterance_ = utterance; 218 GetPlatformImpl()->clear_error(); 219 bool success = GetPlatformImpl()->Speak( 220 utterance->id(), 221 utterance->text(), 222 utterance->lang(), 223 voice, 224 utterance->continuous_parameters()); 225 if (!success) 226 current_utterance_ = NULL; 227 228 // If the native voice wasn't able to process this speech, see if 229 // the browser has built-in TTS that isn't loaded yet. 230 if (!success && loaded_built_in) { 231 utterance_queue_.push(utterance); 232 return; 233 } 234 235 if (!success) { 236 utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex, 237 GetPlatformImpl()->error()); 238 delete utterance; 239 return; 240 } 241 } 242 } 243 244 void TtsControllerImpl::Stop() { 245 paused_ = false; 246 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 247 #if !defined(OS_ANDROID) 248 if (tts_engine_delegate_) 249 tts_engine_delegate_->Stop(current_utterance_); 250 #endif 251 } else { 252 GetPlatformImpl()->clear_error(); 253 GetPlatformImpl()->StopSpeaking(); 254 } 255 256 if (current_utterance_) 257 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 258 std::string()); 259 FinishCurrentUtterance(); 260 ClearUtteranceQueue(true); // Send events. 261 } 262 263 void TtsControllerImpl::Pause() { 264 paused_ = true; 265 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 266 #if !defined(OS_ANDROID) 267 if (tts_engine_delegate_) 268 tts_engine_delegate_->Pause(current_utterance_); 269 #endif 270 } else if (current_utterance_) { 271 GetPlatformImpl()->clear_error(); 272 GetPlatformImpl()->Pause(); 273 } 274 } 275 276 void TtsControllerImpl::Resume() { 277 paused_ = false; 278 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 279 #if !defined(OS_ANDROID) 280 if (tts_engine_delegate_) 281 tts_engine_delegate_->Resume(current_utterance_); 282 #endif 283 } else if (current_utterance_) { 284 GetPlatformImpl()->clear_error(); 285 GetPlatformImpl()->Resume(); 286 } else { 287 SpeakNextUtterance(); 288 } 289 } 290 291 void TtsControllerImpl::OnTtsEvent(int utterance_id, 292 TtsEventType event_type, 293 int char_index, 294 const std::string& error_message) { 295 // We may sometimes receive completion callbacks "late", after we've 296 // already finished the utterance (for example because another utterance 297 // interrupted or we got a call to Stop). This is normal and we can 298 // safely just ignore these events. 299 if (!current_utterance_ || utterance_id != current_utterance_->id()) { 300 return; 301 } 302 current_utterance_->OnTtsEvent(event_type, char_index, error_message); 303 if (current_utterance_->finished()) { 304 FinishCurrentUtterance(); 305 SpeakNextUtterance(); 306 } 307 } 308 309 void TtsControllerImpl::GetVoices(content::BrowserContext* browser_context, 310 std::vector<VoiceData>* out_voices) { 311 #if !defined(OS_ANDROID) 312 if (browser_context && tts_engine_delegate_) 313 tts_engine_delegate_->GetVoices(browser_context, out_voices); 314 #endif 315 316 TtsPlatformImpl* platform_impl = GetPlatformImpl(); 317 if (platform_impl) { 318 // Ensure we have all built-in voices loaded. This is a no-op if already 319 // loaded. 320 platform_impl->LoadBuiltInTtsExtension(browser_context); 321 if (platform_impl->PlatformImplAvailable()) 322 platform_impl->GetVoices(out_voices); 323 } 324 } 325 326 bool TtsControllerImpl::IsSpeaking() { 327 return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking(); 328 } 329 330 void TtsControllerImpl::FinishCurrentUtterance() { 331 if (current_utterance_) { 332 if (!current_utterance_->finished()) 333 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 334 std::string()); 335 delete current_utterance_; 336 current_utterance_ = NULL; 337 } 338 } 339 340 void TtsControllerImpl::SpeakNextUtterance() { 341 if (paused_) 342 return; 343 344 // Start speaking the next utterance in the queue. Keep trying in case 345 // one fails but there are still more in the queue to try. 346 while (!utterance_queue_.empty() && !current_utterance_) { 347 Utterance* utterance = utterance_queue_.front(); 348 utterance_queue_.pop(); 349 SpeakNow(utterance); 350 } 351 } 352 353 void TtsControllerImpl::ClearUtteranceQueue(bool send_events) { 354 while (!utterance_queue_.empty()) { 355 Utterance* utterance = utterance_queue_.front(); 356 utterance_queue_.pop(); 357 if (send_events) 358 utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, 359 std::string()); 360 else 361 utterance->Finish(); 362 delete utterance; 363 } 364 } 365 366 void TtsControllerImpl::SetPlatformImpl( 367 TtsPlatformImpl* platform_impl) { 368 platform_impl_ = platform_impl; 369 } 370 371 int TtsControllerImpl::QueueSize() { 372 return static_cast<int>(utterance_queue_.size()); 373 } 374 375 TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() { 376 if (!platform_impl_) 377 platform_impl_ = TtsPlatformImpl::GetInstance(); 378 return platform_impl_; 379 } 380 381 int TtsControllerImpl::GetMatchingVoice( 382 const Utterance* utterance, std::vector<VoiceData>& voices) { 383 // Make two passes: the first time, do strict language matching 384 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix 385 // language matching ('fr-FR' matches 'fr' and 'fr-CA') 386 for (int pass = 0; pass < 2; ++pass) { 387 for (size_t i = 0; i < voices.size(); ++i) { 388 const VoiceData& voice = voices[i]; 389 390 if (!utterance->extension_id().empty() && 391 utterance->extension_id() != voice.extension_id) { 392 continue; 393 } 394 395 if (!voice.name.empty() && 396 !utterance->voice_name().empty() && 397 voice.name != utterance->voice_name()) { 398 continue; 399 } 400 if (!voice.lang.empty() && !utterance->lang().empty()) { 401 std::string voice_lang = voice.lang; 402 std::string utterance_lang = utterance->lang(); 403 if (pass == 1) { 404 voice_lang = TrimLanguageCode(voice_lang); 405 utterance_lang = TrimLanguageCode(utterance_lang); 406 } 407 if (voice_lang != utterance_lang) { 408 continue; 409 } 410 } 411 if (voice.gender != TTS_GENDER_NONE && 412 utterance->gender() != TTS_GENDER_NONE && 413 voice.gender != utterance->gender()) { 414 continue; 415 } 416 417 if (utterance->required_event_types().size() > 0) { 418 bool has_all_required_event_types = true; 419 for (std::set<TtsEventType>::const_iterator iter = 420 utterance->required_event_types().begin(); 421 iter != utterance->required_event_types().end(); 422 ++iter) { 423 if (voice.events.find(*iter) == voice.events.end()) { 424 has_all_required_event_types = false; 425 break; 426 } 427 } 428 if (!has_all_required_event_types) 429 continue; 430 } 431 432 return static_cast<int>(i); 433 } 434 } 435 436 return -1; 437 } 438 439 void TtsControllerImpl::VoicesChanged() { 440 for (std::set<VoicesChangedDelegate*>::iterator iter = 441 voices_changed_delegates_.begin(); 442 iter != voices_changed_delegates_.end(); ++iter) { 443 (*iter)->OnVoicesChanged(); 444 } 445 } 446 447 void TtsControllerImpl::AddVoicesChangedDelegate( 448 VoicesChangedDelegate* delegate) { 449 voices_changed_delegates_.insert(delegate); 450 } 451 452 void TtsControllerImpl::RemoveVoicesChangedDelegate( 453 VoicesChangedDelegate* delegate) { 454 voices_changed_delegates_.erase(delegate); 455 } 456 457 void TtsControllerImpl::SetTtsEngineDelegate( 458 TtsEngineDelegate* delegate) { 459 tts_engine_delegate_ = delegate; 460 } 461 462 TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() { 463 return tts_engine_delegate_; 464 } 465