1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/speech/tts_controller.h" 6 7 #include <string> 8 #include <vector> 9 10 #include "base/float_util.h" 11 #include "base/values.h" 12 #include "chrome/browser/browser_process.h" 13 #include "chrome/browser/profiles/profile.h" 14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h" 15 #include "chrome/browser/speech/extension_api/tts_extension_api.h" 16 #include "chrome/browser/speech/tts_platform.h" 17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h" 18 #include "extensions/browser/extension_system.h" 19 #include "extensions/common/extension.h" 20 21 namespace { 22 // A value to be used to indicate that there is no char index available. 23 const int kInvalidCharIndex = -1; 24 25 // Given a language/region code of the form 'fr-FR', returns just the basic 26 // language portion, e.g. 'fr'. 27 std::string TrimLanguageCode(std::string lang) { 28 if (lang.size() >= 5 && lang[2] == '-') 29 return lang.substr(0, 2); 30 else 31 return lang; 32 } 33 34 } // namespace 35 36 bool IsFinalTtsEventType(TtsEventType event_type) { 37 return (event_type == TTS_EVENT_END || 38 event_type == TTS_EVENT_INTERRUPTED || 39 event_type == TTS_EVENT_CANCELLED || 40 event_type == TTS_EVENT_ERROR); 41 } 42 43 // 44 // UtteranceContinuousParameters 45 // 46 47 48 UtteranceContinuousParameters::UtteranceContinuousParameters() 49 : rate(-1), 50 pitch(-1), 51 volume(-1) {} 52 53 54 // 55 // VoiceData 56 // 57 58 59 VoiceData::VoiceData() 60 : gender(TTS_GENDER_NONE), 61 remote(false), 62 native(false) {} 63 64 VoiceData::~VoiceData() {} 65 66 67 // 68 // Utterance 69 // 70 71 // static 72 int Utterance::next_utterance_id_ = 0; 73 74 Utterance::Utterance(Profile* profile) 75 : profile_(profile), 76 id_(next_utterance_id_++), 77 src_id_(-1), 78 gender_(TTS_GENDER_NONE), 79 can_enqueue_(false), 80 char_index_(0), 81 finished_(false) { 82 options_.reset(new base::DictionaryValue()); 83 } 84 85 Utterance::~Utterance() { 86 DCHECK(finished_); 87 } 88 89 void Utterance::OnTtsEvent(TtsEventType event_type, 90 int char_index, 91 const std::string& error_message) { 92 if (char_index >= 0) 93 char_index_ = char_index; 94 if (IsFinalTtsEventType(event_type)) 95 finished_ = true; 96 97 if (event_delegate_) 98 event_delegate_->OnTtsEvent(this, event_type, char_index, error_message); 99 if (finished_) 100 event_delegate_.reset(); 101 } 102 103 void Utterance::Finish() { 104 finished_ = true; 105 } 106 107 void Utterance::set_options(const base::Value* options) { 108 options_.reset(options->DeepCopy()); 109 } 110 111 // 112 // TtsController 113 // 114 115 // static 116 TtsController* TtsController::GetInstance() { 117 return Singleton<TtsController>::get(); 118 } 119 120 TtsController::TtsController() 121 : current_utterance_(NULL), 122 paused_(false), 123 platform_impl_(NULL) { 124 } 125 126 TtsController::~TtsController() { 127 if (current_utterance_) { 128 current_utterance_->Finish(); 129 delete current_utterance_; 130 } 131 132 // Clear any queued utterances too. 133 ClearUtteranceQueue(false); // Don't sent events. 134 } 135 136 void TtsController::SpeakOrEnqueue(Utterance* utterance) { 137 // If we're paused and we get an utterance that can't be queued, 138 // flush the queue but stay in the paused state. 139 if (paused_ && !utterance->can_enqueue()) { 140 Stop(); 141 paused_ = true; 142 return; 143 } 144 145 if (paused_ || (IsSpeaking() && utterance->can_enqueue())) { 146 utterance_queue_.push(utterance); 147 } else { 148 Stop(); 149 SpeakNow(utterance); 150 } 151 } 152 153 void TtsController::SpeakNow(Utterance* utterance) { 154 // Get all available voices and try to find a matching voice. 155 std::vector<VoiceData> voices; 156 GetVoices(utterance->profile(), &voices); 157 int index = GetMatchingVoice(utterance, voices); 158 159 VoiceData voice; 160 if (index != -1) { 161 // Select the matching voice. 162 voice = voices[index]; 163 } else { 164 // However, if no match was found on a platform without native tts voices, 165 // attempt to get a voice based only on the current locale without respect 166 // to any supplied voice names. 167 std::vector<VoiceData> native_voices; 168 169 if (GetPlatformImpl()->PlatformImplAvailable()) 170 GetPlatformImpl()->GetVoices(&native_voices); 171 172 if (native_voices.empty() && !voices.empty()) { 173 // TODO(dtseng): Notify extension caller of an error. 174 utterance->set_voice_name(""); 175 utterance->set_lang(g_browser_process->GetApplicationLocale()); 176 index = GetMatchingVoice(utterance, voices); 177 178 // If even that fails, just take the first available voice. 179 if (index == -1) 180 index = 0; 181 voice = voices[index]; 182 } else { 183 // Otherwise, simply give native voices a chance to handle this utterance. 184 voice.native = true; 185 } 186 } 187 188 GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice); 189 190 if (!voice.native) { 191 #if !defined(OS_ANDROID) 192 DCHECK(!voice.extension_id.empty()); 193 current_utterance_ = utterance; 194 utterance->set_extension_id(voice.extension_id); 195 ExtensionTtsEngineSpeak(utterance, voice); 196 bool sends_end_event = 197 voice.events.find(TTS_EVENT_END) != voice.events.end(); 198 if (!sends_end_event) { 199 utterance->Finish(); 200 delete utterance; 201 current_utterance_ = NULL; 202 SpeakNextUtterance(); 203 } 204 #endif 205 } else { 206 // It's possible for certain platforms to send start events immediately 207 // during |speak|. 208 current_utterance_ = utterance; 209 GetPlatformImpl()->clear_error(); 210 bool success = GetPlatformImpl()->Speak( 211 utterance->id(), 212 utterance->text(), 213 utterance->lang(), 214 voice, 215 utterance->continuous_parameters()); 216 if (!success) 217 current_utterance_ = NULL; 218 219 // If the native voice wasn't able to process this speech, see if 220 // the browser has built-in TTS that isn't loaded yet. 221 if (!success && 222 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) { 223 utterance_queue_.push(utterance); 224 return; 225 } 226 227 if (!success) { 228 utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex, 229 GetPlatformImpl()->error()); 230 delete utterance; 231 return; 232 } 233 } 234 } 235 236 void TtsController::Stop() { 237 paused_ = false; 238 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 239 #if !defined(OS_ANDROID) 240 ExtensionTtsEngineStop(current_utterance_); 241 #endif 242 } else { 243 GetPlatformImpl()->clear_error(); 244 GetPlatformImpl()->StopSpeaking(); 245 } 246 247 if (current_utterance_) 248 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 249 std::string()); 250 FinishCurrentUtterance(); 251 ClearUtteranceQueue(true); // Send events. 252 } 253 254 void TtsController::Pause() { 255 paused_ = true; 256 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 257 #if !defined(OS_ANDROID) 258 ExtensionTtsEnginePause(current_utterance_); 259 #endif 260 } else if (current_utterance_) { 261 GetPlatformImpl()->clear_error(); 262 GetPlatformImpl()->Pause(); 263 } 264 } 265 266 void TtsController::Resume() { 267 paused_ = false; 268 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 269 #if !defined(OS_ANDROID) 270 ExtensionTtsEngineResume(current_utterance_); 271 #endif 272 } else if (current_utterance_) { 273 GetPlatformImpl()->clear_error(); 274 GetPlatformImpl()->Resume(); 275 } else { 276 SpeakNextUtterance(); 277 } 278 } 279 280 void TtsController::OnTtsEvent(int utterance_id, 281 TtsEventType event_type, 282 int char_index, 283 const std::string& error_message) { 284 // We may sometimes receive completion callbacks "late", after we've 285 // already finished the utterance (for example because another utterance 286 // interrupted or we got a call to Stop). This is normal and we can 287 // safely just ignore these events. 288 if (!current_utterance_ || utterance_id != current_utterance_->id()) { 289 return; 290 } 291 current_utterance_->OnTtsEvent(event_type, char_index, error_message); 292 if (current_utterance_->finished()) { 293 FinishCurrentUtterance(); 294 SpeakNextUtterance(); 295 } 296 } 297 298 void TtsController::GetVoices(Profile* profile, 299 std::vector<VoiceData>* out_voices) { 300 #if !defined(OS_ANDROID) 301 if (profile) 302 GetExtensionVoices(profile, out_voices); 303 #endif 304 305 TtsPlatformImpl* platform_impl = GetPlatformImpl(); 306 if (platform_impl && platform_impl->PlatformImplAvailable()) 307 platform_impl->GetVoices(out_voices); 308 } 309 310 bool TtsController::IsSpeaking() { 311 return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking(); 312 } 313 314 void TtsController::FinishCurrentUtterance() { 315 if (current_utterance_) { 316 if (!current_utterance_->finished()) 317 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 318 std::string()); 319 delete current_utterance_; 320 current_utterance_ = NULL; 321 } 322 } 323 324 void TtsController::SpeakNextUtterance() { 325 if (paused_) 326 return; 327 328 // Start speaking the next utterance in the queue. Keep trying in case 329 // one fails but there are still more in the queue to try. 330 while (!utterance_queue_.empty() && !current_utterance_) { 331 Utterance* utterance = utterance_queue_.front(); 332 utterance_queue_.pop(); 333 SpeakNow(utterance); 334 } 335 } 336 337 void TtsController::RetrySpeakingQueuedUtterances() { 338 if (current_utterance_ == NULL && !utterance_queue_.empty()) 339 SpeakNextUtterance(); 340 } 341 342 void TtsController::ClearUtteranceQueue(bool send_events) { 343 while (!utterance_queue_.empty()) { 344 Utterance* utterance = utterance_queue_.front(); 345 utterance_queue_.pop(); 346 if (send_events) 347 utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, 348 std::string()); 349 else 350 utterance->Finish(); 351 delete utterance; 352 } 353 } 354 355 void TtsController::SetPlatformImpl( 356 TtsPlatformImpl* platform_impl) { 357 platform_impl_ = platform_impl; 358 } 359 360 int TtsController::QueueSize() { 361 return static_cast<int>(utterance_queue_.size()); 362 } 363 364 TtsPlatformImpl* TtsController::GetPlatformImpl() { 365 if (!platform_impl_) 366 platform_impl_ = TtsPlatformImpl::GetInstance(); 367 return platform_impl_; 368 } 369 370 int TtsController::GetMatchingVoice( 371 const Utterance* utterance, std::vector<VoiceData>& voices) { 372 // Make two passes: the first time, do strict language matching 373 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix 374 // language matching ('fr-FR' matches 'fr' and 'fr-CA') 375 for (int pass = 0; pass < 2; ++pass) { 376 for (size_t i = 0; i < voices.size(); ++i) { 377 const VoiceData& voice = voices[i]; 378 379 if (!utterance->extension_id().empty() && 380 utterance->extension_id() != voice.extension_id) { 381 continue; 382 } 383 384 if (!voice.name.empty() && 385 !utterance->voice_name().empty() && 386 voice.name != utterance->voice_name()) { 387 continue; 388 } 389 if (!voice.lang.empty() && !utterance->lang().empty()) { 390 std::string voice_lang = voice.lang; 391 std::string utterance_lang = utterance->lang(); 392 if (pass == 1) { 393 voice_lang = TrimLanguageCode(voice_lang); 394 utterance_lang = TrimLanguageCode(utterance_lang); 395 } 396 if (voice_lang != utterance_lang) { 397 continue; 398 } 399 } 400 if (voice.gender != TTS_GENDER_NONE && 401 utterance->gender() != TTS_GENDER_NONE && 402 voice.gender != utterance->gender()) { 403 continue; 404 } 405 406 if (utterance->required_event_types().size() > 0) { 407 bool has_all_required_event_types = true; 408 for (std::set<TtsEventType>::const_iterator iter = 409 utterance->required_event_types().begin(); 410 iter != utterance->required_event_types().end(); 411 ++iter) { 412 if (voice.events.find(*iter) == voice.events.end()) { 413 has_all_required_event_types = false; 414 break; 415 } 416 } 417 if (!has_all_required_event_types) 418 continue; 419 } 420 421 return static_cast<int>(i); 422 } 423 } 424 425 return -1; 426 } 427 428 void TtsController::VoicesChanged() { 429 for (std::set<VoicesChangedDelegate*>::iterator iter = 430 voices_changed_delegates_.begin(); 431 iter != voices_changed_delegates_.end(); ++iter) { 432 (*iter)->OnVoicesChanged(); 433 } 434 } 435 436 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) { 437 voices_changed_delegates_.insert(delegate); 438 } 439 440 void TtsController::RemoveVoicesChangedDelegate( 441 VoicesChangedDelegate* delegate) { 442 voices_changed_delegates_.erase(delegate); 443 } 444