1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/speech/tts_controller.h" 6 7 #include <string> 8 #include <vector> 9 10 #include "base/float_util.h" 11 #include "base/values.h" 12 #include "chrome/browser/extensions/extension_system.h" 13 #include "chrome/browser/profiles/profile.h" 14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h" 15 #include "chrome/browser/speech/extension_api/tts_extension_api.h" 16 #include "chrome/browser/speech/tts_platform.h" 17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h" 18 #include "chrome/common/extensions/extension.h" 19 20 namespace { 21 // A value to be used to indicate that there is no char index available. 22 const int kInvalidCharIndex = -1; 23 24 // Given a language/region code of the form 'fr-FR', returns just the basic 25 // language portion, e.g. 'fr'. 26 std::string TrimLanguageCode(std::string lang) { 27 if (lang.size() >= 5 && lang[2] == '-') 28 return lang.substr(0, 2); 29 else 30 return lang; 31 } 32 33 } // namespace 34 35 bool IsFinalTtsEventType(TtsEventType event_type) { 36 return (event_type == TTS_EVENT_END || 37 event_type == TTS_EVENT_INTERRUPTED || 38 event_type == TTS_EVENT_CANCELLED || 39 event_type == TTS_EVENT_ERROR); 40 } 41 42 // 43 // UtteranceContinuousParameters 44 // 45 46 47 UtteranceContinuousParameters::UtteranceContinuousParameters() 48 : rate(-1), 49 pitch(-1), 50 volume(-1) {} 51 52 53 // 54 // VoiceData 55 // 56 57 58 VoiceData::VoiceData() 59 : gender(TTS_GENDER_NONE), 60 native(false) {} 61 62 VoiceData::~VoiceData() {} 63 64 65 // 66 // Utterance 67 // 68 69 // static 70 int Utterance::next_utterance_id_ = 0; 71 72 Utterance::Utterance(Profile* profile) 73 : profile_(profile), 74 id_(next_utterance_id_++), 75 src_id_(-1), 76 event_delegate_(NULL), 77 can_enqueue_(false), 78 char_index_(0), 79 finished_(false) { 80 options_.reset(new DictionaryValue()); 81 } 82 83 Utterance::~Utterance() { 84 DCHECK(finished_); 85 } 86 87 void Utterance::OnTtsEvent(TtsEventType event_type, 88 int char_index, 89 const std::string& error_message) { 90 if (char_index >= 0) 91 char_index_ = char_index; 92 if (IsFinalTtsEventType(event_type)) 93 finished_ = true; 94 95 if (event_delegate_) 96 event_delegate_->OnTtsEvent(this, event_type, char_index, error_message); 97 if (finished_) 98 event_delegate_ = NULL; 99 } 100 101 void Utterance::Finish() { 102 finished_ = true; 103 } 104 105 void Utterance::set_options(const Value* options) { 106 options_.reset(options->DeepCopy()); 107 } 108 109 // 110 // TtsController 111 // 112 113 // static 114 TtsController* TtsController::GetInstance() { 115 return Singleton<TtsController>::get(); 116 } 117 118 TtsController::TtsController() 119 : current_utterance_(NULL), 120 paused_(false), 121 platform_impl_(NULL) { 122 } 123 124 TtsController::~TtsController() { 125 if (current_utterance_) { 126 current_utterance_->Finish(); 127 delete current_utterance_; 128 } 129 130 // Clear any queued utterances too. 131 ClearUtteranceQueue(false); // Don't sent events. 132 } 133 134 void TtsController::SpeakOrEnqueue(Utterance* utterance) { 135 // If we're paused and we get an utterance that can't be queued, 136 // flush the queue but stay in the paused state. 137 if (paused_ && !utterance->can_enqueue()) { 138 Stop(); 139 paused_ = true; 140 return; 141 } 142 143 if (paused_ || (IsSpeaking() && utterance->can_enqueue())) { 144 utterance_queue_.push(utterance); 145 } else { 146 Stop(); 147 SpeakNow(utterance); 148 } 149 } 150 151 void TtsController::SpeakNow(Utterance* utterance) { 152 // Get all available voices and try to find a matching voice. 153 std::vector<VoiceData> voices; 154 GetVoices(utterance->profile(), &voices); 155 int index = GetMatchingVoice(utterance, voices); 156 157 // Select the matching voice, but if none was found, initialize an 158 // empty VoiceData with native = true, which will give the native 159 // speech synthesizer a chance to try to synthesize the utterance 160 // anyway. 161 VoiceData voice; 162 if (index >= 0 && index < static_cast<int>(voices.size())) 163 voice = voices[index]; 164 else 165 voice.native = true; 166 167 if (!voice.native) { 168 #if !defined(OS_ANDROID) 169 DCHECK(!voice.extension_id.empty()); 170 current_utterance_ = utterance; 171 utterance->set_extension_id(voice.extension_id); 172 ExtensionTtsEngineSpeak(utterance, voice); 173 bool sends_end_event = 174 voice.events.find(TTS_EVENT_END) != voice.events.end(); 175 if (!sends_end_event) { 176 utterance->Finish(); 177 delete utterance; 178 current_utterance_ = NULL; 179 SpeakNextUtterance(); 180 } 181 #endif 182 } else { 183 GetPlatformImpl()->clear_error(); 184 bool success = GetPlatformImpl()->Speak( 185 utterance->id(), 186 utterance->text(), 187 utterance->lang(), 188 voice, 189 utterance->continuous_parameters()); 190 191 // If the native voice wasn't able to process this speech, see if 192 // the browser has built-in TTS that isn't loaded yet. 193 if (!success && 194 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) { 195 utterance_queue_.push(utterance); 196 return; 197 } 198 199 if (!success) { 200 utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex, 201 GetPlatformImpl()->error()); 202 delete utterance; 203 return; 204 } 205 current_utterance_ = utterance; 206 } 207 } 208 209 void TtsController::Stop() { 210 paused_ = false; 211 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 212 #if !defined(OS_ANDROID) 213 ExtensionTtsEngineStop(current_utterance_); 214 #endif 215 } else { 216 GetPlatformImpl()->clear_error(); 217 GetPlatformImpl()->StopSpeaking(); 218 } 219 220 if (current_utterance_) 221 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 222 std::string()); 223 FinishCurrentUtterance(); 224 ClearUtteranceQueue(true); // Send events. 225 } 226 227 void TtsController::Pause() { 228 paused_ = true; 229 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 230 #if !defined(OS_ANDROID) 231 ExtensionTtsEnginePause(current_utterance_); 232 #endif 233 } else if (current_utterance_) { 234 GetPlatformImpl()->clear_error(); 235 GetPlatformImpl()->Pause(); 236 } 237 } 238 239 void TtsController::Resume() { 240 paused_ = false; 241 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 242 #if !defined(OS_ANDROID) 243 ExtensionTtsEngineResume(current_utterance_); 244 #endif 245 } else if (current_utterance_) { 246 GetPlatformImpl()->clear_error(); 247 GetPlatformImpl()->Resume(); 248 } else { 249 SpeakNextUtterance(); 250 } 251 } 252 253 void TtsController::OnTtsEvent(int utterance_id, 254 TtsEventType event_type, 255 int char_index, 256 const std::string& error_message) { 257 // We may sometimes receive completion callbacks "late", after we've 258 // already finished the utterance (for example because another utterance 259 // interrupted or we got a call to Stop). This is normal and we can 260 // safely just ignore these events. 261 if (!current_utterance_ || utterance_id != current_utterance_->id()) 262 return; 263 264 current_utterance_->OnTtsEvent(event_type, char_index, error_message); 265 if (current_utterance_->finished()) { 266 FinishCurrentUtterance(); 267 SpeakNextUtterance(); 268 } 269 } 270 271 void TtsController::GetVoices(Profile* profile, 272 std::vector<VoiceData>* out_voices) { 273 #if !defined(OS_ANDROID) 274 if (profile) 275 GetExtensionVoices(profile, out_voices); 276 #endif 277 278 TtsPlatformImpl* platform_impl = GetPlatformImpl(); 279 if (platform_impl && platform_impl->PlatformImplAvailable()) 280 platform_impl->GetVoices(out_voices); 281 } 282 283 bool TtsController::IsSpeaking() { 284 return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking(); 285 } 286 287 void TtsController::FinishCurrentUtterance() { 288 if (current_utterance_) { 289 if (!current_utterance_->finished()) 290 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 291 std::string()); 292 delete current_utterance_; 293 current_utterance_ = NULL; 294 } 295 } 296 297 void TtsController::SpeakNextUtterance() { 298 if (paused_) 299 return; 300 301 // Start speaking the next utterance in the queue. Keep trying in case 302 // one fails but there are still more in the queue to try. 303 while (!utterance_queue_.empty() && !current_utterance_) { 304 Utterance* utterance = utterance_queue_.front(); 305 utterance_queue_.pop(); 306 SpeakNow(utterance); 307 } 308 } 309 310 void TtsController::RetrySpeakingQueuedUtterances() { 311 if (current_utterance_ == NULL && !utterance_queue_.empty()) 312 SpeakNextUtterance(); 313 } 314 315 void TtsController::ClearUtteranceQueue(bool send_events) { 316 while (!utterance_queue_.empty()) { 317 Utterance* utterance = utterance_queue_.front(); 318 utterance_queue_.pop(); 319 if (send_events) 320 utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, 321 std::string()); 322 else 323 utterance->Finish(); 324 delete utterance; 325 } 326 } 327 328 void TtsController::SetPlatformImpl( 329 TtsPlatformImpl* platform_impl) { 330 platform_impl_ = platform_impl; 331 } 332 333 int TtsController::QueueSize() { 334 return static_cast<int>(utterance_queue_.size()); 335 } 336 337 TtsPlatformImpl* TtsController::GetPlatformImpl() { 338 if (!platform_impl_) 339 platform_impl_ = TtsPlatformImpl::GetInstance(); 340 return platform_impl_; 341 } 342 343 int TtsController::GetMatchingVoice( 344 const Utterance* utterance, std::vector<VoiceData>& voices) { 345 // Make two passes: the first time, do strict language matching 346 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix 347 // language matching ('fr-FR' matches 'fr' and 'fr-CA') 348 for (int pass = 0; pass < 2; ++pass) { 349 for (size_t i = 0; i < voices.size(); ++i) { 350 const VoiceData& voice = voices[i]; 351 352 if (!utterance->extension_id().empty() && 353 utterance->extension_id() != voice.extension_id) { 354 continue; 355 } 356 357 if (!voice.name.empty() && 358 !utterance->voice_name().empty() && 359 voice.name != utterance->voice_name()) { 360 continue; 361 } 362 if (!voice.lang.empty() && !utterance->lang().empty()) { 363 std::string voice_lang = voice.lang; 364 std::string utterance_lang = utterance->lang(); 365 if (pass == 1) { 366 voice_lang = TrimLanguageCode(voice_lang); 367 utterance_lang = TrimLanguageCode(utterance_lang); 368 } 369 if (voice_lang != utterance_lang) { 370 continue; 371 } 372 } 373 if (voice.gender != TTS_GENDER_NONE && 374 utterance->gender() != TTS_GENDER_NONE && 375 voice.gender != utterance->gender()) { 376 continue; 377 } 378 379 if (utterance->required_event_types().size() > 0) { 380 bool has_all_required_event_types = true; 381 for (std::set<TtsEventType>::const_iterator iter = 382 utterance->required_event_types().begin(); 383 iter != utterance->required_event_types().end(); 384 ++iter) { 385 if (voice.events.find(*iter) == voice.events.end()) { 386 has_all_required_event_types = false; 387 break; 388 } 389 } 390 if (!has_all_required_event_types) 391 continue; 392 } 393 394 return static_cast<int>(i); 395 } 396 } 397 398 return -1; 399 } 400 401 void TtsController::VoicesChanged() { 402 for (std::set<VoicesChangedDelegate*>::iterator iter = 403 voices_changed_delegates_.begin(); 404 iter != voices_changed_delegates_.end(); ++iter) { 405 (*iter)->OnVoicesChanged(); 406 } 407 } 408 409 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) { 410 voices_changed_delegates_.insert(delegate); 411 } 412 413 void TtsController::RemoveVoicesChangedDelegate( 414 VoicesChangedDelegate* delegate) { 415 voices_changed_delegates_.erase(delegate); 416 } 417 418