1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <math.h> 6 7 #include <map> 8 9 #include "base/command_line.h" 10 #include "base/debug/leak_annotations.h" 11 #include "base/memory/scoped_ptr.h" 12 #include "base/memory/singleton.h" 13 #include "base/synchronization/lock.h" 14 #include "chrome/browser/speech/tts_platform.h" 15 #include "chrome/common/chrome_switches.h" 16 #include "content/public/browser/browser_thread.h" 17 18 #include "library_loaders/libspeechd.h" 19 20 using content::BrowserThread; 21 22 namespace { 23 24 const char kNotSupportedError[] = 25 "Native speech synthesis not supported on this platform."; 26 27 struct SPDChromeVoice { 28 std::string name; 29 std::string module; 30 }; 31 32 } // namespace 33 34 class TtsPlatformImplLinux : public TtsPlatformImpl { 35 public: 36 virtual bool PlatformImplAvailable() OVERRIDE; 37 virtual bool Speak( 38 int utterance_id, 39 const std::string& utterance, 40 const std::string& lang, 41 const VoiceData& voice, 42 const UtteranceContinuousParameters& params) OVERRIDE; 43 virtual bool StopSpeaking() OVERRIDE; 44 virtual void Pause() OVERRIDE; 45 virtual void Resume() OVERRIDE; 46 virtual bool IsSpeaking() OVERRIDE; 47 virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE; 48 49 void OnSpeechEvent(SPDNotificationType type); 50 51 // Get the single instance of this class. 52 static TtsPlatformImplLinux* GetInstance(); 53 54 private: 55 TtsPlatformImplLinux(); 56 virtual ~TtsPlatformImplLinux(); 57 58 // Initiate the connection with the speech dispatcher. 59 void Initialize(); 60 61 // Resets the connection with speech dispatcher. 62 void Reset(); 63 64 static void NotificationCallback(size_t msg_id, 65 size_t client_id, 66 SPDNotificationType type); 67 68 static void IndexMarkCallback(size_t msg_id, 69 size_t client_id, 70 SPDNotificationType state, 71 char* index_mark); 72 73 static SPDNotificationType current_notification_; 74 75 base::Lock initialization_lock_; 76 LibSpeechdLoader libspeechd_loader_; 77 SPDConnection* conn_; 78 79 // These apply to the current utterance only. 80 std::string utterance_; 81 int utterance_id_; 82 83 // Map a string composed of a voicename and module to the voicename. Used to 84 // uniquely identify a voice across all available modules. 85 scoped_ptr<std::map<std::string, SPDChromeVoice> > all_native_voices_; 86 87 friend struct DefaultSingletonTraits<TtsPlatformImplLinux>; 88 89 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux); 90 }; 91 92 // static 93 SPDNotificationType TtsPlatformImplLinux::current_notification_ = 94 SPD_EVENT_END; 95 96 TtsPlatformImplLinux::TtsPlatformImplLinux() 97 : utterance_id_(0) { 98 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); 99 if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher)) 100 return; 101 102 BrowserThread::PostTask(BrowserThread::FILE, 103 FROM_HERE, 104 base::Bind(&TtsPlatformImplLinux::Initialize, 105 base::Unretained(this))); 106 } 107 108 void TtsPlatformImplLinux::Initialize() { 109 base::AutoLock lock(initialization_lock_); 110 111 if (!libspeechd_loader_.Load("libspeechd.so.2")) 112 return; 113 114 { 115 // spd_open has memory leaks which are hard to suppress. 116 // http://crbug.com/317360 117 ANNOTATE_SCOPED_MEMORY_LEAK; 118 conn_ = libspeechd_loader_.spd_open( 119 "chrome", "extension_api", NULL, SPD_MODE_SINGLE); 120 } 121 if (!conn_) 122 return; 123 124 // Register callbacks for all events. 125 conn_->callback_begin = 126 conn_->callback_end = 127 conn_->callback_cancel = 128 conn_->callback_pause = 129 conn_->callback_resume = 130 &NotificationCallback; 131 132 conn_->callback_im = &IndexMarkCallback; 133 134 libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN); 135 libspeechd_loader_.spd_set_notification_on(conn_, SPD_END); 136 libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL); 137 libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE); 138 libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME); 139 } 140 141 TtsPlatformImplLinux::~TtsPlatformImplLinux() { 142 base::AutoLock lock(initialization_lock_); 143 if (conn_) { 144 libspeechd_loader_.spd_close(conn_); 145 conn_ = NULL; 146 } 147 } 148 149 void TtsPlatformImplLinux::Reset() { 150 base::AutoLock lock(initialization_lock_); 151 if (conn_) 152 libspeechd_loader_.spd_close(conn_); 153 conn_ = libspeechd_loader_.spd_open( 154 "chrome", "extension_api", NULL, SPD_MODE_SINGLE); 155 } 156 157 bool TtsPlatformImplLinux::PlatformImplAvailable() { 158 if (!initialization_lock_.Try()) 159 return false; 160 bool result = libspeechd_loader_.loaded() && (conn_ != NULL); 161 initialization_lock_.Release(); 162 return result; 163 } 164 165 bool TtsPlatformImplLinux::Speak( 166 int utterance_id, 167 const std::string& utterance, 168 const std::string& lang, 169 const VoiceData& voice, 170 const UtteranceContinuousParameters& params) { 171 if (!PlatformImplAvailable()) { 172 error_ = kNotSupportedError; 173 return false; 174 } 175 176 // Speech dispatcher's speech params are around 3x at either limit. 177 float rate = params.rate > 3 ? 3 : params.rate; 178 rate = params.rate < 0.334 ? 0.334 : rate; 179 float pitch = params.pitch > 3 ? 3 : params.pitch; 180 pitch = params.pitch < 0.334 ? 0.334 : pitch; 181 182 std::map<std::string, SPDChromeVoice>::iterator it = 183 all_native_voices_->find(voice.name); 184 if (it != all_native_voices_->end()) { 185 libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str()); 186 libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str()); 187 } 188 189 // Map our multiplicative range to Speech Dispatcher's linear range. 190 // .334 = -100. 191 // 3 = 100. 192 libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3)); 193 libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3)); 194 195 utterance_ = utterance; 196 utterance_id_ = utterance_id; 197 198 if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) { 199 Reset(); 200 return false; 201 } 202 return true; 203 } 204 205 bool TtsPlatformImplLinux::StopSpeaking() { 206 if (!PlatformImplAvailable()) 207 return false; 208 if (libspeechd_loader_.spd_stop(conn_) == -1) { 209 Reset(); 210 return false; 211 } 212 return true; 213 } 214 215 void TtsPlatformImplLinux::Pause() { 216 if (!PlatformImplAvailable()) 217 return; 218 libspeechd_loader_.spd_pause(conn_); 219 } 220 221 void TtsPlatformImplLinux::Resume() { 222 if (!PlatformImplAvailable()) 223 return; 224 libspeechd_loader_.spd_resume(conn_); 225 } 226 227 bool TtsPlatformImplLinux::IsSpeaking() { 228 return current_notification_ == SPD_EVENT_BEGIN; 229 } 230 231 void TtsPlatformImplLinux::GetVoices( 232 std::vector<VoiceData>* out_voices) { 233 if (!all_native_voices_.get()) { 234 all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>()); 235 char** modules = libspeechd_loader_.spd_list_modules(conn_); 236 if (!modules) 237 return; 238 for (int i = 0; modules[i]; i++) { 239 char* module = modules[i]; 240 libspeechd_loader_.spd_set_output_module(conn_, module); 241 SPDVoice** native_voices = 242 libspeechd_loader_.spd_list_synthesis_voices(conn_); 243 if (!native_voices) { 244 free(module); 245 continue; 246 } 247 for (int j = 0; native_voices[j]; j++) { 248 SPDVoice* native_voice = native_voices[j]; 249 SPDChromeVoice native_data; 250 native_data.name = native_voice->name; 251 native_data.module = module; 252 std::string key; 253 key.append(native_data.name); 254 key.append(" "); 255 key.append(native_data.module); 256 all_native_voices_->insert( 257 std::pair<std::string, SPDChromeVoice>(key, native_data)); 258 free(native_voices[j]); 259 } 260 free(modules[i]); 261 } 262 } 263 264 for (std::map<std::string, SPDChromeVoice>::iterator it = 265 all_native_voices_->begin(); 266 it != all_native_voices_->end(); 267 it++) { 268 out_voices->push_back(VoiceData()); 269 VoiceData& voice = out_voices->back(); 270 voice.native = true; 271 voice.name = it->first; 272 voice.events.insert(TTS_EVENT_START); 273 voice.events.insert(TTS_EVENT_END); 274 voice.events.insert(TTS_EVENT_CANCELLED); 275 voice.events.insert(TTS_EVENT_MARKER); 276 voice.events.insert(TTS_EVENT_PAUSE); 277 voice.events.insert(TTS_EVENT_RESUME); 278 } 279 } 280 281 void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) { 282 TtsController* controller = TtsController::GetInstance(); 283 switch (type) { 284 case SPD_EVENT_BEGIN: 285 controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, std::string()); 286 break; 287 case SPD_EVENT_RESUME: 288 controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, 0, std::string()); 289 break; 290 case SPD_EVENT_END: 291 controller->OnTtsEvent( 292 utterance_id_, TTS_EVENT_END, utterance_.size(), std::string()); 293 break; 294 case SPD_EVENT_PAUSE: 295 controller->OnTtsEvent( 296 utterance_id_, TTS_EVENT_PAUSE, utterance_.size(), std::string()); 297 break; 298 case SPD_EVENT_CANCEL: 299 controller->OnTtsEvent( 300 utterance_id_, TTS_EVENT_CANCELLED, 0, std::string()); 301 break; 302 case SPD_EVENT_INDEX_MARK: 303 controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, 0, std::string()); 304 break; 305 } 306 } 307 308 // static 309 void TtsPlatformImplLinux::NotificationCallback( 310 size_t msg_id, size_t client_id, SPDNotificationType type) { 311 // We run Speech Dispatcher in threaded mode, so these callbacks should always 312 // be in a separate thread. 313 if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) { 314 current_notification_ = type; 315 BrowserThread::PostTask( 316 BrowserThread::UI, 317 FROM_HERE, 318 base::Bind(&TtsPlatformImplLinux::OnSpeechEvent, 319 base::Unretained(TtsPlatformImplLinux::GetInstance()), 320 type)); 321 } 322 } 323 324 // static 325 void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id, 326 size_t client_id, 327 SPDNotificationType state, 328 char* index_mark) { 329 // TODO(dtseng): index_mark appears to specify an index type supplied by a 330 // client. Need to explore how this is used before hooking it up with existing 331 // word, sentence events. 332 // We run Speech Dispatcher in threaded mode, so these callbacks should always 333 // be in a separate thread. 334 if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) { 335 current_notification_ = state; 336 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, 337 base::Bind(&TtsPlatformImplLinux::OnSpeechEvent, 338 base::Unretained(TtsPlatformImplLinux::GetInstance()), 339 state)); 340 } 341 } 342 343 // static 344 TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() { 345 return Singleton<TtsPlatformImplLinux, 346 LeakySingletonTraits<TtsPlatformImplLinux> >::get(); 347 } 348 349 // static 350 TtsPlatformImpl* TtsPlatformImpl::GetInstance() { 351 return TtsPlatformImplLinux::GetInstance(); 352 } 353