1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <math.h> 6 #include <sapi.h> 7 8 #include "base/memory/singleton.h" 9 #include "base/strings/string_number_conversions.h" 10 #include "base/strings/utf_string_conversions.h" 11 #include "base/values.h" 12 #include "base/win/scoped_comptr.h" 13 #include "chrome/browser/speech/tts_controller.h" 14 #include "chrome/browser/speech/tts_platform.h" 15 16 class TtsPlatformImplWin : public TtsPlatformImpl { 17 public: 18 virtual bool PlatformImplAvailable() { 19 return true; 20 } 21 22 virtual bool Speak( 23 int utterance_id, 24 const std::string& utterance, 25 const std::string& lang, 26 const VoiceData& voice, 27 const UtteranceContinuousParameters& params); 28 29 virtual bool StopSpeaking(); 30 31 virtual void Pause(); 32 33 virtual void Resume(); 34 35 virtual bool IsSpeaking(); 36 37 virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE; 38 39 // Get the single instance of this class. 40 static TtsPlatformImplWin* GetInstance(); 41 42 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param); 43 44 private: 45 TtsPlatformImplWin(); 46 virtual ~TtsPlatformImplWin() {} 47 48 void OnSpeechEvent(); 49 50 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_; 51 52 // These apply to the current utterance only. 53 std::wstring utterance_; 54 int utterance_id_; 55 int prefix_len_; 56 ULONG stream_number_; 57 int char_position_; 58 bool paused_; 59 60 friend struct DefaultSingletonTraits<TtsPlatformImplWin>; 61 62 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin); 63 }; 64 65 // static 66 TtsPlatformImpl* TtsPlatformImpl::GetInstance() { 67 return TtsPlatformImplWin::GetInstance(); 68 } 69 70 bool TtsPlatformImplWin::Speak( 71 int utterance_id, 72 const std::string& src_utterance, 73 const std::string& lang, 74 const VoiceData& voice, 75 const UtteranceContinuousParameters& params) { 76 std::wstring prefix; 77 std::wstring suffix; 78 79 if (!speech_synthesizer_.get()) 80 return false; 81 82 // TODO(dmazzoni): support languages other than the default: crbug.com/88059 83 84 if (params.rate >= 0.0) { 85 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's 86 // linear range of -10 to 10: 87 // 0.1 -> -10 88 // 1.0 -> 0 89 // 10.0 -> 10 90 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate))); 91 } 92 93 if (params.pitch >= 0.0) { 94 // The TTS api allows a range of -10 to 10 for speech pitch. 95 // TODO(dtseng): cleanup if we ever use any other properties that 96 // require xml. 97 std::wstring pitch_value = 98 base::IntToString16(static_cast<int>(params.pitch * 10 - 10)); 99 prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">"; 100 suffix = L"</pitch>"; 101 } 102 103 if (params.volume >= 0.0) { 104 // The TTS api allows a range of 0 to 100 for speech volume. 105 speech_synthesizer_->SetVolume(static_cast<uint16>(params.volume * 100)); 106 } 107 108 // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072 109 110 utterance_ = UTF8ToWide(src_utterance); 111 utterance_id_ = utterance_id; 112 char_position_ = 0; 113 std::wstring merged_utterance = prefix + utterance_ + suffix; 114 prefix_len_ = prefix.size(); 115 116 HRESULT result = speech_synthesizer_->Speak( 117 merged_utterance.c_str(), 118 SPF_ASYNC, 119 &stream_number_); 120 return (result == S_OK); 121 } 122 123 bool TtsPlatformImplWin::StopSpeaking() { 124 if (speech_synthesizer_.get()) { 125 // Clear the stream number so that any further events relating to this 126 // utterance are ignored. 127 stream_number_ = 0; 128 129 if (IsSpeaking()) { 130 // Stop speech by speaking the empty string with the purge flag. 131 speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL); 132 } 133 if (paused_) { 134 speech_synthesizer_->Resume(); 135 paused_ = false; 136 } 137 } 138 return true; 139 } 140 141 void TtsPlatformImplWin::Pause() { 142 if (speech_synthesizer_.get() && utterance_id_ && !paused_) { 143 speech_synthesizer_->Pause(); 144 paused_ = true; 145 TtsController::GetInstance()->OnTtsEvent( 146 utterance_id_, TTS_EVENT_PAUSE, char_position_, ""); 147 } 148 } 149 150 void TtsPlatformImplWin::Resume() { 151 if (speech_synthesizer_.get() && utterance_id_ && paused_) { 152 speech_synthesizer_->Resume(); 153 paused_ = false; 154 TtsController::GetInstance()->OnTtsEvent( 155 utterance_id_, TTS_EVENT_RESUME, char_position_, ""); 156 } 157 } 158 159 bool TtsPlatformImplWin::IsSpeaking() { 160 if (speech_synthesizer_.get()) { 161 SPVOICESTATUS status; 162 HRESULT result = speech_synthesizer_->GetStatus(&status, NULL); 163 if (result == S_OK) { 164 if (status.dwRunningState == 0 || // 0 == waiting to speak 165 status.dwRunningState == SPRS_IS_SPEAKING) { 166 return true; 167 } 168 } 169 } 170 return false; 171 } 172 173 void TtsPlatformImplWin::GetVoices( 174 std::vector<VoiceData>* out_voices) { 175 // TODO: get all voices, not just default voice. 176 // http://crbug.com/88059 177 out_voices->push_back(VoiceData()); 178 VoiceData& voice = out_voices->back(); 179 voice.native = true; 180 voice.name = "native"; 181 voice.events.insert(TTS_EVENT_START); 182 voice.events.insert(TTS_EVENT_END); 183 voice.events.insert(TTS_EVENT_MARKER); 184 voice.events.insert(TTS_EVENT_WORD); 185 voice.events.insert(TTS_EVENT_SENTENCE); 186 voice.events.insert(TTS_EVENT_PAUSE); 187 voice.events.insert(TTS_EVENT_RESUME); 188 } 189 190 void TtsPlatformImplWin::OnSpeechEvent() { 191 TtsController* controller = TtsController::GetInstance(); 192 SPEVENT event; 193 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) { 194 if (event.ulStreamNum != stream_number_) 195 continue; 196 197 switch (event.eEventId) { 198 case SPEI_START_INPUT_STREAM: 199 controller->OnTtsEvent( 200 utterance_id_, TTS_EVENT_START, 0, std::string()); 201 break; 202 case SPEI_END_INPUT_STREAM: 203 char_position_ = utterance_.size(); 204 controller->OnTtsEvent( 205 utterance_id_, TTS_EVENT_END, char_position_, std::string()); 206 break; 207 case SPEI_TTS_BOOKMARK: 208 controller->OnTtsEvent( 209 utterance_id_, TTS_EVENT_MARKER, char_position_, std::string()); 210 break; 211 case SPEI_WORD_BOUNDARY: 212 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_; 213 controller->OnTtsEvent( 214 utterance_id_, TTS_EVENT_WORD, char_position_, 215 std::string()); 216 break; 217 case SPEI_SENTENCE_BOUNDARY: 218 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_; 219 controller->OnTtsEvent( 220 utterance_id_, TTS_EVENT_SENTENCE, char_position_, 221 std::string()); 222 break; 223 } 224 } 225 } 226 227 TtsPlatformImplWin::TtsPlatformImplWin() 228 : utterance_id_(0), 229 prefix_len_(0), 230 stream_number_(0), 231 char_position_(0), 232 paused_(false) { 233 speech_synthesizer_.CreateInstance(CLSID_SpVoice); 234 if (speech_synthesizer_.get()) { 235 ULONGLONG event_mask = 236 SPFEI(SPEI_START_INPUT_STREAM) | 237 SPFEI(SPEI_TTS_BOOKMARK) | 238 SPFEI(SPEI_WORD_BOUNDARY) | 239 SPFEI(SPEI_SENTENCE_BOUNDARY) | 240 SPFEI(SPEI_END_INPUT_STREAM); 241 speech_synthesizer_->SetInterest(event_mask, event_mask); 242 speech_synthesizer_->SetNotifyCallbackFunction( 243 TtsPlatformImplWin::SpeechEventCallback, 0, 0); 244 } 245 } 246 247 // static 248 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() { 249 return Singleton<TtsPlatformImplWin, 250 LeakySingletonTraits<TtsPlatformImplWin> >::get(); 251 } 252 253 // static 254 void TtsPlatformImplWin::SpeechEventCallback( 255 WPARAM w_param, LPARAM l_param) { 256 GetInstance()->OnSpeechEvent(); 257 } 258