Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <math.h>
      6 #include <sapi.h>
      7 
      8 #include "base/memory/singleton.h"
      9 #include "base/strings/string_number_conversions.h"
     10 #include "base/strings/utf_string_conversions.h"
     11 #include "base/values.h"
     12 #include "base/win/scoped_comptr.h"
     13 #include "chrome/browser/speech/tts_controller.h"
     14 #include "chrome/browser/speech/tts_platform.h"
     15 
     16 class TtsPlatformImplWin : public TtsPlatformImpl {
     17  public:
     18   virtual bool PlatformImplAvailable() {
     19     return true;
     20   }
     21 
     22   virtual bool Speak(
     23       int utterance_id,
     24       const std::string& utterance,
     25       const std::string& lang,
     26       const VoiceData& voice,
     27       const UtteranceContinuousParameters& params);
     28 
     29   virtual bool StopSpeaking();
     30 
     31   virtual void Pause();
     32 
     33   virtual void Resume();
     34 
     35   virtual bool IsSpeaking();
     36 
     37   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
     38 
     39   // Get the single instance of this class.
     40   static TtsPlatformImplWin* GetInstance();
     41 
     42   static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
     43 
     44  private:
     45   TtsPlatformImplWin();
     46   virtual ~TtsPlatformImplWin() {}
     47 
     48   void OnSpeechEvent();
     49 
     50   base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
     51 
     52   // These apply to the current utterance only.
     53   std::wstring utterance_;
     54   int utterance_id_;
     55   int prefix_len_;
     56   ULONG stream_number_;
     57   int char_position_;
     58   bool paused_;
     59 
     60   friend struct DefaultSingletonTraits<TtsPlatformImplWin>;
     61 
     62   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
     63 };
     64 
     65 // static
     66 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
     67   return TtsPlatformImplWin::GetInstance();
     68 }
     69 
     70 bool TtsPlatformImplWin::Speak(
     71     int utterance_id,
     72     const std::string& src_utterance,
     73     const std::string& lang,
     74     const VoiceData& voice,
     75     const UtteranceContinuousParameters& params) {
     76   std::wstring prefix;
     77   std::wstring suffix;
     78 
     79   if (!speech_synthesizer_.get())
     80     return false;
     81 
     82   // TODO(dmazzoni): support languages other than the default: crbug.com/88059
     83 
     84   if (params.rate >= 0.0) {
     85     // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
     86     // linear range of -10 to 10:
     87     //   0.1 -> -10
     88     //   1.0 -> 0
     89     //  10.0 -> 10
     90     speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));
     91   }
     92 
     93   if (params.pitch >= 0.0) {
     94     // The TTS api allows a range of -10 to 10 for speech pitch.
     95     // TODO(dtseng): cleanup if we ever use any other properties that
     96     // require xml.
     97     std::wstring pitch_value =
     98         base::IntToString16(static_cast<int>(params.pitch * 10 - 10));
     99     prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">";
    100     suffix = L"</pitch>";
    101   }
    102 
    103   if (params.volume >= 0.0) {
    104     // The TTS api allows a range of 0 to 100 for speech volume.
    105     speech_synthesizer_->SetVolume(static_cast<uint16>(params.volume * 100));
    106   }
    107 
    108   // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
    109 
    110   utterance_ = base::UTF8ToWide(src_utterance);
    111   utterance_id_ = utterance_id;
    112   char_position_ = 0;
    113   std::wstring merged_utterance = prefix + utterance_ + suffix;
    114   prefix_len_ = prefix.size();
    115 
    116   HRESULT result = speech_synthesizer_->Speak(
    117       merged_utterance.c_str(),
    118       SPF_ASYNC,
    119       &stream_number_);
    120   return (result == S_OK);
    121 }
    122 
    123 bool TtsPlatformImplWin::StopSpeaking() {
    124   if (speech_synthesizer_.get()) {
    125     // Clear the stream number so that any further events relating to this
    126     // utterance are ignored.
    127     stream_number_ = 0;
    128 
    129     if (IsSpeaking()) {
    130       // Stop speech by speaking the empty string with the purge flag.
    131       speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
    132     }
    133     if (paused_) {
    134       speech_synthesizer_->Resume();
    135       paused_ = false;
    136     }
    137   }
    138   return true;
    139 }
    140 
    141 void TtsPlatformImplWin::Pause() {
    142   if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
    143     speech_synthesizer_->Pause();
    144     paused_ = true;
    145     TtsController::GetInstance()->OnTtsEvent(
    146         utterance_id_, TTS_EVENT_PAUSE, char_position_, "");
    147   }
    148 }
    149 
    150 void TtsPlatformImplWin::Resume() {
    151   if (speech_synthesizer_.get() && utterance_id_ && paused_) {
    152     speech_synthesizer_->Resume();
    153     paused_ = false;
    154     TtsController::GetInstance()->OnTtsEvent(
    155         utterance_id_, TTS_EVENT_RESUME, char_position_, "");
    156   }
    157 }
    158 
    159 bool TtsPlatformImplWin::IsSpeaking() {
    160   if (speech_synthesizer_.get()) {
    161     SPVOICESTATUS status;
    162     HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
    163     if (result == S_OK) {
    164       if (status.dwRunningState == 0 ||  // 0 == waiting to speak
    165           status.dwRunningState == SPRS_IS_SPEAKING) {
    166         return true;
    167       }
    168     }
    169   }
    170   return false;
    171 }
    172 
    173 void TtsPlatformImplWin::GetVoices(
    174     std::vector<VoiceData>* out_voices) {
    175   // TODO: get all voices, not just default voice.
    176   // http://crbug.com/88059
    177   out_voices->push_back(VoiceData());
    178   VoiceData& voice = out_voices->back();
    179   voice.native = true;
    180   voice.name = "native";
    181   voice.events.insert(TTS_EVENT_START);
    182   voice.events.insert(TTS_EVENT_END);
    183   voice.events.insert(TTS_EVENT_MARKER);
    184   voice.events.insert(TTS_EVENT_WORD);
    185   voice.events.insert(TTS_EVENT_SENTENCE);
    186   voice.events.insert(TTS_EVENT_PAUSE);
    187   voice.events.insert(TTS_EVENT_RESUME);
    188 }
    189 
    190 void TtsPlatformImplWin::OnSpeechEvent() {
    191   TtsController* controller = TtsController::GetInstance();
    192   SPEVENT event;
    193   while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
    194     if (event.ulStreamNum != stream_number_)
    195       continue;
    196 
    197     switch (event.eEventId) {
    198     case SPEI_START_INPUT_STREAM:
    199       controller->OnTtsEvent(
    200           utterance_id_, TTS_EVENT_START, 0, std::string());
    201       break;
    202     case SPEI_END_INPUT_STREAM:
    203       char_position_ = utterance_.size();
    204       controller->OnTtsEvent(
    205           utterance_id_, TTS_EVENT_END, char_position_, std::string());
    206       break;
    207     case SPEI_TTS_BOOKMARK:
    208       controller->OnTtsEvent(
    209           utterance_id_, TTS_EVENT_MARKER, char_position_, std::string());
    210       break;
    211     case SPEI_WORD_BOUNDARY:
    212       char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
    213       controller->OnTtsEvent(
    214           utterance_id_, TTS_EVENT_WORD, char_position_,
    215           std::string());
    216       break;
    217     case SPEI_SENTENCE_BOUNDARY:
    218       char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
    219       controller->OnTtsEvent(
    220           utterance_id_, TTS_EVENT_SENTENCE, char_position_,
    221           std::string());
    222       break;
    223     }
    224   }
    225 }
    226 
    227 TtsPlatformImplWin::TtsPlatformImplWin()
    228   : utterance_id_(0),
    229     prefix_len_(0),
    230     stream_number_(0),
    231     char_position_(0),
    232     paused_(false) {
    233   speech_synthesizer_.CreateInstance(CLSID_SpVoice);
    234   if (speech_synthesizer_.get()) {
    235     ULONGLONG event_mask =
    236         SPFEI(SPEI_START_INPUT_STREAM) |
    237         SPFEI(SPEI_TTS_BOOKMARK) |
    238         SPFEI(SPEI_WORD_BOUNDARY) |
    239         SPFEI(SPEI_SENTENCE_BOUNDARY) |
    240         SPFEI(SPEI_END_INPUT_STREAM);
    241     speech_synthesizer_->SetInterest(event_mask, event_mask);
    242     speech_synthesizer_->SetNotifyCallbackFunction(
    243         TtsPlatformImplWin::SpeechEventCallback, 0, 0);
    244   }
    245 }
    246 
    247 // static
    248 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
    249   return Singleton<TtsPlatformImplWin,
    250                    LeakySingletonTraits<TtsPlatformImplWin> >::get();
    251 }
    252 
    253 // static
    254 void TtsPlatformImplWin::SpeechEventCallback(
    255     WPARAM w_param, LPARAM l_param) {
    256   GetInstance()->OnSpeechEvent();
    257 }
    258