Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <math.h>
      6 
      7 #include <map>
      8 
      9 #include "base/command_line.h"
     10 #include "base/debug/leak_annotations.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/memory/singleton.h"
     13 #include "base/synchronization/lock.h"
     14 #include "chrome/browser/speech/tts_platform.h"
     15 #include "chrome/common/chrome_switches.h"
     16 #include "content/public/browser/browser_thread.h"
     17 
     18 #include "library_loaders/libspeechd.h"
     19 
     20 using content::BrowserThread;
     21 
     namespace {

     // Describes one Speech Dispatcher voice: the voice's own name together with
     // the output module it was listed under.
     struct SPDChromeVoice {
       std::string name;    // Voice name as reported by Speech Dispatcher.
       std::string module;  // Output module that provides the voice.
     };

     // Stored in |error_| when Speak() is called while the platform
     // implementation is not available.
     const char kNotSupportedError[] =
         "Native speech synthesis not supported on this platform.";

     }  // namespace
     33 
     34 class TtsPlatformImplLinux : public TtsPlatformImpl {
     35  public:
     36   virtual bool PlatformImplAvailable() OVERRIDE;
     37   virtual bool Speak(
     38       int utterance_id,
     39       const std::string& utterance,
     40       const std::string& lang,
     41       const VoiceData& voice,
     42       const UtteranceContinuousParameters& params) OVERRIDE;
     43   virtual bool StopSpeaking() OVERRIDE;
     44   virtual void Pause() OVERRIDE;
     45   virtual void Resume() OVERRIDE;
     46   virtual bool IsSpeaking() OVERRIDE;
     47   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
     48 
     49   void OnSpeechEvent(SPDNotificationType type);
     50 
     51   // Get the single instance of this class.
     52   static TtsPlatformImplLinux* GetInstance();
     53 
     54  private:
     55   TtsPlatformImplLinux();
     56   virtual ~TtsPlatformImplLinux();
     57 
     58   // Initiate the connection with the speech dispatcher.
     59   void Initialize();
     60 
     61   // Resets the connection with speech dispatcher.
     62   void Reset();
     63 
     64   static void NotificationCallback(size_t msg_id,
     65                                    size_t client_id,
     66                                    SPDNotificationType type);
     67 
     68   static void IndexMarkCallback(size_t msg_id,
     69                                 size_t client_id,
     70                                 SPDNotificationType state,
     71                                 char* index_mark);
     72 
     73   static SPDNotificationType current_notification_;
     74 
     75   base::Lock initialization_lock_;
     76   LibSpeechdLoader libspeechd_loader_;
     77   SPDConnection* conn_;
     78 
     79   // These apply to the current utterance only.
     80   std::string utterance_;
     81   int utterance_id_;
     82 
     83   // Map a string composed of a voicename and module to the voicename. Used to
     84   // uniquely identify a voice across all available modules.
     85   scoped_ptr<std::map<std::string, SPDChromeVoice> > all_native_voices_;
     86 
     87   friend struct DefaultSingletonTraits<TtsPlatformImplLinux>;
     88 
     89   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
     90 };
     91 
     92 // static
     93 SPDNotificationType TtsPlatformImplLinux::current_notification_ =
     94     SPD_EVENT_END;
     95 
     96 TtsPlatformImplLinux::TtsPlatformImplLinux()
     97     : utterance_id_(0) {
     98   const CommandLine& command_line = *CommandLine::ForCurrentProcess();
     99   if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher))
    100     return;
    101 
    102   BrowserThread::PostTask(BrowserThread::FILE,
    103                           FROM_HERE,
    104                           base::Bind(&TtsPlatformImplLinux::Initialize,
    105                                      base::Unretained(this)));
    106 }
    107 
    108 void TtsPlatformImplLinux::Initialize() {
    109   base::AutoLock lock(initialization_lock_);
    110 
    111   if (!libspeechd_loader_.Load("libspeechd.so.2"))
    112     return;
    113 
    114   {
    115     // spd_open has memory leaks which are hard to suppress.
    116     // http://crbug.com/317360
    117     ANNOTATE_SCOPED_MEMORY_LEAK;
    118     conn_ = libspeechd_loader_.spd_open(
    119         "chrome", "extension_api", NULL, SPD_MODE_SINGLE);
    120   }
    121   if (!conn_)
    122     return;
    123 
    124   // Register callbacks for all events.
    125   conn_->callback_begin =
    126     conn_->callback_end =
    127     conn_->callback_cancel =
    128     conn_->callback_pause =
    129     conn_->callback_resume =
    130     &NotificationCallback;
    131 
    132   conn_->callback_im = &IndexMarkCallback;
    133 
    134   libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
    135   libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
    136   libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
    137   libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
    138   libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
    139 }
    140 
    141 TtsPlatformImplLinux::~TtsPlatformImplLinux() {
    142   base::AutoLock lock(initialization_lock_);
    143   if (conn_) {
    144     libspeechd_loader_.spd_close(conn_);
    145     conn_ = NULL;
    146   }
    147 }
    148 
    149 void TtsPlatformImplLinux::Reset() {
    150   base::AutoLock lock(initialization_lock_);
    151   if (conn_)
    152     libspeechd_loader_.spd_close(conn_);
    153   conn_ = libspeechd_loader_.spd_open(
    154       "chrome", "extension_api", NULL, SPD_MODE_SINGLE);
    155 }
    156 
    157 bool TtsPlatformImplLinux::PlatformImplAvailable() {
    158   if (!initialization_lock_.Try())
    159     return false;
    160   bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
    161   initialization_lock_.Release();
    162   return result;
    163 }
    164 
    165 bool TtsPlatformImplLinux::Speak(
    166     int utterance_id,
    167     const std::string& utterance,
    168     const std::string& lang,
    169     const VoiceData& voice,
    170     const UtteranceContinuousParameters& params) {
    171   if (!PlatformImplAvailable()) {
    172     error_ = kNotSupportedError;
    173     return false;
    174   }
    175 
    176   // Speech dispatcher's speech params are around 3x at either limit.
    177   float rate = params.rate > 3 ? 3 : params.rate;
    178   rate = params.rate < 0.334 ? 0.334 : rate;
    179   float pitch = params.pitch > 3 ? 3 : params.pitch;
    180   pitch = params.pitch < 0.334 ? 0.334 : pitch;
    181 
    182   std::map<std::string, SPDChromeVoice>::iterator it =
    183       all_native_voices_->find(voice.name);
    184   if (it != all_native_voices_->end()) {
    185     libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
    186     libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
    187   }
    188 
    189   // Map our multiplicative range to Speech Dispatcher's linear range.
    190   // .334 = -100.
    191   // 3 = 100.
    192   libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
    193   libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));
    194 
    195   utterance_ = utterance;
    196   utterance_id_ = utterance_id;
    197 
    198   if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
    199     Reset();
    200     return false;
    201   }
    202   return true;
    203 }
    204 
    205 bool TtsPlatformImplLinux::StopSpeaking() {
    206   if (!PlatformImplAvailable())
    207     return false;
    208   if (libspeechd_loader_.spd_stop(conn_) == -1) {
    209     Reset();
    210     return false;
    211   }
    212   return true;
    213 }
    214 
    215 void TtsPlatformImplLinux::Pause() {
    216   if (!PlatformImplAvailable())
    217     return;
    218   libspeechd_loader_.spd_pause(conn_);
    219 }
    220 
    221 void TtsPlatformImplLinux::Resume() {
    222   if (!PlatformImplAvailable())
    223     return;
    224   libspeechd_loader_.spd_resume(conn_);
    225 }
    226 
    227 bool TtsPlatformImplLinux::IsSpeaking() {
    228   return current_notification_ == SPD_EVENT_BEGIN;
    229 }
    230 
    231 void TtsPlatformImplLinux::GetVoices(
    232     std::vector<VoiceData>* out_voices) {
    233   if (!all_native_voices_.get()) {
    234     all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
    235     char** modules = libspeechd_loader_.spd_list_modules(conn_);
    236     if (!modules)
    237       return;
    238     for (int i = 0; modules[i]; i++) {
    239       char* module = modules[i];
    240       libspeechd_loader_.spd_set_output_module(conn_, module);
    241       SPDVoice** native_voices =
    242           libspeechd_loader_.spd_list_synthesis_voices(conn_);
    243       if (!native_voices) {
    244         free(module);
    245         continue;
    246       }
    247       for (int j = 0; native_voices[j]; j++) {
    248         SPDVoice* native_voice = native_voices[j];
    249         SPDChromeVoice native_data;
    250         native_data.name = native_voice->name;
    251         native_data.module = module;
    252         std::string key;
    253         key.append(native_data.name);
    254         key.append(" ");
    255         key.append(native_data.module);
    256         all_native_voices_->insert(
    257             std::pair<std::string, SPDChromeVoice>(key, native_data));
    258         free(native_voices[j]);
    259       }
    260       free(modules[i]);
    261     }
    262   }
    263 
    264   for (std::map<std::string, SPDChromeVoice>::iterator it =
    265            all_native_voices_->begin();
    266        it != all_native_voices_->end();
    267        it++) {
    268     out_voices->push_back(VoiceData());
    269     VoiceData& voice = out_voices->back();
    270     voice.native = true;
    271     voice.name = it->first;
    272     voice.events.insert(TTS_EVENT_START);
    273     voice.events.insert(TTS_EVENT_END);
    274     voice.events.insert(TTS_EVENT_CANCELLED);
    275     voice.events.insert(TTS_EVENT_MARKER);
    276     voice.events.insert(TTS_EVENT_PAUSE);
    277     voice.events.insert(TTS_EVENT_RESUME);
    278   }
    279 }
    280 
    281 void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
    282   TtsController* controller = TtsController::GetInstance();
    283   switch (type) {
    284   case SPD_EVENT_BEGIN:
    285     controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, std::string());
    286     break;
    287   case SPD_EVENT_RESUME:
    288     controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, 0, std::string());
    289     break;
    290   case SPD_EVENT_END:
    291     controller->OnTtsEvent(
    292         utterance_id_, TTS_EVENT_END, utterance_.size(), std::string());
    293     break;
    294   case SPD_EVENT_PAUSE:
    295     controller->OnTtsEvent(
    296         utterance_id_, TTS_EVENT_PAUSE, utterance_.size(), std::string());
    297     break;
    298   case SPD_EVENT_CANCEL:
    299     controller->OnTtsEvent(
    300         utterance_id_, TTS_EVENT_CANCELLED, 0, std::string());
    301     break;
    302   case SPD_EVENT_INDEX_MARK:
    303     controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, 0, std::string());
    304     break;
    305   }
    306 }
    307 
    308 // static
    309 void TtsPlatformImplLinux::NotificationCallback(
    310     size_t msg_id, size_t client_id, SPDNotificationType type) {
    311   // We run Speech Dispatcher in threaded mode, so these callbacks should always
    312   // be in a separate thread.
    313   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
    314     current_notification_ = type;
    315     BrowserThread::PostTask(
    316         BrowserThread::UI,
    317         FROM_HERE,
    318         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
    319                    base::Unretained(TtsPlatformImplLinux::GetInstance()),
    320                    type));
    321   }
    322 }
    323 
    324 // static
    325 void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
    326                                                       size_t client_id,
    327                                                       SPDNotificationType state,
    328                                                       char* index_mark) {
    329   // TODO(dtseng): index_mark appears to specify an index type supplied by a
    330   // client. Need to explore how this is used before hooking it up with existing
    331   // word, sentence events.
    332   // We run Speech Dispatcher in threaded mode, so these callbacks should always
    333   // be in a separate thread.
    334   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
    335     current_notification_ = state;
    336     BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
    337         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
    338         base::Unretained(TtsPlatformImplLinux::GetInstance()),
    339         state));
    340   }
    341 }
    342 
    343 // static
    344 TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
    345   return Singleton<TtsPlatformImplLinux,
    346                    LeakySingletonTraits<TtsPlatformImplLinux> >::get();
    347 }
    348 
    349 // static
    350 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
    351   return TtsPlatformImplLinux::GetInstance();
    352 }
    353