Home | History | Annotate | Download | only in speech
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/speech/tts_controller_impl.h"
      6 
      7 #include <string>
      8 #include <vector>
      9 
     10 #include "base/float_util.h"
     11 #include "base/values.h"
     12 #include "chrome/browser/browser_process.h"
     13 #include "chrome/browser/speech/tts_platform.h"
     14 
     15 namespace {
     16 // A value to be used to indicate that there is no char index available.
     17 const int kInvalidCharIndex = -1;
     18 
     19 // Given a language/region code of the form 'fr-FR', returns just the basic
     20 // language portion, e.g. 'fr'.
     21 std::string TrimLanguageCode(std::string lang) {
     22   if (lang.size() >= 5 && lang[2] == '-')
     23     return lang.substr(0, 2);
     24   else
     25     return lang;
     26 }
     27 
     28 }  // namespace
     29 
     30 bool IsFinalTtsEventType(TtsEventType event_type) {
     31   return (event_type == TTS_EVENT_END ||
     32           event_type == TTS_EVENT_INTERRUPTED ||
     33           event_type == TTS_EVENT_CANCELLED ||
     34           event_type == TTS_EVENT_ERROR);
     35 }
     36 
     37 //
     38 // UtteranceContinuousParameters
     39 //
     40 
     41 
     42 UtteranceContinuousParameters::UtteranceContinuousParameters()
     43     : rate(-1),
     44       pitch(-1),
     45       volume(-1) {}
     46 
     47 
     48 //
     49 // VoiceData
     50 //
     51 
     52 
     53 VoiceData::VoiceData()
     54     : gender(TTS_GENDER_NONE),
     55       remote(false),
     56       native(false) {}
     57 
     58 VoiceData::~VoiceData() {}
     59 
     60 
     61 //
     62 // Utterance
     63 //
     64 
// static
// Source of utterance ids; each constructed Utterance takes the current value
// and post-increments it.
int Utterance::next_utterance_id_ = 0;

// Creates an unfinished utterance bound to |browser_context|. It receives the
// next sequential id, a source id of -1, no gender, cannot be enqueued by
// default, starts at character index 0, and gets an empty dictionary as its
// options.
Utterance::Utterance(content::BrowserContext* browser_context)
    : browser_context_(browser_context),
      id_(next_utterance_id_++),
      src_id_(-1),
      gender_(TTS_GENDER_NONE),
      can_enqueue_(false),
      char_index_(0),
      finished_(false) {
  options_.reset(new base::DictionaryValue());
}
     78 
// An utterance must have been marked finished (through a final event or
// Finish()) before it is destroyed; this is enforced with a DCHECK.
Utterance::~Utterance() {
  DCHECK(finished_);
}
     82 
     83 void Utterance::OnTtsEvent(TtsEventType event_type,
     84                            int char_index,
     85                            const std::string& error_message) {
     86   if (char_index >= 0)
     87     char_index_ = char_index;
     88   if (IsFinalTtsEventType(event_type))
     89     finished_ = true;
     90 
     91   if (event_delegate_)
     92     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
     93   if (finished_)
     94     event_delegate_.reset();
     95 }
     96 
// Marks the utterance as finished without sending any event to the delegate.
void Utterance::Finish() {
  finished_ = true;
}
    100 
    101 void Utterance::set_options(const base::Value* options) {
    102   options_.reset(options->DeepCopy());
    103 }
    104 
// Returns the controller instance; this is the TtsControllerImpl singleton.
TtsController* TtsController::GetInstance() {
  return TtsControllerImpl::GetInstance();
}
    108 
    109 //
    110 // TtsControllerImpl
    111 //
    112 
// static
// Returns the instance managed by Singleton<TtsControllerImpl>.
TtsControllerImpl* TtsControllerImpl::GetInstance() {
  return Singleton<TtsControllerImpl>::get();
}
    117 
// Starts with no current utterance, not paused, and with neither a platform
// implementation nor an engine delegate set. The platform implementation is
// looked up on first use in GetPlatformImpl().
TtsControllerImpl::TtsControllerImpl()
    : current_utterance_(NULL),
      paused_(false),
      platform_impl_(NULL),
      tts_engine_delegate_(NULL) {
}
    124 
    125 TtsControllerImpl::~TtsControllerImpl() {
    126   if (current_utterance_) {
    127     current_utterance_->Finish();
    128     delete current_utterance_;
    129   }
    130 
    131   // Clear any queued utterances too.
    132   ClearUtteranceQueue(false);  // Don't sent events.
    133 }
    134 
    135 void TtsControllerImpl::SpeakOrEnqueue(Utterance* utterance) {
    136   // If we're paused and we get an utterance that can't be queued,
    137   // flush the queue but stay in the paused state.
    138   if (paused_ && !utterance->can_enqueue()) {
    139     Stop();
    140     paused_ = true;
    141     delete utterance;
    142     return;
    143   }
    144 
    145   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
    146     utterance_queue_.push(utterance);
    147   } else {
    148     Stop();
    149     SpeakNow(utterance);
    150   }
    151 }
    152 
    153 void TtsControllerImpl::SpeakNow(Utterance* utterance) {
    154   // Ensure we have all built-in voices loaded. This is a no-op if already
    155   // loaded.
    156   bool loaded_built_in =
    157       GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context());
    158 
    159   // Get all available voices and try to find a matching voice.
    160   std::vector<VoiceData> voices;
    161   GetVoices(utterance->browser_context(), &voices);
    162   int index = GetMatchingVoice(utterance, voices);
    163 
    164   VoiceData voice;
    165   if (index != -1) {
    166     // Select the matching voice.
    167     voice = voices[index];
    168   } else {
    169     // However, if no match was found on a platform without native tts voices,
    170     // attempt to get a voice based only on the current locale without respect
    171     // to any supplied voice names.
    172     std::vector<VoiceData> native_voices;
    173 
    174     if (GetPlatformImpl()->PlatformImplAvailable())
    175       GetPlatformImpl()->GetVoices(&native_voices);
    176 
    177     if (native_voices.empty() && !voices.empty()) {
    178       // TODO(dtseng): Notify extension caller of an error.
    179       utterance->set_voice_name("");
    180       // TODO(gaochun): Replace the global variable g_browser_process with
    181       // GetContentClient()->browser() to eliminate the dependency of browser
    182       // once TTS implementation was moved to content.
    183       utterance->set_lang(g_browser_process->GetApplicationLocale());
    184       index = GetMatchingVoice(utterance, voices);
    185 
    186       // If even that fails, just take the first available voice.
    187       if (index == -1)
    188         index = 0;
    189       voice = voices[index];
    190     } else {
    191       // Otherwise, simply give native voices a chance to handle this utterance.
    192       voice.native = true;
    193     }
    194   }
    195 
    196   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
    197 
    198   if (!voice.native) {
    199 #if !defined(OS_ANDROID)
    200     DCHECK(!voice.extension_id.empty());
    201     current_utterance_ = utterance;
    202     utterance->set_extension_id(voice.extension_id);
    203     if (tts_engine_delegate_)
    204       tts_engine_delegate_->Speak(utterance, voice);
    205     bool sends_end_event =
    206         voice.events.find(TTS_EVENT_END) != voice.events.end();
    207     if (!sends_end_event) {
    208       utterance->Finish();
    209       delete utterance;
    210       current_utterance_ = NULL;
    211       SpeakNextUtterance();
    212     }
    213 #endif
    214   } else {
    215     // It's possible for certain platforms to send start events immediately
    216     // during |speak|.
    217     current_utterance_ = utterance;
    218     GetPlatformImpl()->clear_error();
    219     bool success = GetPlatformImpl()->Speak(
    220         utterance->id(),
    221         utterance->text(),
    222         utterance->lang(),
    223         voice,
    224         utterance->continuous_parameters());
    225     if (!success)
    226       current_utterance_ = NULL;
    227 
    228     // If the native voice wasn't able to process this speech, see if
    229     // the browser has built-in TTS that isn't loaded yet.
    230     if (!success && loaded_built_in) {
    231       utterance_queue_.push(utterance);
    232       return;
    233     }
    234 
    235     if (!success) {
    236       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
    237                             GetPlatformImpl()->error());
    238       delete utterance;
    239       return;
    240     }
    241   }
    242 }
    243 
    244 void TtsControllerImpl::Stop() {
    245   paused_ = false;
    246   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    247 #if !defined(OS_ANDROID)
    248     if (tts_engine_delegate_)
    249       tts_engine_delegate_->Stop(current_utterance_);
    250 #endif
    251   } else {
    252     GetPlatformImpl()->clear_error();
    253     GetPlatformImpl()->StopSpeaking();
    254   }
    255 
    256   if (current_utterance_)
    257     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
    258                                    std::string());
    259   FinishCurrentUtterance();
    260   ClearUtteranceQueue(true);  // Send events.
    261 }
    262 
    263 void TtsControllerImpl::Pause() {
    264   paused_ = true;
    265   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    266 #if !defined(OS_ANDROID)
    267     if (tts_engine_delegate_)
    268       tts_engine_delegate_->Pause(current_utterance_);
    269 #endif
    270   } else if (current_utterance_) {
    271     GetPlatformImpl()->clear_error();
    272     GetPlatformImpl()->Pause();
    273   }
    274 }
    275 
    276 void TtsControllerImpl::Resume() {
    277   paused_ = false;
    278   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    279 #if !defined(OS_ANDROID)
    280     if (tts_engine_delegate_)
    281       tts_engine_delegate_->Resume(current_utterance_);
    282 #endif
    283   } else if (current_utterance_) {
    284     GetPlatformImpl()->clear_error();
    285     GetPlatformImpl()->Resume();
    286   } else {
    287     SpeakNextUtterance();
    288   }
    289 }
    290 
    291 void TtsControllerImpl::OnTtsEvent(int utterance_id,
    292                                         TtsEventType event_type,
    293                                         int char_index,
    294                                         const std::string& error_message) {
    295   // We may sometimes receive completion callbacks "late", after we've
    296   // already finished the utterance (for example because another utterance
    297   // interrupted or we got a call to Stop). This is normal and we can
    298   // safely just ignore these events.
    299   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
    300     return;
    301   }
    302   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
    303   if (current_utterance_->finished()) {
    304     FinishCurrentUtterance();
    305     SpeakNextUtterance();
    306   }
    307 }
    308 
    309 void TtsControllerImpl::GetVoices(content::BrowserContext* browser_context,
    310                               std::vector<VoiceData>* out_voices) {
    311 #if !defined(OS_ANDROID)
    312   if (browser_context && tts_engine_delegate_)
    313     tts_engine_delegate_->GetVoices(browser_context, out_voices);
    314 #endif
    315 
    316   TtsPlatformImpl* platform_impl = GetPlatformImpl();
    317   if (platform_impl) {
    318     // Ensure we have all built-in voices loaded. This is a no-op if already
    319     // loaded.
    320     platform_impl->LoadBuiltInTtsExtension(browser_context);
    321     if (platform_impl->PlatformImplAvailable())
    322       platform_impl->GetVoices(out_voices);
    323   }
    324 }
    325 
    326 bool TtsControllerImpl::IsSpeaking() {
    327   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
    328 }
    329 
    330 void TtsControllerImpl::FinishCurrentUtterance() {
    331   if (current_utterance_) {
    332     if (!current_utterance_->finished())
    333       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
    334                                      std::string());
    335     delete current_utterance_;
    336     current_utterance_ = NULL;
    337   }
    338 }
    339 
    340 void TtsControllerImpl::SpeakNextUtterance() {
    341   if (paused_)
    342     return;
    343 
    344   // Start speaking the next utterance in the queue.  Keep trying in case
    345   // one fails but there are still more in the queue to try.
    346   while (!utterance_queue_.empty() && !current_utterance_) {
    347     Utterance* utterance = utterance_queue_.front();
    348     utterance_queue_.pop();
    349     SpeakNow(utterance);
    350   }
    351 }
    352 
    353 void TtsControllerImpl::ClearUtteranceQueue(bool send_events) {
    354   while (!utterance_queue_.empty()) {
    355     Utterance* utterance = utterance_queue_.front();
    356     utterance_queue_.pop();
    357     if (send_events)
    358       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
    359                             std::string());
    360     else
    361       utterance->Finish();
    362     delete utterance;
    363   }
    364 }
    365 
// Overrides the platform implementation returned by GetPlatformImpl().
// Passing NULL makes GetPlatformImpl() fall back to
// TtsPlatformImpl::GetInstance() on its next call.
void TtsControllerImpl::SetPlatformImpl(
    TtsPlatformImpl* platform_impl) {
  platform_impl_ = platform_impl;
}
    370 
    371 int TtsControllerImpl::QueueSize() {
    372   return static_cast<int>(utterance_queue_.size());
    373 }
    374 
    375 TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {
    376   if (!platform_impl_)
    377     platform_impl_ = TtsPlatformImpl::GetInstance();
    378   return platform_impl_;
    379 }
    380 
    381 int TtsControllerImpl::GetMatchingVoice(
    382     const Utterance* utterance, std::vector<VoiceData>& voices) {
    383   // Make two passes: the first time, do strict language matching
    384   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
    385   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
    386   for (int pass = 0; pass < 2; ++pass) {
    387     for (size_t i = 0; i < voices.size(); ++i) {
    388       const VoiceData& voice = voices[i];
    389 
    390       if (!utterance->extension_id().empty() &&
    391           utterance->extension_id() != voice.extension_id) {
    392         continue;
    393       }
    394 
    395       if (!voice.name.empty() &&
    396           !utterance->voice_name().empty() &&
    397           voice.name != utterance->voice_name()) {
    398         continue;
    399       }
    400       if (!voice.lang.empty() && !utterance->lang().empty()) {
    401         std::string voice_lang = voice.lang;
    402         std::string utterance_lang = utterance->lang();
    403         if (pass == 1) {
    404           voice_lang = TrimLanguageCode(voice_lang);
    405           utterance_lang = TrimLanguageCode(utterance_lang);
    406         }
    407         if (voice_lang != utterance_lang) {
    408           continue;
    409         }
    410       }
    411       if (voice.gender != TTS_GENDER_NONE &&
    412           utterance->gender() != TTS_GENDER_NONE &&
    413           voice.gender != utterance->gender()) {
    414         continue;
    415       }
    416 
    417       if (utterance->required_event_types().size() > 0) {
    418         bool has_all_required_event_types = true;
    419         for (std::set<TtsEventType>::const_iterator iter =
    420                  utterance->required_event_types().begin();
    421              iter != utterance->required_event_types().end();
    422              ++iter) {
    423           if (voice.events.find(*iter) == voice.events.end()) {
    424             has_all_required_event_types = false;
    425             break;
    426           }
    427         }
    428         if (!has_all_required_event_types)
    429           continue;
    430       }
    431 
    432       return static_cast<int>(i);
    433     }
    434   }
    435 
    436   return -1;
    437 }
    438 
    439 void TtsControllerImpl::VoicesChanged() {
    440   for (std::set<VoicesChangedDelegate*>::iterator iter =
    441            voices_changed_delegates_.begin();
    442        iter != voices_changed_delegates_.end(); ++iter) {
    443     (*iter)->OnVoicesChanged();
    444   }
    445 }
    446 
// Registers |delegate| to be notified by VoicesChanged(). The delegates are
// kept in a set, so registering the same pointer twice has no further effect.
void TtsControllerImpl::AddVoicesChangedDelegate(
    VoicesChangedDelegate* delegate) {
  voices_changed_delegates_.insert(delegate);
}
    451 
// Unregisters |delegate| so that VoicesChanged() no longer notifies it.
// Erasing a pointer that was never registered does nothing.
void TtsControllerImpl::RemoveVoicesChangedDelegate(
    VoicesChangedDelegate* delegate) {
  voices_changed_delegates_.erase(delegate);
}
    456 
// Sets the engine delegate that Speak/Stop/Pause/Resume/GetVoices requests
// are forwarded to. May be NULL; every use in this file is guarded by a NULL
// check. The controller never deletes the delegate in this file.
void TtsControllerImpl::SetTtsEngineDelegate(
    TtsEngineDelegate* delegate) {
  tts_engine_delegate_ = delegate;
}
    461 
// Returns the engine delegate set with SetTtsEngineDelegate(), or NULL if
// none has been set.
TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {
  return tts_engine_delegate_;
}
    465