Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/speech/tts_controller.h"
      6 
      7 #include <string>
      8 #include <vector>
      9 
     10 #include "base/float_util.h"
     11 #include "base/values.h"
     12 #include "chrome/browser/browser_process.h"
     13 #include "chrome/browser/profiles/profile.h"
     14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
     15 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
     16 #include "chrome/browser/speech/tts_platform.h"
     17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
     18 #include "extensions/browser/extension_system.h"
     19 #include "extensions/common/extension.h"
     20 
     21 namespace {
     22 // A value to be used to indicate that there is no char index available.
     23 const int kInvalidCharIndex = -1;
     24 
     25 // Given a language/region code of the form 'fr-FR', returns just the basic
     26 // language portion, e.g. 'fr'.
     27 std::string TrimLanguageCode(std::string lang) {
     28   if (lang.size() >= 5 && lang[2] == '-')
     29     return lang.substr(0, 2);
     30   else
     31     return lang;
     32 }
     33 
     34 }  // namespace
     35 
     36 bool IsFinalTtsEventType(TtsEventType event_type) {
     37   return (event_type == TTS_EVENT_END ||
     38           event_type == TTS_EVENT_INTERRUPTED ||
     39           event_type == TTS_EVENT_CANCELLED ||
     40           event_type == TTS_EVENT_ERROR);
     41 }
     42 
     43 //
     44 // UtteranceContinuousParameters
     45 //
     46 
     47 
     48 UtteranceContinuousParameters::UtteranceContinuousParameters()
     49     : rate(-1),
     50       pitch(-1),
     51       volume(-1) {}
     52 
     53 
     54 //
     55 // VoiceData
     56 //
     57 
     58 
     59 VoiceData::VoiceData()
     60     : gender(TTS_GENDER_NONE),
     61       remote(false),
     62       native(false) {}
     63 
     64 VoiceData::~VoiceData() {}
     65 
     66 
     67 //
     68 // Utterance
     69 //
     70 
     71 // static
     72 int Utterance::next_utterance_id_ = 0;
     73 
     74 Utterance::Utterance(Profile* profile)
     75     : profile_(profile),
     76       id_(next_utterance_id_++),
     77       src_id_(-1),
     78       gender_(TTS_GENDER_NONE),
     79       can_enqueue_(false),
     80       char_index_(0),
     81       finished_(false) {
     82   options_.reset(new base::DictionaryValue());
     83 }
     84 
     85 Utterance::~Utterance() {
     86   DCHECK(finished_);
     87 }
     88 
     89 void Utterance::OnTtsEvent(TtsEventType event_type,
     90                            int char_index,
     91                            const std::string& error_message) {
     92   if (char_index >= 0)
     93     char_index_ = char_index;
     94   if (IsFinalTtsEventType(event_type))
     95     finished_ = true;
     96 
     97   if (event_delegate_)
     98     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
     99   if (finished_)
    100     event_delegate_.reset();
    101 }
    102 
    103 void Utterance::Finish() {
    104   finished_ = true;
    105 }
    106 
    107 void Utterance::set_options(const base::Value* options) {
    108   options_.reset(options->DeepCopy());
    109 }
    110 
    111 //
    112 // TtsController
    113 //
    114 
    115 // static
    116 TtsController* TtsController::GetInstance() {
    117   return Singleton<TtsController>::get();
    118 }
    119 
    120 TtsController::TtsController()
    121     : current_utterance_(NULL),
    122       paused_(false),
    123       platform_impl_(NULL) {
    124 }
    125 
    126 TtsController::~TtsController() {
    127   if (current_utterance_) {
    128     current_utterance_->Finish();
    129     delete current_utterance_;
    130   }
    131 
    132   // Clear any queued utterances too.
    133   ClearUtteranceQueue(false);  // Don't sent events.
    134 }
    135 
    136 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
    137   // If we're paused and we get an utterance that can't be queued,
    138   // flush the queue but stay in the paused state.
    139   if (paused_ && !utterance->can_enqueue()) {
    140     Stop();
    141     paused_ = true;
    142     return;
    143   }
    144 
    145   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
    146     utterance_queue_.push(utterance);
    147   } else {
    148     Stop();
    149     SpeakNow(utterance);
    150   }
    151 }
    152 
    153 void TtsController::SpeakNow(Utterance* utterance) {
    154   // Get all available voices and try to find a matching voice.
    155   std::vector<VoiceData> voices;
    156   GetVoices(utterance->profile(), &voices);
    157   int index = GetMatchingVoice(utterance, voices);
    158 
    159   VoiceData voice;
    160   if (index != -1) {
    161     // Select the matching voice.
    162     voice = voices[index];
    163   } else {
    164     // However, if no match was found on a platform without native tts voices,
    165     // attempt to get a voice based only on the current locale without respect
    166     // to any supplied voice names.
    167     std::vector<VoiceData> native_voices;
    168 
    169     if (GetPlatformImpl()->PlatformImplAvailable())
    170       GetPlatformImpl()->GetVoices(&native_voices);
    171 
    172     if (native_voices.empty() && !voices.empty()) {
    173       // TODO(dtseng): Notify extension caller of an error.
    174       utterance->set_voice_name("");
    175       utterance->set_lang(g_browser_process->GetApplicationLocale());
    176       index = GetMatchingVoice(utterance, voices);
    177 
    178       // If even that fails, just take the first available voice.
    179       if (index == -1)
    180         index = 0;
    181       voice = voices[index];
    182     } else {
    183       // Otherwise, simply give native voices a chance to handle this utterance.
    184       voice.native = true;
    185     }
    186   }
    187 
    188   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
    189 
    190   if (!voice.native) {
    191 #if !defined(OS_ANDROID)
    192     DCHECK(!voice.extension_id.empty());
    193     current_utterance_ = utterance;
    194     utterance->set_extension_id(voice.extension_id);
    195     ExtensionTtsEngineSpeak(utterance, voice);
    196     bool sends_end_event =
    197         voice.events.find(TTS_EVENT_END) != voice.events.end();
    198     if (!sends_end_event) {
    199       utterance->Finish();
    200       delete utterance;
    201       current_utterance_ = NULL;
    202       SpeakNextUtterance();
    203     }
    204 #endif
    205   } else {
    206     // It's possible for certain platforms to send start events immediately
    207     // during |speak|.
    208     current_utterance_ = utterance;
    209     GetPlatformImpl()->clear_error();
    210     bool success = GetPlatformImpl()->Speak(
    211         utterance->id(),
    212         utterance->text(),
    213         utterance->lang(),
    214         voice,
    215         utterance->continuous_parameters());
    216     if (!success)
    217       current_utterance_ = NULL;
    218 
    219     // If the native voice wasn't able to process this speech, see if
    220     // the browser has built-in TTS that isn't loaded yet.
    221     if (!success &&
    222         GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
    223       utterance_queue_.push(utterance);
    224       return;
    225     }
    226 
    227     if (!success) {
    228       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
    229                             GetPlatformImpl()->error());
    230       delete utterance;
    231       return;
    232     }
    233   }
    234 }
    235 
    236 void TtsController::Stop() {
    237   paused_ = false;
    238   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    239 #if !defined(OS_ANDROID)
    240     ExtensionTtsEngineStop(current_utterance_);
    241 #endif
    242   } else {
    243     GetPlatformImpl()->clear_error();
    244     GetPlatformImpl()->StopSpeaking();
    245   }
    246 
    247   if (current_utterance_)
    248     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
    249                                    std::string());
    250   FinishCurrentUtterance();
    251   ClearUtteranceQueue(true);  // Send events.
    252 }
    253 
    254 void TtsController::Pause() {
    255   paused_ = true;
    256   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    257 #if !defined(OS_ANDROID)
    258     ExtensionTtsEnginePause(current_utterance_);
    259 #endif
    260   } else if (current_utterance_) {
    261     GetPlatformImpl()->clear_error();
    262     GetPlatformImpl()->Pause();
    263   }
    264 }
    265 
    266 void TtsController::Resume() {
    267   paused_ = false;
    268   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    269 #if !defined(OS_ANDROID)
    270     ExtensionTtsEngineResume(current_utterance_);
    271 #endif
    272   } else if (current_utterance_) {
    273     GetPlatformImpl()->clear_error();
    274     GetPlatformImpl()->Resume();
    275   } else {
    276     SpeakNextUtterance();
    277   }
    278 }
    279 
    280 void TtsController::OnTtsEvent(int utterance_id,
    281                                         TtsEventType event_type,
    282                                         int char_index,
    283                                         const std::string& error_message) {
    284   // We may sometimes receive completion callbacks "late", after we've
    285   // already finished the utterance (for example because another utterance
    286   // interrupted or we got a call to Stop). This is normal and we can
    287   // safely just ignore these events.
    288   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
    289     return;
    290   }
    291   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
    292   if (current_utterance_->finished()) {
    293     FinishCurrentUtterance();
    294     SpeakNextUtterance();
    295   }
    296 }
    297 
    298 void TtsController::GetVoices(Profile* profile,
    299                               std::vector<VoiceData>* out_voices) {
    300 #if !defined(OS_ANDROID)
    301   if (profile)
    302     GetExtensionVoices(profile, out_voices);
    303 #endif
    304 
    305   TtsPlatformImpl* platform_impl = GetPlatformImpl();
    306   if (platform_impl && platform_impl->PlatformImplAvailable())
    307     platform_impl->GetVoices(out_voices);
    308 }
    309 
    310 bool TtsController::IsSpeaking() {
    311   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
    312 }
    313 
    314 void TtsController::FinishCurrentUtterance() {
    315   if (current_utterance_) {
    316     if (!current_utterance_->finished())
    317       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
    318                                      std::string());
    319     delete current_utterance_;
    320     current_utterance_ = NULL;
    321   }
    322 }
    323 
    324 void TtsController::SpeakNextUtterance() {
    325   if (paused_)
    326     return;
    327 
    328   // Start speaking the next utterance in the queue.  Keep trying in case
    329   // one fails but there are still more in the queue to try.
    330   while (!utterance_queue_.empty() && !current_utterance_) {
    331     Utterance* utterance = utterance_queue_.front();
    332     utterance_queue_.pop();
    333     SpeakNow(utterance);
    334   }
    335 }
    336 
    337 void TtsController::RetrySpeakingQueuedUtterances() {
    338   if (current_utterance_ == NULL && !utterance_queue_.empty())
    339     SpeakNextUtterance();
    340 }
    341 
    342 void TtsController::ClearUtteranceQueue(bool send_events) {
    343   while (!utterance_queue_.empty()) {
    344     Utterance* utterance = utterance_queue_.front();
    345     utterance_queue_.pop();
    346     if (send_events)
    347       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
    348                             std::string());
    349     else
    350       utterance->Finish();
    351     delete utterance;
    352   }
    353 }
    354 
    355 void TtsController::SetPlatformImpl(
    356     TtsPlatformImpl* platform_impl) {
    357   platform_impl_ = platform_impl;
    358 }
    359 
    360 int TtsController::QueueSize() {
    361   return static_cast<int>(utterance_queue_.size());
    362 }
    363 
    364 TtsPlatformImpl* TtsController::GetPlatformImpl() {
    365   if (!platform_impl_)
    366     platform_impl_ = TtsPlatformImpl::GetInstance();
    367   return platform_impl_;
    368 }
    369 
    370 int TtsController::GetMatchingVoice(
    371     const Utterance* utterance, std::vector<VoiceData>& voices) {
    372   // Make two passes: the first time, do strict language matching
    373   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
    374   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
    375   for (int pass = 0; pass < 2; ++pass) {
    376     for (size_t i = 0; i < voices.size(); ++i) {
    377       const VoiceData& voice = voices[i];
    378 
    379       if (!utterance->extension_id().empty() &&
    380           utterance->extension_id() != voice.extension_id) {
    381         continue;
    382       }
    383 
    384       if (!voice.name.empty() &&
    385           !utterance->voice_name().empty() &&
    386           voice.name != utterance->voice_name()) {
    387         continue;
    388       }
    389       if (!voice.lang.empty() && !utterance->lang().empty()) {
    390         std::string voice_lang = voice.lang;
    391         std::string utterance_lang = utterance->lang();
    392         if (pass == 1) {
    393           voice_lang = TrimLanguageCode(voice_lang);
    394           utterance_lang = TrimLanguageCode(utterance_lang);
    395         }
    396         if (voice_lang != utterance_lang) {
    397           continue;
    398         }
    399       }
    400       if (voice.gender != TTS_GENDER_NONE &&
    401           utterance->gender() != TTS_GENDER_NONE &&
    402           voice.gender != utterance->gender()) {
    403         continue;
    404       }
    405 
    406       if (utterance->required_event_types().size() > 0) {
    407         bool has_all_required_event_types = true;
    408         for (std::set<TtsEventType>::const_iterator iter =
    409                  utterance->required_event_types().begin();
    410              iter != utterance->required_event_types().end();
    411              ++iter) {
    412           if (voice.events.find(*iter) == voice.events.end()) {
    413             has_all_required_event_types = false;
    414             break;
    415           }
    416         }
    417         if (!has_all_required_event_types)
    418           continue;
    419       }
    420 
    421       return static_cast<int>(i);
    422     }
    423   }
    424 
    425   return -1;
    426 }
    427 
    428 void TtsController::VoicesChanged() {
    429   for (std::set<VoicesChangedDelegate*>::iterator iter =
    430            voices_changed_delegates_.begin();
    431        iter != voices_changed_delegates_.end(); ++iter) {
    432     (*iter)->OnVoicesChanged();
    433   }
    434 }
    435 
    436 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
    437   voices_changed_delegates_.insert(delegate);
    438 }
    439 
    440 void TtsController::RemoveVoicesChangedDelegate(
    441     VoicesChangedDelegate* delegate) {
    442   voices_changed_delegates_.erase(delegate);
    443 }
    444