Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/speech/tts_controller.h"
      6 
      7 #include <string>
      8 #include <vector>
      9 
     10 #include "base/float_util.h"
     11 #include "base/values.h"
     12 #include "chrome/browser/extensions/extension_system.h"
     13 #include "chrome/browser/profiles/profile.h"
     14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
     15 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
     16 #include "chrome/browser/speech/tts_platform.h"
     17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
     18 #include "extensions/common/extension.h"
     19 
     20 namespace {
     21 // A value to be used to indicate that there is no char index available.
     22 const int kInvalidCharIndex = -1;
     23 
     24 // Given a language/region code of the form 'fr-FR', returns just the basic
     25 // language portion, e.g. 'fr'.
     26 std::string TrimLanguageCode(std::string lang) {
     27   if (lang.size() >= 5 && lang[2] == '-')
     28     return lang.substr(0, 2);
     29   else
     30     return lang;
     31 }
     32 
     33 }  // namespace
     34 
     35 bool IsFinalTtsEventType(TtsEventType event_type) {
     36   return (event_type == TTS_EVENT_END ||
     37           event_type == TTS_EVENT_INTERRUPTED ||
     38           event_type == TTS_EVENT_CANCELLED ||
     39           event_type == TTS_EVENT_ERROR);
     40 }
     41 
     42 //
     43 // UtteranceContinuousParameters
     44 //
     45 
     46 
     47 UtteranceContinuousParameters::UtteranceContinuousParameters()
     48     : rate(-1),
     49       pitch(-1),
     50       volume(-1) {}
     51 
     52 
     53 //
     54 // VoiceData
     55 //
     56 
     57 
     58 VoiceData::VoiceData()
     59     : gender(TTS_GENDER_NONE),
     60       remote(false),
     61       native(false) {}
     62 
     63 VoiceData::~VoiceData() {}
     64 
     65 
     66 //
     67 // Utterance
     68 //
     69 
     70 // static
     71 int Utterance::next_utterance_id_ = 0;
     72 
     73 Utterance::Utterance(Profile* profile)
     74     : profile_(profile),
     75       id_(next_utterance_id_++),
     76       src_id_(-1),
     77       gender_(TTS_GENDER_NONE),
     78       can_enqueue_(false),
     79       char_index_(0),
     80       finished_(false) {
     81   options_.reset(new DictionaryValue());
     82 }
     83 
     84 Utterance::~Utterance() {
     85   DCHECK(finished_);
     86 }
     87 
     88 void Utterance::OnTtsEvent(TtsEventType event_type,
     89                            int char_index,
     90                            const std::string& error_message) {
     91   if (char_index >= 0)
     92     char_index_ = char_index;
     93   if (IsFinalTtsEventType(event_type))
     94     finished_ = true;
     95 
     96   if (event_delegate_)
     97     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
     98   if (finished_)
     99     event_delegate_.reset();
    100 }
    101 
    102 void Utterance::Finish() {
    103   finished_ = true;
    104 }
    105 
    106 void Utterance::set_options(const Value* options) {
    107   options_.reset(options->DeepCopy());
    108 }
    109 
    110 //
    111 // TtsController
    112 //
    113 
    114 // static
    115 TtsController* TtsController::GetInstance() {
    116   return Singleton<TtsController>::get();
    117 }
    118 
    119 TtsController::TtsController()
    120     : current_utterance_(NULL),
    121       paused_(false),
    122       platform_impl_(NULL) {
    123 }
    124 
    125 TtsController::~TtsController() {
    126   if (current_utterance_) {
    127     current_utterance_->Finish();
    128     delete current_utterance_;
    129   }
    130 
    131   // Clear any queued utterances too.
    132   ClearUtteranceQueue(false);  // Don't sent events.
    133 }
    134 
    135 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
    136   // If we're paused and we get an utterance that can't be queued,
    137   // flush the queue but stay in the paused state.
    138   if (paused_ && !utterance->can_enqueue()) {
    139     Stop();
    140     paused_ = true;
    141     return;
    142   }
    143 
    144   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
    145     utterance_queue_.push(utterance);
    146   } else {
    147     Stop();
    148     SpeakNow(utterance);
    149   }
    150 }
    151 
    152 void TtsController::SpeakNow(Utterance* utterance) {
    153   // Get all available voices and try to find a matching voice.
    154   std::vector<VoiceData> voices;
    155   GetVoices(utterance->profile(), &voices);
    156   int index = GetMatchingVoice(utterance, voices);
    157 
    158   // Select the matching voice, but if none was found, initialize an
    159   // empty VoiceData with native = true, which will give the native
    160   // speech synthesizer a chance to try to synthesize the utterance
    161   // anyway.
    162   VoiceData voice;
    163   if (index >= 0 && index < static_cast<int>(voices.size()))
    164     voice = voices[index];
    165   else
    166     voice.native = true;
    167 
    168   if (!voice.native) {
    169 #if !defined(OS_ANDROID)
    170     DCHECK(!voice.extension_id.empty());
    171     current_utterance_ = utterance;
    172     utterance->set_extension_id(voice.extension_id);
    173     ExtensionTtsEngineSpeak(utterance, voice);
    174     bool sends_end_event =
    175         voice.events.find(TTS_EVENT_END) != voice.events.end();
    176     if (!sends_end_event) {
    177       utterance->Finish();
    178       delete utterance;
    179       current_utterance_ = NULL;
    180       SpeakNextUtterance();
    181     }
    182 #endif
    183   } else {
    184     // It's possible for certain platforms to send start events immediately
    185     // during |speak|.
    186     current_utterance_ = utterance;
    187     GetPlatformImpl()->clear_error();
    188     bool success = GetPlatformImpl()->Speak(
    189         utterance->id(),
    190         utterance->text(),
    191         utterance->lang(),
    192         voice,
    193         utterance->continuous_parameters());
    194     if (!success)
    195       current_utterance_ = NULL;
    196 
    197     // If the native voice wasn't able to process this speech, see if
    198     // the browser has built-in TTS that isn't loaded yet.
    199     if (!success &&
    200         GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
    201       utterance_queue_.push(utterance);
    202       return;
    203     }
    204 
    205     if (!success) {
    206       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
    207                             GetPlatformImpl()->error());
    208       delete utterance;
    209       return;
    210     }
    211   }
    212 }
    213 
    214 void TtsController::Stop() {
    215   paused_ = false;
    216   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    217 #if !defined(OS_ANDROID)
    218     ExtensionTtsEngineStop(current_utterance_);
    219 #endif
    220   } else {
    221     GetPlatformImpl()->clear_error();
    222     GetPlatformImpl()->StopSpeaking();
    223   }
    224 
    225   if (current_utterance_)
    226     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
    227                                    std::string());
    228   FinishCurrentUtterance();
    229   ClearUtteranceQueue(true);  // Send events.
    230 }
    231 
    232 void TtsController::Pause() {
    233   paused_ = true;
    234   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    235 #if !defined(OS_ANDROID)
    236     ExtensionTtsEnginePause(current_utterance_);
    237 #endif
    238   } else if (current_utterance_) {
    239     GetPlatformImpl()->clear_error();
    240     GetPlatformImpl()->Pause();
    241   }
    242 }
    243 
    244 void TtsController::Resume() {
    245   paused_ = false;
    246   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    247 #if !defined(OS_ANDROID)
    248     ExtensionTtsEngineResume(current_utterance_);
    249 #endif
    250   } else if (current_utterance_) {
    251     GetPlatformImpl()->clear_error();
    252     GetPlatformImpl()->Resume();
    253   } else {
    254     SpeakNextUtterance();
    255   }
    256 }
    257 
    258 void TtsController::OnTtsEvent(int utterance_id,
    259                                         TtsEventType event_type,
    260                                         int char_index,
    261                                         const std::string& error_message) {
    262   // We may sometimes receive completion callbacks "late", after we've
    263   // already finished the utterance (for example because another utterance
    264   // interrupted or we got a call to Stop). This is normal and we can
    265   // safely just ignore these events.
    266   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
    267     return;
    268   }
    269   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
    270   if (current_utterance_->finished()) {
    271     FinishCurrentUtterance();
    272     SpeakNextUtterance();
    273   }
    274 }
    275 
    276 void TtsController::GetVoices(Profile* profile,
    277                               std::vector<VoiceData>* out_voices) {
    278 #if !defined(OS_ANDROID)
    279   if (profile)
    280     GetExtensionVoices(profile, out_voices);
    281 #endif
    282 
    283   TtsPlatformImpl* platform_impl = GetPlatformImpl();
    284   if (platform_impl && platform_impl->PlatformImplAvailable())
    285     platform_impl->GetVoices(out_voices);
    286 }
    287 
    288 bool TtsController::IsSpeaking() {
    289   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
    290 }
    291 
    292 void TtsController::FinishCurrentUtterance() {
    293   if (current_utterance_) {
    294     if (!current_utterance_->finished())
    295       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
    296                                      std::string());
    297     delete current_utterance_;
    298     current_utterance_ = NULL;
    299   }
    300 }
    301 
    302 void TtsController::SpeakNextUtterance() {
    303   if (paused_)
    304     return;
    305 
    306   // Start speaking the next utterance in the queue.  Keep trying in case
    307   // one fails but there are still more in the queue to try.
    308   while (!utterance_queue_.empty() && !current_utterance_) {
    309     Utterance* utterance = utterance_queue_.front();
    310     utterance_queue_.pop();
    311     SpeakNow(utterance);
    312   }
    313 }
    314 
    315 void TtsController::RetrySpeakingQueuedUtterances() {
    316   if (current_utterance_ == NULL && !utterance_queue_.empty())
    317     SpeakNextUtterance();
    318 }
    319 
    320 void TtsController::ClearUtteranceQueue(bool send_events) {
    321   while (!utterance_queue_.empty()) {
    322     Utterance* utterance = utterance_queue_.front();
    323     utterance_queue_.pop();
    324     if (send_events)
    325       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
    326                             std::string());
    327     else
    328       utterance->Finish();
    329     delete utterance;
    330   }
    331 }
    332 
    333 void TtsController::SetPlatformImpl(
    334     TtsPlatformImpl* platform_impl) {
    335   platform_impl_ = platform_impl;
    336 }
    337 
    338 int TtsController::QueueSize() {
    339   return static_cast<int>(utterance_queue_.size());
    340 }
    341 
    342 TtsPlatformImpl* TtsController::GetPlatformImpl() {
    343   if (!platform_impl_)
    344     platform_impl_ = TtsPlatformImpl::GetInstance();
    345   return platform_impl_;
    346 }
    347 
    348 int TtsController::GetMatchingVoice(
    349     const Utterance* utterance, std::vector<VoiceData>& voices) {
    350   // Make two passes: the first time, do strict language matching
    351   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
    352   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
    353   for (int pass = 0; pass < 2; ++pass) {
    354     for (size_t i = 0; i < voices.size(); ++i) {
    355       const VoiceData& voice = voices[i];
    356 
    357       if (!utterance->extension_id().empty() &&
    358           utterance->extension_id() != voice.extension_id) {
    359         continue;
    360       }
    361 
    362       if (!voice.name.empty() &&
    363           !utterance->voice_name().empty() &&
    364           voice.name != utterance->voice_name()) {
    365         continue;
    366       }
    367       if (!voice.lang.empty() && !utterance->lang().empty()) {
    368         std::string voice_lang = voice.lang;
    369         std::string utterance_lang = utterance->lang();
    370         if (pass == 1) {
    371           voice_lang = TrimLanguageCode(voice_lang);
    372           utterance_lang = TrimLanguageCode(utterance_lang);
    373         }
    374         if (voice_lang != utterance_lang) {
    375           continue;
    376         }
    377       }
    378       if (voice.gender != TTS_GENDER_NONE &&
    379           utterance->gender() != TTS_GENDER_NONE &&
    380           voice.gender != utterance->gender()) {
    381         continue;
    382       }
    383 
    384       if (utterance->required_event_types().size() > 0) {
    385         bool has_all_required_event_types = true;
    386         for (std::set<TtsEventType>::const_iterator iter =
    387                  utterance->required_event_types().begin();
    388              iter != utterance->required_event_types().end();
    389              ++iter) {
    390           if (voice.events.find(*iter) == voice.events.end()) {
    391             has_all_required_event_types = false;
    392             break;
    393           }
    394         }
    395         if (!has_all_required_event_types)
    396           continue;
    397       }
    398 
    399       return static_cast<int>(i);
    400     }
    401   }
    402 
    403   return -1;
    404 }
    405 
    406 void TtsController::VoicesChanged() {
    407   for (std::set<VoicesChangedDelegate*>::iterator iter =
    408            voices_changed_delegates_.begin();
    409        iter != voices_changed_delegates_.end(); ++iter) {
    410     (*iter)->OnVoicesChanged();
    411   }
    412 }
    413 
    414 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
    415   voices_changed_delegates_.insert(delegate);
    416 }
    417 
    418 void TtsController::RemoveVoicesChangedDelegate(
    419     VoicesChangedDelegate* delegate) {
    420   voices_changed_delegates_.erase(delegate);
    421 }
    422