Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/speech/tts_controller.h"
      6 
      7 #include <string>
      8 #include <vector>
      9 
     10 #include "base/float_util.h"
     11 #include "base/values.h"
     12 #include "chrome/browser/extensions/extension_system.h"
     13 #include "chrome/browser/profiles/profile.h"
     14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
     15 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
     16 #include "chrome/browser/speech/tts_platform.h"
     17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
     18 #include "chrome/common/extensions/extension.h"
     19 
     20 namespace {
     21 // A value to be used to indicate that there is no char index available.
     22 const int kInvalidCharIndex = -1;
     23 
     24 // Given a language/region code of the form 'fr-FR', returns just the basic
     25 // language portion, e.g. 'fr'.
     26 std::string TrimLanguageCode(std::string lang) {
     27   if (lang.size() >= 5 && lang[2] == '-')
     28     return lang.substr(0, 2);
     29   else
     30     return lang;
     31 }
     32 
     33 }  // namespace
     34 
     35 bool IsFinalTtsEventType(TtsEventType event_type) {
     36   return (event_type == TTS_EVENT_END ||
     37           event_type == TTS_EVENT_INTERRUPTED ||
     38           event_type == TTS_EVENT_CANCELLED ||
     39           event_type == TTS_EVENT_ERROR);
     40 }
     41 
     42 //
     43 // UtteranceContinuousParameters
     44 //
     45 
     46 
// Initializes rate, pitch and volume to -1.
// NOTE(review): -1 presumably stands for "not specified, use the engine
// default" -- confirm against the header / platform implementations.
UtteranceContinuousParameters::UtteranceContinuousParameters()
    : rate(-1),
      pitch(-1),
      volume(-1) {}
     51 
     52 
     53 //
     54 // VoiceData
     55 //
     56 
     57 
// A default-constructed voice has no gender (TTS_GENDER_NONE) and is not
// marked as native.
VoiceData::VoiceData()
    : gender(TTS_GENDER_NONE),
      native(false) {}

// Empty out-of-line destructor.
VoiceData::~VoiceData() {}
     63 
     64 
     65 //
     66 // Utterance
     67 //
     68 
// static
// Source of utterance ids: the Utterance constructor takes the current value
// as its id and post-increments this counter.
int Utterance::next_utterance_id_ = 0;
     71 
// Creates an unfinished utterance associated with |profile|. The utterance
// takes the next id from next_utterance_id_ and starts with a source id of
// -1, no event delegate, enqueueing disabled and a char index of 0.
Utterance::Utterance(Profile* profile)
    : profile_(profile),
      id_(next_utterance_id_++),
      src_id_(-1),
      event_delegate_(NULL),
      can_enqueue_(false),
      char_index_(0),
      finished_(false) {
  // Start out with an empty options dictionary.
  options_.reset(new DictionaryValue());
}
     82 
// An utterance is expected to have been marked finished (by a final event in
// OnTtsEvent() or by Finish()) before it is destroyed; the DCHECK asserts it.
Utterance::~Utterance() {
  DCHECK(finished_);
}
     86 
     87 void Utterance::OnTtsEvent(TtsEventType event_type,
     88                            int char_index,
     89                            const std::string& error_message) {
     90   if (char_index >= 0)
     91     char_index_ = char_index;
     92   if (IsFinalTtsEventType(event_type))
     93     finished_ = true;
     94 
     95   if (event_delegate_)
     96     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
     97   if (finished_)
     98     event_delegate_ = NULL;
     99 }
    100 
// Marks the utterance as finished without notifying the event delegate.
void Utterance::Finish() {
  finished_ = true;
}
    104 
// Replaces the stored options with a deep copy of |options|. |options| is
// dereferenced unconditionally, so it must not be NULL.
void Utterance::set_options(const Value* options) {
  options_.reset(options->DeepCopy());
}
    108 
    109 //
    110 // TtsController
    111 //
    112 
// static
// Returns the TtsController instance provided by Singleton<TtsController>.
TtsController* TtsController::GetInstance() {
  return Singleton<TtsController>::get();
}
    117 
// Starts idle: no current utterance, not paused, and no platform
// implementation yet (GetPlatformImpl() fetches one on first use).
TtsController::TtsController()
    : current_utterance_(NULL),
      paused_(false),
      platform_impl_(NULL) {
}
    123 
// Destroys the current utterance and everything still queued, without
// sending any events to their delegates.
TtsController::~TtsController() {
  if (current_utterance_) {
    // Mark finished first so the Utterance destructor's DCHECK holds.
    current_utterance_->Finish();
    delete current_utterance_;
  }

  // Clear any queued utterances too.
  ClearUtteranceQueue(false);  // Don't send events.
}
    133 
    134 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
    135   // If we're paused and we get an utterance that can't be queued,
    136   // flush the queue but stay in the paused state.
    137   if (paused_ && !utterance->can_enqueue()) {
    138     Stop();
    139     paused_ = true;
    140     return;
    141   }
    142 
    143   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
    144     utterance_queue_.push(utterance);
    145   } else {
    146     Stop();
    147     SpeakNow(utterance);
    148   }
    149 }
    150 
// Starts speaking |utterance| right away with the best matching voice.
// Depending on the outcome the utterance becomes current_utterance_, is put
// back on the queue, or is deleted here:
//  - Extension voice: the utterance is sent to the extension engine. If the
//    voice does not report TTS_EVENT_END, the utterance is finished and
//    deleted immediately and the next queued utterance is started.
//  - Native voice, Speak() succeeds: the utterance becomes current.
//  - Native voice, Speak() fails but LoadBuiltInTtsExtension() returns true:
//    the utterance is re-queued.
//  - Native voice, Speak() fails otherwise: an ERROR event carrying the
//    platform error is sent and the utterance is deleted.
void TtsController::SpeakNow(Utterance* utterance) {
  // Get all available voices and try to find a matching voice.
  std::vector<VoiceData> voices;
  GetVoices(utterance->profile(), &voices);
  int index = GetMatchingVoice(utterance, voices);

  // Select the matching voice, but if none was found, initialize an
  // empty VoiceData with native = true, which will give the native
  // speech synthesizer a chance to try to synthesize the utterance
  // anyway.
  VoiceData voice;
  if (index >= 0 && index < static_cast<int>(voices.size()))
    voice = voices[index];
  else
    voice.native = true;

  if (!voice.native) {
    // NOTE(review): on OS_ANDROID this branch compiles to nothing, so an
    // utterance reaching it would be neither spoken nor deleted. Confirm
    // that non-native voices cannot be selected on that platform.
#if !defined(OS_ANDROID)
    DCHECK(!voice.extension_id.empty());
    current_utterance_ = utterance;
    utterance->set_extension_id(voice.extension_id);
    ExtensionTtsEngineSpeak(utterance, voice);
    bool sends_end_event =
        voice.events.find(TTS_EVENT_END) != voice.events.end();
    if (!sends_end_event) {
      // No END event will ever arrive for this voice, so there is nothing to
      // wait for: finish the utterance now and move on to the next one.
      utterance->Finish();
      delete utterance;
      current_utterance_ = NULL;
      SpeakNextUtterance();
    }
#endif
  } else {
    GetPlatformImpl()->clear_error();
    bool success = GetPlatformImpl()->Speak(
        utterance->id(),
        utterance->text(),
        utterance->lang(),
        voice,
        utterance->continuous_parameters());

    // If the native voice wasn't able to process this speech, see if
    // the browser has built-in TTS that isn't loaded yet.
    if (!success &&
        GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
      // Keep the utterance queued so it can be retried later.
      utterance_queue_.push(utterance);
      return;
    }

    if (!success) {
      utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
                            GetPlatformImpl()->error());
      delete utterance;
      return;
    }
    // NOTE(review): current_utterance_ is only set after Speak() returns, so
    // any event delivered to TtsController::OnTtsEvent() during Speak() is
    // ignored there because the id check fails. Confirm no platform sends
    // events synchronously.
    current_utterance_ = utterance;
  }
}
    208 
    209 void TtsController::Stop() {
    210   paused_ = false;
    211   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    212 #if !defined(OS_ANDROID)
    213     ExtensionTtsEngineStop(current_utterance_);
    214 #endif
    215   } else {
    216     GetPlatformImpl()->clear_error();
    217     GetPlatformImpl()->StopSpeaking();
    218   }
    219 
    220   if (current_utterance_)
    221     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
    222                                    std::string());
    223   FinishCurrentUtterance();
    224   ClearUtteranceQueue(true);  // Send events.
    225 }
    226 
    227 void TtsController::Pause() {
    228   paused_ = true;
    229   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    230 #if !defined(OS_ANDROID)
    231     ExtensionTtsEnginePause(current_utterance_);
    232 #endif
    233   } else if (current_utterance_) {
    234     GetPlatformImpl()->clear_error();
    235     GetPlatformImpl()->Pause();
    236   }
    237 }
    238 
    239 void TtsController::Resume() {
    240   paused_ = false;
    241   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
    242 #if !defined(OS_ANDROID)
    243     ExtensionTtsEngineResume(current_utterance_);
    244 #endif
    245   } else if (current_utterance_) {
    246     GetPlatformImpl()->clear_error();
    247     GetPlatformImpl()->Resume();
    248   } else {
    249     SpeakNextUtterance();
    250   }
    251 }
    252 
    253 void TtsController::OnTtsEvent(int utterance_id,
    254                                         TtsEventType event_type,
    255                                         int char_index,
    256                                         const std::string& error_message) {
    257   // We may sometimes receive completion callbacks "late", after we've
    258   // already finished the utterance (for example because another utterance
    259   // interrupted or we got a call to Stop). This is normal and we can
    260   // safely just ignore these events.
    261   if (!current_utterance_ || utterance_id != current_utterance_->id())
    262     return;
    263 
    264   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
    265   if (current_utterance_->finished()) {
    266     FinishCurrentUtterance();
    267     SpeakNextUtterance();
    268   }
    269 }
    270 
    271 void TtsController::GetVoices(Profile* profile,
    272                               std::vector<VoiceData>* out_voices) {
    273 #if !defined(OS_ANDROID)
    274   if (profile)
    275     GetExtensionVoices(profile, out_voices);
    276 #endif
    277 
    278   TtsPlatformImpl* platform_impl = GetPlatformImpl();
    279   if (platform_impl && platform_impl->PlatformImplAvailable())
    280     platform_impl->GetVoices(out_voices);
    281 }
    282 
    283 bool TtsController::IsSpeaking() {
    284   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
    285 }
    286 
    287 void TtsController::FinishCurrentUtterance() {
    288   if (current_utterance_) {
    289     if (!current_utterance_->finished())
    290       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
    291                                      std::string());
    292     delete current_utterance_;
    293     current_utterance_ = NULL;
    294   }
    295 }
    296 
    297 void TtsController::SpeakNextUtterance() {
    298   if (paused_)
    299     return;
    300 
    301   // Start speaking the next utterance in the queue.  Keep trying in case
    302   // one fails but there are still more in the queue to try.
    303   while (!utterance_queue_.empty() && !current_utterance_) {
    304     Utterance* utterance = utterance_queue_.front();
    305     utterance_queue_.pop();
    306     SpeakNow(utterance);
    307   }
    308 }
    309 
    310 void TtsController::RetrySpeakingQueuedUtterances() {
    311   if (current_utterance_ == NULL && !utterance_queue_.empty())
    312     SpeakNextUtterance();
    313 }
    314 
    315 void TtsController::ClearUtteranceQueue(bool send_events) {
    316   while (!utterance_queue_.empty()) {
    317     Utterance* utterance = utterance_queue_.front();
    318     utterance_queue_.pop();
    319     if (send_events)
    320       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
    321                             std::string());
    322     else
    323       utterance->Finish();
    324     delete utterance;
    325   }
    326 }
    327 
// Overrides the platform implementation returned by GetPlatformImpl().
// The pointer is stored as-is; this class never deletes it.
void TtsController::SetPlatformImpl(
    TtsPlatformImpl* platform_impl) {
  platform_impl_ = platform_impl;
}
    332 
// Returns the number of utterances waiting in the queue (the current
// utterance is not stored in the queue and so is not counted).
int TtsController::QueueSize() {
  return static_cast<int>(utterance_queue_.size());
}
    336 
    337 TtsPlatformImpl* TtsController::GetPlatformImpl() {
    338   if (!platform_impl_)
    339     platform_impl_ = TtsPlatformImpl::GetInstance();
    340   return platform_impl_;
    341 }
    342 
    343 int TtsController::GetMatchingVoice(
    344     const Utterance* utterance, std::vector<VoiceData>& voices) {
    345   // Make two passes: the first time, do strict language matching
    346   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
    347   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
    348   for (int pass = 0; pass < 2; ++pass) {
    349     for (size_t i = 0; i < voices.size(); ++i) {
    350       const VoiceData& voice = voices[i];
    351 
    352       if (!utterance->extension_id().empty() &&
    353           utterance->extension_id() != voice.extension_id) {
    354         continue;
    355       }
    356 
    357       if (!voice.name.empty() &&
    358           !utterance->voice_name().empty() &&
    359           voice.name != utterance->voice_name()) {
    360         continue;
    361       }
    362       if (!voice.lang.empty() && !utterance->lang().empty()) {
    363         std::string voice_lang = voice.lang;
    364         std::string utterance_lang = utterance->lang();
    365         if (pass == 1) {
    366           voice_lang = TrimLanguageCode(voice_lang);
    367           utterance_lang = TrimLanguageCode(utterance_lang);
    368         }
    369         if (voice_lang != utterance_lang) {
    370           continue;
    371         }
    372       }
    373       if (voice.gender != TTS_GENDER_NONE &&
    374           utterance->gender() != TTS_GENDER_NONE &&
    375           voice.gender != utterance->gender()) {
    376         continue;
    377       }
    378 
    379       if (utterance->required_event_types().size() > 0) {
    380         bool has_all_required_event_types = true;
    381         for (std::set<TtsEventType>::const_iterator iter =
    382                  utterance->required_event_types().begin();
    383              iter != utterance->required_event_types().end();
    384              ++iter) {
    385           if (voice.events.find(*iter) == voice.events.end()) {
    386             has_all_required_event_types = false;
    387             break;
    388           }
    389         }
    390         if (!has_all_required_event_types)
    391           continue;
    392       }
    393 
    394       return static_cast<int>(i);
    395     }
    396   }
    397 
    398   return -1;
    399 }
    400 
    401 void TtsController::VoicesChanged() {
    402   for (std::set<VoicesChangedDelegate*>::iterator iter =
    403            voices_changed_delegates_.begin();
    404        iter != voices_changed_delegates_.end(); ++iter) {
    405     (*iter)->OnVoicesChanged();
    406   }
    407 }
    408 
// Registers |delegate| to be called by VoicesChanged(). The delegates are
// kept in a std::set, so registering the same pointer twice has no effect.
void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
  voices_changed_delegates_.insert(delegate);
}
    412 
// Unregisters |delegate|; does nothing if it was not registered.
void TtsController::RemoveVoicesChangedDelegate(
    VoicesChangedDelegate* delegate) {
  voices_changed_delegates_.erase(delegate);
}
    417 
    418