Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <string>
      6 
      7 #include "base/mac/scoped_nsobject.h"
      8 #include "base/memory/singleton.h"
      9 #include "base/strings/sys_string_conversions.h"
     10 #include "base/values.h"
     11 #include "chrome/browser/speech/tts_controller.h"
     12 #include "chrome/browser/speech/tts_platform.h"
     13 #include "extensions/browser/extension_function.h"
     14 
     15 #import <Cocoa/Cocoa.h>
     16 
     17 class TtsPlatformImplMac;
     18 
     19 @interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
     20  @private
     21   TtsPlatformImplMac* ttsImplMac_;  // weak.
     22 }
     23 
     24 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
     25 
     26 @end
     27 
     28 // Subclass of NSSpeechSynthesizer that takes an utterance
     29 // string on initialization, retains it and only allows it
     30 // to be spoken once.
     31 //
     32 // We construct a new NSSpeechSynthesizer for each utterance, for
     33 // two reasons:
     34 // 1. To associate delegate callbacks with a particular utterance,
     35 //    without assuming anything undocumented about the protocol.
     36 // 2. To work around http://openradar.appspot.com/radar?id=2854403,
     37 //    where Nuance voices don't retain the utterance string and
     38 //    crash when trying to call willSpeakWord.
     39 @interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
     40  @private
     41   base::scoped_nsobject<NSString> utterance_;
     42   bool didSpeak_;
     43 }
     44 
     45 - (id)initWithUtterance:(NSString*)utterance;
     46 - (bool)startSpeakingRetainedUtterance;
     47 - (bool)startSpeakingString:(NSString*)utterance;
     48 
     49 @end
     50 
     51 class TtsPlatformImplMac : public TtsPlatformImpl {
     52  public:
     53   virtual bool PlatformImplAvailable() OVERRIDE {
     54     return true;
     55   }
     56 
     57   virtual bool Speak(
     58       int utterance_id,
     59       const std::string& utterance,
     60       const std::string& lang,
     61       const VoiceData& voice,
     62       const UtteranceContinuousParameters& params) OVERRIDE;
     63 
     64   virtual bool StopSpeaking() OVERRIDE;
     65 
     66   virtual void Pause() OVERRIDE;
     67 
     68   virtual void Resume() OVERRIDE;
     69 
     70   virtual bool IsSpeaking() OVERRIDE;
     71 
     72   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
     73 
     74   // Called by ChromeTtsDelegate when we get a callback from the
     75   // native speech engine.
     76   void OnSpeechEvent(NSSpeechSynthesizer* sender,
     77                      TtsEventType event_type,
     78                      int char_index,
     79                      const std::string& error_message);
     80 
     81   // Get the single instance of this class.
     82   static TtsPlatformImplMac* GetInstance();
     83 
     84  private:
     85   TtsPlatformImplMac();
     86   virtual ~TtsPlatformImplMac();
     87 
     88   base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
     89   base::scoped_nsobject<ChromeTtsDelegate> delegate_;
     90   int utterance_id_;
     91   std::string utterance_;
     92   int last_char_index_;
     93   bool paused_;
     94 
     95   friend struct DefaultSingletonTraits<TtsPlatformImplMac>;
     96 
     97   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
     98 };
     99 
    100 // static
    101 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
    102   return TtsPlatformImplMac::GetInstance();
    103 }
    104 
    105 bool TtsPlatformImplMac::Speak(
    106     int utterance_id,
    107     const std::string& utterance,
    108     const std::string& lang,
    109     const VoiceData& voice,
    110     const UtteranceContinuousParameters& params) {
    111   // TODO: convert SSML to SAPI xml. http://crbug.com/88072
    112   utterance_ = utterance;
    113   paused_ = false;
    114 
    115   NSString* utterance_nsstring =
    116       [NSString stringWithUTF8String:utterance_.c_str()];
    117 
    118   // Deliberately construct a new speech synthesizer every time Speak is
    119   // called, otherwise there's no way to know whether calls to the delegate
    120   // apply to the current utterance or a previous utterance. In
    121   // experimentation, the overhead of constructing and destructing a
    122   // NSSpeechSynthesizer is minimal.
    123   speech_synthesizer_.reset(
    124       [[SingleUseSpeechSynthesizer alloc]
    125         initWithUtterance:utterance_nsstring]);
    126   [speech_synthesizer_ setDelegate:delegate_];
    127 
    128   if (!voice.native_voice_identifier.empty()) {
    129     NSString* native_voice_identifier =
    130         [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
    131     [speech_synthesizer_ setVoice:native_voice_identifier];
    132   }
    133 
    134   utterance_id_ = utterance_id;
    135 
    136   // TODO: support languages other than the default: crbug.com/88059
    137 
    138   if (params.rate >= 0.0) {
    139     // The TTS api defines rate via words per minute. Let 200 be the default.
    140     [speech_synthesizer_
    141         setObject:[NSNumber numberWithInt:params.rate * 200]
    142         forProperty:NSSpeechRateProperty error:nil];
    143   }
    144 
    145   if (params.pitch >= 0.0) {
    146     // The input is a float from 0.0 to 2.0, with 1.0 being the default.
    147     // Get the default pitch for this voice and modulate it by 50% - 150%.
    148     NSError* errorCode;
    149     NSNumber* defaultPitchObj =
    150         [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
    151                                          error:&errorCode];
    152     int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
    153     int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
    154     [speech_synthesizer_
    155         setObject:[NSNumber numberWithInt:newPitch]
    156         forProperty:NSSpeechPitchBaseProperty error:nil];
    157   }
    158 
    159   if (params.volume >= 0.0) {
    160     [speech_synthesizer_
    161         setObject: [NSNumber numberWithFloat:params.volume]
    162         forProperty:NSSpeechVolumeProperty error:nil];
    163   }
    164 
    165   bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
    166   if (success) {
    167     TtsController* controller = TtsController::GetInstance();
    168     controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, "");
    169   }
    170   return success;
    171 }
    172 
    173 bool TtsPlatformImplMac::StopSpeaking() {
    174   if (speech_synthesizer_.get()) {
    175     [speech_synthesizer_ stopSpeaking];
    176     speech_synthesizer_.reset(nil);
    177   }
    178   paused_ = false;
    179   return true;
    180 }
    181 
    182 void TtsPlatformImplMac::Pause() {
    183   if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
    184     [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
    185     paused_ = true;
    186     TtsController::GetInstance()->OnTtsEvent(
    187         utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
    188   }
    189 }
    190 
    191 void TtsPlatformImplMac::Resume() {
    192   if (speech_synthesizer_.get() && utterance_id_ && paused_) {
    193     [speech_synthesizer_ continueSpeaking];
    194     paused_ = false;
    195     TtsController::GetInstance()->OnTtsEvent(
    196         utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
    197   }
    198 }
    199 
    200 bool TtsPlatformImplMac::IsSpeaking() {
    201   if (speech_synthesizer_)
    202     return [speech_synthesizer_ isSpeaking];
    203   return false;
    204 }
    205 
    206 void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
    207   NSArray* voices = [NSSpeechSynthesizer availableVoices];
    208 
    209   // Create a new temporary array of the available voices with
    210   // the default voice first.
    211   NSMutableArray* orderedVoices =
    212       [NSMutableArray arrayWithCapacity:[voices count]];
    213   NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
    214   [orderedVoices addObject:defaultVoice];
    215   for (NSString* voiceIdentifier in voices) {
    216     if (![voiceIdentifier isEqualToString:defaultVoice])
    217       [orderedVoices addObject:voiceIdentifier];
    218   }
    219 
    220   for (NSString* voiceIdentifier in orderedVoices) {
    221     outVoices->push_back(VoiceData());
    222     VoiceData& data = outVoices->back();
    223 
    224     NSDictionary* attributes =
    225         [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
    226     NSString* name = [attributes objectForKey:NSVoiceName];
    227     NSString* gender = [attributes objectForKey:NSVoiceGender];
    228     NSString* localeIdentifier =
    229         [attributes objectForKey:NSVoiceLocaleIdentifier];
    230 
    231     data.native = true;
    232     data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
    233     data.name = base::SysNSStringToUTF8(name);
    234 
    235     NSDictionary* localeComponents =
    236         [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
    237     NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
    238     NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
    239     if (language && country) {
    240       data.lang =
    241           [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
    242     } else {
    243       data.lang = base::SysNSStringToUTF8(language);
    244     }
    245     if ([gender isEqualToString:NSVoiceGenderMale])
    246       data.gender = TTS_GENDER_MALE;
    247     else if ([gender isEqualToString:NSVoiceGenderFemale])
    248       data.gender = TTS_GENDER_FEMALE;
    249     else
    250       data.gender = TTS_GENDER_NONE;
    251     data.events.insert(TTS_EVENT_START);
    252     data.events.insert(TTS_EVENT_END);
    253     data.events.insert(TTS_EVENT_WORD);
    254     data.events.insert(TTS_EVENT_ERROR);
    255     data.events.insert(TTS_EVENT_CANCELLED);
    256     data.events.insert(TTS_EVENT_INTERRUPTED);
    257     data.events.insert(TTS_EVENT_PAUSE);
    258     data.events.insert(TTS_EVENT_RESUME);
    259   }
    260 }
    261 
    262 void TtsPlatformImplMac::OnSpeechEvent(
    263     NSSpeechSynthesizer* sender,
    264     TtsEventType event_type,
    265     int char_index,
    266     const std::string& error_message) {
    267   // Don't send events from an utterance that's already completed.
    268   // This depends on the fact that we construct a new NSSpeechSynthesizer
    269   // each time we call Speak.
    270   if (sender != speech_synthesizer_.get())
    271     return;
    272 
    273   if (event_type == TTS_EVENT_END)
    274     char_index = utterance_.size();
    275   TtsController* controller = TtsController::GetInstance();
    276 controller->OnTtsEvent(
    277       utterance_id_, event_type, char_index, error_message);
    278   last_char_index_ = char_index;
    279 }
    280 
    281 TtsPlatformImplMac::TtsPlatformImplMac() {
    282   utterance_id_ = -1;
    283   paused_ = false;
    284 
    285   delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
    286 }
    287 
    288 TtsPlatformImplMac::~TtsPlatformImplMac() {
    289 }
    290 
    291 // static
    292 TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
    293   return Singleton<TtsPlatformImplMac>::get();
    294 }
    295 
    296 @implementation ChromeTtsDelegate
    297 
    298 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
    299   if ((self = [super init])) {
    300     ttsImplMac_ = ttsImplMac;
    301   }
    302   return self;
    303 }
    304 
    305 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    306         didFinishSpeaking:(BOOL)finished_speaking {
    307   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
    308 }
    309 
    310 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    311             willSpeakWord:(NSRange)character_range
    312                  ofString:(NSString*)string {
    313   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
    314       character_range.location, "");
    315 }
    316 
    317 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    318  didEncounterErrorAtIndex:(NSUInteger)character_index
    319                  ofString:(NSString*)string
    320                   message:(NSString*)message {
    321   std::string message_utf8 = base::SysNSStringToUTF8(message);
    322   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
    323       message_utf8);
    324 }
    325 
    326 @end
    327 
    328 @implementation SingleUseSpeechSynthesizer
    329 
    330 - (id)initWithUtterance:(NSString*)utterance {
    331   self = [super init];
    332   if (self) {
    333     utterance_.reset([utterance retain]);
    334     didSpeak_ = false;
    335   }
    336   return self;
    337 }
    338 
    339 - (bool)startSpeakingRetainedUtterance {
    340   CHECK(!didSpeak_);
    341   CHECK(utterance_);
    342   didSpeak_ = true;
    343   return [super startSpeakingString:utterance_];
    344 }
    345 
    346 - (bool)startSpeakingString:(NSString*)utterance {
    347   CHECK(false);
    348   return false;
    349 }
    350 
    351 @end
    352