Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <string>
      6 
      7 #include "base/mac/scoped_nsobject.h"
      8 #include "base/memory/singleton.h"
      9 #include "base/strings/sys_string_conversions.h"
     10 #include "base/values.h"
     11 #include "chrome/browser/extensions/extension_function.h"
     12 #include "chrome/browser/speech/tts_controller.h"
     13 #include "chrome/browser/speech/tts_platform.h"
     14 
     15 #import <Cocoa/Cocoa.h>
     16 
     17 class TtsPlatformImplMac;
     18 
     19 @interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
     20  @private
     21   TtsPlatformImplMac* ttsImplMac_;  // weak.
     22 }
     23 
     24 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
     25 
     26 @end
     27 
     28 // Subclass of NSSpeechSynthesizer that takes an utterance
     29 // string on initialization, retains it and only allows it
     30 // to be spoken once.
     31 //
     32 // We construct a new NSSpeechSynthesizer for each utterance, for
     33 // two reasons:
     34 // 1. To associate delegate callbacks with a particular utterance,
     35 //    without assuming anything undocumented about the protocol.
     36 // 2. To work around http://openradar.appspot.com/radar?id=2854403,
     37 //    where Nuance voices don't retain the utterance string and
     38 //    crash when trying to call willSpeakWord.
     39 @interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
     40  @private
     41   base::scoped_nsobject<NSString> utterance_;
     42   bool didSpeak_;
     43 }
     44 
     45 - (id)initWithUtterance:(NSString*)utterance;
     46 - (bool)startSpeakingRetainedUtterance;
     47 - (bool)startSpeakingString:(NSString*)utterance;
     48 
     49 @end
     50 
     51 class TtsPlatformImplMac : public TtsPlatformImpl {
     52  public:
     53   virtual bool PlatformImplAvailable() OVERRIDE {
     54     return true;
     55   }
     56 
     57   virtual bool Speak(
     58       int utterance_id,
     59       const std::string& utterance,
     60       const std::string& lang,
     61       const VoiceData& voice,
     62       const UtteranceContinuousParameters& params) OVERRIDE;
     63 
     64   virtual bool StopSpeaking() OVERRIDE;
     65 
     66   virtual void Pause() OVERRIDE;
     67 
     68   virtual void Resume() OVERRIDE;
     69 
     70   virtual bool IsSpeaking() OVERRIDE;
     71 
     72   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
     73 
     74   // Called by ChromeTtsDelegate when we get a callback from the
     75   // native speech engine.
     76   void OnSpeechEvent(NSSpeechSynthesizer* sender,
     77                      TtsEventType event_type,
     78                      int char_index,
     79                      const std::string& error_message);
     80 
     81   // Get the single instance of this class.
     82   static TtsPlatformImplMac* GetInstance();
     83 
     84  private:
     85   TtsPlatformImplMac();
     86   virtual ~TtsPlatformImplMac();
     87 
     88   base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
     89   base::scoped_nsobject<ChromeTtsDelegate> delegate_;
     90   int utterance_id_;
     91   std::string utterance_;
     92   bool sent_start_event_;
     93   int last_char_index_;
     94   bool paused_;
     95 
     96   friend struct DefaultSingletonTraits<TtsPlatformImplMac>;
     97 
     98   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
     99 };
    100 
    101 // static
    102 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
    103   return TtsPlatformImplMac::GetInstance();
    104 }
    105 
    106 bool TtsPlatformImplMac::Speak(
    107     int utterance_id,
    108     const std::string& utterance,
    109     const std::string& lang,
    110     const VoiceData& voice,
    111     const UtteranceContinuousParameters& params) {
    112   // TODO: convert SSML to SAPI xml. http://crbug.com/88072
    113   utterance_ = utterance;
    114   paused_ = false;
    115 
    116   NSString* utterance_nsstring =
    117       [NSString stringWithUTF8String:utterance_.c_str()];
    118 
    119   // Deliberately construct a new speech synthesizer every time Speak is
    120   // called, otherwise there's no way to know whether calls to the delegate
    121   // apply to the current utterance or a previous utterance. In
    122   // experimentation, the overhead of constructing and destructing a
    123   // NSSpeechSynthesizer is minimal.
    124   speech_synthesizer_.reset(
    125       [[SingleUseSpeechSynthesizer alloc]
    126         initWithUtterance:utterance_nsstring]);
    127   [speech_synthesizer_ setDelegate:delegate_];
    128 
    129   if (!voice.native_voice_identifier.empty()) {
    130     NSString* native_voice_identifier =
    131         [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
    132     [speech_synthesizer_ setVoice:native_voice_identifier];
    133   }
    134 
    135   utterance_id_ = utterance_id;
    136   sent_start_event_ = false;
    137 
    138   // TODO: support languages other than the default: crbug.com/88059
    139 
    140   if (params.rate >= 0.0) {
    141     // The TTS api defines rate via words per minute. Let 200 be the default.
    142     [speech_synthesizer_
    143         setObject:[NSNumber numberWithInt:params.rate * 200]
    144         forProperty:NSSpeechRateProperty error:nil];
    145   }
    146 
    147   if (params.pitch >= 0.0) {
    148     // The input is a float from 0.0 to 2.0, with 1.0 being the default.
    149     // Get the default pitch for this voice and modulate it by 50% - 150%.
    150     NSError* errorCode;
    151     NSNumber* defaultPitchObj =
    152         [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
    153                                          error:&errorCode];
    154     int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
    155     int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
    156     [speech_synthesizer_
    157         setObject:[NSNumber numberWithInt:newPitch]
    158         forProperty:NSSpeechPitchBaseProperty error:nil];
    159   }
    160 
    161   if (params.volume >= 0.0) {
    162     [speech_synthesizer_
    163         setObject: [NSNumber numberWithFloat:params.volume]
    164         forProperty:NSSpeechVolumeProperty error:nil];
    165   }
    166 
    167   return [speech_synthesizer_ startSpeakingRetainedUtterance];
    168 }
    169 
    170 bool TtsPlatformImplMac::StopSpeaking() {
    171   if (speech_synthesizer_.get()) {
    172     [speech_synthesizer_ stopSpeaking];
    173     speech_synthesizer_.reset(nil);
    174   }
    175   paused_ = false;
    176   return true;
    177 }
    178 
    179 void TtsPlatformImplMac::Pause() {
    180   if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
    181     [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
    182     paused_ = true;
    183     TtsController::GetInstance()->OnTtsEvent(
    184         utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
    185   }
    186 }
    187 
    188 void TtsPlatformImplMac::Resume() {
    189   if (speech_synthesizer_.get() && utterance_id_ && paused_) {
    190     [speech_synthesizer_ continueSpeaking];
    191     paused_ = false;
    192     TtsController::GetInstance()->OnTtsEvent(
    193         utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
    194   }
    195 }
    196 
    197 bool TtsPlatformImplMac::IsSpeaking() {
    198   return [NSSpeechSynthesizer isAnyApplicationSpeaking];
    199 }
    200 
    201 void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
    202   NSArray* voices = [NSSpeechSynthesizer availableVoices];
    203 
    204   // Create a new temporary array of the available voices with
    205   // the default voice first.
    206   NSMutableArray* orderedVoices =
    207       [NSMutableArray arrayWithCapacity:[voices count]];
    208   NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
    209   [orderedVoices addObject:defaultVoice];
    210   for (NSString* voiceIdentifier in voices) {
    211     if (![voiceIdentifier isEqualToString:defaultVoice])
    212       [orderedVoices addObject:voiceIdentifier];
    213   }
    214 
    215   for (NSString* voiceIdentifier in orderedVoices) {
    216     outVoices->push_back(VoiceData());
    217     VoiceData& data = outVoices->back();
    218 
    219     NSDictionary* attributes =
    220         [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
    221     NSString* name = [attributes objectForKey:NSVoiceName];
    222     NSString* gender = [attributes objectForKey:NSVoiceGender];
    223     NSString* localeIdentifier =
    224         [attributes objectForKey:NSVoiceLocaleIdentifier];
    225 
    226     data.native = true;
    227     data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
    228     data.name = base::SysNSStringToUTF8(name);
    229 
    230     NSDictionary* localeComponents =
    231         [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
    232     NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
    233     NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
    234     if (language && country) {
    235       data.lang =
    236           [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
    237     } else {
    238       data.lang = base::SysNSStringToUTF8(language);
    239     }
    240     if ([gender isEqualToString:NSVoiceGenderMale])
    241       data.gender = TTS_GENDER_MALE;
    242     else if ([gender isEqualToString:NSVoiceGenderFemale])
    243       data.gender = TTS_GENDER_FEMALE;
    244     else
    245       data.gender = TTS_GENDER_NONE;
    246     data.events.insert(TTS_EVENT_START);
    247     data.events.insert(TTS_EVENT_END);
    248     data.events.insert(TTS_EVENT_WORD);
    249     data.events.insert(TTS_EVENT_ERROR);
    250     data.events.insert(TTS_EVENT_CANCELLED);
    251     data.events.insert(TTS_EVENT_INTERRUPTED);
    252     data.events.insert(TTS_EVENT_PAUSE);
    253     data.events.insert(TTS_EVENT_RESUME);
    254   }
    255 }
    256 
    257 void TtsPlatformImplMac::OnSpeechEvent(
    258     NSSpeechSynthesizer* sender,
    259     TtsEventType event_type,
    260     int char_index,
    261     const std::string& error_message) {
    262   // Don't send events from an utterance that's already completed.
    263   // This depends on the fact that we construct a new NSSpeechSynthesizer
    264   // each time we call Speak.
    265   if (sender != speech_synthesizer_.get())
    266     return;
    267 
    268   if (event_type == TTS_EVENT_END)
    269     char_index = utterance_.size();
    270   TtsController* controller = TtsController::GetInstance();
    271   if (event_type == TTS_EVENT_WORD && !sent_start_event_) {
    272     controller->OnTtsEvent(
    273         utterance_id_, TTS_EVENT_START, 0, "");
    274     sent_start_event_ = true;
    275   }
    276   controller->OnTtsEvent(
    277       utterance_id_, event_type, char_index, error_message);
    278   last_char_index_ = char_index;
    279 }
    280 
    281 TtsPlatformImplMac::TtsPlatformImplMac() {
    282   utterance_id_ = -1;
    283   sent_start_event_ = true;
    284   paused_ = false;
    285 
    286   delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
    287 }
    288 
    289 TtsPlatformImplMac::~TtsPlatformImplMac() {
    290 }
    291 
    292 // static
    293 TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
    294   return Singleton<TtsPlatformImplMac>::get();
    295 }
    296 
    297 @implementation ChromeTtsDelegate
    298 
    299 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
    300   if ((self = [super init])) {
    301     ttsImplMac_ = ttsImplMac;
    302   }
    303   return self;
    304 }
    305 
    306 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    307         didFinishSpeaking:(BOOL)finished_speaking {
    308   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
    309 }
    310 
    311 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    312             willSpeakWord:(NSRange)character_range
    313                  ofString:(NSString*)string {
    314   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
    315       character_range.location, "");
    316 }
    317 
    318 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    319  didEncounterErrorAtIndex:(NSUInteger)character_index
    320                  ofString:(NSString*)string
    321                   message:(NSString*)message {
    322   std::string message_utf8 = base::SysNSStringToUTF8(message);
    323   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
    324       message_utf8);
    325 }
    326 
    327 @end
    328 
    329 @implementation SingleUseSpeechSynthesizer
    330 
    331 - (id)initWithUtterance:(NSString*)utterance {
    332   self = [super init];
    333   if (self) {
    334     utterance_.reset([utterance retain]);
    335     didSpeak_ = false;
    336   }
    337   return self;
    338 }
    339 
    340 - (bool)startSpeakingRetainedUtterance {
    341   CHECK(!didSpeak_);
    342   CHECK(utterance_);
    343   didSpeak_ = true;
    344   return [super startSpeakingString:utterance_];
    345 }
    346 
    347 - (bool)startSpeakingString:(NSString*)utterance {
    348   CHECK(false);
    349   return false;
    350 }
    351 
    352 @end
    353