// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <string>
#include <vector>

#include "base/mac/scoped_nsobject.h"
#include "base/memory/singleton.h"
#include "base/strings/sys_string_conversions.h"
#include "base/values.h"
#include "chrome/browser/extensions/extension_function.h"
#include "chrome/browser/speech/tts_controller.h"
#include "chrome/browser/speech/tts_platform.h"

#import <Cocoa/Cocoa.h>

class TtsPlatformImplMac;

// NSSpeechSynthesizer delegate that relays the synthesizer's callbacks
// (finished speaking, about to speak a word, error encountered) to the
// TtsPlatformImplMac it was initialized with.
@interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
 @private
  // Not retained/owned by the delegate; the pointer is stored as-is.
  TtsPlatformImplMac* ttsImplMac_;  // weak.
}

// Stores |ttsImplMac| as the receiver of all forwarded speech events.
- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;

@end

// Subclass of NSSpeechSynthesizer that takes an utterance
// string on initialization, retains it and only allows it
// to be spoken once.
//
// We construct a new NSSpeechSynthesizer for each utterance, for
// two reasons:
// 1. To associate delegate callbacks with a particular utterance,
//    without assuming anything undocumented about the protocol.
// 2. To work around http://openradar.appspot.com/radar?id=2854403,
//    where Nuance voices don't retain the utterance string and
//    crash when trying to call willSpeakWord.
@interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
 @private
  base::scoped_nsobject<NSString> utterance_;
  bool didSpeak_;
}

// Initializes the synthesizer and retains |utterance| for later playback.
- (id)initWithUtterance:(NSString*)utterance;

// Starts speaking the retained utterance. CHECK-fails if it is called a
// second time or if no utterance was retained.
- (bool)startSpeakingRetainedUtterance;

// Overridden to CHECK-fail unconditionally: callers must go through
// -startSpeakingRetainedUtterance so the spoken string is the retained one.
- (bool)startSpeakingString:(NSString*)utterance;

@end

// Mac implementation of TtsPlatformImpl built on NSSpeechSynthesizer.
// Obtained through GetInstance(); the constructor is private.
class TtsPlatformImplMac : public TtsPlatformImpl {
 public:
  virtual bool PlatformImplAvailable() OVERRIDE {
    return true;
  }

  // Starts speaking |utterance| with a freshly created synthesizer.
  // Returns false if |utterance| cannot be converted to an NSString or if
  // the synthesizer refuses to start.
  virtual bool Speak(
      int utterance_id,
      const std::string& utterance,
      const std::string& lang,
      const VoiceData& voice,
      const UtteranceContinuousParameters& params) OVERRIDE;

  // Stops and discards the current synthesizer, if any. Always returns true.
  virtual bool StopSpeaking() OVERRIDE;

  // Pauses the current utterance and reports TTS_EVENT_PAUSE.
  virtual void Pause() OVERRIDE;

  // Continues a paused utterance and reports TTS_EVENT_RESUME.
  virtual void Resume() OVERRIDE;

  // Returns +[NSSpeechSynthesizer isAnyApplicationSpeaking].
  virtual bool IsSpeaking() OVERRIDE;

  // Appends one VoiceData per installed voice, default voice first.
  virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;

  // Called by ChromeTtsDelegate when we get a callback from the
  // native speech engine.
  void OnSpeechEvent(NSSpeechSynthesizer* sender,
                     TtsEventType event_type,
                     int char_index,
                     const std::string& error_message);

  // Get the single instance of this class.
  static TtsPlatformImplMac* GetInstance();

 private:
  TtsPlatformImplMac();
  virtual ~TtsPlatformImplMac();

  base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
  base::scoped_nsobject<ChromeTtsDelegate> delegate_;
  int utterance_id_;
  std::string utterance_;
  bool sent_start_event_;
  // Character index carried by the most recent forwarded event; reported
  // with pause/resume events.
  int last_char_index_;
  bool paused_;

  friend struct DefaultSingletonTraits<TtsPlatformImplMac>;

  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
};

// static
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  return TtsPlatformImplMac::GetInstance();
}

bool TtsPlatformImplMac::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const VoiceData& voice,
    const UtteranceContinuousParameters& params) {
  // TODO: convert SSML to SAPI xml. http://crbug.com/88072

  // +stringWithUTF8String: yields nil for input that is not valid UTF-8.
  // Bail out before touching any state; a nil utterance would otherwise
  // trip the CHECK in -startSpeakingRetainedUtterance.
  NSString* utterance_nsstring =
      [NSString stringWithUTF8String:utterance.c_str()];
  if (!utterance_nsstring)
    return false;

  utterance_ = utterance;
  paused_ = false;
  // Start from a known index so a Pause()/Resume() that arrives before the
  // first engine callback does not report a stale or indeterminate value.
  last_char_index_ = 0;

  // Deliberately construct a new speech synthesizer every time Speak is
  // called, otherwise there's no way to know whether calls to the delegate
  // apply to the current utterance or a previous utterance. In
  // experimentation, the overhead of constructing and destructing a
  // NSSpeechSynthesizer is minimal.
  speech_synthesizer_.reset(
      [[SingleUseSpeechSynthesizer alloc]
          initWithUtterance:utterance_nsstring]);
  [speech_synthesizer_ setDelegate:delegate_];

  if (!voice.native_voice_identifier.empty()) {
    NSString* native_voice_identifier =
        [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
    [speech_synthesizer_ setVoice:native_voice_identifier];
  }

  utterance_id_ = utterance_id;
  sent_start_event_ = false;

  // TODO: support languages other than the default: crbug.com/88059

  if (params.rate >= 0.0) {
    // The TTS api defines rate via words per minute. Let 200 be the default.
    [speech_synthesizer_
        setObject:[NSNumber numberWithInt:static_cast<int>(params.rate * 200)]
        forProperty:NSSpeechRateProperty error:nil];
  }

  if (params.pitch >= 0.0) {
    // The input is a float from 0.0 to 2.0, with 1.0 being the default.
    // Get the default pitch for this voice and modulate it by 50% - 150%.
    // The error out-parameter is not inspected, so none is requested; a nil
    // result falls back to 48.
    NSNumber* defaultPitchObj =
        [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
                                         error:nil];
    int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
    int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
    [speech_synthesizer_
        setObject:[NSNumber numberWithInt:newPitch]
        forProperty:NSSpeechPitchBaseProperty error:nil];
  }

  if (params.volume >= 0.0) {
    [speech_synthesizer_
        setObject:[NSNumber numberWithFloat:params.volume]
        forProperty:NSSpeechVolumeProperty error:nil];
  }

  return [speech_synthesizer_ startSpeakingRetainedUtterance];
}

bool TtsPlatformImplMac::StopSpeaking() {
  if (speech_synthesizer_.get()) {
    [speech_synthesizer_ stopSpeaking];
    // Dropping the synthesizer makes OnSpeechEvent() ignore any callbacks
    // that still arrive for it.
    speech_synthesizer_.reset(nil);
  }
  paused_ = false;
  return true;
}

void TtsPlatformImplMac::Pause() {
  if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
    [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
    paused_ = true;
    TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
  }
}

void TtsPlatformImplMac::Resume() {
  if (speech_synthesizer_.get() && utterance_id_ && paused_) {
    [speech_synthesizer_ continueSpeaking];
    paused_ = false;
    TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
  }
}

bool TtsPlatformImplMac::IsSpeaking() {
  return [NSSpeechSynthesizer isAnyApplicationSpeaking];
}

void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* out_voices) {
  NSArray* voices = [NSSpeechSynthesizer availableVoices];

  // Create a new temporary array of the available voices with
  // the default voice first.
  NSMutableArray* orderedVoices =
      [NSMutableArray arrayWithCapacity:[voices count]];
  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
  // -addObject: raises on nil, so only add the default voice when there is
  // one. With a nil |defaultVoice| the loop below simply keeps every voice
  // in its original order.
  if (defaultVoice)
    [orderedVoices addObject:defaultVoice];
  for (NSString* voiceIdentifier in voices) {
    if (![voiceIdentifier isEqualToString:defaultVoice])
      [orderedVoices addObject:voiceIdentifier];
  }

  for (NSString* voiceIdentifier in orderedVoices) {
    out_voices->push_back(VoiceData());
    VoiceData& data = out_voices->back();

    NSDictionary* attributes =
        [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
    NSString* name = [attributes objectForKey:NSVoiceName];
    NSString* gender = [attributes objectForKey:NSVoiceGender];
    NSString* localeIdentifier =
        [attributes objectForKey:NSVoiceLocaleIdentifier];

    data.native = true;
    data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
    data.name = base::SysNSStringToUTF8(name);

    // Build a "language-COUNTRY" tag when both parts are known, otherwise
    // fall back to the bare language code.
    NSDictionary* localeComponents =
        [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
    NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
    NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
    if (language && country) {
      data.lang = base::SysNSStringToUTF8(
          [NSString stringWithFormat:@"%@-%@", language, country]);
    } else {
      data.lang = base::SysNSStringToUTF8(language);
    }

    if ([gender isEqualToString:NSVoiceGenderMale])
      data.gender = TTS_GENDER_MALE;
    else if ([gender isEqualToString:NSVoiceGenderFemale])
      data.gender = TTS_GENDER_FEMALE;
    else
      data.gender = TTS_GENDER_NONE;

    data.events.insert(TTS_EVENT_START);
    data.events.insert(TTS_EVENT_END);
    data.events.insert(TTS_EVENT_WORD);
    data.events.insert(TTS_EVENT_ERROR);
    data.events.insert(TTS_EVENT_CANCELLED);
    data.events.insert(TTS_EVENT_INTERRUPTED);
    data.events.insert(TTS_EVENT_PAUSE);
    data.events.insert(TTS_EVENT_RESUME);
  }
}

void TtsPlatformImplMac::OnSpeechEvent(
    NSSpeechSynthesizer* sender,
    TtsEventType event_type,
    int char_index,
    const std::string& error_message) {
  // Don't send events from an utterance that's already completed.
  // This depends on the fact that we construct a new NSSpeechSynthesizer
  // each time we call Speak.
  if (sender != speech_synthesizer_.get())
    return;

  // An end event is always reported at the end of the utterance text.
  if (event_type == TTS_EVENT_END)
    char_index = utterance_.size();

  TtsController* controller = TtsController::GetInstance();
  // The start event is synthesized just before the first word event.
  if (event_type == TTS_EVENT_WORD && !sent_start_event_) {
    controller->OnTtsEvent(
        utterance_id_, TTS_EVENT_START, 0, "");
    sent_start_event_ = true;
  }
  controller->OnTtsEvent(
      utterance_id_, event_type, char_index, error_message);
  last_char_index_ = char_index;
}

TtsPlatformImplMac::TtsPlatformImplMac()
    : utterance_id_(-1),
      sent_start_event_(true),
      last_char_index_(0),
      paused_(false) {
  delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
}

TtsPlatformImplMac::~TtsPlatformImplMac() {
}

// static
TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
  return Singleton<TtsPlatformImplMac>::get();
}

@implementation ChromeTtsDelegate

- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
  if ((self = [super init])) {
    ttsImplMac_ = ttsImplMac;
  }
  return self;
}

// Reported as TTS_EVENT_END; |finished_speaking| is not consulted.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
        didFinishSpeaking:(BOOL)finished_speaking {
  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
}

// Reported as TTS_EVENT_WORD at the start of |character_range|.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
            willSpeakWord:(NSRange)character_range
                 ofString:(NSString*)string {
  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
                             character_range.location, "");
}

// Reported as TTS_EVENT_ERROR carrying |message| converted to UTF-8.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    didEncounterErrorAtIndex:(NSUInteger)character_index
                    ofString:(NSString*)string
                     message:(NSString*)message {
  std::string message_utf8 = base::SysNSStringToUTF8(message);
  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
                             message_utf8);
}

@end

@implementation SingleUseSpeechSynthesizer

- (id)initWithUtterance:(NSString*)utterance {
  self = [super init];
  if (self) {
    // scoped_nsobject takes over the reference added by -retain.
    utterance_.reset([utterance retain]);
    didSpeak_ = false;
  }
  return self;
}

- (bool)startSpeakingRetainedUtterance {
  CHECK(!didSpeak_);
  CHECK(utterance_);
  didSpeak_ = true;
  // Call the superclass directly; this class's own -startSpeakingString:
  // override always CHECK-fails.
  return [super startSpeakingString:utterance_];
}

- (bool)startSpeakingString:(NSString*)utterance {
  CHECK(false);
  return false;
}

@end