// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <string>
#include <vector>

#include "base/mac/scoped_nsobject.h"
#include "base/memory/singleton.h"
#include "base/strings/sys_string_conversions.h"
#include "base/values.h"
#include "chrome/browser/speech/tts_controller.h"
#include "chrome/browser/speech/tts_platform.h"
#include "extensions/browser/extension_function.h"

#import <Cocoa/Cocoa.h>

class TtsPlatformImplMac;

// NSSpeechSynthesizerDelegate implementation that holds a non-owning pointer
// to a TtsPlatformImplMac. The pointer is supplied at initialization time and
// is never retained or deleted by this object.
@interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
 @private
  TtsPlatformImplMac* ttsImplMac_;  // weak.
}

// Initializes the delegate with the platform implementation it reports to.
// |ttsImplMac| is stored as a raw pointer.
- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;

@end

// Subclass of NSSpeechSynthesizer that takes an utterance
// string on initialization, retains it and only allows it
// to be spoken once.
//
// We construct a new NSSpeechSynthesizer for each utterance, for
// two reasons:
// 1. To associate delegate callbacks with a particular utterance,
//    without assuming anything undocumented about the protocol.
// 2. To work around http://openradar.appspot.com/radar?id=2854403,
//    where Nuance voices don't retain the utterance string and
//    crash when trying to call willSpeakWord.
@interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
 @private
  // The text handed to -initWithUtterance:, kept alive by this object.
  base::scoped_nsobject<NSString> utterance_;
  // Becomes true the first time -startSpeakingRetainedUtterance runs.
  bool didSpeak_;
}

// Retains |utterance| so it can later be spoken by
// -startSpeakingRetainedUtterance.
- (id)initWithUtterance:(NSString*)utterance;

// Starts speaking the retained utterance and returns the superclass's
// -startSpeakingString: result. CHECK-fails if called a second time or if no
// utterance was retained.
- (bool)startSpeakingRetainedUtterance;

// Overridden to CHECK-fail unconditionally; speech must be started through
// -startSpeakingRetainedUtterance.
- (bool)startSpeakingString:(NSString*)utterance;

@end

// Mac implementation of TtsPlatformImpl built on NSSpeechSynthesizer. A new
// SingleUseSpeechSynthesizer is created for every Speak() call, and events
// coming from any other synthesizer instance are dropped in OnSpeechEvent().
class TtsPlatformImplMac : public TtsPlatformImpl {
 public:
  virtual bool PlatformImplAvailable() OVERRIDE {
    return true;
  }

  // Starts speaking |utterance|. Returns false if the text cannot be
  // converted to an NSString or if the synthesizer refuses to start.
  // On success a TTS_EVENT_START is sent to the TtsController.
  virtual bool Speak(
      int utterance_id,
      const std::string& utterance,
      const std::string& lang,
      const VoiceData& voice,
      const UtteranceContinuousParameters& params) OVERRIDE;

  // Stops and releases the current synthesizer, if any. Always returns true.
  virtual bool StopSpeaking() OVERRIDE;

  virtual void Pause() OVERRIDE;

  virtual void Resume() OVERRIDE;

  virtual bool IsSpeaking() OVERRIDE;

  // Appends one VoiceData per available voice to |out_voices|, with the
  // system default voice (when there is one) listed first.
  virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;

  // Called by ChromeTtsDelegate when we get a callback from the
  // native speech engine.
  void OnSpeechEvent(NSSpeechSynthesizer* sender,
                     TtsEventType event_type,
                     int char_index,
                     const std::string& error_message);

  // Get the single instance of this class.
  static TtsPlatformImplMac* GetInstance();

 private:
  TtsPlatformImplMac();
  virtual ~TtsPlatformImplMac();

  // Synthesizer for the utterance currently being spoken; replaced on every
  // Speak() and cleared by StopSpeaking().
  base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
  // Delegate shared by all synthesizers created by this object.
  base::scoped_nsobject<ChromeTtsDelegate> delegate_;
  // Id and text of the most recent utterance passed to Speak().
  int utterance_id_;
  std::string utterance_;
  // Character index of the last event forwarded by OnSpeechEvent(); reported
  // with pause/resume events.
  int last_char_index_;
  // True between a successful Pause() and the next Resume()/Speak()/
  // StopSpeaking().
  bool paused_;

  friend struct DefaultSingletonTraits<TtsPlatformImplMac>;

  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
};

// static
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  return TtsPlatformImplMac::GetInstance();
}

bool TtsPlatformImplMac::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const VoiceData& voice,
    const UtteranceContinuousParameters& params) {
  // TODO: convert SSML to SAPI xml. http://crbug.com/88072

  // +stringWithUTF8String: can yield nil when the bytes are not valid UTF-8.
  // Bail out before touching any member state; a nil utterance would
  // otherwise trip the CHECK in -startSpeakingRetainedUtterance.
  NSString* utterance_nsstring =
      [NSString stringWithUTF8String:utterance.c_str()];
  if (!utterance_nsstring)
    return false;

  utterance_ = utterance;
  utterance_id_ = utterance_id;
  // Start from 0 so a Pause() issued before the first word callback does not
  // report an index left over from a previous utterance.
  last_char_index_ = 0;
  paused_ = false;

  // Deliberately construct a new speech synthesizer every time Speak is
  // called, otherwise there's no way to know whether calls to the delegate
  // apply to the current utterance or a previous utterance. In
  // experimentation, the overhead of constructing and destructing a
  // NSSpeechSynthesizer is minimal.
  speech_synthesizer_.reset(
      [[SingleUseSpeechSynthesizer alloc]
          initWithUtterance:utterance_nsstring]);
  [speech_synthesizer_ setDelegate:delegate_];

  if (!voice.native_voice_identifier.empty()) {
    NSString* native_voice_identifier =
        [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
    [speech_synthesizer_ setVoice:native_voice_identifier];
  }

  // TODO: support languages other than the default: crbug.com/88059

  if (params.rate >= 0.0) {
    // The TTS api defines rate via words per minute. Let 200 be the default.
    int words_per_minute = static_cast<int>(params.rate * 200);
    [speech_synthesizer_
        setObject:[NSNumber numberWithInt:words_per_minute]
        forProperty:NSSpeechRateProperty error:nil];
  }

  if (params.pitch >= 0.0) {
    // The input is a float from 0.0 to 2.0, with 1.0 being the default.
    // Get the default pitch for this voice and modulate it by 50% - 150%.
    // The error out-parameter is not needed: a nil result already selects
    // the fallback base pitch of 48.
    NSNumber* defaultPitchObj =
        [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
                                         error:nil];
    int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
    int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
    [speech_synthesizer_
        setObject:[NSNumber numberWithInt:newPitch]
        forProperty:NSSpeechPitchBaseProperty error:nil];
  }

  if (params.volume >= 0.0) {
    [speech_synthesizer_
        setObject:[NSNumber numberWithFloat:params.volume]
        forProperty:NSSpeechVolumeProperty error:nil];
  }

  bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
  if (success) {
    TtsController* controller = TtsController::GetInstance();
    controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, "");
  }
  return success;
}

bool TtsPlatformImplMac::StopSpeaking() {
  if (speech_synthesizer_.get()) {
    [speech_synthesizer_ stopSpeaking];
    // Dropping the synthesizer makes OnSpeechEvent() ignore any late
    // callbacks it may still deliver.
    speech_synthesizer_.reset(nil);
  }
  paused_ = false;
  return true;
}

void TtsPlatformImplMac::Pause() {
  if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
    [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
    paused_ = true;
    TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
  }
}

void TtsPlatformImplMac::Resume() {
  if (speech_synthesizer_.get() && utterance_id_ && paused_) {
    [speech_synthesizer_ continueSpeaking];
    paused_ = false;
    TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
  }
}

bool TtsPlatformImplMac::IsSpeaking() {
  if (speech_synthesizer_.get())
    return [speech_synthesizer_ isSpeaking];
  return false;
}

void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
  NSArray* voices = [NSSpeechSynthesizer availableVoices];

  // Create a new temporary array of the available voices with
  // the default voice first.
  NSMutableArray* orderedVoices =
      [NSMutableArray arrayWithCapacity:[voices count]];
  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
  // -addObject: raises on nil, so only promote the default voice when the
  // system actually reports one.
  if (defaultVoice)
    [orderedVoices addObject:defaultVoice];
  for (NSString* voiceIdentifier in voices) {
    if (![voiceIdentifier isEqualToString:defaultVoice])
      [orderedVoices addObject:voiceIdentifier];
  }

  for (NSString* voiceIdentifier in orderedVoices) {
    outVoices->push_back(VoiceData());
    VoiceData& data = outVoices->back();

    NSDictionary* attributes =
        [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
    NSString* name = [attributes objectForKey:NSVoiceName];
    NSString* gender = [attributes objectForKey:NSVoiceGender];
    NSString* localeIdentifier =
        [attributes objectForKey:NSVoiceLocaleIdentifier];

    data.native = true;
    data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
    data.name = base::SysNSStringToUTF8(name);

    // Build "<language>-<country>" when both components are present,
    // otherwise fall back to the bare language code.
    NSDictionary* localeComponents =
        [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
    NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
    NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
    if (language && country) {
      data.lang = base::SysNSStringToUTF8(
          [NSString stringWithFormat:@"%@-%@", language, country]);
    } else {
      data.lang = base::SysNSStringToUTF8(language);
    }

    if ([gender isEqualToString:NSVoiceGenderMale])
      data.gender = TTS_GENDER_MALE;
    else if ([gender isEqualToString:NSVoiceGenderFemale])
      data.gender = TTS_GENDER_FEMALE;
    else
      data.gender = TTS_GENDER_NONE;

    data.events.insert(TTS_EVENT_START);
    data.events.insert(TTS_EVENT_END);
    data.events.insert(TTS_EVENT_WORD);
    data.events.insert(TTS_EVENT_ERROR);
    data.events.insert(TTS_EVENT_CANCELLED);
    data.events.insert(TTS_EVENT_INTERRUPTED);
    data.events.insert(TTS_EVENT_PAUSE);
    data.events.insert(TTS_EVENT_RESUME);
  }
}

void TtsPlatformImplMac::OnSpeechEvent(
    NSSpeechSynthesizer* sender,
    TtsEventType event_type,
    int char_index,
    const std::string& error_message) {
  // Don't send events from an utterance that's already completed.
  // This depends on the fact that we construct a new NSSpeechSynthesizer
  // each time we call Speak.
  if (sender != speech_synthesizer_.get())
    return;

  // An end event is always reported at the end of the utterance text,
  // whatever index the caller supplied.
  if (event_type == TTS_EVENT_END)
    char_index = static_cast<int>(utterance_.size());
  TtsController* controller = TtsController::GetInstance();
  controller->OnTtsEvent(
      utterance_id_, event_type, char_index, error_message);
  last_char_index_ = char_index;
}

TtsPlatformImplMac::TtsPlatformImplMac()
    : utterance_id_(-1),
      last_char_index_(0),
      paused_(false) {
  delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
}

TtsPlatformImplMac::~TtsPlatformImplMac() {
}

// static
TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
  return Singleton<TtsPlatformImplMac>::get();
}

@implementation ChromeTtsDelegate

- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
  if ((self = [super init])) {
    ttsImplMac_ = ttsImplMac;
  }
  return self;
}

// Forwards completion as TTS_EVENT_END. The index passed here is a
// placeholder; OnSpeechEvent() replaces it with the utterance length.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
        didFinishSpeaking:(BOOL)finished_speaking {
  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
}

// Forwards a word boundary as TTS_EVENT_WORD at the start of the word range.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
            willSpeakWord:(NSRange)character_range
                 ofString:(NSString*)string {
  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
                             static_cast<int>(character_range.location), "");
}

// Forwards a synthesizer error as TTS_EVENT_ERROR along with its message.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 didEncounterErrorAtIndex:(NSUInteger)character_index
                 ofString:(NSString*)string
                  message:(NSString*)message {
  std::string message_utf8 = base::SysNSStringToUTF8(message);
  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR,
                             static_cast<int>(character_index), message_utf8);
}

@end

@implementation SingleUseSpeechSynthesizer

- (id)initWithUtterance:(NSString*)utterance {
  self = [super init];
  if (self) {
    // scoped_nsobject takes ownership without retaining, hence the
    // explicit retain.
    utterance_.reset([utterance retain]);
    didSpeak_ = false;
  }
  return self;
}

- (bool)startSpeakingRetainedUtterance {
  CHECK(!didSpeak_);
  CHECK(utterance_);
  didSpeak_ = true;
  return [super startSpeakingString:utterance_];
}

- (bool)startSpeakingString:(NSString*)utterance {
  CHECK(false);
  return false;
}

@end