// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

/**
 * @fileoverview
 * This is a component extension that implements a text-to-speech (TTS)
 * engine powered by Google's speech synthesis API.
 *
 * This is an "event page", so it's not loaded when the API isn't being used,
 * and doesn't waste resources. When a web page or web app makes a speech
 * request and the parameters match one of the voices in this extension's
 * manifest, it makes a request to Google's API using Chrome's private key
 * and plays the resulting speech using HTML5 audio.
 */

/**
 * The main class for this extension. Adds listeners to
 * chrome.ttsEngine.onSpeak, onStop, onPause and onResume and implements
 * them using Google's speech synthesis API.
 * @constructor
 */
function TtsExtension() {
  /**
   * A mapping from voice name to language and gender, derived from the
   * manifest file. This is used in case the speech synthesis request
   * specifies a voice name but doesn't specify a language code or gender.
   * Created per instance so that instances never share one mutable map.
   * @type {Object.<string, {lang: string, gender: string}>}
   * @private
   */
  this.voiceNameToLangAndGender_ = {};
}

TtsExtension.prototype = {
  /**
   * The url prefix of the speech server, including static query
   * parameters that don't change.
   * @type {string}
   * @const
   * @private
   */
  SPEECH_SERVER_URL_:
      'https://www.google.com/speech-api/v2/synthesize?' +
      'enc=mpeg&client=chromium',

  /**
   * Maximum utterance length, in UTF-16 code units. Both Chrome's tts
   * extension api and the web speech api specify 32k as the maximum limit
   * for an utterance.
   * @type {number}
   * @const
   * @private
   */
  MAX_UTTERANCE_LENGTH_: 32768,

  /**
   * A mapping from language and gender to voice name, hardcoded for now
   * until the speech synthesis server capabilities response provides this.
   * The key of this map is of the form '<lang>-<gender>'.
   * @type {Object.<string, string>}
   * @private
   */
  LANG_AND_GENDER_TO_VOICE_NAME_: {
    'en-gb-male': 'rjs',
    'en-gb-female': 'fis'
  },

  /**
   * The arguments passed to the onSpeak event handler for the utterance
   * that's currently being spoken. Should be null when no object is
   * pending. audioRequested becomes true once the audio element's src has
   * been set for this utterance; audio events that arrive before that
   * belong to an earlier utterance and are ignored.
   *
   * @type {?{utterance: string, options: Object, callback: Function,
   *          audioRequested: boolean}}
   * @private
   */
  currentUtterance_: null,

  /**
   * The HTML5 audio element we use for playing the sound served by the
   * speech server.
   * @type {HTMLAudioElement}
   * @private
   */
  audioElement_: null,

  /**
   * This is the main function called to initialize this extension.
   * Initializes data structures and adds event listeners.
   */
  init: function() {
    // Get voices from manifest.
    var voices = chrome.app.getDetails().tts_engine.voices;
    for (var i = 0; i < voices.length; i++) {
      this.voiceNameToLangAndGender_[voices[i].voice_name] = {
        lang: voices[i].lang,
        gender: voices[i].gender
      };
    }

    // Initialize the audio element and event listeners on it.
    this.audioElement_ = document.createElement('audio');
    document.body.appendChild(this.audioElement_);
    this.audioElement_.addEventListener(
        'ended', this.onStop_.bind(this), false);
    this.audioElement_.addEventListener(
        'canplaythrough', this.onStart_.bind(this), false);
    // Without this listener a failed load would leave the utterance
    // pending forever, with neither an 'end' nor an 'error' event.
    this.audioElement_.addEventListener(
        'error', this.onAudioError_.bind(this), false);

    // Install event listeners for the ttsEngine API.
    chrome.ttsEngine.onSpeak.addListener(this.onSpeak_.bind(this));
    chrome.ttsEngine.onStop.addListener(this.onStop_.bind(this));
    chrome.ttsEngine.onPause.addListener(this.onPause_.bind(this));
    chrome.ttsEngine.onResume.addListener(this.onResume_.bind(this));
  },

  /**
   * Handler for the chrome.ttsEngine.onSpeak interface.
   * Gets Chrome's Google API key and then uses it to generate a request
   * url for the requested speech utterance. Sets that url as the source
   * of the HTML5 audio element.
   * @param {string} utterance The text to be spoken.
   * @param {Object} options Options to control the speech, as defined
   *     in the Chrome ttsEngine extension API.
   * @param {function(Object)} callback Function that receives the tts
   *     events ('start', 'end', 'error') for this utterance.
   * @private
   */
  onSpeak_: function(utterance, options, callback) {
    // Truncate the utterance if it's too long.
    if (utterance.length > this.MAX_UTTERANCE_LENGTH_)
      utterance = utterance.substring(0, this.MAX_UTTERANCE_LENGTH_);

    try {
      // First, stop any pending audio.
      this.onStop_();

      var current = {
        utterance: utterance,
        options: options,
        callback: callback,
        audioRequested: false
      };
      this.currentUtterance_ = current;

      var lang = options.lang;
      var gender = options.gender;
      // Only let the voice name override lang/gender when the manifest
      // actually declared that voice; otherwise fall back to the options
      // instead of throwing on an undefined map entry.
      if (options.voiceName &&
          Object.prototype.hasOwnProperty.call(
              this.voiceNameToLangAndGender_, options.voiceName)) {
        var voiceInfo = this.voiceNameToLangAndGender_[options.voiceName];
        lang = voiceInfo.lang;
        gender = voiceInfo.gender;
      }

      if (!lang)
        lang = navigator.language;

      // Look up the specific voice name for this language and gender.
      // If it's not in the map, it doesn't matter - the language will
      // be used directly. This is only used for languages where more
      // than one gender is actually available.
      var voiceKey = lang.toLowerCase() + '-' + gender;
      var voiceName = this.LANG_AND_GENDER_TO_VOICE_NAME_[voiceKey];

      chrome.systemPrivate.getApiKey((function(apiKey) {
        // The key arrives asynchronously. If this utterance was stopped or
        // replaced in the meantime, don't start loading audio for it.
        if (this.currentUtterance_ !== current)
          return;

        // The enclosing try/catch has already returned by the time this
        // runs, so failures here have to be reported separately.
        try {
          var url = this.buildRequestUrl_(
              apiKey, utterance, lang, voiceName, options);

          // This begins loading the audio but does not play it.
          // When enough of the audio has loaded to begin playback,
          // the 'canplaythrough' handler will call this.onStart_,
          // which sends a start event to the ttsEngine callback and
          // then begins playing audio.
          current.audioRequested = true;
          this.audioElement_.src = url;
        } catch (err) {
          this.failCurrentUtterance_(callback, err);
        }
      }).bind(this));
    } catch (err) {
      this.failCurrentUtterance_(callback, err);
    }
  },

  /**
   * Builds the speech server request url for one utterance.
   * @param {string} apiKey Chrome's Google API key.
   * @param {string} utterance The (already truncated) text to be spoken.
   * @param {string} lang The language code; sent lower-cased.
   * @param {string|undefined} voiceName Server voice name, if one is
   *     known for the language and gender.
   * @param {Object} options The ttsEngine speech options; rate and pitch
   *     are read from it.
   * @return {string} The complete request url.
   * @private
   */
  buildRequestUrl_: function(apiKey, utterance, lang, voiceName, options) {
    var url = this.SPEECH_SERVER_URL_;
    url += '&key=' + encodeURIComponent(apiKey);
    url += '&text=' + encodeURIComponent(utterance);
    url += '&lang=' + encodeURIComponent(lang.toLowerCase());

    if (voiceName)
      url += '&name=' + encodeURIComponent(voiceName);

    if (options.rate) {
      // Input rate is between 0.1 and 10.0 with a default of 1.0.
      // Output speed is between 0.0 and 1.0 with a default of 0.5, so
      // anything faster than 2.0 is clamped to the server's maximum.
      url += '&speed=' + this.clampToUnit_(options.rate / 2.0);
    }

    // Checked by type rather than truthiness: a pitch of 0.0 is a valid
    // request and must not be mistaken for "not specified".
    if (typeof options.pitch === 'number' && !isNaN(options.pitch)) {
      // Input pitch is between 0.0 and 2.0 with a default of 1.0.
      // Output pitch is between 0.0 and 1.0 with a default of 0.5.
      url += '&pitch=' + this.clampToUnit_(options.pitch / 2.0);
    }

    return url;
  },

  /**
   * Restricts a number to the range [0.0, 1.0].
   * @param {number} value The value to clamp.
   * @return {number} The clamped value.
   * @private
   */
  clampToUnit_: function(value) {
    return Math.max(0.0, Math.min(1.0, value));
  },

  /**
   * Logs an error, clears the current utterance and sends an 'error'
   * event to the given ttsEngine callback.
   * @param {function(Object)} callback The callback of the failed
   *     utterance.
   * @param {*} err The error or message to report.
   * @private
   */
  failCurrentUtterance_: function(callback, err) {
    console.error(String(err));
    // Clear the state first so it is reset even if the callback throws.
    this.currentUtterance_ = null;
    callback({
      'type': 'error',
      'errorMessage': String(err)
    });
  },

  /**
   * Handler for the chrome.ttsEngine.onStop interface.
   * Called either when the ttsEngine API requests us to stop, or when
   * we reach the end of the audio stream. Pause the audio element to
   * silence it, and send a callback to the ttsEngine API to let it know
   * that we've completed. Note that the ttsEngine API manages callback
   * messages and will automatically replace the 'end' event with a
   * more specific callback like 'interrupted' when sending it to the
   * TTS client.
   * @private
   */
  onStop_: function() {
    var current = this.currentUtterance_;
    if (!current)
      return;

    // Clear the state first so it is reset even if the callback throws.
    this.currentUtterance_ = null;
    this.audioElement_.pause();
    current.callback({
      'type': 'end',
      'charIndex': current.utterance.length
    });
  },

  /**
   * Handler for the canplaythrough event on the audio element.
   * Called when the audio element has buffered enough audio to begin
   * playback. Send the 'start' event to the ttsEngine callback and
   * then begin playing the audio element.
   * @private
   */
  onStart_: function() {
    var current = this.currentUtterance_;
    // Ignore events for audio that doesn't belong to the current
    // utterance (its src has not been assigned yet).
    if (!current || !current.audioRequested)
      return;

    if (current.options.volume !== undefined) {
      // Both APIs use the same range for volume, between 0.0 and 1.0.
      this.audioElement_.volume = current.options.volume;
    }
    this.audioElement_.play();
    current.callback({
      'type': 'start',
      'charIndex': 0
    });
  },

  /**
   * Handler for the error event on the audio element.
   * Reports the failure to the ttsEngine callback so the utterance
   * doesn't stay pending when the audio can't be loaded or decoded.
   * @private
   */
  onAudioError_: function() {
    var current = this.currentUtterance_;
    if (!current || !current.audioRequested)
      return;

    var mediaError = this.audioElement_.error;
    var message = 'Unable to load or play synthesized speech';
    if (mediaError)
      message += ' (media error code ' + mediaError.code + ')';
    this.failCurrentUtterance_(current.callback, message);
  },

  /**
   * Handler for the chrome.ttsEngine.onPause interface.
   * Pauses audio if we're in the middle of an utterance.
   * @private
   */
  onPause_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onResume interface.
   * Resumes audio if we're in the middle of an utterance.
   * @private
   */
  onResume_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.play();
    }
  }

};

// Start the engine only where the ttsEngine API is available, so that the
// class can also be loaded in environments without the Chrome extension
// APIs without side effects.
if (typeof chrome !== 'undefined' && chrome.ttsEngine) {
  (new TtsExtension()).init();
}