1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 package com.example.android.ttsengine; 17 18 import android.content.Context; 19 import android.content.SharedPreferences; 20 import android.media.AudioFormat; 21 import android.speech.tts.SynthesisCallback; 22 import android.speech.tts.SynthesisRequest; 23 import android.speech.tts.TextToSpeech; 24 import android.speech.tts.TextToSpeechService; 25 import android.util.Log; 26 27 import java.io.BufferedReader; 28 import java.io.IOException; 29 import java.io.InputStream; 30 import java.io.InputStreamReader; 31 import java.nio.ByteBuffer; 32 import java.nio.ByteOrder; 33 import java.util.HashMap; 34 import java.util.Map; 35 36 /** 37 * A text to speech engine that generates "speech" that a robot might understand. 38 * The engine supports two different "languages", each with their own frequency 39 * mappings. 40 * 41 * It exercises all aspects of the Text to speech engine API 42 * {@link android.speech.tts.TextToSpeechService}. 43 */ 44 public class RobotSpeakTtsService extends TextToSpeechService { 45 private static final String TAG = "ExampleTtsService"; 46 47 /* 48 * This is the sampling rate of our output audio. This engine outputs 49 * audio at 16khz 16bits per sample PCM audio. 50 */ 51 private static final int SAMPLING_RATE_HZ = 16000; 52 53 /* 54 * We multiply by a factor of two since each sample contains 16 bits (2 bytes). 55 */ 56 private final byte[] mAudioBuffer = new byte[SAMPLING_RATE_HZ * 2]; 57 58 private Map<Character, Integer> mFrequenciesMap; 59 private volatile String[] mCurrentLanguage = null; 60 private volatile boolean mStopRequested = false; 61 private SharedPreferences mSharedPrefs = null; 62 63 @Override 64 public void onCreate() { 65 super.onCreate(); 66 mSharedPrefs = getSharedPreferences(GeneralSettingsFragment.SHARED_PREFS_NAME, 67 Context.MODE_PRIVATE); 68 // We load the default language when we start up. This isn't strictly 69 // required though, it can always be loaded lazily on the first call to 70 // onLoadLanguage or onSynthesizeText. This a tradeoff between memory usage 71 // and the latency of the first call. 72 onLoadLanguage("eng", "usa", ""); 73 } 74 75 @Override 76 public void onDestroy() { 77 super.onDestroy(); 78 } 79 80 @Override 81 protected String[] onGetLanguage() { 82 // Note that mCurrentLanguage is volatile because this can be called from 83 // multiple threads. 84 return mCurrentLanguage; 85 } 86 87 @Override 88 protected int onIsLanguageAvailable(String lang, String country, String variant) { 89 // The robot speak synthesizer supports only english. 90 if ("eng".equals(lang)) { 91 // We support two specific robot languages, the british robot language 92 // and the american robot language. 93 if ("USA".equals(country) || "GBR".equals(country)) { 94 // If the engine supported a specific variant, we would have 95 // something like. 96 // 97 // if ("android".equals(variant)) { 98 // return TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE; 99 // } 100 return TextToSpeech.LANG_COUNTRY_AVAILABLE; 101 } 102 103 // We support the language, but not the country. 104 return TextToSpeech.LANG_AVAILABLE; 105 } 106 107 return TextToSpeech.LANG_NOT_SUPPORTED; 108 } 109 110 /* 111 * Note that this method is synchronized, as is onSynthesizeText because 112 * onLoadLanguage can be called from multiple threads (while onSynthesizeText 113 * is always called from a single thread only). 114 */ 115 @Override 116 protected synchronized int onLoadLanguage(String lang, String country, String variant) { 117 final int isLanguageAvailable = onIsLanguageAvailable(lang, country, variant); 118 119 if (isLanguageAvailable == TextToSpeech.LANG_NOT_SUPPORTED) { 120 return isLanguageAvailable; 121 } 122 123 String loadCountry = country; 124 if (isLanguageAvailable == TextToSpeech.LANG_AVAILABLE) { 125 loadCountry = "USA"; 126 } 127 128 // If we've already loaded the requested language, we can return early. 129 if (mCurrentLanguage != null) { 130 if (mCurrentLanguage[0].equals(lang) && mCurrentLanguage[1].equals(country)) { 131 return isLanguageAvailable; 132 } 133 } 134 135 Map<Character, Integer> newFrequenciesMap = null; 136 try { 137 InputStream file = getAssets().open(lang + "-" + loadCountry + ".freq"); 138 newFrequenciesMap = buildFrequencyMap(file); 139 file.close(); 140 } catch (IOException e) { 141 Log.e(TAG, "Error loading data for : " + lang + "-" + country); 142 } 143 144 mFrequenciesMap = newFrequenciesMap; 145 mCurrentLanguage = new String[] { lang, loadCountry, ""}; 146 147 return isLanguageAvailable; 148 } 149 150 @Override 151 protected void onStop() { 152 mStopRequested = true; 153 } 154 155 @Override 156 protected synchronized void onSynthesizeText(SynthesisRequest request, 157 SynthesisCallback callback) { 158 // Note that we call onLoadLanguage here since there is no guarantee 159 // that there would have been a prior call to this function. 160 int load = onLoadLanguage(request.getLanguage(), request.getCountry(), 161 request.getVariant()); 162 163 // We might get requests for a language we don't support - in which case 164 // we error out early before wasting too much time. 165 if (load == TextToSpeech.LANG_NOT_SUPPORTED) { 166 callback.error(); 167 return; 168 } 169 170 // At this point, we have loaded the language we need for synthesis and 171 // it is guaranteed that we support it so we proceed with synthesis. 172 173 // We denote that we are ready to start sending audio across to the 174 // framework. We use a fixed sampling rate (16khz), and send data across 175 // in 16bit PCM mono. 176 callback.start(SAMPLING_RATE_HZ, 177 AudioFormat.ENCODING_PCM_16BIT, 1 /* Number of channels. */); 178 179 // We then scan through each character of the request string and 180 // generate audio for it. 181 final String text = request.getText().toLowerCase(); 182 for (int i = 0; i < text.length(); ++i) { 183 char value = normalize(text.charAt(i)); 184 // It is crucial to call either of callback.error() or callback.done() to ensure 185 // that audio / other resources are released as soon as possible. 186 if (!generateOneSecondOfAudio(value, callback)) { 187 callback.error(); 188 return; 189 } 190 } 191 192 // Alright, we're done with our synthesis - yay! 193 callback.done(); 194 } 195 196 /* 197 * Normalizes a given character to the range 'a' - 'z' (inclusive). Our 198 * frequency mappings contain frequencies for each of these characters. 199 */ 200 private static char normalize(char input) { 201 if (input == ' ') { 202 return input; 203 } 204 205 if (input < 'a') { 206 return 'a'; 207 } 208 if (input > 'z') { 209 return 'z'; 210 } 211 212 return input; 213 } 214 215 private Map<Character, Integer> buildFrequencyMap(InputStream is) throws IOException { 216 BufferedReader br = new BufferedReader(new InputStreamReader(is)); 217 String line = null; 218 Map<Character, Integer> map = new HashMap<Character, Integer>(); 219 try { 220 while ((line = br.readLine()) != null) { 221 String[] parts = line.split(":"); 222 if (parts.length != 2) { 223 throw new IOException("Invalid line encountered: " + line); 224 } 225 map.put(parts[0].charAt(0), Integer.parseInt(parts[1])); 226 } 227 map.put(' ', 0); 228 return map; 229 } finally { 230 is.close(); 231 } 232 } 233 234 private boolean generateOneSecondOfAudio(char alphabet, SynthesisCallback cb) { 235 ByteBuffer buffer = ByteBuffer.wrap(mAudioBuffer).order(ByteOrder.LITTLE_ENDIAN); 236 237 // Someone called onStop, end the current synthesis and return. 238 // The mStopRequested variable will be reset at the beginning of the 239 // next synthesis. 240 // 241 // In general, a call to onStop( ) should make a best effort attempt 242 // to stop all processing for the *current* onSynthesizeText request (if 243 // one is active). 244 if (mStopRequested) { 245 return false; 246 } 247 248 249 if (mFrequenciesMap == null || !mFrequenciesMap.containsKey(alphabet)) { 250 return false; 251 } 252 253 final int frequency = mFrequenciesMap.get(alphabet); 254 255 if (frequency > 0) { 256 // This is the wavelength in samples. The frequency is chosen so that the 257 // waveLength is always a multiple of two and frequency divides the 258 // SAMPLING_RATE exactly. 259 final int waveLength = SAMPLING_RATE_HZ / frequency; 260 final int times = SAMPLING_RATE_HZ / waveLength; 261 262 for (int j = 0; j < times; ++j) { 263 // For a square curve, half of the values will be at Short.MIN_VALUE 264 // and the other half will be Short.MAX_VALUE. 265 for (int i = 0; i < waveLength / 2; ++i) { 266 buffer.putShort((short)(getAmplitude() * -1)); 267 } 268 for (int i = 0; i < waveLength / 2; ++i) { 269 buffer.putShort(getAmplitude()); 270 } 271 } 272 } else { 273 // Play a second of silence. 274 for (int i = 0; i < mAudioBuffer.length / 2; ++i) { 275 buffer.putShort((short) 0); 276 } 277 } 278 279 // Get the maximum allowed size of data we can send across in audioAvailable. 280 final int maxBufferSize = cb.getMaxBufferSize(); 281 int offset = 0; 282 while (offset < mAudioBuffer.length) { 283 int bytesToWrite = Math.min(maxBufferSize, mAudioBuffer.length - offset); 284 cb.audioAvailable(mAudioBuffer, offset, bytesToWrite); 285 offset += bytesToWrite; 286 } 287 return true; 288 } 289 290 private short getAmplitude() { 291 boolean whisper = mSharedPrefs.getBoolean(GeneralSettingsFragment.WHISPER_KEY, false); 292 return (short) (whisper ? 2048 : 8192); 293 } 294 } 295