/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.example.android.ttsengine;

import android.content.Context;
import android.content.SharedPreferences;
import android.media.AudioFormat;
import android.speech.tts.SynthesisCallback;
import android.speech.tts.SynthesisRequest;
import android.speech.tts.TextToSpeech;
import android.speech.tts.TextToSpeechService;
import android.util.Log;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
/**
 * A text-to-speech engine that generates "speech" a robot might understand.
 * The engine supports two different "languages", each with its own frequency
 * mapping.
 *
 * It exercises all aspects of the text-to-speech engine API
 * {@link android.speech.tts.TextToSpeechService}.
 */
public class RobotSpeakTtsService extends TextToSpeechService {
    private static final String TAG = "ExampleTtsService";

    /*
     * The sampling rate of our output audio: this engine produces 16 kHz,
     * 16-bit-per-sample mono PCM audio.
     */
    private static final int SAMPLING_RATE_HZ = 16000;

    /*
     * One second of audio. We multiply by two because each sample is 16 bits
     * (2 bytes), so the buffer holds 16000 * 2 = 32000 bytes.
     */
    private final byte[] mAudioBuffer = new byte[SAMPLING_RATE_HZ * 2];

    private Map<Character, Integer> mFrequenciesMap;
    private volatile String[] mCurrentLanguage = null;
    private volatile boolean mStopRequested = false;
    private SharedPreferences mSharedPrefs = null;

    @Override
    public void onCreate() {
        super.onCreate();
        mSharedPrefs = getSharedPreferences(GeneralSettingsFragment.SHARED_PREFS_NAME,
                Context.MODE_PRIVATE);
        // We load the default language at startup. This isn't strictly required:
        // the language can always be loaded lazily on the first call to
        // onLoadLanguage or onSynthesizeText. It is a tradeoff between memory usage
        // and the latency of the first call.
        onLoadLanguage("eng", "usa", "");
    }

    @Override
    public void onDestroy() {
        super.onDestroy();
    }

    @Override
    protected String[] onGetLanguage() {
        // Note that mCurrentLanguage is volatile because this can be called from
        // multiple threads.
        return mCurrentLanguage;
    }

    @Override
    protected int onIsLanguageAvailable(String lang, String country, String variant) {
        // The robot speak synthesizer supports only English.
        if ("eng".equals(lang)) {
            // We support two specific robot languages: the British robot language
            // and the American robot language.
            if ("USA".equals(country) || "GBR".equals(country)) {
                // If the engine supported a specific variant, we would have
                // something like:
                //
                // if ("android".equals(variant)) {
                //     return TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE;
                // }
                return TextToSpeech.LANG_COUNTRY_AVAILABLE;
            }

            // We support the language, but not the country.
            return TextToSpeech.LANG_AVAILABLE;
        }

        return TextToSpeech.LANG_NOT_SUPPORTED;
    }
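
    /*
     * For illustration only: a minimal sketch of how a client app might probe this
     * engine's language support through the framework API, assuming a TextToSpeech
     * instance that is connected to this engine (see the sketch after
     * onSynthesizeText below). The locales are example values; the framework
     * converts them to ISO3 codes and forwards the query to onIsLanguageAvailable.
     *
     * int usa = tts.isLanguageAvailable(Locale.US);     // expected: LANG_COUNTRY_AVAILABLE
     * int gbr = tts.isLanguageAvailable(Locale.UK);     // expected: LANG_COUNTRY_AVAILABLE
     * int fra = tts.isLanguageAvailable(Locale.FRANCE); // expected: LANG_NOT_SUPPORTED
     */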

    /*
     * Note that this method is synchronized, as is onSynthesizeText, because
     * onLoadLanguage can be called from multiple threads (while onSynthesizeText
     * is always called from a single thread).
     */
    @Override
    protected synchronized int onLoadLanguage(String lang, String country, String variant) {
        final int isLanguageAvailable = onIsLanguageAvailable(lang, country, variant);

        if (isLanguageAvailable == TextToSpeech.LANG_NOT_SUPPORTED) {
            return isLanguageAvailable;
        }

        String loadCountry = country;
        if (isLanguageAvailable == TextToSpeech.LANG_AVAILABLE) {
            loadCountry = "USA";
        }

        // If we've already loaded the requested language, we can return early.
        // Note that we compare against loadCountry (the normalized country code)
        // so that a request without a country doesn't force a redundant reload.
        if (mCurrentLanguage != null) {
            if (mCurrentLanguage[0].equals(lang) && mCurrentLanguage[1].equals(loadCountry)) {
                return isLanguageAvailable;
            }
        }

        Map<Character, Integer> newFrequenciesMap = null;
        try {
            InputStream file = getAssets().open(lang + "-" + loadCountry + ".freq");
            newFrequenciesMap = buildFrequencyMap(file);
            file.close();
        } catch (IOException e) {
            Log.e(TAG, "Error loading data for: " + lang + "-" + loadCountry);
        }

        mFrequenciesMap = newFrequenciesMap;
        mCurrentLanguage = new String[] { lang, loadCountry, ""};

        return isLanguageAvailable;
    }
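
    /*
     * Worked example (values follow from the checks above): a request for
     * ("eng", "", "") is reported as LANG_AVAILABLE, so loadCountry becomes "USA",
     * the engine reads the "eng-USA.freq" asset, and onGetLanguage() subsequently
     * returns { "eng", "USA", "" }.
     */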

    @Override
    protected void onStop() {
        mStopRequested = true;
    }

    @Override
    protected synchronized void onSynthesizeText(SynthesisRequest request,
            SynthesisCallback callback) {
        // A previous request may have been interrupted by onStop; clear the stop
        // flag so this synthesis can run (see generateOneSecondOfAudio).
        mStopRequested = false;

        // Note that we call onLoadLanguage here since there is no guarantee
        // that there has been a prior call to it for the requested language.
        int load = onLoadLanguage(request.getLanguage(), request.getCountry(),
                request.getVariant());

        // We might get requests for a language we don't support, in which case
        // we error out early before wasting too much time.
        if (load == TextToSpeech.LANG_NOT_SUPPORTED) {
            callback.error();
            return;
        }

        // At this point we have loaded the language we need for synthesis, and
        // it is guaranteed that we support it, so we proceed with synthesis.

        // We signal that we are ready to start sending audio to the framework.
        // We use a fixed sampling rate (16 kHz) and send the data as 16-bit
        // mono PCM.
        callback.start(SAMPLING_RATE_HZ,
                AudioFormat.ENCODING_PCM_16BIT, 1 /* Number of channels. */);

        // We then scan through each character of the request string and
        // generate audio for it.
        final String text = request.getText().toLowerCase(Locale.US);
        for (int i = 0; i < text.length(); ++i) {
            char value = normalize(text.charAt(i));
            // It is crucial to call either callback.error() or callback.done() to ensure
            // that audio / other resources are released as soon as possible.
            if (!generateOneSecondOfAudio(value, callback)) {
                callback.error();
                return;
            }
        }

        // Alright, we're done with our synthesis - yay!
        callback.done();
    }
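
    /*
     * For illustration only: a rough sketch of how a client app might drive this
     * engine once it is installed and selected. The package name is this sample's;
     * the surrounding client code (the mTts field, the Activity context, and the
     * API-21 four-argument speak() overload) is assumed.
     *
     * mTts = new TextToSpeech(this, new TextToSpeech.OnInitListener() {
     *     @Override
     *     public void onInit(int status) {
     *         if (status == TextToSpeech.SUCCESS) {
     *             mTts.setLanguage(Locale.US);
     *             mTts.speak("hello robot", TextToSpeech.QUEUE_FLUSH, null, "utteranceId");
     *         }
     *     }
     * }, "com.example.android.ttsengine");
     *
     * The framework then calls onSynthesizeText above with the text and locale and
     * streams the resulting PCM buffers to the audio output.
     */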

    /*
     * Normalizes a given character to the range 'a' - 'z' (inclusive); spaces are
     * passed through unchanged and are synthesized as silence. Our frequency
     * mappings contain frequencies for each of these characters.
     */
    private static char normalize(char input) {
        if (input == ' ') {
            return input;
        }

        if (input < 'a') {
            return 'a';
        }
        if (input > 'z') {
            return 'z';
        }

        return input;
    }

    private Map<Character, Integer> buildFrequencyMap(InputStream is) throws IOException {
        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line = null;
        Map<Character, Integer> map = new HashMap<Character, Integer>();
        try {
            while ((line = br.readLine()) != null) {
                String[] parts = line.split(":");
                if (parts.length != 2) {
                    throw new IOException("Invalid line encountered: " + line);
                }
                map.put(parts[0].charAt(0), Integer.parseInt(parts[1]));
            }
            map.put(' ', 0);
            return map;
        } finally {
            is.close();
        }
    }
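
    /*
     * The parser above expects one "character:frequency" pair per line. For
     * example (illustrative values only, not the actual contents of the bundled
     * .freq assets):
     *
     * a:100
     * b:200
     * c:400
     *
     * Frequencies should divide SAMPLING_RATE_HZ exactly and yield an even
     * wavelength; see generateOneSecondOfAudio below.
     */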

    private boolean generateOneSecondOfAudio(char alphabet, SynthesisCallback cb) {
        ByteBuffer buffer = ByteBuffer.wrap(mAudioBuffer).order(ByteOrder.LITTLE_ENDIAN);

        // If someone called onStop, end the current synthesis and return. The
        // mStopRequested variable is reset at the beginning of the next synthesis,
        // in onSynthesizeText.
        //
        // In general, a call to onStop() should make a best-effort attempt to
        // stop all processing for the *current* onSynthesizeText request (if
        // one is active).
        if (mStopRequested) {
            return false;
        }

        if (mFrequenciesMap == null || !mFrequenciesMap.containsKey(alphabet)) {
            return false;
        }

        final int frequency = mFrequenciesMap.get(alphabet);

        if (frequency > 0) {
            // This is the wavelength in samples. The frequencies are chosen so that
            // waveLength is always even and frequency divides SAMPLING_RATE_HZ
            // exactly, which means times equals the frequency (cycles per second).
            final int waveLength = SAMPLING_RATE_HZ / frequency;
            final int times = SAMPLING_RATE_HZ / waveLength;

            for (int j = 0; j < times; ++j) {
                // For a square wave, half of the samples in each cycle sit at
                // -amplitude and the other half at +amplitude (see getAmplitude()).
                for (int i = 0; i < waveLength / 2; ++i) {
                    buffer.putShort((short)(getAmplitude() * -1));
                }
                for (int i = 0; i < waveLength / 2; ++i) {
                    buffer.putShort(getAmplitude());
                }
            }
        } else {
            // Play a second of silence.
            for (int i = 0; i < mAudioBuffer.length / 2; ++i) {
                buffer.putShort((short) 0);
            }
        }

        // Get the maximum allowed size of data we can send across in audioAvailable.
        final int maxBufferSize = cb.getMaxBufferSize();
        int offset = 0;
        while (offset < mAudioBuffer.length) {
            int bytesToWrite = Math.min(maxBufferSize, mAudioBuffer.length - offset);
            cb.audioAvailable(mAudioBuffer, offset, bytesToWrite);
            offset += bytesToWrite;
        }
        return true;
    }
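
    /*
     * Worked example for generateOneSecondOfAudio, assuming some character maps to
     * 400 Hz: waveLength = 16000 / 400 = 40 samples, so each cycle writes 20
     * negative and 20 positive samples, and times = 16000 / 40 = 400 cycles fill
     * exactly 16000 samples (32000 bytes), i.e. one second of audio, which is then
     * streamed to the framework in chunks no larger than getMaxBufferSize().
     */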

    private short getAmplitude() {
        boolean whisper = mSharedPrefs.getBoolean(GeneralSettingsFragment.WHISPER_KEY, false);
        return (short) (whisper ? 2048 : 8192);
    }
}