Home | History | Annotate | Download | only in tts
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 package android.speech.tts;
     17 
     18 import android.media.AudioFormat;
     19 import android.os.FileUtils;
     20 import android.util.Log;
     21 
     22 import java.io.File;
     23 import java.io.IOException;
     24 import java.io.RandomAccessFile;
     25 import java.nio.ByteBuffer;
     26 import java.nio.ByteOrder;
     27 
     28 /**
     29  * Speech synthesis request that writes the audio to a WAV file.
     30  */
     31 class FileSynthesisCallback extends AbstractSynthesisCallback {
     32 
     33     private static final String TAG = "FileSynthesisRequest";
     34     private static final boolean DBG = false;
     35 
     36     private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
     37 
     38     private static final int WAV_HEADER_LENGTH = 44;
     39     private static final short WAV_FORMAT_PCM = 0x0001;
     40 
     41     private final Object mStateLock = new Object();
     42     private final File mFileName;
     43     private int mSampleRateInHz;
     44     private int mAudioFormat;
     45     private int mChannelCount;
     46     private RandomAccessFile mFile;
     47     private boolean mStopped = false;
     48     private boolean mDone = false;
     49 
     50     FileSynthesisCallback(File fileName) {
     51         mFileName = fileName;
     52     }
     53 
     54     @Override
     55     void stop() {
     56         synchronized (mStateLock) {
     57             mStopped = true;
     58             cleanUp();
     59         }
     60     }
     61 
     62     /**
     63      * Must be called while holding the monitor on {@link #mStateLock}.
     64      */
     65     private void cleanUp() {
     66         closeFileAndWidenPermissions();
     67         if (mFile != null) {
     68             mFileName.delete();
     69         }
     70     }
     71 
     72     /**
     73      * Must be called while holding the monitor on {@link #mStateLock}.
     74      */
     75     private void closeFileAndWidenPermissions() {
     76         try {
     77             if (mFile != null) {
     78                 mFile.close();
     79                 mFile = null;
     80             }
     81         } catch (IOException ex) {
     82             Log.e(TAG, "Failed to close " + mFileName + ": " + ex);
     83         }
     84 
     85         try {
     86             // Make the written file readable and writeable by everyone.
     87             // This allows the app that requested synthesis to read the file.
     88             //
     89             // Note that the directory this file was written must have already
     90             // been world writeable in order it to have been
     91             // written to in the first place.
     92             FileUtils.setPermissions(mFileName.getAbsolutePath(), 0666, -1, -1); //-rw-rw-rw
     93         } catch (SecurityException se) {
     94             Log.e(TAG, "Security exception setting rw permissions on : " + mFileName);
     95         }
     96     }
     97 
     98     @Override
     99     public int getMaxBufferSize() {
    100         return MAX_AUDIO_BUFFER_SIZE;
    101     }
    102 
    103     @Override
    104     boolean isDone() {
    105         return mDone;
    106     }
    107 
    108     @Override
    109     public int start(int sampleRateInHz, int audioFormat, int channelCount) {
    110         if (DBG) {
    111             Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
    112                     + "," + channelCount + ")");
    113         }
    114         synchronized (mStateLock) {
    115             if (mStopped) {
    116                 if (DBG) Log.d(TAG, "Request has been aborted.");
    117                 return TextToSpeech.ERROR;
    118             }
    119             if (mFile != null) {
    120                 cleanUp();
    121                 throw new IllegalArgumentException("FileSynthesisRequest.start() called twice");
    122             }
    123             mSampleRateInHz = sampleRateInHz;
    124             mAudioFormat = audioFormat;
    125             mChannelCount = channelCount;
    126             try {
    127                 mFile = new RandomAccessFile(mFileName, "rw");
    128                 // Reserve space for WAV header
    129                 mFile.write(new byte[WAV_HEADER_LENGTH]);
    130                 return TextToSpeech.SUCCESS;
    131             } catch (IOException ex) {
    132                 Log.e(TAG, "Failed to open " + mFileName + ": " + ex);
    133                 cleanUp();
    134                 return TextToSpeech.ERROR;
    135             }
    136         }
    137     }
    138 
    139     @Override
    140     public int audioAvailable(byte[] buffer, int offset, int length) {
    141         if (DBG) {
    142             Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
    143                     + "," + length + ")");
    144         }
    145         synchronized (mStateLock) {
    146             if (mStopped) {
    147                 if (DBG) Log.d(TAG, "Request has been aborted.");
    148                 return TextToSpeech.ERROR;
    149             }
    150             if (mFile == null) {
    151                 Log.e(TAG, "File not open");
    152                 return TextToSpeech.ERROR;
    153             }
    154             try {
    155                 mFile.write(buffer, offset, length);
    156                 return TextToSpeech.SUCCESS;
    157             } catch (IOException ex) {
    158                 Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
    159                 cleanUp();
    160                 return TextToSpeech.ERROR;
    161             }
    162         }
    163     }
    164 
    165     @Override
    166     public int done() {
    167         if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
    168         synchronized (mStateLock) {
    169             if (mStopped) {
    170                 if (DBG) Log.d(TAG, "Request has been aborted.");
    171                 return TextToSpeech.ERROR;
    172             }
    173             if (mFile == null) {
    174                 Log.e(TAG, "File not open");
    175                 return TextToSpeech.ERROR;
    176             }
    177             try {
    178                 // Write WAV header at start of file
    179                 mFile.seek(0);
    180                 int dataLength = (int) (mFile.length() - WAV_HEADER_LENGTH);
    181                 mFile.write(
    182                         makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength));
    183                 closeFileAndWidenPermissions();
    184                 mDone = true;
    185                 return TextToSpeech.SUCCESS;
    186             } catch (IOException ex) {
    187                 Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
    188                 cleanUp();
    189                 return TextToSpeech.ERROR;
    190             }
    191         }
    192     }
    193 
    194     @Override
    195     public void error() {
    196         if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
    197         synchronized (mStateLock) {
    198             cleanUp();
    199         }
    200     }
    201 
    202     private byte[] makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
    203             int dataLength) {
    204         // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
    205         int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
    206         int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
    207         short blockAlign = (short) (sampleSizeInBytes * channelCount);
    208         short bitsPerSample = (short) (sampleSizeInBytes * 8);
    209 
    210         byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
    211         ByteBuffer header = ByteBuffer.wrap(headerBuf);
    212         header.order(ByteOrder.LITTLE_ENDIAN);
    213 
    214         header.put(new byte[]{ 'R', 'I', 'F', 'F' });
    215         header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
    216         header.put(new byte[]{ 'W', 'A', 'V', 'E' });
    217         header.put(new byte[]{ 'f', 'm', 't', ' ' });
    218         header.putInt(16);  // size of fmt chunk
    219         header.putShort(WAV_FORMAT_PCM);
    220         header.putShort((short) channelCount);
    221         header.putInt(sampleRateInHz);
    222         header.putInt(byteRate);
    223         header.putShort(blockAlign);
    224         header.putShort(bitsPerSample);
    225         header.put(new byte[]{ 'd', 'a', 't', 'a' });
    226         header.putInt(dataLength);
    227 
    228         return headerBuf;
    229     }
    230 
    231 }
    232