Home | History | Annotate | Download | only in tts
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 package android.speech.tts;
     17 
     18 import android.media.AudioFormat;
     19 import android.util.Log;
     20 
     21 import java.io.File;
     22 import java.io.FileOutputStream;
     23 import java.io.IOException;
     24 import java.io.RandomAccessFile;
     25 import java.nio.ByteBuffer;
     26 import java.nio.ByteOrder;
     27 
     28 /**
     29  * Speech synthesis request that writes the audio to a WAV file.
     30  */
     31 class FileSynthesisCallback extends AbstractSynthesisCallback {
     32 
     33     private static final String TAG = "FileSynthesisRequest";
     34     private static final boolean DBG = false;
     35 
     36     private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
     37 
     38     private static final int WAV_HEADER_LENGTH = 44;
     39     private static final short WAV_FORMAT_PCM = 0x0001;
     40 
     41     private final Object mStateLock = new Object();
     42     private final File mFileName;
     43     private int mSampleRateInHz;
     44     private int mAudioFormat;
     45     private int mChannelCount;
     46     private RandomAccessFile mFile;
     47     private boolean mStopped = false;
     48     private boolean mDone = false;
     49 
     50     FileSynthesisCallback(File fileName) {
     51         mFileName = fileName;
     52     }
     53 
     54     @Override
     55     void stop() {
     56         synchronized (mStateLock) {
     57             mStopped = true;
     58             cleanUp();
     59         }
     60     }
     61 
     62     /**
     63      * Must be called while holding the monitor on {@link #mStateLock}.
     64      */
     65     private void cleanUp() {
     66         closeFile();
     67         if (mFile != null) {
     68             mFileName.delete();
     69         }
     70     }
     71 
     72     /**
     73      * Must be called while holding the monitor on {@link #mStateLock}.
     74      */
     75     private void closeFile() {
     76         try {
     77             if (mFile != null) {
     78                 mFile.close();
     79                 mFile = null;
     80             }
     81         } catch (IOException ex) {
     82             Log.e(TAG, "Failed to close " + mFileName + ": " + ex);
     83         }
     84     }
     85 
     86     @Override
     87     public int getMaxBufferSize() {
     88         return MAX_AUDIO_BUFFER_SIZE;
     89     }
     90 
     91     @Override
     92     boolean isDone() {
     93         return mDone;
     94     }
     95 
     96     @Override
     97     public int start(int sampleRateInHz, int audioFormat, int channelCount) {
     98         if (DBG) {
     99             Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
    100                     + "," + channelCount + ")");
    101         }
    102         synchronized (mStateLock) {
    103             if (mStopped) {
    104                 if (DBG) Log.d(TAG, "Request has been aborted.");
    105                 return TextToSpeech.ERROR;
    106             }
    107             if (mFile != null) {
    108                 cleanUp();
    109                 throw new IllegalArgumentException("FileSynthesisRequest.start() called twice");
    110             }
    111             mSampleRateInHz = sampleRateInHz;
    112             mAudioFormat = audioFormat;
    113             mChannelCount = channelCount;
    114             try {
    115                 mFile = new RandomAccessFile(mFileName, "rw");
    116                 // Reserve space for WAV header
    117                 mFile.write(new byte[WAV_HEADER_LENGTH]);
    118                 return TextToSpeech.SUCCESS;
    119             } catch (IOException ex) {
    120                 Log.e(TAG, "Failed to open " + mFileName + ": " + ex);
    121                 cleanUp();
    122                 return TextToSpeech.ERROR;
    123             }
    124         }
    125     }
    126 
    127     @Override
    128     public int audioAvailable(byte[] buffer, int offset, int length) {
    129         if (DBG) {
    130             Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
    131                     + "," + length + ")");
    132         }
    133         synchronized (mStateLock) {
    134             if (mStopped) {
    135                 if (DBG) Log.d(TAG, "Request has been aborted.");
    136                 return TextToSpeech.ERROR;
    137             }
    138             if (mFile == null) {
    139                 Log.e(TAG, "File not open");
    140                 return TextToSpeech.ERROR;
    141             }
    142             try {
    143                 mFile.write(buffer, offset, length);
    144                 return TextToSpeech.SUCCESS;
    145             } catch (IOException ex) {
    146                 Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
    147                 cleanUp();
    148                 return TextToSpeech.ERROR;
    149             }
    150         }
    151     }
    152 
    153     @Override
    154     public int done() {
    155         if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
    156         synchronized (mStateLock) {
    157             if (mStopped) {
    158                 if (DBG) Log.d(TAG, "Request has been aborted.");
    159                 return TextToSpeech.ERROR;
    160             }
    161             if (mFile == null) {
    162                 Log.e(TAG, "File not open");
    163                 return TextToSpeech.ERROR;
    164             }
    165             try {
    166                 // Write WAV header at start of file
    167                 mFile.seek(0);
    168                 int dataLength = (int) (mFile.length() - WAV_HEADER_LENGTH);
    169                 mFile.write(
    170                         makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength));
    171                 closeFile();
    172                 mDone = true;
    173                 return TextToSpeech.SUCCESS;
    174             } catch (IOException ex) {
    175                 Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
    176                 cleanUp();
    177                 return TextToSpeech.ERROR;
    178             }
    179         }
    180     }
    181 
    182     @Override
    183     public void error() {
    184         if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
    185         synchronized (mStateLock) {
    186             cleanUp();
    187         }
    188     }
    189 
    190     private byte[] makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
    191             int dataLength) {
    192         // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
    193         int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
    194         int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
    195         short blockAlign = (short) (sampleSizeInBytes * channelCount);
    196         short bitsPerSample = (short) (sampleSizeInBytes * 8);
    197 
    198         byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
    199         ByteBuffer header = ByteBuffer.wrap(headerBuf);
    200         header.order(ByteOrder.LITTLE_ENDIAN);
    201 
    202         header.put(new byte[]{ 'R', 'I', 'F', 'F' });
    203         header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
    204         header.put(new byte[]{ 'W', 'A', 'V', 'E' });
    205         header.put(new byte[]{ 'f', 'm', 't', ' ' });
    206         header.putInt(16);  // size of fmt chunk
    207         header.putShort(WAV_FORMAT_PCM);
    208         header.putShort((short) channelCount);
    209         header.putInt(sampleRateInHz);
    210         header.putInt(byteRate);
    211         header.putShort(blockAlign);
    212         header.putShort(bitsPerSample);
    213         header.put(new byte[]{ 'd', 'a', 't', 'a' });
    214         header.putInt(dataLength);
    215 
    216         return headerBuf;
    217     }
    218 
    219 }
    220