/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package android.speech.tts;

import android.media.AudioFormat;
import android.util.Log;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;

/**
 * Speech synthesis request that writes the audio to a WAV file.
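 * <p>
 * The engine is expected to call {@link #start}, then {@link #audioAvailable} one or
 * more times, and finally {@link #done}. A placeholder header is written when
 * synthesis starts; the real WAV header is written by {@link #done} once the total
 * amount of audio data is known.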
 */
class FileSynthesisCallback extends AbstractSynthesisCallback {

    private static final String TAG = "FileSynthesisRequest";
    private static final boolean DBG = false;

    private static final int MAX_AUDIO_BUFFER_SIZE = 8192;

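    // Size of the standard 44-byte header of a PCM RIFF/WAVE file, as written by
    // makeWavHeader() below.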
    private static final int WAV_HEADER_LENGTH = 44;
    private static final short WAV_FORMAT_PCM = 0x0001;

    private final Object mStateLock = new Object();

    private int mSampleRateInHz;
    private int mAudioFormat;
    private int mChannelCount;

    private FileChannel mFileChannel;

    private boolean mStarted = false;
    private boolean mStopped = false;
    private boolean mDone = false;

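    /**
     * @param fileChannel the channel to write the WAV data to. The channel is
     *        closed by this callback once synthesis completes, fails or is stopped.
     */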
    FileSynthesisCallback(FileChannel fileChannel) {
        mFileChannel = fileChannel;
    }

    @Override
    void stop() {
        synchronized (mStateLock) {
            mStopped = true;
            cleanUp();
        }
    }

    /**
     * Must be called while holding the monitor on {@link #mStateLock}.
     */
    private void cleanUp() {
        closeFile();
    }

    /**
     * Must be called while holding the monitor on {@link #mStateLock}.
     */
    private void closeFile() {
        try {
            if (mFileChannel != null) {
                mFileChannel.close();
                mFileChannel = null;
            }
        } catch (IOException ex) {
            Log.e(TAG, "Failed to close output file descriptor", ex);
        }
    }

    @Override
    public int getMaxBufferSize() {
        return MAX_AUDIO_BUFFER_SIZE;
    }

    @Override
    boolean isDone() {
        return mDone;
    }

    @Override
    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
        if (DBG) {
            Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
                    + "," + channelCount + ")");
        }
        synchronized (mStateLock) {
            if (mStopped) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return TextToSpeech.ERROR;
            }
            if (mStarted) {
                cleanUp();
                throw new IllegalArgumentException("FileSynthesisRequest.start() called twice");
            }
            mStarted = true;
            mSampleRateInHz = sampleRateInHz;
            mAudioFormat = audioFormat;
            mChannelCount = channelCount;

            try {
                // Reserve space for the WAV header; it is filled in by done() once the
                // amount of audio data is known.
                mFileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
                return TextToSpeech.SUCCESS;
            } catch (IOException ex) {
                Log.e(TAG, "Failed to write wav header to output file descriptor", ex);
                cleanUp();
                return TextToSpeech.ERROR;
            }
        }
    }

    @Override
    public int audioAvailable(byte[] buffer, int offset, int length) {
        if (DBG) {
            Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
                    + "," + length + ")");
        }
        synchronized (mStateLock) {
            if (mStopped) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return TextToSpeech.ERROR;
            }
            if (mFileChannel == null) {
                Log.e(TAG, "File not open");
                return TextToSpeech.ERROR;
            }
            try {
                mFileChannel.write(ByteBuffer.wrap(buffer, offset, length));
                return TextToSpeech.SUCCESS;
            } catch (IOException ex) {
                Log.e(TAG, "Failed to write to output file descriptor", ex);
                cleanUp();
                return TextToSpeech.ERROR;
            }
        }
    }

    @Override
    public int done() {
        if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
        synchronized (mStateLock) {
            if (mDone) {
                if (DBG) Log.d(TAG, "Duplicate call to done()");
                // This preserves the existing behaviour: earlier, a second call to done()
                // would return ERROR because mFile == null, and we'd add to the logspam.
                return TextToSpeech.ERROR;
            }
            if (mStopped) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return TextToSpeech.ERROR;
            }
            if (mFileChannel == null) {
                Log.e(TAG, "File not open");
                return TextToSpeech.ERROR;
            }
            try {
                // Write the real WAV header at the start of the file, now that the
                // length of the audio data is known.
                mFileChannel.position(0);
                int dataLength = (int) (mFileChannel.size() - WAV_HEADER_LENGTH);
                mFileChannel.write(
                        makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength));
                closeFile();
                mDone = true;
                return TextToSpeech.SUCCESS;
            } catch (IOException ex) {
                Log.e(TAG, "Failed to write to output file descriptor", ex);
                cleanUp();
                return TextToSpeech.ERROR;
            }
        }
    }

    @Override
    public void error() {
        if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
        synchronized (mStateLock) {
            cleanUp();
        }
    }

    private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
            int dataLength) {
        // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
        int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
        int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
        short blockAlign = (short) (sampleSizeInBytes * channelCount);
        short bitsPerSample = (short) (sampleSizeInBytes * 8);

        byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
        ByteBuffer header = ByteBuffer.wrap(headerBuf);
        header.order(ByteOrder.LITTLE_ENDIAN);

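        // Write the 44-byte header: a 12-byte RIFF chunk descriptor, the 24-byte "fmt "
        // sub-chunk, then the 8-byte "data" sub-chunk header that precedes the samples.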
        header.put(new byte[]{ 'R', 'I', 'F', 'F' });
        header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
        header.put(new byte[]{ 'W', 'A', 'V', 'E' });
        header.put(new byte[]{ 'f', 'm', 't', ' ' });
        header.putInt(16);  // size of fmt chunk
        header.putShort(WAV_FORMAT_PCM);        // audio format: PCM
        header.putShort((short) channelCount);  // number of channels
        header.putInt(sampleRateInHz);          // sample rate
        header.putInt(byteRate);                // average bytes per second
        header.putShort(blockAlign);            // bytes per frame across all channels
        header.putShort(bitsPerSample);         // bits per sample
        header.put(new byte[]{ 'd', 'a', 't', 'a' });
        header.putInt(dataLength);              // size of the data chunk
        header.flip();

        return header;
    }

}