Home | History | Annotate | Download | only in tts
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 package android.speech.tts;
     17 
     18 import android.media.AudioFormat;
     19 import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
     20 import android.util.Log;
     21 
     22 import java.io.IOException;
     23 import java.nio.ByteBuffer;
     24 import java.nio.ByteOrder;
     25 import java.nio.channels.FileChannel;
     26 
     27 /**
     28  * Speech synthesis request that writes the audio to a WAV file.
     29  */
     30 class FileSynthesisCallback extends AbstractSynthesisCallback {
     31 
     32     private static final String TAG = "FileSynthesisRequest";
     33     private static final boolean DBG = false;
     34 
     35     private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
     36 
     37     private static final int WAV_HEADER_LENGTH = 44;
     38     private static final short WAV_FORMAT_PCM = 0x0001;
     39 
     40     private final Object mStateLock = new Object();
     41 
     42     private int mSampleRateInHz;
     43     private int mAudioFormat;
     44     private int mChannelCount;
     45 
     46     private FileChannel mFileChannel;
     47 
     48     private final UtteranceProgressDispatcher mDispatcher;
     49     private final Object mCallerIdentity;
     50 
     51     private boolean mStarted = false;
     52     private boolean mDone = false;
     53 
     54     /** Status code of synthesis */
     55     protected int mStatusCode;
     56 
     57     FileSynthesisCallback(FileChannel fileChannel, UtteranceProgressDispatcher dispatcher,
     58             Object callerIdentity, boolean clientIsUsingV2) {
     59         super(clientIsUsingV2);
     60         mFileChannel = fileChannel;
     61         mDispatcher = dispatcher;
     62         mCallerIdentity = callerIdentity;
     63         mStatusCode = TextToSpeech.SUCCESS;
     64     }
     65 
     66     @Override
     67     void stop() {
     68         synchronized (mStateLock) {
     69             if (mDone) {
     70                 return;
     71             }
     72             if (mStatusCode == TextToSpeech.STOPPED) {
     73                 return;
     74             }
     75 
     76             mStatusCode = TextToSpeech.STOPPED;
     77             cleanUp();
     78             if (mDispatcher != null) {
     79                 mDispatcher.dispatchOnStop();
     80             }
     81         }
     82     }
     83 
     84     /**
     85      * Must be called while holding the monitor on {@link #mStateLock}.
     86      */
     87     private void cleanUp() {
     88         closeFile();
     89     }
     90 
     91     /**
     92      * Must be called while holding the monitor on {@link #mStateLock}.
     93      */
     94     private void closeFile() {
     95         // File will be closed by the SpeechItem in the speech service.
     96         mFileChannel = null;
     97     }
     98 
     99     @Override
    100     public int getMaxBufferSize() {
    101         return MAX_AUDIO_BUFFER_SIZE;
    102     }
    103 
    104     @Override
    105     public int start(int sampleRateInHz, int audioFormat, int channelCount) {
    106         if (DBG) {
    107             Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
    108                     + "," + channelCount + ")");
    109         }
    110         FileChannel fileChannel = null;
    111         synchronized (mStateLock) {
    112             if (mStatusCode == TextToSpeech.STOPPED) {
    113                 if (DBG) Log.d(TAG, "Request has been aborted.");
    114                 return errorCodeOnStop();
    115             }
    116             if (mStatusCode != TextToSpeech.SUCCESS) {
    117                 if (DBG) Log.d(TAG, "Error was raised");
    118                 return TextToSpeech.ERROR;
    119             }
    120             if (mStarted) {
    121                 Log.e(TAG, "Start called twice");
    122                 return TextToSpeech.ERROR;
    123             }
    124             mStarted = true;
    125             mSampleRateInHz = sampleRateInHz;
    126             mAudioFormat = audioFormat;
    127             mChannelCount = channelCount;
    128 
    129             if (mDispatcher != null) {
    130                 mDispatcher.dispatchOnStart();
    131             }
    132             fileChannel = mFileChannel;
    133         }
    134 
    135         try {
    136             fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
    137                 return TextToSpeech.SUCCESS;
    138         } catch (IOException ex) {
    139             Log.e(TAG, "Failed to write wav header to output file descriptor", ex);
    140             synchronized (mStateLock) {
    141                 cleanUp();
    142                 mStatusCode = TextToSpeech.ERROR_OUTPUT;
    143             }
    144             return TextToSpeech.ERROR;
    145         }
    146     }
    147 
    148     @Override
    149     public int audioAvailable(byte[] buffer, int offset, int length) {
    150         if (DBG) {
    151             Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
    152                     + "," + length + ")");
    153         }
    154         FileChannel fileChannel = null;
    155         synchronized (mStateLock) {
    156             if (mStatusCode == TextToSpeech.STOPPED) {
    157                 if (DBG) Log.d(TAG, "Request has been aborted.");
    158                 return errorCodeOnStop();
    159             }
    160             if (mStatusCode != TextToSpeech.SUCCESS) {
    161                 if (DBG) Log.d(TAG, "Error was raised");
    162                 return TextToSpeech.ERROR;
    163             }
    164             if (mFileChannel == null) {
    165                 Log.e(TAG, "File not open");
    166                 mStatusCode = TextToSpeech.ERROR_OUTPUT;
    167                 return TextToSpeech.ERROR;
    168             }
    169             if (!mStarted) {
    170                 Log.e(TAG, "Start method was not called");
    171                 return TextToSpeech.ERROR;
    172             }
    173             fileChannel = mFileChannel;
    174         }
    175 
    176         try {
    177             fileChannel.write(ByteBuffer.wrap(buffer,  offset,  length));
    178             return TextToSpeech.SUCCESS;
    179         } catch (IOException ex) {
    180             Log.e(TAG, "Failed to write to output file descriptor", ex);
    181             synchronized (mStateLock) {
    182                 cleanUp();
    183                 mStatusCode = TextToSpeech.ERROR_OUTPUT;
    184             }
    185             return TextToSpeech.ERROR;
    186         }
    187     }
    188 
    189     @Override
    190     public int done() {
    191         if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
    192         FileChannel fileChannel = null;
    193 
    194         int sampleRateInHz = 0;
    195         int audioFormat = 0;
    196         int channelCount = 0;
    197 
    198         synchronized (mStateLock) {
    199             if (mDone) {
    200                 Log.w(TAG, "Duplicate call to done()");
    201                 // This is not an error that would prevent synthesis. Hence no
    202                 // setStatusCode is set.
    203                 return TextToSpeech.ERROR;
    204             }
    205             if (mStatusCode == TextToSpeech.STOPPED) {
    206                 if (DBG) Log.d(TAG, "Request has been aborted.");
    207                 return errorCodeOnStop();
    208             }
    209             if (mDispatcher != null && mStatusCode != TextToSpeech.SUCCESS &&
    210                     mStatusCode != TextToSpeech.STOPPED) {
    211                 mDispatcher.dispatchOnError(mStatusCode);
    212                 return TextToSpeech.ERROR;
    213             }
    214             if (mFileChannel == null) {
    215                 Log.e(TAG, "File not open");
    216                 return TextToSpeech.ERROR;
    217             }
    218             mDone = true;
    219             fileChannel = mFileChannel;
    220             sampleRateInHz = mSampleRateInHz;
    221             audioFormat = mAudioFormat;
    222             channelCount = mChannelCount;
    223         }
    224 
    225         try {
    226             // Write WAV header at start of file
    227             fileChannel.position(0);
    228             int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH);
    229             fileChannel.write(
    230                     makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength));
    231 
    232             synchronized (mStateLock) {
    233                 closeFile();
    234                 if (mDispatcher != null) {
    235                     mDispatcher.dispatchOnSuccess();
    236                 }
    237                 return TextToSpeech.SUCCESS;
    238             }
    239         } catch (IOException ex) {
    240             Log.e(TAG, "Failed to write to output file descriptor", ex);
    241             synchronized (mStateLock) {
    242                 cleanUp();
    243             }
    244             return TextToSpeech.ERROR;
    245         }
    246     }
    247 
    248     @Override
    249     public void error() {
    250         error(TextToSpeech.ERROR_SYNTHESIS);
    251     }
    252 
    253     @Override
    254     public void error(int errorCode) {
    255         if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
    256         synchronized (mStateLock) {
    257             if (mDone) {
    258                 return;
    259             }
    260             cleanUp();
    261             mStatusCode = errorCode;
    262         }
    263     }
    264 
    265     @Override
    266     public boolean hasStarted() {
    267         synchronized (mStateLock) {
    268             return mStarted;
    269         }
    270     }
    271 
    272     @Override
    273     public boolean hasFinished() {
    274         synchronized (mStateLock) {
    275             return mDone;
    276         }
    277     }
    278 
    279     private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
    280             int dataLength) {
    281         int sampleSizeInBytes = AudioFormat.getBytesPerSample(audioFormat);
    282         int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
    283         short blockAlign = (short) (sampleSizeInBytes * channelCount);
    284         short bitsPerSample = (short) (sampleSizeInBytes * 8);
    285 
    286         byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
    287         ByteBuffer header = ByteBuffer.wrap(headerBuf);
    288         header.order(ByteOrder.LITTLE_ENDIAN);
    289 
    290         header.put(new byte[]{ 'R', 'I', 'F', 'F' });
    291         header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
    292         header.put(new byte[]{ 'W', 'A', 'V', 'E' });
    293         header.put(new byte[]{ 'f', 'm', 't', ' ' });
    294         header.putInt(16);  // size of fmt chunk
    295         header.putShort(WAV_FORMAT_PCM);
    296         header.putShort((short) channelCount);
    297         header.putInt(sampleRateInHz);
    298         header.putInt(byteRate);
    299         header.putShort(blockAlign);
    300         header.putShort(bitsPerSample);
    301         header.put(new byte[]{ 'd', 'a', 't', 'a' });
    302         header.putInt(dataLength);
    303         header.flip();
    304 
    305         return header;
    306     }
    307 }
    308