/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package android.speech.tts;

import android.annotation.NonNull;
import android.media.AudioFormat;
import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
import android.util.Log;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;

/**
 * Speech synthesis request that writes the audio to a WAV file.
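 *
 * <p>In outline (a sketch of the expected flow, not a contract enforced by this class), a
 * synthesis engine drives the callback roughly like this:
 *
 * <pre>{@code
 * // Hypothetical engine-side usage; moreAudio() and nextChunk() are placeholders.
 * int status = callback.start(sampleRateInHz, AudioFormat.ENCODING_PCM_16BIT, channelCount);
 * if (status == TextToSpeech.SUCCESS) {
 *     while (moreAudio()) {
 *         byte[] chunk = nextChunk();
 *         callback.audioAvailable(chunk, 0, chunk.length);
 *     }
 *     callback.done();  // back-fills the WAV header with the final data length
 * }
 * }</pre>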
 */
class FileSynthesisCallback extends AbstractSynthesisCallback {

    private static final String TAG = "FileSynthesisRequest";
    private static final boolean DBG = false;

    private static final int MAX_AUDIO_BUFFER_SIZE = 8192;

    private static final int WAV_HEADER_LENGTH = 44;
    private static final short WAV_FORMAT_PCM = 0x0001;

    private final Object mStateLock = new Object();

    private int mSampleRateInHz;
    private int mAudioFormat;
    private int mChannelCount;

    private FileChannel mFileChannel;

    private final UtteranceProgressDispatcher mDispatcher;

    private boolean mStarted = false;
    private boolean mDone = false;

    /** Status code of synthesis */
    protected int mStatusCode;

    FileSynthesisCallback(@NonNull FileChannel fileChannel,
            @NonNull UtteranceProgressDispatcher dispatcher, boolean clientIsUsingV2) {
        super(clientIsUsingV2);
        mFileChannel = fileChannel;
        mDispatcher = dispatcher;
        mStatusCode = TextToSpeech.SUCCESS;
    }

    @Override
    void stop() {
        synchronized (mStateLock) {
            if (mDone) {
                return;
            }
            if (mStatusCode == TextToSpeech.STOPPED) {
                return;
            }

            mStatusCode = TextToSpeech.STOPPED;
            cleanUp();
            mDispatcher.dispatchOnStop();
        }
    }

    /**
     * Must be called while holding the monitor on {@link #mStateLock}.
     */
    private void cleanUp() {
        closeFile();
    }

    /**
     * Must be called while holding the monitor on {@link #mStateLock}.
     */
    private void closeFile() {
        // File will be closed by the SpeechItem in the speech service.
        mFileChannel = null;
    }

    @Override
    public int getMaxBufferSize() {
        return MAX_AUDIO_BUFFER_SIZE;
    }

    @Override
    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
        if (DBG) {
            Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
                    + "," + channelCount + ")");
        }
        if (audioFormat != AudioFormat.ENCODING_PCM_8BIT &&
            audioFormat != AudioFormat.ENCODING_PCM_16BIT &&
            audioFormat != AudioFormat.ENCODING_PCM_FLOAT) {
            Log.e(TAG, "Audio format encoding " + audioFormat + " not supported. Please use one " +
                       "of AudioFormat.ENCODING_PCM_8BIT, AudioFormat.ENCODING_PCM_16BIT or " +
                       "AudioFormat.ENCODING_PCM_FLOAT");
        }
        mDispatcher.dispatchOnBeginSynthesis(sampleRateInHz, audioFormat, channelCount);

        FileChannel fileChannel = null;
        synchronized (mStateLock) {
            if (mStatusCode == TextToSpeech.STOPPED) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return errorCodeOnStop();
            }
            if (mStatusCode != TextToSpeech.SUCCESS) {
                if (DBG) Log.d(TAG, "Error was raised");
                return TextToSpeech.ERROR;
            }
            if (mStarted) {
                Log.e(TAG, "Start called twice");
                return TextToSpeech.ERROR;
            }
            mStarted = true;
            mSampleRateInHz = sampleRateInHz;
            mAudioFormat = audioFormat;
            mChannelCount = channelCount;

            mDispatcher.dispatchOnStart();
            fileChannel = mFileChannel;
        }

        try {
            // Reserve space for the WAV header; it is back-filled with the real sizes in done().
            fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
            return TextToSpeech.SUCCESS;
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write wav header to output file descriptor", ex);
            synchronized (mStateLock) {
                cleanUp();
                mStatusCode = TextToSpeech.ERROR_OUTPUT;
            }
            return TextToSpeech.ERROR;
        }
    }

    @Override
    public int audioAvailable(byte[] buffer, int offset, int length) {
        if (DBG) {
            Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
                    + "," + length + ")");
        }
        FileChannel fileChannel = null;
        synchronized (mStateLock) {
            if (mStatusCode == TextToSpeech.STOPPED) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return errorCodeOnStop();
            }
            if (mStatusCode != TextToSpeech.SUCCESS) {
                if (DBG) Log.d(TAG, "Error was raised");
                return TextToSpeech.ERROR;
            }
            if (mFileChannel == null) {
                Log.e(TAG, "File not open");
                mStatusCode = TextToSpeech.ERROR_OUTPUT;
                return TextToSpeech.ERROR;
            }
            if (!mStarted) {
                Log.e(TAG, "Start method was not called");
                return TextToSpeech.ERROR;
            }
            fileChannel = mFileChannel;
        }

        // Dispatch a copy of the audio to progress listeners; the original buffer is written
        // to the file below.
        final byte[] bufferCopy = new byte[length];
        System.arraycopy(buffer, offset, bufferCopy, 0, length);
        mDispatcher.dispatchOnAudioAvailable(bufferCopy);

        try {
            fileChannel.write(ByteBuffer.wrap(buffer, offset, length));
            return TextToSpeech.SUCCESS;
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write to output file descriptor", ex);
            synchronized (mStateLock) {
                cleanUp();
                mStatusCode = TextToSpeech.ERROR_OUTPUT;
            }
            return TextToSpeech.ERROR;
        }
    }

    @Override
    public int done() {
        if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
        FileChannel fileChannel = null;

        int sampleRateInHz = 0;
        int audioFormat = 0;
        int channelCount = 0;

        synchronized (mStateLock) {
            if (mDone) {
                Log.w(TAG, "Duplicate call to done()");
                // This is not an error that would prevent synthesis, so the status code is
                // left unchanged.
                return TextToSpeech.ERROR;
            }
            if (mStatusCode == TextToSpeech.STOPPED) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return errorCodeOnStop();
            }
            if (mStatusCode != TextToSpeech.SUCCESS && mStatusCode != TextToSpeech.STOPPED) {
                mDispatcher.dispatchOnError(mStatusCode);
                return TextToSpeech.ERROR;
            }
            if (mFileChannel == null) {
                Log.e(TAG, "File not open");
                return TextToSpeech.ERROR;
            }
            mDone = true;
            fileChannel = mFileChannel;
            sampleRateInHz = mSampleRateInHz;
            audioFormat = mAudioFormat;
            channelCount = mChannelCount;
        }

        try {
            // Back-fill the WAV header at the start of the file, now that the total amount of
            // audio data is known.
            fileChannel.position(0);
            int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH);
            fileChannel.write(
                    makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength));

            synchronized (mStateLock) {
                closeFile();
                mDispatcher.dispatchOnSuccess();
                return TextToSpeech.SUCCESS;
            }
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write to output file descriptor", ex);
            synchronized (mStateLock) {
                cleanUp();
            }
            return TextToSpeech.ERROR;
        }
    }

    @Override
    public void error() {
        error(TextToSpeech.ERROR_SYNTHESIS);
    }

    @Override
    public void error(int errorCode) {
        if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
        synchronized (mStateLock) {
            if (mDone) {
                return;
            }
            cleanUp();
            mStatusCode = errorCode;
        }
    }

    @Override
    public boolean hasStarted() {
        synchronized (mStateLock) {
            return mStarted;
        }
    }

    @Override
    public boolean hasFinished() {
        synchronized (mStateLock) {
            return mDone;
        }
    }

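    /**
     * Builds the canonical 44-byte little-endian RIFF/WAVE header ("RIFF", "fmt " and "data"
     * chunks) for {@code dataLength} bytes of audio with the given sample rate, encoding and
     * channel count. The format tag written is always {@link #WAV_FORMAT_PCM}.
     */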
    private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
            int dataLength) {
        int sampleSizeInBytes = AudioFormat.getBytesPerSample(audioFormat);
        int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
        short blockAlign = (short) (sampleSizeInBytes * channelCount);
        short bitsPerSample = (short) (sampleSizeInBytes * 8);

        byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
        ByteBuffer header = ByteBuffer.wrap(headerBuf);
        header.order(ByteOrder.LITTLE_ENDIAN);

        header.put(new byte[]{ 'R', 'I', 'F', 'F' });
        header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
        header.put(new byte[]{ 'W', 'A', 'V', 'E' });
        header.put(new byte[]{ 'f', 'm', 't', ' ' });
        header.putInt(16);  // size of fmt chunk
        header.putShort(WAV_FORMAT_PCM);
        header.putShort((short) channelCount);
        header.putInt(sampleRateInHz);
        header.putInt(byteRate);
        header.putShort(blockAlign);
        header.putShort(bitsPerSample);
        header.put(new byte[]{ 'd', 'a', 't', 'a' });
        header.putInt(dataLength);
        header.flip();

        return header;
    }

    @Override
    public void rangeStart(int markerInFrames, int start, int end) {
        mDispatcher.dispatchOnRangeStart(markerInFrames, start, end);
    }
}