Home | History | Annotate | Download | only in tts
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 package android.speech.tts;
     17 
     18 import android.annotation.NonNull;
     19 import android.media.AudioFormat;
     20 import android.speech.tts.TextToSpeechService.AudioOutputParams;
     21 import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
     22 import android.util.Log;
     23 
     24 /**
     25  * Speech synthesis request that plays the audio as it is received.
     26  */
     27 class PlaybackSynthesisCallback extends AbstractSynthesisCallback {
     28 
     29     private static final String TAG = "PlaybackSynthesisRequest";
     30     private static final boolean DBG = false;
     31 
     32     private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
     33 
     34     private final AudioOutputParams mAudioParams;
     35 
     36     /**
     37      * Guards {@link #mAudioTrackHandler}, {@link #mItem} and {@link #mStopped}.
     38      */
     39     private final Object mStateLock = new Object();
     40 
     41     // Handler associated with a thread that plays back audio requests.
     42     private final AudioPlaybackHandler mAudioTrackHandler;
     43     // A request "token", which will be non null after start() has been called.
     44     private SynthesisPlaybackQueueItem mItem = null;
     45 
     46     private volatile boolean mDone = false;
     47 
     48     /** Status code of synthesis */
     49     protected int mStatusCode;
     50 
     51     private final UtteranceProgressDispatcher mDispatcher;
     52     private final Object mCallerIdentity;
     53     private final AbstractEventLogger mLogger;
     54 
     55     PlaybackSynthesisCallback(@NonNull AudioOutputParams audioParams,
     56             @NonNull AudioPlaybackHandler audioTrackHandler,
     57             @NonNull UtteranceProgressDispatcher dispatcher, @NonNull Object callerIdentity,
     58             @NonNull AbstractEventLogger logger, boolean clientIsUsingV2) {
     59         super(clientIsUsingV2);
     60         mAudioParams = audioParams;
     61         mAudioTrackHandler = audioTrackHandler;
     62         mDispatcher = dispatcher;
     63         mCallerIdentity = callerIdentity;
     64         mLogger = logger;
     65         mStatusCode = TextToSpeech.SUCCESS;
     66     }
     67 
     68     @Override
     69     void stop() {
     70         if (DBG) Log.d(TAG, "stop()");
     71 
     72         SynthesisPlaybackQueueItem item;
     73         synchronized (mStateLock) {
     74             if (mDone) {
     75                 return;
     76             }
     77             if (mStatusCode == TextToSpeech.STOPPED) {
     78                 Log.w(TAG, "stop() called twice");
     79                 return;
     80             }
     81 
     82             item = mItem;
     83             mStatusCode = TextToSpeech.STOPPED;
     84         }
     85 
     86         if (item != null) {
     87             // This might result in the synthesis thread being woken up, at which
     88             // point it will write an additional buffer to the item - but we
     89             // won't worry about that because the audio playback queue will be cleared
     90             // soon after (see SynthHandler#stop(String).
     91             item.stop(TextToSpeech.STOPPED);
     92         } else {
     93             // This happens when stop() or error() were called before start() was.
     94 
     95             // In all other cases, mAudioTrackHandler.stop() will
     96             // result in onSynthesisDone being called, and we will
     97             // write data there.
     98             mLogger.onCompleted(TextToSpeech.STOPPED);
     99             mDispatcher.dispatchOnStop();
    100         }
    101     }
    102 
    103     @Override
    104     public int getMaxBufferSize() {
    105         // The AudioTrack buffer will be at least MIN_AUDIO_BUFFER_SIZE, so that should always be
    106         // a safe buffer size to pass in.
    107         return MIN_AUDIO_BUFFER_SIZE;
    108     }
    109 
    110     @Override
    111     public boolean hasStarted() {
    112         synchronized (mStateLock) {
    113             return mItem != null;
    114         }
    115     }
    116 
    117     @Override
    118     public boolean hasFinished() {
    119         synchronized (mStateLock) {
    120             return mDone;
    121         }
    122     }
    123 
    124     @Override
    125     public int start(int sampleRateInHz, int audioFormat, int channelCount) {
    126         if (DBG) Log.d(TAG, "start(" + sampleRateInHz + "," + audioFormat + "," + channelCount
    127                 + ")");
    128         if (audioFormat != AudioFormat.ENCODING_PCM_8BIT &&
    129             audioFormat != AudioFormat.ENCODING_PCM_16BIT &&
    130             audioFormat != AudioFormat.ENCODING_PCM_FLOAT) {
    131             Log.w(TAG, "Audio format encoding " + audioFormat + " not supported. Please use one " +
    132                        "of AudioFormat.ENCODING_PCM_8BIT, AudioFormat.ENCODING_PCM_16BIT or " +
    133                        "AudioFormat.ENCODING_PCM_FLOAT");
    134         }
    135         mDispatcher.dispatchOnBeginSynthesis(sampleRateInHz, audioFormat, channelCount);
    136 
    137         int channelConfig = BlockingAudioTrack.getChannelConfig(channelCount);
    138 
    139         synchronized (mStateLock) {
    140             if (channelConfig == 0) {
    141                 Log.e(TAG, "Unsupported number of channels :" + channelCount);
    142                 mStatusCode = TextToSpeech.ERROR_OUTPUT;
    143                 return TextToSpeech.ERROR;
    144             }
    145             if (mStatusCode == TextToSpeech.STOPPED) {
    146                 if (DBG) Log.d(TAG, "stop() called before start(), returning.");
    147                 return errorCodeOnStop();
    148             }
    149             if (mStatusCode != TextToSpeech.SUCCESS) {
    150                 if (DBG) Log.d(TAG, "Error was raised");
    151                 return TextToSpeech.ERROR;
    152             }
    153             if (mItem != null) {
    154                 Log.e(TAG, "Start called twice");
    155                 return TextToSpeech.ERROR;
    156             }
    157             SynthesisPlaybackQueueItem item = new SynthesisPlaybackQueueItem(
    158                     mAudioParams, sampleRateInHz, audioFormat, channelCount,
    159                     mDispatcher, mCallerIdentity, mLogger);
    160             mAudioTrackHandler.enqueue(item);
    161             mItem = item;
    162         }
    163 
    164         return TextToSpeech.SUCCESS;
    165     }
    166 
    167     @Override
    168     public int audioAvailable(byte[] buffer, int offset, int length) {
    169         if (DBG) Log.d(TAG, "audioAvailable(byte[" + buffer.length + "]," + offset + "," + length
    170                 + ")");
    171 
    172         if (length > getMaxBufferSize() || length <= 0) {
    173             throw new IllegalArgumentException("buffer is too large or of zero length (" +
    174                     + length + " bytes)");
    175         }
    176 
    177         SynthesisPlaybackQueueItem item = null;
    178         synchronized (mStateLock) {
    179             if (mItem == null) {
    180                 mStatusCode = TextToSpeech.ERROR_OUTPUT;
    181                 return TextToSpeech.ERROR;
    182             }
    183             if (mStatusCode != TextToSpeech.SUCCESS) {
    184                 if (DBG) Log.d(TAG, "Error was raised");
    185                 return TextToSpeech.ERROR;
    186             }
    187             if (mStatusCode == TextToSpeech.STOPPED) {
    188                 return errorCodeOnStop();
    189             }
    190             item = mItem;
    191         }
    192 
    193         // Sigh, another copy.
    194         final byte[] bufferCopy = new byte[length];
    195         System.arraycopy(buffer, offset, bufferCopy, 0, length);
    196         mDispatcher.dispatchOnAudioAvailable(bufferCopy);
    197 
    198         // Might block on mItem.this, if there are too many buffers waiting to
    199         // be consumed.
    200         try {
    201             item.put(bufferCopy);
    202         } catch (InterruptedException ie) {
    203             synchronized (mStateLock) {
    204                 mStatusCode = TextToSpeech.ERROR_OUTPUT;
    205                 return TextToSpeech.ERROR;
    206             }
    207         }
    208 
    209         mLogger.onEngineDataReceived();
    210         return TextToSpeech.SUCCESS;
    211     }
    212 
    213     @Override
    214     public int done() {
    215         if (DBG) Log.d(TAG, "done()");
    216 
    217         int statusCode = 0;
    218         SynthesisPlaybackQueueItem item = null;
    219         synchronized (mStateLock) {
    220             if (mDone) {
    221                 Log.w(TAG, "Duplicate call to done()");
    222                 // Not an error that would prevent synthesis. Hence no
    223                 // setStatusCode
    224                 return TextToSpeech.ERROR;
    225             }
    226             if (mStatusCode == TextToSpeech.STOPPED) {
    227                 if (DBG) Log.d(TAG, "Request has been aborted.");
    228                 return errorCodeOnStop();
    229             }
    230             mDone = true;
    231 
    232             if (mItem == null) {
    233                 // .done() was called before .start. Treat it as successful synthesis
    234                 // for a client, despite service bad implementation.
    235                 Log.w(TAG, "done() was called before start() call");
    236                 if (mStatusCode == TextToSpeech.SUCCESS) {
    237                     mDispatcher.dispatchOnSuccess();
    238                 } else {
    239                     mDispatcher.dispatchOnError(mStatusCode);
    240                 }
    241                 mLogger.onEngineComplete();
    242                 return TextToSpeech.ERROR;
    243             }
    244 
    245             item = mItem;
    246             statusCode = mStatusCode;
    247         }
    248 
    249         // Signal done or error to item
    250         if (statusCode == TextToSpeech.SUCCESS) {
    251             item.done();
    252         } else {
    253             item.stop(statusCode);
    254         }
    255         mLogger.onEngineComplete();
    256         return TextToSpeech.SUCCESS;
    257     }
    258 
    259     @Override
    260     public void error() {
    261         error(TextToSpeech.ERROR_SYNTHESIS);
    262     }
    263 
    264     @Override
    265     public void error(int errorCode) {
    266         if (DBG) Log.d(TAG, "error() [will call stop]");
    267         synchronized (mStateLock) {
    268             if (mDone) {
    269                 return;
    270             }
    271             mStatusCode = errorCode;
    272         }
    273     }
    274 
    275     @Override
    276     public void rangeStart(int markerInFrames, int start, int end) {
    277         if (mItem == null) {
    278             Log.e(TAG, "mItem is null");
    279             return;
    280         }
    281         mItem.rangeStart(markerInFrames, start, end);
    282     }
    283 }
    284