Home | History | Annotate | Download | only in tts
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 package android.speech.tts;
     17 
     18 import android.media.AudioFormat;
     19 import android.media.AudioTrack;
     20 import android.text.TextUtils;
     21 import android.util.Log;
     22 
     23 import java.util.Iterator;
     24 import java.util.concurrent.PriorityBlockingQueue;
     25 import java.util.concurrent.atomic.AtomicLong;
     26 
class AudioPlaybackHandler {
    private static final String TAG = "TTS.AudioPlaybackHandler";
    // Compile-time debug switches; when false the guarded Log calls are
    // stripped by the compiler.
    private static final boolean DBG_THREADING = false;
    private static final boolean DBG = false;

    // Lower bound (in bytes) for the streaming AudioTrack buffer; see
    // createStreamingAudioTrack().
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;

    // Message types consumed by the MessageLoop (dispatched in handleMessage()).
    private static final int SYNTHESIS_START = 1;
    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
    private static final int SYNTHESIS_DONE = 3;

    private static final int PLAY_AUDIO = 5;
    private static final int PLAY_SILENCE = 6;

    // Control message that terminates the playback thread's loop.
    private static final int SHUTDOWN = -1;

    // Queue priorities: lower values are dequeued first (see ListEntry#compareTo),
    // so HIGH_PRIORITY entries jump ahead of DEFAULT_PRIORITY ones.
    private static final int DEFAULT_PRIORITY = 1;
    private static final int HIGH_PRIORITY = 0;

    // Work queue drained by the single playback thread.
    private final PriorityBlockingQueue<ListEntry> mQueue =
            new PriorityBlockingQueue<ListEntry>();
    private final Thread mHandlerThread;

    // The message currently being processed by the playback thread, or null.
    private volatile MessageParams mCurrentParams = null;
    // Used only for book keeping and error detection.
    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
    // Used to order incoming messages in our priority queue.
    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);


    /** Creates the handler; the playback thread does not run until start(). */
    AudioPlaybackHandler() {
        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
    }

    /** Starts the audio playback thread. Call exactly once. */
    public void start() {
        mHandlerThread.start();
    }
     64 
    /**
     * Stops all synthesis for a given {@code token}. If the current token
     * is currently being processed, an effort will be made to stop it but
     * that is not guaranteed.
     *
     * NOTE: This assumes that all other messages in the queue with {@code token}
     * have been removed already.
     *
     * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}.
     */
    private void stop(MessageParams token) {
        // Nothing in flight, nothing to stop.
        if (token == null) {
            return;
        }

        if (DBG) Log.d(TAG, "Stopping token : " + token);

        // Each message type requires a different interruption strategy.
        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
            if (current != null) {
                // Stop the current audio track if it's still playing.
                // The audio track is thread safe in this regard. The current
                // handleSynthesisDataAvailable call will return soon after this
                // call.
                current.stop();
            }
            // This is safe because PlaybackSynthesisCallback#stop would have
            // been called before this method, and will no longer enqueue any
            // audio for this token.
            //
            // (Even if it did, all it would result in is a warning message).
            //
            // HIGH_PRIORITY makes this cleanup entry overtake queued work so
            // the AudioTrack resources are released promptly.
            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
        } else if (token.getType() == MessageParams.TYPE_AUDIO) {
            ((AudioMessageParams) token).getPlayer().stop();
            // No cleanup required for audio messages.
        } else if (token.getType() == MessageParams.TYPE_SILENCE) {
            // Opening the condition variable wakes handleSilence() early.
            ((SilenceMessageParams) token).getConditionVariable().open();
            // No cleanup required for silence messages.
        }
    }
    105 
    106     // -----------------------------------------------------
    107     // Methods that add and remove elements from the queue. These do not
    108     // need to be synchronized strictly speaking, but they make the behaviour
    109     // a lot more predictable. (though it would still be correct without
    110     // synchronization).
    111     // -----------------------------------------------------
    112 
    113     synchronized public void removePlaybackItems(String callingApp) {
    114         if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp);
    115         removeMessages(callingApp);
    116 
    117         final MessageParams current = getCurrentParams();
    118         if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) {
    119             stop(current);
    120         }
    121     }
    122 
    123     synchronized public void removeAllItems() {
    124         if (DBG_THREADING) Log.d(TAG, "Removing all items");
    125         removeAllMessages();
    126         stop(getCurrentParams());
    127     }
    128 
    129     /**
    130      * @return false iff the queue is empty and no queue item is currently
    131      *        being handled, true otherwise.
    132      */
    133     public boolean isSpeaking() {
    134         return (mQueue.peek() != null) || (mCurrentParams != null);
    135     }
    136 
    137     /**
    138      * Shut down the audio playback thread.
    139      */
    140     synchronized public void quit() {
    141         removeAllMessages();
    142         stop(getCurrentParams());
    143         mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
    144     }
    145 
    146     synchronized void enqueueSynthesisStart(SynthesisMessageParams token) {
    147         if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token);
    148         mQueue.add(new ListEntry(SYNTHESIS_START, token));
    149     }
    150 
    151     synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
    152         if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token);
    153         mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
    154     }
    155 
    156     synchronized void enqueueSynthesisDone(SynthesisMessageParams token) {
    157         if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token);
    158         mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
    159     }
    160 
    161     synchronized void enqueueAudio(AudioMessageParams token) {
    162         if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token);
    163         mQueue.add(new ListEntry(PLAY_AUDIO, token));
    164     }
    165 
    166     synchronized void enqueueSilence(SilenceMessageParams token) {
    167         if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token);
    168         mQueue.add(new ListEntry(PLAY_SILENCE, token));
    169     }
    170 
    171     // -----------------------------------------
    172     // End of public API methods.
    173     // -----------------------------------------
    174 
    175     // -----------------------------------------
    176     // Methods for managing the message queue.
    177     // -----------------------------------------
    178 
    179     /*
    180      * The MessageLoop is a handler like implementation that
    181      * processes messages from a priority queue.
    182      */
    183     private final class MessageLoop implements Runnable {
    184         @Override
    185         public void run() {
    186             while (true) {
    187                 ListEntry entry = null;
    188                 try {
    189                     entry = mQueue.take();
    190                 } catch (InterruptedException ie) {
    191                     return;
    192                 }
    193 
    194                 if (entry.mWhat == SHUTDOWN) {
    195                     if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
    196                     return;
    197                 }
    198 
    199                 if (DBG) {
    200                     Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat
    201                             + " ,seqId : " + entry.mSequenceId);
    202                 }
    203 
    204                 setCurrentParams(entry.mMessage);
    205                 handleMessage(entry);
    206                 setCurrentParams(null);
    207             }
    208         }
    209     }
    210 
    211     /*
    212      * Atomically clear the queue of all messages.
    213      */
    214     synchronized private void removeAllMessages() {
    215         mQueue.clear();
    216     }
    217 
    218     /*
    219      * Remove all messages that originate from a given calling app.
    220      */
    221     synchronized private void removeMessages(String callingApp) {
    222         Iterator<ListEntry> it = mQueue.iterator();
    223 
    224         while (it.hasNext()) {
    225             final ListEntry current = it.next();
    226             // The null check is to prevent us from removing control messages,
    227             // such as a shutdown message.
    228             if (current.mMessage != null &&
    229                     callingApp.equals(current.mMessage.getCallingApp())) {
    230                 it.remove();
    231             }
    232         }
    233     }
    234 
    235     /*
    236      * An element of our priority queue of messages. Each message has a priority,
    237      * and a sequence id (defined by the order of enqueue calls). Among messages
    238      * with the same priority, messages that were received earlier win out.
    239      */
    240     private final class ListEntry implements Comparable<ListEntry> {
    241         final int mWhat;
    242         final MessageParams mMessage;
    243         final int mPriority;
    244         final long mSequenceId;
    245 
    246         private ListEntry(int what, MessageParams message) {
    247             this(what, message, DEFAULT_PRIORITY);
    248         }
    249 
    250         private ListEntry(int what, MessageParams message, int priority) {
    251             mWhat = what;
    252             mMessage = message;
    253             mPriority = priority;
    254             mSequenceId = mSequenceIdCtr.incrementAndGet();
    255         }
    256 
    257         @Override
    258         public int compareTo(ListEntry that) {
    259             if (that == this) {
    260                 return 0;
    261             }
    262 
    263             // Note that this is always 0, 1 or -1.
    264             int priorityDiff = mPriority - that.mPriority;
    265             if (priorityDiff == 0) {
    266                 // The == case cannot occur.
    267                 return (mSequenceId < that.mSequenceId) ? -1 : 1;
    268             }
    269 
    270             return priorityDiff;
    271         }
    272     }
    273 
    274     private void setCurrentParams(MessageParams p) {
    275         if (DBG_THREADING) {
    276             if (p != null) {
    277                 Log.d(TAG, "Started handling :" + p);
    278             } else {
    279                 Log.d(TAG, "End handling : " + mCurrentParams);
    280             }
    281         }
    282         mCurrentParams = p;
    283     }
    284 
    285     private MessageParams getCurrentParams() {
    286         return mCurrentParams;
    287     }
    288 
    289     // -----------------------------------------
    290     // Methods for dealing with individual messages, the methods
    291     // below do the actual work.
    292     // -----------------------------------------
    293 
    294     private void handleMessage(ListEntry entry) {
    295         final MessageParams msg = entry.mMessage;
    296         if (entry.mWhat == SYNTHESIS_START) {
    297             handleSynthesisStart(msg);
    298         } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
    299             handleSynthesisDataAvailable(msg);
    300         } else if (entry.mWhat == SYNTHESIS_DONE) {
    301             handleSynthesisDone(msg);
    302         } else if (entry.mWhat == PLAY_AUDIO) {
    303             handleAudio(msg);
    304         } else if (entry.mWhat == PLAY_SILENCE) {
    305             handleSilence(msg);
    306         }
    307     }
    308 
    309     // Currently implemented as blocking the audio playback thread for the
    310     // specified duration. If a call to stop() is made, the thread
    311     // unblocks.
    312     private void handleSilence(MessageParams msg) {
    313         if (DBG) Log.d(TAG, "handleSilence()");
    314         SilenceMessageParams params = (SilenceMessageParams) msg;
    315         if (params.getSilenceDurationMs() > 0) {
    316             params.getConditionVariable().block(params.getSilenceDurationMs());
    317         }
    318         params.getDispatcher().dispatchUtteranceCompleted();
    319         if (DBG) Log.d(TAG, "handleSilence() done.");
    320     }
    321 
    322     // Plays back audio from a given URI. No TTS engine involvement here.
    323     private void handleAudio(MessageParams msg) {
    324         if (DBG) Log.d(TAG, "handleAudio()");
    325         AudioMessageParams params = (AudioMessageParams) msg;
    326         // Note that the BlockingMediaPlayer spawns a separate thread.
    327         //
    328         // TODO: This can be avoided.
    329         params.getPlayer().startAndWait();
    330         params.getDispatcher().dispatchUtteranceCompleted();
    331         if (DBG) Log.d(TAG, "handleAudio() done.");
    332     }
    333 
    // Denotes the start of a new synthesis request. We create a new
    // audio track, and prepare it for incoming data.
    //
    // Note that since all TTS synthesis happens on a single thread, we
    // should ALWAYS see the following order :
    //
    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
    // OR
    // handleSynthesisCompleteDataAvailable.
    private void handleSynthesisStart(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleSynthesisStart()");
        final SynthesisMessageParams param = (SynthesisMessageParams) msg;

        // Oops, looks like the engine forgot to call done(). We go through
        // extra trouble to clean the data to prevent the AudioTrack resources
        // from being leaked.
        if (mLastSynthesisRequest != null) {
            Log.w(TAG, "Error : Missing call to done() for request : " +
                    mLastSynthesisRequest);
            handleSynthesisDone(mLastSynthesisRequest);
        }

        mLastSynthesisRequest = param;

        // Create the audio track.
        // NOTE(review): createStreamingAudioTrack() returns null on failure.
        // The null is stored on the param and tolerated downstream by
        // handleSynthesisDataAvailable()/handleSynthesisDone(), but the DBG
        // log line below would NPE if DBG were ever switched to true.
        final AudioTrack audioTrack = createStreamingAudioTrack(param);

        if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");

        param.setAudioTrack(audioTrack);
    }
    365 
    366     // More data available to be flushed to the audio track.
    367     private void handleSynthesisDataAvailable(MessageParams msg) {
    368         final SynthesisMessageParams param = (SynthesisMessageParams) msg;
    369         if (param.getAudioTrack() == null) {
    370             Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param);
    371             return;
    372         }
    373 
    374         if (param != mLastSynthesisRequest) {
    375             Log.e(TAG, "Call to dataAvailable without done() / start()");
    376             return;
    377         }
    378 
    379         final AudioTrack audioTrack = param.getAudioTrack();
    380         final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();
    381 
    382         if (bufferCopy == null) {
    383             Log.e(TAG, "No buffers available to play.");
    384             return;
    385         }
    386 
    387         int playState = audioTrack.getPlayState();
    388         if (playState == AudioTrack.PLAYSTATE_STOPPED) {
    389             if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
    390             audioTrack.play();
    391         }
    392         int count = 0;
    393         while (count < bufferCopy.mBytes.length) {
    394             // Note that we don't take bufferCopy.mOffset into account because
    395             // it is guaranteed to be 0.
    396             int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mBytes.length);
    397             if (written <= 0) {
    398                 break;
    399             }
    400             count += written;
    401         }
    402         param.mBytesWritten += count;
    403         param.mLogger.onPlaybackStart();
    404     }
    405 
    // Wait for the audio track to stop playing, and then release its resources.
    private void handleSynthesisDone(MessageParams msg) {
        final SynthesisMessageParams params = (SynthesisMessageParams) msg;

        if (DBG) Log.d(TAG, "handleSynthesisDone()");
        final AudioTrack audioTrack = params.getAudioTrack();

        // Track creation failed, or done() was already processed for this
        // token; nothing to wait for or release.
        if (audioTrack == null) {
            return;
        }

        // Fewer bytes written than the track's buffer size marks a "short
        // utterance"; stop() is called to flush the buffered audio out to
        // the mixer (see blockUntilDone for the consequences).
        if (params.mBytesWritten < params.mAudioBufferSize) {
            if (DBG) Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                    audioTrack.getPlayState());
            params.mIsShortUtterance = true;
            audioTrack.stop();
        }

        if (DBG) Log.d(TAG, "Waiting for audio track to complete : " +
                audioTrack.hashCode());
        blockUntilDone(params);
        if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");

        // The last call to AudioTrack.write( ) will return only after
        // all data from the audioTrack has been sent to the mixer, so
        // it's safe to release at this point. Make sure release() and the call
        // that set the audio track to null are performed atomically.
        synchronized (this) {
            // Never allow the audioTrack to be observed in a state where
            // it is released but non null. The only case this might happen
            // is in the various stopFoo methods that call AudioTrack#stop from
            // different threads, but they are synchronized on AudioPlayBackHandler#this
            // too.
            audioTrack.release();
            params.setAudioTrack(null);
        }
        // Notify completion and clear the bookkeeping set by
        // handleSynthesisStart().
        params.getDispatcher().dispatchUtteranceCompleted();
        mLastSynthesisRequest = null;
        params.mLogger.onWriteData();
    }
    446 
    /**
     * The minimum increment of time to wait for an audiotrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an audiotrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     *
     * Deliberately aliased to {@link #MAX_SLEEP_TIME_MS}: one maximal sleep
     * with no head-position progress is treated as a stall.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;
    465 
    466     private static void blockUntilDone(SynthesisMessageParams params) {
    467         if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
    468             return;
    469         }
    470 
    471         if (params.mIsShortUtterance) {
    472             // In this case we would have called AudioTrack#stop() to flush
    473             // buffers to the mixer. This makes the playback head position
    474             // unobservable and notification markers do not work reliably. We
    475             // have no option but to wait until we think the track would finish
    476             // playing and release it after.
    477             //
    478             // This isn't as bad as it looks because (a) We won't end up waiting
    479             // for much longer than we should because even at 4khz mono, a short
    480             // utterance weighs in at about 2 seconds, and (b) such short utterances
    481             // are expected to be relatively infrequent and in a stream of utterances
    482             // this shows up as a slightly longer pause.
    483             blockUntilEstimatedCompletion(params);
    484         } else {
    485             blockUntilCompletion(params);
    486         }
    487     }
    488 
    489     private static void blockUntilEstimatedCompletion(SynthesisMessageParams params) {
    490         final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;
    491         final long estimatedTimeMs = (lengthInFrames * 1000 / params.mSampleRateInHz);
    492 
    493         if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");
    494 
    495         try {
    496             Thread.sleep(estimatedTimeMs);
    497         } catch (InterruptedException ie) {
    498             // Do nothing.
    499         }
    500     }
    501 
    502     private static void blockUntilCompletion(SynthesisMessageParams params) {
    503         final AudioTrack audioTrack = params.mAudioTrack;
    504         final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;
    505 
    506         int previousPosition = -1;
    507         int currentPosition = 0;
    508         long blockedTimeMs = 0;
    509 
    510         while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
    511                 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) {
    512 
    513             final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
    514                     audioTrack.getSampleRate();
    515             final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);
    516 
    517             // Check if the audio track has made progress since the last loop
    518             // iteration. We should then add in the amount of time that was
    519             // spent sleeping in the last iteration.
    520             if (currentPosition == previousPosition) {
    521                 // This works only because the sleep time that would have been calculated
    522                 // would be the same in the previous iteration too.
    523                 blockedTimeMs += sleepTimeMs;
    524                 // If we've taken too long to make progress, bail.
    525                 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
    526                     Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
    527                             "for AudioTrack to make progress, Aborting");
    528                     break;
    529                 }
    530             } else {
    531                 blockedTimeMs = 0;
    532             }
    533             previousPosition = currentPosition;
    534 
    535             if (DBG) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
    536                     " Playback position : " + currentPosition + ", Length in frames : "
    537                     + lengthInFrames);
    538             try {
    539                 Thread.sleep(sleepTimeMs);
    540             } catch (InterruptedException ie) {
    541                 break;
    542             }
    543         }
    544     }
    545 
    546     private static final long clip(long value, long min, long max) {
    547         if (value < min) {
    548             return min;
    549         }
    550 
    551         if (value > max) {
    552             return max;
    553         }
    554 
    555         return value;
    556     }
    557 
    558     private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) {
    559         final int channelConfig = getChannelConfig(params.mChannelCount);
    560         final int sampleRateInHz = params.mSampleRateInHz;
    561         final int audioFormat = params.mAudioFormat;
    562 
    563         int minBufferSizeInBytes
    564                 = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
    565         int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
    566 
    567         AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig,
    568                 audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
    569         if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
    570             Log.w(TAG, "Unable to create audio track.");
    571             audioTrack.release();
    572             return null;
    573         }
    574         params.mAudioBufferSize = bufferSizeInBytes;
    575 
    576         setupVolume(audioTrack, params.mVolume, params.mPan);
    577         return audioTrack;
    578     }
    579 
    580     static int getChannelConfig(int channelCount) {
    581         if (channelCount == 1) {
    582             return AudioFormat.CHANNEL_OUT_MONO;
    583         } else if (channelCount == 2){
    584             return AudioFormat.CHANNEL_OUT_STEREO;
    585         }
    586 
    587         return 0;
    588     }
    589 
    590     private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
    591         float vol = clip(volume, 0.0f, 1.0f);
    592         float panning = clip(pan, -1.0f, 1.0f);
    593         float volLeft = vol;
    594         float volRight = vol;
    595         if (panning > 0.0f) {
    596             volLeft *= (1.0f - panning);
    597         } else if (panning < 0.0f) {
    598             volRight *= (1.0f + panning);
    599         }
    600         if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
    601         if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
    602             Log.e(TAG, "Failed to set volume");
    603         }
    604     }
    605 
    606     private static float clip(float value, float min, float max) {
    607         return value > max ? max : (value < min ? min : value);
    608     }
    609 
    610 }
    611