// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods like
 * {@link #waitAndRelease()} that will block until all audio track
 * data has been flushed to the mixer and is estimated to have completed
 * playback.
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;


    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;


    private final int mStreamType;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;
    private final float mVolume;
    private final float mPan;

    private final int mBytesPerFrame;
    /**
     * A "short utterance" is one that uses fewer bytes than the audio
     * track buffer size (mAudioBufferSize). In this case, we need to call
     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     * different logic is required to wait for the track to finish.
     *
     * Not volatile, accessed only from the audio playback thread.
     */
    private boolean mIsShortUtterance;
    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    // Must be visible to stop(), which can be called from another thread;
    // mAudioTrack is set to null only in waitAndRelease().
    private final Object mAudioTrackLock = new Object();
    private AudioTrack mAudioTrack;
    private volatile boolean mStopped;

    BlockingAudioTrack(int streamType, int sampleRate,
            int audioFormat, int channelCount,
            float volume, float pan) {
        mStreamType = streamType;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;
        mVolume = volume;
        mPan = pan;

        mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount;
        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

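    /**
     * Creates and configures the underlying streaming {@link AudioTrack}.
     *
     * @return {@code false} if the track could not be created or initialized.
     */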
    public boolean init() {
        AudioTrack track = createStreamingAudioTrack();
        synchronized (mAudioTrackLock) {
            mAudioTrack = track;
        }

        return track != null;
    }

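    /**
     * Stops the underlying {@link AudioTrack} (if one has been created) and marks
     * this instance as stopped; subsequent calls to {@link #write(byte[])} return -1.
     * May be called from a thread other than the playback thread.
     */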
    public void stop() {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.stop();
            }
            mStopped = true;
        }
    }

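    /**
     * Writes the given audio data to the underlying {@link AudioTrack}, looping
     * until the whole buffer has been handed to the track or a write fails.
     *
     * @return the number of bytes written, or -1 if the track was never created
     *         or has been stopped.
     */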
    public int write(byte[] data) {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }

        if (track == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(track, data);

        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

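    /**
     * Blocks until all data written so far is estimated to have finished playing,
     * then releases the underlying {@link AudioTrack}. A second call is a no-op,
     * since the track reference is cleared after release.
     */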
    public void waitAndRelease() {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }
        if (track == null) {
            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
            return;
        }

        // For "small" audio tracks, we have to stop() them to make them mixable;
        // otherwise the audio subsystem will wait indefinitely for us to fill the
        // rest of the buffer before it considers the track ready to mix.
        //
        // If mStopped is true, the track has already been stopped, so there is
        // no point in stopping it again.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        track.getPlayState() + ", stopped : " + mStopped);
            }

            mIsShortUtterance = true;
            track.stop();
        }

        // Block until the audio track is done only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + track.hashCode());
            blockUntilDone(track);
        }

        // The last call to AudioTrack.write() will return only after
        // all data from the audio track has been sent to the mixer, so
        // it's safe to release at this point.
        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
        synchronized (mAudioTrackLock) {
            mAudioTrack = null;
        }
        track.release();
    }


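    /**
     * Maps a channel count to the corresponding {@link AudioFormat} output channel
     * configuration. Returns 0 for channel counts other than 1 or 2.
     */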
    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

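    /**
     * Estimates the playback duration, in milliseconds, of {@code numBytes} of audio
     * at this track's sample rate and frame size.
     */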
    long getAudioLengthMs(int numBytes) {
        final int unconsumedFrames = numBytes / mBytesPerFrame;
        final long estimatedTimeMs = unconsumedFrames * 1000L / mSampleRateInHz;

        return estimatedTimeMs;
    }

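    /**
     * Starts the track if necessary and writes {@code bytes} to it, looping until
     * the full buffer has been consumed or a write returns an error.
     */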
    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // Note that we don't take the caller's buffer offset into account
            // because it is guaranteed to be 0. Only the bytes that have not yet
            // been consumed are passed on each iteration.
            int written = audioTrack.write(bytes, count, bytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

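    /**
     * Creates a streaming {@link AudioTrack} with a buffer of at least
     * {@link #MIN_AUDIO_BUFFER_SIZE} bytes, records the buffer size in
     * mAudioBufferSize, and applies the requested volume and pan.
     *
     * @return the track, or {@code null} if it failed to initialize.
     */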
    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig,
                mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mVolume, mPan);
        return audioTrack;
    }

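    /**
     * Returns the number of bytes per sample (per channel) for the given PCM
     * encoding, or -1 for unsupported encodings.
     */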
    private static int getBytesPerFrame(int audioFormat) {
        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
            return 1;
        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
            return 2;
        }

        return -1;
    }


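    /**
     * Blocks until the data written so far is believed to have finished playing,
     * either by sleeping for the estimated duration (short utterances) or by
     * observing the playback head (everything else). No-op if nothing was written.
     */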
    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would have
            // finished playing, and release it after that.
            //
            // This isn't as bad as it looks because (a) we won't end up waiting
            // much longer than we should, since even at 4 kHz mono a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent, so in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

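    /**
     * Sleeps for the estimated playback duration of the data written so far. Used
     * for short utterances, where stop() has made the playback head position
     * unreliable.
     */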
    private void blockUntilEstimatedCompletion() {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
        final long estimatedTimeMs = lengthInFrames * 1000L / mSampleRateInHz;

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

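    /**
     * Polls the playback head position, sleeping in increments bounded by
     * {@link #MIN_SLEEP_TIME_MS} and {@link #MAX_SLEEP_TIME_MS}, until the track
     * has played all written frames, leaves PLAYSTATE_PLAYING, is stopped, or
     * fails to make progress for {@link #MAX_PROGRESS_WAIT_MS}.
     */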
    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000L) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check whether the audio track has made progress since the last loop
            // iteration. If it hasn't, add the time spent sleeping in the last
            // iteration to the total time we have been blocked.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time calculated in the
                // previous iteration would have been the same.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, aborting");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition + ", Length in frames : "
                        + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

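    /**
     * Applies the requested volume and pan to the track as per-channel stereo
     * volumes, clipping volume to [0, 1] and pan to [-1, 1].
     */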
    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ", volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

    private static long clip(long value, long min, long max) {
        if (value < min) {
            return min;
        }

        if (value > max) {
            return max;
        }

        return value;
    }

    private static float clip(float value, float min, float max) {
        return value > max ? max : (value < min ? min : value);
    }

}
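
// A minimal usage sketch (not part of the original file; the driving code shown
// here is assumed rather than taken from this class). The TTS playback code is
// expected to drive BlockingAudioTrack from a single playback thread roughly as
// follows, using 16-bit mono PCM at 22.05 kHz on the music stream:
//
//     BlockingAudioTrack audioTrack = new BlockingAudioTrack(
//             AudioManager.STREAM_MUSIC, 22050,
//             AudioFormat.ENCODING_PCM_16BIT, 1 /* channelCount */,
//             1.0f /* volume */, 0.0f /* pan */);
//     if (audioTrack.init()) {
//         for (byte[] buffer : synthesizedBuffers) {   // hypothetical source of PCM data
//             audioTrack.write(buffer);
//         }
//         audioTrack.waitAndRelease();
//     }
//
// stop() may be called from another thread to interrupt playback; write() then
// returns -1 and waitAndRelease() skips the blocking wait before releasing the track.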