Home | History | Annotate | Download | only in tts
      1 // Copyright 2011 Google Inc. All Rights Reserved.
      2 
      3 package android.speech.tts;
      4 
      5 import android.media.AudioFormat;
      6 import android.media.AudioTrack;
      7 import android.util.Log;
      8 
      9 /**
     10  * Exposes parts of the {@link AudioTrack} API by delegating calls to an
     11  * underlying {@link AudioTrack}. Additionally, provides methods like
     12  * {@link #waitAndRelease()} that will block until all audiotrack
     13  * data has been flushed to the mixer, and is estimated to have completed
     14  * playback.
     15  */
     16 class BlockingAudioTrack {
     17     private static final String TAG = "TTS.BlockingAudioTrack";
     18     private static final boolean DBG = false;
     19 
     20 
     21     /**
     22      * The minimum increment of time to wait for an AudioTrack to finish
     23      * playing.
     24      */
     25     private static final long MIN_SLEEP_TIME_MS = 20;
     26 
     27     /**
     28      * The maximum increment of time to sleep while waiting for an AudioTrack
     29      * to finish playing.
     30      */
     31     private static final long MAX_SLEEP_TIME_MS = 2500;
     32 
     33     /**
     34      * The maximum amount of time to wait for an audio track to make progress while
     35      * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     36      * could happen in exceptional circumstances like a media_server crash.
     37      */
     38     private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;
     39 
     40     /**
     41      * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     42      * we create.
     43      */
     44     private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
     45 
     46 
     47     private final int mStreamType;
     48     private final int mSampleRateInHz;
     49     private final int mAudioFormat;
     50     private final int mChannelCount;
     51     private final float mVolume;
     52     private final float mPan;
     53 
     54     private final int mBytesPerFrame;
     55     /**
     56      * A "short utterance" is one that uses less bytes than the audio
     57      * track buffer size (mAudioBufferSize). In this case, we need to call
     58      * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     59      * different logic is required to wait for the track to finish.
     60      *
     61      * Not volatile, accessed only from the audio playback thread.
     62      */
     63     private boolean mIsShortUtterance;
     64     /**
     65      * Will be valid after a call to {@link #init()}.
     66      */
     67     private int mAudioBufferSize;
     68     private int mBytesWritten = 0;
     69 
     70     private AudioTrack mAudioTrack;
     71     private volatile boolean mStopped;
     72     // Locks the initialization / uninitialization of the audio track.
     73     // This is required because stop() will throw an illegal state exception
     74     // if called before init() or after mAudioTrack.release().
     75     private final Object mAudioTrackLock = new Object();
     76 
     77     BlockingAudioTrack(int streamType, int sampleRate,
     78             int audioFormat, int channelCount,
     79             float volume, float pan) {
     80         mStreamType = streamType;
     81         mSampleRateInHz = sampleRate;
     82         mAudioFormat = audioFormat;
     83         mChannelCount = channelCount;
     84         mVolume = volume;
     85         mPan = pan;
     86 
     87         mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount;
     88         mIsShortUtterance = false;
     89         mAudioBufferSize = 0;
     90         mBytesWritten = 0;
     91 
     92         mAudioTrack = null;
     93         mStopped = false;
     94     }
     95 
     96     public void init() {
     97         AudioTrack track = createStreamingAudioTrack();
     98 
     99         synchronized (mAudioTrackLock) {
    100             mAudioTrack = track;
    101         }
    102     }
    103 
    104     public void stop() {
    105         synchronized (mAudioTrackLock) {
    106             if (mAudioTrack != null) {
    107                 mAudioTrack.stop();
    108             }
    109         }
    110         mStopped = true;
    111     }
    112 
    113     public int write(byte[] data) {
    114         if (mAudioTrack == null || mStopped) {
    115             return -1;
    116         }
    117         final int bytesWritten = writeToAudioTrack(mAudioTrack, data);
    118         mBytesWritten += bytesWritten;
    119         return bytesWritten;
    120     }
    121 
    122     public void waitAndRelease() {
    123         // For "small" audio tracks, we have to stop() them to make them mixable,
    124         // else the audio subsystem will wait indefinitely for us to fill the buffer
    125         // before rendering the track mixable.
    126         //
    127         // If mStopped is true, the track would already have been stopped, so not
    128         // much point not doing that again.
    129         if (mBytesWritten < mAudioBufferSize && !mStopped) {
    130             if (DBG) {
    131                 Log.d(TAG, "Stopping audio track to flush audio, state was : " +
    132                         mAudioTrack.getPlayState() + ",stopped= " + mStopped);
    133             }
    134 
    135             mIsShortUtterance = true;
    136             mAudioTrack.stop();
    137         }
    138 
    139         // Block until the audio track is done only if we haven't stopped yet.
    140         if (!mStopped) {
    141             if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode());
    142             blockUntilDone(mAudioTrack);
    143         }
    144 
    145         // The last call to AudioTrack.write( ) will return only after
    146         // all data from the audioTrack has been sent to the mixer, so
    147         // it's safe to release at this point.
    148         if (DBG) Log.d(TAG, "Releasing audio track [" + mAudioTrack.hashCode() + "]");
    149         synchronized (mAudioTrackLock) {
    150             mAudioTrack.release();
    151             mAudioTrack = null;
    152         }
    153     }
    154 
    155 
    156     static int getChannelConfig(int channelCount) {
    157         if (channelCount == 1) {
    158             return AudioFormat.CHANNEL_OUT_MONO;
    159         } else if (channelCount == 2){
    160             return AudioFormat.CHANNEL_OUT_STEREO;
    161         }
    162 
    163         return 0;
    164     }
    165 
    166     long getAudioLengthMs(int numBytes) {
    167         final int unconsumedFrames = numBytes / mBytesPerFrame;
    168         final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz;
    169 
    170         return estimatedTimeMs;
    171     }
    172 
    173     private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
    174         if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
    175             if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
    176             audioTrack.play();
    177         }
    178 
    179         int count = 0;
    180         while (count < bytes.length) {
    181             // Note that we don't take bufferCopy.mOffset into account because
    182             // it is guaranteed to be 0.
    183             int written = audioTrack.write(bytes, count, bytes.length);
    184             if (written <= 0) {
    185                 break;
    186             }
    187             count += written;
    188         }
    189         return count;
    190     }
    191 
    192     private AudioTrack createStreamingAudioTrack() {
    193         final int channelConfig = getChannelConfig(mChannelCount);
    194 
    195         int minBufferSizeInBytes
    196                 = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
    197         int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
    198 
    199         AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig,
    200                 mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
    201         if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
    202             Log.w(TAG, "Unable to create audio track.");
    203             audioTrack.release();
    204             return null;
    205         }
    206 
    207         mAudioBufferSize = bufferSizeInBytes;
    208 
    209         setupVolume(audioTrack, mVolume, mPan);
    210         return audioTrack;
    211     }
    212 
    213     private static int getBytesPerFrame(int audioFormat) {
    214         if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
    215             return 1;
    216         } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
    217             return 2;
    218         }
    219 
    220         return -1;
    221     }
    222 
    223 
    224     private void blockUntilDone(AudioTrack audioTrack) {
    225         if (mBytesWritten <= 0) {
    226             return;
    227         }
    228 
    229         if (mIsShortUtterance) {
    230             // In this case we would have called AudioTrack#stop() to flush
    231             // buffers to the mixer. This makes the playback head position
    232             // unobservable and notification markers do not work reliably. We
    233             // have no option but to wait until we think the track would finish
    234             // playing and release it after.
    235             //
    236             // This isn't as bad as it looks because (a) We won't end up waiting
    237             // for much longer than we should because even at 4khz mono, a short
    238             // utterance weighs in at about 2 seconds, and (b) such short utterances
    239             // are expected to be relatively infrequent and in a stream of utterances
    240             // this shows up as a slightly longer pause.
    241             blockUntilEstimatedCompletion();
    242         } else {
    243             blockUntilCompletion(audioTrack);
    244         }
    245     }
    246 
    247     private void blockUntilEstimatedCompletion() {
    248         final int lengthInFrames = mBytesWritten / mBytesPerFrame;
    249         final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);
    250 
    251         if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");
    252 
    253         try {
    254             Thread.sleep(estimatedTimeMs);
    255         } catch (InterruptedException ie) {
    256             // Do nothing.
    257         }
    258     }
    259 
    260     private void blockUntilCompletion(AudioTrack audioTrack) {
    261         final int lengthInFrames = mBytesWritten / mBytesPerFrame;
    262 
    263         int previousPosition = -1;
    264         int currentPosition = 0;
    265         long blockedTimeMs = 0;
    266 
    267         while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
    268                 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {
    269 
    270             final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
    271                     audioTrack.getSampleRate();
    272             final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);
    273 
    274             // Check if the audio track has made progress since the last loop
    275             // iteration. We should then add in the amount of time that was
    276             // spent sleeping in the last iteration.
    277             if (currentPosition == previousPosition) {
    278                 // This works only because the sleep time that would have been calculated
    279                 // would be the same in the previous iteration too.
    280                 blockedTimeMs += sleepTimeMs;
    281                 // If we've taken too long to make progress, bail.
    282                 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
    283                     Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
    284                             "for AudioTrack to make progress, Aborting");
    285                     break;
    286                 }
    287             } else {
    288                 blockedTimeMs = 0;
    289             }
    290             previousPosition = currentPosition;
    291 
    292             if (DBG) {
    293                 Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
    294                         " Playback position : " + currentPosition + ", Length in frames : "
    295                         + lengthInFrames);
    296             }
    297             try {
    298                 Thread.sleep(sleepTimeMs);
    299             } catch (InterruptedException ie) {
    300                 break;
    301             }
    302         }
    303     }
    304 
    305     private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
    306         final float vol = clip(volume, 0.0f, 1.0f);
    307         final float panning = clip(pan, -1.0f, 1.0f);
    308 
    309         float volLeft = vol;
    310         float volRight = vol;
    311         if (panning > 0.0f) {
    312             volLeft *= (1.0f - panning);
    313         } else if (panning < 0.0f) {
    314             volRight *= (1.0f + panning);
    315         }
    316         if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
    317         if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
    318             Log.e(TAG, "Failed to set volume");
    319         }
    320     }
    321 
    322     private static final long clip(long value, long min, long max) {
    323         if (value < min) {
    324             return min;
    325         }
    326 
    327         if (value > max) {
    328             return max;
    329         }
    330 
    331         return value;
    332     }
    333 
    334     private static float clip(float value, float min, float max) {
    335         return value > max ? max : (value < min ? min : value);
    336     }
    337 
    338 }
    339