// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.speech.tts.TextToSpeechService.AudioOutputParams;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods like
 * {@link #waitAndRelease()} that block until all AudioTrack data has been
 * flushed to the mixer and playback is estimated to have completed.
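 *
 * <p>A typical call sequence from the audio playback thread, sketched here for
 * illustration only (variable names are placeholders, error handling omitted):
 *
 * <pre>
 * BlockingAudioTrack track = new BlockingAudioTrack(params, rate, format, channels);
 * if (track.init()) {
 *     track.write(audioBuffer);   // may be called repeatedly
 *     track.waitAndRelease();     // blocks until playback is estimated complete
 * }
 * // stop() may be called from another thread to interrupt playback.
 * </pre>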
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;


    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;


    private final AudioOutputParams mAudioParams;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;


    private final int mBytesPerFrame;
    /**
     * A "short utterance" is one that uses fewer bytes than the audio
     * track buffer size (mAudioBufferSize). In this case, we need to call
     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     * different logic is required to wait for the track to finish.
     *
     * Not volatile, accessed only from the audio playback thread.
     */
    private boolean mIsShortUtterance;
    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    // These fields need to be visible to stop(), which can be called from another thread.
    // mAudioTrack will be set to null only after waitAndRelease().
    private final Object mAudioTrackLock = new Object();
    private AudioTrack mAudioTrack;
    private volatile boolean mStopped;

    private int mSessionId;

    BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate,
            int audioFormat, int channelCount) {
        mAudioParams = audioParams;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;

        mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount;
        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

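    /**
     * Creates and initializes the underlying {@link AudioTrack}.
     *
     * @return {@code true} if the track was created successfully, {@code false} otherwise.
     */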
    public boolean init() {
        AudioTrack track = createStreamingAudioTrack();
        synchronized (mAudioTrackLock) {
            mAudioTrack = track;
        }

        return track != null;
    }

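    /**
     * Stops the underlying {@link AudioTrack}, if one exists, and marks this object as
     * stopped. Unlike the other methods of this class, this may be called from a thread
     * other than the audio playback thread.
     */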
    public void stop() {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.stop();
            }
            mStopped = true;
        }
    }

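    /**
     * Writes PCM audio data to the underlying {@link AudioTrack}, blocking until the
     * track has consumed it.
     *
     * @return the number of bytes written, or -1 if the track has not been initialized
     *         or has already been stopped.
     */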
    public int write(byte[] data) {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }

        if (track == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(track, data);

        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

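    /**
     * Blocks until all audio written to the underlying {@link AudioTrack} is estimated to
     * have finished playing, then releases the track. Safe to call even if the track was
     * stopped or never initialized, in which case it returns promptly.
     */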
    public void waitAndRelease() {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }
        if (track == null) {
            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
            return;
        }

        // For "short" utterances (ones that fill less than the full audio track buffer),
        // we have to stop() the track so that the remaining buffered data is sent to the
        // mixer; otherwise the audio subsystem will wait indefinitely for us to fill the
        // rest of the buffer before making the track mixable.
        //
        // If mStopped is true, the track has already been stopped, so there is no point
        // in doing it again.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        track.getPlayState() + ", stopped = " + mStopped);
            }

            mIsShortUtterance = true;
            track.stop();
        }

        // Block until the audio track is done, but only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + track.hashCode());
            blockUntilDone(track);
        }

        // The last call to AudioTrack.write() will return only after all data has been
        // sent to the mixer, so it is safe to release the track at this point.
        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
        synchronized (mAudioTrackLock) {
            mAudioTrack = null;
        }
        track.release();
    }


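    /**
     * Maps a channel count to the corresponding {@link AudioFormat} output channel mask.
     * Returns 0 for unsupported channel counts.
     */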
    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

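    /**
     * Estimates how long the given number of bytes will take to play back, based on the
     * track's frame size and sample rate. For example, assuming 16-bit mono PCM at
     * 22050 Hz (2 bytes per frame), 44100 bytes correspond to 22050 frames, or roughly
     * 1000 ms.
     */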
    long getAudioLengthMs(int numBytes) {
        final int unconsumedFrames = numBytes / mBytesPerFrame;
        // Use long arithmetic so the estimate does not overflow int for large buffers.
        final long estimatedTimeMs = unconsumedFrames * 1000L / mSampleRateInHz;

        return estimatedTimeMs;
    }

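    /**
     * Writes the given bytes to the {@link AudioTrack}, starting playback if necessary,
     * and loops until either all bytes have been accepted or the track reports an error.
     *
     * @return the number of bytes actually written.
     */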
    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // Write the remainder of the buffer. The data always starts at offset 0 in
            // this class, so no additional offset needs to be taken into account.
            int written = audioTrack.write(bytes, count, bytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

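    /**
     * Creates a streaming {@link AudioTrack} using the requested sample rate, format and
     * channel configuration, with a buffer at least {@link #MIN_AUDIO_BUFFER_SIZE} bytes
     * large. Returns null if the track could not be initialized.
     */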
    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioFormat audioFormat = (new AudioFormat.Builder())
                .setChannelMask(channelConfig)
                .setEncoding(mAudioFormat)
                .setSampleRate(mSampleRateInHz).build();
        AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes,
                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM,
                mAudioParams.mSessionId);

        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan);
        return audioTrack;
    }

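    /**
     * Blocks until the track is estimated to have finished playing everything written so
     * far, using playback-head polling for normal utterances and a simple time estimate
     * for short utterances that had to be flushed with {@link AudioTrack#stop()}.
     */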
    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we have already called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably, so
            // we have no option but to wait until we estimate the track has
            // finished playing and release it afterwards.
            //
            // This isn't as bad as it looks because (a) we won't end up waiting
            // much longer than we should, since even at 4 kHz mono a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent, so in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

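    /**
     * Sleeps for the estimated playback duration of all bytes written so far. Used for
     * short utterances, where the playback head position cannot be observed after
     * {@link AudioTrack#stop()}.
     */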
    private void blockUntilEstimatedCompletion() {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
        final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

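    /**
     * Polls the track's playback head position, sleeping in increments proportional to
     * the estimated time remaining, until all written frames have been played, the track
     * stops playing, stop() is called, or the track fails to make progress for
     * {@link #MAX_PROGRESS_WAIT_MS}.
     */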
    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            // Use long arithmetic so the estimate does not overflow int for long utterances.
            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000L) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check whether the audio track has made progress since the last loop
            // iteration. If it hasn't, add the time spent sleeping in the last
            // iteration to the total time spent blocked.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time calculated in the previous
                // iteration would have been the same.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, aborting");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition + ", Length in frames : "
                        + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

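    /**
     * Applies the requested volume and pan to the track as per-channel volumes. A positive
     * pan attenuates the left channel and a negative pan attenuates the right one; for
     * example, a pan of 0.5 plays the left channel at half the requested volume and leaves
     * the right channel unchanged.
     */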
    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ", volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

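    // Clamp helpers: constrain a value to the inclusive range [min, max].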
    private static final long clip(long value, long min, long max) {
        return value < min ? min : (value < max ? value : max);
    }

    private static final float clip(float value, float min, float max) {
        return value < min ? min : (value < max ? value : max);
    }

    /**
     * @see
     *     AudioTrack#setPlaybackPositionUpdateListener(AudioTrack.OnPlaybackPositionUpdateListener)
     */
    public void setPlaybackPositionUpdateListener(
            AudioTrack.OnPlaybackPositionUpdateListener listener) {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.setPlaybackPositionUpdateListener(listener);
            }
        }
    }

    /** @see AudioTrack#setNotificationMarkerPosition(int) */
    public void setNotificationMarkerPosition(int frames) {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.setNotificationMarkerPosition(frames);
            }
        }
    }
}