// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioAttributes;
import android.media.AudioFormat;
import android.media.AudioTrack;
import android.speech.tts.TextToSpeechService.AudioOutputParams;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods like
 * {@link #waitAndRelease()} that will block until all {@link AudioTrack}
 * data has been flushed to the mixer and playback is estimated to have
 * completed.
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;

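    // Expected usage (not enforced by this class): construct, call init() once, then
    // write() audio buffers from the audio playback thread, and finally waitAndRelease().
    // stop() may be called from another thread at any point to interrupt playback early.
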
    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;


    private final AudioOutputParams mAudioParams;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;


    private final int mBytesPerFrame;
    /**
     * A "short utterance" is one that uses fewer bytes than the audio
     * track buffer size (mAudioBufferSize). In this case, we need to call
     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     * different logic is required to wait for the track to finish.
     *
     * Not volatile, accessed only from the audio playback thread.
     */
    private boolean mIsShortUtterance;
    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    // These need to be seen by stop(), which can be called from another thread.
    // mAudioTrack will be set to null only after waitAndRelease().
    private final Object mAudioTrackLock = new Object();
    private AudioTrack mAudioTrack;
    private volatile boolean mStopped;

    private int mSessionId;

    BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate,
            int audioFormat, int channelCount) {
        mAudioParams = audioParams;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;

        mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount;
        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

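    /**
     * Creates the underlying {@link AudioTrack}. Returns {@code false} if the track
     * could not be created, in which case nothing can be played.
     */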
    public boolean init() {
        AudioTrack track = createStreamingAudioTrack();
        synchronized (mAudioTrackLock) {
            mAudioTrack = track;
        }

        return track != null;
    }

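    /**
     * Stops the underlying {@link AudioTrack}, if any, and marks this object as stopped
     * so that subsequent {@link #write(byte[])} calls fail fast and
     * {@link #waitAndRelease()} does not block. May be called from another thread.
     */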
    public void stop() {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.stop();
            }
            mStopped = true;
        }
    }

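    /**
     * Writes PCM data to the underlying {@link AudioTrack}, blocking until the data
     * has been queued for playback. Returns the number of bytes written, or -1 if the
     * track was never created or has already been stopped.
     */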
    public int write(byte[] data) {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }

        if (track == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(track, data);

        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

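    /**
     * Blocks until everything written so far is estimated to have finished playing,
     * then releases the underlying {@link AudioTrack}. Safe to call if the track was
     * never created; skips the wait if {@link #stop()} has already been called.
     */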
    public void waitAndRelease() {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }
        if (track == null) {
            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
            return;
        }

        // For "small" audio tracks, we have to stop() them to make them mixable,
        // else the audio subsystem will wait indefinitely for us to fill the buffer
        // before rendering the track mixable.
        //
        // If mStopped is true, the track has already been stopped, so there is no
        // point in doing it again.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        track.getPlayState() + ", stopped = " + mStopped);
            }

            mIsShortUtterance = true;
            track.stop();
        }

        // Block until the audio track is done only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + track.hashCode());
            blockUntilDone(track);
        }

        // The last call to AudioTrack.write() will return only after
        // all data from the audio track has been sent to the mixer, so
        // it's safe to release at this point.
        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
        synchronized (mAudioTrackLock) {
            mAudioTrack = null;
        }
        track.release();
    }


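    /**
     * Maps a channel count to an {@link AudioFormat} output channel mask. Returns 0
     * (an invalid channel configuration) for unsupported channel counts.
     */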
    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

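    /**
     * Returns the estimated playback duration, in milliseconds, of {@code numBytes}
     * of PCM data at this track's sample rate and frame size.
     */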
    long getAudioLengthMs(int numBytes) {
        final int unconsumedFrames = numBytes / mBytesPerFrame;
        final long estimatedTimeMs = (long) unconsumedFrames * 1000 / mSampleRateInHz;

        return estimatedTimeMs;
    }

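    /**
     * Writes all of {@code bytes} to the given {@link AudioTrack}, starting playback
     * first if necessary. In streaming mode the write call blocks until the data has
     * been accepted, so the loop normally finishes in a single pass. Returns the
     * number of bytes actually written.
     */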
    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // Note that we don't take the caller's buffer offset into account because
            // it is guaranteed to be 0.
            int written = audioTrack.write(bytes, count, bytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

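        // Never use a buffer smaller than MIN_AUDIO_BUFFER_SIZE, even if the platform's
        // minimum for this format is lower.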
        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioFormat audioFormat = (new AudioFormat.Builder())
                .setChannelMask(channelConfig)
                .setEncoding(mAudioFormat)
                .setSampleRate(mSampleRateInHz).build();
        AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes,
                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM,
                mAudioParams.mSessionId);

        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan);
        return audioTrack;
    }

    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would finish
            // playing and release it after.
            //
            // This isn't as bad as it looks because (a) we won't end up waiting
            // for much longer than we should, because even at 4 kHz mono a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent, so in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

    private void blockUntilEstimatedCompletion() {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
        final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

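        // Poll the playback head until all written frames have been played, the track
        // leaves PLAYSTATE_PLAYING, or stop() is called from another thread.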
        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            final long estimatedTimeMs = ((long) (lengthInFrames - currentPosition) * 1000) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check if the audio track has made progress since the last loop
            // iteration. If not, add in the amount of time that was spent
            // sleeping in the last iteration.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time that would have been calculated
                // would be the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, aborting");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition + ", Length in frames : "
                        + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

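        // Map pan to per-channel volume: positive pan attenuates the left channel,
        // negative pan attenuates the right channel.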
        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ", volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

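    // Clamp helpers: constrain a value to the [min, max] range.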
    private static final long clip(long value, long min, long max) {
        return value < min ? min : (value < max ? value : max);
    }

    private static final float clip(float value, float min, float max) {
        return value < min ? min : (value < max ? value : max);
    }

}