// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods like
 * {@link #waitAndRelease()} that will block until all {@link AudioTrack}
 * data has been flushed to the mixer and playback is estimated to have
 * completed.
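 * <p>
 * A typical call sequence on the audio playback thread is sketched below. The
 * stream type, audio parameters and the synthesis loop are illustrative
 * assumptions only; {@code moreAudioAvailable()} and {@code nextAudioChunk()}
 * stand in for whatever source supplies the synthesized PCM data.
 *
 * <pre>{@code
 * BlockingAudioTrack audioTrack = new BlockingAudioTrack(
 *         AudioManager.STREAM_MUSIC,           // stream type
 *         22050,                               // sample rate in Hz
 *         AudioFormat.ENCODING_PCM_16BIT,      // audio format
 *         1,                                   // channel count (mono)
 *         1.0f,                                // volume
 *         0.0f);                               // pan
 * if (audioTrack.init()) {
 *     while (moreAudioAvailable()) {
 *         audioTrack.write(nextAudioChunk());
 *     }
 *     audioTrack.waitAndRelease();
 * }
 * // stop() may be called from any other thread to interrupt playback.
 * }</pre>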
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;

    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;

    private final int mStreamType;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;
    private final float mVolume;
    private final float mPan;

    private final int mBytesPerFrame;
    /**
     * A "short utterance" is one that uses fewer bytes than the audio
     * track buffer size (mAudioBufferSize). In this case, we need to call
     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     * different logic is required to wait for the track to finish.
     *
     * Not volatile, accessed only from the audio playback thread.
     */
    private boolean mIsShortUtterance;
    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    // Needs to be seen by stop(), which can be called from another thread. mAudioTrack will be
    // set to null only after waitAndRelease().
    private final Object mAudioTrackLock = new Object();
    private AudioTrack mAudioTrack;
    private volatile boolean mStopped;

    BlockingAudioTrack(int streamType, int sampleRate,
            int audioFormat, int channelCount,
            float volume, float pan) {
        mStreamType = streamType;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;
        mVolume = volume;
        mPan = pan;

        mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount;
        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

    public boolean init() {
        AudioTrack track = createStreamingAudioTrack();
        synchronized (mAudioTrackLock) {
            mAudioTrack = track;
        }

        return track != null;
    }

    public void stop() {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.stop();
            }
            mStopped = true;
        }
    }

    public int write(byte[] data) {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }

        if (track == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(track, data);

        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

    public void waitAndRelease() {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }
        if (track == null) {
            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
            return;
        }

        // For "small" audio tracks, we have to call stop() to make them mixable,
        // else the audio subsystem will wait indefinitely for us to fill the buffer
        // before it starts mixing the track.
        //
        // If mStopped is true, the track has already been stopped, so there is
        // no point in stopping it again here.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        track.getPlayState() + ", stopped = " + mStopped);
            }

            mIsShortUtterance = true;
            track.stop();
        }

        // Block until the audio track is done only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + track.hashCode());
            blockUntilDone(track);
        }

        // The last call to AudioTrack.write() will return only after
        // all data from the audio track has been sent to the mixer, so
        // it's safe to release at this point.
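        //
        // We clear mAudioTrack under the lock before releasing the track so that a
        // concurrent stop() sees null and never calls stop() on an already released
        // AudioTrack; release() itself happens outside the lock.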
        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
        synchronized (mAudioTrackLock) {
            mAudioTrack = null;
        }
        track.release();
    }

    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

    long getAudioLengthMs(int numBytes) {
        final int unconsumedFrames = numBytes / mBytesPerFrame;
        final long estimatedTimeMs = (long) unconsumedFrames * 1000 / mSampleRateInHz;

        return estimatedTimeMs;
    }

    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // Note that we don't take an additional offset into the source buffer
            // into account because callers guarantee that the data starts at
            // offset 0. The third argument is the number of bytes still to write.
            int written = audioTrack.write(bytes, count, bytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig,
                mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mVolume, mPan);
        return audioTrack;
    }

    private static int getBytesPerFrame(int audioFormat) {
        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
            return 1;
        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
            return 2;
        }

        return -1;
    }

    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would finish
            // playing and release it after.
            //
            // This isn't as bad as it looks because (a) we won't end up waiting
            // for much longer than we should because even at 4kHz mono, a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent and in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

    private void blockUntilEstimatedCompletion() {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
        final long estimatedTimeMs = (long) lengthInFrames * 1000 / mSampleRateInHz;

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

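    /**
     * Polls {@link AudioTrack#getPlaybackHeadPosition()} until all the frames we
     * have written are estimated to have been played out. Between polls we sleep
     * for the estimated remaining playback time, clipped to
     * [MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS]. If the playback head makes no
     * progress for MAX_PROGRESS_WAIT_MS while the track still reports
     * PLAYSTATE_PLAYING (for example, after a media server crash), we give up so
     * that the track can still be released.
     */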
    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            final long estimatedTimeMs = ((long) (lengthInFrames - currentPosition) * 1000) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check if the audio track has made progress since the last loop
            // iteration. If not, add in the amount of time that was spent
            // sleeping in the last iteration.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time that would have been calculated
                // would be the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, aborting");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition + ", Length in frames : "
                        + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ", volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

    private static long clip(long value, long min, long max) {
        if (value < min) {
            return min;
        }

        if (value > max) {
            return max;
        }

        return value;
    }

    private static float clip(float value, float min, float max) {
        return value > max ? max : (value < min ? min : value);
    }
}