1 // Copyright 2011 Google Inc. All Rights Reserved. 2 3 package android.speech.tts; 4 5 import android.media.AudioFormat; 6 import android.media.AudioTrack; 7 import android.util.Log; 8 9 /** 10 * Exposes parts of the {@link AudioTrack} API by delegating calls to an 11 * underlying {@link AudioTrack}. Additionally, provides methods like 12 * {@link #waitAndRelease()} that will block until all audiotrack 13 * data has been flushed to the mixer, and is estimated to have completed 14 * playback. 15 */ 16 class BlockingAudioTrack { 17 private static final String TAG = "TTS.BlockingAudioTrack"; 18 private static final boolean DBG = false; 19 20 21 /** 22 * The minimum increment of time to wait for an AudioTrack to finish 23 * playing. 24 */ 25 private static final long MIN_SLEEP_TIME_MS = 20; 26 27 /** 28 * The maximum increment of time to sleep while waiting for an AudioTrack 29 * to finish playing. 30 */ 31 private static final long MAX_SLEEP_TIME_MS = 2500; 32 33 /** 34 * The maximum amount of time to wait for an audio track to make progress while 35 * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but 36 * could happen in exceptional circumstances like a media_server crash. 37 */ 38 private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; 39 40 /** 41 * Minimum size of the buffer of the underlying {@link android.media.AudioTrack} 42 * we create. 43 */ 44 private static final int MIN_AUDIO_BUFFER_SIZE = 8192; 45 46 47 private final int mStreamType; 48 private final int mSampleRateInHz; 49 private final int mAudioFormat; 50 private final int mChannelCount; 51 private final float mVolume; 52 private final float mPan; 53 54 private final int mBytesPerFrame; 55 /** 56 * A "short utterance" is one that uses less bytes than the audio 57 * track buffer size (mAudioBufferSize). In this case, we need to call 58 * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly 59 * different logic is required to wait for the track to finish. 60 * 61 * Not volatile, accessed only from the audio playback thread. 62 */ 63 private boolean mIsShortUtterance; 64 /** 65 * Will be valid after a call to {@link #init()}. 66 */ 67 private int mAudioBufferSize; 68 private int mBytesWritten = 0; 69 70 private AudioTrack mAudioTrack; 71 private volatile boolean mStopped; 72 // Locks the initialization / uninitialization of the audio track. 73 // This is required because stop() will throw an illegal state exception 74 // if called before init() or after mAudioTrack.release(). 75 private final Object mAudioTrackLock = new Object(); 76 77 BlockingAudioTrack(int streamType, int sampleRate, 78 int audioFormat, int channelCount, 79 float volume, float pan) { 80 mStreamType = streamType; 81 mSampleRateInHz = sampleRate; 82 mAudioFormat = audioFormat; 83 mChannelCount = channelCount; 84 mVolume = volume; 85 mPan = pan; 86 87 mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount; 88 mIsShortUtterance = false; 89 mAudioBufferSize = 0; 90 mBytesWritten = 0; 91 92 mAudioTrack = null; 93 mStopped = false; 94 } 95 96 public void init() { 97 AudioTrack track = createStreamingAudioTrack(); 98 99 synchronized (mAudioTrackLock) { 100 mAudioTrack = track; 101 } 102 } 103 104 public void stop() { 105 synchronized (mAudioTrackLock) { 106 if (mAudioTrack != null) { 107 mAudioTrack.stop(); 108 } 109 } 110 mStopped = true; 111 } 112 113 public int write(byte[] data) { 114 if (mAudioTrack == null || mStopped) { 115 return -1; 116 } 117 final int bytesWritten = writeToAudioTrack(mAudioTrack, data); 118 mBytesWritten += bytesWritten; 119 return bytesWritten; 120 } 121 122 public void waitAndRelease() { 123 // For "small" audio tracks, we have to stop() them to make them mixable, 124 // else the audio subsystem will wait indefinitely for us to fill the buffer 125 // before rendering the track mixable. 126 // 127 // If mStopped is true, the track would already have been stopped, so not 128 // much point not doing that again. 129 if (mBytesWritten < mAudioBufferSize && !mStopped) { 130 if (DBG) { 131 Log.d(TAG, "Stopping audio track to flush audio, state was : " + 132 mAudioTrack.getPlayState() + ",stopped= " + mStopped); 133 } 134 135 mIsShortUtterance = true; 136 mAudioTrack.stop(); 137 } 138 139 // Block until the audio track is done only if we haven't stopped yet. 140 if (!mStopped) { 141 if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode()); 142 blockUntilDone(mAudioTrack); 143 } 144 145 // The last call to AudioTrack.write( ) will return only after 146 // all data from the audioTrack has been sent to the mixer, so 147 // it's safe to release at this point. 148 if (DBG) Log.d(TAG, "Releasing audio track [" + mAudioTrack.hashCode() + "]"); 149 synchronized (mAudioTrackLock) { 150 mAudioTrack.release(); 151 mAudioTrack = null; 152 } 153 } 154 155 156 static int getChannelConfig(int channelCount) { 157 if (channelCount == 1) { 158 return AudioFormat.CHANNEL_OUT_MONO; 159 } else if (channelCount == 2){ 160 return AudioFormat.CHANNEL_OUT_STEREO; 161 } 162 163 return 0; 164 } 165 166 long getAudioLengthMs(int numBytes) { 167 final int unconsumedFrames = numBytes / mBytesPerFrame; 168 final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz; 169 170 return estimatedTimeMs; 171 } 172 173 private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) { 174 if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) { 175 if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode()); 176 audioTrack.play(); 177 } 178 179 int count = 0; 180 while (count < bytes.length) { 181 // Note that we don't take bufferCopy.mOffset into account because 182 // it is guaranteed to be 0. 183 int written = audioTrack.write(bytes, count, bytes.length); 184 if (written <= 0) { 185 break; 186 } 187 count += written; 188 } 189 return count; 190 } 191 192 private AudioTrack createStreamingAudioTrack() { 193 final int channelConfig = getChannelConfig(mChannelCount); 194 195 int minBufferSizeInBytes 196 = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat); 197 int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); 198 199 AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig, 200 mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM); 201 if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { 202 Log.w(TAG, "Unable to create audio track."); 203 audioTrack.release(); 204 return null; 205 } 206 207 mAudioBufferSize = bufferSizeInBytes; 208 209 setupVolume(audioTrack, mVolume, mPan); 210 return audioTrack; 211 } 212 213 private static int getBytesPerFrame(int audioFormat) { 214 if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) { 215 return 1; 216 } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) { 217 return 2; 218 } 219 220 return -1; 221 } 222 223 224 private void blockUntilDone(AudioTrack audioTrack) { 225 if (mBytesWritten <= 0) { 226 return; 227 } 228 229 if (mIsShortUtterance) { 230 // In this case we would have called AudioTrack#stop() to flush 231 // buffers to the mixer. This makes the playback head position 232 // unobservable and notification markers do not work reliably. We 233 // have no option but to wait until we think the track would finish 234 // playing and release it after. 235 // 236 // This isn't as bad as it looks because (a) We won't end up waiting 237 // for much longer than we should because even at 4khz mono, a short 238 // utterance weighs in at about 2 seconds, and (b) such short utterances 239 // are expected to be relatively infrequent and in a stream of utterances 240 // this shows up as a slightly longer pause. 241 blockUntilEstimatedCompletion(); 242 } else { 243 blockUntilCompletion(audioTrack); 244 } 245 } 246 247 private void blockUntilEstimatedCompletion() { 248 final int lengthInFrames = mBytesWritten / mBytesPerFrame; 249 final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz); 250 251 if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); 252 253 try { 254 Thread.sleep(estimatedTimeMs); 255 } catch (InterruptedException ie) { 256 // Do nothing. 257 } 258 } 259 260 private void blockUntilCompletion(AudioTrack audioTrack) { 261 final int lengthInFrames = mBytesWritten / mBytesPerFrame; 262 263 int previousPosition = -1; 264 int currentPosition = 0; 265 long blockedTimeMs = 0; 266 267 while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && 268 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) { 269 270 final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / 271 audioTrack.getSampleRate(); 272 final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); 273 274 // Check if the audio track has made progress since the last loop 275 // iteration. We should then add in the amount of time that was 276 // spent sleeping in the last iteration. 277 if (currentPosition == previousPosition) { 278 // This works only because the sleep time that would have been calculated 279 // would be the same in the previous iteration too. 280 blockedTimeMs += sleepTimeMs; 281 // If we've taken too long to make progress, bail. 282 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { 283 Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + 284 "for AudioTrack to make progress, Aborting"); 285 break; 286 } 287 } else { 288 blockedTimeMs = 0; 289 } 290 previousPosition = currentPosition; 291 292 if (DBG) { 293 Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + 294 " Playback position : " + currentPosition + ", Length in frames : " 295 + lengthInFrames); 296 } 297 try { 298 Thread.sleep(sleepTimeMs); 299 } catch (InterruptedException ie) { 300 break; 301 } 302 } 303 } 304 305 private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { 306 final float vol = clip(volume, 0.0f, 1.0f); 307 final float panning = clip(pan, -1.0f, 1.0f); 308 309 float volLeft = vol; 310 float volRight = vol; 311 if (panning > 0.0f) { 312 volLeft *= (1.0f - panning); 313 } else if (panning < 0.0f) { 314 volRight *= (1.0f + panning); 315 } 316 if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); 317 if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { 318 Log.e(TAG, "Failed to set volume"); 319 } 320 } 321 322 private static final long clip(long value, long min, long max) { 323 if (value < min) { 324 return min; 325 } 326 327 if (value > max) { 328 return max; 329 } 330 331 return value; 332 } 333 334 private static float clip(float value, float min, float max) { 335 return value > max ? max : (value < min ? min : value); 336 } 337 338 } 339