1 // Copyright 2011 Google Inc. All Rights Reserved. 2 3 package android.speech.tts; 4 5 import android.media.AudioAttributes; 6 import android.media.AudioFormat; 7 import android.media.AudioTrack; 8 import android.speech.tts.TextToSpeechService.AudioOutputParams; 9 import android.util.Log; 10 11 /** 12 * Exposes parts of the {@link AudioTrack} API by delegating calls to an 13 * underlying {@link AudioTrack}. Additionally, provides methods like 14 * {@link #waitAndRelease()} that will block until all audiotrack 15 * data has been flushed to the mixer, and is estimated to have completed 16 * playback. 17 */ 18 class BlockingAudioTrack { 19 private static final String TAG = "TTS.BlockingAudioTrack"; 20 private static final boolean DBG = false; 21 22 23 /** 24 * The minimum increment of time to wait for an AudioTrack to finish 25 * playing. 26 */ 27 private static final long MIN_SLEEP_TIME_MS = 20; 28 29 /** 30 * The maximum increment of time to sleep while waiting for an AudioTrack 31 * to finish playing. 32 */ 33 private static final long MAX_SLEEP_TIME_MS = 2500; 34 35 /** 36 * The maximum amount of time to wait for an audio track to make progress while 37 * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but 38 * could happen in exceptional circumstances like a media_server crash. 39 */ 40 private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; 41 42 /** 43 * Minimum size of the buffer of the underlying {@link android.media.AudioTrack} 44 * we create. 45 */ 46 private static final int MIN_AUDIO_BUFFER_SIZE = 8192; 47 48 49 private final AudioOutputParams mAudioParams; 50 private final int mSampleRateInHz; 51 private final int mAudioFormat; 52 private final int mChannelCount; 53 54 55 private final int mBytesPerFrame; 56 /** 57 * A "short utterance" is one that uses less bytes than the audio 58 * track buffer size (mAudioBufferSize). In this case, we need to call 59 * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly 60 * different logic is required to wait for the track to finish. 61 * 62 * Not volatile, accessed only from the audio playback thread. 63 */ 64 private boolean mIsShortUtterance; 65 /** 66 * Will be valid after a call to {@link #init()}. 67 */ 68 private int mAudioBufferSize; 69 private int mBytesWritten = 0; 70 71 // Need to be seen by stop() which can be called from another thread. mAudioTrack will be 72 // set to null only after waitAndRelease(). 73 private Object mAudioTrackLock = new Object(); 74 private AudioTrack mAudioTrack; 75 private volatile boolean mStopped; 76 77 private int mSessionId; 78 79 BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate, 80 int audioFormat, int channelCount) { 81 mAudioParams = audioParams; 82 mSampleRateInHz = sampleRate; 83 mAudioFormat = audioFormat; 84 mChannelCount = channelCount; 85 86 mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount; 87 mIsShortUtterance = false; 88 mAudioBufferSize = 0; 89 mBytesWritten = 0; 90 91 mAudioTrack = null; 92 mStopped = false; 93 } 94 95 public boolean init() { 96 AudioTrack track = createStreamingAudioTrack(); 97 synchronized (mAudioTrackLock) { 98 mAudioTrack = track; 99 } 100 101 if (track == null) { 102 return false; 103 } else { 104 return true; 105 } 106 } 107 108 public void stop() { 109 synchronized (mAudioTrackLock) { 110 if (mAudioTrack != null) { 111 mAudioTrack.stop(); 112 } 113 mStopped = true; 114 } 115 } 116 117 public int write(byte[] data) { 118 AudioTrack track = null; 119 synchronized (mAudioTrackLock) { 120 track = mAudioTrack; 121 } 122 123 if (track == null || mStopped) { 124 return -1; 125 } 126 final int bytesWritten = writeToAudioTrack(track, data); 127 128 mBytesWritten += bytesWritten; 129 return bytesWritten; 130 } 131 132 public void waitAndRelease() { 133 AudioTrack track = null; 134 synchronized (mAudioTrackLock) { 135 track = mAudioTrack; 136 } 137 if (track == null) { 138 if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]"); 139 return; 140 } 141 142 // For "small" audio tracks, we have to stop() them to make them mixable, 143 // else the audio subsystem will wait indefinitely for us to fill the buffer 144 // before rendering the track mixable. 145 // 146 // If mStopped is true, the track would already have been stopped, so not 147 // much point not doing that again. 148 if (mBytesWritten < mAudioBufferSize && !mStopped) { 149 if (DBG) { 150 Log.d(TAG, "Stopping audio track to flush audio, state was : " + 151 track.getPlayState() + ",stopped= " + mStopped); 152 } 153 154 mIsShortUtterance = true; 155 track.stop(); 156 } 157 158 // Block until the audio track is done only if we haven't stopped yet. 159 if (!mStopped) { 160 if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode()); 161 blockUntilDone(mAudioTrack); 162 } 163 164 // The last call to AudioTrack.write( ) will return only after 165 // all data from the audioTrack has been sent to the mixer, so 166 // it's safe to release at this point. 167 if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]"); 168 synchronized(mAudioTrackLock) { 169 mAudioTrack = null; 170 } 171 track.release(); 172 } 173 174 175 static int getChannelConfig(int channelCount) { 176 if (channelCount == 1) { 177 return AudioFormat.CHANNEL_OUT_MONO; 178 } else if (channelCount == 2){ 179 return AudioFormat.CHANNEL_OUT_STEREO; 180 } 181 182 return 0; 183 } 184 185 long getAudioLengthMs(int numBytes) { 186 final int unconsumedFrames = numBytes / mBytesPerFrame; 187 final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz; 188 189 return estimatedTimeMs; 190 } 191 192 private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) { 193 if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) { 194 if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode()); 195 audioTrack.play(); 196 } 197 198 int count = 0; 199 while (count < bytes.length) { 200 // Note that we don't take bufferCopy.mOffset into account because 201 // it is guaranteed to be 0. 202 int written = audioTrack.write(bytes, count, bytes.length); 203 if (written <= 0) { 204 break; 205 } 206 count += written; 207 } 208 return count; 209 } 210 211 private AudioTrack createStreamingAudioTrack() { 212 final int channelConfig = getChannelConfig(mChannelCount); 213 214 int minBufferSizeInBytes 215 = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat); 216 int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); 217 218 AudioFormat audioFormat = (new AudioFormat.Builder()) 219 .setChannelMask(channelConfig) 220 .setEncoding(mAudioFormat) 221 .setSampleRate(mSampleRateInHz).build(); 222 AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes, 223 audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM, 224 mAudioParams.mSessionId); 225 226 if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { 227 Log.w(TAG, "Unable to create audio track."); 228 audioTrack.release(); 229 return null; 230 } 231 232 mAudioBufferSize = bufferSizeInBytes; 233 234 setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan); 235 return audioTrack; 236 } 237 238 private void blockUntilDone(AudioTrack audioTrack) { 239 if (mBytesWritten <= 0) { 240 return; 241 } 242 243 if (mIsShortUtterance) { 244 // In this case we would have called AudioTrack#stop() to flush 245 // buffers to the mixer. This makes the playback head position 246 // unobservable and notification markers do not work reliably. We 247 // have no option but to wait until we think the track would finish 248 // playing and release it after. 249 // 250 // This isn't as bad as it looks because (a) We won't end up waiting 251 // for much longer than we should because even at 4khz mono, a short 252 // utterance weighs in at about 2 seconds, and (b) such short utterances 253 // are expected to be relatively infrequent and in a stream of utterances 254 // this shows up as a slightly longer pause. 255 blockUntilEstimatedCompletion(); 256 } else { 257 blockUntilCompletion(audioTrack); 258 } 259 } 260 261 private void blockUntilEstimatedCompletion() { 262 final int lengthInFrames = mBytesWritten / mBytesPerFrame; 263 final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz); 264 265 if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); 266 267 try { 268 Thread.sleep(estimatedTimeMs); 269 } catch (InterruptedException ie) { 270 // Do nothing. 271 } 272 } 273 274 private void blockUntilCompletion(AudioTrack audioTrack) { 275 final int lengthInFrames = mBytesWritten / mBytesPerFrame; 276 277 int previousPosition = -1; 278 int currentPosition = 0; 279 long blockedTimeMs = 0; 280 281 while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && 282 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) { 283 284 final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / 285 audioTrack.getSampleRate(); 286 final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); 287 288 // Check if the audio track has made progress since the last loop 289 // iteration. We should then add in the amount of time that was 290 // spent sleeping in the last iteration. 291 if (currentPosition == previousPosition) { 292 // This works only because the sleep time that would have been calculated 293 // would be the same in the previous iteration too. 294 blockedTimeMs += sleepTimeMs; 295 // If we've taken too long to make progress, bail. 296 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { 297 Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + 298 "for AudioTrack to make progress, Aborting"); 299 break; 300 } 301 } else { 302 blockedTimeMs = 0; 303 } 304 previousPosition = currentPosition; 305 306 if (DBG) { 307 Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + 308 " Playback position : " + currentPosition + ", Length in frames : " 309 + lengthInFrames); 310 } 311 try { 312 Thread.sleep(sleepTimeMs); 313 } catch (InterruptedException ie) { 314 break; 315 } 316 } 317 } 318 319 private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { 320 final float vol = clip(volume, 0.0f, 1.0f); 321 final float panning = clip(pan, -1.0f, 1.0f); 322 323 float volLeft = vol; 324 float volRight = vol; 325 if (panning > 0.0f) { 326 volLeft *= (1.0f - panning); 327 } else if (panning < 0.0f) { 328 volRight *= (1.0f + panning); 329 } 330 if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); 331 if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { 332 Log.e(TAG, "Failed to set volume"); 333 } 334 } 335 336 private static final long clip(long value, long min, long max) { 337 return value < min ? min : (value < max ? value : max); 338 } 339 340 private static final float clip(float value, float min, float max) { 341 return value < min ? min : (value < max ? value : max); 342 } 343 344 } 345