1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 package android.speech.tts; 17 18 import android.media.AudioFormat; 19 import android.media.AudioTrack; 20 import android.text.TextUtils; 21 import android.util.Log; 22 23 import java.util.Iterator; 24 import java.util.concurrent.PriorityBlockingQueue; 25 import java.util.concurrent.atomic.AtomicLong; 26 27 class AudioPlaybackHandler { 28 private static final String TAG = "TTS.AudioPlaybackHandler"; 29 private static final boolean DBG_THREADING = false; 30 private static final boolean DBG = false; 31 32 private static final int MIN_AUDIO_BUFFER_SIZE = 8192; 33 34 private static final int SYNTHESIS_START = 1; 35 private static final int SYNTHESIS_DATA_AVAILABLE = 2; 36 private static final int SYNTHESIS_DONE = 3; 37 38 private static final int PLAY_AUDIO = 5; 39 private static final int PLAY_SILENCE = 6; 40 41 private static final int SHUTDOWN = -1; 42 43 private static final int DEFAULT_PRIORITY = 1; 44 private static final int HIGH_PRIORITY = 0; 45 46 private final PriorityBlockingQueue<ListEntry> mQueue = 47 new PriorityBlockingQueue<ListEntry>(); 48 private final Thread mHandlerThread; 49 50 private volatile MessageParams mCurrentParams = null; 51 // Used only for book keeping and error detection. 52 private volatile SynthesisMessageParams mLastSynthesisRequest = null; 53 // Used to order incoming messages in our priority queue. 54 private final AtomicLong mSequenceIdCtr = new AtomicLong(0); 55 56 57 AudioPlaybackHandler() { 58 mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread"); 59 } 60 61 public void start() { 62 mHandlerThread.start(); 63 } 64 65 /** 66 * Stops all synthesis for a given {@code token}. If the current token 67 * is currently being processed, an effort will be made to stop it but 68 * that is not guaranteed. 69 * 70 * NOTE: This assumes that all other messages in the queue with {@code token} 71 * have been removed already. 72 * 73 * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}. 74 */ 75 private void stop(MessageParams token) { 76 if (token == null) { 77 return; 78 } 79 80 if (DBG) Log.d(TAG, "Stopping token : " + token); 81 82 if (token.getType() == MessageParams.TYPE_SYNTHESIS) { 83 AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack(); 84 if (current != null) { 85 // Stop the current audio track if it's still playing. 86 // The audio track is thread safe in this regard. The current 87 // handleSynthesisDataAvailable call will return soon after this 88 // call. 89 current.stop(); 90 } 91 // This is safe because PlaybackSynthesisCallback#stop would have 92 // been called before this method, and will no longer enqueue any 93 // audio for this token. 94 // 95 // (Even if it did, all it would result in is a warning message). 96 mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY)); 97 } else if (token.getType() == MessageParams.TYPE_AUDIO) { 98 ((AudioMessageParams) token).getPlayer().stop(); 99 // No cleanup required for audio messages. 100 } else if (token.getType() == MessageParams.TYPE_SILENCE) { 101 ((SilenceMessageParams) token).getConditionVariable().open(); 102 // No cleanup required for silence messages. 103 } 104 } 105 106 // ----------------------------------------------------- 107 // Methods that add and remove elements from the queue. These do not 108 // need to be synchronized strictly speaking, but they make the behaviour 109 // a lot more predictable. (though it would still be correct without 110 // synchronization). 111 // ----------------------------------------------------- 112 113 synchronized public void removePlaybackItems(String callingApp) { 114 if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp); 115 removeMessages(callingApp); 116 117 final MessageParams current = getCurrentParams(); 118 if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) { 119 stop(current); 120 } 121 } 122 123 synchronized public void removeAllItems() { 124 if (DBG_THREADING) Log.d(TAG, "Removing all items"); 125 removeAllMessages(); 126 stop(getCurrentParams()); 127 } 128 129 /** 130 * @return false iff the queue is empty and no queue item is currently 131 * being handled, true otherwise. 132 */ 133 public boolean isSpeaking() { 134 return (mQueue.peek() != null) || (mCurrentParams != null); 135 } 136 137 /** 138 * Shut down the audio playback thread. 139 */ 140 synchronized public void quit() { 141 removeAllMessages(); 142 stop(getCurrentParams()); 143 mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY)); 144 } 145 146 synchronized void enqueueSynthesisStart(SynthesisMessageParams token) { 147 if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token); 148 mQueue.add(new ListEntry(SYNTHESIS_START, token)); 149 } 150 151 synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) { 152 if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token); 153 mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token)); 154 } 155 156 synchronized void enqueueSynthesisDone(SynthesisMessageParams token) { 157 if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token); 158 mQueue.add(new ListEntry(SYNTHESIS_DONE, token)); 159 } 160 161 synchronized void enqueueAudio(AudioMessageParams token) { 162 if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token); 163 mQueue.add(new ListEntry(PLAY_AUDIO, token)); 164 } 165 166 synchronized void enqueueSilence(SilenceMessageParams token) { 167 if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token); 168 mQueue.add(new ListEntry(PLAY_SILENCE, token)); 169 } 170 171 // ----------------------------------------- 172 // End of public API methods. 173 // ----------------------------------------- 174 175 // ----------------------------------------- 176 // Methods for managing the message queue. 177 // ----------------------------------------- 178 179 /* 180 * The MessageLoop is a handler like implementation that 181 * processes messages from a priority queue. 182 */ 183 private final class MessageLoop implements Runnable { 184 @Override 185 public void run() { 186 while (true) { 187 ListEntry entry = null; 188 try { 189 entry = mQueue.take(); 190 } catch (InterruptedException ie) { 191 return; 192 } 193 194 if (entry.mWhat == SHUTDOWN) { 195 if (DBG) Log.d(TAG, "MessageLoop : Shutting down"); 196 return; 197 } 198 199 if (DBG) { 200 Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat 201 + " ,seqId : " + entry.mSequenceId); 202 } 203 204 setCurrentParams(entry.mMessage); 205 handleMessage(entry); 206 setCurrentParams(null); 207 } 208 } 209 } 210 211 /* 212 * Atomically clear the queue of all messages. 213 */ 214 synchronized private void removeAllMessages() { 215 mQueue.clear(); 216 } 217 218 /* 219 * Remove all messages that originate from a given calling app. 220 */ 221 synchronized private void removeMessages(String callingApp) { 222 Iterator<ListEntry> it = mQueue.iterator(); 223 224 while (it.hasNext()) { 225 final ListEntry current = it.next(); 226 // The null check is to prevent us from removing control messages, 227 // such as a shutdown message. 228 if (current.mMessage != null && 229 callingApp.equals(current.mMessage.getCallingApp())) { 230 it.remove(); 231 } 232 } 233 } 234 235 /* 236 * An element of our priority queue of messages. Each message has a priority, 237 * and a sequence id (defined by the order of enqueue calls). Among messages 238 * with the same priority, messages that were received earlier win out. 239 */ 240 private final class ListEntry implements Comparable<ListEntry> { 241 final int mWhat; 242 final MessageParams mMessage; 243 final int mPriority; 244 final long mSequenceId; 245 246 private ListEntry(int what, MessageParams message) { 247 this(what, message, DEFAULT_PRIORITY); 248 } 249 250 private ListEntry(int what, MessageParams message, int priority) { 251 mWhat = what; 252 mMessage = message; 253 mPriority = priority; 254 mSequenceId = mSequenceIdCtr.incrementAndGet(); 255 } 256 257 @Override 258 public int compareTo(ListEntry that) { 259 if (that == this) { 260 return 0; 261 } 262 263 // Note that this is always 0, 1 or -1. 264 int priorityDiff = mPriority - that.mPriority; 265 if (priorityDiff == 0) { 266 // The == case cannot occur. 267 return (mSequenceId < that.mSequenceId) ? -1 : 1; 268 } 269 270 return priorityDiff; 271 } 272 } 273 274 private void setCurrentParams(MessageParams p) { 275 if (DBG_THREADING) { 276 if (p != null) { 277 Log.d(TAG, "Started handling :" + p); 278 } else { 279 Log.d(TAG, "End handling : " + mCurrentParams); 280 } 281 } 282 mCurrentParams = p; 283 } 284 285 private MessageParams getCurrentParams() { 286 return mCurrentParams; 287 } 288 289 // ----------------------------------------- 290 // Methods for dealing with individual messages, the methods 291 // below do the actual work. 292 // ----------------------------------------- 293 294 private void handleMessage(ListEntry entry) { 295 final MessageParams msg = entry.mMessage; 296 if (entry.mWhat == SYNTHESIS_START) { 297 handleSynthesisStart(msg); 298 } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) { 299 handleSynthesisDataAvailable(msg); 300 } else if (entry.mWhat == SYNTHESIS_DONE) { 301 handleSynthesisDone(msg); 302 } else if (entry.mWhat == PLAY_AUDIO) { 303 handleAudio(msg); 304 } else if (entry.mWhat == PLAY_SILENCE) { 305 handleSilence(msg); 306 } 307 } 308 309 // Currently implemented as blocking the audio playback thread for the 310 // specified duration. If a call to stop() is made, the thread 311 // unblocks. 312 private void handleSilence(MessageParams msg) { 313 if (DBG) Log.d(TAG, "handleSilence()"); 314 SilenceMessageParams params = (SilenceMessageParams) msg; 315 if (params.getSilenceDurationMs() > 0) { 316 params.getConditionVariable().block(params.getSilenceDurationMs()); 317 } 318 params.getDispatcher().dispatchUtteranceCompleted(); 319 if (DBG) Log.d(TAG, "handleSilence() done."); 320 } 321 322 // Plays back audio from a given URI. No TTS engine involvement here. 323 private void handleAudio(MessageParams msg) { 324 if (DBG) Log.d(TAG, "handleAudio()"); 325 AudioMessageParams params = (AudioMessageParams) msg; 326 // Note that the BlockingMediaPlayer spawns a separate thread. 327 // 328 // TODO: This can be avoided. 329 params.getPlayer().startAndWait(); 330 params.getDispatcher().dispatchUtteranceCompleted(); 331 if (DBG) Log.d(TAG, "handleAudio() done."); 332 } 333 334 // Denotes the start of a new synthesis request. We create a new 335 // audio track, and prepare it for incoming data. 336 // 337 // Note that since all TTS synthesis happens on a single thread, we 338 // should ALWAYS see the following order : 339 // 340 // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone 341 // OR 342 // handleSynthesisCompleteDataAvailable. 343 private void handleSynthesisStart(MessageParams msg) { 344 if (DBG) Log.d(TAG, "handleSynthesisStart()"); 345 final SynthesisMessageParams param = (SynthesisMessageParams) msg; 346 347 // Oops, looks like the engine forgot to call done(). We go through 348 // extra trouble to clean the data to prevent the AudioTrack resources 349 // from being leaked. 350 if (mLastSynthesisRequest != null) { 351 Log.w(TAG, "Error : Missing call to done() for request : " + 352 mLastSynthesisRequest); 353 handleSynthesisDone(mLastSynthesisRequest); 354 } 355 356 mLastSynthesisRequest = param; 357 358 // Create the audio track. 359 final AudioTrack audioTrack = createStreamingAudioTrack(param); 360 361 if (DBG) Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]"); 362 363 param.setAudioTrack(audioTrack); 364 } 365 366 // More data available to be flushed to the audio track. 367 private void handleSynthesisDataAvailable(MessageParams msg) { 368 final SynthesisMessageParams param = (SynthesisMessageParams) msg; 369 if (param.getAudioTrack() == null) { 370 Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param); 371 return; 372 } 373 374 if (param != mLastSynthesisRequest) { 375 Log.e(TAG, "Call to dataAvailable without done() / start()"); 376 return; 377 } 378 379 final AudioTrack audioTrack = param.getAudioTrack(); 380 final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer(); 381 382 if (bufferCopy == null) { 383 Log.e(TAG, "No buffers available to play."); 384 return; 385 } 386 387 int playState = audioTrack.getPlayState(); 388 if (playState == AudioTrack.PLAYSTATE_STOPPED) { 389 if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode()); 390 audioTrack.play(); 391 } 392 int count = 0; 393 while (count < bufferCopy.mBytes.length) { 394 // Note that we don't take bufferCopy.mOffset into account because 395 // it is guaranteed to be 0. 396 int written = audioTrack.write(bufferCopy.mBytes, count, bufferCopy.mBytes.length); 397 if (written <= 0) { 398 break; 399 } 400 count += written; 401 } 402 param.mBytesWritten += count; 403 param.mLogger.onPlaybackStart(); 404 } 405 406 // Wait for the audio track to stop playing, and then release its resources. 407 private void handleSynthesisDone(MessageParams msg) { 408 final SynthesisMessageParams params = (SynthesisMessageParams) msg; 409 410 if (DBG) Log.d(TAG, "handleSynthesisDone()"); 411 final AudioTrack audioTrack = params.getAudioTrack(); 412 413 if (audioTrack == null) { 414 return; 415 } 416 417 if (params.mBytesWritten < params.mAudioBufferSize) { 418 if (DBG) Log.d(TAG, "Stopping audio track to flush audio, state was : " + 419 audioTrack.getPlayState()); 420 params.mIsShortUtterance = true; 421 audioTrack.stop(); 422 } 423 424 if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + 425 audioTrack.hashCode()); 426 blockUntilDone(params); 427 if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]"); 428 429 // The last call to AudioTrack.write( ) will return only after 430 // all data from the audioTrack has been sent to the mixer, so 431 // it's safe to release at this point. Make sure release() and the call 432 // that set the audio track to null are performed atomically. 433 synchronized (this) { 434 // Never allow the audioTrack to be observed in a state where 435 // it is released but non null. The only case this might happen 436 // is in the various stopFoo methods that call AudioTrack#stop from 437 // different threads, but they are synchronized on AudioPlayBackHandler#this 438 // too. 439 audioTrack.release(); 440 params.setAudioTrack(null); 441 } 442 params.getDispatcher().dispatchUtteranceCompleted(); 443 mLastSynthesisRequest = null; 444 params.mLogger.onWriteData(); 445 } 446 447 /** 448 * The minimum increment of time to wait for an audiotrack to finish 449 * playing. 450 */ 451 private static final long MIN_SLEEP_TIME_MS = 20; 452 453 /** 454 * The maximum increment of time to sleep while waiting for an audiotrack 455 * to finish playing. 456 */ 457 private static final long MAX_SLEEP_TIME_MS = 2500; 458 459 /** 460 * The maximum amount of time to wait for an audio track to make progress while 461 * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but 462 * could happen in exceptional circumstances like a media_server crash. 463 */ 464 private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; 465 466 private static void blockUntilDone(SynthesisMessageParams params) { 467 if (params.mAudioTrack == null || params.mBytesWritten <= 0) { 468 return; 469 } 470 471 if (params.mIsShortUtterance) { 472 // In this case we would have called AudioTrack#stop() to flush 473 // buffers to the mixer. This makes the playback head position 474 // unobservable and notification markers do not work reliably. We 475 // have no option but to wait until we think the track would finish 476 // playing and release it after. 477 // 478 // This isn't as bad as it looks because (a) We won't end up waiting 479 // for much longer than we should because even at 4khz mono, a short 480 // utterance weighs in at about 2 seconds, and (b) such short utterances 481 // are expected to be relatively infrequent and in a stream of utterances 482 // this shows up as a slightly longer pause. 483 blockUntilEstimatedCompletion(params); 484 } else { 485 blockUntilCompletion(params); 486 } 487 } 488 489 private static void blockUntilEstimatedCompletion(SynthesisMessageParams params) { 490 final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame; 491 final long estimatedTimeMs = (lengthInFrames * 1000 / params.mSampleRateInHz); 492 493 if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); 494 495 try { 496 Thread.sleep(estimatedTimeMs); 497 } catch (InterruptedException ie) { 498 // Do nothing. 499 } 500 } 501 502 private static void blockUntilCompletion(SynthesisMessageParams params) { 503 final AudioTrack audioTrack = params.mAudioTrack; 504 final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame; 505 506 int previousPosition = -1; 507 int currentPosition = 0; 508 long blockedTimeMs = 0; 509 510 while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && 511 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) { 512 513 final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / 514 audioTrack.getSampleRate(); 515 final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); 516 517 // Check if the audio track has made progress since the last loop 518 // iteration. We should then add in the amount of time that was 519 // spent sleeping in the last iteration. 520 if (currentPosition == previousPosition) { 521 // This works only because the sleep time that would have been calculated 522 // would be the same in the previous iteration too. 523 blockedTimeMs += sleepTimeMs; 524 // If we've taken too long to make progress, bail. 525 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { 526 Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + 527 "for AudioTrack to make progress, Aborting"); 528 break; 529 } 530 } else { 531 blockedTimeMs = 0; 532 } 533 previousPosition = currentPosition; 534 535 if (DBG) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + 536 " Playback position : " + currentPosition + ", Length in frames : " 537 + lengthInFrames); 538 try { 539 Thread.sleep(sleepTimeMs); 540 } catch (InterruptedException ie) { 541 break; 542 } 543 } 544 } 545 546 private static final long clip(long value, long min, long max) { 547 if (value < min) { 548 return min; 549 } 550 551 if (value > max) { 552 return max; 553 } 554 555 return value; 556 } 557 558 private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) { 559 final int channelConfig = getChannelConfig(params.mChannelCount); 560 final int sampleRateInHz = params.mSampleRateInHz; 561 final int audioFormat = params.mAudioFormat; 562 563 int minBufferSizeInBytes 564 = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat); 565 int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); 566 567 AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig, 568 audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM); 569 if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { 570 Log.w(TAG, "Unable to create audio track."); 571 audioTrack.release(); 572 return null; 573 } 574 params.mAudioBufferSize = bufferSizeInBytes; 575 576 setupVolume(audioTrack, params.mVolume, params.mPan); 577 return audioTrack; 578 } 579 580 static int getChannelConfig(int channelCount) { 581 if (channelCount == 1) { 582 return AudioFormat.CHANNEL_OUT_MONO; 583 } else if (channelCount == 2){ 584 return AudioFormat.CHANNEL_OUT_STEREO; 585 } 586 587 return 0; 588 } 589 590 private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { 591 float vol = clip(volume, 0.0f, 1.0f); 592 float panning = clip(pan, -1.0f, 1.0f); 593 float volLeft = vol; 594 float volRight = vol; 595 if (panning > 0.0f) { 596 volLeft *= (1.0f - panning); 597 } else if (panning < 0.0f) { 598 volRight *= (1.0f + panning); 599 } 600 if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); 601 if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { 602 Log.e(TAG, "Failed to set volume"); 603 } 604 } 605 606 private static float clip(float value, float min, float max) { 607 return value > max ? max : (value < min ? min : value); 608 } 609 610 } 611