1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 package android.speech.tts; 17 18 import android.media.AudioFormat; 19 import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher; 20 import android.util.Log; 21 22 import java.io.IOException; 23 import java.nio.ByteBuffer; 24 import java.nio.ByteOrder; 25 import java.nio.channels.FileChannel; 26 27 /** 28 * Speech synthesis request that writes the audio to a WAV file. 29 */ 30 class FileSynthesisCallback extends AbstractSynthesisCallback { 31 32 private static final String TAG = "FileSynthesisRequest"; 33 private static final boolean DBG = false; 34 35 private static final int MAX_AUDIO_BUFFER_SIZE = 8192; 36 37 private static final int WAV_HEADER_LENGTH = 44; 38 private static final short WAV_FORMAT_PCM = 0x0001; 39 40 private final Object mStateLock = new Object(); 41 42 private int mSampleRateInHz; 43 private int mAudioFormat; 44 private int mChannelCount; 45 46 private FileChannel mFileChannel; 47 48 private final UtteranceProgressDispatcher mDispatcher; 49 private final Object mCallerIdentity; 50 51 private boolean mStarted = false; 52 private boolean mDone = false; 53 54 /** Status code of synthesis */ 55 protected int mStatusCode; 56 57 FileSynthesisCallback(FileChannel fileChannel, UtteranceProgressDispatcher dispatcher, 58 Object callerIdentity, boolean clientIsUsingV2) { 59 super(clientIsUsingV2); 60 mFileChannel = fileChannel; 61 mDispatcher = dispatcher; 62 mCallerIdentity = callerIdentity; 63 mStatusCode = TextToSpeech.SUCCESS; 64 } 65 66 @Override 67 void stop() { 68 synchronized (mStateLock) { 69 if (mDone) { 70 return; 71 } 72 if (mStatusCode == TextToSpeech.STOPPED) { 73 return; 74 } 75 76 mStatusCode = TextToSpeech.STOPPED; 77 cleanUp(); 78 if (mDispatcher != null) { 79 mDispatcher.dispatchOnStop(); 80 } 81 } 82 } 83 84 /** 85 * Must be called while holding the monitor on {@link #mStateLock}. 86 */ 87 private void cleanUp() { 88 closeFile(); 89 } 90 91 /** 92 * Must be called while holding the monitor on {@link #mStateLock}. 93 */ 94 private void closeFile() { 95 // File will be closed by the SpeechItem in the speech service. 96 mFileChannel = null; 97 } 98 99 @Override 100 public int getMaxBufferSize() { 101 return MAX_AUDIO_BUFFER_SIZE; 102 } 103 104 @Override 105 public int start(int sampleRateInHz, int audioFormat, int channelCount) { 106 if (DBG) { 107 Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat 108 + "," + channelCount + ")"); 109 } 110 FileChannel fileChannel = null; 111 synchronized (mStateLock) { 112 if (mStatusCode == TextToSpeech.STOPPED) { 113 if (DBG) Log.d(TAG, "Request has been aborted."); 114 return errorCodeOnStop(); 115 } 116 if (mStatusCode != TextToSpeech.SUCCESS) { 117 if (DBG) Log.d(TAG, "Error was raised"); 118 return TextToSpeech.ERROR; 119 } 120 if (mStarted) { 121 Log.e(TAG, "Start called twice"); 122 return TextToSpeech.ERROR; 123 } 124 mStarted = true; 125 mSampleRateInHz = sampleRateInHz; 126 mAudioFormat = audioFormat; 127 mChannelCount = channelCount; 128 129 if (mDispatcher != null) { 130 mDispatcher.dispatchOnStart(); 131 } 132 fileChannel = mFileChannel; 133 } 134 135 try { 136 fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH)); 137 return TextToSpeech.SUCCESS; 138 } catch (IOException ex) { 139 Log.e(TAG, "Failed to write wav header to output file descriptor", ex); 140 synchronized (mStateLock) { 141 cleanUp(); 142 mStatusCode = TextToSpeech.ERROR_OUTPUT; 143 } 144 return TextToSpeech.ERROR; 145 } 146 } 147 148 @Override 149 public int audioAvailable(byte[] buffer, int offset, int length) { 150 if (DBG) { 151 Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset 152 + "," + length + ")"); 153 } 154 FileChannel fileChannel = null; 155 synchronized (mStateLock) { 156 if (mStatusCode == TextToSpeech.STOPPED) { 157 if (DBG) Log.d(TAG, "Request has been aborted."); 158 return errorCodeOnStop(); 159 } 160 if (mStatusCode != TextToSpeech.SUCCESS) { 161 if (DBG) Log.d(TAG, "Error was raised"); 162 return TextToSpeech.ERROR; 163 } 164 if (mFileChannel == null) { 165 Log.e(TAG, "File not open"); 166 mStatusCode = TextToSpeech.ERROR_OUTPUT; 167 return TextToSpeech.ERROR; 168 } 169 if (!mStarted) { 170 Log.e(TAG, "Start method was not called"); 171 return TextToSpeech.ERROR; 172 } 173 fileChannel = mFileChannel; 174 } 175 176 try { 177 fileChannel.write(ByteBuffer.wrap(buffer, offset, length)); 178 return TextToSpeech.SUCCESS; 179 } catch (IOException ex) { 180 Log.e(TAG, "Failed to write to output file descriptor", ex); 181 synchronized (mStateLock) { 182 cleanUp(); 183 mStatusCode = TextToSpeech.ERROR_OUTPUT; 184 } 185 return TextToSpeech.ERROR; 186 } 187 } 188 189 @Override 190 public int done() { 191 if (DBG) Log.d(TAG, "FileSynthesisRequest.done()"); 192 FileChannel fileChannel = null; 193 194 int sampleRateInHz = 0; 195 int audioFormat = 0; 196 int channelCount = 0; 197 198 synchronized (mStateLock) { 199 if (mDone) { 200 Log.w(TAG, "Duplicate call to done()"); 201 // This is not an error that would prevent synthesis. Hence no 202 // setStatusCode is set. 203 return TextToSpeech.ERROR; 204 } 205 if (mStatusCode == TextToSpeech.STOPPED) { 206 if (DBG) Log.d(TAG, "Request has been aborted."); 207 return errorCodeOnStop(); 208 } 209 if (mDispatcher != null && mStatusCode != TextToSpeech.SUCCESS && 210 mStatusCode != TextToSpeech.STOPPED) { 211 mDispatcher.dispatchOnError(mStatusCode); 212 return TextToSpeech.ERROR; 213 } 214 if (mFileChannel == null) { 215 Log.e(TAG, "File not open"); 216 return TextToSpeech.ERROR; 217 } 218 mDone = true; 219 fileChannel = mFileChannel; 220 sampleRateInHz = mSampleRateInHz; 221 audioFormat = mAudioFormat; 222 channelCount = mChannelCount; 223 } 224 225 try { 226 // Write WAV header at start of file 227 fileChannel.position(0); 228 int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH); 229 fileChannel.write( 230 makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength)); 231 232 synchronized (mStateLock) { 233 closeFile(); 234 if (mDispatcher != null) { 235 mDispatcher.dispatchOnSuccess(); 236 } 237 return TextToSpeech.SUCCESS; 238 } 239 } catch (IOException ex) { 240 Log.e(TAG, "Failed to write to output file descriptor", ex); 241 synchronized (mStateLock) { 242 cleanUp(); 243 } 244 return TextToSpeech.ERROR; 245 } 246 } 247 248 @Override 249 public void error() { 250 error(TextToSpeech.ERROR_SYNTHESIS); 251 } 252 253 @Override 254 public void error(int errorCode) { 255 if (DBG) Log.d(TAG, "FileSynthesisRequest.error()"); 256 synchronized (mStateLock) { 257 if (mDone) { 258 return; 259 } 260 cleanUp(); 261 mStatusCode = errorCode; 262 } 263 } 264 265 @Override 266 public boolean hasStarted() { 267 synchronized (mStateLock) { 268 return mStarted; 269 } 270 } 271 272 @Override 273 public boolean hasFinished() { 274 synchronized (mStateLock) { 275 return mDone; 276 } 277 } 278 279 private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount, 280 int dataLength) { 281 int sampleSizeInBytes = AudioFormat.getBytesPerSample(audioFormat); 282 int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount; 283 short blockAlign = (short) (sampleSizeInBytes * channelCount); 284 short bitsPerSample = (short) (sampleSizeInBytes * 8); 285 286 byte[] headerBuf = new byte[WAV_HEADER_LENGTH]; 287 ByteBuffer header = ByteBuffer.wrap(headerBuf); 288 header.order(ByteOrder.LITTLE_ENDIAN); 289 290 header.put(new byte[]{ 'R', 'I', 'F', 'F' }); 291 header.putInt(dataLength + WAV_HEADER_LENGTH - 8); // RIFF chunk size 292 header.put(new byte[]{ 'W', 'A', 'V', 'E' }); 293 header.put(new byte[]{ 'f', 'm', 't', ' ' }); 294 header.putInt(16); // size of fmt chunk 295 header.putShort(WAV_FORMAT_PCM); 296 header.putShort((short) channelCount); 297 header.putInt(sampleRateInHz); 298 header.putInt(byteRate); 299 header.putShort(blockAlign); 300 header.putShort(bitsPerSample); 301 header.put(new byte[]{ 'd', 'a', 't', 'a' }); 302 header.putInt(dataLength); 303 header.flip(); 304 305 return header; 306 } 307 } 308