1 /* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.voicedialer; 18 19 import android.app.Activity; 20 import android.content.Intent; 21 import android.speech.srec.MicrophoneInputStream; 22 import android.speech.srec.Recognizer; 23 import android.speech.srec.WaveHeader; 24 import android.util.Config; 25 import android.util.Log; 26 import java.io.File; 27 import java.io.FileInputStream; 28 import java.io.IOException; 29 import java.io.InputStream; 30 import java.util.ArrayList; 31 32 /** 33 * This class is a framework for recognizing speech. It must be extended to use. 34 * The child class must timplement setupGrammar and onRecognitionSuccess. 35 * A usage cycle is as follows: 36 * <ul> 37 * <li>Create with a reference to the {@link VoiceDialerActivity}. 38 * <li>Signal the user to start speaking with the Vibrator or beep. 39 * <li>Start audio input by creating a {@link MicrophoneInputStream}. 40 * <li>Create and configure a {@link Recognizer}. 41 * <li>Set up the grammar using setupGrammar. 42 * <li>Start the {@link Recognizer} running using data already being 43 * collected by the microphone. 44 * <li>Wait for the {@link Recognizer} to complete. 45 * <li>Process the results using onRecognitionSuccess, which will pass 46 * a list of intents to the {@RecogizerClient}. 47 * <li>Shut down and clean up. 48 * </ul> 49 * Notes: 50 * <ul> 51 * <li>Audio many be read from a file. 52 * <li>A directory tree of audio files may be stepped through. 53 * <li>A contact list may be read from a file. 54 * <li>A {@link RecognizerLogger} may generate a set of log files from 55 * a recognition session. 56 * <li>A static instance of this class is held and reused by the 57 * {@link VoiceDialerActivity}, which saves setup time. 58 * </ul> 59 */ 60 abstract public class RecognizerEngine { 61 62 protected static final String TAG = "RecognizerEngine"; 63 64 protected static final String ACTION_RECOGNIZER_RESULT = 65 "com.android.voicedialer.ACTION_RECOGNIZER_RESULT"; 66 public static final String SENTENCE_EXTRA = "sentence"; 67 public static final String SEMANTIC_EXTRA = "semantic"; 68 69 protected final String SREC_DIR = Recognizer.getConfigDir(null); 70 71 protected static final String OPEN_ENTRIES = "openentries.txt"; 72 73 protected static final int RESULT_LIMIT = 5; 74 75 protected Activity mActivity; 76 protected Recognizer mSrec; 77 protected Recognizer.Grammar mSrecGrammar; 78 protected RecognizerLogger mLogger; 79 protected int mSampleRate; 80 81 /** 82 * Constructor. 83 */ 84 public RecognizerEngine() { 85 mSampleRate = 0; 86 } 87 88 abstract protected void setupGrammar() throws IOException, InterruptedException; 89 90 abstract protected void onRecognitionSuccess(RecognizerClient recognizerClient) 91 throws InterruptedException; 92 93 /** 94 * Start the recognition process. 95 * 96 * <ul> 97 * <li>Create and start the microphone. 98 * <li>Create a Recognizer. 99 * <li>set up the grammar (implementation is in child class) 100 * <li>Start the Recognizer. 101 * <li>Feed the Recognizer audio until it provides a result. 102 * <li>Build a list of Intents corresponding to the results. (implementation 103 * is in child class) 104 * <li>Stop the microphone. 105 * <li>Stop the Recognizer. 106 * </ul> 107 * 108 * @param recognizerClient client to be given the results 109 * @param activity the Activity this recognition is being run from. 110 * @param micFile optional audio input from this file, or directory tree. 111 * @param sampleRate the same rate coming from the mic or micFile 112 */ 113 public void recognize(RecognizerClient recognizerClient, Activity activity, 114 File micFile, int sampleRate) { 115 InputStream mic = null; 116 boolean recognizerStarted = false; 117 try { 118 mActivity = activity; 119 // set up logger 120 mLogger = null; 121 if (RecognizerLogger.isEnabled(mActivity)) { 122 mLogger = new RecognizerLogger(mActivity); 123 } 124 125 if (mSampleRate != sampleRate) { 126 // sample rate has changed since we last used this recognizerEngine. 127 // destroy the grammar and regenerate. 128 if (mSrecGrammar != null) { 129 mSrecGrammar.destroy(); 130 } 131 mSrecGrammar = null; 132 mSampleRate = sampleRate; 133 } 134 135 // create a new recognizer 136 if (Config.LOGD) Log.d(TAG, "start new Recognizer"); 137 if (mSrec == null) { 138 String parFilePath = SREC_DIR + "/baseline11k.par"; 139 if (sampleRate == 8000) { 140 parFilePath = SREC_DIR + "/baseline8k.par"; 141 } 142 mSrec = new Recognizer(parFilePath); 143 } 144 145 // start audio input 146 if (micFile != null) { 147 if (Config.LOGD) Log.d(TAG, "using mic file"); 148 mic = new FileInputStream(micFile); 149 WaveHeader hdr = new WaveHeader(); 150 hdr.read(mic); 151 } else { 152 if (Config.LOGD) Log.d(TAG, "start new MicrophoneInputStream"); 153 mic = new MicrophoneInputStream(sampleRate, sampleRate * 15); 154 } 155 156 // notify UI 157 recognizerClient.onMicrophoneStart(mic); 158 159 // log audio if requested 160 if (mLogger != null) mic = mLogger.logInputStream(mic, sampleRate); 161 162 setupGrammar(); 163 164 // start the recognition process 165 if (Config.LOGD) Log.d(TAG, "start mSrec.start"); 166 mSrec.start(); 167 recognizerStarted = true; 168 169 // recognize 170 while (true) { 171 if (Thread.interrupted()) throw new InterruptedException(); 172 int event = mSrec.advance(); 173 if (event != Recognizer.EVENT_INCOMPLETE && 174 event != Recognizer.EVENT_NEED_MORE_AUDIO) { 175 Log.d(TAG, "start advance()=" + 176 Recognizer.eventToString(event) + 177 " avail " + mic.available()); 178 } 179 switch (event) { 180 case Recognizer.EVENT_INCOMPLETE: 181 case Recognizer.EVENT_STARTED: 182 case Recognizer.EVENT_START_OF_VOICING: 183 case Recognizer.EVENT_END_OF_VOICING: 184 continue; 185 case Recognizer.EVENT_RECOGNITION_RESULT: 186 onRecognitionSuccess(recognizerClient); 187 break; 188 case Recognizer.EVENT_NEED_MORE_AUDIO: 189 mSrec.putAudio(mic); 190 continue; 191 default: 192 Log.d(TAG, "unknown event " + event); 193 recognizerClient.onRecognitionFailure(Recognizer.eventToString(event)); 194 break; 195 } 196 break; 197 } 198 199 } catch (InterruptedException e) { 200 if (Config.LOGD) Log.d(TAG, "start interrupted " + e); 201 recognizerClient.onRecognitionError(e.toString()); 202 } catch (IOException e) { 203 if (Config.LOGD) Log.d(TAG, "start new Srec failed " + e); 204 recognizerClient.onRecognitionError(e.toString()); 205 } catch (Exception e) { 206 if (Config.LOGD) Log.d(TAG, "exception " + e); 207 recognizerClient.onRecognitionError(e.toString()); 208 } finally { 209 if (Config.LOGD) Log.d(TAG, "start mSrec.stop"); 210 if (mSrec != null && recognizerStarted) mSrec.stop(); 211 212 // stop microphone 213 try { 214 if (mic != null) mic.close(); 215 } 216 catch (IOException ex) { 217 if (Config.LOGD) Log.d(TAG, "start - mic.close failed - " + ex); 218 } 219 mic = null; 220 221 // close logger 222 try { 223 if (mLogger != null) mLogger.close(); 224 } 225 catch (IOException ex) { 226 if (Config.LOGD) Log.d(TAG, "start - mLoggger.close failed - " + ex); 227 } 228 mLogger = null; 229 } 230 if (Config.LOGD) Log.d(TAG, "start bye"); 231 } 232 233 protected static void addIntent(ArrayList<Intent> intents, Intent intent) { 234 for (Intent in : intents) { 235 if (in.getAction() != null && 236 in.getAction().equals(intent.getAction()) && 237 in.getData() != null && 238 in.getData().equals(intent.getData())) { 239 return; 240 } 241 } 242 intent.setFlags(intent.getFlags() | Intent.FLAG_ACTIVITY_NEW_TASK); 243 intents.add(intent); 244 } 245 } 246