1 /* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.voicedialer; 18 19 import android.app.Activity; 20 import android.content.Intent; 21 import android.speech.srec.MicrophoneInputStream; 22 import android.speech.srec.Recognizer; 23 import android.speech.srec.WaveHeader; 24 import android.util.Log; 25 import java.io.File; 26 import java.io.FileInputStream; 27 import java.io.IOException; 28 import java.io.InputStream; 29 import java.util.ArrayList; 30 31 /** 32 * This class is a framework for recognizing speech. It must be extended to use. 33 * The child class must timplement setupGrammar and onRecognitionSuccess. 34 * A usage cycle is as follows: 35 * <ul> 36 * <li>Create with a reference to the {@link VoiceDialerActivity}. 37 * <li>Signal the user to start speaking with the Vibrator or beep. 38 * <li>Start audio input by creating a {@link MicrophoneInputStream}. 39 * <li>Create and configure a {@link Recognizer}. 40 * <li>Set up the grammar using setupGrammar. 41 * <li>Start the {@link Recognizer} running using data already being 42 * collected by the microphone. 43 * <li>Wait for the {@link Recognizer} to complete. 44 * <li>Process the results using onRecognitionSuccess, which will pass 45 * a list of intents to the {@RecogizerClient}. 46 * <li>Shut down and clean up. 47 * </ul> 48 * Notes: 49 * <ul> 50 * <li>Audio many be read from a file. 51 * <li>A directory tree of audio files may be stepped through. 52 * <li>A contact list may be read from a file. 53 * <li>A {@link RecognizerLogger} may generate a set of log files from 54 * a recognition session. 55 * <li>A static instance of this class is held and reused by the 56 * {@link VoiceDialerActivity}, which saves setup time. 57 * </ul> 58 */ 59 abstract public class RecognizerEngine { 60 61 protected static final String TAG = "RecognizerEngine"; 62 63 protected static final String ACTION_RECOGNIZER_RESULT = 64 "com.android.voicedialer.ACTION_RECOGNIZER_RESULT"; 65 public static final String SENTENCE_EXTRA = "sentence"; 66 public static final String SEMANTIC_EXTRA = "semantic"; 67 68 protected final String SREC_DIR = Recognizer.getConfigDir(null); 69 70 protected static final String OPEN_ENTRIES = "openentries.txt"; 71 72 protected static final int RESULT_LIMIT = 5; 73 74 protected Activity mActivity; 75 protected Recognizer mSrec; 76 protected Recognizer.Grammar mSrecGrammar; 77 protected RecognizerLogger mLogger; 78 protected int mSampleRate; 79 80 /** 81 * Constructor. 82 */ 83 public RecognizerEngine() { 84 mSampleRate = 0; 85 } 86 87 abstract protected void setupGrammar() throws IOException, InterruptedException; 88 89 abstract protected void onRecognitionSuccess(RecognizerClient recognizerClient) 90 throws InterruptedException; 91 92 /** 93 * Start the recognition process. 94 * 95 * <ul> 96 * <li>Create and start the microphone. 97 * <li>Create a Recognizer. 98 * <li>set up the grammar (implementation is in child class) 99 * <li>Start the Recognizer. 100 * <li>Feed the Recognizer audio until it provides a result. 101 * <li>Build a list of Intents corresponding to the results. (implementation 102 * is in child class) 103 * <li>Stop the microphone. 104 * <li>Stop the Recognizer. 105 * </ul> 106 * 107 * @param recognizerClient client to be given the results 108 * @param activity the Activity this recognition is being run from. 109 * @param micFile optional audio input from this file, or directory tree. 110 * @param sampleRate the same rate coming from the mic or micFile 111 */ 112 public void recognize(RecognizerClient recognizerClient, Activity activity, 113 File micFile, int sampleRate) { 114 InputStream mic = null; 115 boolean recognizerStarted = false; 116 try { 117 mActivity = activity; 118 // set up logger 119 mLogger = null; 120 if (RecognizerLogger.isEnabled(mActivity)) { 121 mLogger = new RecognizerLogger(mActivity); 122 } 123 124 if (mSampleRate != sampleRate) { 125 // sample rate has changed since we last used this recognizerEngine. 126 // destroy the grammar and regenerate. 127 if (mSrecGrammar != null) { 128 mSrecGrammar.destroy(); 129 } 130 mSrecGrammar = null; 131 mSampleRate = sampleRate; 132 } 133 134 // create a new recognizer 135 if (false) Log.d(TAG, "start new Recognizer"); 136 if (mSrec == null) { 137 String parFilePath = SREC_DIR + "/baseline11k.par"; 138 if (sampleRate == 8000) { 139 parFilePath = SREC_DIR + "/baseline8k.par"; 140 } 141 mSrec = new Recognizer(parFilePath); 142 } 143 144 // start audio input 145 if (micFile != null) { 146 if (false) Log.d(TAG, "using mic file"); 147 mic = new FileInputStream(micFile); 148 WaveHeader hdr = new WaveHeader(); 149 hdr.read(mic); 150 } else { 151 if (false) Log.d(TAG, "start new MicrophoneInputStream"); 152 mic = new MicrophoneInputStream(sampleRate, sampleRate * 15); 153 } 154 155 // notify UI 156 recognizerClient.onMicrophoneStart(mic); 157 158 // log audio if requested 159 if (mLogger != null) mic = mLogger.logInputStream(mic, sampleRate); 160 161 setupGrammar(); 162 163 // start the recognition process 164 if (false) Log.d(TAG, "start mSrec.start"); 165 mSrec.start(); 166 recognizerStarted = true; 167 168 // recognize 169 while (true) { 170 if (Thread.interrupted()) throw new InterruptedException(); 171 int event = mSrec.advance(); 172 if (event != Recognizer.EVENT_INCOMPLETE && 173 event != Recognizer.EVENT_NEED_MORE_AUDIO) { 174 Log.d(TAG, "start advance()=" + 175 Recognizer.eventToString(event) + 176 " avail " + mic.available()); 177 } 178 switch (event) { 179 case Recognizer.EVENT_INCOMPLETE: 180 case Recognizer.EVENT_STARTED: 181 case Recognizer.EVENT_START_OF_VOICING: 182 case Recognizer.EVENT_END_OF_VOICING: 183 continue; 184 case Recognizer.EVENT_RECOGNITION_RESULT: 185 onRecognitionSuccess(recognizerClient); 186 break; 187 case Recognizer.EVENT_NEED_MORE_AUDIO: 188 mSrec.putAudio(mic); 189 continue; 190 default: 191 Log.d(TAG, "unknown event " + event); 192 recognizerClient.onRecognitionFailure(Recognizer.eventToString(event)); 193 break; 194 } 195 break; 196 } 197 198 } catch (InterruptedException e) { 199 if (false) Log.d(TAG, "start interrupted " + e); 200 recognizerClient.onRecognitionError(e.toString()); 201 } catch (IOException e) { 202 if (false) Log.d(TAG, "start new Srec failed " + e); 203 recognizerClient.onRecognitionError(e.toString()); 204 } catch (Exception e) { 205 if (false) Log.d(TAG, "exception " + e); 206 recognizerClient.onRecognitionError(e.toString()); 207 } finally { 208 if (false) Log.d(TAG, "start mSrec.stop"); 209 if (mSrec != null && recognizerStarted) mSrec.stop(); 210 211 // stop microphone 212 try { 213 if (mic != null) mic.close(); 214 } 215 catch (IOException ex) { 216 if (false) Log.d(TAG, "start - mic.close failed - " + ex); 217 } 218 mic = null; 219 220 // close logger 221 try { 222 if (mLogger != null) mLogger.close(); 223 } 224 catch (IOException ex) { 225 if (false) Log.d(TAG, "start - mLoggger.close failed - " + ex); 226 } 227 mLogger = null; 228 } 229 if (false) Log.d(TAG, "start bye"); 230 } 231 232 protected static void addIntent(ArrayList<Intent> intents, Intent intent) { 233 for (Intent in : intents) { 234 if (in.getAction() != null && 235 in.getAction().equals(intent.getAction()) && 236 in.getData() != null && 237 in.getData().equals(intent.getData())) { 238 return; 239 } 240 } 241 intent.setFlags(intent.getFlags() | Intent.FLAG_ACTIVITY_NEW_TASK); 242 intents.add(intent); 243 } 244 } 245