Home | History | Annotate | Download | only in voicedialer
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.voicedialer;
     18 
     19 import android.app.Activity;
     20 import android.content.Intent;
     21 import android.speech.srec.MicrophoneInputStream;
     22 import android.speech.srec.Recognizer;
     23 import android.speech.srec.WaveHeader;
     24 import android.util.Config;
     25 import android.util.Log;
     26 import java.io.File;
     27 import java.io.FileInputStream;
     28 import java.io.IOException;
     29 import java.io.InputStream;
     30 import java.util.ArrayList;
     31 
     32 /**
     33  * This class is a framework for recognizing speech.  It must be extended to use.
     34  * The child class must timplement setupGrammar and onRecognitionSuccess.
     35  * A usage cycle is as follows:
     36  * <ul>
     37  * <li>Create with a reference to the {@link VoiceDialerActivity}.
     38  * <li>Signal the user to start speaking with the Vibrator or beep.
     39  * <li>Start audio input by creating a {@link MicrophoneInputStream}.
     40  * <li>Create and configure a {@link Recognizer}.
     41  * <li>Set up the grammar using setupGrammar.
     42  * <li>Start the {@link Recognizer} running using data already being
     43  * collected by the microphone.
     44  * <li>Wait for the {@link Recognizer} to complete.
     45  * <li>Process the results using onRecognitionSuccess, which will pass
     46  * a list of intents to the {@RecogizerClient}.
     47  * <li>Shut down and clean up.
     48  * </ul>
     49  * Notes:
     50  * <ul>
     51  * <li>Audio many be read from a file.
     52  * <li>A directory tree of audio files may be stepped through.
     53  * <li>A contact list may be read from a file.
     54  * <li>A {@link RecognizerLogger} may generate a set of log files from
     55  * a recognition session.
     56  * <li>A static instance of this class is held and reused by the
     57  * {@link VoiceDialerActivity}, which saves setup time.
     58  * </ul>
     59  */
     60 abstract public class RecognizerEngine {
     61 
     62     protected static final String TAG = "RecognizerEngine";
     63 
     64     protected static final String ACTION_RECOGNIZER_RESULT =
     65             "com.android.voicedialer.ACTION_RECOGNIZER_RESULT";
     66     public static final String SENTENCE_EXTRA = "sentence";
     67     public static final String SEMANTIC_EXTRA = "semantic";
     68 
     69     protected final String SREC_DIR = Recognizer.getConfigDir(null);
     70 
     71     protected static final String OPEN_ENTRIES = "openentries.txt";
     72 
     73     protected static final int RESULT_LIMIT = 5;
     74 
     75     protected Activity mActivity;
     76     protected Recognizer mSrec;
     77     protected Recognizer.Grammar mSrecGrammar;
     78     protected RecognizerLogger mLogger;
     79     protected int mSampleRate;
     80 
     81     /**
     82      * Constructor.
     83      */
     84     public RecognizerEngine() {
     85         mSampleRate = 0;
     86     }
     87 
     88     abstract protected void setupGrammar() throws IOException, InterruptedException;
     89 
     90     abstract protected void onRecognitionSuccess(RecognizerClient recognizerClient)
     91             throws InterruptedException;
     92 
     93     /**
     94      * Start the recognition process.
     95      *
     96      * <ul>
     97      * <li>Create and start the microphone.
     98      * <li>Create a Recognizer.
     99      * <li>set up the grammar (implementation is in child class)
    100      * <li>Start the Recognizer.
    101      * <li>Feed the Recognizer audio until it provides a result.
    102      * <li>Build a list of Intents corresponding to the results. (implementation
    103      * is in child class)
    104      * <li>Stop the microphone.
    105      * <li>Stop the Recognizer.
    106      * </ul>
    107      *
    108      * @param recognizerClient client to be given the results
    109      * @param activity the Activity this recognition is being run from.
    110      * @param micFile optional audio input from this file, or directory tree.
    111      * @param sampleRate the same rate coming from the mic or micFile
    112      */
    113     public void recognize(RecognizerClient recognizerClient, Activity activity,
    114             File micFile, int sampleRate) {
    115         InputStream mic = null;
    116         boolean recognizerStarted = false;
    117         try {
    118             mActivity = activity;
    119             // set up logger
    120             mLogger = null;
    121             if (RecognizerLogger.isEnabled(mActivity)) {
    122                 mLogger = new RecognizerLogger(mActivity);
    123             }
    124 
    125             if (mSampleRate != sampleRate) {
    126                 // sample rate has changed since we last used this recognizerEngine.
    127                 // destroy the grammar and regenerate.
    128                 if (mSrecGrammar != null) {
    129                     mSrecGrammar.destroy();
    130                 }
    131                 mSrecGrammar = null;
    132                 mSampleRate = sampleRate;
    133             }
    134 
    135             // create a new recognizer
    136             if (Config.LOGD) Log.d(TAG, "start new Recognizer");
    137             if (mSrec == null) {
    138                 String parFilePath = SREC_DIR + "/baseline11k.par";
    139                 if (sampleRate == 8000) {
    140                     parFilePath = SREC_DIR + "/baseline8k.par";
    141                 }
    142                 mSrec = new Recognizer(parFilePath);
    143             }
    144 
    145             // start audio input
    146             if (micFile != null) {
    147                 if (Config.LOGD) Log.d(TAG, "using mic file");
    148                 mic = new FileInputStream(micFile);
    149                 WaveHeader hdr = new WaveHeader();
    150                 hdr.read(mic);
    151             } else {
    152                 if (Config.LOGD) Log.d(TAG, "start new MicrophoneInputStream");
    153                 mic = new MicrophoneInputStream(sampleRate, sampleRate * 15);
    154             }
    155 
    156             // notify UI
    157             recognizerClient.onMicrophoneStart(mic);
    158 
    159             // log audio if requested
    160             if (mLogger != null) mic = mLogger.logInputStream(mic, sampleRate);
    161 
    162             setupGrammar();
    163 
    164             // start the recognition process
    165             if (Config.LOGD) Log.d(TAG, "start mSrec.start");
    166             mSrec.start();
    167             recognizerStarted = true;
    168 
    169             // recognize
    170             while (true) {
    171                 if (Thread.interrupted()) throw new InterruptedException();
    172                 int event = mSrec.advance();
    173                 if (event != Recognizer.EVENT_INCOMPLETE &&
    174                         event != Recognizer.EVENT_NEED_MORE_AUDIO) {
    175                     Log.d(TAG, "start advance()=" +
    176                             Recognizer.eventToString(event) +
    177                             " avail " + mic.available());
    178                 }
    179                 switch (event) {
    180                 case Recognizer.EVENT_INCOMPLETE:
    181                 case Recognizer.EVENT_STARTED:
    182                 case Recognizer.EVENT_START_OF_VOICING:
    183                 case Recognizer.EVENT_END_OF_VOICING:
    184                     continue;
    185                 case Recognizer.EVENT_RECOGNITION_RESULT:
    186                     onRecognitionSuccess(recognizerClient);
    187                     break;
    188                 case Recognizer.EVENT_NEED_MORE_AUDIO:
    189                     mSrec.putAudio(mic);
    190                     continue;
    191                 default:
    192                     Log.d(TAG, "unknown event " + event);
    193                     recognizerClient.onRecognitionFailure(Recognizer.eventToString(event));
    194                     break;
    195                 }
    196                 break;
    197             }
    198 
    199         } catch (InterruptedException e) {
    200             if (Config.LOGD) Log.d(TAG, "start interrupted " + e);
    201             recognizerClient.onRecognitionError(e.toString());
    202         } catch (IOException e) {
    203             if (Config.LOGD) Log.d(TAG, "start new Srec failed " + e);
    204             recognizerClient.onRecognitionError(e.toString());
    205         } catch (Exception e) {
    206             if (Config.LOGD) Log.d(TAG, "exception " + e);
    207             recognizerClient.onRecognitionError(e.toString());
    208         } finally {
    209             if (Config.LOGD) Log.d(TAG, "start mSrec.stop");
    210             if (mSrec != null && recognizerStarted) mSrec.stop();
    211 
    212             // stop microphone
    213             try {
    214                 if (mic != null) mic.close();
    215             }
    216             catch (IOException ex) {
    217                 if (Config.LOGD) Log.d(TAG, "start - mic.close failed - " + ex);
    218             }
    219             mic = null;
    220 
    221             // close logger
    222             try {
    223                 if (mLogger != null) mLogger.close();
    224             }
    225             catch (IOException ex) {
    226                 if (Config.LOGD) Log.d(TAG, "start - mLoggger.close failed - " + ex);
    227             }
    228             mLogger = null;
    229         }
    230         if (Config.LOGD) Log.d(TAG, "start bye");
    231     }
    232 
    233     protected static void addIntent(ArrayList<Intent> intents, Intent intent) {
    234         for (Intent in : intents) {
    235             if (in.getAction() != null &&
    236                     in.getAction().equals(intent.getAction()) &&
    237                     in.getData() != null &&
    238                     in.getData().equals(intent.getData())) {
    239                 return;
    240             }
    241         }
    242         intent.setFlags(intent.getFlags() | Intent.FLAG_ACTIVITY_NEW_TASK);
    243         intents.add(intent);
    244     }
    245 }
    246