Home | History | Annotate | Download | only in voicedialer
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.voicedialer;
     18 
     19 import android.app.Activity;
     20 import android.content.Intent;
     21 import android.speech.srec.MicrophoneInputStream;
     22 import android.speech.srec.Recognizer;
     23 import android.speech.srec.WaveHeader;
     24 import android.util.Log;
     25 import java.io.File;
     26 import java.io.FileInputStream;
     27 import java.io.IOException;
     28 import java.io.InputStream;
     29 import java.util.ArrayList;
     30 
     31 /**
     32  * This class is a framework for recognizing speech.  It must be extended to use.
     33  * The child class must timplement setupGrammar and onRecognitionSuccess.
     34  * A usage cycle is as follows:
     35  * <ul>
     36  * <li>Create with a reference to the {@link VoiceDialerActivity}.
     37  * <li>Signal the user to start speaking with the Vibrator or beep.
     38  * <li>Start audio input by creating a {@link MicrophoneInputStream}.
     39  * <li>Create and configure a {@link Recognizer}.
     40  * <li>Set up the grammar using setupGrammar.
     41  * <li>Start the {@link Recognizer} running using data already being
     42  * collected by the microphone.
     43  * <li>Wait for the {@link Recognizer} to complete.
     44  * <li>Process the results using onRecognitionSuccess, which will pass
     45  * a list of intents to the {@RecogizerClient}.
     46  * <li>Shut down and clean up.
     47  * </ul>
     48  * Notes:
     49  * <ul>
     50  * <li>Audio many be read from a file.
     51  * <li>A directory tree of audio files may be stepped through.
     52  * <li>A contact list may be read from a file.
     53  * <li>A {@link RecognizerLogger} may generate a set of log files from
     54  * a recognition session.
     55  * <li>A static instance of this class is held and reused by the
     56  * {@link VoiceDialerActivity}, which saves setup time.
     57  * </ul>
     58  */
     59 abstract public class RecognizerEngine {
     60 
     61     protected static final String TAG = "RecognizerEngine";
     62 
     63     protected static final String ACTION_RECOGNIZER_RESULT =
     64             "com.android.voicedialer.ACTION_RECOGNIZER_RESULT";
     65     public static final String SENTENCE_EXTRA = "sentence";
     66     public static final String SEMANTIC_EXTRA = "semantic";
     67 
     68     protected final String SREC_DIR = Recognizer.getConfigDir(null);
     69 
     70     protected static final String OPEN_ENTRIES = "openentries.txt";
     71 
     72     protected static final int RESULT_LIMIT = 5;
     73 
     74     protected Activity mActivity;
     75     protected Recognizer mSrec;
     76     protected Recognizer.Grammar mSrecGrammar;
     77     protected RecognizerLogger mLogger;
     78     protected int mSampleRate;
     79 
     80     /**
     81      * Constructor.
     82      */
     83     public RecognizerEngine() {
     84         mSampleRate = 0;
     85     }
     86 
     87     abstract protected void setupGrammar() throws IOException, InterruptedException;
     88 
     89     abstract protected void onRecognitionSuccess(RecognizerClient recognizerClient)
     90             throws InterruptedException;
     91 
     92     /**
     93      * Start the recognition process.
     94      *
     95      * <ul>
     96      * <li>Create and start the microphone.
     97      * <li>Create a Recognizer.
     98      * <li>set up the grammar (implementation is in child class)
     99      * <li>Start the Recognizer.
    100      * <li>Feed the Recognizer audio until it provides a result.
    101      * <li>Build a list of Intents corresponding to the results. (implementation
    102      * is in child class)
    103      * <li>Stop the microphone.
    104      * <li>Stop the Recognizer.
    105      * </ul>
    106      *
    107      * @param recognizerClient client to be given the results
    108      * @param activity the Activity this recognition is being run from.
    109      * @param micFile optional audio input from this file, or directory tree.
    110      * @param sampleRate the same rate coming from the mic or micFile
    111      */
    112     public void recognize(RecognizerClient recognizerClient, Activity activity,
    113             File micFile, int sampleRate) {
    114         InputStream mic = null;
    115         boolean recognizerStarted = false;
    116         try {
    117             mActivity = activity;
    118             // set up logger
    119             mLogger = null;
    120             if (RecognizerLogger.isEnabled(mActivity)) {
    121                 mLogger = new RecognizerLogger(mActivity);
    122             }
    123 
    124             if (mSampleRate != sampleRate) {
    125                 // sample rate has changed since we last used this recognizerEngine.
    126                 // destroy the grammar and regenerate.
    127                 if (mSrecGrammar != null) {
    128                     mSrecGrammar.destroy();
    129                 }
    130                 mSrecGrammar = null;
    131                 mSampleRate = sampleRate;
    132             }
    133 
    134             // create a new recognizer
    135             if (false) Log.d(TAG, "start new Recognizer");
    136             if (mSrec == null) {
    137                 String parFilePath = SREC_DIR + "/baseline11k.par";
    138                 if (sampleRate == 8000) {
    139                     parFilePath = SREC_DIR + "/baseline8k.par";
    140                 }
    141                 mSrec = new Recognizer(parFilePath);
    142             }
    143 
    144             // start audio input
    145             if (micFile != null) {
    146                 if (false) Log.d(TAG, "using mic file");
    147                 mic = new FileInputStream(micFile);
    148                 WaveHeader hdr = new WaveHeader();
    149                 hdr.read(mic);
    150             } else {
    151                 if (false) Log.d(TAG, "start new MicrophoneInputStream");
    152                 mic = new MicrophoneInputStream(sampleRate, sampleRate * 15);
    153             }
    154 
    155             // notify UI
    156             recognizerClient.onMicrophoneStart(mic);
    157 
    158             // log audio if requested
    159             if (mLogger != null) mic = mLogger.logInputStream(mic, sampleRate);
    160 
    161             setupGrammar();
    162 
    163             // start the recognition process
    164             if (false) Log.d(TAG, "start mSrec.start");
    165             mSrec.start();
    166             recognizerStarted = true;
    167 
    168             // recognize
    169             while (true) {
    170                 if (Thread.interrupted()) throw new InterruptedException();
    171                 int event = mSrec.advance();
    172                 if (event != Recognizer.EVENT_INCOMPLETE &&
    173                         event != Recognizer.EVENT_NEED_MORE_AUDIO) {
    174                     Log.d(TAG, "start advance()=" +
    175                             Recognizer.eventToString(event) +
    176                             " avail " + mic.available());
    177                 }
    178                 switch (event) {
    179                 case Recognizer.EVENT_INCOMPLETE:
    180                 case Recognizer.EVENT_STARTED:
    181                 case Recognizer.EVENT_START_OF_VOICING:
    182                 case Recognizer.EVENT_END_OF_VOICING:
    183                     continue;
    184                 case Recognizer.EVENT_RECOGNITION_RESULT:
    185                     onRecognitionSuccess(recognizerClient);
    186                     break;
    187                 case Recognizer.EVENT_NEED_MORE_AUDIO:
    188                     mSrec.putAudio(mic);
    189                     continue;
    190                 default:
    191                     Log.d(TAG, "unknown event " + event);
    192                     recognizerClient.onRecognitionFailure(Recognizer.eventToString(event));
    193                     break;
    194                 }
    195                 break;
    196             }
    197 
    198         } catch (InterruptedException e) {
    199             if (false) Log.d(TAG, "start interrupted " + e);
    200             recognizerClient.onRecognitionError(e.toString());
    201         } catch (IOException e) {
    202             if (false) Log.d(TAG, "start new Srec failed " + e);
    203             recognizerClient.onRecognitionError(e.toString());
    204         } catch (Exception e) {
    205             if (false) Log.d(TAG, "exception " + e);
    206             recognizerClient.onRecognitionError(e.toString());
    207         } finally {
    208             if (false) Log.d(TAG, "start mSrec.stop");
    209             if (mSrec != null && recognizerStarted) mSrec.stop();
    210 
    211             // stop microphone
    212             try {
    213                 if (mic != null) mic.close();
    214             }
    215             catch (IOException ex) {
    216                 if (false) Log.d(TAG, "start - mic.close failed - " + ex);
    217             }
    218             mic = null;
    219 
    220             // close logger
    221             try {
    222                 if (mLogger != null) mLogger.close();
    223             }
    224             catch (IOException ex) {
    225                 if (false) Log.d(TAG, "start - mLoggger.close failed - " + ex);
    226             }
    227             mLogger = null;
    228         }
    229         if (false) Log.d(TAG, "start bye");
    230     }
    231 
    232     protected static void addIntent(ArrayList<Intent> intents, Intent intent) {
    233         for (Intent in : intents) {
    234             if (in.getAction() != null &&
    235                     in.getAction().equals(intent.getAction()) &&
    236                     in.getData() != null &&
    237                     in.getData().equals(intent.getData())) {
    238                 return;
    239             }
    240         }
    241         intent.setFlags(intent.getFlags() | Intent.FLAG_ACTIVITY_NEW_TASK);
    242         intents.add(intent);
    243     }
    244 }
    245