Home | History | Annotate | Download | only in speech
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.speech;
     18 
     19 import android.content.ComponentName;
     20 import android.content.Context;
     21 import android.content.Intent;
     22 import android.content.ServiceConnection;
     23 import android.content.pm.ResolveInfo;
     24 import android.os.Bundle;
     25 import android.os.Handler;
     26 import android.os.IBinder;
     27 import android.os.Looper;
     28 import android.os.Message;
     29 import android.os.RemoteException;
     30 import android.provider.Settings;
     31 import android.text.TextUtils;
     32 import android.util.Log;
     33 
     34 import java.util.LinkedList;
     35 import java.util.List;
     36 import java.util.Queue;
     37 
     38 /**
     39  * This class provides access to the speech recognition service. This service allows access to the
     40  * speech recognizer. Do not instantiate this class directly, instead, call
     41  * {@link SpeechRecognizer#createSpeechRecognizer(Context)}. This class's methods must be
     42  * invoked only from the main application thread.
     43  *
     44  * <p>The implementation of this API is likely to stream audio to remote servers to perform speech
     45  * recognition. As such this API is not intended to be used for continuous recognition, which would
     46  * consume a significant amount of battery and bandwidth.
     47  *
     48  * <p>Please note that the application must have {@link android.Manifest.permission#RECORD_AUDIO}
     49  * permission to use this class.
     50  */
     51 public class SpeechRecognizer {
     52     /** DEBUG value to enable verbose debug prints */
     53     private final static boolean DBG = false;
     54 
     55     /** Log messages identifier */
     56     private static final String TAG = "SpeechRecognizer";
     57 
     58     /**
     59      * Key used to retrieve an {@code ArrayList<String>} from the {@link Bundle} passed to the
     60      * {@link RecognitionListener#onResults(Bundle)} and
     61      * {@link RecognitionListener#onPartialResults(Bundle)} methods. These strings are the possible
     62      * recognition results, where the first element is the most likely candidate.
     63      */
     64     public static final String RESULTS_RECOGNITION = "results_recognition";
     65 
     66     /**
     67      * Key used to retrieve a float array from the {@link Bundle} passed to the
     68      * {@link RecognitionListener#onResults(Bundle)} and
     69      * {@link RecognitionListener#onPartialResults(Bundle)} methods. The array should be
     70      * the same size as the ArrayList provided in {@link #RESULTS_RECOGNITION}, and should contain
     71      * values ranging from 0.0 to 1.0, or -1 to represent an unavailable confidence score.
     72      * <p>
     73      * Confidence values close to 1.0 indicate high confidence (the speech recognizer is confident
     74      * that the recognition result is correct), while values close to 0.0 indicate low confidence.
     75      * <p>
     76      * This value is optional and might not be provided.
     77      */
     78     public static final String CONFIDENCE_SCORES = "confidence_scores";
     79 
     80     /** Network operation timed out. */
     81     public static final int ERROR_NETWORK_TIMEOUT = 1;
     82 
     83     /** Other network related errors. */
     84     public static final int ERROR_NETWORK = 2;
     85 
     86     /** Audio recording error. */
     87     public static final int ERROR_AUDIO = 3;
     88 
     89     /** Server sends error status. */
     90     public static final int ERROR_SERVER = 4;
     91 
     92     /** Other client side errors. */
     93     public static final int ERROR_CLIENT = 5;
     94 
     95     /** No speech input */
     96     public static final int ERROR_SPEECH_TIMEOUT = 6;
     97 
     98     /** No recognition result matched. */
     99     public static final int ERROR_NO_MATCH = 7;
    100 
    101     /** RecognitionService busy. */
    102     public static final int ERROR_RECOGNIZER_BUSY = 8;
    103 
    104     /** Insufficient permissions */
    105     public static final int ERROR_INSUFFICIENT_PERMISSIONS = 9;
    106 
    107     /** action codes */
    108     private final static int MSG_START = 1;
    109     private final static int MSG_STOP = 2;
    110     private final static int MSG_CANCEL = 3;
    111     private final static int MSG_CHANGE_LISTENER = 4;
    112 
    113     /** The actual RecognitionService endpoint */
    114     private IRecognitionService mService;
    115 
    116     /** The connection to the actual service */
    117     private Connection mConnection;
    118 
    119     /** Context with which the manager was created */
    120     private final Context mContext;
    121 
    122     /** Component to direct service intent to */
    123     private final ComponentName mServiceComponent;
    124 
    125     /** Handler that will execute the main tasks */
    126     private Handler mHandler = new Handler() {
    127         @Override
    128         public void handleMessage(Message msg) {
    129             switch (msg.what) {
    130                 case MSG_START:
    131                     handleStartListening((Intent) msg.obj);
    132                     break;
    133                 case MSG_STOP:
    134                     handleStopMessage();
    135                     break;
    136                 case MSG_CANCEL:
    137                     handleCancelMessage();
    138                     break;
    139                 case MSG_CHANGE_LISTENER:
    140                     handleChangeListener((RecognitionListener) msg.obj);
    141                     break;
    142             }
    143         }
    144     };
    145 
    146     /**
    147      * Temporary queue, saving the messages until the connection will be established, afterwards,
    148      * only mHandler will receive the messages
    149      */
    150     private final Queue<Message> mPendingTasks = new LinkedList<Message>();
    151 
    152     /** The Listener that will receive all the callbacks */
    153     private final InternalListener mListener = new InternalListener();
    154 
    /**
     * Private on purpose: instances are meant to be obtained through the
     * {@link #createSpeechRecognizer} static factory methods.
     *
     * @param context context stored for later service binding
     * @param serviceComponent service to bind to, or {@code null} for the configured default
     */
    private SpeechRecognizer(final Context context, final ComponentName serviceComponent) {
        mServiceComponent = serviceComponent;
        mContext = context;
    }
    163 
    164     /**
    165      * Basic ServiceConnection that records the mService variable. Additionally, on creation it
    166      * invokes the {@link IRecognitionService#startListening(Intent, IRecognitionListener)}.
    167      */
    168     private class Connection implements ServiceConnection {
    169 
    170         public void onServiceConnected(final ComponentName name, final IBinder service) {
    171             // always done on the application main thread, so no need to send message to mHandler
    172             mService = IRecognitionService.Stub.asInterface(service);
    173             if (DBG) Log.d(TAG, "onServiceConnected - Success");
    174             while (!mPendingTasks.isEmpty()) {
    175                 mHandler.sendMessage(mPendingTasks.poll());
    176             }
    177         }
    178 
    179         public void onServiceDisconnected(final ComponentName name) {
    180             // always done on the application main thread, so no need to send message to mHandler
    181             mService = null;
    182             mConnection = null;
    183             mPendingTasks.clear();
    184             if (DBG) Log.d(TAG, "onServiceDisconnected - Success");
    185         }
    186     }
    187 
    188     /**
    189      * Checks whether a speech recognition service is available on the system. If this method
    190      * returns {@code false}, {@link SpeechRecognizer#createSpeechRecognizer(Context)} will
    191      * fail.
    192      *
    193      * @param context with which {@code SpeechRecognizer} will be created
    194      * @return {@code true} if recognition is available, {@code false} otherwise
    195      */
    196     public static boolean isRecognitionAvailable(final Context context) {
    197         final List<ResolveInfo> list = context.getPackageManager().queryIntentServices(
    198                 new Intent(RecognitionService.SERVICE_INTERFACE), 0);
    199         return list != null && list.size() != 0;
    200     }
    201 
    202     /**
    203      * Factory method to create a new {@code SpeechRecognizer}. Please note that
    204      * {@link #setRecognitionListener(RecognitionListener)} should be called before dispatching any
    205      * command to the created {@code SpeechRecognizer}, otherwise no notifications will be
    206      * received.
    207      *
    208      * @param context in which to create {@code SpeechRecognizer}
    209      * @return a new {@code SpeechRecognizer}
    210      */
    211     public static SpeechRecognizer createSpeechRecognizer(final Context context) {
    212         return createSpeechRecognizer(context, null);
    213     }
    214 
    215     /**
    216      * Factory method to create a new {@code SpeechRecognizer}. Please note that
    217      * {@link #setRecognitionListener(RecognitionListener)} should be called before dispatching any
    218      * command to the created {@code SpeechRecognizer}, otherwise no notifications will be
    219      * received.
    220      *
    221      * Use this version of the method to specify a specific service to direct this
    222      * {@link SpeechRecognizer} to. Normally you would not use this; use
    223      * {@link #createSpeechRecognizer(Context)} instead to use the system default recognition
    224      * service.
    225      *
    226      * @param context in which to create {@code SpeechRecognizer}
    227      * @param serviceComponent the {@link ComponentName} of a specific service to direct this
    228      *        {@code SpeechRecognizer} to
    229      * @return a new {@code SpeechRecognizer}
    230      */
    231     public static SpeechRecognizer createSpeechRecognizer(final Context context,
    232             final ComponentName serviceComponent) {
    233         if (context == null) {
    234             throw new IllegalArgumentException("Context cannot be null)");
    235         }
    236         checkIsCalledFromMainThread();
    237         return new SpeechRecognizer(context, serviceComponent);
    238     }
    239 
    240     /**
    241      * Sets the listener that will receive all the callbacks. The previous unfinished commands will
    242      * be executed with the old listener, while any following command will be executed with the new
    243      * listener.
    244      *
    245      * @param listener listener that will receive all the callbacks from the created
    246      *        {@link SpeechRecognizer}, this must not be null.
    247      */
    248     public void setRecognitionListener(RecognitionListener listener) {
    249         checkIsCalledFromMainThread();
    250         putMessage(Message.obtain(mHandler, MSG_CHANGE_LISTENER, listener));
    251     }
    252 
    253     /**
    254      * Starts listening for speech. Please note that
    255      * {@link #setRecognitionListener(RecognitionListener)} should be called beforehand, otherwise
    256      * no notifications will be received.
    257      *
    258      * @param recognizerIntent contains parameters for the recognition to be performed. The intent
    259      *        may also contain optional extras, see {@link RecognizerIntent}. If these values are
    260      *        not set explicitly, default values will be used by the recognizer.
    261      */
    262     public void startListening(final Intent recognizerIntent) {
    263         if (recognizerIntent == null) {
    264             throw new IllegalArgumentException("intent must not be null");
    265         }
    266         checkIsCalledFromMainThread();
    267         if (mConnection == null) { // first time connection
    268             mConnection = new Connection();
    269 
    270             Intent serviceIntent = new Intent(RecognitionService.SERVICE_INTERFACE);
    271 
    272             if (mServiceComponent == null) {
    273                 String serviceComponent = Settings.Secure.getString(mContext.getContentResolver(),
    274                         Settings.Secure.VOICE_RECOGNITION_SERVICE);
    275 
    276                 if (TextUtils.isEmpty(serviceComponent)) {
    277                     Log.e(TAG, "no selected voice recognition service");
    278                     mListener.onError(ERROR_CLIENT);
    279                     return;
    280                 }
    281 
    282                 serviceIntent.setComponent(ComponentName.unflattenFromString(serviceComponent));
    283             } else {
    284                 serviceIntent.setComponent(mServiceComponent);
    285             }
    286 
    287             if (!mContext.bindService(serviceIntent, mConnection, Context.BIND_AUTO_CREATE)) {
    288                 Log.e(TAG, "bind to recognition service failed");
    289                 mConnection = null;
    290                 mService = null;
    291                 mListener.onError(ERROR_CLIENT);
    292                 return;
    293             }
    294         }
    295         putMessage(Message.obtain(mHandler, MSG_START, recognizerIntent));
    296     }
    297 
    298     /**
    299      * Stops listening for speech. Speech captured so far will be recognized as if the user had
    300      * stopped speaking at this point. Note that in the default case, this does not need to be
    301      * called, as the speech endpointer will automatically stop the recognizer listening when it
    302      * determines speech has completed. However, you can manipulate endpointer parameters directly
    303      * using the intent extras defined in {@link RecognizerIntent}, in which case you may sometimes
    304      * want to manually call this method to stop listening sooner. Please note that
    305      * {@link #setRecognitionListener(RecognitionListener)} should be called beforehand, otherwise
    306      * no notifications will be received.
    307      */
    308     public void stopListening() {
    309         checkIsCalledFromMainThread();
    310         putMessage(Message.obtain(mHandler, MSG_STOP));
    311     }
    312 
    313     /**
    314      * Cancels the speech recognition. Please note that
    315      * {@link #setRecognitionListener(RecognitionListener)} should be called beforehand, otherwise
    316      * no notifications will be received.
    317      */
    318     public void cancel() {
    319         checkIsCalledFromMainThread();
    320         putMessage(Message.obtain(mHandler, MSG_CANCEL));
    321     }
    322 
    323     private static void checkIsCalledFromMainThread() {
    324         if (Looper.myLooper() != Looper.getMainLooper()) {
    325             throw new RuntimeException(
    326                     "SpeechRecognizer should be used only from the application's main thread");
    327         }
    328     }
    329 
    330     private void putMessage(Message msg) {
    331         if (mService == null) {
    332             mPendingTasks.offer(msg);
    333         } else {
    334             mHandler.sendMessage(msg);
    335         }
    336     }
    337 
    338     /** sends the actual message to the service */
    339     private void handleStartListening(Intent recognizerIntent) {
    340         if (!checkOpenConnection()) {
    341             return;
    342         }
    343         try {
    344             mService.startListening(recognizerIntent, mListener);
    345             if (DBG) Log.d(TAG, "service start listening command succeded");
    346         } catch (final RemoteException e) {
    347             Log.e(TAG, "startListening() failed", e);
    348             mListener.onError(ERROR_CLIENT);
    349         }
    350     }
    351 
    352     /** sends the actual message to the service */
    353     private void handleStopMessage() {
    354         if (!checkOpenConnection()) {
    355             return;
    356         }
    357         try {
    358             mService.stopListening(mListener);
    359             if (DBG) Log.d(TAG, "service stop listening command succeded");
    360         } catch (final RemoteException e) {
    361             Log.e(TAG, "stopListening() failed", e);
    362             mListener.onError(ERROR_CLIENT);
    363         }
    364     }
    365 
    366     /** sends the actual message to the service */
    367     private void handleCancelMessage() {
    368         if (!checkOpenConnection()) {
    369             return;
    370         }
    371         try {
    372             mService.cancel(mListener);
    373             if (DBG) Log.d(TAG, "service cancel command succeded");
    374         } catch (final RemoteException e) {
    375             Log.e(TAG, "cancel() failed", e);
    376             mListener.onError(ERROR_CLIENT);
    377         }
    378     }
    379 
    380     private boolean checkOpenConnection() {
    381         if (mService != null) {
    382             return true;
    383         }
    384         mListener.onError(ERROR_CLIENT);
    385         Log.e(TAG, "not connected to the recognition service");
    386         return false;
    387     }
    388 
    389     /** changes the listener */
    390     private void handleChangeListener(RecognitionListener listener) {
    391         if (DBG) Log.d(TAG, "handleChangeListener, listener=" + listener);
    392         mListener.mInternalListener = listener;
    393     }
    394 
    395     /**
    396      * Destroys the {@code SpeechRecognizer} object.
    397      */
    398     public void destroy() {
    399         if (mService != null) {
    400             try {
    401                 mService.cancel(mListener);
    402             } catch (final RemoteException e) {
    403                 // Not important
    404             }
    405         }
    406 
    407         if (mConnection != null) {
    408             mContext.unbindService(mConnection);
    409         }
    410         mPendingTasks.clear();
    411         mService = null;
    412         mConnection = null;
    413         mListener.mInternalListener = null;
    414     }
    415 
    416     /**
    417      * Internal wrapper of IRecognitionListener which will propagate the results to
    418      * RecognitionListener
    419      */
    420     private static class InternalListener extends IRecognitionListener.Stub {
    421         private RecognitionListener mInternalListener;
    422 
    423         private final static int MSG_BEGINNING_OF_SPEECH = 1;
    424         private final static int MSG_BUFFER_RECEIVED = 2;
    425         private final static int MSG_END_OF_SPEECH = 3;
    426         private final static int MSG_ERROR = 4;
    427         private final static int MSG_READY_FOR_SPEECH = 5;
    428         private final static int MSG_RESULTS = 6;
    429         private final static int MSG_PARTIAL_RESULTS = 7;
    430         private final static int MSG_RMS_CHANGED = 8;
    431         private final static int MSG_ON_EVENT = 9;
    432 
    433         private final Handler mInternalHandler = new Handler() {
    434             @Override
    435             public void handleMessage(Message msg) {
    436                 if (mInternalListener == null) {
    437                     return;
    438                 }
    439                 switch (msg.what) {
    440                     case MSG_BEGINNING_OF_SPEECH:
    441                         mInternalListener.onBeginningOfSpeech();
    442                         break;
    443                     case MSG_BUFFER_RECEIVED:
    444                         mInternalListener.onBufferReceived((byte[]) msg.obj);
    445                         break;
    446                     case MSG_END_OF_SPEECH:
    447                         mInternalListener.onEndOfSpeech();
    448                         break;
    449                     case MSG_ERROR:
    450                         mInternalListener.onError((Integer) msg.obj);
    451                         break;
    452                     case MSG_READY_FOR_SPEECH:
    453                         mInternalListener.onReadyForSpeech((Bundle) msg.obj);
    454                         break;
    455                     case MSG_RESULTS:
    456                         mInternalListener.onResults((Bundle) msg.obj);
    457                         break;
    458                     case MSG_PARTIAL_RESULTS:
    459                         mInternalListener.onPartialResults((Bundle) msg.obj);
    460                         break;
    461                     case MSG_RMS_CHANGED:
    462                         mInternalListener.onRmsChanged((Float) msg.obj);
    463                         break;
    464                     case MSG_ON_EVENT:
    465                         mInternalListener.onEvent(msg.arg1, (Bundle) msg.obj);
    466                         break;
    467                 }
    468             }
    469         };
    470 
    471         public void onBeginningOfSpeech() {
    472             Message.obtain(mInternalHandler, MSG_BEGINNING_OF_SPEECH).sendToTarget();
    473         }
    474 
    475         public void onBufferReceived(final byte[] buffer) {
    476             Message.obtain(mInternalHandler, MSG_BUFFER_RECEIVED, buffer).sendToTarget();
    477         }
    478 
    479         public void onEndOfSpeech() {
    480             Message.obtain(mInternalHandler, MSG_END_OF_SPEECH).sendToTarget();
    481         }
    482 
    483         public void onError(final int error) {
    484             Message.obtain(mInternalHandler, MSG_ERROR, error).sendToTarget();
    485         }
    486 
    487         public void onReadyForSpeech(final Bundle noiseParams) {
    488             Message.obtain(mInternalHandler, MSG_READY_FOR_SPEECH, noiseParams).sendToTarget();
    489         }
    490 
    491         public void onResults(final Bundle results) {
    492             Message.obtain(mInternalHandler, MSG_RESULTS, results).sendToTarget();
    493         }
    494 
    495         public void onPartialResults(final Bundle results) {
    496             Message.obtain(mInternalHandler, MSG_PARTIAL_RESULTS, results).sendToTarget();
    497         }
    498 
    499         public void onRmsChanged(final float rmsdB) {
    500             Message.obtain(mInternalHandler, MSG_RMS_CHANGED, rmsdB).sendToTarget();
    501         }
    502 
    503         public void onEvent(final int eventType, final Bundle params) {
    504             Message.obtain(mInternalHandler, MSG_ON_EVENT, eventType, eventType, params)
    505                     .sendToTarget();
    506         }
    507     }
    508 }
    509