Home | History | Annotate | Download | only in soundtrigger
      1 /**
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.media.soundtrigger;
     18 import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK;
     19 
     20 import android.annotation.IntDef;
     21 import android.annotation.NonNull;
     22 import android.annotation.Nullable;
     23 import android.annotation.SystemApi;
     24 import android.hardware.soundtrigger.IRecognitionStatusCallback;
     25 import android.hardware.soundtrigger.SoundTrigger;
     26 import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
     27 import android.media.AudioFormat;
     28 import android.os.Handler;
     29 import android.os.Looper;
     30 import android.os.Message;
     31 import android.os.ParcelUuid;
     32 import android.os.RemoteException;
     33 import android.util.Slog;
     34 
     35 import com.android.internal.app.ISoundTriggerService;
     36 
     37 import java.io.PrintWriter;
     38 import java.lang.annotation.Retention;
     39 import java.lang.annotation.RetentionPolicy;
     40 import java.util.UUID;
     41 
     42 /**
     43  * A class that allows interaction with the actual sound trigger detection on the system.
     44  * Sound trigger detection refers to a detectors that match generic sound patterns that are
     45  * not voice-based. The voice-based recognition models should utilize the {@link
     46  * VoiceInteractionService} instead. Access to this class is protected by a permission
     47  * granted only to system or privileged apps.
     48  *
     49  * @hide
     50  */
     51 @SystemApi
     52 public final class SoundTriggerDetector {
     53     private static final boolean DBG = false;
     54     private static final String TAG = "SoundTriggerDetector";
     55 
     56     private static final int MSG_AVAILABILITY_CHANGED = 1;
     57     private static final int MSG_SOUND_TRIGGER_DETECTED = 2;
     58     private static final int MSG_DETECTION_ERROR = 3;
     59     private static final int MSG_DETECTION_PAUSE = 4;
     60     private static final int MSG_DETECTION_RESUME = 5;
     61 
     62     private final Object mLock = new Object();
     63 
     64     private final ISoundTriggerService mSoundTriggerService;
     65     private final UUID mSoundModelId;
     66     private final Callback mCallback;
     67     private final Handler mHandler;
     68     private final RecognitionCallback mRecognitionCallback;
     69 
     70     /** @hide */
     71     @Retention(RetentionPolicy.SOURCE)
     72     @IntDef(flag = true,
     73             value = {
     74                 RECOGNITION_FLAG_NONE,
     75                 RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
     76                 RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS
     77             })
     78     public @interface RecognitionFlags {}
     79 
     80     /**
     81      * Empty flag for {@link #startRecognition(int)}.
     82      *
     83      *  @hide
     84      */
     85     public static final int RECOGNITION_FLAG_NONE = 0;
     86 
     87     /**
     88      * Recognition flag for {@link #startRecognition(int)} that indicates
     89      * whether the trigger audio for hotword needs to be captured.
     90      */
     91     public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
     92 
     93     /**
     94      * Recognition flag for {@link #startRecognition(int)} that indicates
     95      * whether the recognition should keep going on even after the
     96      * model triggers.
     97      * If this flag is specified, it's possible to get multiple
     98      * triggers after a call to {@link #startRecognition(int)}, if the model
     99      * triggers multiple times.
    100      * When this isn't specified, the default behavior is to stop recognition once the
    101      * trigger happenss, till the caller starts recognition again.
    102      */
    103     public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;
    104 
    105     /**
    106      * Additional payload for {@link Callback#onDetected}.
    107      */
    108     public static class EventPayload {
    109         private final boolean mTriggerAvailable;
    110 
    111         // Indicates if {@code captureSession} can be used to continue capturing more audio
    112         // from the DSP hardware.
    113         private final boolean mCaptureAvailable;
    114         // The session to use when attempting to capture more audio from the DSP hardware.
    115         private final int mCaptureSession;
    116         private final AudioFormat mAudioFormat;
    117         // Raw data associated with the event.
    118         // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true.
    119         private final byte[] mData;
    120 
    121         private EventPayload(boolean triggerAvailable, boolean captureAvailable,
    122                 AudioFormat audioFormat, int captureSession, byte[] data) {
    123             mTriggerAvailable = triggerAvailable;
    124             mCaptureAvailable = captureAvailable;
    125             mCaptureSession = captureSession;
    126             mAudioFormat = audioFormat;
    127             mData = data;
    128         }
    129 
    130         /**
    131          * Gets the format of the audio obtained using {@link #getTriggerAudio()}.
    132          * May be null if there's no audio present.
    133          */
    134         @Nullable
    135         public AudioFormat getCaptureAudioFormat() {
    136             return mAudioFormat;
    137         }
    138 
    139         /**
    140          * Gets the raw audio that triggered the keyphrase.
    141          * This may be null if the trigger audio isn't available.
    142          * If non-null, the format of the audio can be obtained by calling
    143          * {@link #getCaptureAudioFormat()}.
    144          *
    145          * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO
    146          */
    147         @Nullable
    148         public byte[] getTriggerAudio() {
    149             if (mTriggerAvailable) {
    150                 return mData;
    151             } else {
    152                 return null;
    153             }
    154         }
    155 
    156         /**
    157          * Gets the session ID to start a capture from the DSP.
    158          * This may be null if streaming capture isn't possible.
    159          * If non-null, the format of the audio that can be captured can be
    160          * obtained using {@link #getCaptureAudioFormat()}.
    161          *
    162          * TODO: Candidate for Public API when the API to start capture with a session ID
    163          * is made public.
    164          *
    165          * TODO: Add this to {@link #getCaptureAudioFormat()}:
    166          * "Gets the format of the audio obtained using {@link #getTriggerAudio()}
    167          * or {@link #getCaptureSession()}. May be null if no audio can be obtained
    168          * for either the trigger or a streaming session."
    169          *
    170          * TODO: Should this return a known invalid value instead?
    171          *
    172          * @hide
    173          */
    174         @Nullable
    175         public Integer getCaptureSession() {
    176             if (mCaptureAvailable) {
    177                 return mCaptureSession;
    178             } else {
    179                 return null;
    180             }
    181         }
    182     }
    183 
    184     public static abstract class Callback {
    185         /**
    186          * Called when the availability of the sound model changes.
    187          */
    188         public abstract void onAvailabilityChanged(int status);
    189 
    190         /**
    191          * Called when the sound model has triggered (such as when it matched a
    192          * given sound pattern).
    193          */
    194         public abstract void onDetected(@NonNull EventPayload eventPayload);
    195 
    196         /**
    197          *  Called when the detection fails due to an error.
    198          */
    199         public abstract void onError();
    200 
    201         /**
    202          * Called when the recognition is paused temporarily for some reason.
    203          * This is an informational callback, and the clients shouldn't be doing anything here
    204          * except showing an indication on their UI if they have to.
    205          */
    206         public abstract void onRecognitionPaused();
    207 
    208         /**
    209          * Called when the recognition is resumed after it was temporarily paused.
    210          * This is an informational callback, and the clients shouldn't be doing anything here
    211          * except showing an indication on their UI if they have to.
    212          */
    213         public abstract void onRecognitionResumed();
    214     }
    215 
    216     /**
    217      * This class should be constructed by the {@link SoundTriggerManager}.
    218      * @hide
    219      */
    220     SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId,
    221             @NonNull Callback callback, @Nullable Handler handler) {
    222         mSoundTriggerService = soundTriggerService;
    223         mSoundModelId = soundModelId;
    224         mCallback = callback;
    225         if (handler == null) {
    226             mHandler = new MyHandler();
    227         } else {
    228             mHandler = new MyHandler(handler.getLooper());
    229         }
    230         mRecognitionCallback = new RecognitionCallback();
    231     }
    232 
    233     /**
    234      * Starts recognition on the associated sound model. Result is indicated via the
    235      * {@link Callback}.
    236      * @return Indicates whether the call succeeded or not.
    237      */
    238     public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
    239         if (DBG) {
    240             Slog.d(TAG, "startRecognition()");
    241         }
    242         boolean captureTriggerAudio =
    243                 (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
    244 
    245         boolean allowMultipleTriggers =
    246                 (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
    247         int status = STATUS_OK;
    248         try {
    249             status = mSoundTriggerService.startRecognition(new ParcelUuid(mSoundModelId),
    250                     mRecognitionCallback, new RecognitionConfig(captureTriggerAudio,
    251                         allowMultipleTriggers, null, null));
    252         } catch (RemoteException e) {
    253             return false;
    254         }
    255         return status == STATUS_OK;
    256     }
    257 
    258     /**
    259      * Stops recognition for the associated model.
    260      */
    261     public boolean stopRecognition() {
    262         int status = STATUS_OK;
    263         try {
    264             status = mSoundTriggerService.stopRecognition(new ParcelUuid(mSoundModelId),
    265                     mRecognitionCallback);
    266         } catch (RemoteException e) {
    267             return false;
    268         }
    269         return status == STATUS_OK;
    270     }
    271 
    272     /**
    273      * @hide
    274      */
    275     public void dump(String prefix, PrintWriter pw) {
    276         synchronized (mLock) {
    277             // TODO: Dump useful debug information.
    278         }
    279     }
    280 
    281     /**
    282      * Callback that handles events from the lower sound trigger layer.
    283      *
    284      * Note that these callbacks will be called synchronously from the SoundTriggerService
    285      * layer and thus should do minimal work (such as sending a message on a handler to do
    286      * the real work).
    287      * @hide
    288      */
    289     private class RecognitionCallback extends IRecognitionStatusCallback.Stub {
    290 
    291         /**
    292          * @hide
    293          */
    294         @Override
    295         public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) {
    296             Slog.d(TAG, "onGenericSoundTriggerDetected()" + event);
    297             Message.obtain(mHandler,
    298                     MSG_SOUND_TRIGGER_DETECTED,
    299                     new EventPayload(event.triggerInData, event.captureAvailable,
    300                             event.captureFormat, event.captureSession, event.data))
    301                     .sendToTarget();
    302         }
    303 
    304         @Override
    305         public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) {
    306             Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event);
    307         }
    308 
    309         /**
    310          * @hide
    311          */
    312         @Override
    313         public void onError(int status) {
    314             Slog.d(TAG, "onError()" + status);
    315             mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
    316         }
    317 
    318         /**
    319          * @hide
    320          */
    321         @Override
    322         public void onRecognitionPaused() {
    323             Slog.d(TAG, "onRecognitionPaused()");
    324             mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE);
    325         }
    326 
    327         /**
    328          * @hide
    329          */
    330         @Override
    331         public void onRecognitionResumed() {
    332             Slog.d(TAG, "onRecognitionResumed()");
    333             mHandler.sendEmptyMessage(MSG_DETECTION_RESUME);
    334         }
    335     }
    336 
    337     private class MyHandler extends Handler {
    338 
    339         MyHandler() {
    340             super();
    341         }
    342 
    343         MyHandler(Looper looper) {
    344             super(looper);
    345         }
    346 
    347         @Override
    348         public void handleMessage(Message msg) {
    349             if (mCallback == null) {
    350                   Slog.w(TAG, "Received message: " + msg.what + " for NULL callback.");
    351                   return;
    352             }
    353             switch (msg.what) {
    354                 case MSG_SOUND_TRIGGER_DETECTED:
    355                     mCallback.onDetected((EventPayload) msg.obj);
    356                     break;
    357                 case MSG_DETECTION_ERROR:
    358                     mCallback.onError();
    359                     break;
    360                 case MSG_DETECTION_PAUSE:
    361                     mCallback.onRecognitionPaused();
    362                     break;
    363                 case MSG_DETECTION_RESUME:
    364                     mCallback.onRecognitionResumed();
    365                     break;
    366                 default:
    367                     super.handleMessage(msg);
    368 
    369             }
    370         }
    371     }
    372 }
    373