Home | History | Annotate | Download | only in transcribe
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 package com.android.voicemail.impl.transcribe;
     17 
     18 import android.app.job.JobWorkItem;
     19 import android.content.Context;
     20 import android.net.Uri;
     21 import android.support.annotation.MainThread;
     22 import android.support.annotation.VisibleForTesting;
     23 import android.telecom.PhoneAccountHandle;
     24 import android.util.Pair;
     25 import com.android.dialer.common.Assert;
     26 import com.android.dialer.common.concurrent.ThreadUtil;
     27 import com.android.dialer.compat.android.provider.VoicemailCompat;
     28 import com.android.dialer.logging.DialerImpression;
     29 import com.android.dialer.logging.Logger;
     30 import com.android.voicemail.impl.VvmLog;
     31 import com.android.voicemail.impl.transcribe.TranscriptionService.JobCallback;
     32 import com.android.voicemail.impl.transcribe.grpc.TranscriptionClient;
     33 import com.android.voicemail.impl.transcribe.grpc.TranscriptionClientFactory;
     34 import com.android.voicemail.impl.transcribe.grpc.TranscriptionResponse;
     35 import com.google.internal.communications.voicemailtranscription.v1.AudioFormat;
     36 import com.google.internal.communications.voicemailtranscription.v1.TranscriptionStatus;
     37 import com.google.protobuf.ByteString;
     38 
     39 /**
     40  * Background task to get a voicemail transcription and update the database.
     41  *
     42  * <pre>
     43  * This task performs the following steps:
     44  *   1. Update the transcription-state in the database to 'in-progress'
     45  *   2. Create grpc client and transcription request
     46  *   3. Make synchronous or asynchronous grpc transcription request to backend server
     47  *     3a. On response
     48  *       Update the database with transcription (if successful) and new transcription-state
     49  *     3b. On network error
     50  *       If retry-count < max then increment retry-count and retry the request
     51  *       Otherwise update the transcription-state in the database to 'transcription-failed'
     52  *   4. Notify the callback that the work item is complete
     53  * </pre>
     54  */
     55 public abstract class TranscriptionTask implements Runnable {
     56   private static final String TAG = "TranscriptionTask";
     57 
     58   private final JobCallback callback;
     59   private final JobWorkItem workItem;
     60   private final TranscriptionClientFactory clientFactory;
     61   protected final Context context;
     62   protected final Uri voicemailUri;
     63   protected final PhoneAccountHandle phoneAccountHandle;
     64   protected final TranscriptionConfigProvider configProvider;
     65   protected final TranscriptionDbHelper dbHelper;
     66   protected ByteString audioData;
     67   protected AudioFormat encoding;
     68   protected volatile boolean cancelled;
     69 
     70   /** Functional interface for sending requests to the transcription server */
     71   public interface Request {
     72     TranscriptionResponse getResponse(TranscriptionClient client);
     73   }
     74 
     75   TranscriptionTask(
     76       Context context,
     77       JobCallback callback,
     78       JobWorkItem workItem,
     79       TranscriptionClientFactory clientFactory,
     80       TranscriptionConfigProvider configProvider) {
     81     this.context = context;
     82     this.callback = callback;
     83     this.workItem = workItem;
     84     this.clientFactory = clientFactory;
     85     this.voicemailUri = TranscriptionService.getVoicemailUri(workItem);
     86     this.phoneAccountHandle = TranscriptionService.getPhoneAccountHandle(workItem);
     87     this.configProvider = configProvider;
     88     dbHelper = new TranscriptionDbHelper(context, voicemailUri);
     89   }
     90 
     91   @MainThread
     92   void cancel() {
     93     Assert.isMainThread();
     94     VvmLog.i(TAG, "cancel");
     95     cancelled = true;
     96   }
     97 
     98   @Override
     99   public void run() {
    100     VvmLog.i(TAG, "run");
    101     if (readAndValidateAudioFile()) {
    102       updateTranscriptionState(VoicemailCompat.TRANSCRIPTION_IN_PROGRESS);
    103       transcribeVoicemail();
    104     } else {
    105       if (AudioFormat.AUDIO_FORMAT_UNSPECIFIED.equals(encoding)) {
    106         Logger.get(context)
    107             .logImpression(DialerImpression.Type.VVM_TRANSCRIPTION_VOICEMAIL_FORMAT_NOT_SUPPORTED);
    108       } else {
    109         Logger.get(context)
    110             .logImpression(DialerImpression.Type.VVM_TRANSCRIPTION_VOICEMAIL_INVALID_DATA);
    111       }
    112       updateTranscriptionState(VoicemailCompat.TRANSCRIPTION_FAILED);
    113     }
    114     ThreadUtil.postOnUiThread(
    115         () -> {
    116           callback.onWorkCompleted(workItem);
    117         });
    118   }
    119 
    120   protected abstract Pair<String, TranscriptionStatus> getTranscription();
    121 
    122   protected abstract DialerImpression.Type getRequestSentImpression();
    123 
    124   private void transcribeVoicemail() {
    125     VvmLog.i(TAG, "transcribeVoicemail");
    126     recordResult(context, getTranscription(), dbHelper, cancelled);
    127   }
    128 
    129   protected TranscriptionResponse sendRequest(Request request) {
    130     VvmLog.i(TAG, "sendRequest");
    131     TranscriptionClient client = clientFactory.getClient();
    132     for (int i = 0; i < configProvider.getMaxTranscriptionRetries(); i++) {
    133       if (cancelled) {
    134         VvmLog.i(TAG, "sendRequest, cancelled");
    135         return null;
    136       }
    137 
    138       VvmLog.i(TAG, "sendRequest, try: " + (i + 1));
    139       if (i == 0) {
    140         Logger.get(context).logImpression(getRequestSentImpression());
    141       } else {
    142         Logger.get(context).logImpression(DialerImpression.Type.VVM_TRANSCRIPTION_REQUEST_RETRY);
    143       }
    144 
    145       TranscriptionResponse response = request.getResponse(client);
    146       if (cancelled) {
    147         VvmLog.i(TAG, "sendRequest, cancelled");
    148         return null;
    149       } else if (response.hasRecoverableError()) {
    150         Logger.get(context)
    151             .logImpression(DialerImpression.Type.VVM_TRANSCRIPTION_RESPONSE_RECOVERABLE_ERROR);
    152         backoff(i);
    153       } else {
    154         return response;
    155       }
    156     }
    157 
    158     Logger.get(context)
    159         .logImpression(DialerImpression.Type.VVM_TRANSCRIPTION_RESPONSE_TOO_MANY_ERRORS);
    160     return null;
    161   }
    162 
    163   private static void backoff(int retryCount) {
    164     VvmLog.i(TAG, "backoff, count: " + retryCount);
    165     long millis = (1L << retryCount) * 1000;
    166     sleep(millis);
    167   }
    168 
    169   protected static void sleep(long millis) {
    170     try {
    171       Thread.sleep(millis);
    172     } catch (InterruptedException e) {
    173       VvmLog.e(TAG, "interrupted", e);
    174       Thread.currentThread().interrupt();
    175     }
    176   }
    177 
    178   protected void updateTranscriptionState(int newState) {
    179     dbHelper.setTranscriptionState(newState);
    180   }
    181 
    182   protected void updateTranscriptionAndState(String transcript, int newState) {
    183     dbHelper.setTranscriptionAndState(transcript, newState);
    184   }
    185 
    186   static void recordResult(
    187       Context context, Pair<String, TranscriptionStatus> result, TranscriptionDbHelper dbHelper) {
    188     recordResult(context, result, dbHelper, false);
    189   }
    190 
    191   static void recordResult(
    192       Context context,
    193       Pair<String, TranscriptionStatus> result,
    194       TranscriptionDbHelper dbHelper,
    195       boolean cancelled) {
    196     if (result.first != null) {
    197       VvmLog.i(TAG, "recordResult, got transcription");
    198       dbHelper.setTranscriptionAndState(result.first, VoicemailCompat.TRANSCRIPTION_AVAILABLE);
    199       Logger.get(context).logImpression(DialerImpression.Type.VVM_TRANSCRIPTION_RESPONSE_SUCCESS);
    200     } else if (result.second != null) {
    201       VvmLog.i(TAG, "recordResult, failed to transcribe, reason: " + result.second);
    202       switch (result.second) {
    203         case FAILED_NO_SPEECH_DETECTED:
    204           dbHelper.setTranscriptionState(VoicemailCompat.TRANSCRIPTION_FAILED_NO_SPEECH_DETECTED);
    205           Logger.get(context)
    206               .logImpression(DialerImpression.Type.VVM_TRANSCRIPTION_RESPONSE_NO_SPEECH_DETECTED);
    207           break;
    208         case FAILED_LANGUAGE_NOT_SUPPORTED:
    209           dbHelper.setTranscriptionState(
    210               VoicemailCompat.TRANSCRIPTION_FAILED_LANGUAGE_NOT_SUPPORTED);
    211           Logger.get(context)
    212               .logImpression(
    213                   DialerImpression.Type.VVM_TRANSCRIPTION_RESPONSE_LANGUAGE_NOT_SUPPORTED);
    214           break;
    215         case EXPIRED:
    216           dbHelper.setTranscriptionState(VoicemailCompat.TRANSCRIPTION_FAILED);
    217           Logger.get(context)
    218               .logImpression(DialerImpression.Type.VVM_TRANSCRIPTION_RESPONSE_EXPIRED);
    219           break;
    220         default:
    221           dbHelper.setTranscriptionState(
    222               cancelled
    223                   ? VoicemailCompat.TRANSCRIPTION_NOT_STARTED
    224                   : VoicemailCompat.TRANSCRIPTION_FAILED);
    225           Logger.get(context).logImpression(DialerImpression.Type.VVM_TRANSCRIPTION_RESPONSE_EMPTY);
    226           break;
    227       }
    228     }
    229   }
    230 
    231   private boolean readAndValidateAudioFile() {
    232     if (voicemailUri == null) {
    233       VvmLog.i(TAG, "Transcriber.readAndValidateAudioFile, file not found.");
    234       return false;
    235     } else {
    236       VvmLog.i(TAG, "Transcriber.readAndValidateAudioFile, reading: " + voicemailUri);
    237     }
    238 
    239     audioData = TranscriptionUtils.getAudioData(context, voicemailUri);
    240     if (audioData != null) {
    241       VvmLog.i(TAG, "readAndValidateAudioFile, read " + audioData.size() + " bytes");
    242     } else {
    243       VvmLog.i(TAG, "readAndValidateAudioFile, unable to read audio data for " + voicemailUri);
    244       return false;
    245     }
    246 
    247     encoding = TranscriptionUtils.getAudioFormat(audioData);
    248     if (encoding == AudioFormat.AUDIO_FORMAT_UNSPECIFIED) {
    249       VvmLog.i(TAG, "Transcriber.readAndValidateAudioFile, unknown encoding");
    250       return false;
    251     }
    252 
    253     return true;
    254   }
    255 
    256   @VisibleForTesting
    257   void setAudioDataForTesting(ByteString audioData) {
    258     this.audioData = audioData;
    259     encoding = TranscriptionUtils.getAudioFormat(audioData);
    260   }
    261 }
    262