/*
 * Copyright (C) 2009 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.voice;

import com.android.inputmethod.latin.R;

import android.content.ContentResolver;
import android.content.Context;
import android.content.Intent;
import android.os.Build;
import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.speech.RecognitionListener;
import android.speech.SpeechRecognizer;
import android.speech.RecognizerIntent;
import android.util.Log;
import android.view.View;
import android.view.View.OnClickListener;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * Speech recognition input, including both user interface and a background
 * process to stream audio to the network recognizer. This class supplies a
 * View (getView()), which it updates as recognition occurs. The user of this
 * class is responsible for making the view visible to the user, as well as
 * handling various events returned through UiListener.
 */
public class VoiceInput implements OnClickListener {
    private static final String TAG = "VoiceInput";
    private static final String EXTRA_RECOGNITION_CONTEXT =
            "android.speech.extras.RECOGNITION_CONTEXT";
    private static final String EXTRA_CALLING_PACKAGE = "calling_package";

    // Whitespace-separated package names used when no recommended-packages
    // setting is available.
    private static final String DEFAULT_RECOMMENDED_PACKAGES =
            "com.android.mms " +
            "com.google.android.gm " +
            "com.google.android.talk " +
            "com.google.android.apps.googlevoice " +
            "com.android.email " +
            "com.android.browser ";

    // WARNING! Before enabling this, fix the problem with calling getExtractedText() in
    // landscape view. It causes Extracted text updates to be rejected due to a token mismatch
    public static boolean ENABLE_WORD_CORRECTIONS = false;

    // Dummy word suggestion which means "delete current word"
    public static final String DELETE_SYMBOL = " \u00D7 ";  // times symbol

    private Whitelist mRecommendedList;
    private Whitelist mBlacklist;

    private VoiceInputLogger mLogger;

    // Names of a few intent extras defined in VoiceSearch's RecognitionService.
    // These let us tweak the endpointer parameters.
    private static final String EXTRA_SPEECH_MINIMUM_LENGTH_MILLIS =
            "android.speech.extras.SPEECH_INPUT_MINIMUM_LENGTH_MILLIS";
    private static final String EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS =
            "android.speech.extras.SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS";
    private static final String EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS =
            "android.speech.extras.SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS";

    // The usual endpointer default value for input complete silence length is 0.5 seconds,
    // but that's used for things like voice search. For dictation-like voice input like this,
    // we go with a more liberal value of 1 second. This value will only be used if a value
    // is not provided from Gservices.
    private static final String INPUT_COMPLETE_SILENCE_LENGTH_DEFAULT_VALUE_MILLIS = "1000";

    // Recognition states. Used to record part of that state for logging purposes.
    public static final int DEFAULT = 0;
    public static final int LISTENING = 1;
    public static final int WORKING = 2;
    public static final int ERROR = 3;

    // Counters of edits made after voice input. They are aggregated here and
    // handed to the logger in batches (see the increment*/flush* methods).
    private int mAfterVoiceInputDeleteCount = 0;
    private int mAfterVoiceInputInsertCount = 0;
    private int mAfterVoiceInputInsertPunctuationCount = 0;
    private int mAfterVoiceInputCursorPos = 0;
    private int mAfterVoiceInputSelectionSpan = 0;

    private int mState = DEFAULT;

    private static final int MSG_CLOSE_ERROR_DIALOG = 1;

    // Dismisses the error display when the delayed MSG_CLOSE_ERROR_DIALOG
    // message (posted by onError(String)) arrives.
    private final Handler mHandler = new Handler() {
        @Override
        public void handleMessage(Message msg) {
            if (msg.what == MSG_CLOSE_ERROR_DIALOG) {
                mState = DEFAULT;
                mRecognitionView.finish();
                mUiListener.onCancelVoice();
            }
        }
    };

    /**
     * Events relating to the recognition UI. You must implement these.
     */
    public interface UiListener {

        /**
         * @param recognitionResults a set of transcripts for what the user
         *   spoke, sorted by likelihood.
         * @param alternatives maps each word of the first transcript to words
         *   taken from the other transcripts; empty unless
         *   {@link VoiceInput#ENABLE_WORD_CORRECTIONS} is set and at least two
         *   transcripts were returned.
         */
        public void onVoiceResults(
            List<String> recognitionResults,
            Map<String, List<CharSequence>> alternatives);

        /**
         * Called when the user cancels speech recognition.
         */
        public void onCancelVoice();
    }

    private SpeechRecognizer mSpeechRecognizer;
    private RecognitionListener mRecognitionListener;
    private RecognitionView mRecognitionView;
    private UiListener mUiListener;
    private Context mContext;

    /**
     * @param context the service or activity in which we're running.
     * @param uiHandler object to receive events from VoiceInput.
     */
    public VoiceInput(Context context, UiListener uiHandler) {
        mLogger = VoiceInputLogger.getLogger(context);
        mRecognitionListener = new ImeRecognitionListener();
        mSpeechRecognizer = SpeechRecognizer.createSpeechRecognizer(context);
        mSpeechRecognizer.setRecognitionListener(mRecognitionListener);
        mUiListener = uiHandler;
        mContext = context;
        newView();

        String recommendedPackages = SettingsUtil.getSettingsString(
                context.getContentResolver(),
                SettingsUtil.LATIN_IME_VOICE_INPUT_RECOMMENDED_PACKAGES,
                DEFAULT_RECOMMENDED_PACKAGES);

        mRecommendedList = new Whitelist();
        for (String recommendedPackage : recommendedPackages.split("\\s+")) {
            mRecommendedList.addApp(recommendedPackage);
        }

        mBlacklist = new Whitelist();
        mBlacklist.addApp("com.android.setupwizard");
    }

    public void setCursorPos(int pos) {
        mAfterVoiceInputCursorPos = pos;
    }

    public int getCursorPos() {
        return mAfterVoiceInputCursorPos;
    }

    public void setSelectionSpan(int span) {
        mAfterVoiceInputSelectionSpan = span;
    }

    public int getSelectionSpan() {
        return mAfterVoiceInputSelectionSpan;
    }

    /**
     * Adds {@code count} to the pending deletion counter and logs (then
     * resets) any pending insertion and punctuation-insertion counters.
     *
     * @param count number of deletions to add
     */
    public void incrementTextModificationDeleteCount(int count) {
        mAfterVoiceInputDeleteCount += count;
        // Send up intents for other text modification types
        flushInsertCount();
        flushInsertPunctuationCount();
    }

    /**
     * Adds {@code count} to the pending insertion counter and logs (then
     * resets) any pending deletion and punctuation-insertion counters.
     *
     * @param count number of insertions to add
     */
    public void incrementTextModificationInsertCount(int count) {
        mAfterVoiceInputInsertCount += count;
        if (mAfterVoiceInputSelectionSpan > 0) {
            // If text was highlighted before inserting the char, count this as
            // a delete.
            mAfterVoiceInputDeleteCount += mAfterVoiceInputSelectionSpan;
        }
        // Send up intents for other text modification types
        flushDeleteCount();
        flushInsertPunctuationCount();
    }

    /**
     * Adds {@code count} to the pending punctuation-insertion counter and logs
     * (then resets) any pending deletion and insertion counters.
     *
     * @param count number of punctuation insertions to add
     */
    public void incrementTextModificationInsertPunctuationCount(int count) {
        // Honor the caller-supplied count, as the sibling increment methods do
        // (previously this always added 1 regardless of the argument).
        mAfterVoiceInputInsertPunctuationCount += count;
        if (mAfterVoiceInputSelectionSpan > 0) {
            // If text was highlighted before inserting the char, count this as
            // a delete.
            mAfterVoiceInputDeleteCount += mAfterVoiceInputSelectionSpan;
        }
        // Send up intents for aggregated non-punctuation insertions
        flushDeleteCount();
        flushInsertCount();
    }

    /**
     * Logs and resets every pending text-modification counter, in the order
     * insertion, deletion, punctuation insertion.
     */
    public void flushAllTextModificationCounters() {
        flushInsertCount();
        flushDeleteCount();
        flushInsertPunctuationCount();
    }

    /** Logs the pending insertion count, if positive, and resets it. */
    private void flushInsertCount() {
        if (mAfterVoiceInputInsertCount > 0) {
            logTextModifiedByTypingInsertion(mAfterVoiceInputInsertCount);
            mAfterVoiceInputInsertCount = 0;
        }
    }

    /** Logs the pending deletion count, if positive, and resets it. */
    private void flushDeleteCount() {
        if (mAfterVoiceInputDeleteCount > 0) {
            logTextModifiedByTypingDeletion(mAfterVoiceInputDeleteCount);
            mAfterVoiceInputDeleteCount = 0;
        }
    }

    /** Logs the pending punctuation-insertion count, if positive, and resets it. */
    private void flushInsertPunctuationCount() {
        if (mAfterVoiceInputInsertPunctuationCount > 0) {
            logTextModifiedByTypingInsertionPunctuation(mAfterVoiceInputInsertPunctuationCount);
            mAfterVoiceInputInsertPunctuationCount = 0;
        }
    }

    /**
     * The configuration of the IME changed and may have caused the views to be laid out
     * again. Restore the state of the recognition view.
     */
    public void onConfigurationChanged() {
        mRecognitionView.restoreState();
    }

    /**
     * @return true if field is blacklisted for voice
     */
    public boolean isBlacklistedField(FieldContext context) {
        return mBlacklist.matches(context);
    }

    /**
     * Used to decide whether to show voice input hints for this field, etc.
     *
     * @return true if field is recommended for voice
     */
    public boolean isRecommendedField(FieldContext context) {
        return mRecommendedList.matches(context);
    }

    /**
     * Start listening for speech from the user. This will grab the microphone
     * and start updating the view provided by getView(). It is the caller's
     * responsibility to ensure that the view is visible to the user at this stage.
     *
     * @param context the same FieldContext supplied to voiceIsEnabled()
     * @param swipe whether this voice input was started by swipe, for logging purposes
     */
    public void startListening(FieldContext context, boolean swipe) {
        mState = DEFAULT;

        Locale locale = Locale.getDefault();
        String localeString = locale.getLanguage() + "-" + locale.getCountry();

        mLogger.start(localeString, swipe);

        mState = LISTENING;

        mRecognitionView.showInitializing();
        startListeningAfterInitialization(context);
    }

    /**
     * Builds the recognition intent (language model, field context, calling
     * package, max results and endpointer parameters) and hands it to the
     * speech recognizer.
     *
     * @param context context with which {@link #startListening(FieldContext, boolean)} was executed
     */
    private void startListeningAfterInitialization(FieldContext context) {
        final ContentResolver cr = mContext.getContentResolver();

        Intent intent = makeIntent();
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, "");
        intent.putExtra(EXTRA_RECOGNITION_CONTEXT, context.getBundle());
        intent.putExtra(EXTRA_CALLING_PACKAGE, "VoiceIME");
        intent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS,
                SettingsUtil.getSettingsInt(
                        cr,
                        SettingsUtil.LATIN_IME_MAX_VOICE_RESULTS,
                        1));

        // Get endpointer params from Gservices.
        // TODO: Consider caching these values for improved performance on slower devices.
        putEndpointerExtra(
                cr,
                intent,
                SettingsUtil.LATIN_IME_SPEECH_MINIMUM_LENGTH_MILLIS,
                EXTRA_SPEECH_MINIMUM_LENGTH_MILLIS,
                null  /* rely on endpointer default */);
        putEndpointerExtra(
                cr,
                intent,
                SettingsUtil.LATIN_IME_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS,
                EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS,
                INPUT_COMPLETE_SILENCE_LENGTH_DEFAULT_VALUE_MILLIS
                /* our default value is different from the endpointer's */);
        putEndpointerExtra(
                cr,
                intent,
                SettingsUtil.
                        LATIN_IME_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS,
                EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS,
                null  /* rely on endpointer default */);

        mSpeechRecognizer.startListening(intent);
    }

    /**
     * Gets the value of the provided Gservices key, attempts to parse it into a long,
     * and if successful, puts the long value as an extra in the provided intent.
     * Nothing is added when the value is missing, unparseable, or equal to -1
     * (-1 doubles as the "no value" sentinel).
     *
     * @param cr resolver used to read the setting
     * @param i intent that receives the extra
     * @param gservicesKey settings key to read
     * @param intentExtraKey name of the intent extra to write
     * @param defaultValue value used when the setting is absent; may be null
     */
    private void putEndpointerExtra(ContentResolver cr, Intent i,
            String gservicesKey, String intentExtraKey, String defaultValue) {
        long l = -1;
        String s = SettingsUtil.getSettingsString(cr, gservicesKey, defaultValue);
        if (s != null) {
            try {
                // parseLong yields the primitive directly; no boxing round-trip.
                l = Long.parseLong(s);
            } catch (NumberFormatException e) {
                Log.e(TAG, "could not parse value for " + gservicesKey + ": " + s);
            }
        }

        if (l != -1) {
            i.putExtra(intentExtraKey, l);
        }
    }

    /** Destroys the underlying speech recognizer. */
    public void destroy() {
        mSpeechRecognizer.destroy();
    }

    /**
     * Creates a new instance of the view that is returned by {@link #getView()}
     * Clients should use this when a previously returned view is stuck in a
     * layout that is being thrown away and a new one is needed to show to the
     * user.
     */
    public void newView() {
        mRecognitionView = new RecognitionView(mContext, this);
    }

    /**
     * @return a view that shows the recognition flow--e.g., "Speak now" and
     * "working" dialogs.
     */
    public View getView() {
        return mRecognitionView.getView();
    }

    /**
     * Handle the cancel button.
     */
    public void onClick(View view) {
        switch (view.getId()) {
            case R.id.button:
                cancel();
                break;
        }
    }

    // The methods below forward logging events to the VoiceInputLogger.

    public void logTextModifiedByTypingInsertion(int length) {
        mLogger.textModifiedByTypingInsertion(length);
    }

    public void logTextModifiedByTypingInsertionPunctuation(int length) {
        mLogger.textModifiedByTypingInsertionPunctuation(length);
    }

    public void logTextModifiedByTypingDeletion(int length) {
        mLogger.textModifiedByTypingDeletion(length);
    }

    public void logTextModifiedByChooseSuggestion(int length) {
        mLogger.textModifiedByChooseSuggestion(length);
    }

    public void logKeyboardWarningDialogShown() {
        mLogger.keyboardWarningDialogShown();
    }

    public void logKeyboardWarningDialogDismissed() {
        mLogger.keyboardWarningDialogDismissed();
    }

    public void logKeyboardWarningDialogOk() {
        mLogger.keyboardWarningDialogOk();
    }

    public void logKeyboardWarningDialogCancel() {
        mLogger.keyboardWarningDialogCancel();
    }

    public void logSwipeHintDisplayed() {
        mLogger.swipeHintDisplayed();
    }

    public void logPunctuationHintDisplayed() {
        mLogger.punctuationHintDisplayed();
    }

    public void logVoiceInputDelivered(int length) {
        mLogger.voiceInputDelivered(length);
    }

    public void logNBestChoose(int index) {
        mLogger.nBestChoose(index);
    }

    public void logInputEnded() {
        mLogger.inputEnded();
    }

    public void flushLogs() {
        mLogger.flush();
    }

    /**
     * Creates the speech-recognition intent, targeted explicitly at the
     * recognition service appropriate for the platform release.
     */
    private static Intent makeIntent() {
        Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

        // On Cupcake, use VoiceIMEHelper since VoiceSearch doesn't support.
        // On Donut, always use VoiceSearch, since VoiceIMEHelper and
        // VoiceSearch may conflict.
        if (Build.VERSION.RELEASE.equals("1.5")) {
            intent = intent.setClassName(
              "com.google.android.voiceservice",
              "com.google.android.voiceservice.IMERecognitionService");
        } else {
            intent = intent.setClassName(
              "com.google.android.voicesearch",
              "com.google.android.voicesearch.RecognitionService");
        }

        return intent;
    }

    /**
     * Cancel in-progress speech recognition. Logs which state the cancel
     * happened in, drops any pending error-dismiss message, cancels the
     * recognizer, notifies the UI listener and finishes the recognition view.
     */
    public void cancel() {
        switch (mState) {
        case LISTENING:
            mLogger.cancelDuringListening();
            break;
        case WORKING:
            mLogger.cancelDuringWorking();
            break;
        case ERROR:
            mLogger.cancelDuringError();
            break;
        }
        mState = DEFAULT;

        // Remove all pending tasks (e.g., timers to cancel voice input)
        mHandler.removeMessages(MSG_CLOSE_ERROR_DIALOG);

        mSpeechRecognizer.cancel();
        mUiListener.onCancelVoice();
        mRecognitionView.finish();
    }

    /**
     * Maps a {@link SpeechRecognizer} error code to the string resource shown
     * to the user.
     *
     * @param errorType one of the {@code SpeechRecognizer.ERROR_*} codes
     * @param endpointed whether end of speech had been detected before the error
     * @return resource id of the message to display
     */
    private int getErrorStringId(int errorType, boolean endpointed) {
        switch (errorType) {
            // We use CLIENT_ERROR to signify that voice search is not available on the device.
            case SpeechRecognizer.ERROR_CLIENT:
                return R.string.voice_not_installed;
            case SpeechRecognizer.ERROR_NETWORK:
                return R.string.voice_network_error;
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                return endpointed ?
                        R.string.voice_network_error : R.string.voice_too_much_speech;
            case SpeechRecognizer.ERROR_AUDIO:
                return R.string.voice_audio_error;
            case SpeechRecognizer.ERROR_SERVER:
                return R.string.voice_server_error;
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                return R.string.voice_speech_timeout;
            case SpeechRecognizer.ERROR_NO_MATCH:
                return R.string.voice_no_match;
            default: return R.string.voice_error;
        }
    }

    /** Logs the error code and shows the corresponding error message. */
    private void onError(int errorType, boolean endpointed) {
        Log.i(TAG, "error " + errorType);
        mLogger.error(errorType);
        onError(mContext.getString(getErrorStringId(errorType, endpointed)));
    }

    /** Enters the ERROR state, shows {@code error}, and schedules its dismissal. */
    private void onError(String error) {
        mState = ERROR;
        mRecognitionView.showError(error);
        // Wait a couple seconds and then automatically dismiss message.
        mHandler.sendMessageDelayed(Message.obtain(mHandler, MSG_CLOSE_ERROR_DIALOG), 2000);
    }

    /**
     * Builds, for each word of the first transcript, a list of alternative
     * words drawn from the proportionally corresponding positions of the
     * other transcripts. Each list holds at most 6 distinct words.
     *
     * @param results transcripts; must contain at least one element
     * @return map from each word of {@code results.get(0)} to its alternatives
     */
    private static Map<String, List<CharSequence>> buildAlternatives(List<String> results) {
        final Map<String, List<CharSequence>> alternatives =
                new HashMap<String, List<CharSequence>>();

        final String[][] words = new String[results.size()][];
        for (int i = 0; i < words.length; i++) {
            words[i] = results.get(i).split(" ");
        }

        final int primaryLength = words[0].length;
        for (int key = 0; key < primaryLength; key++) {
            // A repeated word in the first transcript replaces its earlier list.
            final List<CharSequence> altList = new ArrayList<CharSequence>();
            alternatives.put(words[0][key], altList);
            for (int alt = 1; alt < words.length; alt++) {
                // Scale the key's position into the alternative transcript.
                int keyBegin = key * words[alt].length / primaryLength;
                int keyEnd = (key + 1) * words[alt].length / primaryLength;

                for (int i = keyBegin; i < Math.min(words[alt].length, keyEnd); i++) {
                    if (!altList.contains(words[alt][i]) && altList.size() < 6) {
                        altList.add(words[alt][i]);
                    }
                }
            }
        }
        return alternatives;
    }

    /**
     * Receives callbacks from the speech recognizer and translates them into
     * view updates, state changes and UiListener events.
     */
    private class ImeRecognitionListener implements RecognitionListener {
        // Waveform data
        // NOTE(review): nothing in this class ever resets this buffer, so it
        // appears to accumulate audio across recognition sessions -- confirm
        // whether that is intended.
        final ByteArrayOutputStream mWaveBuffer = new ByteArrayOutputStream();
        // Offset into mWaveBuffer at which the current utterance started.
        int mSpeechStart;
        // True once end of speech has been detected for the current utterance.
        private boolean mEndpointed = false;

        public void onReadyForSpeech(Bundle noiseParams) {
            mRecognitionView.showListening();
        }

        public void onBeginningOfSpeech() {
            mEndpointed = false;
            mSpeechStart = mWaveBuffer.size();
        }

        public void onRmsChanged(float rmsdB) {
            mRecognitionView.updateVoiceMeter(rmsdB);
        }

        public void onBufferReceived(byte[] buf) {
            try {
                mWaveBuffer.write(buf);
            } catch (IOException e) {
                // Best effort: the waveform is only used for display, so keep
                // going, but leave a trace instead of silently swallowing.
                Log.w(TAG, "could not buffer audio data", e);
            }
        }

        public void onEndOfSpeech() {
            mEndpointed = true;
            mState = WORKING;
            mRecognitionView.showWorking(mWaveBuffer, mSpeechStart, mWaveBuffer.size());
        }

        public void onError(int errorType) {
            mState = ERROR;
            VoiceInput.this.onError(errorType, mEndpointed);
        }

        public void onResults(Bundle resultsBundle) {
            List<String> results = (resultsBundle == null)
                    ? null
                    : resultsBundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
            if (results == null) {
                // No transcript list was delivered; report it through the
                // regular error path rather than crashing on a null list.
                VoiceInput.this.onError(SpeechRecognizer.ERROR_NO_MATCH, mEndpointed);
                return;
            }
            mState = DEFAULT;

            final Map<String, List<CharSequence>> alternatives;
            if (results.size() >= 2 && ENABLE_WORD_CORRECTIONS) {
                alternatives = buildAlternatives(results);
            } else {
                alternatives = new HashMap<String, List<CharSequence>>();
            }

            // Hand at most the five most likely transcripts to the listener.
            if (results.size() > 5) {
                results = results.subList(0, 5);
            }
            mUiListener.onVoiceResults(results, alternatives);
            mRecognitionView.finish();
        }

        public void onPartialResults(final Bundle partialResults) {
            // currently - do nothing
        }

        public void onEvent(int eventType, Bundle params) {
            // do nothing - reserved for events that might be added in the future
        }
    }
}