1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.voicedialer; 18 19 import android.app.Activity; 20 import android.app.AlertDialog; 21 import android.bluetooth.BluetoothAdapter; 22 import android.bluetooth.BluetoothDevice; 23 import android.bluetooth.BluetoothHeadset; 24 import android.bluetooth.BluetoothProfile; 25 import android.content.BroadcastReceiver; 26 import android.content.Context; 27 import android.content.DialogInterface; 28 import android.content.Intent; 29 import android.content.IntentFilter; 30 import android.media.AudioManager; 31 import android.media.ToneGenerator; 32 import android.os.Bundle; 33 import android.os.Handler; 34 import android.os.PowerManager; 35 import android.os.PowerManager.WakeLock; 36 import android.os.SystemProperties; 37 import android.os.Vibrator; 38 import android.speech.tts.TextToSpeech; 39 import android.util.Log; 40 import android.view.View; 41 import android.view.WindowManager; 42 import android.widget.TextView; 43 44 import java.io.File; 45 import java.io.IOException; 46 import java.io.InputStream; 47 import java.util.HashMap; 48 import java.util.List; 49 50 /** 51 * TODO: get rid of the anonymous classes 52 * 53 * This class is the user interface of the VoiceDialer application. 54 * It begins in the INITIALIZING state. 55 * 56 * INITIALIZING : 57 * This transitions out on events from TTS and the BluetoothHeadset 58 * once TTS initialized and SCO channel set up: 59 * * prompt the user "speak now" 60 * * transition to the SPEAKING_GREETING state 61 * 62 * SPEAKING_GREETING: 63 * This transitions out only on events from TTS or the fallback runnable 64 * once the greeting utterance completes: 65 * * begin listening for the command using the {@link CommandRecognizerEngine} 66 * * transition to the WAITING_FOR_COMMAND state 67 * 68 * WAITING_FOR_COMMAND : 69 * This transitions out only on events from the recognizer 70 * on RecognitionFailure or RecognitionError: 71 * * begin speaking "try again." 72 * * transition to state SPEAKING_TRY_AGAIN 73 * on RecognitionSuccess: 74 * single result: 75 * * begin speaking the sentence describing the intent 76 * * transition to the SPEAKING_CHOSEN_ACTION 77 * multiple results: 78 * * begin speaking each of the choices in order 79 * * transition to the SPEAKING_CHOICES state 80 * 81 * SPEAKING_TRY_AGAIN: 82 * This transitions out only on events from TTS or the fallback runnable 83 * once the try again utterance completes: 84 * * begin listening for the command using the {@link CommandRecognizerEngine} 85 * * transition to the LISTENING_FOR_COMMAND state 86 * 87 * SPEAKING_CHOSEN_ACTION: 88 * This transitions out only on events from TTS or the fallback runnable 89 * once the utterance completes: 90 * * dispatch the intent that was chosen 91 * * transition to the EXITING state 92 * * finish the activity 93 * 94 * SPEAKING_CHOICES: 95 * This transitions out only on events from TTS or the fallback runnable 96 * once the utterance completes: 97 * * begin listening for the user's choice using the 98 * {@link PhoneTypeChoiceRecognizerEngine} 99 * * transition to the WAITING_FOR_CHOICE state. 100 * 101 * WAITING_FOR_CHOICE: 102 * This transitions out only on events from the recognizer 103 * on RecognitionFailure or RecognitionError: 104 * * begin speaking the "invalid choice" message, along with the list 105 * of choices 106 * * transition to the SPEAKING_CHOICES state 107 * on RecognitionSuccess: 108 * if the result is "try again", prompt the user to say a command, begin 109 * listening for the command, and transition back to the WAITING_FOR_COMMAND 110 * state. 111 * if the result is "exit", then being speaking the "goodbye" message and 112 * transition to the SPEAKING_GOODBYE state. 113 * if the result is a valid choice, begin speaking the action chosen,initiate 114 * the command the user has choose and exit. 115 * if not a valid choice, speak the "invalid choice" message, begin 116 * speaking the choices in order again, transition to the 117 * SPEAKING_CHOICES 118 * 119 * SPEAKING_GOODBYE: 120 * This transitions out only on events from TTS or the fallback runnable 121 * after a time out, finish the activity. 122 * 123 */ 124 125 public class VoiceDialerActivity extends Activity { 126 127 private static final String TAG = "VoiceDialerActivity"; 128 129 private static final String MICROPHONE_EXTRA = "microphone"; 130 private static final String CONTACTS_EXTRA = "contacts"; 131 132 private static final String SPEAK_NOW_UTTERANCE = "speak_now"; 133 private static final String TRY_AGAIN_UTTERANCE = "try_again"; 134 private static final String CHOSEN_ACTION_UTTERANCE = "chose_action"; 135 private static final String GOODBYE_UTTERANCE = "goodbye"; 136 private static final String CHOICES_UTTERANCE = "choices"; 137 138 private static final int FIRST_UTTERANCE_DELAY = 300; 139 private static final int MAX_TTS_DELAY = 6000; 140 private static final int EXIT_DELAY = 2000; 141 142 private static final int BLUETOOTH_SAMPLE_RATE = 8000; 143 private static final int REGULAR_SAMPLE_RATE = 11025; 144 145 private static final int INITIALIZING = 0; 146 private static final int SPEAKING_GREETING = 1; 147 private static final int WAITING_FOR_COMMAND = 2; 148 private static final int SPEAKING_TRY_AGAIN = 3; 149 private static final int SPEAKING_CHOICES = 4; 150 private static final int WAITING_FOR_CHOICE = 5; 151 private static final int WAITING_FOR_DIALOG_CHOICE = 6; 152 private static final int SPEAKING_CHOSEN_ACTION = 7; 153 private static final int SPEAKING_GOODBYE = 8; 154 private static final int EXITING = 9; 155 156 private static final CommandRecognizerEngine mCommandEngine = 157 new CommandRecognizerEngine(); 158 private static final PhoneTypeChoiceRecognizerEngine mPhoneTypeChoiceEngine = 159 new PhoneTypeChoiceRecognizerEngine(); 160 private CommandRecognizerClient mCommandClient; 161 private ChoiceRecognizerClient mChoiceClient; 162 private ToneGenerator mToneGenerator; 163 private Handler mHandler; 164 private Thread mRecognizerThread = null; 165 private AudioManager mAudioManager; 166 private BluetoothHeadset mBluetoothHeadset; 167 private BluetoothDevice mBluetoothDevice; 168 private BluetoothAdapter mAdapter; 169 private TextToSpeech mTts; 170 private HashMap<String, String> mTtsParams; 171 private VoiceDialerBroadcastReceiver mReceiver; 172 private boolean mWaitingForTts; 173 private boolean mWaitingForScoConnection; 174 private Intent[] mAvailableChoices; 175 private Intent mChosenAction; 176 private int mBluetoothVoiceVolume; 177 private int mState; 178 private AlertDialog mAlertDialog; 179 private Runnable mFallbackRunnable; 180 private boolean mUsingBluetooth = false; 181 private int mSampleRate; 182 private WakeLock mWakeLock; 183 184 @Override 185 protected void onCreate(Bundle icicle) { 186 super.onCreate(icicle); 187 // TODO: All of this state management and holding of 188 // connections to the TTS engine and recognizer really 189 // belongs in a service. The activity can be stopped or deleted 190 // and recreated for lots of reasons. 191 // It's way too late in the ICS release cycle for a change 192 // like this now though. 193 // MHibdon Sept 20 2011 194 mHandler = new Handler(); 195 mAudioManager = (AudioManager)getSystemService(AUDIO_SERVICE); 196 mToneGenerator = new ToneGenerator(AudioManager.STREAM_RING, 197 ToneGenerator.MAX_VOLUME); 198 199 acquireWakeLock(this); 200 201 mState = INITIALIZING; 202 mChosenAction = null; 203 mAudioManager.requestAudioFocus( 204 null, AudioManager.STREAM_MUSIC, 205 AudioManager.AUDIOFOCUS_GAIN_TRANSIENT); 206 207 // set this flag so this activity will stay in front of the keyguard 208 int flags = WindowManager.LayoutParams.FLAG_SHOW_WHEN_LOCKED; 209 getWindow().addFlags(flags); 210 211 // open main window 212 setTheme(android.R.style.Theme_Dialog); 213 setTitle(R.string.title); 214 setContentView(R.layout.voice_dialing); 215 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 216 findViewById(R.id.retry_view).setVisibility(View.INVISIBLE); 217 findViewById(R.id.microphone_loading_view).setVisibility(View.VISIBLE); 218 if (RecognizerLogger.isEnabled(this)) { 219 ((TextView) findViewById(R.id.substate)).setText(R.string.logging_enabled); 220 } 221 222 // Get handle to BluetoothHeadset object 223 IntentFilter audioStateFilter; 224 audioStateFilter = new IntentFilter(); 225 audioStateFilter.addAction(BluetoothHeadset.ACTION_CONNECTION_STATE_CHANGED); 226 audioStateFilter.addAction(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED); 227 mReceiver = new VoiceDialerBroadcastReceiver(); 228 registerReceiver(mReceiver, audioStateFilter); 229 230 mCommandEngine.setContactsFile(newFile(getArg(CONTACTS_EXTRA))); 231 mCommandEngine.setMinimizeResults(true); 232 mCommandEngine.setAllowOpenEntries(false); 233 mCommandClient = new CommandRecognizerClient(); 234 mChoiceClient = new ChoiceRecognizerClient(); 235 236 mAdapter = BluetoothAdapter.getDefaultAdapter(); 237 if (BluetoothHeadset.isBluetoothVoiceDialingEnabled(this) && mAdapter != null) { 238 if (!mAdapter.getProfileProxy(this, mBluetoothHeadsetServiceListener, 239 BluetoothProfile.HEADSET)) { 240 Log.e(TAG, "Getting Headset Proxy failed"); 241 } 242 243 } else { 244 mUsingBluetooth = false; 245 if (false) Log.d(TAG, "bluetooth unavailable"); 246 mSampleRate = REGULAR_SAMPLE_RATE; 247 mCommandEngine.setMinimizeResults(false); 248 mCommandEngine.setAllowOpenEntries(true); 249 250 // we're not using bluetooth apparently, just start listening. 251 listenForCommand(); 252 } 253 254 } 255 256 class ErrorRunnable implements Runnable { 257 private int mErrorMsg; 258 public ErrorRunnable(int errorMsg) { 259 mErrorMsg = errorMsg; 260 } 261 262 public void run() { 263 // put up an error and exit 264 mHandler.removeCallbacks(mMicFlasher); 265 ((TextView)findViewById(R.id.state)).setText(R.string.failure); 266 ((TextView)findViewById(R.id.substate)).setText(mErrorMsg); 267 ((TextView)findViewById(R.id.substate)).setText( 268 R.string.headset_connection_lost); 269 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 270 findViewById(R.id.retry_view).setVisibility(View.VISIBLE); 271 272 273 if (!mUsingBluetooth) { 274 playSound(ToneGenerator.TONE_PROP_NACK); 275 } 276 } 277 } 278 279 class OnTtsCompletionRunnable implements Runnable { 280 private boolean mFallback; 281 282 OnTtsCompletionRunnable(boolean fallback) { 283 mFallback = fallback; 284 } 285 286 public void run() { 287 if (mFallback) { 288 Log.e(TAG, "utterance completion not delivered, using fallback"); 289 } 290 Log.d(TAG, "onTtsCompletionRunnable"); 291 if (mState == SPEAKING_GREETING || mState == SPEAKING_TRY_AGAIN) { 292 listenForCommand(); 293 } else if (mState == SPEAKING_CHOICES) { 294 listenForChoice(); 295 } else if (mState == SPEAKING_GOODBYE) { 296 mState = EXITING; 297 finish(); 298 } else if (mState == SPEAKING_CHOSEN_ACTION) { 299 mState = EXITING; 300 startActivityHelp(mChosenAction); 301 finish(); 302 } 303 } 304 } 305 306 class GreetingRunnable implements Runnable { 307 public void run() { 308 mState = SPEAKING_GREETING; 309 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 310 SPEAK_NOW_UTTERANCE); 311 mTts.speak(getString(R.string.speak_now_tts), 312 TextToSpeech.QUEUE_FLUSH, 313 mTtsParams); 314 // Normally, we will begin listening for the command after the 315 // utterance completes. As a fallback in case the utterance 316 // does not complete, post a delayed runnable to fire 317 // the intent. 318 mFallbackRunnable = new OnTtsCompletionRunnable(true); 319 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 320 } 321 } 322 323 class TtsInitListener implements TextToSpeech.OnInitListener { 324 public void onInit(int status) { 325 // status can be either TextToSpeech.SUCCESS or TextToSpeech.ERROR. 326 if (false) Log.d(TAG, "onInit for tts"); 327 if (status != TextToSpeech.SUCCESS) { 328 // Initialization failed. 329 Log.e(TAG, "Could not initialize TextToSpeech."); 330 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 331 exitActivity(); 332 return; 333 } 334 335 if (mTts == null) { 336 Log.e(TAG, "null tts"); 337 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 338 exitActivity(); 339 return; 340 } 341 342 mTts.setOnUtteranceCompletedListener(new OnUtteranceCompletedListener()); 343 344 // The TTS engine has been successfully initialized. 345 mWaitingForTts = false; 346 347 // TTS over bluetooth is really loud, 348 // Limit volume to -18dB. Stream volume range represents approximately 50dB 349 // (See AudioSystem.cpp linearToLog()) so the number of steps corresponding 350 // to 18dB is 18 / (50 / maxSteps). 351 mBluetoothVoiceVolume = mAudioManager.getStreamVolume( 352 AudioManager.STREAM_BLUETOOTH_SCO); 353 int maxVolume = mAudioManager.getStreamMaxVolume(AudioManager.STREAM_BLUETOOTH_SCO); 354 int volume = maxVolume - ((18 / (50/maxVolume)) + 1); 355 if (mBluetoothVoiceVolume > volume) { 356 mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO, volume, 0); 357 } 358 359 if (mWaitingForScoConnection) { 360 // the bluetooth connection is not up yet, still waiting. 361 } else { 362 // we now have SCO connection and TTS, so we can start. 363 mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY); 364 } 365 } 366 } 367 368 class OnUtteranceCompletedListener 369 implements TextToSpeech.OnUtteranceCompletedListener { 370 public void onUtteranceCompleted(String utteranceId) { 371 if (false) Log.d(TAG, "onUtteranceCompleted " + utteranceId); 372 // since the utterance has completed, we no longer need the fallback. 373 mHandler.removeCallbacks(mFallbackRunnable); 374 mFallbackRunnable = null; 375 mHandler.post(new OnTtsCompletionRunnable(false)); 376 } 377 } 378 379 private void updateBluetoothParameters(boolean connected) { 380 if (connected) { 381 if (false) Log.d(TAG, "using bluetooth"); 382 mUsingBluetooth = true; 383 384 mBluetoothHeadset.startVoiceRecognition(mBluetoothDevice); 385 386 mSampleRate = BLUETOOTH_SAMPLE_RATE; 387 mCommandEngine.setMinimizeResults(true); 388 mCommandEngine.setAllowOpenEntries(false); 389 390 // we can't start recognizing until we get connected to the BluetoothHeadset 391 // and have a connected audio state. We will listen for these 392 // states to change. 393 mWaitingForScoConnection = true; 394 395 // initialize the text to speech system 396 mWaitingForTts = true; 397 mTts = new TextToSpeech(VoiceDialerActivity.this, new TtsInitListener()); 398 mTtsParams = new HashMap<String, String>(); 399 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_STREAM, 400 String.valueOf(AudioManager.STREAM_VOICE_CALL)); 401 // we need to wait for the TTS system and the SCO connection 402 // before we can start listening. 403 } else { 404 if (false) Log.d(TAG, "not using bluetooth"); 405 mUsingBluetooth = false; 406 mSampleRate = REGULAR_SAMPLE_RATE; 407 mCommandEngine.setMinimizeResults(false); 408 mCommandEngine.setAllowOpenEntries(true); 409 410 // we're not using bluetooth apparently, just start listening. 411 listenForCommand(); 412 } 413 } 414 415 private BluetoothProfile.ServiceListener mBluetoothHeadsetServiceListener = 416 new BluetoothProfile.ServiceListener() { 417 public void onServiceConnected(int profile, BluetoothProfile proxy) { 418 if (false) Log.d(TAG, "onServiceConnected"); 419 mBluetoothHeadset = (BluetoothHeadset) proxy; 420 421 List<BluetoothDevice> deviceList = mBluetoothHeadset.getConnectedDevices(); 422 423 if (deviceList.size() > 0) { 424 mBluetoothDevice = deviceList.get(0); 425 int state = mBluetoothHeadset.getConnectionState(mBluetoothDevice); 426 if (false) Log.d(TAG, "headset status " + state); 427 428 // We are already connnected to a headset 429 if (state == BluetoothHeadset.STATE_CONNECTED) { 430 updateBluetoothParameters(true); 431 return; 432 } 433 } 434 updateBluetoothParameters(false); 435 } 436 437 public void onServiceDisconnected(int profile) { 438 mBluetoothHeadset = null; 439 } 440 }; 441 442 private class VoiceDialerBroadcastReceiver extends BroadcastReceiver { 443 @Override 444 public void onReceive(Context context, Intent intent) { 445 String action = intent.getAction(); 446 if (action.equals(BluetoothHeadset.ACTION_CONNECTION_STATE_CHANGED)) { 447 448 BluetoothDevice device = intent.getParcelableExtra(BluetoothDevice.EXTRA_DEVICE); 449 int state = intent.getIntExtra(BluetoothProfile.EXTRA_STATE, -1); 450 451 if (false) Log.d(TAG, "HEADSET STATE -> " + state); 452 453 if (state == BluetoothProfile.STATE_CONNECTED) { 454 if (device == null) { 455 return; 456 } 457 mBluetoothDevice = device; 458 updateBluetoothParameters(true); 459 } else if (state == BluetoothProfile.STATE_DISCONNECTED) { 460 mBluetoothDevice = null; 461 updateBluetoothParameters(false); 462 } 463 } else if (action.equals(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED)) { 464 int state = intent.getIntExtra(BluetoothProfile.EXTRA_STATE, -1); 465 int prevState = intent.getIntExtra(BluetoothProfile.EXTRA_PREVIOUS_STATE, -1); 466 if (state == BluetoothHeadset.STATE_AUDIO_CONNECTED && 467 mWaitingForScoConnection) { 468 // SCO channel has just become available. 469 mWaitingForScoConnection = false; 470 if (mWaitingForTts) { 471 // still waiting for the TTS to be set up. 472 } else { 473 // we now have SCO connection and TTS, so we can start. 474 mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY); 475 } 476 } else if (prevState == BluetoothHeadset.STATE_AUDIO_CONNECTED) { 477 if (!mWaitingForScoConnection) { 478 // apparently our connection to the headset has dropped. 479 // we won't be able to continue voicedialing. 480 if (false) Log.d(TAG, "lost sco connection"); 481 482 mHandler.post(new ErrorRunnable( 483 R.string.headset_connection_lost)); 484 485 exitActivity(); 486 } 487 } 488 } 489 } 490 } 491 492 private void askToTryAgain() { 493 // get work off UAPI thread 494 mHandler.post(new Runnable() { 495 public void run() { 496 if (mAlertDialog != null) { 497 mAlertDialog.dismiss(); 498 } 499 500 mHandler.removeCallbacks(mMicFlasher); 501 ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again); 502 findViewById(R.id.state).setVisibility(View.VISIBLE); 503 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 504 findViewById(R.id.retry_view).setVisibility(View.VISIBLE); 505 506 if (mUsingBluetooth) { 507 mState = SPEAKING_TRY_AGAIN; 508 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 509 TRY_AGAIN_UTTERANCE); 510 mTts.speak(getString(R.string.no_results_tts), 511 TextToSpeech.QUEUE_FLUSH, 512 mTtsParams); 513 514 // Normally, the we will start listening after the 515 // utterance completes. As a fallback in case the utterance 516 // does not complete, post a delayed runnable to fire 517 // the intent. 518 mFallbackRunnable = new OnTtsCompletionRunnable(true); 519 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 520 } else { 521 try { 522 Thread.sleep(playSound(ToneGenerator.TONE_PROP_NACK)); 523 } catch (InterruptedException e) { 524 } 525 // we are not using tts, so we just start listening again. 526 listenForCommand(); 527 } 528 } 529 }); 530 } 531 532 private void performChoice() { 533 if (mUsingBluetooth) { 534 String sentenceSpoken = spaceOutDigits( 535 mChosenAction.getStringExtra( 536 RecognizerEngine.SENTENCE_EXTRA)); 537 538 mState = SPEAKING_CHOSEN_ACTION; 539 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 540 CHOSEN_ACTION_UTTERANCE); 541 mTts.speak(sentenceSpoken, 542 TextToSpeech.QUEUE_FLUSH, 543 mTtsParams); 544 545 // Normally, the intent will be dispatched after the 546 // utterance completes. As a fallback in case the utterance 547 // does not complete, post a delayed runnable to fire 548 // the intent. 549 mFallbackRunnable = new OnTtsCompletionRunnable(true); 550 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 551 } else { 552 // just dispatch the intent 553 startActivityHelp(mChosenAction); 554 finish(); 555 } 556 } 557 558 private void waitForChoice() { 559 if (mUsingBluetooth) { 560 // We are running in bluetooth mode, and we have 561 // multiple matches. Speak the choices and let 562 // the user choose. 563 564 // We will not start listening until the utterance 565 // of the choice list completes. 566 speakChoices(); 567 568 // Normally, listening will begin after the 569 // utterance completes. As a fallback in case the utterance 570 // does not complete, post a delayed runnable to begin 571 // listening. 572 mFallbackRunnable = new OnTtsCompletionRunnable(true); 573 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 574 } else { 575 // We are not running in bluetooth mode, so all 576 // we need to do is wait for the user to select 577 // a choice from the alert dialog. We will wait 578 // indefinitely for this. 579 mState = WAITING_FOR_DIALOG_CHOICE; 580 } 581 } 582 583 private class CommandRecognizerClient implements RecognizerClient { 584 static final int MIN_VOLUME_TO_SKIP = 2; 585 /** 586 * Called by the {@link RecognizerEngine} when the microphone is started. 587 */ 588 public void onMicrophoneStart(InputStream mic) { 589 if (false) Log.d(TAG, "onMicrophoneStart"); 590 591 if (!mUsingBluetooth) { 592 playSound(ToneGenerator.TONE_PROP_BEEP); 593 594 int ringVolume = mAudioManager.getStreamVolume( 595 AudioManager.STREAM_RING); 596 Log.d(TAG, "ringVolume " + ringVolume); 597 598 if (ringVolume >= MIN_VOLUME_TO_SKIP) { 599 // now we're playing a sound, and corrupting the input sample. 600 // So we need to pull that junk off of the input stream so that the 601 // recognizer won't see it. 602 try { 603 skipBeep(mic); 604 } catch (java.io.IOException e) { 605 Log.e(TAG, "IOException " + e); 606 } 607 } else { 608 if (false) Log.d(TAG, "no tone"); 609 } 610 } 611 612 mHandler.post(new Runnable() { 613 public void run() { 614 findViewById(R.id.retry_view).setVisibility(View.INVISIBLE); 615 findViewById(R.id.microphone_loading_view).setVisibility( 616 View.INVISIBLE); 617 ((TextView)findViewById(R.id.state)).setText(R.string.listening); 618 mHandler.post(mMicFlasher); 619 } 620 }); 621 } 622 623 /** 624 * Beep detection 625 */ 626 private static final int START_WINDOW_MS = 500; // Beep detection window duration in ms 627 private static final int SINE_FREQ = 400; // base sine frequency on beep 628 private static final int NUM_PERIODS_BLOCK = 10; // number of sine periods in one energy averaging block 629 private static final int THRESHOLD = 8; // absolute pseudo energy threshold 630 private static final int START = 0; // beep detection start 631 private static final int RISING = 1; // beep rising edge start 632 private static final int TOP = 2; // beep constant energy detected 633 634 void skipBeep(InputStream is) throws IOException { 635 int sampleCount = ((mSampleRate / SINE_FREQ) * NUM_PERIODS_BLOCK); 636 int blockSize = 2 * sampleCount; // energy averaging block 637 638 if (is == null || blockSize == 0) { 639 return; 640 } 641 642 byte[] buf = new byte[blockSize]; 643 int maxBytes = 2 * ((START_WINDOW_MS * mSampleRate) / 1000); 644 maxBytes = ((maxBytes-1) / blockSize + 1) * blockSize; 645 646 int count = 0; 647 int state = START; // detection state 648 long prevE = 0; // previous pseudo energy 649 long peak = 0; 650 int threshold = THRESHOLD*sampleCount; // absolute energy threshold 651 Log.d(TAG, "blockSize " + blockSize); 652 653 while (count < maxBytes) { 654 int cnt = 0; 655 while (cnt < blockSize) { 656 int n = is.read(buf, cnt, blockSize-cnt); 657 if (n < 0) { 658 throw new java.io.IOException(); 659 } 660 cnt += n; 661 } 662 663 // compute pseudo energy 664 cnt = blockSize; 665 long sumx = 0; 666 long sumxx = 0; 667 while (cnt >= 2) { 668 short smp = (short)((buf[cnt - 1] << 8) + (buf[cnt - 2] & 0xFF)); 669 sumx += smp; 670 sumxx += smp*smp; 671 cnt -= 2; 672 } 673 long energy = (sumxx*sampleCount - sumx*sumx)/(sampleCount*sampleCount); 674 Log.d(TAG, "sumx " + sumx + " sumxx " + sumxx + " ee " + energy); 675 676 switch (state) { 677 case START: 678 if (energy > threshold && energy > (prevE * 2) && prevE != 0) { 679 // rising edge if energy doubled and > abs threshold 680 state = RISING; 681 if (false) Log.d(TAG, "start RISING: " + count +" time: "+ (((1000*count)/2)/mSampleRate)); 682 } 683 break; 684 case RISING: 685 if (energy < threshold || energy < (prevE / 2)){ 686 // energy fell back below half of previous, back to start 687 if (false) Log.d(TAG, "back to START: " + count +" time: "+ (((1000*count)/2)/mSampleRate)); 688 peak = 0; 689 state = START; 690 } else if (energy > (prevE / 2) && energy < (prevE * 2)) { 691 // Start of constant energy 692 if (false) Log.d(TAG, "start TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate)); 693 if (peak < energy) { 694 peak = energy; 695 } 696 state = TOP; 697 } 698 break; 699 case TOP: 700 if (energy < threshold || energy < (peak / 2)) { 701 // e went to less than half of the peak 702 if (false) Log.d(TAG, "end TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate)); 703 return; 704 } 705 break; 706 } 707 prevE = energy; 708 count += blockSize; 709 } 710 if (false) Log.d(TAG, "no beep detected, timed out"); 711 } 712 713 /** 714 * Called by the {@link RecognizerEngine} if the recognizer fails. 715 */ 716 public void onRecognitionFailure(final String msg) { 717 if (false) Log.d(TAG, "onRecognitionFailure " + msg); 718 // we had zero results. Just try again. 719 askToTryAgain(); 720 } 721 722 /** 723 * Called by the {@link RecognizerEngine} on an internal error. 724 */ 725 public void onRecognitionError(final String msg) { 726 if (false) Log.d(TAG, "onRecognitionError " + msg); 727 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 728 exitActivity(); 729 } 730 731 /** 732 * Called by the {@link RecognizerEngine} when is succeeds. If there is 733 * only one item, then the Intent is dispatched immediately. 734 * If there are more, then an AlertDialog is displayed and the user is 735 * prompted to select. 736 * @param intents a list of Intents corresponding to the sentences. 737 */ 738 public void onRecognitionSuccess(final Intent[] intents) { 739 if (false) Log.d(TAG, "CommandRecognizerClient onRecognitionSuccess " + 740 intents.length); 741 if (mState != WAITING_FOR_COMMAND) { 742 if (false) Log.d(TAG, "not waiting for command, ignoring"); 743 return; 744 } 745 746 // store the intents in a member variable so that we can access it 747 // later when the user chooses which action to perform. 748 mAvailableChoices = intents; 749 750 mHandler.post(new Runnable() { 751 public void run() { 752 if (!mUsingBluetooth) { 753 playSound(ToneGenerator.TONE_PROP_ACK); 754 } 755 mHandler.removeCallbacks(mMicFlasher); 756 757 String[] sentences = new String[intents.length]; 758 for (int i = 0; i < intents.length; i++) { 759 sentences[i] = intents[i].getStringExtra( 760 RecognizerEngine.SENTENCE_EXTRA); 761 } 762 763 if (intents.length == 0) { 764 onRecognitionFailure("zero intents"); 765 return; 766 } 767 768 if (intents.length > 0) { 769 // see if we the response was "exit" or "cancel". 770 String value = intents[0].getStringExtra( 771 RecognizerEngine.SEMANTIC_EXTRA); 772 if (false) Log.d(TAG, "value " + value); 773 if ("X".equals(value)) { 774 exitActivity(); 775 return; 776 } 777 } 778 779 if (mUsingBluetooth && 780 (intents.length == 1 || 781 !Intent.ACTION_CALL_PRIVILEGED.equals( 782 intents[0].getAction()))) { 783 // When we're running in bluetooth mode, we expect 784 // that the user is not looking at the screen and cannot 785 // interact with the device in any way besides voice 786 // commands. In this case we need to minimize how many 787 // interactions the user has to perform in order to call 788 // someone. 789 // So if there is only one match, instead of making the 790 // user confirm, we just assume it's correct, speak 791 // the choice over TTS, and then dispatch it. 792 // If there are multiple matches for some intent type 793 // besides "call", it's too difficult for the user to 794 // explain which one they meant, so we just take the highest 795 // confidence match and dispatch that. 796 797 // Speak the sentence for the action we are about 798 // to dispatch so that the user knows what is happening. 799 mChosenAction = intents[0]; 800 performChoice(); 801 802 return; 803 } else { 804 // Either we are not running in bluetooth mode, 805 // or we had multiple matches. Either way, we need 806 // the user to confirm the choice. 807 // Put up a dialog from which the user can select 808 // his/her choice. 809 DialogInterface.OnCancelListener cancelListener = 810 new DialogInterface.OnCancelListener() { 811 812 public void onCancel(DialogInterface dialog) { 813 if (false) { 814 Log.d(TAG, "cancelListener.onCancel"); 815 } 816 dialog.dismiss(); 817 finish(); 818 } 819 }; 820 821 DialogInterface.OnClickListener clickListener = 822 new DialogInterface.OnClickListener() { 823 824 public void onClick(DialogInterface dialog, int which) { 825 if (false) { 826 Log.d(TAG, "clickListener.onClick " + which); 827 } 828 startActivityHelp(intents[which]); 829 dialog.dismiss(); 830 finish(); 831 } 832 }; 833 834 DialogInterface.OnClickListener negativeListener = 835 new DialogInterface.OnClickListener() { 836 837 public void onClick(DialogInterface dialog, int which) { 838 if (false) { 839 Log.d(TAG, "negativeListener.onClick " + 840 which); 841 } 842 dialog.dismiss(); 843 finish(); 844 } 845 }; 846 847 mAlertDialog = 848 new AlertDialog.Builder(VoiceDialerActivity.this, 849 AlertDialog.THEME_HOLO_DARK) 850 .setTitle(R.string.title) 851 .setItems(sentences, clickListener) 852 .setOnCancelListener(cancelListener) 853 .setNegativeButton(android.R.string.cancel, 854 negativeListener) 855 .show(); 856 857 waitForChoice(); 858 } 859 } 860 }); 861 } 862 } 863 864 private class ChoiceRecognizerClient implements RecognizerClient { 865 public void onRecognitionSuccess(final Intent[] intents) { 866 if (false) Log.d(TAG, "ChoiceRecognizerClient onRecognitionSuccess"); 867 if (mState != WAITING_FOR_CHOICE) { 868 if (false) Log.d(TAG, "not waiting for choice, ignoring"); 869 return; 870 } 871 872 if (mAlertDialog != null) { 873 mAlertDialog.dismiss(); 874 } 875 876 // disregard all but the first intent. 877 if (intents.length > 0) { 878 String value = intents[0].getStringExtra( 879 RecognizerEngine.SEMANTIC_EXTRA); 880 if (false) Log.d(TAG, "value " + value); 881 if ("R".equals(value)) { 882 if (mUsingBluetooth) { 883 mHandler.post(new GreetingRunnable()); 884 } else { 885 listenForCommand(); 886 } 887 } else if ("X".equals(value)) { 888 exitActivity(); 889 } else { 890 // it's a phone type response 891 mChosenAction = null; 892 for (int i = 0; i < mAvailableChoices.length; i++) { 893 if (value.equalsIgnoreCase( 894 mAvailableChoices[i].getStringExtra( 895 CommandRecognizerEngine.PHONE_TYPE_EXTRA))) { 896 mChosenAction = mAvailableChoices[i]; 897 } 898 } 899 900 if (mChosenAction != null) { 901 performChoice(); 902 } else { 903 // invalid choice 904 if (false) Log.d(TAG, "invalid choice" + value); 905 906 if (mUsingBluetooth) { 907 mTtsParams.remove(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID); 908 mTts.speak(getString(R.string.invalid_choice_tts), 909 TextToSpeech.QUEUE_FLUSH, 910 mTtsParams); 911 } 912 waitForChoice(); 913 } 914 } 915 } 916 } 917 918 public void onRecognitionFailure(String msg) { 919 if (false) Log.d(TAG, "ChoiceRecognizerClient onRecognitionFailure"); 920 exitActivity(); 921 } 922 923 public void onRecognitionError(String err) { 924 if (false) Log.d(TAG, "ChoiceRecognizerClient onRecognitionError"); 925 mHandler.post(new ErrorRunnable(R.string.recognition_error)); 926 exitActivity(); 927 } 928 929 public void onMicrophoneStart(InputStream mic) { 930 if (false) Log.d(TAG, "ChoiceRecognizerClient onMicrophoneStart"); 931 } 932 } 933 934 private void speakChoices() { 935 if (false) Log.d(TAG, "speakChoices"); 936 mState = SPEAKING_CHOICES; 937 938 String sentenceSpoken = spaceOutDigits( 939 mAvailableChoices[0].getStringExtra( 940 RecognizerEngine.SENTENCE_EXTRA)); 941 942 // When we have multiple choices, they will be of the form 943 // "call jack jones at home", "call jack jones on mobile". 944 // Speak the entire first sentence, then the last word from each 945 // of the remaining sentences. This will come out to something 946 // like "call jack jones at home mobile or work". 947 StringBuilder builder = new StringBuilder(); 948 builder.append(sentenceSpoken); 949 950 int count = mAvailableChoices.length; 951 for (int i=1; i < count; i++) { 952 if (i == count-1) { 953 builder.append(" or "); 954 } else { 955 builder.append(" "); 956 } 957 String tmpSentence = mAvailableChoices[i].getStringExtra( 958 RecognizerEngine.SENTENCE_EXTRA); 959 String[] words = tmpSentence.trim().split(" "); 960 builder.append(words[words.length-1]); 961 } 962 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 963 CHOICES_UTTERANCE); 964 mTts.speak(builder.toString(), 965 TextToSpeech.QUEUE_ADD, 966 mTtsParams); 967 } 968 969 970 private static String spaceOutDigits(String sentenceDisplay) { 971 // if we have a sentence of the form "dial 123 456 7890", 972 // we need to insert a space between each digit, otherwise 973 // the TTS engine will say "dial one hundred twenty three...." 974 // When there already is a space, we also insert a comma, 975 // so that it pauses between sections. For the displayable 976 // sentence "dial 123 456 7890" it will speak 977 // "dial 1 2 3, 4 5 6, 7 8 9 0" 978 char buffer[] = sentenceDisplay.toCharArray(); 979 StringBuilder builder = new StringBuilder(); 980 boolean buildingNumber = false; 981 int l = sentenceDisplay.length(); 982 for (int index = 0; index < l; index++) { 983 char c = buffer[index]; 984 if (Character.isDigit(c)) { 985 if (buildingNumber) { 986 builder.append(" "); 987 } 988 buildingNumber = true; 989 builder.append(c); 990 } else if (c == ' ') { 991 if (buildingNumber) { 992 builder.append(","); 993 } else { 994 builder.append(" "); 995 } 996 } else { 997 buildingNumber = false; 998 builder.append(c); 999 } 1000 } 1001 return builder.toString(); 1002 } 1003 1004 private void startActivityHelp(Intent intent) { 1005 startActivity(intent); 1006 } 1007 1008 private void listenForCommand() { 1009 if (false) Log.d(TAG, "" 1010 + "Command(): MICROPHONE_EXTRA: "+getArg(MICROPHONE_EXTRA)+ 1011 ", CONTACTS_EXTRA: "+getArg(CONTACTS_EXTRA)); 1012 1013 mState = WAITING_FOR_COMMAND; 1014 mRecognizerThread = new Thread() { 1015 public void run() { 1016 mCommandEngine.recognize(mCommandClient, 1017 VoiceDialerActivity.this, 1018 newFile(getArg(MICROPHONE_EXTRA)), 1019 mSampleRate); 1020 } 1021 }; 1022 mRecognizerThread.start(); 1023 } 1024 1025 private void listenForChoice() { 1026 if (false) Log.d(TAG, "listenForChoice(): MICROPHONE_EXTRA: " + 1027 getArg(MICROPHONE_EXTRA)); 1028 1029 mState = WAITING_FOR_CHOICE; 1030 mRecognizerThread = new Thread() { 1031 public void run() { 1032 mPhoneTypeChoiceEngine.recognize(mChoiceClient, 1033 VoiceDialerActivity.this, 1034 newFile(getArg(MICROPHONE_EXTRA)), mSampleRate); 1035 } 1036 }; 1037 mRecognizerThread.start(); 1038 } 1039 1040 private void exitActivity() { 1041 synchronized(this) { 1042 if (mState != EXITING) { 1043 if (false) Log.d(TAG, "exitActivity"); 1044 mState = SPEAKING_GOODBYE; 1045 if (mUsingBluetooth) { 1046 mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, 1047 GOODBYE_UTTERANCE); 1048 mTts.speak(getString(R.string.goodbye_tts), 1049 TextToSpeech.QUEUE_FLUSH, 1050 mTtsParams); 1051 // Normally, the activity will finish() after the 1052 // utterance completes. As a fallback in case the utterance 1053 // does not complete, post a delayed runnable finish the 1054 // activity. 1055 mFallbackRunnable = new OnTtsCompletionRunnable(true); 1056 mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY); 1057 } else { 1058 mHandler.postDelayed(new Runnable() { 1059 public void run() { 1060 finish(); 1061 } 1062 }, EXIT_DELAY); 1063 } 1064 } 1065 } 1066 } 1067 1068 private String getArg(String name) { 1069 if (name == null) return null; 1070 String arg = getIntent().getStringExtra(name); 1071 if (arg != null) return arg; 1072 arg = SystemProperties.get("app.voicedialer." + name); 1073 return arg != null && arg.length() > 0 ? arg : null; 1074 } 1075 1076 private static File newFile(String name) { 1077 return name != null ? new File(name) : null; 1078 } 1079 1080 private int playSound(int toneType) { 1081 int msecDelay = 1; 1082 1083 // use the MediaPlayer to prompt the user 1084 if (mToneGenerator != null) { 1085 mToneGenerator.startTone(toneType); 1086 msecDelay = StrictMath.max(msecDelay, 300); 1087 } 1088 // use the Vibrator to prompt the user 1089 if (mAudioManager != null && 1090 mAudioManager.shouldVibrate(AudioManager.VIBRATE_TYPE_RINGER)) { 1091 final int VIBRATOR_TIME = 150; 1092 final int VIBRATOR_GUARD_TIME = 150; 1093 Vibrator vibrator = new Vibrator(); 1094 vibrator.vibrate(VIBRATOR_TIME); 1095 msecDelay = StrictMath.max(msecDelay, 1096 VIBRATOR_TIME + VIBRATOR_GUARD_TIME); 1097 } 1098 1099 1100 return msecDelay; 1101 } 1102 1103 protected void onDestroy() { 1104 synchronized(this) { 1105 mState = EXITING; 1106 } 1107 1108 if (mAlertDialog != null) { 1109 mAlertDialog.dismiss(); 1110 } 1111 1112 // set the volume back to the level it was before we started. 1113 mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO, 1114 mBluetoothVoiceVolume, 0); 1115 mAudioManager.abandonAudioFocus(null); 1116 1117 // shut down bluetooth, if it exists 1118 if (mBluetoothHeadset != null) { 1119 mBluetoothHeadset.stopVoiceRecognition(mBluetoothDevice); 1120 mAdapter.closeProfileProxy(BluetoothProfile.HEADSET, mBluetoothHeadset); 1121 mBluetoothHeadset = null; 1122 } 1123 1124 // shut down recognizer and wait for the thread to complete 1125 if (mRecognizerThread != null) { 1126 mRecognizerThread.interrupt(); 1127 try { 1128 mRecognizerThread.join(); 1129 } catch (InterruptedException e) { 1130 if (false) Log.d(TAG, "onStop mRecognizerThread.join exception " + e); 1131 } 1132 mRecognizerThread = null; 1133 } 1134 1135 // clean up UI 1136 mHandler.removeCallbacks(mMicFlasher); 1137 mHandler.removeMessages(0); 1138 1139 if (mTts != null) { 1140 mTts.stop(); 1141 mTts.shutdown(); 1142 mTts = null; 1143 } 1144 unregisterReceiver(mReceiver); 1145 1146 super.onDestroy(); 1147 1148 releaseWakeLock(); 1149 } 1150 1151 private void acquireWakeLock(Context context) { 1152 if (mWakeLock == null) { 1153 PowerManager pm = (PowerManager)context.getSystemService(Context.POWER_SERVICE); 1154 mWakeLock = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK, 1155 "VoiceDialer"); 1156 mWakeLock.acquire(); 1157 } 1158 } 1159 1160 private void releaseWakeLock() { 1161 if (mWakeLock != null) { 1162 mWakeLock.release(); 1163 mWakeLock = null; 1164 } 1165 } 1166 1167 private Runnable mMicFlasher = new Runnable() { 1168 int visible = View.VISIBLE; 1169 1170 public void run() { 1171 findViewById(R.id.microphone_view).setVisibility(visible); 1172 findViewById(R.id.state).setVisibility(visible); 1173 visible = visible == View.VISIBLE ? View.INVISIBLE : View.VISIBLE; 1174 mHandler.postDelayed(this, 750); 1175 } 1176 }; 1177 } 1178