1 /* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.voicedialer; 18 19 import android.app.Activity; 20 import android.app.AlertDialog; 21 import android.app.Dialog; 22 import android.content.Intent; 23 import android.content.DialogInterface; 24 import android.media.ToneGenerator; 25 import android.media.AudioManager; 26 import android.os.Bundle; 27 import android.os.Handler; 28 import android.os.SystemProperties; 29 import android.os.Vibrator; 30 import android.util.Config; 31 import android.util.Log; 32 import android.view.View; 33 import android.widget.TextView; 34 import android.widget.Toast; 35 import java.io.File; 36 import java.io.InputStream; 37 import java.io.IOException; 38 39 /** 40 * TODO: get rid of the anonymous classes 41 * TODO: merge with BluetoothVoiceDialerActivity 42 * 43 * This class is the user interface of the VoiceDialer application. 44 * Its life cycle is as follows: 45 * <ul> 46 * <li>The user presses the recognize key, and the VoiceDialerActivity starts. 47 * <li>A {@link RecognizerEngine} instance is created. 48 * <li>The RecognizerEngine signals the user to speak with the Vibrator. 49 * <li>The RecognizerEngine captures, processes, and recognizes speech 50 * against the names in the contact list. 51 * <li>The RecognizerEngine calls onRecognizerSuccess with a list of 52 * sentences and corresponding Intents. 53 * <li>If the list is one element long, the corresponding Intent is dispatched. 54 * <li>Else an {@link AlertDialog} containing the list of sentences is 55 * displayed. 56 * <li>The user selects the desired sentence from the list, 57 * and the corresponding Intent is dispatched. 58 * <ul> 59 * Notes: 60 * <ul> 61 * <li>The RecognizerEngine is kept and reused for the next recognition cycle. 62 * </ul> 63 */ 64 public class VoiceDialerActivity extends Activity { 65 66 private static final String TAG = "VoiceDialerActivity"; 67 68 private static final String MICROPHONE_EXTRA = "microphone"; 69 private static final String CONTACTS_EXTRA = "contacts"; 70 private static final String SAMPLE_RATE_EXTRA = "samplerate"; 71 private static final String INTENTS_KEY = "intents"; 72 73 private static final int FAIL_PAUSE_MSEC = 5000; 74 private static final int SAMPLE_RATE = 11025; 75 76 private static final int DIALOG_ID = 1; 77 78 private final static CommandRecognizerEngine mCommandEngine = 79 new CommandRecognizerEngine(); 80 private CommandRecognizerClient mCommandClient; 81 private VoiceDialerTester mVoiceDialerTester; 82 private Handler mHandler; 83 private Thread mRecognizerThread = null; 84 private AudioManager mAudioManager; 85 private ToneGenerator mToneGenerator; 86 private AlertDialog mAlertDialog; 87 88 @Override 89 protected void onCreate(Bundle icicle) { 90 if (Config.LOGD) Log.d(TAG, "onCreate"); 91 super.onCreate(icicle); 92 mHandler = new Handler(); 93 mAudioManager = (AudioManager)getSystemService(AUDIO_SERVICE); 94 mToneGenerator = new ToneGenerator(AudioManager.STREAM_RING, 95 ToneGenerator.MAX_VOLUME); 96 } 97 98 protected void onStart() { 99 if (Config.LOGD) Log.d(TAG, "onStart " + getIntent()); 100 super.onStart(); 101 mAudioManager.requestAudioFocus( 102 null, AudioManager.STREAM_MUSIC, 103 AudioManager.AUDIOFOCUS_GAIN_TRANSIENT); 104 105 mCommandEngine.setContactsFile(newFile(getArg(CONTACTS_EXTRA))); 106 mCommandClient = new CommandRecognizerClient(); 107 mCommandEngine.setMinimizeResults(false); 108 mCommandEngine.setAllowOpenEntries(true); 109 110 // open main window 111 setTheme(android.R.style.Theme_Dialog); 112 setTitle(R.string.title); 113 setContentView(R.layout.voice_dialing); 114 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 115 findViewById(R.id.retry_view).setVisibility(View.INVISIBLE); 116 findViewById(R.id.microphone_loading_view).setVisibility(View.VISIBLE); 117 if (RecognizerLogger.isEnabled(this)) { 118 ((TextView)findViewById(R.id.substate)).setText(R.string.logging_enabled); 119 } 120 121 // start the tester, if present 122 mVoiceDialerTester = null; 123 File micDir = newFile(getArg(MICROPHONE_EXTRA)); 124 if (micDir != null && micDir.isDirectory()) { 125 mVoiceDialerTester = new VoiceDialerTester(micDir); 126 startNextTest(); 127 return; 128 } 129 130 startWork(); 131 } 132 133 private void startWork() { 134 // start the engine 135 mRecognizerThread = new Thread() { 136 public void run() { 137 if (Config.LOGD) Log.d(TAG, "onCreate.Runnable.run"); 138 String sampleRateStr = getArg(SAMPLE_RATE_EXTRA); 139 int sampleRate = SAMPLE_RATE; 140 if (sampleRateStr != null) { 141 sampleRate = Integer.parseInt(sampleRateStr); 142 } 143 mCommandEngine.recognize(mCommandClient, VoiceDialerActivity.this, 144 newFile(getArg(MICROPHONE_EXTRA)), 145 sampleRate); 146 } 147 }; 148 mRecognizerThread.start(); 149 } 150 151 private String getArg(String name) { 152 if (name == null) return null; 153 String arg = getIntent().getStringExtra(name); 154 if (arg != null) return arg; 155 arg = SystemProperties.get("app.voicedialer." + name); 156 return arg != null && arg.length() > 0 ? arg : null; 157 } 158 159 private static File newFile(String name) { 160 return name != null ? new File(name) : null; 161 } 162 163 private void startNextTest() { 164 mHandler.postDelayed(new Runnable() { 165 public void run() { 166 if (mVoiceDialerTester == null) { 167 return; 168 } 169 if (!mVoiceDialerTester.stepToNextTest()) { 170 mVoiceDialerTester.report(); 171 notifyText("Test completed!"); 172 finish(); 173 return; 174 } 175 File microphone = mVoiceDialerTester.getWavFile(); 176 File contacts = newFile(getArg(CONTACTS_EXTRA)); 177 178 notifyText("Testing\n" + microphone + "\n" + contacts); 179 mCommandEngine.recognize(mCommandClient, VoiceDialerActivity.this, 180 microphone, SAMPLE_RATE); 181 } 182 }, 2000); 183 } 184 185 private int playSound(int toneType) { 186 int msecDelay = 1; 187 188 // use the MediaPlayer to prompt the user 189 if (mToneGenerator != null) { 190 mToneGenerator.startTone(toneType); 191 msecDelay = StrictMath.max(msecDelay, 300); 192 } 193 194 // use the Vibrator to prompt the user 195 if ((mAudioManager != null) && 196 (mAudioManager.shouldVibrate(AudioManager.VIBRATE_TYPE_RINGER))) { 197 final int VIBRATOR_TIME = 150; 198 final int VIBRATOR_GUARD_TIME = 150; 199 Vibrator vibrator = new Vibrator(); 200 vibrator.vibrate(VIBRATOR_TIME); 201 msecDelay = StrictMath.max(msecDelay, 202 VIBRATOR_TIME + VIBRATOR_GUARD_TIME); 203 } 204 205 return msecDelay; 206 } 207 208 @Override 209 protected void onStop() { 210 if (Config.LOGD) Log.d(TAG, "onStop"); 211 212 mAudioManager.abandonAudioFocus(null); 213 214 // no more tester 215 mVoiceDialerTester = null; 216 217 // shut down recognizer and wait for the thread to complete 218 if (mRecognizerThread != null) { 219 mRecognizerThread.interrupt(); 220 try { 221 mRecognizerThread.join(); 222 } catch (InterruptedException e) { 223 if (Config.LOGD) Log.d(TAG, "onStop mRecognizerThread.join exception " + e); 224 } 225 mRecognizerThread = null; 226 } 227 228 // clean up UI 229 mHandler.removeCallbacks(mMicFlasher); 230 mHandler.removeMessages(0); 231 232 // clean up ToneGenerator 233 if (mToneGenerator != null) { 234 mToneGenerator.release(); 235 mToneGenerator = null; 236 } 237 238 super.onStop(); 239 240 // It makes no sense to have this activity maintain state when in 241 // background. When it stops, it should just be destroyed. 242 finish(); 243 } 244 245 private void notifyText(final CharSequence msg) { 246 Toast.makeText(VoiceDialerActivity.this, msg, Toast.LENGTH_SHORT).show(); 247 } 248 249 private Runnable mMicFlasher = new Runnable() { 250 int visible = View.VISIBLE; 251 252 public void run() { 253 findViewById(R.id.microphone_view).setVisibility(visible); 254 findViewById(R.id.state).setVisibility(visible); 255 visible = visible == View.VISIBLE ? View.INVISIBLE : View.VISIBLE; 256 mHandler.postDelayed(this, 750); 257 } 258 }; 259 260 261 protected Dialog onCreateDialog(int id, Bundle args) { 262 final Intent intents[] = (Intent[])args.getParcelableArray(INTENTS_KEY); 263 264 DialogInterface.OnClickListener clickListener = 265 new DialogInterface.OnClickListener() { 266 267 public void onClick(DialogInterface dialog, int which) { 268 if (Config.LOGD) Log.d(TAG, "clickListener.onClick " + which); 269 startActivityHelp(intents[which]); 270 dismissDialog(DIALOG_ID); 271 mAlertDialog = null; 272 finish(); 273 } 274 275 }; 276 277 DialogInterface.OnCancelListener cancelListener = 278 new DialogInterface.OnCancelListener() { 279 280 public void onCancel(DialogInterface dialog) { 281 if (Config.LOGD) Log.d(TAG, "cancelListener.onCancel"); 282 dismissDialog(DIALOG_ID); 283 mAlertDialog = null; 284 finish(); 285 } 286 287 }; 288 289 DialogInterface.OnClickListener positiveListener = 290 new DialogInterface.OnClickListener() { 291 292 public void onClick(DialogInterface dialog, int which) { 293 if (Config.LOGD) Log.d(TAG, "positiveListener.onClick " + which); 294 if (intents.length == 1 && which == -1) which = 0; 295 startActivityHelp(intents[which]); 296 dismissDialog(DIALOG_ID); 297 mAlertDialog = null; 298 finish(); 299 } 300 301 }; 302 303 DialogInterface.OnClickListener negativeListener = 304 new DialogInterface.OnClickListener() { 305 306 public void onClick(DialogInterface dialog, int which) { 307 if (Config.LOGD) Log.d(TAG, "negativeListener.onClick " + which); 308 dismissDialog(DIALOG_ID); 309 mAlertDialog = null; 310 finish(); 311 } 312 313 }; 314 315 String[] sentences = new String[intents.length]; 316 for (int i = 0; i < intents.length; i++) { 317 sentences[i] = intents[i].getStringExtra( 318 RecognizerEngine.SENTENCE_EXTRA); 319 } 320 321 mAlertDialog = intents.length > 1 ? 322 new AlertDialog.Builder(VoiceDialerActivity.this) 323 .setTitle(R.string.title) 324 .setItems(sentences, clickListener) 325 .setOnCancelListener(cancelListener) 326 .setNegativeButton(android.R.string.cancel, negativeListener) 327 .show() 328 : 329 new AlertDialog.Builder(VoiceDialerActivity.this) 330 .setTitle(R.string.title) 331 .setItems(sentences, clickListener) 332 .setOnCancelListener(cancelListener) 333 .setPositiveButton(android.R.string.ok, positiveListener) 334 .setNegativeButton(android.R.string.cancel, negativeListener) 335 .show(); 336 337 return mAlertDialog; 338 } 339 340 private class CommandRecognizerClient implements RecognizerClient { 341 static final int MIN_VOLUME_TO_SKIP = 2; 342 /** 343 * Called by the {@link RecognizerEngine} when the microphone is started. 344 */ 345 public void onMicrophoneStart(InputStream mic) { 346 if (Config.LOGD) Log.d(TAG, "onMicrophoneStart"); 347 playSound(ToneGenerator.TONE_PROP_BEEP); 348 349 int ringVolume = mAudioManager.getStreamVolume( 350 AudioManager.STREAM_RING); 351 Log.d(TAG, "ringVolume " + ringVolume); 352 353 if (ringVolume >= MIN_VOLUME_TO_SKIP) { 354 // now we're playing a sound, and corrupting the input sample. 355 // So we need to pull that junk off of the input stream so that the 356 // recognizer won't see it. 357 try { 358 skipBeep(mic); 359 } catch (java.io.IOException e) { 360 Log.e(TAG, "IOException " + e); 361 } 362 } else { 363 Log.d(TAG, "no tone"); 364 } 365 366 if (mVoiceDialerTester != null) return; 367 368 mHandler.post(new Runnable() { 369 public void run() { 370 findViewById(R.id.microphone_loading_view).setVisibility(View.INVISIBLE); 371 ((TextView)findViewById(R.id.state)).setText(R.string.listening); 372 mHandler.post(mMicFlasher); 373 } 374 }); 375 } 376 377 /** 378 * Beep detection 379 */ 380 private static final int START_WINDOW_MS = 500; // Beep detection window duration in ms 381 private static final int SINE_FREQ = 400; // base sine frequency on beep 382 private static final int NUM_PERIODS_BLOCK = 10; // number of sine periods in one energy averaging block 383 private static final int THRESHOLD = 8; // absolute pseudo energy threshold 384 private static final int START = 0; // beep detection start 385 private static final int RISING = 1; // beep rising edge start 386 private static final int TOP = 2; // beep constant energy detected 387 388 void skipBeep(InputStream is) throws IOException { 389 int sampleCount = ((SAMPLE_RATE / SINE_FREQ) * NUM_PERIODS_BLOCK); 390 int blockSize = 2 * sampleCount; // energy averaging block 391 392 if (is == null || blockSize == 0) { 393 return; 394 } 395 396 byte[] buf = new byte[blockSize]; 397 int maxBytes = 2 * ((START_WINDOW_MS * SAMPLE_RATE) / 1000); 398 maxBytes = ((maxBytes-1) / blockSize + 1) * blockSize; 399 400 int count = 0; 401 int state = START; // detection state 402 long prevE = 0; // previous pseudo energy 403 long peak = 0; 404 int threshold = THRESHOLD*sampleCount; // absolute energy threshold 405 Log.d(TAG, "blockSize " + blockSize); 406 407 while (count < maxBytes) { 408 int cnt = 0; 409 while (cnt < blockSize) { 410 int n = is.read(buf, cnt, blockSize-cnt); 411 if (n < 0) { 412 throw new java.io.IOException(); 413 } 414 cnt += n; 415 } 416 417 // compute pseudo energy 418 cnt = blockSize; 419 long sumx = 0; 420 long sumxx = 0; 421 while (cnt >= 2) { 422 short smp = (short)((buf[cnt - 1] << 8) + (buf[cnt - 2] & 0xFF)); 423 sumx += smp; 424 sumxx += smp*smp; 425 cnt -= 2; 426 } 427 long energy = (sumxx*sampleCount - sumx*sumx)/(sampleCount*sampleCount); 428 Log.d(TAG, "sumx " + sumx + " sumxx " + sumxx + " ee " + energy); 429 430 switch (state) { 431 case START: 432 if (energy > threshold && energy > (prevE * 2) && prevE != 0) { 433 // rising edge if energy doubled and > abs threshold 434 state = RISING; 435 if (Config.LOGD) Log.d(TAG, "start RISING: " + count +" time: "+ (((1000*count)/2)/SAMPLE_RATE)); 436 } 437 break; 438 case RISING: 439 if (energy < threshold || energy < (prevE / 2)){ 440 // energy fell back below half of previous, back to start 441 if (Config.LOGD) Log.d(TAG, "back to START: " + count +" time: "+ (((1000*count)/2)/SAMPLE_RATE)); 442 peak = 0; 443 state = START; 444 } else if (energy > (prevE / 2) && energy < (prevE * 2)) { 445 // Start of constant energy 446 if (Config.LOGD) Log.d(TAG, "start TOP: " + count +" time: "+ (((1000*count)/2)/SAMPLE_RATE)); 447 if (peak < energy) { 448 peak = energy; 449 } 450 state = TOP; 451 } 452 break; 453 case TOP: 454 if (energy < threshold || energy < (peak / 2)) { 455 // e went to less than half of the peak 456 if (Config.LOGD) Log.d(TAG, "end TOP: " + count +" time: "+ (((1000*count)/2)/SAMPLE_RATE)); 457 return; 458 } 459 break; 460 } 461 prevE = energy; 462 count += blockSize; 463 } 464 if (Config.LOGD) Log.d(TAG, "no beep detected, timed out"); 465 } 466 467 /** 468 * Called by the {@link RecognizerEngine} if the recognizer fails. 469 */ 470 public void onRecognitionFailure(final String msg) { 471 if (Config.LOGD) Log.d(TAG, "onRecognitionFailure " + msg); 472 473 // get work off UAPI thread 474 mHandler.post(new Runnable() { 475 public void run() { 476 // failure, so beep about it 477 playSound(ToneGenerator.TONE_PROP_NACK); 478 479 mHandler.removeCallbacks(mMicFlasher); 480 ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again); 481 findViewById(R.id.state).setVisibility(View.VISIBLE); 482 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 483 findViewById(R.id.retry_view).setVisibility(View.VISIBLE); 484 485 if (mVoiceDialerTester != null) { 486 mVoiceDialerTester.onRecognitionFailure(msg); 487 startNextTest(); 488 return; 489 } 490 491 mHandler.postDelayed(new Runnable() { 492 public void run() { 493 finish(); 494 } 495 }, FAIL_PAUSE_MSEC); 496 } 497 }); 498 } 499 500 /** 501 * Called by the {@link RecognizerEngine} on an internal error. 502 */ 503 public void onRecognitionError(final String msg) { 504 if (Config.LOGD) Log.d(TAG, "onRecognitionError " + msg); 505 506 // get work off UAPI thread 507 mHandler.post(new Runnable() { 508 public void run() { 509 // error, so beep about it 510 playSound(ToneGenerator.TONE_PROP_NACK); 511 512 mHandler.removeCallbacks(mMicFlasher); 513 ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again); 514 ((TextView)findViewById(R.id.substate)).setText(R.string.recognition_error); 515 findViewById(R.id.state).setVisibility(View.VISIBLE); 516 findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE); 517 findViewById(R.id.retry_view).setVisibility(View.VISIBLE); 518 519 if (mVoiceDialerTester != null) { 520 mVoiceDialerTester.onRecognitionError(msg); 521 startNextTest(); 522 return; 523 } 524 525 mHandler.postDelayed(new Runnable() { 526 public void run() { 527 finish(); 528 } 529 }, FAIL_PAUSE_MSEC); 530 } 531 }); 532 } 533 534 /** 535 * Called by the {@link RecognizerEngine} when is succeeds. If there is 536 * only one item, then the Intent is dispatched immediately. 537 * If there are more, then an AlertDialog is displayed and the user is 538 * prompted to select. 539 * @param intents a list of Intents corresponding to the sentences. 540 */ 541 public void onRecognitionSuccess(final Intent[] intents) { 542 if (Config.LOGD) Log.d(TAG, "onRecognitionSuccess " + intents.length); 543 // repackage our intents as a bundle so that we can pass it into 544 // showDialog. This in required so that we can handle it when 545 // orientation changes and the activity is destroyed and recreated. 546 final Bundle args = new Bundle(); 547 args.putParcelableArray(INTENTS_KEY, intents); 548 549 mHandler.post(new Runnable() { 550 551 public void run() { 552 // success, so beep about it 553 playSound(ToneGenerator.TONE_PROP_ACK); 554 555 mHandler.removeCallbacks(mMicFlasher); 556 557 showDialog(DIALOG_ID, args); 558 559 // start the next test 560 if (mVoiceDialerTester != null) { 561 mVoiceDialerTester.onRecognitionSuccess(intents); 562 startNextTest(); 563 mHandler.postDelayed(new Runnable() { 564 public void run() { 565 dismissDialog(DIALOG_ID); 566 mAlertDialog = null; 567 } 568 }, 2000); 569 } 570 } 571 }); 572 } 573 } 574 575 // post a Toast if not real contacts or microphone 576 private void startActivityHelp(Intent intent) { 577 if (getArg(MICROPHONE_EXTRA) == null && 578 getArg(CONTACTS_EXTRA) == null) { 579 startActivity(intent); 580 } else { 581 notifyText(intent. 582 getStringExtra(RecognizerEngine.SENTENCE_EXTRA) + 583 "\n" + intent.toString()); 584 } 585 586 } 587 @Override 588 protected void onDestroy() { 589 if (Config.LOGD) Log.d(TAG, "onDestroy"); 590 super.onDestroy(); 591 } 592 } 593