/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.research;

import android.os.SystemClock;
import android.text.TextUtils;
import android.util.JsonWriter;
import android.util.Log;

import com.android.inputmethod.latin.SuggestedWords;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.define.ProductionFlag;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

/**
 * A group of log statements related to each other.
 *
 * A LogUnit is a collection of LogStatements, each of which is generated at a particular point
 * in the code.  (There is no LogStatement-with-data class; the statement, its values and its
 * timestamp are stored across the parallel instance lists here.)  A single LogUnit's statements
 * can correspond to all the calls made while in the same composing region, or all the calls
 * between committing the last composing region, and the first character of the next composing
 * region.
 *
 * Individual statements in a log may be marked as potentially private.  If so, then they are only
 * published to a ResearchLog if the ResearchLogger determines that publishing the entire LogUnit
 * will not violate the user's privacy.  Checks for this may include whether other LogUnits have
 * been published recently, or whether the LogUnit contains numbers, etc.
 */
public class LogUnit {
    private static final String TAG = LogUnit.class.getSimpleName();
    private static final boolean DEBUG = false
            && ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG;

    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    private static final String[] EMPTY_STRING_ARRAY = new String[0];

    // The three lists below are parallel: entry i of each list describes the same log statement.
    private final ArrayList<LogStatement> mLogStatementList;
    private final ArrayList<Object[]> mValuesList;
    // Assume that mTimeList is sorted in increasing order.  Do not insert null values into
    // mTimeList.
    private final ArrayList<Long> mTimeList;
    // Words that this LogUnit generates.  Should be null if the data in the LogUnit does not
    // generate a genuine word (i.e. separators alone do not count as a word).  Should never be
    // empty.  Note that if the user types spaces explicitly, then normally mWords should contain
    // only a single word; it will only contain space-separated multiple words if the user does not
    // enter a space, and the system enters one automatically.
    private String mWords;
    // mWords split on whitespace; maintained by setWords().
    private String[] mWordArray = EMPTY_STRING_ARRAY;
    private boolean mMayContainDigit;
    private boolean mIsPartOfMegaword;
    private boolean mContainsCorrection;

    // mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the
    // correction.
    private int mCorrectionType;
    // LogUnits start in this state.  If a word is entered without being corrected, it will have
    // this correction type.
    public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
    // The LogUnit was corrected manually by the user in an unspecified way.
    public static final int CORRECTIONTYPE_CORRECTION = 1;
    // The LogUnit was corrected manually by the user to a word not in the list of suggestions of
    // the first word typed here.  (Note: this is a heuristic value, it may be incorrect, for
    // example, if the user repositions the cursor).
    public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
    // The LogUnit was corrected manually by the user to a word that was in the list of suggestions
    // of the first word typed here.  (Again, a heuristic).  It is probably a typo correction.
    public static final int CORRECTIONTYPE_TYPO = 3;
    // TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's
    // state and statistics.  This should include how many times it has been corrected, whether
    // other LogUnit edits were done between edits to this LogUnit, etc.  Also track when a LogUnit
    // previously contained a word, but was corrected to empty (because it was deleted, and there is
    // no known replacement).

    private SuggestedWords mSuggestedWords;

    /**
     * Creates an empty LogUnit that is not part of a megaword, has no correction and no
     * suggestions.
     */
    public LogUnit() {
        mLogStatementList = new ArrayList<LogStatement>();
        mValuesList = new ArrayList<Object[]>();
        mTimeList = new ArrayList<Long>();
        mIsPartOfMegaword = false;
        mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
        mSuggestedWords = null;
    }

    /**
     * Creates a LogUnit backed directly by the given lists (they are not copied).
     *
     * Used by {@link #splitByTime(long)} to build the LogUnit holding the later statements.
     */
    private LogUnit(final ArrayList<LogStatement> logStatementList,
            final ArrayList<Object[]> valuesList,
            final ArrayList<Long> timeList,
            final boolean isPartOfMegaword) {
        mLogStatementList = logStatementList;
        mValuesList = valuesList;
        mTimeList = timeList;
        mIsPartOfMegaword = isPartOfMegaword;
        mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
        mSuggestedWords = null;
    }

    // Shared stand-in stored when addLogStatement() is called with a null values array.
    private static final Object[] NULL_VALUES = new Object[0];

    /**
     * Adds a new log statement.  The time parameter in successive calls to this method must be
     * monotonically increasing, or splitByTime() will not work.
     *
     * @param logStatement the statement to record
     * @param time the time associated with the statement
     * @param values the values for the statement; {@code null} is stored as an empty array
     */
    public void addLogStatement(final LogStatement logStatement, final long time,
            Object... values) {
        if (values == null) {
            values = NULL_VALUES;
        }
        mLogStatementList.add(logStatement);
        mValuesList.add(values);
        mTimeList.add(time);
    }

    /**
     * Publish the contents of this LogUnit to {@code researchLog}.
     *
     * For each publishable {@code LogStatement}, invoke {@link LogStatement#outputToLocked}.
     * The published statements are bracketed by a logUnitStart and a logUnitEnd statement; if no
     * statement passes the filters, nothing at all is written and the JsonWriter is never
     * requested from {@code researchLog}.
     *
     * @param researchLog where to publish the contents of this {@code LogUnit}
     * @param canIncludePrivateData whether the private data in this {@code LogUnit} should be
     * included
     *
     * @throws IOException if publication to the log file is not possible
     */
    public synchronized void publishTo(final ResearchLog researchLog,
            final boolean canIncludePrivateData) throws IOException {
        // Write out any logStatement that passes the privacy filter.
        final int size = mLogStatementList.size();
        if (size == 0) {
            return;
        }
        // Note that jsonWriter is only set to a non-null value if the logUnit start text is
        // output and at least one logStatement is output.
        JsonWriter jsonWriter = null;
        for (int i = 0; i < size; i++) {
            final LogStatement logStatement = mLogStatementList.get(i);
            if (!canIncludePrivateData && logStatement.isPotentiallyPrivate()) {
                continue;
            }
            if (mIsPartOfMegaword && logStatement.isPotentiallyRevealing()) {
                continue;
            }
            // Only retrieve the jsonWriter if we need to.  If we don't get this far, then
            // researchLog.getInitializedJsonWriterLocked() will not ever be called, and the
            // file will not have been opened for writing.
            if (jsonWriter == null) {
                jsonWriter = researchLog.getInitializedJsonWriterLocked();
                outputLogUnitStart(jsonWriter, canIncludePrivateData);
            }
            logStatement.outputToLocked(jsonWriter, mTimeList.get(i), mValuesList.get(i));
        }
        if (jsonWriter != null) {
            // We must have called logUnitStart earlier, so emit a logUnitStop.
            outputLogUnitStop(jsonWriter);
        }
    }

    private static final String WORD_KEY = "_wo";
    private static final String CORRECTION_TYPE_KEY = "_corType";
    private static final String LOG_UNIT_BEGIN_KEY = "logUnitStart";
    private static final String LOG_UNIT_END_KEY = "logUnitEnd";

    // The bracketing statements are identical for every LogUnit, so they are shared statics
    // rather than being re-created for each instance.
    static final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA =
            new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */,
                    false /* isPotentiallyRevealing */, WORD_KEY, CORRECTION_TYPE_KEY);
    static final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA =
            new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */,
                    false /* isPotentiallyRevealing */);

    /**
     * Emit the logUnitStart statement, stamped with the current uptime.
     *
     * The words and correction type are only included when {@code canIncludePrivateData} is
     * true.
     */
    private void outputLogUnitStart(final JsonWriter jsonWriter,
            final boolean canIncludePrivateData) {
        if (canIncludePrivateData) {
            LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter,
                    SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType());
        } else {
            LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter,
                    SystemClock.uptimeMillis());
        }
    }

    static final LogStatement LOGSTATEMENT_LOG_UNIT_END =
            new LogStatement(LOG_UNIT_END_KEY, false /* isPotentiallyPrivate */,
                    false /* isPotentiallyRevealing */);

    /**
     * Emit the logUnitEnd statement, stamped with the current uptime.
     */
    private void outputLogUnitStop(final JsonWriter jsonWriter) {
        LOGSTATEMENT_LOG_UNIT_END.outputToLocked(jsonWriter, SystemClock.uptimeMillis());
    }

    /**
     * Mark the current logUnit as containing data to generate {@code newWords}.
     *
     * If {@code setWords()} was previously called for this LogUnit, then the method will try to
     * determine what kind of correction it is, and update its internal state of the correctionType
     * accordingly.
     *
     * @param newWords The words this LogUnit generates.  Caller should not pass null or the empty
     * string.
     */
    public void setWords(final String newWords) {
        if (hasOneOrMoreWords()) {
            // The word was already set once, and it is now being changed.  See if the new word
            // is close to the old word.  If so, then the change is probably a typo correction.
            // If not, the user may have decided to enter a different word, so flag it.
            if (mSuggestedWords != null) {
                if (isInSuggestedWords(newWords, mSuggestedWords)) {
                    mCorrectionType = CORRECTIONTYPE_TYPO;
                } else {
                    mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD;
                }
            } else {
                // No suggested words, so it's not clear whether it's a typo or different word.
                // Mark it as a generic correction.
                mCorrectionType = CORRECTIONTYPE_CORRECTION;
            }
        } else {
            mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
        }
        mWords = newWords;

        // Update mWordArray
        mWordArray = (TextUtils.isEmpty(mWords)) ? EMPTY_STRING_ARRAY
                : WHITESPACE_PATTERN.split(mWords);
        if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) {
            // Empty string at beginning of array.  Must have been whitespace at the start of the
            // word.  Remove the empty string.
            mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length);
        }
    }

    /**
     * Returns the words exactly as last stored, or null if none are set.
     */
    public String getWordsAsString() {
        return mWords;
    }

    /**
     * Returns the words generated by the data in this LogUnit.
     *
     * The returned array is the internal array, not a copy.
     *
     * @return the array of words; an empty array if there are no words associated with this
     * LogUnit.
     */
    public String[] getWordsAsStringArray() {
        return mWordArray;
    }

    public boolean hasOneOrMoreWords() {
        return mWordArray.length >= 1;
    }

    public int getNumWords() {
        return mWordArray.length;
    }

    // TODO: Refactor to eliminate getter/setters
    public void setMayContainDigit() {
        mMayContainDigit = true;
    }

    // TODO: Refactor to eliminate getter/setters
    public boolean mayContainDigit() {
        return mMayContainDigit;
    }

    // TODO: Refactor to eliminate getter/setters
    public void setContainsCorrection() {
        mContainsCorrection = true;
    }

    // TODO: Refactor to eliminate getter/setters
    public boolean containsCorrection() {
        return mContainsCorrection;
    }

    // TODO: Refactor to eliminate getter/setters
    public void setCorrectionType(final int correctionType) {
        mCorrectionType = correctionType;
    }

    // TODO: Refactor to eliminate getter/setters
    public int getCorrectionType() {
        return mCorrectionType;
    }

    /**
     * Returns true if this LogUnit holds no log statements.
     */
    public boolean isEmpty() {
        return mLogStatementList.isEmpty();
    }

    /**
     * Split this logUnit, with all events at or before maxTime staying in the current logUnit, and
     * all events after maxTime going into a new LogUnit that is returned.
     *
     * When a split happens, both this LogUnit and the returned one are marked as part of a
     * megaword.  The returned LogUnit has no words, and inherits the mayContainDigit and
     * containsCorrection flags.
     *
     * @param maxTime the latest time that stays in this LogUnit
     * @return the LogUnit holding the later events; a new, empty LogUnit if no event is later
     * than {@code maxTime}
     */
    public LogUnit splitByTime(final long maxTime) {
        // Assume that mTimeList is in sorted order.
        final int length = mTimeList.size();
        // TODO: find time by binary search, e.g. using Collections#binarySearch()
        for (int index = 0; index < length; index++) {
            if (mTimeList.get(index) > maxTime) {
                // These are views onto the backing lists, so clearing them below removes the
                // tail from this LogUnit.
                final List<LogStatement> laterLogStatements =
                        mLogStatementList.subList(index, length);
                final List<Object[]> laterValues = mValuesList.subList(index, length);
                final List<Long> laterTimes = mTimeList.subList(index, length);

                // Create the LogUnit containing the later logStatements and associated data.
                // The views are copied first, before they are cleared.
                final LogUnit newLogUnit = new LogUnit(
                        new ArrayList<LogStatement>(laterLogStatements),
                        new ArrayList<Object[]>(laterValues),
                        new ArrayList<Long>(laterTimes),
                        true /* isPartOfMegaword */);
                newLogUnit.mWords = null;
                newLogUnit.mMayContainDigit = mMayContainDigit;
                newLogUnit.mContainsCorrection = mContainsCorrection;

                // Purge the logStatements and associated data from this LogUnit.
                laterLogStatements.clear();
                laterValues.clear();
                laterTimes.clear();
                mIsPartOfMegaword = true;

                return newLogUnit;
            }
        }
        return new LogUnit();
    }

    /**
     * Append the statements of {@code logUnit} to this LogUnit.
     *
     * The words of this LogUnit are replaced by those of {@code logUnit} (or cleared if it has
     * none), the mayContainDigit and containsCorrection flags are OR-ed together, and this
     * LogUnit is no longer considered part of a megaword.
     *
     * @param logUnit the LogUnit whose contents are appended; it is not modified
     */
    public void append(final LogUnit logUnit) {
        mLogStatementList.addAll(logUnit.mLogStatementList);
        mValuesList.addAll(logUnit.mValuesList);
        mTimeList.addAll(logUnit.mTimeList);
        // NOTE(review): only mWords is cleared here; mWordArray keeps its previous contents, so
        // hasOneOrMoreWords()/getNumWords() may still report the old words, and a following
        // setWords() is then classified as a correction.  Confirm with the callers whether this
        // is relied upon before resetting mWordArray as well.
        mWords = null;
        if (logUnit.mWords != null) {
            setWords(logUnit.mWords);
        }
        mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit;
        mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection;
        mIsPartOfMegaword = false;
    }

    public SuggestedWords getSuggestions() {
        return mSuggestedWords;
    }

    /**
     * Initialize the suggestions.
     *
     * Once set to a non-null value, the suggestions may not be changed again.  This is to keep
     * track of the list of words that are close to the user's initial effort to type the word.
     * Only words that are close to the initial effort are considered typo corrections.
     *
     * @param suggestedWords the suggestions to remember; ignored if suggestions are already set
     */
    public void initializeSuggestions(final SuggestedWords suggestedWords) {
        if (mSuggestedWords == null) {
            mSuggestedWords = suggestedWords;
        }
    }

    /**
     * Returns true if {@code queryWord} is non-empty and equals the word of one of the entries
     * in {@code suggestedWords}.
     */
    private static boolean isInSuggestedWords(final String queryWord,
            final SuggestedWords suggestedWords) {
        if (TextUtils.isEmpty(queryWord)) {
            return false;
        }
        final int size = suggestedWords.size();
        for (int i = 0; i < size; i++) {
            final SuggestedWordInfo wordInfo = suggestedWords.getInfo(i);
            if (queryWord.equals(wordInfo.mWord)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Remove data associated with selecting the Research button.
     *
     * A LogUnit will capture all user interactions with the IME, including the "meta-interactions"
     * of using the Research button to control the logging (e.g. by starting and stopping recording
     * of a test case).  Because meta-interactions should not be part of the normal log, calling
     * this method will set a field in the LogStatements of the motion events to indicate that
     * they should be disregarded.
     *
     * This implementation assumes that the data recorded by the meta-interaction takes the
     * form of all events following the first MotionEvent.ACTION_DOWN before the first long-press
     * before the last onCodeEvent containing a code matching {@code LogStatement.VALUE_RESEARCH}.
     *
     * @return true if an invocation of the research key was found (and the motion events leading
     * up to it, if any, were flagged); false if the log was left as is
     */
    public boolean removeResearchButtonInvocation() {
        // This method is designed to be idempotent.

        // First, find last invocation of "research" key
        final int indexOfLastResearchKey = findLastIndexContainingKeyValue(
                LogStatement.TYPE_POINTER_TRACKER_CALL_LISTENER_ON_CODE_INPUT,
                LogStatement.KEY_CODE, LogStatement.VALUE_RESEARCH);
        if (indexOfLastResearchKey < 0) {
            // Could not find invocation of "research" key.  Leave log as is.
            if (DEBUG) {
                Log.d(TAG, "Could not find research key");
            }
            return false;
        }

        // Look for the long press that started the invocation of the research key code input.
        final int indexOfLastLongPressBeforeResearchKey =
                findLastIndexBefore(LogStatement.TYPE_MAIN_KEYBOARD_VIEW_ON_LONG_PRESS,
                        indexOfLastResearchKey);

        // Look for DOWN event preceding the long press
        final int indexOfLastDownEventBeforeLongPress =
                findLastIndexContainingKeyValueBefore(LogStatement.TYPE_MOTION_EVENT,
                        LogStatement.ACTION, LogStatement.VALUE_DOWN,
                        indexOfLastLongPressBeforeResearchKey);

        // Flag all LatinKeyboardViewProcessMotionEvents from the DOWN event to the research key as
        // logging-related.  If no DOWN event was found, start from the beginning of the LogUnit.
        final int startingIndex = indexOfLastDownEventBeforeLongPress == -1 ? 0
                : indexOfLastDownEventBeforeLongPress;
        for (int index = startingIndex; index < indexOfLastResearchKey; index++) {
            final LogStatement logStatement = mLogStatementList.get(index);
            final String type = logStatement.getType();
            final Object[] values = mValuesList.get(index);
            if (type.equals(LogStatement.TYPE_MOTION_EVENT)) {
                logStatement.setValue(LogStatement.KEY_IS_LOGGING_RELATED, values, true);
            }
        }
        return true;
    }

    /**
     * Find the index of the last LogStatement of type {@code queryType}, searching backward
     * from {@code startingIndex} (inclusive).
     *
     * @param queryType a String that must be {@code String.equals()} to the LogStatement type
     * @param startingIndex the index to start the backward search from.  Must be less than the
     * length of mLogStatementList, or an IndexOutOfBoundsException is thrown.  Can be negative,
     * in which case -1 is returned.
     *
     * @return The index of the last LogStatement, -1 if none exists.
     */
    private int findLastIndexBefore(final String queryType, final int startingIndex) {
        return findLastIndexContainingKeyValueBefore(queryType, null, null, startingIndex);
    }

    /**
     * Find the index of the last LogStatement in this LogUnit of type {@code queryType}
     * containing the given key-value pair.
     *
     * @param queryType a String that must be {@code String.equals()} to the LogStatement type
     * @param queryKey a String naming the key to look for; if null, only the type is checked
     * @param queryValue the value the key must correspond to; the comparison is delegated to
     * {@code LogStatement#containsKeyValuePair}
     *
     * @return The index of the last LogStatement, -1 if none exists.
     */
    private int findLastIndexContainingKeyValue(final String queryType, final String queryKey,
            final Object queryValue) {
        return findLastIndexContainingKeyValueBefore(queryType, queryKey, queryValue,
                mLogStatementList.size() - 1);
    }

    /**
     * Find the index of the last LogStatement of type {@code queryType} containing the given
     * key-value pair, searching backward from {@code startingIndex} (inclusive).
     *
     * @param queryType a String that must be {@code String.equals()} to the LogStatement type
     * @param queryKey a String naming the key to look for; if null, only the type is checked
     * @param queryValue the value the key must correspond to; the comparison is delegated to
     * {@code LogStatement#containsKeyValuePair}
     * @param startingIndex the index to start the backward search from.  Must be less than the
     * length of mLogStatementList, or an IndexOutOfBoundsException is thrown.  Can be negative,
     * in which case -1 is returned.
     *
     * @return The index of the last LogStatement, -1 if none exists.
     */
    private int findLastIndexContainingKeyValueBefore(final String queryType, final String queryKey,
            final Object queryValue, final int startingIndex) {
        if (startingIndex < 0) {
            return -1;
        }
        for (int index = startingIndex; index >= 0; index--) {
            final LogStatement logStatement = mLogStatementList.get(index);
            final String type = logStatement.getType();
            if (type.equals(queryType) && (queryKey == null
                    || logStatement.containsKeyValuePair(queryKey, queryValue,
                            mValuesList.get(index)))) {
                return index;
            }
        }
        return -1;
    }
}