Home | History | Annotate | Download | only in research
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.research;
     18 
     19 import android.os.SystemClock;
     20 import android.text.TextUtils;
     21 import android.util.JsonWriter;
     22 import android.util.Log;
     23 
     24 import com.android.inputmethod.latin.SuggestedWords;
     25 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     26 import com.android.inputmethod.latin.define.ProductionFlag;
     27 
     28 import java.io.IOException;
     29 import java.util.ArrayList;
     30 import java.util.Arrays;
     31 import java.util.List;
     32 import java.util.regex.Pattern;
     33 
     34 /**
     35  * A group of log statements related to each other.
     36  *
     37  * A LogUnit is collection of LogStatements, each of which is generated by at a particular point
     38  * in the code.  (There is no LogStatement class; the data is stored across the instance variables
     39  * here.)  A single LogUnit's statements can correspond to all the calls made while in the same
     40  * composing region, or all the calls between committing the last composing region, and the first
     41  * character of the next composing region.
     42  *
     43  * Individual statements in a log may be marked as potentially private.  If so, then they are only
     44  * published to a ResearchLog if the ResearchLogger determines that publishing the entire LogUnit
     45  * will not violate the user's privacy.  Checks for this may include whether other LogUnits have
     46  * been published recently, or whether the LogUnit contains numbers, etc.
     47  */
     48 public class LogUnit {
     49     private static final String TAG = LogUnit.class.getSimpleName();
     50     private static final boolean DEBUG = false
     51             && ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG;
     52 
     53     private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
     54     private static final String[] EMPTY_STRING_ARRAY = new String[0];
     55 
     56     private final ArrayList<LogStatement> mLogStatementList;
     57     private final ArrayList<Object[]> mValuesList;
     58     // Assume that mTimeList is sorted in increasing order.  Do not insert null values into
     59     // mTimeList.
     60     private final ArrayList<Long> mTimeList;
     61     // Words that this LogUnit generates.  Should be null if the data in the LogUnit does not
     62     // generate a genuine word (i.e. separators alone do not count as a word).  Should never be
     63     // empty.  Note that if the user types spaces explicitly, then normally mWords should contain
     64     // only a single word; it will only contain space-separate multiple words if the user does not
     65     // enter a space, and the system enters one automatically.
     66     private String mWords;
     67     private String[] mWordArray = EMPTY_STRING_ARRAY;
     68     private boolean mMayContainDigit;
     69     private boolean mIsPartOfMegaword;
     70     private boolean mContainsCorrection;
     71 
     72     // mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the
     73     // correction.
     74     private int mCorrectionType;
     75     // LogUnits start in this state.  If a word is entered without being corrected, it will have
     76     // this CorrectiontType.
     77     public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
     78     // The LogUnit was corrected manually by the user in an unspecified way.
     79     public static final int CORRECTIONTYPE_CORRECTION = 1;
     80     // The LogUnit was corrected manually by the user to a word not in the list of suggestions of
     81     // the first word typed here.  (Note: this is a heuristic value, it may be incorrect, for
     82     // example, if the user repositions the cursor).
     83     public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
     84     // The LogUnit was corrected manually by the user to a word that was in the list of suggestions
     85     // of the first word typed here.  (Again, a heuristic).  It is probably a typo correction.
     86     public static final int CORRECTIONTYPE_TYPO = 3;
     87     // TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's
     88     // state and statistics.  This should include how many times it has been corrected, whether
     89     // other LogUnit edits were done between edits to this LogUnit, etc.  Also track when a LogUnit
     90     // previously contained a word, but was corrected to empty (because it was deleted, and there is
     91     // no known replacement).
     92 
     93     private SuggestedWords mSuggestedWords;
     94 
     95     public LogUnit() {
     96         mLogStatementList = new ArrayList<LogStatement>();
     97         mValuesList = new ArrayList<Object[]>();
     98         mTimeList = new ArrayList<Long>();
     99         mIsPartOfMegaword = false;
    100         mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
    101         mSuggestedWords = null;
    102     }
    103 
    104     private LogUnit(final ArrayList<LogStatement> logStatementList,
    105             final ArrayList<Object[]> valuesList,
    106             final ArrayList<Long> timeList,
    107             final boolean isPartOfMegaword) {
    108         mLogStatementList = logStatementList;
    109         mValuesList = valuesList;
    110         mTimeList = timeList;
    111         mIsPartOfMegaword = isPartOfMegaword;
    112         mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
    113         mSuggestedWords = null;
    114     }
    115 
    116     private static final Object[] NULL_VALUES = new Object[0];
    117     /**
    118      * Adds a new log statement.  The time parameter in successive calls to this method must be
    119      * monotonically increasing, or splitByTime() will not work.
    120      */
    121     public void addLogStatement(final LogStatement logStatement, final long time,
    122             Object... values) {
    123         if (values == null) {
    124             values = NULL_VALUES;
    125         }
    126         mLogStatementList.add(logStatement);
    127         mValuesList.add(values);
    128         mTimeList.add(time);
    129     }
    130 
    131     /**
    132      * Publish the contents of this LogUnit to {@code researchLog}.
    133      *
    134      * For each publishable {@code LogStatement}, invoke {@link LogStatement#outputToLocked}.
    135      *
    136      * @param researchLog where to publish the contents of this {@code LogUnit}
    137      * @param canIncludePrivateData whether the private data in this {@code LogUnit} should be
    138      * included
    139      *
    140      * @throws IOException if publication to the log file is not possible
    141      */
    142     public synchronized void publishTo(final ResearchLog researchLog,
    143             final boolean canIncludePrivateData) throws IOException {
    144         // Write out any logStatement that passes the privacy filter.
    145         final int size = mLogStatementList.size();
    146         if (size != 0) {
    147             // Note that jsonWriter is only set to a non-null value if the logUnit start text is
    148             // output and at least one logStatement is output.
    149             JsonWriter jsonWriter = null;
    150             for (int i = 0; i < size; i++) {
    151                 final LogStatement logStatement = mLogStatementList.get(i);
    152                 if (!canIncludePrivateData && logStatement.isPotentiallyPrivate()) {
    153                     continue;
    154                 }
    155                 if (mIsPartOfMegaword && logStatement.isPotentiallyRevealing()) {
    156                     continue;
    157                 }
    158                 // Only retrieve the jsonWriter if we need to.  If we don't get this far, then
    159                 // researchLog.getInitializedJsonWriterLocked() will not ever be called, and the
    160                 // file will not have been opened for writing.
    161                 if (jsonWriter == null) {
    162                     jsonWriter = researchLog.getInitializedJsonWriterLocked();
    163                     outputLogUnitStart(jsonWriter, canIncludePrivateData);
    164                 }
    165                 logStatement.outputToLocked(jsonWriter, mTimeList.get(i), mValuesList.get(i));
    166             }
    167             if (jsonWriter != null) {
    168                 // We must have called logUnitStart earlier, so emit a logUnitStop.
    169                 outputLogUnitStop(jsonWriter);
    170             }
    171         }
    172     }
    173 
    174     private static final String WORD_KEY = "_wo";
    175     private static final String CORRECTION_TYPE_KEY = "_corType";
    176     private static final String LOG_UNIT_BEGIN_KEY = "logUnitStart";
    177     private static final String LOG_UNIT_END_KEY = "logUnitEnd";
    178 
    179     final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA =
    180             new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */,
    181                     false /* isPotentiallyRevealing */, WORD_KEY, CORRECTION_TYPE_KEY);
    182     final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA =
    183             new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */,
    184                     false /* isPotentiallyRevealing */);
    185     private void outputLogUnitStart(final JsonWriter jsonWriter,
    186             final boolean canIncludePrivateData) {
    187         final LogStatement logStatement;
    188         if (canIncludePrivateData) {
    189             LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter,
    190                     SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType());
    191         } else {
    192             LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter,
    193                     SystemClock.uptimeMillis());
    194         }
    195     }
    196 
    197     final LogStatement LOGSTATEMENT_LOG_UNIT_END =
    198             new LogStatement(LOG_UNIT_END_KEY, false /* isPotentiallyPrivate */,
    199                     false /* isPotentiallyRevealing */);
    200     private void outputLogUnitStop(final JsonWriter jsonWriter) {
    201         LOGSTATEMENT_LOG_UNIT_END.outputToLocked(jsonWriter, SystemClock.uptimeMillis());
    202     }
    203 
    204     /**
    205      * Mark the current logUnit as containing data to generate {@code newWords}.
    206      *
    207      * If {@code setWord()} was previously called for this LogUnit, then the method will try to
    208      * determine what kind of correction it is, and update its internal state of the correctionType
    209      * accordingly.
    210      *
    211      * @param newWords The words this LogUnit generates.  Caller should not pass null or the empty
    212      * string.
    213      */
    214     public void setWords(final String newWords) {
    215         if (hasOneOrMoreWords()) {
    216             // The word was already set once, and it is now being changed.  See if the new word
    217             // is close to the old word.  If so, then the change is probably a typo correction.
    218             // If not, the user may have decided to enter a different word, so flag it.
    219             if (mSuggestedWords != null) {
    220                 if (isInSuggestedWords(newWords, mSuggestedWords)) {
    221                     mCorrectionType = CORRECTIONTYPE_TYPO;
    222                 } else {
    223                     mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD;
    224                 }
    225             } else {
    226                 // No suggested words, so it's not clear whether it's a typo or different word.
    227                 // Mark it as a generic correction.
    228                 mCorrectionType = CORRECTIONTYPE_CORRECTION;
    229             }
    230         } else {
    231             mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
    232         }
    233         mWords = newWords;
    234 
    235         // Update mWordArray
    236         mWordArray = (TextUtils.isEmpty(mWords)) ? EMPTY_STRING_ARRAY
    237                 : WHITESPACE_PATTERN.split(mWords);
    238         if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) {
    239             // Empty string at beginning of array.  Must have been whitespace at the start of the
    240             // word.  Remove the empty string.
    241             mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length);
    242         }
    243     }
    244 
    245     public String getWordsAsString() {
    246         return mWords;
    247     }
    248 
    249     /**
    250      * Retuns the words generated by the data in this LogUnit.
    251      *
    252      * The first word may be an empty string, if the data in the LogUnit started by generating
    253      * whitespace.
    254      *
    255      * @return the array of words. an empty list of there are no words associated with this LogUnit.
    256      */
    257     public String[] getWordsAsStringArray() {
    258         return mWordArray;
    259     }
    260 
    261     public boolean hasOneOrMoreWords() {
    262         return mWordArray.length >= 1;
    263     }
    264 
    265     public int getNumWords() {
    266         return mWordArray.length;
    267     }
    268 
    269     // TODO: Refactor to eliminate getter/setters
    270     public void setMayContainDigit() {
    271         mMayContainDigit = true;
    272     }
    273 
    274     // TODO: Refactor to eliminate getter/setters
    275     public boolean mayContainDigit() {
    276         return mMayContainDigit;
    277     }
    278 
    279     // TODO: Refactor to eliminate getter/setters
    280     public void setContainsCorrection() {
    281         mContainsCorrection = true;
    282     }
    283 
    284     // TODO: Refactor to eliminate getter/setters
    285     public boolean containsCorrection() {
    286         return mContainsCorrection;
    287     }
    288 
    289     // TODO: Refactor to eliminate getter/setters
    290     public void setCorrectionType(final int correctionType) {
    291         mCorrectionType = correctionType;
    292     }
    293 
    294     // TODO: Refactor to eliminate getter/setters
    295     public int getCorrectionType() {
    296         return mCorrectionType;
    297     }
    298 
    299     public boolean isEmpty() {
    300         return mLogStatementList.isEmpty();
    301     }
    302 
    303     /**
    304      * Split this logUnit, with all events before maxTime staying in the current logUnit, and all
    305      * events after maxTime going into a new LogUnit that is returned.
    306      */
    307     public LogUnit splitByTime(final long maxTime) {
    308         // Assume that mTimeList is in sorted order.
    309         final int length = mTimeList.size();
    310         // TODO: find time by binary search, e.g. using Collections#binarySearch()
    311         for (int index = 0; index < length; index++) {
    312             if (mTimeList.get(index) > maxTime) {
    313                 final List<LogStatement> laterLogStatements =
    314                         mLogStatementList.subList(index, length);
    315                 final List<Object[]> laterValues = mValuesList.subList(index, length);
    316                 final List<Long> laterTimes = mTimeList.subList(index, length);
    317 
    318                 // Create the LogUnit containing the later logStatements and associated data.
    319                 final LogUnit newLogUnit = new LogUnit(
    320                         new ArrayList<LogStatement>(laterLogStatements),
    321                         new ArrayList<Object[]>(laterValues),
    322                         new ArrayList<Long>(laterTimes),
    323                         true /* isPartOfMegaword */);
    324                 newLogUnit.mWords = null;
    325                 newLogUnit.mMayContainDigit = mMayContainDigit;
    326                 newLogUnit.mContainsCorrection = mContainsCorrection;
    327 
    328                 // Purge the logStatements and associated data from this LogUnit.
    329                 laterLogStatements.clear();
    330                 laterValues.clear();
    331                 laterTimes.clear();
    332                 mIsPartOfMegaword = true;
    333 
    334                 return newLogUnit;
    335             }
    336         }
    337         return new LogUnit();
    338     }
    339 
    340     public void append(final LogUnit logUnit) {
    341         mLogStatementList.addAll(logUnit.mLogStatementList);
    342         mValuesList.addAll(logUnit.mValuesList);
    343         mTimeList.addAll(logUnit.mTimeList);
    344         mWords = null;
    345         if (logUnit.mWords != null) {
    346             setWords(logUnit.mWords);
    347         }
    348         mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit;
    349         mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection;
    350         mIsPartOfMegaword = false;
    351     }
    352 
    353     public SuggestedWords getSuggestions() {
    354         return mSuggestedWords;
    355     }
    356 
    357     /**
    358      * Initialize the suggestions.
    359      *
    360      * Once set to a non-null value, the suggestions may not be changed again.  This is to keep
    361      * track of the list of words that are close to the user's initial effort to type the word.
    362      * Only words that are close to the initial effort are considered typo corrections.
    363      */
    364     public void initializeSuggestions(final SuggestedWords suggestedWords) {
    365         if (mSuggestedWords == null) {
    366             mSuggestedWords = suggestedWords;
    367         }
    368     }
    369 
    370     private static boolean isInSuggestedWords(final String queryWord,
    371             final SuggestedWords suggestedWords) {
    372         if (TextUtils.isEmpty(queryWord)) {
    373             return false;
    374         }
    375         final int size = suggestedWords.size();
    376         for (int i = 0; i < size; i++) {
    377             final SuggestedWordInfo wordInfo = suggestedWords.getInfo(i);
    378             if (queryWord.equals(wordInfo.mWord)) {
    379                 return true;
    380             }
    381         }
    382         return false;
    383     }
    384 
    385     /**
    386      * Remove data associated with selecting the Research button.
    387      *
    388      * A LogUnit will capture all user interactions with the IME, including the "meta-interactions"
    389      * of using the Research button to control the logging (e.g. by starting and stopping recording
    390      * of a test case).  Because meta-interactions should not be part of the normal log, calling
    391      * this method will set a field in the LogStatements of the motion events to indiciate that
    392      * they should be disregarded.
    393      *
    394      * This implementation assumes that the data recorded by the meta-interaction takes the
    395      * form of all events following the first MotionEvent.ACTION_DOWN before the first long-press
    396      * before the last onCodeEvent containing a code matching {@code LogStatement.VALUE_RESEARCH}.
    397      *
    398      * @returns true if data was removed
    399      */
    400     public boolean removeResearchButtonInvocation() {
    401         // This method is designed to be idempotent.
    402 
    403         // First, find last invocation of "research" key
    404         final int indexOfLastResearchKey = findLastIndexContainingKeyValue(
    405                 LogStatement.TYPE_POINTER_TRACKER_CALL_LISTENER_ON_CODE_INPUT,
    406                 LogStatement.KEY_CODE, LogStatement.VALUE_RESEARCH);
    407         if (indexOfLastResearchKey < 0) {
    408             // Could not find invocation of "research" key.  Leave log as is.
    409             if (DEBUG) {
    410                 Log.d(TAG, "Could not find research key");
    411             }
    412             return false;
    413         }
    414 
    415         // Look for the long press that started the invocation of the research key code input.
    416         final int indexOfLastLongPressBeforeResearchKey =
    417                 findLastIndexBefore(LogStatement.TYPE_MAIN_KEYBOARD_VIEW_ON_LONG_PRESS,
    418                         indexOfLastResearchKey);
    419 
    420         // Look for DOWN event preceding the long press
    421         final int indexOfLastDownEventBeforeLongPress =
    422                 findLastIndexContainingKeyValueBefore(LogStatement.TYPE_MOTION_EVENT,
    423                         LogStatement.ACTION, LogStatement.VALUE_DOWN,
    424                         indexOfLastLongPressBeforeResearchKey);
    425 
    426         // Flag all LatinKeyboardViewProcessMotionEvents from the DOWN event to the research key as
    427         // logging-related
    428         final int startingIndex = indexOfLastDownEventBeforeLongPress == -1 ? 0
    429                 : indexOfLastDownEventBeforeLongPress;
    430         for (int index = startingIndex; index < indexOfLastResearchKey; index++) {
    431             final LogStatement logStatement = mLogStatementList.get(index);
    432             final String type = logStatement.getType();
    433             final Object[] values = mValuesList.get(index);
    434             if (type.equals(LogStatement.TYPE_MOTION_EVENT)) {
    435                 logStatement.setValue(LogStatement.KEY_IS_LOGGING_RELATED, values, true);
    436             }
    437         }
    438         return true;
    439     }
    440 
    441     /**
    442      * Find the index of the last LogStatement before {@code startingIndex} of type {@code type}.
    443      *
    444      * @param queryType a String that must be {@code String.equals()} to the LogStatement type
    445      * @param startingIndex the index to start the backward search from.  Must be less than the
    446      * length of mLogStatementList, or an IndexOutOfBoundsException is thrown.  Can be negative,
    447      * in which case -1 is returned.
    448      *
    449      * @return The index of the last LogStatement, -1 if none exists.
    450      */
    451     private int findLastIndexBefore(final String queryType, final int startingIndex) {
    452         return findLastIndexContainingKeyValueBefore(queryType, null, null, startingIndex);
    453     }
    454 
    455     /**
    456      * Find the index of the last LogStatement before {@code startingIndex} of type {@code type}
    457      * containing the given key-value pair.
    458      *
    459      * @param queryType a String that must be {@code String.equals()} to the LogStatement type
    460      * @param queryKey a String that must be {@code String.equals()} to a key in the LogStatement
    461      * @param queryValue an Object that must be {@code String.equals()} to the key's corresponding
    462      * value
    463      *
    464      * @return The index of the last LogStatement, -1 if none exists.
    465      */
    466     private int findLastIndexContainingKeyValue(final String queryType, final String queryKey,
    467             final Object queryValue) {
    468         return findLastIndexContainingKeyValueBefore(queryType, queryKey, queryValue,
    469                 mLogStatementList.size() - 1);
    470     }
    471 
    472     /**
    473      * Find the index of the last LogStatement before {@code startingIndex} of type {@code type}
    474      * containing the given key-value pair.
    475      *
    476      * @param queryType a String that must be {@code String.equals()} to the LogStatement type
    477      * @param queryKey a String that must be {@code String.equals()} to a key in the LogStatement
    478      * @param queryValue an Object that must be {@code String.equals()} to the key's corresponding
    479      * value
    480      * @param startingIndex the index to start the backward search from.  Must be less than the
    481      * length of mLogStatementList, or an IndexOutOfBoundsException is thrown.  Can be negative,
    482      * in which case -1 is returned.
    483      *
    484      * @return The index of the last LogStatement, -1 if none exists.
    485      */
    486     private int findLastIndexContainingKeyValueBefore(final String queryType, final String queryKey,
    487             final Object queryValue, final int startingIndex) {
    488         if (startingIndex < 0) {
    489             return -1;
    490         }
    491         for (int index = startingIndex; index >= 0; index--) {
    492             final LogStatement logStatement = mLogStatementList.get(index);
    493             final String type = logStatement.getType();
    494             if (type.equals(queryType) && (queryKey == null
    495                     || logStatement.containsKeyValuePair(queryKey, queryValue,
    496                             mValuesList.get(index)))) {
    497                 return index;
    498             }
    499         }
    500         return -1;
    501     }
    502 }
    503