Home | History | Annotate | Download | only in research
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.research;
     18 
     19 import com.android.inputmethod.latin.Dictionary;
     20 import com.android.inputmethod.latin.Suggest;
     21 
     22 import java.util.Random;
     23 
     24 public class MainLogBuffer extends LogBuffer {
     25     // The size of the n-grams logged.  E.g. N_GRAM_SIZE = 2 means to sample bigrams.
     26     private static final int N_GRAM_SIZE = 2;
     27     // The number of words between n-grams to omit from the log.
     28     private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES = 18;
     29 
     30     private final ResearchLog mResearchLog;
     31     private Suggest mSuggest;
     32 
     33     // The minimum periodicity with which n-grams can be sampled.  E.g. mWinWordPeriod is 10 if
     34     // every 10th bigram is sampled, i.e., words 1-8 are not, but the bigram at words 9 and 10, etc.
     35     // for 11-18, and the bigram at words 19 and 20.  If an n-gram is not safe (e.g. it  contains a
     36     // number in the middle or an out-of-vocabulary word), then sampling is delayed until a safe
     37     // n-gram does appear.
     38     /* package for test */ int mMinWordPeriod;
     39 
     40     // Counter for words left to suppress before an n-gram can be sampled.  Reset to mMinWordPeriod
     41     // after a sample is taken.
     42     /* package for test */ int mWordsUntilSafeToSample;
     43 
     44     public MainLogBuffer(final ResearchLog researchLog) {
     45         super(N_GRAM_SIZE);
     46         mResearchLog = researchLog;
     47         mMinWordPeriod = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES + N_GRAM_SIZE;
     48         final Random random = new Random();
     49         mWordsUntilSafeToSample = random.nextInt(mMinWordPeriod);
     50     }
     51 
     52     public void setSuggest(Suggest suggest) {
     53         mSuggest = suggest;
     54     }
     55 
     56     @Override
     57     public void shiftIn(final LogUnit newLogUnit) {
     58         super.shiftIn(newLogUnit);
     59         if (newLogUnit.hasWord()) {
     60             if (mWordsUntilSafeToSample > 0) {
     61                 mWordsUntilSafeToSample--;
     62             }
     63         }
     64     }
     65 
     66     public void resetWordCounter() {
     67         mWordsUntilSafeToSample = mMinWordPeriod;
     68     }
     69 
     70     /**
     71      * Determines whether the content of the MainLogBuffer can be safely uploaded in its complete
     72      * form and still protect the user's privacy.
     73      *
     74      * The size of the MainLogBuffer is just enough to hold one n-gram, its corrections, and any
     75      * non-character data that is typed between words.  The decision about privacy is made based on
     76      * the buffer's entire content.  If it is decided that the privacy risks are too great to upload
     77      * the contents of this buffer, a censored version of the LogItems may still be uploaded.  E.g.,
     78      * the screen orientation and other characteristics about the device can be uploaded without
     79      * revealing much about the user.
     80      */
     81     public boolean isSafeToLog() {
     82         // Check that we are not sampling too frequently.  Having sampled recently might disclose
     83         // too much of the user's intended meaning.
     84         if (mWordsUntilSafeToSample > 0) {
     85             return false;
     86         }
     87         if (mSuggest == null || !mSuggest.hasMainDictionary()) {
     88             // Main dictionary is unavailable.  Since we cannot check it, we cannot tell if a word
     89             // is out-of-vocabulary or not.  Therefore, we must judge the entire buffer contents to
     90             // potentially pose a privacy risk.
     91             return false;
     92         }
     93         // Reload the dictionary in case it has changed (e.g., because the user has changed
     94         // languages).
     95         final Dictionary dictionary = mSuggest.getMainDictionary();
     96         if (dictionary == null) {
     97             return false;
     98         }
     99         // Check each word in the buffer.  If any word poses a privacy threat, we cannot upload the
    100         // complete buffer contents in detail.
    101         final int length = mLogUnits.size();
    102         for (int i = 0; i < length; i++) {
    103             final LogUnit logUnit = mLogUnits.get(i);
    104             final String word = logUnit.getWord();
    105             if (word == null) {
    106                 // Digits outside words are a privacy threat.
    107                 if (logUnit.hasDigit()) {
    108                     return false;
    109                 }
    110             } else {
    111                 // Words not in the dictionary are a privacy threat.
    112                 if (!(dictionary.isValidWord(word))) {
    113                     return false;
    114                 }
    115             }
    116         }
    117         // All checks have passed; this buffer's content can be safely uploaded.
    118         return true;
    119     }
    120 
    121     @Override
    122     protected void onShiftOut(LogUnit logUnit) {
    123         if (mResearchLog != null) {
    124             mResearchLog.publish(logUnit, false /* isIncludingPrivateData */);
    125         }
    126     }
    127 }
    128