Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.test.AndroidTestCase;
     20 import android.test.suitebuilder.annotation.LargeTest;
     21 import android.util.Pair;
     22 
     23 import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
     24 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
     25 import com.android.inputmethod.latin.makedict.CodePointUtils;
     26 import com.android.inputmethod.latin.makedict.DictDecoder;
     27 import com.android.inputmethod.latin.makedict.DictionaryHeader;
     28 import com.android.inputmethod.latin.makedict.FormatSpec;
     29 import com.android.inputmethod.latin.makedict.FusionDictionary;
     30 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
     31 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
     32 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
     33 import com.android.inputmethod.latin.utils.FileUtils;
     34 import com.android.inputmethod.latin.utils.LocaleUtils;
     35 
     36 import java.io.File;
     37 import java.io.IOException;
     38 import java.util.ArrayList;
     39 import java.util.HashMap;
     40 import java.util.Locale;
     41 import java.util.Map;
     42 import java.util.Random;
     43 import java.util.concurrent.TimeUnit;
     44 
     45 @LargeTest
     46 public class BinaryDictionaryDecayingTests extends AndroidTestCase {
     47     private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
     48     private static final String TEST_LOCALE = "test";
     49     private static final int DUMMY_PROBABILITY = 0;
     50     private static final int[] DICT_FORMAT_VERSIONS =
     51             new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
     52 
     53     private int mCurrentTime = 0;
     54 
     55     @Override
     56     protected void setUp() throws Exception {
     57         super.setUp();
     58         mCurrentTime = 0;
     59     }
     60 
     61     @Override
     62     protected void tearDown() throws Exception {
     63         stopTestModeInNativeCode();
     64         super.tearDown();
     65     }
     66 
     67     private static boolean supportsBeginningOfSentence(final int formatVersion) {
     68         return formatVersion > FormatSpec.VERSION401;
     69     }
     70 
     71     private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
     72             final int probability) {
     73         binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
     74                 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
     75                 false /* isBeginningOfSentence */, false /* isNotAWord */,
     76                 false /* isBlacklisted */, mCurrentTime /* timestamp */);
     77     }
     78 
     79     private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
     80             final String word1, final int probability) {
     81         binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability,
     82                 mCurrentTime /* timestamp */);
     83     }
     84 
     85     private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
     86             final String word0, final String word1) {
     87         return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1);
     88     }
     89 
     90     private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
     91         // 30 days.
     92         final int timeToElapse = (int)TimeUnit.SECONDS.convert(30, TimeUnit.DAYS);
     93         mCurrentTime += timeToElapse;
     94         setCurrentTimeForTestMode(mCurrentTime);
     95         binaryDictionary.flushWithGC();
     96     }
     97 
     98     private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
     99         // 365 days.
    100         final int timeToElapse = (int)TimeUnit.SECONDS.convert(365, TimeUnit.DAYS);
    101         mCurrentTime += timeToElapse;
    102         setCurrentTimeForTestMode(mCurrentTime);
    103         binaryDictionary.flushWithGC();
    104     }
    105 
    106     private File createEmptyDictionaryAndGetFile(final String dictId,
    107             final int formatVersion) throws IOException {
    108         if (formatVersion == FormatSpec.VERSION4
    109                 || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
    110                 || formatVersion == FormatSpec.VERSION4_DEV) {
    111             return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
    112         } else {
    113             throw new IOException("Dictionary format version " + formatVersion
    114                     + " is not supported.");
    115         }
    116     }
    117 
    118     private File createEmptyVer4DictionaryAndGetFile(final String dictId, final int formatVersion)
    119             throws IOException {
    120         final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
    121                 getContext().getCacheDir());
    122         FileUtils.deleteRecursively(file);
    123         Map<String, String> attributeMap = new HashMap<>();
    124         attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, dictId);
    125         attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY,
    126                 String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
    127         attributeMap.put(DictionaryHeader.USES_FORGETTING_CURVE_KEY,
    128                 DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
    129         attributeMap.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY,
    130                 DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
    131         if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
    132                 LocaleUtils.constructLocaleFromString(TEST_LOCALE), attributeMap)) {
    133             return file;
    134         } else {
    135             throw new IOException("Empty dictionary " + file.getAbsolutePath()
    136                     + " cannot be created. Foramt version: " + formatVersion);
    137         }
    138     }
    139 
    140     private static int setCurrentTimeForTestMode(final int currentTime) {
    141         return BinaryDictionaryUtils.setCurrentTimeForTest(currentTime);
    142     }
    143 
    144     private static int stopTestModeInNativeCode() {
    145         return BinaryDictionaryUtils.setCurrentTimeForTest(-1);
    146     }
    147 
    148     public void testReadDictInJavaSide() {
    149         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    150             testReadDictInJavaSide(formatVersion);
    151         }
    152     }
    153 
    154     private void testReadDictInJavaSide(final int formatVersion) {
    155         setCurrentTimeForTestMode(mCurrentTime);
    156         File dictFile = null;
    157         try {
    158             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    159         } catch (IOException e) {
    160             fail("IOException while writing an initial dictionary : " + e);
    161         }
    162         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    163                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    164                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    165         addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
    166         addUnigramWord(binaryDictionary, "ab", DUMMY_PROBABILITY);
    167         addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
    168         addBigramWords(binaryDictionary, "a", "aaa", DUMMY_PROBABILITY);
    169         binaryDictionary.flushWithGC();
    170         binaryDictionary.close();
    171 
    172         final DictDecoder dictDecoder =
    173                 BinaryDictIOUtils.getDictDecoder(dictFile, 0, dictFile.length());
    174         try {
    175             final FusionDictionary dict =
    176                     dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
    177             PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "a");
    178             assertNotNull(ptNode);
    179             assertTrue(ptNode.isTerminal());
    180             assertNotNull(ptNode.getBigram("aaa"));
    181             ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "ab");
    182             assertNotNull(ptNode);
    183             assertTrue(ptNode.isTerminal());
    184             ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa");
    185             assertNotNull(ptNode);
    186             assertTrue(ptNode.isTerminal());
    187         } catch (IOException e) {
    188             fail("IOException while reading dictionary: " + e);
    189         } catch (UnsupportedFormatException e) {
    190             fail("Unsupported format: " + e);
    191         }
    192         dictFile.delete();
    193     }
    194 
    195     public void testControlCurrentTime() {
    196         final int TEST_COUNT = 1000;
    197         final long seed = System.currentTimeMillis();
    198         final Random random = new Random(seed);
    199         final int startTime = stopTestModeInNativeCode();
    200         for (int i = 0; i < TEST_COUNT; i++) {
    201             final int currentTime = random.nextInt(Integer.MAX_VALUE);
    202             final int currentTimeInNativeCode = setCurrentTimeForTestMode(currentTime);
    203             assertEquals(currentTime, currentTimeInNativeCode);
    204         }
    205         final int endTime = stopTestModeInNativeCode();
    206         final int MAX_ALLOWED_ELAPSED_TIME = 10;
    207         assertTrue(startTime <= endTime && endTime <= startTime + MAX_ALLOWED_ELAPSED_TIME);
    208     }
    209 
    210     public void testAddValidAndInvalidWords() {
    211         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    212             testAddValidAndInvalidWords(formatVersion);
    213         }
    214     }
    215 
    216     private void testAddValidAndInvalidWords(final int formatVersion) {
    217         File dictFile = null;
    218         try {
    219             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    220         } catch (IOException e) {
    221             fail("IOException while writing an initial dictionary : " + e);
    222         }
    223         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    224                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    225                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    226 
    227         addUnigramWord(binaryDictionary, "a", Dictionary.NOT_A_PROBABILITY);
    228         assertFalse(binaryDictionary.isValidWord("a"));
    229         addUnigramWord(binaryDictionary, "a", Dictionary.NOT_A_PROBABILITY);
    230         addUnigramWord(binaryDictionary, "a", Dictionary.NOT_A_PROBABILITY);
    231         assertTrue(binaryDictionary.isValidWord("a"));
    232 
    233         addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
    234         assertTrue(binaryDictionary.isValidWord("b"));
    235 
    236         addBigramWords(binaryDictionary, "a", "b", Dictionary.NOT_A_PROBABILITY);
    237         assertFalse(isValidBigram(binaryDictionary, "a", "b"));
    238         addBigramWords(binaryDictionary, "a", "b", Dictionary.NOT_A_PROBABILITY);
    239         assertTrue(isValidBigram(binaryDictionary, "a", "b"));
    240 
    241         addUnigramWord(binaryDictionary, "c", DUMMY_PROBABILITY);
    242         addBigramWords(binaryDictionary, "a", "c", DUMMY_PROBABILITY);
    243         assertTrue(isValidBigram(binaryDictionary, "a", "c"));
    244 
    245         // Add bigrams of not valid unigrams.
    246         addBigramWords(binaryDictionary, "x", "y", Dictionary.NOT_A_PROBABILITY);
    247         assertFalse(isValidBigram(binaryDictionary, "x", "y"));
    248         addBigramWords(binaryDictionary, "x", "y", DUMMY_PROBABILITY);
    249         assertFalse(isValidBigram(binaryDictionary, "x", "y"));
    250 
    251         binaryDictionary.close();
    252         dictFile.delete();
    253     }
    254 
    255     public void testDecayingProbability() {
    256         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    257             testDecayingProbability(formatVersion);
    258         }
    259     }
    260 
    261     private void testDecayingProbability(final int formatVersion) {
    262         File dictFile = null;
    263         try {
    264             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    265         } catch (IOException e) {
    266             fail("IOException while writing an initial dictionary : " + e);
    267         }
    268         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    269                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    270                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    271 
    272         addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
    273         assertTrue(binaryDictionary.isValidWord("a"));
    274         forcePassingShortTime(binaryDictionary);
    275         assertFalse(binaryDictionary.isValidWord("a"));
    276 
    277         addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
    278         addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
    279         addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
    280         assertTrue(binaryDictionary.isValidWord("a"));
    281         forcePassingShortTime(binaryDictionary);
    282         assertTrue(binaryDictionary.isValidWord("a"));
    283         forcePassingLongTime(binaryDictionary);
    284         assertFalse(binaryDictionary.isValidWord("a"));
    285 
    286         addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
    287         addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
    288         addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
    289         assertTrue(isValidBigram(binaryDictionary, "a", "b"));
    290         forcePassingShortTime(binaryDictionary);
    291         assertFalse(isValidBigram(binaryDictionary, "a", "b"));
    292 
    293         addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
    294         addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
    295         addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
    296         addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
    297         addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
    298         addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
    299         addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
    300         addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
    301         addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
    302         assertTrue(isValidBigram(binaryDictionary, "a", "b"));
    303         forcePassingShortTime(binaryDictionary);
    304         assertTrue(isValidBigram(binaryDictionary, "a", "b"));
    305         forcePassingLongTime(binaryDictionary);
    306         assertFalse(isValidBigram(binaryDictionary, "a", "b"));
    307 
    308         binaryDictionary.close();
    309         dictFile.delete();
    310     }
    311 
    312     public void testAddManyUnigramsToDecayingDict() {
    313         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    314             testAddManyUnigramsToDecayingDict(formatVersion);
    315         }
    316     }
    317 
    318     private void testAddManyUnigramsToDecayingDict(final int formatVersion) {
    319         final int unigramCount = 30000;
    320         final int unigramTypedCount = 100000;
    321         final int codePointSetSize = 50;
    322         final long seed = System.currentTimeMillis();
    323         final Random random = new Random(seed);
    324 
    325         File dictFile = null;
    326         try {
    327             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    328         } catch (IOException e) {
    329             fail("IOException while writing an initial dictionary : " + e);
    330         }
    331         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    332                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    333                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    334         setCurrentTimeForTestMode(mCurrentTime);
    335 
    336         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    337         final ArrayList<String> words = new ArrayList<>();
    338 
    339         for (int i = 0; i < unigramCount; i++) {
    340             final String word = CodePointUtils.generateWord(random, codePointSet);
    341             words.add(word);
    342         }
    343 
    344         final int maxUnigramCount = Integer.parseInt(
    345                 binaryDictionary.getPropertyForTest(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
    346         for (int i = 0; i < unigramTypedCount; i++) {
    347             final String word = words.get(random.nextInt(words.size()));
    348             addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY);
    349 
    350             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    351                 final int unigramCountBeforeGC =
    352                         Integer.parseInt(binaryDictionary.getPropertyForTest(
    353                                 BinaryDictionary.UNIGRAM_COUNT_QUERY));
    354                 while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    355                     forcePassingShortTime(binaryDictionary);
    356                 }
    357                 final int unigramCountAfterGC =
    358                         Integer.parseInt(binaryDictionary.getPropertyForTest(
    359                                 BinaryDictionary.UNIGRAM_COUNT_QUERY));
    360                 assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
    361             }
    362         }
    363 
    364         assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
    365                 BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
    366         assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
    367                 BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
    368         forcePassingLongTime(binaryDictionary);
    369         assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTest(
    370                 BinaryDictionary.UNIGRAM_COUNT_QUERY)));
    371     }
    372 
    373     public void testOverflowUnigrams() {
    374         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    375             testOverflowUnigrams(formatVersion);
    376         }
    377     }
    378 
    379     private void testOverflowUnigrams(final int formatVersion) {
    380         final int unigramCount = 20000;
    381         final int eachUnigramTypedCount = 2;
    382         final int strongUnigramTypedCount = 20;
    383         final int weakUnigramTypedCount = 1;
    384         final int codePointSetSize = 50;
    385         final long seed = System.currentTimeMillis();
    386         final Random random = new Random(seed);
    387 
    388         File dictFile = null;
    389         try {
    390             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    391         } catch (IOException e) {
    392             fail("IOException while writing an initial dictionary : " + e);
    393         }
    394         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    395                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    396                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    397         setCurrentTimeForTestMode(mCurrentTime);
    398         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    399 
    400         final String strong = "strong";
    401         final String weak = "weak";
    402         for (int j = 0; j < strongUnigramTypedCount; j++) {
    403             addUnigramWord(binaryDictionary, strong, DUMMY_PROBABILITY);
    404         }
    405         for (int j = 0; j < weakUnigramTypedCount; j++) {
    406             addUnigramWord(binaryDictionary, weak, DUMMY_PROBABILITY);
    407         }
    408         assertTrue(binaryDictionary.isValidWord(strong));
    409         assertTrue(binaryDictionary.isValidWord(weak));
    410 
    411         for (int i = 0; i < unigramCount; i++) {
    412             final String word = CodePointUtils.generateWord(random, codePointSet);
    413             for (int j = 0; j < eachUnigramTypedCount; j++) {
    414                 addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY);
    415             }
    416             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    417                 final int unigramCountBeforeGC =
    418                         Integer.parseInt(binaryDictionary.getPropertyForTest(
    419                                 BinaryDictionary.UNIGRAM_COUNT_QUERY));
    420                 assertTrue(binaryDictionary.isValidWord(strong));
    421                 assertTrue(binaryDictionary.isValidWord(weak));
    422                 binaryDictionary.flushWithGC();
    423                 final int unigramCountAfterGC =
    424                         Integer.parseInt(binaryDictionary.getPropertyForTest(
    425                                 BinaryDictionary.UNIGRAM_COUNT_QUERY));
    426                 assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
    427                 assertFalse(binaryDictionary.isValidWord(weak));
    428                 assertTrue(binaryDictionary.isValidWord(strong));
    429                 break;
    430             }
    431         }
    432     }
    433 
    434     public void testAddManyBigramsToDecayingDict() {
    435         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    436             testAddManyBigramsToDecayingDict(formatVersion);
    437         }
    438     }
    439 
    440     private void testAddManyBigramsToDecayingDict(final int formatVersion) {
    441         final int unigramCount = 5000;
    442         final int bigramCount = 30000;
    443         final int bigramTypedCount = 100000;
    444         final int codePointSetSize = 50;
    445         final long seed = System.currentTimeMillis();
    446         final Random random = new Random(seed);
    447 
    448         File dictFile = null;
    449         try {
    450             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    451         } catch (IOException e) {
    452             fail("IOException while writing an initial dictionary : " + e);
    453         }
    454         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    455                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    456                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    457         setCurrentTimeForTestMode(mCurrentTime);
    458 
    459         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    460         final ArrayList<String> words = new ArrayList<>();
    461         final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
    462 
    463         for (int i = 0; i < unigramCount; ++i) {
    464             final String word = CodePointUtils.generateWord(random, codePointSet);
    465             words.add(word);
    466         }
    467         for (int i = 0; i < bigramCount; ++i) {
    468             final int word0Index = random.nextInt(words.size());
    469             int word1Index = random.nextInt(words.size() - 1);
    470             if (word1Index >= word0Index) {
    471                 word1Index += 1;
    472             }
    473             final String word0 = words.get(word0Index);
    474             final String word1 = words.get(word1Index);
    475             final Pair<String, String> bigram = new Pair<>(word0, word1);
    476             bigrams.add(bigram);
    477         }
    478 
    479         final int maxBigramCount = Integer.parseInt(
    480                 binaryDictionary.getPropertyForTest(BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
    481         for (int i = 0; i < bigramTypedCount; ++i) {
    482             final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size()));
    483             addUnigramWord(binaryDictionary, bigram.first, DUMMY_PROBABILITY);
    484             addUnigramWord(binaryDictionary, bigram.second, DUMMY_PROBABILITY);
    485             addBigramWords(binaryDictionary, bigram.first, bigram.second, DUMMY_PROBABILITY);
    486 
    487             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    488                 final int bigramCountBeforeGC =
    489                         Integer.parseInt(binaryDictionary.getPropertyForTest(
    490                                 BinaryDictionary.BIGRAM_COUNT_QUERY));
    491                 while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    492                     forcePassingShortTime(binaryDictionary);
    493                 }
    494                 final int bigramCountAfterGC =
    495                         Integer.parseInt(binaryDictionary.getPropertyForTest(
    496                                 BinaryDictionary.BIGRAM_COUNT_QUERY));
    497                 assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
    498             }
    499         }
    500 
    501         assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
    502                 BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
    503         assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
    504                 BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
    505         forcePassingLongTime(binaryDictionary);
    506         assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTest(
    507                 BinaryDictionary.BIGRAM_COUNT_QUERY)));
    508     }
    509 
    510     public void testOverflowBigrams() {
    511         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    512             testOverflowBigrams(formatVersion);
    513         }
    514     }
    515 
    516     private void testOverflowBigrams(final int formatVersion) {
    517         final int bigramCount = 20000;
    518         final int unigramCount = 1000;
    519         final int unigramTypedCount = 20;
    520         final int eachBigramTypedCount = 2;
    521         final int strongBigramTypedCount = 20;
    522         final int weakBigramTypedCount = 1;
    523         final int codePointSetSize = 50;
    524         final long seed = System.currentTimeMillis();
    525         final Random random = new Random(seed);
    526 
    527         File dictFile = null;
    528         try {
    529             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    530         } catch (IOException e) {
    531             fail("IOException while writing an initial dictionary : " + e);
    532         }
    533         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    534                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    535                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    536         setCurrentTimeForTestMode(mCurrentTime);
    537         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    538 
    539         final ArrayList<String> words = new ArrayList<>();
    540         for (int i = 0; i < unigramCount; i++) {
    541             final String word = CodePointUtils.generateWord(random, codePointSet);
    542             words.add(word);
    543             for (int j = 0; j < unigramTypedCount; j++) {
    544                 addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY);
    545             }
    546         }
    547         final String strong = "strong";
    548         final String weak = "weak";
    549         final String target = "target";
    550         for (int j = 0; j < unigramTypedCount; j++) {
    551             addUnigramWord(binaryDictionary, strong, DUMMY_PROBABILITY);
    552             addUnigramWord(binaryDictionary, weak, DUMMY_PROBABILITY);
    553             addUnigramWord(binaryDictionary, target, DUMMY_PROBABILITY);
    554         }
    555         binaryDictionary.flushWithGC();
    556         for (int j = 0; j < strongBigramTypedCount; j++) {
    557             addBigramWords(binaryDictionary, strong, target, DUMMY_PROBABILITY);
    558         }
    559         for (int j = 0; j < weakBigramTypedCount; j++) {
    560             addBigramWords(binaryDictionary, weak, target, DUMMY_PROBABILITY);
    561         }
    562         assertTrue(isValidBigram(binaryDictionary, strong, target));
    563         assertTrue(isValidBigram(binaryDictionary, weak, target));
    564 
    565         for (int i = 0; i < bigramCount; i++) {
    566             final int word0Index = random.nextInt(words.size());
    567             final String word0 = words.get(word0Index);
    568             final int index = random.nextInt(words.size() - 1);
    569             final int word1Index = (index >= word0Index) ? index + 1 : index;
    570             final String word1 = words.get(word1Index);
    571 
    572             for (int j = 0; j < eachBigramTypedCount; j++) {
    573                 addBigramWords(binaryDictionary, word0, word1, DUMMY_PROBABILITY);
    574             }
    575             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    576                 final int bigramCountBeforeGC =
    577                         Integer.parseInt(binaryDictionary.getPropertyForTest(
    578                                 BinaryDictionary.BIGRAM_COUNT_QUERY));
    579                 binaryDictionary.flushWithGC();
    580                 final int bigramCountAfterGC =
    581                         Integer.parseInt(binaryDictionary.getPropertyForTest(
    582                                 BinaryDictionary.BIGRAM_COUNT_QUERY));
    583                 assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
    584                 assertTrue(isValidBigram(binaryDictionary, strong, target));
    585                 assertFalse(isValidBigram(binaryDictionary, weak, target));
    586                 break;
    587             }
    588         }
    589     }
    590 
    591     public void testDictMigration() {
    592         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    593             testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
    594         }
    595     }
    596 
    597     private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
    598         setCurrentTimeForTestMode(mCurrentTime);
    599         File dictFile = null;
    600         try {
    601             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
    602         } catch (IOException e) {
    603             fail("IOException while writing an initial dictionary : " + e);
    604         }
    605         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    606                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    607                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    608         addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
    609         assertTrue(binaryDictionary.isValidWord("aaa"));
    610         addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
    611         assertFalse(binaryDictionary.isValidWord("bbb"));
    612         addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
    613         addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
    614         addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
    615         addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
    616         addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
    617         addUnigramWord(binaryDictionary, "abc", DUMMY_PROBABILITY);
    618         addBigramWords(binaryDictionary, "aaa", "abc", DUMMY_PROBABILITY);
    619         assertTrue(isValidBigram(binaryDictionary, "aaa", "abc"));
    620         addBigramWords(binaryDictionary, "aaa", "bbb", Dictionary.NOT_A_PROBABILITY);
    621         assertFalse(isValidBigram(binaryDictionary, "aaa", "bbb"));
    622 
    623         assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
    624         assertTrue(binaryDictionary.migrateTo(toFormatVersion));
    625         assertTrue(binaryDictionary.isValidDictionary());
    626         assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
    627         assertTrue(binaryDictionary.isValidWord("aaa"));
    628         assertFalse(binaryDictionary.isValidWord("bbb"));
    629         assertTrue(binaryDictionary.getFrequency("aaa") < binaryDictionary.getFrequency("ccc"));
    630         addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
    631         assertTrue(binaryDictionary.isValidWord("bbb"));
    632         assertTrue(isValidBigram(binaryDictionary, "aaa", "abc"));
    633         assertFalse(isValidBigram(binaryDictionary, "aaa", "bbb"));
    634         addBigramWords(binaryDictionary, "aaa", "bbb", Dictionary.NOT_A_PROBABILITY);
    635         assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
    636         binaryDictionary.close();
    637         dictFile.delete();
    638     }
    639 
    640     public void testBeginningOfSentence() {
    641         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    642             if (supportsBeginningOfSentence(formatVersion)) {
    643                 testBeginningOfSentence(formatVersion);
    644             }
    645         }
    646     }
    647 
    648     private void testBeginningOfSentence(final int formatVersion) {
    649         setCurrentTimeForTestMode(mCurrentTime);
    650         File dictFile = null;
    651         try {
    652             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    653         } catch (IOException e) {
    654             fail("IOException while writing an initial dictionary : " + e);
    655         }
    656         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    657                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    658                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    659 
    660         binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */,
    661                 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
    662                 true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
    663                 mCurrentTime);
    664         final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
    665         addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
    666         binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
    667                 mCurrentTime);
    668         assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
    669         binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
    670                 mCurrentTime);
    671         addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
    672         binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
    673                 mCurrentTime);
    674         assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
    675         assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
    676 
    677         forcePassingLongTime(binaryDictionary);
    678         assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
    679         assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
    680 
    681         addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
    682         binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
    683                 mCurrentTime);
    684         addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
    685         binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
    686                 mCurrentTime);
    687         assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
    688         assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
    689         binaryDictionary.close();
    690         dictFile.delete();
    691     }
    692 }
    693