Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.test.AndroidTestCase;
     20 import android.test.suitebuilder.annotation.LargeTest;
     21 import android.text.TextUtils;
     22 import android.util.Pair;
     23 
     24 import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
     25 import com.android.inputmethod.latin.makedict.CodePointUtils;
     26 import com.android.inputmethod.latin.makedict.FormatSpec;
     27 import com.android.inputmethod.latin.makedict.WeightedString;
     28 import com.android.inputmethod.latin.makedict.WordProperty;
     29 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
     30 import com.android.inputmethod.latin.utils.FileUtils;
     31 import com.android.inputmethod.latin.utils.LanguageModelParam;
     32 
     33 import java.io.File;
     34 import java.io.IOException;
     35 import java.util.ArrayList;
     36 import java.util.HashMap;
     37 import java.util.HashSet;
     38 import java.util.Locale;
     39 import java.util.Map;
     40 import java.util.Random;
     41 
     42 // TODO Use the seed passed as an argument for makedict test.
     43 @LargeTest
     44 public class BinaryDictionaryTests extends AndroidTestCase {
     45     private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
     46     private static final String TEST_LOCALE = "test";
     47     private static final int[] DICT_FORMAT_VERSIONS =
     48             new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
     49 
     50     private static boolean canCheckBigramProbability(final int formatVersion) {
     51         return formatVersion > FormatSpec.VERSION401;
     52     }
     53 
     54     private static boolean supportsBeginningOfSentence(final int formatVersion) {
     55         return formatVersion > FormatSpec.VERSION401;
     56     }
     57 
     58     private File createEmptyDictionaryAndGetFile(final String dictId,
     59             final int formatVersion) throws IOException {
     60         if (formatVersion == FormatSpec.VERSION4
     61                 || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
     62                 || formatVersion == FormatSpec.VERSION4_DEV) {
     63             return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
     64         } else {
     65             throw new IOException("Dictionary format version " + formatVersion
     66                     + " is not supported.");
     67         }
     68     }
     69 
     70     private File createEmptyVer4DictionaryAndGetFile(final String dictId,
     71             final int formatVersion) throws IOException {
     72         final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
     73                 getContext().getCacheDir());
     74         file.delete();
     75         file.mkdir();
     76         Map<String, String> attributeMap = new HashMap<>();
     77         if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
     78                 Locale.ENGLISH, attributeMap)) {
     79             return file;
     80         } else {
     81             throw new IOException("Empty dictionary " + file.getAbsolutePath()
     82                     + " cannot be created. Format version: " + formatVersion);
     83         }
     84     }
     85 
     86     public void testIsValidDictionary() {
     87         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
     88             testIsValidDictionary(formatVersion);
     89         }
     90     }
     91 
     92     private void testIsValidDictionary(final int formatVersion) {
     93         File dictFile = null;
     94         try {
     95             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
     96         } catch (IOException e) {
     97             fail("IOException while writing an initial dictionary : " + e);
     98         }
     99         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    100                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    101                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    102         assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
    103                 binaryDictionary.isValidDictionary());
    104         binaryDictionary.close();
    105         assertFalse("binaryDictionary must be invalid after closing.",
    106                 binaryDictionary.isValidDictionary());
    107         FileUtils.deleteRecursively(dictFile);
    108         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
    109                 dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
    110                 TEST_LOCALE, true /* isUpdatable */);
    111         assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
    112                 binaryDictionary.isValidDictionary());
    113         binaryDictionary.close();
    114     }
    115 
    116     public void testConstructingDictionaryOnMemory() {
    117         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    118             testConstructingDictionaryOnMemory(formatVersion);
    119         }
    120     }
    121 
    122     private void testConstructingDictionaryOnMemory(final int formatVersion) {
    123         File dictFile = null;
    124         try {
    125             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    126         } catch (IOException e) {
    127             fail("IOException while writing an initial dictionary : " + e);
    128         }
    129         FileUtils.deleteRecursively(dictFile);
    130         assertFalse(dictFile.exists());
    131         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    132                 true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
    133                 new HashMap<String, String>());
    134         assertTrue(binaryDictionary.isValidDictionary());
    135         assertEquals(formatVersion, binaryDictionary.getFormatVersion());
    136         final int probability = 100;
    137         addUnigramWord(binaryDictionary, "word", probability);
    138         assertEquals(probability, binaryDictionary.getFrequency("word"));
    139         assertFalse(dictFile.exists());
    140         binaryDictionary.flush();
    141         assertTrue(dictFile.exists());
    142         assertTrue(binaryDictionary.isValidDictionary());
    143         assertEquals(formatVersion, binaryDictionary.getFormatVersion());
    144         assertEquals(probability, binaryDictionary.getFrequency("word"));
    145         binaryDictionary.close();
    146         dictFile.delete();
    147     }
    148 
    149     public void testAddTooLongWord() {
    150         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    151             testAddTooLongWord(formatVersion);
    152         }
    153     }
    154 
    155     private void testAddTooLongWord(final int formatVersion) {
    156         File dictFile = null;
    157         try {
    158             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    159         } catch (IOException e) {
    160             fail("IOException while writing an initial dictionary : " + e);
    161         }
    162         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    163                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    164                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    165 
    166         final StringBuffer stringBuilder = new StringBuffer();
    167         for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
    168             stringBuilder.append('a');
    169         }
    170         final String validLongWord = stringBuilder.toString();
    171         stringBuilder.append('a');
    172         final String invalidLongWord = stringBuilder.toString();
    173         final int probability = 100;
    174         addUnigramWord(binaryDictionary, "aaa", probability);
    175         addUnigramWord(binaryDictionary, validLongWord, probability);
    176         addUnigramWord(binaryDictionary, invalidLongWord, probability);
    177         // Too long short cut.
    178         binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
    179                 10 /* shortcutProbability */, false /* isBeginningOfSentence */,
    180                 false /* isNotAWord */, false /* isBlacklisted */,
    181                 BinaryDictionary.NOT_A_VALID_TIMESTAMP);
    182         addUnigramWord(binaryDictionary, "abc", probability);
    183         final int updatedProbability = 200;
    184         // Update.
    185         addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
    186         addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
    187         addUnigramWord(binaryDictionary, "abc", updatedProbability);
    188 
    189         assertEquals(probability, binaryDictionary.getFrequency("aaa"));
    190         assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
    191         assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
    192                 binaryDictionary.getFrequency(invalidLongWord));
    193         assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
    194         dictFile.delete();
    195     }
    196 
    197     private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
    198             final int probability) {
    199         binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
    200                 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
    201                 false /* isBeginningOfSentence */, false /* isNotAWord */,
    202                 false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
    203     }
    204 
    205     private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
    206             final String word1, final int probability) {
    207         binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability,
    208                 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
    209     }
    210 
    211     private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
    212             final String word0, final String word1) {
    213         return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1);
    214     }
    215 
    216     private static void removeBigramEntry(final BinaryDictionary binaryDictionary,
    217             final String word0, final String word1) {
    218         binaryDictionary.removeNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1);
    219     }
    220 
    221     private static int getBigramProbability(final BinaryDictionary binaryDictionary,
    222             final String word0,  final String word1) {
    223         return binaryDictionary.getNgramProbability(new PrevWordsInfo(new WordInfo(word0)), word1);
    224     }
    225 
    226     public void testAddUnigramWord() {
    227         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    228             testAddUnigramWord(formatVersion);
    229         }
    230     }
    231 
    232     private void testAddUnigramWord(final int formatVersion) {
    233         File dictFile = null;
    234         try {
    235             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    236         } catch (IOException e) {
    237             fail("IOException while writing an initial dictionary : " + e);
    238         }
    239         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    240                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    241                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    242 
    243         final int probability = 100;
    244         addUnigramWord(binaryDictionary, "aaa", probability);
    245         // Reallocate and create.
    246         addUnigramWord(binaryDictionary, "aab", probability);
    247         // Insert into children.
    248         addUnigramWord(binaryDictionary, "aac", probability);
    249         // Make terminal.
    250         addUnigramWord(binaryDictionary, "aa", probability);
    251         // Create children.
    252         addUnigramWord(binaryDictionary, "aaaa", probability);
    253         // Reallocate and make termianl.
    254         addUnigramWord(binaryDictionary, "a", probability);
    255 
    256         final int updatedProbability = 200;
    257         // Update.
    258         addUnigramWord(binaryDictionary, "aaa", updatedProbability);
    259 
    260         assertEquals(probability, binaryDictionary.getFrequency("aab"));
    261         assertEquals(probability, binaryDictionary.getFrequency("aac"));
    262         assertEquals(probability, binaryDictionary.getFrequency("aa"));
    263         assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
    264         assertEquals(probability, binaryDictionary.getFrequency("a"));
    265         assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
    266 
    267         dictFile.delete();
    268     }
    269 
    270     public void testRandomlyAddUnigramWord() {
    271         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    272             testRandomlyAddUnigramWord(formatVersion);
    273         }
    274     }
    275 
    276     private void testRandomlyAddUnigramWord(final int formatVersion) {
    277         final int wordCount = 1000;
    278         final int codePointSetSize = 50;
    279         final long seed = System.currentTimeMillis();
    280 
    281         File dictFile = null;
    282         try {
    283             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    284         } catch (IOException e) {
    285             fail("IOException while writing an initial dictionary : " + e);
    286         }
    287         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    288                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    289                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    290 
    291         final HashMap<String, Integer> probabilityMap = new HashMap<>();
    292         // Test a word that isn't contained within the dictionary.
    293         final Random random = new Random(seed);
    294         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    295         for (int i = 0; i < wordCount; ++i) {
    296             final String word = CodePointUtils.generateWord(random, codePointSet);
    297             probabilityMap.put(word, random.nextInt(0xFF));
    298         }
    299         for (String word : probabilityMap.keySet()) {
    300             addUnigramWord(binaryDictionary, word, probabilityMap.get(word));
    301         }
    302         for (String word : probabilityMap.keySet()) {
    303             assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
    304         }
    305         dictFile.delete();
    306     }
    307 
    308     public void testAddBigramWords() {
    309         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    310             testAddBigramWords(formatVersion);
    311         }
    312     }
    313 
    314     private void testAddBigramWords(final int formatVersion) {
    315         File dictFile = null;
    316         try {
    317             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    318         } catch (IOException e) {
    319             fail("IOException while writing an initial dictionary : " + e);
    320         }
    321         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    322                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    323                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    324 
    325         final int unigramProbability = 100;
    326         final int bigramProbability = 150;
    327         final int updatedBigramProbability = 200;
    328         addUnigramWord(binaryDictionary, "aaa", unigramProbability);
    329         addUnigramWord(binaryDictionary, "abb", unigramProbability);
    330         addUnigramWord(binaryDictionary, "bcc", unigramProbability);
    331         addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
    332         addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
    333         addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
    334         addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
    335 
    336         assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
    337         assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
    338         assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
    339         assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
    340         if (canCheckBigramProbability(formatVersion)) {
    341             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
    342             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
    343             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
    344             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
    345         }
    346 
    347         addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
    348         if (canCheckBigramProbability(formatVersion)) {
    349             assertEquals(updatedBigramProbability,
    350                     getBigramProbability(binaryDictionary, "aaa", "abb"));
    351         }
    352 
    353         assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
    354         assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
    355         assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
    356         assertEquals(Dictionary.NOT_A_PROBABILITY,
    357                 getBigramProbability(binaryDictionary, "bcc", "aaa"));
    358         assertEquals(Dictionary.NOT_A_PROBABILITY,
    359                 getBigramProbability(binaryDictionary, "bcc", "bbc"));
    360         assertEquals(Dictionary.NOT_A_PROBABILITY,
    361                 getBigramProbability(binaryDictionary, "aaa", "aaa"));
    362 
    363         // Testing bigram link.
    364         addUnigramWord(binaryDictionary, "abcde", unigramProbability);
    365         addUnigramWord(binaryDictionary, "fghij", unigramProbability);
    366         addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability);
    367         addUnigramWord(binaryDictionary, "fgh", unigramProbability);
    368         addUnigramWord(binaryDictionary, "abc", unigramProbability);
    369         addUnigramWord(binaryDictionary, "f", unigramProbability);
    370 
    371         if (canCheckBigramProbability(formatVersion)) {
    372             assertEquals(bigramProbability,
    373                     getBigramProbability(binaryDictionary, "abcde", "fghij"));
    374         }
    375         assertEquals(Dictionary.NOT_A_PROBABILITY,
    376                 getBigramProbability(binaryDictionary, "abcde", "fgh"));
    377         addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
    378         if (canCheckBigramProbability(formatVersion)) {
    379             assertEquals(updatedBigramProbability,
    380                     getBigramProbability(binaryDictionary, "abcde", "fghij"));
    381         }
    382 
    383         dictFile.delete();
    384     }
    385 
    386     public void testRandomlyAddBigramWords() {
    387         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    388             testRandomlyAddBigramWords(formatVersion);
    389         }
    390     }
    391 
    392     private void testRandomlyAddBigramWords(final int formatVersion) {
    393         final int wordCount = 100;
    394         final int bigramCount = 1000;
    395         final int codePointSetSize = 50;
    396         final long seed = System.currentTimeMillis();
    397         final Random random = new Random(seed);
    398 
    399         File dictFile = null;
    400         try {
    401             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    402         } catch (IOException e) {
    403             fail("IOException while writing an initial dictionary : " + e);
    404         }
    405         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    406                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    407                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    408 
    409         final ArrayList<String> words = new ArrayList<>();
    410         final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
    411         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    412         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
    413         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
    414 
    415         for (int i = 0; i < wordCount; ++i) {
    416             final String word = CodePointUtils.generateWord(random, codePointSet);
    417             words.add(word);
    418             final int unigramProbability = random.nextInt(0xFF);
    419             unigramProbabilities.put(word, unigramProbability);
    420             addUnigramWord(binaryDictionary, word, unigramProbability);
    421         }
    422 
    423         for (int i = 0; i < bigramCount; i++) {
    424             final String word0 = words.get(random.nextInt(wordCount));
    425             final String word1 = words.get(random.nextInt(wordCount));
    426             if (TextUtils.equals(word0, word1)) {
    427                 continue;
    428             }
    429             final Pair<String, String> bigram = new Pair<>(word0, word1);
    430             bigramWords.add(bigram);
    431             final int unigramProbability = unigramProbabilities.get(word1);
    432             final int bigramProbability =
    433                     unigramProbability + random.nextInt(0xFF - unigramProbability);
    434             bigramProbabilities.put(bigram, bigramProbability);
    435             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
    436         }
    437 
    438         for (final Pair<String, String> bigram : bigramWords) {
    439             final int bigramProbability = bigramProbabilities.get(bigram);
    440             assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
    441                     isValidBigram(binaryDictionary, bigram.first, bigram.second));
    442             if (canCheckBigramProbability(formatVersion)) {
    443                 assertEquals(bigramProbability,
    444                         getBigramProbability(binaryDictionary, bigram.first, bigram.second));
    445             }
    446         }
    447 
    448         dictFile.delete();
    449     }
    450 
    451     public void testRemoveBigramWords() {
    452         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    453             testRemoveBigramWords(formatVersion);
    454         }
    455     }
    456 
    457     private void testRemoveBigramWords(final int formatVersion) {
    458         File dictFile = null;
    459         try {
    460             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    461         } catch (IOException e) {
    462             fail("IOException while writing an initial dictionary : " + e);
    463         }
    464         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    465                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    466                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    467         final int unigramProbability = 100;
    468         final int bigramProbability = 150;
    469         addUnigramWord(binaryDictionary, "aaa", unigramProbability);
    470         addUnigramWord(binaryDictionary, "abb", unigramProbability);
    471         addUnigramWord(binaryDictionary, "bcc", unigramProbability);
    472         addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
    473         addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
    474         addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
    475         addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
    476 
    477         assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
    478         assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
    479         assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
    480         assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
    481 
    482         removeBigramEntry(binaryDictionary, "aaa", "abb");
    483         assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));
    484         addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
    485         assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
    486 
    487 
    488         removeBigramEntry(binaryDictionary, "aaa", "bcc");
    489         assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc"));
    490         removeBigramEntry(binaryDictionary, "abb", "aaa");
    491         assertFalse(isValidBigram(binaryDictionary, "abb", "aaa"));
    492         removeBigramEntry(binaryDictionary, "abb", "bcc");
    493         assertFalse(isValidBigram(binaryDictionary, "abb", "bcc"));
    494 
    495         removeBigramEntry(binaryDictionary, "aaa", "abb");
    496         // Test remove non-existing bigram operation.
    497         removeBigramEntry(binaryDictionary, "aaa", "abb");
    498         removeBigramEntry(binaryDictionary, "bcc", "aaa");
    499 
    500         dictFile.delete();
    501     }
    502 
    503     public void testFlushDictionary() {
    504         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    505             testFlushDictionary(formatVersion);
    506         }
    507     }
    508 
    509     private void testFlushDictionary(final int formatVersion) {
    510         File dictFile = null;
    511         try {
    512             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    513         } catch (IOException e) {
    514             fail("IOException while writing an initial dictionary : " + e);
    515         }
    516         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    517                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    518                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    519 
    520         final int probability = 100;
    521         addUnigramWord(binaryDictionary, "aaa", probability);
    522         addUnigramWord(binaryDictionary, "abcd", probability);
    523         // Close without flushing.
    524         binaryDictionary.close();
    525 
    526         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    527                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    528                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    529 
    530         assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
    531         assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
    532 
    533         addUnigramWord(binaryDictionary, "aaa", probability);
    534         addUnigramWord(binaryDictionary, "abcd", probability);
    535         binaryDictionary.flush();
    536         binaryDictionary.close();
    537 
    538         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    539                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    540                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    541 
    542         assertEquals(probability, binaryDictionary.getFrequency("aaa"));
    543         assertEquals(probability, binaryDictionary.getFrequency("abcd"));
    544         addUnigramWord(binaryDictionary, "bcde", probability);
    545         binaryDictionary.flush();
    546         binaryDictionary.close();
    547 
    548         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    549                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    550                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    551         assertEquals(probability, binaryDictionary.getFrequency("bcde"));
    552         binaryDictionary.close();
    553 
    554         dictFile.delete();
    555     }
    556 
    557     public void testFlushWithGCDictionary() {
    558         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    559             testFlushWithGCDictionary(formatVersion);
    560         }
    561     }
    562 
    563     private void testFlushWithGCDictionary(final int formatVersion) {
    564         File dictFile = null;
    565         try {
    566             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    567         } catch (IOException e) {
    568             fail("IOException while writing an initial dictionary : " + e);
    569         }
    570         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    571                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    572                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    573 
    574         final int unigramProbability = 100;
    575         final int bigramProbability = 150;
    576         addUnigramWord(binaryDictionary, "aaa", unigramProbability);
    577         addUnigramWord(binaryDictionary, "abb", unigramProbability);
    578         addUnigramWord(binaryDictionary, "bcc", unigramProbability);
    579         addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
    580         addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
    581         addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
    582         addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
    583         binaryDictionary.flushWithGC();
    584         binaryDictionary.close();
    585 
    586         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    587                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    588                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    589         assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
    590         assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
    591         assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
    592         if (canCheckBigramProbability(formatVersion)) {
    593             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
    594             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
    595             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
    596             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
    597         }
    598         assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
    599         assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
    600         assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
    601         binaryDictionary.flushWithGC();
    602         binaryDictionary.close();
    603 
    604         dictFile.delete();
    605     }
    606 
    607     public void testAddBigramWordsAndFlashWithGC() {
    608         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    609             testAddBigramWordsAndFlashWithGC(formatVersion);
    610         }
    611     }
    612 
    613     // TODO: Evaluate performance of GC
    614     private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
    615         final int wordCount = 100;
    616         final int bigramCount = 1000;
    617         final int codePointSetSize = 30;
    618         final long seed = System.currentTimeMillis();
    619         final Random random = new Random(seed);
    620 
    621         File dictFile = null;
    622         try {
    623             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    624         } catch (IOException e) {
    625             fail("IOException while writing an initial dictionary : " + e);
    626         }
    627 
    628         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    629                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    630                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    631 
    632         final ArrayList<String> words = new ArrayList<>();
    633         final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
    634         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    635         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
    636         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
    637 
    638         for (int i = 0; i < wordCount; ++i) {
    639             final String word = CodePointUtils.generateWord(random, codePointSet);
    640             words.add(word);
    641             final int unigramProbability = random.nextInt(0xFF);
    642             unigramProbabilities.put(word, unigramProbability);
    643             addUnigramWord(binaryDictionary, word, unigramProbability);
    644         }
    645 
    646         for (int i = 0; i < bigramCount; i++) {
    647             final String word0 = words.get(random.nextInt(wordCount));
    648             final String word1 = words.get(random.nextInt(wordCount));
    649             if (TextUtils.equals(word0, word1)) {
    650                 continue;
    651             }
    652             final Pair<String, String> bigram = new Pair<>(word0, word1);
    653             bigramWords.add(bigram);
    654             final int unigramProbability = unigramProbabilities.get(word1);
    655             final int bigramProbability =
    656                     unigramProbability + random.nextInt(0xFF - unigramProbability);
    657             bigramProbabilities.put(bigram, bigramProbability);
    658             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
    659         }
    660 
    661         binaryDictionary.flushWithGC();
    662         binaryDictionary.close();
    663         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    664                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    665                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    666 
    667 
    668         for (final Pair<String, String> bigram : bigramWords) {
    669             final int bigramProbability = bigramProbabilities.get(bigram);
    670             assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
    671                     isValidBigram(binaryDictionary, bigram.first, bigram.second));
    672             if (canCheckBigramProbability(formatVersion)) {
    673                 assertEquals(bigramProbability,
    674                         getBigramProbability(binaryDictionary, bigram.first, bigram.second));
    675             }
    676         }
    677 
    678         dictFile.delete();
    679     }
    680 
    681     public void testRandomOperationsAndFlashWithGC() {
    682         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    683             testRandomOperationsAndFlashWithGC(formatVersion);
    684         }
    685     }
    686 
    687     private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
    688         final int flashWithGCIterationCount = 50;
    689         final int operationCountInEachIteration = 200;
    690         final int initialUnigramCount = 100;
    691         final float addUnigramProb = 0.5f;
    692         final float addBigramProb = 0.8f;
    693         final float removeBigramProb = 0.2f;
    694         final int codePointSetSize = 30;
    695 
    696         final long seed = System.currentTimeMillis();
    697         final Random random = new Random(seed);
    698 
    699         File dictFile = null;
    700         try {
    701             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    702         } catch (IOException e) {
    703             fail("IOException while writing an initial dictionary : " + e);
    704         }
    705 
    706         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    707                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    708                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    709         final ArrayList<String> words = new ArrayList<>();
    710         final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
    711         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    712         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
    713         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
    714         for (int i = 0; i < initialUnigramCount; ++i) {
    715             final String word = CodePointUtils.generateWord(random, codePointSet);
    716             words.add(word);
    717             final int unigramProbability = random.nextInt(0xFF);
    718             unigramProbabilities.put(word, unigramProbability);
    719             addUnigramWord(binaryDictionary, word, unigramProbability);
    720         }
    721         binaryDictionary.flushWithGC();
    722         binaryDictionary.close();
    723 
    724         for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
    725             binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    726                     0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    727                     Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    728             for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
    729                 // Add unigram.
    730                 if (random.nextFloat() < addUnigramProb) {
    731                     final String word = CodePointUtils.generateWord(random, codePointSet);
    732                     words.add(word);
    733                     final int unigramProbability = random.nextInt(0xFF);
    734                     unigramProbabilities.put(word, unigramProbability);
    735                     addUnigramWord(binaryDictionary, word, unigramProbability);
    736                 }
    737                 // Add bigram.
    738                 if (random.nextFloat() < addBigramProb && words.size() > 2) {
    739                     final int word0Index = random.nextInt(words.size());
    740                     int word1Index = random.nextInt(words.size() - 1);
    741                     if (word0Index <= word1Index) {
    742                         word1Index++;
    743                     }
    744                     final String word0 = words.get(word0Index);
    745                     final String word1 = words.get(word1Index);
    746                     if (TextUtils.equals(word0, word1)) {
    747                         continue;
    748                     }
    749                     final int unigramProbability = unigramProbabilities.get(word1);
    750                     final int bigramProbability =
    751                             unigramProbability + random.nextInt(0xFF - unigramProbability);
    752                     final Pair<String, String> bigram = new Pair<>(word0, word1);
    753                     bigramWords.add(bigram);
    754                     bigramProbabilities.put(bigram, bigramProbability);
    755                     addBigramWords(binaryDictionary, word0, word1, bigramProbability);
    756                 }
    757                 // Remove bigram.
    758                 if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
    759                     final int bigramIndex = random.nextInt(bigramWords.size());
    760                     final Pair<String, String> bigram = bigramWords.get(bigramIndex);
    761                     bigramWords.remove(bigramIndex);
    762                     bigramProbabilities.remove(bigram);
    763                     removeBigramEntry(binaryDictionary, bigram.first, bigram.second);
    764                 }
    765             }
    766 
    767             // Test whether the all unigram operations are collectlly handled.
    768             for (int i = 0; i < words.size(); i++) {
    769                 final String word = words.get(i);
    770                 final int unigramProbability = unigramProbabilities.get(word);
    771                 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
    772             }
    773             // Test whether the all bigram operations are collectlly handled.
    774             for (int i = 0; i < bigramWords.size(); i++) {
    775                 final Pair<String, String> bigram = bigramWords.get(i);
    776                 final int probability;
    777                 if (bigramProbabilities.containsKey(bigram)) {
    778                     final int bigramProbability = bigramProbabilities.get(bigram);
    779                     probability = bigramProbability;
    780                 } else {
    781                     probability = Dictionary.NOT_A_PROBABILITY;
    782                 }
    783 
    784                 if (canCheckBigramProbability(formatVersion)) {
    785                     assertEquals(probability,
    786                             getBigramProbability(binaryDictionary, bigram.first, bigram.second));
    787                 }
    788                 assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
    789                         isValidBigram(binaryDictionary, bigram.first, bigram.second));
    790             }
    791             binaryDictionary.flushWithGC();
    792             binaryDictionary.close();
    793         }
    794 
    795         dictFile.delete();
    796     }
    797 
    798     public void testAddManyUnigramsAndFlushWithGC() {
    799         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    800             testAddManyUnigramsAndFlushWithGC(formatVersion);
    801         }
    802     }
    803 
    804     private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
    805         final int flashWithGCIterationCount = 3;
    806         final int codePointSetSize = 50;
    807 
    808         final long seed = System.currentTimeMillis();
    809         final Random random = new Random(seed);
    810 
    811         File dictFile = null;
    812         try {
    813             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    814         } catch (IOException e) {
    815             fail("IOException while writing an initial dictionary : " + e);
    816         }
    817 
    818         final ArrayList<String> words = new ArrayList<>();
    819         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
    820         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    821 
    822         BinaryDictionary binaryDictionary;
    823         for (int i = 0; i < flashWithGCIterationCount; i++) {
    824             binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    825                     0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    826                     Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    827             while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    828                 final String word = CodePointUtils.generateWord(random, codePointSet);
    829                 words.add(word);
    830                 final int unigramProbability = random.nextInt(0xFF);
    831                 unigramProbabilities.put(word, unigramProbability);
    832                 addUnigramWord(binaryDictionary, word, unigramProbability);
    833             }
    834 
    835             for (int j = 0; j < words.size(); j++) {
    836                 final String word = words.get(j);
    837                 final int unigramProbability = unigramProbabilities.get(word);
    838                 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
    839             }
    840 
    841             binaryDictionary.flushWithGC();
    842             binaryDictionary.close();
    843         }
    844 
    845         dictFile.delete();
    846     }
    847 
    848     public void testUnigramAndBigramCount() {
    849         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    850             testUnigramAndBigramCount(formatVersion);
    851         }
    852     }
    853 
    854     private void testUnigramAndBigramCount(final int formatVersion) {
    855         final int flashWithGCIterationCount = 10;
    856         final int codePointSetSize = 50;
    857         final int unigramCountPerIteration = 1000;
    858         final int bigramCountPerIteration = 2000;
    859         final long seed = System.currentTimeMillis();
    860         final Random random = new Random(seed);
    861 
    862         File dictFile = null;
    863         try {
    864             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    865         } catch (IOException e) {
    866             fail("IOException while writing an initial dictionary : " + e);
    867         }
    868 
    869         final ArrayList<String> words = new ArrayList<>();
    870         final HashSet<Pair<String, String>> bigrams = new HashSet<>();
    871         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    872 
    873         BinaryDictionary binaryDictionary;
    874         for (int i = 0; i < flashWithGCIterationCount; i++) {
    875             binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    876                     0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    877                     Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    878             for (int j = 0; j < unigramCountPerIteration; j++) {
    879                 final String word = CodePointUtils.generateWord(random, codePointSet);
    880                 words.add(word);
    881                 final int unigramProbability = random.nextInt(0xFF);
    882                 addUnigramWord(binaryDictionary, word, unigramProbability);
    883             }
    884             for (int j = 0; j < bigramCountPerIteration; j++) {
    885                 final String word0 = words.get(random.nextInt(words.size()));
    886                 final String word1 = words.get(random.nextInt(words.size()));
    887                 if (TextUtils.equals(word0, word1)) {
    888                     continue;
    889                 }
    890                 bigrams.add(new Pair<>(word0, word1));
    891                 final int bigramProbability = random.nextInt(0xF);
    892                 addBigramWords(binaryDictionary, word0, word1, bigramProbability);
    893             }
    894             assertEquals(new HashSet<>(words).size(), Integer.parseInt(
    895                     binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
    896             assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
    897                     binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
    898             binaryDictionary.flushWithGC();
    899             assertEquals(new HashSet<>(words).size(), Integer.parseInt(
    900                     binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
    901             assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
    902                     binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
    903             binaryDictionary.close();
    904         }
    905 
    906         dictFile.delete();
    907     }
    908 
    909     public void testAddMultipleDictionaryEntries() {
    910         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    911             testAddMultipleDictionaryEntries(formatVersion);
    912         }
    913     }
    914 
    915     private void testAddMultipleDictionaryEntries(final int formatVersion) {
    916         final int codePointSetSize = 20;
    917         final int lmParamCount = 1000;
    918         final double bigramContinueRate = 0.9;
    919         final long seed = System.currentTimeMillis();
    920         final Random random = new Random(seed);
    921 
    922         File dictFile = null;
    923         try {
    924             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    925         } catch (IOException e) {
    926             fail("IOException while writing an initial dictionary : " + e);
    927         }
    928 
    929         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    930         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
    931         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
    932 
    933         final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
    934         String prevWord = null;
    935         for (int i = 0; i < languageModelParams.length; i++) {
    936             final String word = CodePointUtils.generateWord(random, codePointSet);
    937             final int probability = random.nextInt(0xFF);
    938             final int bigramProbability = probability + random.nextInt(0xFF - probability);
    939             unigramProbabilities.put(word, probability);
    940             if (prevWord == null) {
    941                 languageModelParams[i] = new LanguageModelParam(word, probability,
    942                         BinaryDictionary.NOT_A_VALID_TIMESTAMP);
    943             } else {
    944                 languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
    945                         bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
    946                 bigramProbabilities.put(new Pair<>(prevWord, word),
    947                         bigramProbability);
    948             }
    949             prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
    950         }
    951 
    952         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    953                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    954                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    955         binaryDictionary.addMultipleDictionaryEntries(languageModelParams);
    956 
    957         for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
    958             assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey()));
    959         }
    960 
    961         for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
    962             final String word0 = entry.getKey().first;
    963             final String word1 = entry.getKey().second;
    964             final int bigramProbability = entry.getValue();
    965             assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
    966                     isValidBigram(binaryDictionary, word0, word1));
    967             if (canCheckBigramProbability(formatVersion)) {
    968                 assertEquals(bigramProbability,
    969                         getBigramProbability(binaryDictionary, word0, word1));
    970             }
    971         }
    972     }
    973 
    974     public void testGetWordProperties() {
    975         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
    976             testGetWordProperties(formatVersion);
    977         }
    978     }
    979 
    980     private void testGetWordProperties(final int formatVersion) {
    981         final long seed = System.currentTimeMillis();
    982         final Random random = new Random(seed);
    983         final int UNIGRAM_COUNT = 1000;
    984         final int BIGRAM_COUNT = 1000;
    985         final int codePointSetSize = 20;
    986         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    987 
    988         File dictFile = null;
    989         try {
    990             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    991         } catch (IOException e) {
    992             fail("IOException while writing an initial dictionary : " + e);
    993         }
    994         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    995                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    996                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    997 
    998         final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
    999                 false /* isBeginningOfSentence */);
   1000         assertFalse(invalidWordProperty.isValid());
   1001 
   1002         final ArrayList<String> words = new ArrayList<>();
   1003         final HashMap<String, Integer> wordProbabilities = new HashMap<>();
   1004         final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
   1005         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
   1006 
   1007         for (int i = 0; i < UNIGRAM_COUNT; i++) {
   1008             final String word = CodePointUtils.generateWord(random, codePointSet);
   1009             final int unigramProbability = random.nextInt(0xFF);
   1010             final boolean isNotAWord = random.nextBoolean();
   1011             final boolean isBlacklisted = random.nextBoolean();
   1012             // TODO: Add tests for historical info.
   1013             binaryDictionary.addUnigramEntry(word, unigramProbability,
   1014                     null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
   1015                     false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
   1016                     BinaryDictionary.NOT_A_VALID_TIMESTAMP);
   1017             if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
   1018                 binaryDictionary.flushWithGC();
   1019             }
   1020             words.add(word);
   1021             wordProbabilities.put(word, unigramProbability);
   1022             final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
   1023                     false /* isBeginningOfSentence */);
   1024             assertEquals(word, wordProperty.mWord);
   1025             assertTrue(wordProperty.isValid());
   1026             assertEquals(isNotAWord, wordProperty.mIsNotAWord);
   1027             assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
   1028             assertEquals(false, wordProperty.mHasBigrams);
   1029             assertEquals(false, wordProperty.mHasShortcuts);
   1030             assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
   1031             assertTrue(wordProperty.mShortcutTargets.isEmpty());
   1032         }
   1033 
   1034         for (int i = 0; i < BIGRAM_COUNT; i++) {
   1035             final int word0Index = random.nextInt(wordProbabilities.size());
   1036             final int word1Index = random.nextInt(wordProbabilities.size());
   1037             if (word0Index == word1Index) {
   1038                 continue;
   1039             }
   1040             final String word0 = words.get(word0Index);
   1041             final String word1 = words.get(word1Index);
   1042             final int unigramProbability = wordProbabilities.get(word1);
   1043             final int bigramProbability =
   1044                     unigramProbability + random.nextInt(0xFF - unigramProbability);
   1045             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
   1046             if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
   1047                 binaryDictionary.flushWithGC();
   1048             }
   1049             if (!bigrams.containsKey(word0)) {
   1050                 final HashSet<String> bigramWord1s = new HashSet<>();
   1051                 bigrams.put(word0, bigramWord1s);
   1052             }
   1053             bigrams.get(word0).add(word1);
   1054             bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability);
   1055         }
   1056 
   1057         for (int i = 0; i < words.size(); i++) {
   1058             final String word0 = words.get(i);
   1059             if (!bigrams.containsKey(word0)) {
   1060                 continue;
   1061             }
   1062             final HashSet<String> bigramWord1s = bigrams.get(word0);
   1063             final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
   1064                     false /* isBeginningOfSentence */);
   1065             assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
   1066             for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
   1067                 final String word1 = wordProperty.mBigrams.get(j).mWord;
   1068                 assertTrue(bigramWord1s.contains(word1));
   1069                 if (canCheckBigramProbability(formatVersion)) {
   1070                     final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
   1071                     assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
   1072                 }
   1073             }
   1074         }
   1075     }
   1076 
   1077     public void testIterateAllWords() {
   1078         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
   1079             testIterateAllWords(formatVersion);
   1080         }
   1081     }
   1082 
   1083     private void testIterateAllWords(final int formatVersion) {
   1084         final long seed = System.currentTimeMillis();
   1085         final Random random = new Random(seed);
   1086         final int UNIGRAM_COUNT = 1000;
   1087         final int BIGRAM_COUNT = 1000;
   1088         final int codePointSetSize = 20;
   1089         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
   1090 
   1091         File dictFile = null;
   1092         try {
   1093             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
   1094         } catch (IOException e) {
   1095             fail("IOException while writing an initial dictionary : " + e);
   1096         }
   1097         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
   1098                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
   1099                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
   1100 
   1101         final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
   1102                 false /* isBeginningOfSentence */);
   1103         assertFalse(invalidWordProperty.isValid());
   1104 
   1105         final ArrayList<String> words = new ArrayList<>();
   1106         final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>();
   1107         final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
   1108         final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
   1109                 new HashMap<>();
   1110 
   1111         for (int i = 0; i < UNIGRAM_COUNT; i++) {
   1112             final String word = CodePointUtils.generateWord(random, codePointSet);
   1113             final int unigramProbability = random.nextInt(0xFF);
   1114             addUnigramWord(binaryDictionary, word, unigramProbability);
   1115             if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
   1116                 binaryDictionary.flushWithGC();
   1117             }
   1118             words.add(word);
   1119             wordProbabilitiesToCheckLater.put(word, unigramProbability);
   1120         }
   1121 
   1122         for (int i = 0; i < BIGRAM_COUNT; i++) {
   1123             final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
   1124             final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
   1125             if (word0Index == word1Index) {
   1126                 continue;
   1127             }
   1128             final String word0 = words.get(word0Index);
   1129             final String word1 = words.get(word1Index);
   1130             final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
   1131             final int bigramProbability =
   1132                     unigramProbability + random.nextInt(0xFF - unigramProbability);
   1133             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
   1134             if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
   1135                 binaryDictionary.flushWithGC();
   1136             }
   1137             if (!bigrams.containsKey(word0)) {
   1138                 final HashSet<String> bigramWord1s = new HashSet<>();
   1139                 bigrams.put(word0, bigramWord1s);
   1140             }
   1141             bigrams.get(word0).add(word1);
   1142             bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability);
   1143         }
   1144 
   1145         final HashSet<String> wordSet = new HashSet<>(words);
   1146         final HashSet<Pair<String, String>> bigramSet =
   1147                 new HashSet<>(bigramProbabilitiesToCheckLater.keySet());
   1148         int token = 0;
   1149         do {
   1150             final BinaryDictionary.GetNextWordPropertyResult result =
   1151                     binaryDictionary.getNextWordProperty(token);
   1152             final WordProperty wordProperty = result.mWordProperty;
   1153             final String word0 = wordProperty.mWord;
   1154             assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
   1155                     wordProperty.mProbabilityInfo.mProbability);
   1156             wordSet.remove(word0);
   1157             final HashSet<String> bigramWord1s = bigrams.get(word0);
   1158             for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
   1159                 final String word1 = wordProperty.mBigrams.get(j).mWord;
   1160                 assertTrue(bigramWord1s.contains(word1));
   1161                 final Pair<String, String> bigram = new Pair<>(word0, word1);
   1162                 if (canCheckBigramProbability(formatVersion)) {
   1163                     final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
   1164                     assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
   1165                 }
   1166                 bigramSet.remove(bigram);
   1167             }
   1168             token = result.mNextToken;
   1169         } while (token != 0);
   1170         assertTrue(wordSet.isEmpty());
   1171         assertTrue(bigramSet.isEmpty());
   1172     }
   1173 
   1174     public void testAddShortcuts() {
   1175         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
   1176             testAddShortcuts(formatVersion);
   1177         }
   1178     }
   1179 
   1180     private void testAddShortcuts(final int formatVersion) {
   1181         File dictFile = null;
   1182         try {
   1183             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
   1184         } catch (IOException e) {
   1185             fail("IOException while writing an initial dictionary : " + e);
   1186         }
   1187         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
   1188                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
   1189                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
   1190 
   1191         final int unigramProbability = 100;
   1192         final int shortcutProbability = 10;
   1193         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
   1194                 shortcutProbability, false /* isBeginningOfSentence */,
   1195                 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
   1196         WordProperty wordProperty = binaryDictionary.getWordProperty("aaa",
   1197                 false /* isBeginningOfSentence */);
   1198         assertEquals(1, wordProperty.mShortcutTargets.size());
   1199         assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
   1200         assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
   1201         final int updatedShortcutProbability = 2;
   1202         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
   1203                 updatedShortcutProbability, false /* isBeginningOfSentence */,
   1204                 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
   1205         wordProperty = binaryDictionary.getWordProperty("aaa",
   1206                 false /* isBeginningOfSentence */);
   1207         assertEquals(1, wordProperty.mShortcutTargets.size());
   1208         assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
   1209         assertEquals(updatedShortcutProbability,
   1210                 wordProperty.mShortcutTargets.get(0).getProbability());
   1211         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
   1212                 shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
   1213                 false /* isBlacklisted */, 0 /* timestamp */);
   1214         final HashMap<String, Integer> shortcutTargets = new HashMap<>();
   1215         shortcutTargets.put("zzz", updatedShortcutProbability);
   1216         shortcutTargets.put("yyy", shortcutProbability);
   1217         wordProperty = binaryDictionary.getWordProperty("aaa",
   1218                 false /* isBeginningOfSentence */);
   1219         assertEquals(2, wordProperty.mShortcutTargets.size());
   1220         for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
   1221             assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
   1222             assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
   1223                     shortcutTarget.getProbability());
   1224             shortcutTargets.remove(shortcutTarget.mWord);
   1225         }
   1226         shortcutTargets.put("zzz", updatedShortcutProbability);
   1227         shortcutTargets.put("yyy", shortcutProbability);
   1228         binaryDictionary.flushWithGC();
   1229         wordProperty = binaryDictionary.getWordProperty("aaa",
   1230                 false /* isBeginningOfSentence */);
   1231         assertEquals(2, wordProperty.mShortcutTargets.size());
   1232         for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
   1233             assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
   1234             assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
   1235                     shortcutTarget.getProbability());
   1236             shortcutTargets.remove(shortcutTarget.mWord);
   1237         }
   1238     }
   1239 
   1240     public void testAddManyShortcuts() {
   1241         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
   1242             testAddManyShortcuts(formatVersion);
   1243         }
   1244     }
   1245 
   1246     private void testAddManyShortcuts(final int formatVersion) {
   1247         final long seed = System.currentTimeMillis();
   1248         final Random random = new Random(seed);
   1249         final int UNIGRAM_COUNT = 1000;
   1250         final int SHORTCUT_COUNT = 10000;
   1251         final int codePointSetSize = 20;
   1252         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
   1253 
   1254         final ArrayList<String> words = new ArrayList<>();
   1255         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
   1256         final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>();
   1257 
   1258         File dictFile = null;
   1259         try {
   1260             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
   1261         } catch (IOException e) {
   1262             fail("IOException while writing an initial dictionary : " + e);
   1263         }
   1264         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
   1265                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
   1266                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
   1267 
   1268         for (int i = 0; i < UNIGRAM_COUNT; i++) {
   1269             final String word = CodePointUtils.generateWord(random, codePointSet);
   1270             final int unigramProbability = random.nextInt(0xFF);
   1271             addUnigramWord(binaryDictionary, word, unigramProbability);
   1272             words.add(word);
   1273             unigramProbabilities.put(word, unigramProbability);
   1274             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
   1275                 binaryDictionary.flushWithGC();
   1276             }
   1277         }
   1278         for (int i = 0; i < SHORTCUT_COUNT; i++) {
   1279             final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
   1280             final int shortcutProbability = random.nextInt(0xF);
   1281             final String word = words.get(random.nextInt(words.size()));
   1282             final int unigramProbability = unigramProbabilities.get(word);
   1283             binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
   1284                     shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
   1285                     false /* isBlacklisted */, 0 /* timestamp */);
   1286             if (shortcutTargets.containsKey(word)) {
   1287                 final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
   1288                 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
   1289             } else {
   1290                 final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>();
   1291                 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
   1292                 shortcutTargets.put(word, shortcutTargetsOfWord);
   1293             }
   1294             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
   1295                 binaryDictionary.flushWithGC();
   1296             }
   1297         }
   1298 
   1299         for (final String word : words) {
   1300             final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
   1301                     false /* isBeginningOfSentence */);
   1302             assertEquals((int)unigramProbabilities.get(word),
   1303                     wordProperty.mProbabilityInfo.mProbability);
   1304             if (!shortcutTargets.containsKey(word)) {
   1305                 // The word does not have shortcut targets.
   1306                 continue;
   1307             }
   1308             assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
   1309             for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
   1310                 final String targetCodePonts = shortcutTarget.mWord;
   1311                 assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
   1312                         shortcutTarget.getProbability());
   1313             }
   1314         }
   1315     }
   1316 
   1317     public void testDictMigration() {
   1318         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
   1319             testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
   1320         }
   1321     }
   1322 
   1323     private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
   1324         File dictFile = null;
   1325         try {
   1326             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
   1327         } catch (IOException e) {
   1328             fail("IOException while writing an initial dictionary : " + e);
   1329         }
   1330         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
   1331                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
   1332                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
   1333         final int unigramProbability = 100;
   1334         addUnigramWord(binaryDictionary, "aaa", unigramProbability);
   1335         addUnigramWord(binaryDictionary, "bbb", unigramProbability);
   1336         final int bigramProbability = 150;
   1337         addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
   1338         final int shortcutProbability = 10;
   1339         binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
   1340                 false /* isBeginningOfSentence */, false /* isNotAWord */,
   1341                 false /* isBlacklisted */, 0 /* timestamp */);
   1342         binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
   1343                 Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
   1344                 true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
   1345         binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE,
   1346                 "aaa", bigramProbability, 0 /* timestamp */);
   1347         assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
   1348         assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
   1349         assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
   1350         assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
   1351         assertTrue(binaryDictionary.migrateTo(toFormatVersion));
   1352         assertTrue(binaryDictionary.isValidDictionary());
   1353         assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
   1354         assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
   1355         assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
   1356         if (canCheckBigramProbability(toFormatVersion)) {
   1357             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
   1358             assertEquals(bigramProbability, binaryDictionary.getNgramProbability(
   1359                     PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa"));
   1360         }
   1361         assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
   1362         WordProperty wordProperty = binaryDictionary.getWordProperty("ccc",
   1363                 false /* isBeginningOfSentence */);
   1364         assertEquals(1, wordProperty.mShortcutTargets.size());
   1365         assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord);
   1366         wordProperty = binaryDictionary.getWordProperty("ddd",
   1367                 false /* isBeginningOfSentence */);
   1368         assertTrue(wordProperty.mIsBlacklistEntry);
   1369         assertTrue(wordProperty.mIsNotAWord);
   1370     }
   1371 
   1372     public void testLargeDictMigration() {
   1373         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
   1374             testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
   1375         }
   1376     }
   1377 
   1378     private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) {
   1379         final int UNIGRAM_COUNT = 3000;
   1380         final int BIGRAM_COUNT = 3000;
   1381         final int codePointSetSize = 50;
   1382         final long seed = System.currentTimeMillis();
   1383         final Random random = new Random(seed);
   1384 
   1385         File dictFile = null;
   1386         try {
   1387             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
   1388         } catch (IOException e) {
   1389             fail("IOException while writing an initial dictionary : " + e);
   1390         }
   1391         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
   1392                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
   1393                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
   1394 
   1395         final ArrayList<String> words = new ArrayList<>();
   1396         final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
   1397         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
   1398         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
   1399         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
   1400 
   1401         for (int i = 0; i < UNIGRAM_COUNT; i++) {
   1402             final String word = CodePointUtils.generateWord(random, codePointSet);
   1403             final int unigramProbability = random.nextInt(0xFF);
   1404             addUnigramWord(binaryDictionary, word, unigramProbability);
   1405             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
   1406                 binaryDictionary.flushWithGC();
   1407             }
   1408             words.add(word);
   1409             unigramProbabilities.put(word, unigramProbability);
   1410         }
   1411 
   1412         for (int i = 0; i < BIGRAM_COUNT; i++) {
   1413             final int word0Index = random.nextInt(words.size());
   1414             final int word1Index = random.nextInt(words.size());
   1415             if (word0Index == word1Index) {
   1416                 continue;
   1417             }
   1418             final String word0 = words.get(word0Index);
   1419             final String word1 = words.get(word1Index);
   1420             final int unigramProbability = unigramProbabilities.get(word1);
   1421             final int bigramProbability =
   1422                     random.nextInt(0xFF - unigramProbability) + unigramProbability;
   1423             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
   1424             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
   1425                 binaryDictionary.flushWithGC();
   1426             }
   1427             final Pair<String, String> bigram = new Pair<>(word0, word1);
   1428             bigrams.add(bigram);
   1429             bigramProbabilities.put(bigram, bigramProbability);
   1430         }
   1431         assertTrue(binaryDictionary.migrateTo(toFormatVersion));
   1432 
   1433         for (final String word : words) {
   1434             assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
   1435         }
   1436         assertEquals(unigramProbabilities.size(), Integer.parseInt(
   1437                 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
   1438 
   1439         for (final Pair<String, String> bigram : bigrams) {
   1440             if (canCheckBigramProbability(toFormatVersion)) {
   1441                 assertEquals((int)bigramProbabilities.get(bigram),
   1442                         getBigramProbability(binaryDictionary, bigram.first, bigram.second));
   1443             }
   1444             assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
   1445         }
   1446         assertEquals(bigramProbabilities.size(), Integer.parseInt(
   1447                 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
   1448     }
   1449 
   1450     public void testBeginningOfSentence() {
   1451         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
   1452             if (supportsBeginningOfSentence(formatVersion)) {
   1453                 testBeginningOfSentence(formatVersion);
   1454             }
   1455         }
   1456     }
   1457 
   1458     private void testBeginningOfSentence(final int formatVersion) {
   1459         File dictFile = null;
   1460         try {
   1461             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
   1462         } catch (IOException e) {
   1463             fail("IOException while writing an initial dictionary : " + e);
   1464         }
   1465         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
   1466                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
   1467                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
   1468         final int dummyProbability = 0;
   1469         final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
   1470         final int bigramProbability = 200;
   1471         addUnigramWord(binaryDictionary, "aaa", dummyProbability);
   1472         binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
   1473                 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
   1474         assertEquals(bigramProbability,
   1475                 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
   1476         binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
   1477                 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
   1478         addUnigramWord(binaryDictionary, "bbb", dummyProbability);
   1479         binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability,
   1480                 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
   1481         binaryDictionary.flushWithGC();
   1482         assertEquals(bigramProbability,
   1483                 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
   1484         assertEquals(bigramProbability,
   1485                 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb"));
   1486     }
   1487 
   1488     public void testGetMaxFrequencyOfExactMatches() {
   1489         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
   1490             testGetMaxFrequencyOfExactMatches(formatVersion);
   1491         }
   1492     }
   1493 
   1494     private void testGetMaxFrequencyOfExactMatches(final int formatVersion) {
   1495         File dictFile = null;
   1496         try {
   1497             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
   1498         } catch (IOException e) {
   1499             fail("IOException while writing an initial dictionary : " + e);
   1500         }
   1501         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
   1502                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
   1503                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
   1504         addUnigramWord(binaryDictionary, "abc", 10);
   1505         addUnigramWord(binaryDictionary, "aBc", 15);
   1506         assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
   1507         addUnigramWord(binaryDictionary, "ab'c", 20);
   1508         assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
   1509         addUnigramWord(binaryDictionary, "a-b-c", 25);
   1510         assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
   1511         addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30);
   1512         assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
   1513         addUnigramWord(binaryDictionary, "ab c", 255);
   1514         assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
   1515     }
   1516 }
   1517