Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.test.AndroidTestCase;
     20 import android.test.suitebuilder.annotation.LargeTest;
     21 import android.text.TextUtils;
     22 import android.util.Pair;
     23 
     24 import com.android.inputmethod.latin.makedict.CodePointUtils;
     25 import com.android.inputmethod.latin.makedict.FormatSpec;
     26 
     27 import java.io.File;
     28 import java.io.IOException;
     29 import java.util.ArrayList;
     30 import java.util.HashMap;
     31 import java.util.HashSet;
     32 import java.util.Locale;
     33 import java.util.Map;
     34 import java.util.Random;
     35 
     36 @LargeTest
     37 public class BinaryDictionaryTests extends AndroidTestCase {
    // Suffix of the temporary dictionary files created by createEmptyDictionaryAndGetFile().
    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
    // String passed as the argument that follows the Locale in every BinaryDictionary
    // constructor call in this file.
    // NOTE(review): despite the name this is not the Locale argument itself - confirm what
    // the constructor parameter means.
    private static final String TEST_LOCALE = "test";
     40 
    /**
     * Prepares the test fixture. No test-specific state is created here; the method only
     * delegates to the superclass.
     */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
    }
     45 
    /**
     * Tears down the test fixture. No test-specific state is released here; the method only
     * delegates to the superclass.
     */
    @Override
    protected void tearDown() throws Exception {
        super.tearDown();
    }
     50 
     51     private File createEmptyDictionaryAndGetFile(final String filename) throws IOException {
     52         final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION,
     53                 getContext().getCacheDir());
     54         Map<String, String> attributeMap = new HashMap<String, String>();
     55         attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
     56                 FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
     57         if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
     58                 3 /* dictVersion */, attributeMap)) {
     59             return file;
     60         } else {
     61             throw new IOException("Empty dictionary cannot be created.");
     62         }
     63     }
     64 
     65     public void testIsValidDictionary() {
     66         File dictFile = null;
     67         try {
     68             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
     69         } catch (IOException e) {
     70             fail("IOException while writing an initial dictionary : " + e);
     71         }
     72         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
     73                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
     74                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
     75         assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
     76                 binaryDictionary.isValidDictionary());
     77         binaryDictionary.close();
     78         assertFalse("binaryDictionary must be invalid after closing.",
     79                 binaryDictionary.isValidDictionary());
     80         dictFile.delete();
     81         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
     82                 dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
     83                 TEST_LOCALE, true /* isUpdatable */);
     84         assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
     85                 binaryDictionary.isValidDictionary());
     86         binaryDictionary.close();
     87     }
     88 
     89     public void testAddUnigramWord() {
     90         File dictFile = null;
     91         try {
     92             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
     93         } catch (IOException e) {
     94             fail("IOException while writing an initial dictionary : " + e);
     95         }
     96         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
     97                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
     98                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
     99 
    100         final int probability = 100;
    101         binaryDictionary.addUnigramWord("aaa", probability);
    102         // Reallocate and create.
    103         binaryDictionary.addUnigramWord("aab", probability);
    104         // Insert into children.
    105         binaryDictionary.addUnigramWord("aac", probability);
    106         // Make terminal.
    107         binaryDictionary.addUnigramWord("aa", probability);
    108         // Create children.
    109         binaryDictionary.addUnigramWord("aaaa", probability);
    110         // Reallocate and make termianl.
    111         binaryDictionary.addUnigramWord("a", probability);
    112 
    113         final int updatedProbability = 200;
    114         // Update.
    115         binaryDictionary.addUnigramWord("aaa", updatedProbability);
    116 
    117         assertEquals(probability, binaryDictionary.getFrequency("aab"));
    118         assertEquals(probability, binaryDictionary.getFrequency("aac"));
    119         assertEquals(probability, binaryDictionary.getFrequency("aa"));
    120         assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
    121         assertEquals(probability, binaryDictionary.getFrequency("a"));
    122         assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
    123 
    124         dictFile.delete();
    125     }
    126 
    127     public void testRandomlyAddUnigramWord() {
    128         final int wordCount = 1000;
    129         final int codePointSetSize = 50;
    130         final long seed = System.currentTimeMillis();
    131 
    132         File dictFile = null;
    133         try {
    134             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    135         } catch (IOException e) {
    136             fail("IOException while writing an initial dictionary : " + e);
    137         }
    138         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    139                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    140                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    141 
    142         final HashMap<String, Integer> probabilityMap = new HashMap<String, Integer>();
    143         // Test a word that isn't contained within the dictionary.
    144         final Random random = new Random(seed);
    145         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    146         for (int i = 0; i < wordCount; ++i) {
    147             final String word = CodePointUtils.generateWord(random, codePointSet);
    148             probabilityMap.put(word, random.nextInt(0xFF));
    149         }
    150         for (String word : probabilityMap.keySet()) {
    151             binaryDictionary.addUnigramWord(word, probabilityMap.get(word));
    152         }
    153         for (String word : probabilityMap.keySet()) {
    154             assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
    155         }
    156         dictFile.delete();
    157     }
    158 
    159     public void testAddBigramWords() {
    160         File dictFile = null;
    161         try {
    162             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    163         } catch (IOException e) {
    164             fail("IOException while writing an initial dictionary : " + e);
    165         }
    166         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    167                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    168                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    169 
    170         final int unigramProbability = 100;
    171         final int bigramProbability = 10;
    172         final int updatedBigramProbability = 15;
    173         binaryDictionary.addUnigramWord("aaa", unigramProbability);
    174         binaryDictionary.addUnigramWord("abb", unigramProbability);
    175         binaryDictionary.addUnigramWord("bcc", unigramProbability);
    176         binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
    177         binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
    178         binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
    179         binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
    180 
    181         final int probability = binaryDictionary.calculateProbability(unigramProbability,
    182                 bigramProbability);
    183         assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
    184         assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
    185         assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
    186         assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
    187         assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
    188         assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
    189         assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
    190         assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
    191 
    192         binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability);
    193         final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability,
    194                 updatedBigramProbability);
    195         assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
    196 
    197         assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
    198         assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
    199         assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
    200         assertEquals(Dictionary.NOT_A_PROBABILITY,
    201                 binaryDictionary.getBigramProbability("bcc", "aaa"));
    202         assertEquals(Dictionary.NOT_A_PROBABILITY,
    203                 binaryDictionary.getBigramProbability("bcc", "bbc"));
    204         assertEquals(Dictionary.NOT_A_PROBABILITY,
    205                 binaryDictionary.getBigramProbability("aaa", "aaa"));
    206 
    207         // Testing bigram link.
    208         binaryDictionary.addUnigramWord("abcde", unigramProbability);
    209         binaryDictionary.addUnigramWord("fghij", unigramProbability);
    210         binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability);
    211         binaryDictionary.addUnigramWord("fgh", unigramProbability);
    212         binaryDictionary.addUnigramWord("abc", unigramProbability);
    213         binaryDictionary.addUnigramWord("f", unigramProbability);
    214         assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
    215         assertEquals(Dictionary.NOT_A_PROBABILITY,
    216                 binaryDictionary.getBigramProbability("abcde", "fgh"));
    217         binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability);
    218         assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij"));
    219 
    220         dictFile.delete();
    221     }
    222 
    223     public void testRandomlyAddBigramWords() {
    224         final int wordCount = 100;
    225         final int bigramCount = 1000;
    226         final int codePointSetSize = 50;
    227         final long seed = System.currentTimeMillis();
    228         final Random random = new Random(seed);
    229 
    230         File dictFile = null;
    231         try {
    232             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    233         } catch (IOException e) {
    234             fail("IOException while writing an initial dictionary : " + e);
    235         }
    236         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    237                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    238                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    239 
    240         final ArrayList<String> words = new ArrayList<String>();
    241         final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
    242         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    243         final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
    244         final HashMap<Pair<String, String>, Integer> bigramProbabilities =
    245                 new HashMap<Pair<String, String>, Integer>();
    246 
    247         for (int i = 0; i < wordCount; ++i) {
    248             final String word = CodePointUtils.generateWord(random, codePointSet);
    249             words.add(word);
    250             final int unigramProbability = random.nextInt(0xFF);
    251             unigramProbabilities.put(word, unigramProbability);
    252             binaryDictionary.addUnigramWord(word, unigramProbability);
    253         }
    254 
    255         for (int i = 0; i < bigramCount; i++) {
    256             final String word0 = words.get(random.nextInt(wordCount));
    257             final String word1 = words.get(random.nextInt(wordCount));
    258             if (TextUtils.equals(word0, word1)) {
    259                 continue;
    260             }
    261             final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
    262             bigramWords.add(bigram);
    263             final int bigramProbability = random.nextInt(0xF);
    264             bigramProbabilities.put(bigram, bigramProbability);
    265             binaryDictionary.addBigramWords(word0, word1, bigramProbability);
    266         }
    267 
    268         for (final Pair<String, String> bigram : bigramWords) {
    269             final int unigramProbability = unigramProbabilities.get(bigram.second);
    270             final int bigramProbability = bigramProbabilities.get(bigram);
    271             final int probability = binaryDictionary.calculateProbability(unigramProbability,
    272                     bigramProbability);
    273             assertEquals(probability,
    274                     binaryDictionary.getBigramProbability(bigram.first, bigram.second));
    275         }
    276 
    277         dictFile.delete();
    278     }
    279 
    280     public void testRemoveBigramWords() {
    281         File dictFile = null;
    282         try {
    283             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    284         } catch (IOException e) {
    285             fail("IOException while writing an initial dictionary : " + e);
    286         }
    287         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    288                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    289                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    290         final int unigramProbability = 100;
    291         final int bigramProbability = 10;
    292         binaryDictionary.addUnigramWord("aaa", unigramProbability);
    293         binaryDictionary.addUnigramWord("abb", unigramProbability);
    294         binaryDictionary.addUnigramWord("bcc", unigramProbability);
    295         binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
    296         binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
    297         binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
    298         binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
    299 
    300         assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
    301         assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
    302         assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
    303         assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
    304 
    305         binaryDictionary.removeBigramWords("aaa", "abb");
    306         assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
    307         binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
    308         assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
    309 
    310 
    311         binaryDictionary.removeBigramWords("aaa", "bcc");
    312         assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
    313         binaryDictionary.removeBigramWords("abb", "aaa");
    314         assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
    315         binaryDictionary.removeBigramWords("abb", "bcc");
    316         assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
    317 
    318         binaryDictionary.removeBigramWords("aaa", "abb");
    319         // Test remove non-existing bigram operation.
    320         binaryDictionary.removeBigramWords("aaa", "abb");
    321         binaryDictionary.removeBigramWords("bcc", "aaa");
    322 
    323         dictFile.delete();
    324     }
    325 
    326     public void testFlushDictionary() {
    327         File dictFile = null;
    328         try {
    329             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    330         } catch (IOException e) {
    331             fail("IOException while writing an initial dictionary : " + e);
    332         }
    333         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    334                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    335                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    336 
    337         final int probability = 100;
    338         binaryDictionary.addUnigramWord("aaa", probability);
    339         binaryDictionary.addUnigramWord("abcd", probability);
    340         // Close without flushing.
    341         binaryDictionary.close();
    342 
    343         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    344                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    345                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    346 
    347         assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
    348         assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
    349 
    350         binaryDictionary.addUnigramWord("aaa", probability);
    351         binaryDictionary.addUnigramWord("abcd", probability);
    352         binaryDictionary.flush();
    353         binaryDictionary.close();
    354 
    355         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    356                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    357                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    358 
    359         assertEquals(probability, binaryDictionary.getFrequency("aaa"));
    360         assertEquals(probability, binaryDictionary.getFrequency("abcd"));
    361         binaryDictionary.addUnigramWord("bcde", probability);
    362         binaryDictionary.flush();
    363         binaryDictionary.close();
    364 
    365         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    366                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    367                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    368         assertEquals(probability, binaryDictionary.getFrequency("bcde"));
    369         binaryDictionary.close();
    370 
    371         dictFile.delete();
    372     }
    373 
    374     public void testFlushWithGCDictionary() {
    375         File dictFile = null;
    376         try {
    377             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    378         } catch (IOException e) {
    379             fail("IOException while writing an initial dictionary : " + e);
    380         }
    381         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    382                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    383                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    384 
    385         final int unigramProbability = 100;
    386         final int bigramProbability = 10;
    387         binaryDictionary.addUnigramWord("aaa", unigramProbability);
    388         binaryDictionary.addUnigramWord("abb", unigramProbability);
    389         binaryDictionary.addUnigramWord("bcc", unigramProbability);
    390         binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
    391         binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
    392         binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
    393         binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
    394         binaryDictionary.flushWithGC();
    395         binaryDictionary.close();
    396 
    397         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    398                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    399                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    400         final int probability = binaryDictionary.calculateProbability(unigramProbability,
    401                 bigramProbability);
    402         assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
    403         assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
    404         assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
    405         assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
    406         assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
    407         assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
    408         assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
    409         assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
    410         assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
    411         assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
    412         binaryDictionary.flushWithGC();
    413         binaryDictionary.close();
    414 
    415         dictFile.delete();
    416     }
    417 
    418     // TODO: Evaluate performance of GC
    419     public void testAddBigramWordsAndFlashWithGC() {
    420         final int wordCount = 100;
    421         final int bigramCount = 1000;
    422         final int codePointSetSize = 30;
    423         final long seed = System.currentTimeMillis();
    424         final Random random = new Random(seed);
    425 
    426         File dictFile = null;
    427         try {
    428             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    429         } catch (IOException e) {
    430             fail("IOException while writing an initial dictionary : " + e);
    431         }
    432 
    433         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    434                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    435                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    436 
    437         final ArrayList<String> words = new ArrayList<String>();
    438         final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
    439         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    440         final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
    441         final HashMap<Pair<String, String>, Integer> bigramProbabilities =
    442                 new HashMap<Pair<String, String>, Integer>();
    443 
    444         for (int i = 0; i < wordCount; ++i) {
    445             final String word = CodePointUtils.generateWord(random, codePointSet);
    446             words.add(word);
    447             final int unigramProbability = random.nextInt(0xFF);
    448             unigramProbabilities.put(word, unigramProbability);
    449             binaryDictionary.addUnigramWord(word, unigramProbability);
    450         }
    451 
    452         for (int i = 0; i < bigramCount; i++) {
    453             final String word0 = words.get(random.nextInt(wordCount));
    454             final String word1 = words.get(random.nextInt(wordCount));
    455             if (TextUtils.equals(word0, word1)) {
    456                 continue;
    457             }
    458             final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
    459             bigramWords.add(bigram);
    460             final int bigramProbability = random.nextInt(0xF);
    461             bigramProbabilities.put(bigram, bigramProbability);
    462             binaryDictionary.addBigramWords(word0, word1, bigramProbability);
    463         }
    464 
    465         binaryDictionary.flushWithGC();
    466         binaryDictionary.close();
    467         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    468                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    469                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    470 
    471         for (final Pair<String, String> bigram : bigramWords) {
    472             final int unigramProbability = unigramProbabilities.get(bigram.second);
    473             final int bigramProbability = bigramProbabilities.get(bigram);
    474             final int probability = binaryDictionary.calculateProbability(unigramProbability,
    475                     bigramProbability);
    476             assertEquals(probability,
    477                     binaryDictionary.getBigramProbability(bigram.first, bigram.second));
    478         }
    479 
    480         dictFile.delete();
    481     }
    482 
    483     public void testRandomOperetionsAndFlashWithGC() {
    484         final int flashWithGCIterationCount = 50;
    485         final int operationCountInEachIteration = 200;
    486         final int initialUnigramCount = 100;
    487         final float addUnigramProb = 0.5f;
    488         final float addBigramProb = 0.8f;
    489         final float removeBigramProb = 0.2f;
    490         final int codePointSetSize = 30;
    491 
    492         final long seed = System.currentTimeMillis();
    493         final Random random = new Random(seed);
    494 
    495         File dictFile = null;
    496         try {
    497             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    498         } catch (IOException e) {
    499             fail("IOException while writing an initial dictionary : " + e);
    500         }
    501 
    502         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    503                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    504                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    505         final ArrayList<String> words = new ArrayList<String>();
    506         final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
    507         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    508         final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
    509         final HashMap<Pair<String, String>, Integer> bigramProbabilities =
    510                 new HashMap<Pair<String, String>, Integer>();
    511         for (int i = 0; i < initialUnigramCount; ++i) {
    512             final String word = CodePointUtils.generateWord(random, codePointSet);
    513             words.add(word);
    514             final int unigramProbability = random.nextInt(0xFF);
    515             unigramProbabilities.put(word, unigramProbability);
    516             binaryDictionary.addUnigramWord(word, unigramProbability);
    517         }
    518         binaryDictionary.flushWithGC();
    519         binaryDictionary.close();
    520 
    521         for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
    522             binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    523                     0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    524                     Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    525             for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
    526                 // Add unigram.
    527                 if (random.nextFloat() < addUnigramProb) {
    528                     final String word = CodePointUtils.generateWord(random, codePointSet);
    529                     words.add(word);
    530                     final int unigramProbability = random.nextInt(0xFF);
    531                     unigramProbabilities.put(word, unigramProbability);
    532                     binaryDictionary.addUnigramWord(word, unigramProbability);
    533                 }
    534                 // Add bigram.
    535                 if (random.nextFloat() < addBigramProb && words.size() > 2) {
    536                     final int word0Index = random.nextInt(words.size());
    537                     int word1Index = random.nextInt(words.size() - 1);
    538                     if (word0Index <= word1Index) {
    539                         word1Index++;
    540                     }
    541                     final String word0 = words.get(word0Index);
    542                     final String word1 = words.get(word1Index);
    543                     if (TextUtils.equals(word0, word1)) {
    544                         continue;
    545                     }
    546                     final int bigramProbability = random.nextInt(0xF);
    547                     final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
    548                     bigramWords.add(bigram);
    549                     bigramProbabilities.put(bigram, bigramProbability);
    550                     binaryDictionary.addBigramWords(word0, word1, bigramProbability);
    551                 }
    552                 // Remove bigram.
    553                 if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
    554                     final int bigramIndex = random.nextInt(bigramWords.size());
    555                     final Pair<String, String> bigram = bigramWords.get(bigramIndex);
    556                     bigramWords.remove(bigramIndex);
    557                     bigramProbabilities.remove(bigram);
    558                     binaryDictionary.removeBigramWords(bigram.first, bigram.second);
    559                 }
    560             }
    561 
    562             // Test whether all unigram operations are correctly handled.
    563             for (int i = 0; i < words.size(); i++) {
    564                 final String word = words.get(i);
    565                 final int unigramProbability = unigramProbabilities.get(word);
    566                 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
    567             }
    568             // Test whether all bigram operations are correctly handled.
    569             for (int i = 0; i < bigramWords.size(); i++) {
    570                 final Pair<String, String> bigram = bigramWords.get(i);
    571                 final int unigramProbability = unigramProbabilities.get(bigram.second);
    572                 final int probability;
    573                 if (bigramProbabilities.containsKey(bigram)) {
    574                     final int bigramProbability = bigramProbabilities.get(bigram);
    575                     probability = binaryDictionary.calculateProbability(unigramProbability,
    576                             bigramProbability);
    577                 } else {
    578                     probability = Dictionary.NOT_A_PROBABILITY;
    579                 }
    580                 assertEquals(probability,
    581                         binaryDictionary.getBigramProbability(bigram.first, bigram.second));
    582             }
    583             binaryDictionary.flushWithGC();
    584             binaryDictionary.close();
    585         }
    586 
    587         dictFile.delete();
    588     }
    589 
    590     public void testAddManyUnigramsAndFlushWithGC() {
    591         final int flashWithGCIterationCount = 3;
    592         final int codePointSetSize = 50;
    593 
    594         final long seed = System.currentTimeMillis();
    595         final Random random = new Random(seed);
    596 
    597         File dictFile = null;
    598         try {
    599             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    600         } catch (IOException e) {
    601             fail("IOException while writing an initial dictionary : " + e);
    602         }
    603 
    604         final ArrayList<String> words = new ArrayList<String>();
    605         final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
    606         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    607 
    608         BinaryDictionary binaryDictionary;
    609         for (int i = 0; i < flashWithGCIterationCount; i++) {
    610             binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    611                     0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    612                     Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    613             while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    614                 final String word = CodePointUtils.generateWord(random, codePointSet);
    615                 words.add(word);
    616                 final int unigramProbability = random.nextInt(0xFF);
    617                 unigramProbabilities.put(word, unigramProbability);
    618                 binaryDictionary.addUnigramWord(word, unigramProbability);
    619             }
    620 
    621             for (int j = 0; j < words.size(); j++) {
    622                 final String word = words.get(j);
    623                 final int unigramProbability = unigramProbabilities.get(word);
    624                 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
    625             }
    626 
    627             binaryDictionary.flushWithGC();
    628             binaryDictionary.close();
    629         }
    630 
    631         dictFile.delete();
    632     }
    633 
    634     public void testUnigramAndBigramCount() {
    635         final int flashWithGCIterationCount = 10;
    636         final int codePointSetSize = 50;
    637         final int unigramCountPerIteration = 1000;
    638         final int bigramCountPerIteration = 2000;
    639         final long seed = System.currentTimeMillis();
    640         final Random random = new Random(seed);
    641 
    642         File dictFile = null;
    643         try {
    644             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    645         } catch (IOException e) {
    646             fail("IOException while writing an initial dictionary : " + e);
    647         }
    648 
    649         final ArrayList<String> words = new ArrayList<String>();
    650         final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>();
    651         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    652 
    653         BinaryDictionary binaryDictionary;
    654         for (int i = 0; i < flashWithGCIterationCount; i++) {
    655             binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    656                     0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    657                     Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    658             for (int j = 0; j < unigramCountPerIteration; j++) {
    659                 final String word = CodePointUtils.generateWord(random, codePointSet);
    660                 words.add(word);
    661                 final int unigramProbability = random.nextInt(0xFF);
    662                 binaryDictionary.addUnigramWord(word, unigramProbability);
    663             }
    664             for (int j = 0; j < bigramCountPerIteration; j++) {
    665                 final String word0 = words.get(random.nextInt(words.size()));
    666                 final String word1 = words.get(random.nextInt(words.size()));
    667                 if (TextUtils.equals(word0, word1)) {
    668                     continue;
    669                 }
    670                 bigrams.add(new Pair<String, String>(word0, word1));
    671                 final int bigramProbability = random.nextInt(0xF);
    672                 binaryDictionary.addBigramWords(word0, word1, bigramProbability);
    673             }
    674             assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
    675                     binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
    676             assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
    677                     binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
    678             binaryDictionary.flushWithGC();
    679             assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
    680                     binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
    681             assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
    682                     binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
    683             binaryDictionary.close();
    684         }
    685 
    686         dictFile.delete();
    687     }
    688 }
    689