Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.test.AndroidTestCase;
     20 import android.test.suitebuilder.annotation.LargeTest;
     21 import android.util.Pair;
     22 
     23 import com.android.inputmethod.latin.makedict.CodePointUtils;
     24 import com.android.inputmethod.latin.makedict.FormatSpec;
     25 
     26 import java.io.File;
     27 import java.io.IOException;
     28 import java.util.ArrayList;
     29 import java.util.HashMap;
     30 import java.util.Locale;
     31 import java.util.Map;
     32 import java.util.Random;
     33 
     34 @LargeTest
     35 public class BinaryDictionaryDecayingTests extends AndroidTestCase {
     36     private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
     37     private static final String TEST_LOCALE = "test";
     38 
     39     // Note that these are corresponding definitions in native code in
     40     // latinime::DynamicPatriciaTriePolicy.
     41     private static final String SET_NEEDS_TO_DECAY_FOR_TESTING_KEY =
     42             "SET_NEEDS_TO_DECAY_FOR_TESTING";
     43 
     44     private static final int DUMMY_PROBABILITY = 0;
     45 
     46     @Override
     47     protected void setUp() throws Exception {
     48         super.setUp();
     49     }
     50 
     51     @Override
     52     protected void tearDown() throws Exception {
     53         super.tearDown();
     54     }
     55 
     56     private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
     57         // Entries having low probability would be suppressed once in 3 GCs.
     58         final int count = 3;
     59         for (int i = 0; i < count; i++) {
     60             binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
     61             binaryDictionary.flushWithGC();
     62         }
     63     }
     64 
     65     private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
     66         // Currently, probabilities are decayed when GC is run. All entries that have never been
     67         // typed in 128 GCs would be removed.
     68         final int count = 128;
     69         for (int i = 0; i < count; i++) {
     70             binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
     71             binaryDictionary.flushWithGC();
     72         }
     73     }
     74 
     75     private File createEmptyDictionaryAndGetFile(final String filename) throws IOException {
     76         final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION,
     77                 getContext().getCacheDir());
     78         Map<String, String> attributeMap = new HashMap<String, String>();
     79         attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
     80                 FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
     81         attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
     82                 FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
     83         if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
     84                 3 /* dictVersion */, attributeMap)) {
     85             return file;
     86         } else {
     87             throw new IOException("Empty dictionary cannot be created.");
     88         }
     89     }
     90 
     91     public void testAddValidAndInvalidWords() {
     92         File dictFile = null;
     93         try {
     94             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
     95         } catch (IOException e) {
     96             fail("IOException while writing an initial dictionary : " + e);
     97         }
     98         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
     99                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    100                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    101 
    102         binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
    103         assertFalse(binaryDictionary.isValidWord("a"));
    104         binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
    105         assertFalse(binaryDictionary.isValidWord("a"));
    106         binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
    107         assertFalse(binaryDictionary.isValidWord("a"));
    108         binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
    109         assertTrue(binaryDictionary.isValidWord("a"));
    110 
    111         binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
    112         assertTrue(binaryDictionary.isValidWord("b"));
    113 
    114         final int unigramProbability = binaryDictionary.getFrequency("a");
    115         binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
    116         assertFalse(binaryDictionary.isValidBigram("a", "b"));
    117         binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
    118         assertFalse(binaryDictionary.isValidBigram("a", "b"));
    119         binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
    120         assertFalse(binaryDictionary.isValidBigram("a", "b"));
    121         binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
    122         assertTrue(binaryDictionary.isValidBigram("a", "b"));
    123 
    124         binaryDictionary.addUnigramWord("c", DUMMY_PROBABILITY);
    125         binaryDictionary.addBigramWords("a", "c", DUMMY_PROBABILITY);
    126         assertTrue(binaryDictionary.isValidBigram("a", "c"));
    127 
    128         // Add bigrams of not valid unigrams.
    129         binaryDictionary.addBigramWords("x", "y", Dictionary.NOT_A_PROBABILITY);
    130         assertFalse(binaryDictionary.isValidBigram("x", "y"));
    131         binaryDictionary.addBigramWords("x", "y", DUMMY_PROBABILITY);
    132         assertFalse(binaryDictionary.isValidBigram("x", "y"));
    133 
    134         binaryDictionary.close();
    135         dictFile.delete();
    136     }
    137 
    138     public void testDecayingProbability() {
    139         File dictFile = null;
    140         try {
    141             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    142         } catch (IOException e) {
    143             fail("IOException while writing an initial dictionary : " + e);
    144         }
    145         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    146                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    147                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    148 
    149         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    150         assertTrue(binaryDictionary.isValidWord("a"));
    151         forcePassingShortTime(binaryDictionary);
    152         assertFalse(binaryDictionary.isValidWord("a"));
    153 
    154         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    155         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    156         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    157         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    158         forcePassingShortTime(binaryDictionary);
    159         assertTrue(binaryDictionary.isValidWord("a"));
    160         forcePassingLongTime(binaryDictionary);
    161         assertFalse(binaryDictionary.isValidWord("a"));
    162 
    163         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    164         binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
    165         binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
    166         assertTrue(binaryDictionary.isValidBigram("a", "b"));
    167         forcePassingShortTime(binaryDictionary);
    168         assertFalse(binaryDictionary.isValidBigram("a", "b"));
    169 
    170         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    171         binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
    172         binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
    173         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    174         binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
    175         binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
    176         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    177         binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
    178         binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
    179         binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
    180         binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
    181         binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
    182         assertTrue(binaryDictionary.isValidBigram("a", "b"));
    183         forcePassingShortTime(binaryDictionary);
    184         assertTrue(binaryDictionary.isValidBigram("a", "b"));
    185         forcePassingLongTime(binaryDictionary);
    186         assertFalse(binaryDictionary.isValidBigram("a", "b"));
    187 
    188         binaryDictionary.close();
    189         dictFile.delete();
    190     }
    191 
    192     public void testAddManyUnigramsToDecayingDict() {
    193         final int unigramCount = 30000;
    194         final int unigramTypedCount = 100000;
    195         final int codePointSetSize = 50;
    196         final long seed = System.currentTimeMillis();
    197         final Random random = new Random(seed);
    198 
    199         File dictFile = null;
    200         try {
    201             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    202         } catch (IOException e) {
    203             fail("IOException while writing an initial dictionary : " + e);
    204         }
    205         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    206                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    207                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    208 
    209         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    210         final ArrayList<String> words = new ArrayList<String>();
    211 
    212         for (int i = 0; i < unigramCount; i++) {
    213             final String word = CodePointUtils.generateWord(random, codePointSet);
    214             words.add(word);
    215         }
    216 
    217         final int maxUnigramCount = Integer.parseInt(
    218                 binaryDictionary.getPropertyForTests(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
    219         for (int i = 0; i < unigramTypedCount; i++) {
    220             final String word = words.get(random.nextInt(words.size()));
    221             binaryDictionary.addUnigramWord(word, DUMMY_PROBABILITY);
    222 
    223             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    224                 final int unigramCountBeforeGC =
    225                         Integer.parseInt(binaryDictionary.getPropertyForTests(
    226                                 BinaryDictionary.UNIGRAM_COUNT_QUERY));
    227                 while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    228                     binaryDictionary.flushWithGC();
    229                 }
    230                 final int unigramCountAfterGC =
    231                         Integer.parseInt(binaryDictionary.getPropertyForTests(
    232                                 BinaryDictionary.UNIGRAM_COUNT_QUERY));
    233                 assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
    234             }
    235         }
    236 
    237         assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
    238                 BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
    239         assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
    240                 BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
    241     }
    242 
    243     public void testAddManyBigramsToDecayingDict() {
    244         final int unigramCount = 5000;
    245         final int bigramCount = 30000;
    246         final int bigramTypedCount = 100000;
    247         final int codePointSetSize = 50;
    248         final long seed = System.currentTimeMillis();
    249         final Random random = new Random(seed);
    250 
    251         File dictFile = null;
    252         try {
    253             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    254         } catch (IOException e) {
    255             fail("IOException while writing an initial dictionary : " + e);
    256         }
    257         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
    258                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
    259                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    260 
    261         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    262         final ArrayList<String> words = new ArrayList<String>();
    263         final ArrayList<Pair<String, String>> bigrams = new ArrayList<Pair<String, String>>();
    264 
    265         for (int i = 0; i < unigramCount; ++i) {
    266             final String word = CodePointUtils.generateWord(random, codePointSet);
    267             words.add(word);
    268         }
    269         for (int i = 0; i < bigramCount; ++i) {
    270             final int word0Index = random.nextInt(words.size());
    271             int word1Index = random.nextInt(words.size() - 1);
    272             if (word1Index >= word0Index) {
    273                 word1Index += 1;
    274             }
    275             final String word0 = words.get(word0Index);
    276             final String word1 = words.get(word1Index);
    277             final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
    278             bigrams.add(bigram);
    279         }
    280 
    281         final int maxBigramCount = Integer.parseInt(
    282                 binaryDictionary.getPropertyForTests(BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
    283         for (int i = 0; i < bigramTypedCount; ++i) {
    284             final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size()));
    285             binaryDictionary.addUnigramWord(bigram.first, DUMMY_PROBABILITY);
    286             binaryDictionary.addUnigramWord(bigram.second, DUMMY_PROBABILITY);
    287             binaryDictionary.addBigramWords(bigram.first, bigram.second, DUMMY_PROBABILITY);
    288 
    289             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    290                 final int bigramCountBeforeGC =
    291                         Integer.parseInt(binaryDictionary.getPropertyForTests(
    292                                 BinaryDictionary.BIGRAM_COUNT_QUERY));
    293                 while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
    294                     binaryDictionary.flushWithGC();
    295                 }
    296                 final int bigramCountAfterGC =
    297                         Integer.parseInt(binaryDictionary.getPropertyForTests(
    298                                 BinaryDictionary.BIGRAM_COUNT_QUERY));
    299                 assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
    300             }
    301         }
    302 
    303         assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
    304                 BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
    305         assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
    306                 BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
    307     }
    308 }
    309