1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.test.AndroidTestCase; 20 import android.test.suitebuilder.annotation.LargeTest; 21 import android.util.Pair; 22 23 import com.android.inputmethod.latin.makedict.CodePointUtils; 24 import com.android.inputmethod.latin.makedict.FormatSpec; 25 26 import java.io.File; 27 import java.io.IOException; 28 import java.util.ArrayList; 29 import java.util.HashMap; 30 import java.util.Locale; 31 import java.util.Map; 32 import java.util.Random; 33 34 @LargeTest 35 public class BinaryDictionaryDecayingTests extends AndroidTestCase { 36 private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 37 private static final String TEST_LOCALE = "test"; 38 39 // Note that these are corresponding definitions in native code in 40 // latinime::DynamicPatriciaTriePolicy. 41 private static final String SET_NEEDS_TO_DECAY_FOR_TESTING_KEY = 42 "SET_NEEDS_TO_DECAY_FOR_TESTING"; 43 44 private static final int DUMMY_PROBABILITY = 0; 45 46 @Override 47 protected void setUp() throws Exception { 48 super.setUp(); 49 } 50 51 @Override 52 protected void tearDown() throws Exception { 53 super.tearDown(); 54 } 55 56 private void forcePassingShortTime(final BinaryDictionary binaryDictionary) { 57 // Entries having low probability would be suppressed once in 3 GCs. 58 final int count = 3; 59 for (int i = 0; i < count; i++) { 60 binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY); 61 binaryDictionary.flushWithGC(); 62 } 63 } 64 65 private void forcePassingLongTime(final BinaryDictionary binaryDictionary) { 66 // Currently, probabilities are decayed when GC is run. All entries that have never been 67 // typed in 128 GCs would be removed. 68 final int count = 128; 69 for (int i = 0; i < count; i++) { 70 binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY); 71 binaryDictionary.flushWithGC(); 72 } 73 } 74 75 private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { 76 final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, 77 getContext().getCacheDir()); 78 Map<String, String> attributeMap = new HashMap<String, String>(); 79 attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, 80 FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); 81 attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE, 82 FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); 83 if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), 84 3 /* dictVersion */, attributeMap)) { 85 return file; 86 } else { 87 throw new IOException("Empty dictionary cannot be created."); 88 } 89 } 90 91 public void testAddValidAndInvalidWords() { 92 File dictFile = null; 93 try { 94 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 95 } catch (IOException e) { 96 fail("IOException while writing an initial dictionary : " + e); 97 } 98 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 99 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 100 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 101 102 binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY); 103 assertFalse(binaryDictionary.isValidWord("a")); 104 binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY); 105 assertFalse(binaryDictionary.isValidWord("a")); 106 binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY); 107 assertFalse(binaryDictionary.isValidWord("a")); 108 binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY); 109 assertTrue(binaryDictionary.isValidWord("a")); 110 111 binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); 112 assertTrue(binaryDictionary.isValidWord("b")); 113 114 final int unigramProbability = binaryDictionary.getFrequency("a"); 115 binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY); 116 assertFalse(binaryDictionary.isValidBigram("a", "b")); 117 binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY); 118 assertFalse(binaryDictionary.isValidBigram("a", "b")); 119 binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY); 120 assertFalse(binaryDictionary.isValidBigram("a", "b")); 121 binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY); 122 assertTrue(binaryDictionary.isValidBigram("a", "b")); 123 124 binaryDictionary.addUnigramWord("c", DUMMY_PROBABILITY); 125 binaryDictionary.addBigramWords("a", "c", DUMMY_PROBABILITY); 126 assertTrue(binaryDictionary.isValidBigram("a", "c")); 127 128 // Add bigrams of not valid unigrams. 129 binaryDictionary.addBigramWords("x", "y", Dictionary.NOT_A_PROBABILITY); 130 assertFalse(binaryDictionary.isValidBigram("x", "y")); 131 binaryDictionary.addBigramWords("x", "y", DUMMY_PROBABILITY); 132 assertFalse(binaryDictionary.isValidBigram("x", "y")); 133 134 binaryDictionary.close(); 135 dictFile.delete(); 136 } 137 138 public void testDecayingProbability() { 139 File dictFile = null; 140 try { 141 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 142 } catch (IOException e) { 143 fail("IOException while writing an initial dictionary : " + e); 144 } 145 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 146 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 147 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 148 149 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 150 assertTrue(binaryDictionary.isValidWord("a")); 151 forcePassingShortTime(binaryDictionary); 152 assertFalse(binaryDictionary.isValidWord("a")); 153 154 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 155 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 156 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 157 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 158 forcePassingShortTime(binaryDictionary); 159 assertTrue(binaryDictionary.isValidWord("a")); 160 forcePassingLongTime(binaryDictionary); 161 assertFalse(binaryDictionary.isValidWord("a")); 162 163 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 164 binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); 165 binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); 166 assertTrue(binaryDictionary.isValidBigram("a", "b")); 167 forcePassingShortTime(binaryDictionary); 168 assertFalse(binaryDictionary.isValidBigram("a", "b")); 169 170 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 171 binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); 172 binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); 173 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 174 binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); 175 binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); 176 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 177 binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); 178 binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); 179 binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); 180 binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); 181 binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); 182 assertTrue(binaryDictionary.isValidBigram("a", "b")); 183 forcePassingShortTime(binaryDictionary); 184 assertTrue(binaryDictionary.isValidBigram("a", "b")); 185 forcePassingLongTime(binaryDictionary); 186 assertFalse(binaryDictionary.isValidBigram("a", "b")); 187 188 binaryDictionary.close(); 189 dictFile.delete(); 190 } 191 192 public void testAddManyUnigramsToDecayingDict() { 193 final int unigramCount = 30000; 194 final int unigramTypedCount = 100000; 195 final int codePointSetSize = 50; 196 final long seed = System.currentTimeMillis(); 197 final Random random = new Random(seed); 198 199 File dictFile = null; 200 try { 201 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 202 } catch (IOException e) { 203 fail("IOException while writing an initial dictionary : " + e); 204 } 205 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 206 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 207 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 208 209 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 210 final ArrayList<String> words = new ArrayList<String>(); 211 212 for (int i = 0; i < unigramCount; i++) { 213 final String word = CodePointUtils.generateWord(random, codePointSet); 214 words.add(word); 215 } 216 217 final int maxUnigramCount = Integer.parseInt( 218 binaryDictionary.getPropertyForTests(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY)); 219 for (int i = 0; i < unigramTypedCount; i++) { 220 final String word = words.get(random.nextInt(words.size())); 221 binaryDictionary.addUnigramWord(word, DUMMY_PROBABILITY); 222 223 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 224 final int unigramCountBeforeGC = 225 Integer.parseInt(binaryDictionary.getPropertyForTests( 226 BinaryDictionary.UNIGRAM_COUNT_QUERY)); 227 while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 228 binaryDictionary.flushWithGC(); 229 } 230 final int unigramCountAfterGC = 231 Integer.parseInt(binaryDictionary.getPropertyForTests( 232 BinaryDictionary.UNIGRAM_COUNT_QUERY)); 233 assertTrue(unigramCountBeforeGC > unigramCountAfterGC); 234 } 235 } 236 237 assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests( 238 BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0); 239 assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests( 240 BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount); 241 } 242 243 public void testAddManyBigramsToDecayingDict() { 244 final int unigramCount = 5000; 245 final int bigramCount = 30000; 246 final int bigramTypedCount = 100000; 247 final int codePointSetSize = 50; 248 final long seed = System.currentTimeMillis(); 249 final Random random = new Random(seed); 250 251 File dictFile = null; 252 try { 253 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 254 } catch (IOException e) { 255 fail("IOException while writing an initial dictionary : " + e); 256 } 257 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 258 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 259 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 260 261 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 262 final ArrayList<String> words = new ArrayList<String>(); 263 final ArrayList<Pair<String, String>> bigrams = new ArrayList<Pair<String, String>>(); 264 265 for (int i = 0; i < unigramCount; ++i) { 266 final String word = CodePointUtils.generateWord(random, codePointSet); 267 words.add(word); 268 } 269 for (int i = 0; i < bigramCount; ++i) { 270 final int word0Index = random.nextInt(words.size()); 271 int word1Index = random.nextInt(words.size() - 1); 272 if (word1Index >= word0Index) { 273 word1Index += 1; 274 } 275 final String word0 = words.get(word0Index); 276 final String word1 = words.get(word1Index); 277 final Pair<String, String> bigram = new Pair<String, String>(word0, word1); 278 bigrams.add(bigram); 279 } 280 281 final int maxBigramCount = Integer.parseInt( 282 binaryDictionary.getPropertyForTests(BinaryDictionary.MAX_BIGRAM_COUNT_QUERY)); 283 for (int i = 0; i < bigramTypedCount; ++i) { 284 final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size())); 285 binaryDictionary.addUnigramWord(bigram.first, DUMMY_PROBABILITY); 286 binaryDictionary.addUnigramWord(bigram.second, DUMMY_PROBABILITY); 287 binaryDictionary.addBigramWords(bigram.first, bigram.second, DUMMY_PROBABILITY); 288 289 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 290 final int bigramCountBeforeGC = 291 Integer.parseInt(binaryDictionary.getPropertyForTests( 292 BinaryDictionary.BIGRAM_COUNT_QUERY)); 293 while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 294 binaryDictionary.flushWithGC(); 295 } 296 final int bigramCountAfterGC = 297 Integer.parseInt(binaryDictionary.getPropertyForTests( 298 BinaryDictionary.BIGRAM_COUNT_QUERY)); 299 assertTrue(bigramCountBeforeGC > bigramCountAfterGC); 300 } 301 } 302 303 assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests( 304 BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0); 305 assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests( 306 BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount); 307 } 308 } 309