1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.test.AndroidTestCase; 20 import android.test.suitebuilder.annotation.LargeTest; 21 import android.text.TextUtils; 22 import android.util.Pair; 23 24 import com.android.inputmethod.latin.makedict.CodePointUtils; 25 import com.android.inputmethod.latin.makedict.FormatSpec; 26 27 import java.io.File; 28 import java.io.IOException; 29 import java.util.ArrayList; 30 import java.util.HashMap; 31 import java.util.HashSet; 32 import java.util.Locale; 33 import java.util.Map; 34 import java.util.Random; 35 36 @LargeTest 37 public class BinaryDictionaryTests extends AndroidTestCase { 38 private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 39 private static final String TEST_LOCALE = "test"; 40 41 @Override 42 protected void setUp() throws Exception { 43 super.setUp(); 44 } 45 46 @Override 47 protected void tearDown() throws Exception { 48 super.tearDown(); 49 } 50 51 private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { 52 final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, 53 getContext().getCacheDir()); 54 Map<String, String> attributeMap = new HashMap<String, String>(); 55 attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, 56 FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); 57 if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), 58 3 /* dictVersion */, attributeMap)) { 59 return file; 60 } else { 61 throw new IOException("Empty dictionary cannot be created."); 62 } 63 } 64 65 public void testIsValidDictionary() { 66 File dictFile = null; 67 try { 68 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 69 } catch (IOException e) { 70 fail("IOException while writing an initial dictionary : " + e); 71 } 72 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 73 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 74 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 75 assertTrue("binaryDictionary must be valid for existing valid dictionary file.", 76 binaryDictionary.isValidDictionary()); 77 binaryDictionary.close(); 78 assertFalse("binaryDictionary must be invalid after closing.", 79 binaryDictionary.isValidDictionary()); 80 dictFile.delete(); 81 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, 82 dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), 83 TEST_LOCALE, true /* isUpdatable */); 84 assertFalse("binaryDictionary must be invalid for not existing dictionary file.", 85 binaryDictionary.isValidDictionary()); 86 binaryDictionary.close(); 87 } 88 89 public void testAddUnigramWord() { 90 File dictFile = null; 91 try { 92 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 93 } catch (IOException e) { 94 fail("IOException while writing an initial dictionary : " + e); 95 } 96 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 97 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 98 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 99 100 final int probability = 100; 101 binaryDictionary.addUnigramWord("aaa", probability); 102 // Reallocate and create. 103 binaryDictionary.addUnigramWord("aab", probability); 104 // Insert into children. 105 binaryDictionary.addUnigramWord("aac", probability); 106 // Make terminal. 107 binaryDictionary.addUnigramWord("aa", probability); 108 // Create children. 109 binaryDictionary.addUnigramWord("aaaa", probability); 110 // Reallocate and make termianl. 111 binaryDictionary.addUnigramWord("a", probability); 112 113 final int updatedProbability = 200; 114 // Update. 115 binaryDictionary.addUnigramWord("aaa", updatedProbability); 116 117 assertEquals(probability, binaryDictionary.getFrequency("aab")); 118 assertEquals(probability, binaryDictionary.getFrequency("aac")); 119 assertEquals(probability, binaryDictionary.getFrequency("aa")); 120 assertEquals(probability, binaryDictionary.getFrequency("aaaa")); 121 assertEquals(probability, binaryDictionary.getFrequency("a")); 122 assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa")); 123 124 dictFile.delete(); 125 } 126 127 public void testRandomlyAddUnigramWord() { 128 final int wordCount = 1000; 129 final int codePointSetSize = 50; 130 final long seed = System.currentTimeMillis(); 131 132 File dictFile = null; 133 try { 134 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 135 } catch (IOException e) { 136 fail("IOException while writing an initial dictionary : " + e); 137 } 138 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 139 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 140 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 141 142 final HashMap<String, Integer> probabilityMap = new HashMap<String, Integer>(); 143 // Test a word that isn't contained within the dictionary. 144 final Random random = new Random(seed); 145 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 146 for (int i = 0; i < wordCount; ++i) { 147 final String word = CodePointUtils.generateWord(random, codePointSet); 148 probabilityMap.put(word, random.nextInt(0xFF)); 149 } 150 for (String word : probabilityMap.keySet()) { 151 binaryDictionary.addUnigramWord(word, probabilityMap.get(word)); 152 } 153 for (String word : probabilityMap.keySet()) { 154 assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); 155 } 156 dictFile.delete(); 157 } 158 159 public void testAddBigramWords() { 160 File dictFile = null; 161 try { 162 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 163 } catch (IOException e) { 164 fail("IOException while writing an initial dictionary : " + e); 165 } 166 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 167 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 168 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 169 170 final int unigramProbability = 100; 171 final int bigramProbability = 10; 172 final int updatedBigramProbability = 15; 173 binaryDictionary.addUnigramWord("aaa", unigramProbability); 174 binaryDictionary.addUnigramWord("abb", unigramProbability); 175 binaryDictionary.addUnigramWord("bcc", unigramProbability); 176 binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); 177 binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); 178 binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); 179 binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); 180 181 final int probability = binaryDictionary.calculateProbability(unigramProbability, 182 bigramProbability); 183 assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); 184 assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); 185 assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); 186 assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); 187 assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); 188 assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); 189 assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); 190 assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); 191 192 binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability); 193 final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability, 194 updatedBigramProbability); 195 assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb")); 196 197 assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); 198 assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); 199 assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); 200 assertEquals(Dictionary.NOT_A_PROBABILITY, 201 binaryDictionary.getBigramProbability("bcc", "aaa")); 202 assertEquals(Dictionary.NOT_A_PROBABILITY, 203 binaryDictionary.getBigramProbability("bcc", "bbc")); 204 assertEquals(Dictionary.NOT_A_PROBABILITY, 205 binaryDictionary.getBigramProbability("aaa", "aaa")); 206 207 // Testing bigram link. 208 binaryDictionary.addUnigramWord("abcde", unigramProbability); 209 binaryDictionary.addUnigramWord("fghij", unigramProbability); 210 binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability); 211 binaryDictionary.addUnigramWord("fgh", unigramProbability); 212 binaryDictionary.addUnigramWord("abc", unigramProbability); 213 binaryDictionary.addUnigramWord("f", unigramProbability); 214 assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij")); 215 assertEquals(Dictionary.NOT_A_PROBABILITY, 216 binaryDictionary.getBigramProbability("abcde", "fgh")); 217 binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability); 218 assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij")); 219 220 dictFile.delete(); 221 } 222 223 public void testRandomlyAddBigramWords() { 224 final int wordCount = 100; 225 final int bigramCount = 1000; 226 final int codePointSetSize = 50; 227 final long seed = System.currentTimeMillis(); 228 final Random random = new Random(seed); 229 230 File dictFile = null; 231 try { 232 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 233 } catch (IOException e) { 234 fail("IOException while writing an initial dictionary : " + e); 235 } 236 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 237 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 238 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 239 240 final ArrayList<String> words = new ArrayList<String>(); 241 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); 242 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 243 final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); 244 final HashMap<Pair<String, String>, Integer> bigramProbabilities = 245 new HashMap<Pair<String, String>, Integer>(); 246 247 for (int i = 0; i < wordCount; ++i) { 248 final String word = CodePointUtils.generateWord(random, codePointSet); 249 words.add(word); 250 final int unigramProbability = random.nextInt(0xFF); 251 unigramProbabilities.put(word, unigramProbability); 252 binaryDictionary.addUnigramWord(word, unigramProbability); 253 } 254 255 for (int i = 0; i < bigramCount; i++) { 256 final String word0 = words.get(random.nextInt(wordCount)); 257 final String word1 = words.get(random.nextInt(wordCount)); 258 if (TextUtils.equals(word0, word1)) { 259 continue; 260 } 261 final Pair<String, String> bigram = new Pair<String, String>(word0, word1); 262 bigramWords.add(bigram); 263 final int bigramProbability = random.nextInt(0xF); 264 bigramProbabilities.put(bigram, bigramProbability); 265 binaryDictionary.addBigramWords(word0, word1, bigramProbability); 266 } 267 268 for (final Pair<String, String> bigram : bigramWords) { 269 final int unigramProbability = unigramProbabilities.get(bigram.second); 270 final int bigramProbability = bigramProbabilities.get(bigram); 271 final int probability = binaryDictionary.calculateProbability(unigramProbability, 272 bigramProbability); 273 assertEquals(probability, 274 binaryDictionary.getBigramProbability(bigram.first, bigram.second)); 275 } 276 277 dictFile.delete(); 278 } 279 280 public void testRemoveBigramWords() { 281 File dictFile = null; 282 try { 283 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 284 } catch (IOException e) { 285 fail("IOException while writing an initial dictionary : " + e); 286 } 287 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 288 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 289 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 290 final int unigramProbability = 100; 291 final int bigramProbability = 10; 292 binaryDictionary.addUnigramWord("aaa", unigramProbability); 293 binaryDictionary.addUnigramWord("abb", unigramProbability); 294 binaryDictionary.addUnigramWord("bcc", unigramProbability); 295 binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); 296 binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); 297 binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); 298 binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); 299 300 assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); 301 assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); 302 assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); 303 assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); 304 305 binaryDictionary.removeBigramWords("aaa", "abb"); 306 assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); 307 binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); 308 assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); 309 310 311 binaryDictionary.removeBigramWords("aaa", "bcc"); 312 assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc")); 313 binaryDictionary.removeBigramWords("abb", "aaa"); 314 assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa")); 315 binaryDictionary.removeBigramWords("abb", "bcc"); 316 assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc")); 317 318 binaryDictionary.removeBigramWords("aaa", "abb"); 319 // Test remove non-existing bigram operation. 320 binaryDictionary.removeBigramWords("aaa", "abb"); 321 binaryDictionary.removeBigramWords("bcc", "aaa"); 322 323 dictFile.delete(); 324 } 325 326 public void testFlushDictionary() { 327 File dictFile = null; 328 try { 329 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 330 } catch (IOException e) { 331 fail("IOException while writing an initial dictionary : " + e); 332 } 333 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 334 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 335 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 336 337 final int probability = 100; 338 binaryDictionary.addUnigramWord("aaa", probability); 339 binaryDictionary.addUnigramWord("abcd", probability); 340 // Close without flushing. 341 binaryDictionary.close(); 342 343 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 344 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 345 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 346 347 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); 348 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); 349 350 binaryDictionary.addUnigramWord("aaa", probability); 351 binaryDictionary.addUnigramWord("abcd", probability); 352 binaryDictionary.flush(); 353 binaryDictionary.close(); 354 355 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 356 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 357 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 358 359 assertEquals(probability, binaryDictionary.getFrequency("aaa")); 360 assertEquals(probability, binaryDictionary.getFrequency("abcd")); 361 binaryDictionary.addUnigramWord("bcde", probability); 362 binaryDictionary.flush(); 363 binaryDictionary.close(); 364 365 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 366 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 367 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 368 assertEquals(probability, binaryDictionary.getFrequency("bcde")); 369 binaryDictionary.close(); 370 371 dictFile.delete(); 372 } 373 374 public void testFlushWithGCDictionary() { 375 File dictFile = null; 376 try { 377 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 378 } catch (IOException e) { 379 fail("IOException while writing an initial dictionary : " + e); 380 } 381 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 382 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 383 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 384 385 final int unigramProbability = 100; 386 final int bigramProbability = 10; 387 binaryDictionary.addUnigramWord("aaa", unigramProbability); 388 binaryDictionary.addUnigramWord("abb", unigramProbability); 389 binaryDictionary.addUnigramWord("bcc", unigramProbability); 390 binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); 391 binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); 392 binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); 393 binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); 394 binaryDictionary.flushWithGC(); 395 binaryDictionary.close(); 396 397 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 398 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 399 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 400 final int probability = binaryDictionary.calculateProbability(unigramProbability, 401 bigramProbability); 402 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 403 assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); 404 assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); 405 assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); 406 assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); 407 assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); 408 assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); 409 assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); 410 assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); 411 assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); 412 binaryDictionary.flushWithGC(); 413 binaryDictionary.close(); 414 415 dictFile.delete(); 416 } 417 418 // TODO: Evaluate performance of GC 419 public void testAddBigramWordsAndFlashWithGC() { 420 final int wordCount = 100; 421 final int bigramCount = 1000; 422 final int codePointSetSize = 30; 423 final long seed = System.currentTimeMillis(); 424 final Random random = new Random(seed); 425 426 File dictFile = null; 427 try { 428 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 429 } catch (IOException e) { 430 fail("IOException while writing an initial dictionary : " + e); 431 } 432 433 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 434 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 435 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 436 437 final ArrayList<String> words = new ArrayList<String>(); 438 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); 439 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 440 final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); 441 final HashMap<Pair<String, String>, Integer> bigramProbabilities = 442 new HashMap<Pair<String, String>, Integer>(); 443 444 for (int i = 0; i < wordCount; ++i) { 445 final String word = CodePointUtils.generateWord(random, codePointSet); 446 words.add(word); 447 final int unigramProbability = random.nextInt(0xFF); 448 unigramProbabilities.put(word, unigramProbability); 449 binaryDictionary.addUnigramWord(word, unigramProbability); 450 } 451 452 for (int i = 0; i < bigramCount; i++) { 453 final String word0 = words.get(random.nextInt(wordCount)); 454 final String word1 = words.get(random.nextInt(wordCount)); 455 if (TextUtils.equals(word0, word1)) { 456 continue; 457 } 458 final Pair<String, String> bigram = new Pair<String, String>(word0, word1); 459 bigramWords.add(bigram); 460 final int bigramProbability = random.nextInt(0xF); 461 bigramProbabilities.put(bigram, bigramProbability); 462 binaryDictionary.addBigramWords(word0, word1, bigramProbability); 463 } 464 465 binaryDictionary.flushWithGC(); 466 binaryDictionary.close(); 467 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 468 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 469 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 470 471 for (final Pair<String, String> bigram : bigramWords) { 472 final int unigramProbability = unigramProbabilities.get(bigram.second); 473 final int bigramProbability = bigramProbabilities.get(bigram); 474 final int probability = binaryDictionary.calculateProbability(unigramProbability, 475 bigramProbability); 476 assertEquals(probability, 477 binaryDictionary.getBigramProbability(bigram.first, bigram.second)); 478 } 479 480 dictFile.delete(); 481 } 482 483 public void testRandomOperetionsAndFlashWithGC() { 484 final int flashWithGCIterationCount = 50; 485 final int operationCountInEachIteration = 200; 486 final int initialUnigramCount = 100; 487 final float addUnigramProb = 0.5f; 488 final float addBigramProb = 0.8f; 489 final float removeBigramProb = 0.2f; 490 final int codePointSetSize = 30; 491 492 final long seed = System.currentTimeMillis(); 493 final Random random = new Random(seed); 494 495 File dictFile = null; 496 try { 497 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 498 } catch (IOException e) { 499 fail("IOException while writing an initial dictionary : " + e); 500 } 501 502 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 503 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 504 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 505 final ArrayList<String> words = new ArrayList<String>(); 506 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); 507 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 508 final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); 509 final HashMap<Pair<String, String>, Integer> bigramProbabilities = 510 new HashMap<Pair<String, String>, Integer>(); 511 for (int i = 0; i < initialUnigramCount; ++i) { 512 final String word = CodePointUtils.generateWord(random, codePointSet); 513 words.add(word); 514 final int unigramProbability = random.nextInt(0xFF); 515 unigramProbabilities.put(word, unigramProbability); 516 binaryDictionary.addUnigramWord(word, unigramProbability); 517 } 518 binaryDictionary.flushWithGC(); 519 binaryDictionary.close(); 520 521 for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { 522 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 523 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 524 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 525 for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { 526 // Add unigram. 527 if (random.nextFloat() < addUnigramProb) { 528 final String word = CodePointUtils.generateWord(random, codePointSet); 529 words.add(word); 530 final int unigramProbability = random.nextInt(0xFF); 531 unigramProbabilities.put(word, unigramProbability); 532 binaryDictionary.addUnigramWord(word, unigramProbability); 533 } 534 // Add bigram. 535 if (random.nextFloat() < addBigramProb && words.size() > 2) { 536 final int word0Index = random.nextInt(words.size()); 537 int word1Index = random.nextInt(words.size() - 1); 538 if (word0Index <= word1Index) { 539 word1Index++; 540 } 541 final String word0 = words.get(word0Index); 542 final String word1 = words.get(word1Index); 543 if (TextUtils.equals(word0, word1)) { 544 continue; 545 } 546 final int bigramProbability = random.nextInt(0xF); 547 final Pair<String, String> bigram = new Pair<String, String>(word0, word1); 548 bigramWords.add(bigram); 549 bigramProbabilities.put(bigram, bigramProbability); 550 binaryDictionary.addBigramWords(word0, word1, bigramProbability); 551 } 552 // Remove bigram. 553 if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { 554 final int bigramIndex = random.nextInt(bigramWords.size()); 555 final Pair<String, String> bigram = bigramWords.get(bigramIndex); 556 bigramWords.remove(bigramIndex); 557 bigramProbabilities.remove(bigram); 558 binaryDictionary.removeBigramWords(bigram.first, bigram.second); 559 } 560 } 561 562 // Test whether the all unigram operations are collectlly handled. 563 for (int i = 0; i < words.size(); i++) { 564 final String word = words.get(i); 565 final int unigramProbability = unigramProbabilities.get(word); 566 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 567 } 568 // Test whether the all bigram operations are collectlly handled. 569 for (int i = 0; i < bigramWords.size(); i++) { 570 final Pair<String, String> bigram = bigramWords.get(i); 571 final int unigramProbability = unigramProbabilities.get(bigram.second); 572 final int probability; 573 if (bigramProbabilities.containsKey(bigram)) { 574 final int bigramProbability = bigramProbabilities.get(bigram); 575 probability = binaryDictionary.calculateProbability(unigramProbability, 576 bigramProbability); 577 } else { 578 probability = Dictionary.NOT_A_PROBABILITY; 579 } 580 assertEquals(probability, 581 binaryDictionary.getBigramProbability(bigram.first, bigram.second)); 582 } 583 binaryDictionary.flushWithGC(); 584 binaryDictionary.close(); 585 } 586 587 dictFile.delete(); 588 } 589 590 public void testAddManyUnigramsAndFlushWithGC() { 591 final int flashWithGCIterationCount = 3; 592 final int codePointSetSize = 50; 593 594 final long seed = System.currentTimeMillis(); 595 final Random random = new Random(seed); 596 597 File dictFile = null; 598 try { 599 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 600 } catch (IOException e) { 601 fail("IOException while writing an initial dictionary : " + e); 602 } 603 604 final ArrayList<String> words = new ArrayList<String>(); 605 final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); 606 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 607 608 BinaryDictionary binaryDictionary; 609 for (int i = 0; i < flashWithGCIterationCount; i++) { 610 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 611 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 612 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 613 while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 614 final String word = CodePointUtils.generateWord(random, codePointSet); 615 words.add(word); 616 final int unigramProbability = random.nextInt(0xFF); 617 unigramProbabilities.put(word, unigramProbability); 618 binaryDictionary.addUnigramWord(word, unigramProbability); 619 } 620 621 for (int j = 0; j < words.size(); j++) { 622 final String word = words.get(j); 623 final int unigramProbability = unigramProbabilities.get(word); 624 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 625 } 626 627 binaryDictionary.flushWithGC(); 628 binaryDictionary.close(); 629 } 630 631 dictFile.delete(); 632 } 633 634 public void testUnigramAndBigramCount() { 635 final int flashWithGCIterationCount = 10; 636 final int codePointSetSize = 50; 637 final int unigramCountPerIteration = 1000; 638 final int bigramCountPerIteration = 2000; 639 final long seed = System.currentTimeMillis(); 640 final Random random = new Random(seed); 641 642 File dictFile = null; 643 try { 644 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 645 } catch (IOException e) { 646 fail("IOException while writing an initial dictionary : " + e); 647 } 648 649 final ArrayList<String> words = new ArrayList<String>(); 650 final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>(); 651 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 652 653 BinaryDictionary binaryDictionary; 654 for (int i = 0; i < flashWithGCIterationCount; i++) { 655 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 656 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 657 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 658 for (int j = 0; j < unigramCountPerIteration; j++) { 659 final String word = CodePointUtils.generateWord(random, codePointSet); 660 words.add(word); 661 final int unigramProbability = random.nextInt(0xFF); 662 binaryDictionary.addUnigramWord(word, unigramProbability); 663 } 664 for (int j = 0; j < bigramCountPerIteration; j++) { 665 final String word0 = words.get(random.nextInt(words.size())); 666 final String word1 = words.get(random.nextInt(words.size())); 667 if (TextUtils.equals(word0, word1)) { 668 continue; 669 } 670 bigrams.add(new Pair<String, String>(word0, word1)); 671 final int bigramProbability = random.nextInt(0xF); 672 binaryDictionary.addBigramWords(word0, word1, bigramProbability); 673 } 674 assertEquals(new HashSet<String>(words).size(), Integer.parseInt( 675 binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 676 assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( 677 binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); 678 binaryDictionary.flushWithGC(); 679 assertEquals(new HashSet<String>(words).size(), Integer.parseInt( 680 binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 681 assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( 682 binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); 683 binaryDictionary.close(); 684 } 685 686 dictFile.delete(); 687 } 688 } 689