1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.test.AndroidTestCase; 20 import android.test.suitebuilder.annotation.LargeTest; 21 import android.text.TextUtils; 22 import android.util.Pair; 23 24 import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; 25 import com.android.inputmethod.latin.makedict.CodePointUtils; 26 import com.android.inputmethod.latin.makedict.FormatSpec; 27 import com.android.inputmethod.latin.makedict.WeightedString; 28 import com.android.inputmethod.latin.makedict.WordProperty; 29 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; 30 import com.android.inputmethod.latin.utils.FileUtils; 31 import com.android.inputmethod.latin.utils.LanguageModelParam; 32 33 import java.io.File; 34 import java.io.IOException; 35 import java.util.ArrayList; 36 import java.util.HashMap; 37 import java.util.HashSet; 38 import java.util.Locale; 39 import java.util.Map; 40 import java.util.Random; 41 42 // TODO Use the seed passed as an argument for makedict test. 43 @LargeTest 44 public class BinaryDictionaryTests extends AndroidTestCase { 45 private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 46 private static final String TEST_LOCALE = "test"; 47 private static final int[] DICT_FORMAT_VERSIONS = 48 new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV }; 49 50 private static boolean canCheckBigramProbability(final int formatVersion) { 51 return formatVersion > FormatSpec.VERSION401; 52 } 53 54 private static boolean supportsBeginningOfSentence(final int formatVersion) { 55 return formatVersion > FormatSpec.VERSION401; 56 } 57 58 private File createEmptyDictionaryAndGetFile(final String dictId, 59 final int formatVersion) throws IOException { 60 if (formatVersion == FormatSpec.VERSION4 61 || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING 62 || formatVersion == FormatSpec.VERSION4_DEV) { 63 return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion); 64 } else { 65 throw new IOException("Dictionary format version " + formatVersion 66 + " is not supported."); 67 } 68 } 69 70 private File createEmptyVer4DictionaryAndGetFile(final String dictId, 71 final int formatVersion) throws IOException { 72 final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, 73 getContext().getCacheDir()); 74 file.delete(); 75 file.mkdir(); 76 Map<String, String> attributeMap = new HashMap<>(); 77 if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, 78 Locale.ENGLISH, attributeMap)) { 79 return file; 80 } else { 81 throw new IOException("Empty dictionary " + file.getAbsolutePath() 82 + " cannot be created. Format version: " + formatVersion); 83 } 84 } 85 86 public void testIsValidDictionary() { 87 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 88 testIsValidDictionary(formatVersion); 89 } 90 } 91 92 private void testIsValidDictionary(final int formatVersion) { 93 File dictFile = null; 94 try { 95 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 96 } catch (IOException e) { 97 fail("IOException while writing an initial dictionary : " + e); 98 } 99 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 100 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 101 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 102 assertTrue("binaryDictionary must be valid for existing valid dictionary file.", 103 binaryDictionary.isValidDictionary()); 104 binaryDictionary.close(); 105 assertFalse("binaryDictionary must be invalid after closing.", 106 binaryDictionary.isValidDictionary()); 107 FileUtils.deleteRecursively(dictFile); 108 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, 109 dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), 110 TEST_LOCALE, true /* isUpdatable */); 111 assertFalse("binaryDictionary must be invalid for not existing dictionary file.", 112 binaryDictionary.isValidDictionary()); 113 binaryDictionary.close(); 114 } 115 116 public void testConstructingDictionaryOnMemory() { 117 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 118 testConstructingDictionaryOnMemory(formatVersion); 119 } 120 } 121 122 private void testConstructingDictionaryOnMemory(final int formatVersion) { 123 File dictFile = null; 124 try { 125 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 126 } catch (IOException e) { 127 fail("IOException while writing an initial dictionary : " + e); 128 } 129 FileUtils.deleteRecursively(dictFile); 130 assertFalse(dictFile.exists()); 131 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 132 true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion, 133 new HashMap<String, String>()); 134 assertTrue(binaryDictionary.isValidDictionary()); 135 assertEquals(formatVersion, binaryDictionary.getFormatVersion()); 136 final int probability = 100; 137 addUnigramWord(binaryDictionary, "word", probability); 138 assertEquals(probability, binaryDictionary.getFrequency("word")); 139 assertFalse(dictFile.exists()); 140 binaryDictionary.flush(); 141 assertTrue(dictFile.exists()); 142 assertTrue(binaryDictionary.isValidDictionary()); 143 assertEquals(formatVersion, binaryDictionary.getFormatVersion()); 144 assertEquals(probability, binaryDictionary.getFrequency("word")); 145 binaryDictionary.close(); 146 dictFile.delete(); 147 } 148 149 public void testAddTooLongWord() { 150 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 151 testAddTooLongWord(formatVersion); 152 } 153 } 154 155 private void testAddTooLongWord(final int formatVersion) { 156 File dictFile = null; 157 try { 158 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 159 } catch (IOException e) { 160 fail("IOException while writing an initial dictionary : " + e); 161 } 162 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 163 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 164 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 165 166 final StringBuffer stringBuilder = new StringBuffer(); 167 for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) { 168 stringBuilder.append('a'); 169 } 170 final String validLongWord = stringBuilder.toString(); 171 stringBuilder.append('a'); 172 final String invalidLongWord = stringBuilder.toString(); 173 final int probability = 100; 174 addUnigramWord(binaryDictionary, "aaa", probability); 175 addUnigramWord(binaryDictionary, validLongWord, probability); 176 addUnigramWord(binaryDictionary, invalidLongWord, probability); 177 // Too long short cut. 178 binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, 179 10 /* shortcutProbability */, false /* isBeginningOfSentence */, 180 false /* isNotAWord */, false /* isBlacklisted */, 181 BinaryDictionary.NOT_A_VALID_TIMESTAMP); 182 addUnigramWord(binaryDictionary, "abc", probability); 183 final int updatedProbability = 200; 184 // Update. 185 addUnigramWord(binaryDictionary, validLongWord, updatedProbability); 186 addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability); 187 addUnigramWord(binaryDictionary, "abc", updatedProbability); 188 189 assertEquals(probability, binaryDictionary.getFrequency("aaa")); 190 assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord)); 191 assertEquals(BinaryDictionary.NOT_A_PROBABILITY, 192 binaryDictionary.getFrequency(invalidLongWord)); 193 assertEquals(updatedProbability, binaryDictionary.getFrequency("abc")); 194 dictFile.delete(); 195 } 196 197 private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, 198 final int probability) { 199 binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, 200 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, 201 false /* isBeginningOfSentence */, false /* isNotAWord */, 202 false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 203 } 204 205 private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, 206 final String word1, final int probability) { 207 binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability, 208 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 209 } 210 211 private static boolean isValidBigram(final BinaryDictionary binaryDictionary, 212 final String word0, final String word1) { 213 return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1); 214 } 215 216 private static void removeBigramEntry(final BinaryDictionary binaryDictionary, 217 final String word0, final String word1) { 218 binaryDictionary.removeNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1); 219 } 220 221 private static int getBigramProbability(final BinaryDictionary binaryDictionary, 222 final String word0, final String word1) { 223 return binaryDictionary.getNgramProbability(new PrevWordsInfo(new WordInfo(word0)), word1); 224 } 225 226 public void testAddUnigramWord() { 227 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 228 testAddUnigramWord(formatVersion); 229 } 230 } 231 232 private void testAddUnigramWord(final int formatVersion) { 233 File dictFile = null; 234 try { 235 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 236 } catch (IOException e) { 237 fail("IOException while writing an initial dictionary : " + e); 238 } 239 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 240 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 241 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 242 243 final int probability = 100; 244 addUnigramWord(binaryDictionary, "aaa", probability); 245 // Reallocate and create. 246 addUnigramWord(binaryDictionary, "aab", probability); 247 // Insert into children. 248 addUnigramWord(binaryDictionary, "aac", probability); 249 // Make terminal. 250 addUnigramWord(binaryDictionary, "aa", probability); 251 // Create children. 252 addUnigramWord(binaryDictionary, "aaaa", probability); 253 // Reallocate and make termianl. 254 addUnigramWord(binaryDictionary, "a", probability); 255 256 final int updatedProbability = 200; 257 // Update. 258 addUnigramWord(binaryDictionary, "aaa", updatedProbability); 259 260 assertEquals(probability, binaryDictionary.getFrequency("aab")); 261 assertEquals(probability, binaryDictionary.getFrequency("aac")); 262 assertEquals(probability, binaryDictionary.getFrequency("aa")); 263 assertEquals(probability, binaryDictionary.getFrequency("aaaa")); 264 assertEquals(probability, binaryDictionary.getFrequency("a")); 265 assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa")); 266 267 dictFile.delete(); 268 } 269 270 public void testRandomlyAddUnigramWord() { 271 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 272 testRandomlyAddUnigramWord(formatVersion); 273 } 274 } 275 276 private void testRandomlyAddUnigramWord(final int formatVersion) { 277 final int wordCount = 1000; 278 final int codePointSetSize = 50; 279 final long seed = System.currentTimeMillis(); 280 281 File dictFile = null; 282 try { 283 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 284 } catch (IOException e) { 285 fail("IOException while writing an initial dictionary : " + e); 286 } 287 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 288 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 289 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 290 291 final HashMap<String, Integer> probabilityMap = new HashMap<>(); 292 // Test a word that isn't contained within the dictionary. 293 final Random random = new Random(seed); 294 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 295 for (int i = 0; i < wordCount; ++i) { 296 final String word = CodePointUtils.generateWord(random, codePointSet); 297 probabilityMap.put(word, random.nextInt(0xFF)); 298 } 299 for (String word : probabilityMap.keySet()) { 300 addUnigramWord(binaryDictionary, word, probabilityMap.get(word)); 301 } 302 for (String word : probabilityMap.keySet()) { 303 assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); 304 } 305 dictFile.delete(); 306 } 307 308 public void testAddBigramWords() { 309 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 310 testAddBigramWords(formatVersion); 311 } 312 } 313 314 private void testAddBigramWords(final int formatVersion) { 315 File dictFile = null; 316 try { 317 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 318 } catch (IOException e) { 319 fail("IOException while writing an initial dictionary : " + e); 320 } 321 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 322 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 323 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 324 325 final int unigramProbability = 100; 326 final int bigramProbability = 150; 327 final int updatedBigramProbability = 200; 328 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 329 addUnigramWord(binaryDictionary, "abb", unigramProbability); 330 addUnigramWord(binaryDictionary, "bcc", unigramProbability); 331 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 332 addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 333 addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 334 addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 335 336 assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 337 assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); 338 assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); 339 assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); 340 if (canCheckBigramProbability(formatVersion)) { 341 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); 342 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); 343 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); 344 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); 345 } 346 347 addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); 348 if (canCheckBigramProbability(formatVersion)) { 349 assertEquals(updatedBigramProbability, 350 getBigramProbability(binaryDictionary, "aaa", "abb")); 351 } 352 353 assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); 354 assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); 355 assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); 356 assertEquals(Dictionary.NOT_A_PROBABILITY, 357 getBigramProbability(binaryDictionary, "bcc", "aaa")); 358 assertEquals(Dictionary.NOT_A_PROBABILITY, 359 getBigramProbability(binaryDictionary, "bcc", "bbc")); 360 assertEquals(Dictionary.NOT_A_PROBABILITY, 361 getBigramProbability(binaryDictionary, "aaa", "aaa")); 362 363 // Testing bigram link. 364 addUnigramWord(binaryDictionary, "abcde", unigramProbability); 365 addUnigramWord(binaryDictionary, "fghij", unigramProbability); 366 addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability); 367 addUnigramWord(binaryDictionary, "fgh", unigramProbability); 368 addUnigramWord(binaryDictionary, "abc", unigramProbability); 369 addUnigramWord(binaryDictionary, "f", unigramProbability); 370 371 if (canCheckBigramProbability(formatVersion)) { 372 assertEquals(bigramProbability, 373 getBigramProbability(binaryDictionary, "abcde", "fghij")); 374 } 375 assertEquals(Dictionary.NOT_A_PROBABILITY, 376 getBigramProbability(binaryDictionary, "abcde", "fgh")); 377 addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); 378 if (canCheckBigramProbability(formatVersion)) { 379 assertEquals(updatedBigramProbability, 380 getBigramProbability(binaryDictionary, "abcde", "fghij")); 381 } 382 383 dictFile.delete(); 384 } 385 386 public void testRandomlyAddBigramWords() { 387 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 388 testRandomlyAddBigramWords(formatVersion); 389 } 390 } 391 392 private void testRandomlyAddBigramWords(final int formatVersion) { 393 final int wordCount = 100; 394 final int bigramCount = 1000; 395 final int codePointSetSize = 50; 396 final long seed = System.currentTimeMillis(); 397 final Random random = new Random(seed); 398 399 File dictFile = null; 400 try { 401 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 402 } catch (IOException e) { 403 fail("IOException while writing an initial dictionary : " + e); 404 } 405 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 406 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 407 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 408 409 final ArrayList<String> words = new ArrayList<>(); 410 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 411 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 412 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 413 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 414 415 for (int i = 0; i < wordCount; ++i) { 416 final String word = CodePointUtils.generateWord(random, codePointSet); 417 words.add(word); 418 final int unigramProbability = random.nextInt(0xFF); 419 unigramProbabilities.put(word, unigramProbability); 420 addUnigramWord(binaryDictionary, word, unigramProbability); 421 } 422 423 for (int i = 0; i < bigramCount; i++) { 424 final String word0 = words.get(random.nextInt(wordCount)); 425 final String word1 = words.get(random.nextInt(wordCount)); 426 if (TextUtils.equals(word0, word1)) { 427 continue; 428 } 429 final Pair<String, String> bigram = new Pair<>(word0, word1); 430 bigramWords.add(bigram); 431 final int unigramProbability = unigramProbabilities.get(word1); 432 final int bigramProbability = 433 unigramProbability + random.nextInt(0xFF - unigramProbability); 434 bigramProbabilities.put(bigram, bigramProbability); 435 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 436 } 437 438 for (final Pair<String, String> bigram : bigramWords) { 439 final int bigramProbability = bigramProbabilities.get(bigram); 440 assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 441 isValidBigram(binaryDictionary, bigram.first, bigram.second)); 442 if (canCheckBigramProbability(formatVersion)) { 443 assertEquals(bigramProbability, 444 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 445 } 446 } 447 448 dictFile.delete(); 449 } 450 451 public void testRemoveBigramWords() { 452 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 453 testRemoveBigramWords(formatVersion); 454 } 455 } 456 457 private void testRemoveBigramWords(final int formatVersion) { 458 File dictFile = null; 459 try { 460 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 461 } catch (IOException e) { 462 fail("IOException while writing an initial dictionary : " + e); 463 } 464 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 465 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 466 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 467 final int unigramProbability = 100; 468 final int bigramProbability = 150; 469 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 470 addUnigramWord(binaryDictionary, "abb", unigramProbability); 471 addUnigramWord(binaryDictionary, "bcc", unigramProbability); 472 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 473 addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 474 addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 475 addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 476 477 assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 478 assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); 479 assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); 480 assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); 481 482 removeBigramEntry(binaryDictionary, "aaa", "abb"); 483 assertFalse(isValidBigram(binaryDictionary, "aaa", "abb")); 484 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 485 assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 486 487 488 removeBigramEntry(binaryDictionary, "aaa", "bcc"); 489 assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc")); 490 removeBigramEntry(binaryDictionary, "abb", "aaa"); 491 assertFalse(isValidBigram(binaryDictionary, "abb", "aaa")); 492 removeBigramEntry(binaryDictionary, "abb", "bcc"); 493 assertFalse(isValidBigram(binaryDictionary, "abb", "bcc")); 494 495 removeBigramEntry(binaryDictionary, "aaa", "abb"); 496 // Test remove non-existing bigram operation. 497 removeBigramEntry(binaryDictionary, "aaa", "abb"); 498 removeBigramEntry(binaryDictionary, "bcc", "aaa"); 499 500 dictFile.delete(); 501 } 502 503 public void testFlushDictionary() { 504 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 505 testFlushDictionary(formatVersion); 506 } 507 } 508 509 private void testFlushDictionary(final int formatVersion) { 510 File dictFile = null; 511 try { 512 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 513 } catch (IOException e) { 514 fail("IOException while writing an initial dictionary : " + e); 515 } 516 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 517 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 518 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 519 520 final int probability = 100; 521 addUnigramWord(binaryDictionary, "aaa", probability); 522 addUnigramWord(binaryDictionary, "abcd", probability); 523 // Close without flushing. 524 binaryDictionary.close(); 525 526 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 527 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 528 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 529 530 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); 531 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); 532 533 addUnigramWord(binaryDictionary, "aaa", probability); 534 addUnigramWord(binaryDictionary, "abcd", probability); 535 binaryDictionary.flush(); 536 binaryDictionary.close(); 537 538 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 539 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 540 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 541 542 assertEquals(probability, binaryDictionary.getFrequency("aaa")); 543 assertEquals(probability, binaryDictionary.getFrequency("abcd")); 544 addUnigramWord(binaryDictionary, "bcde", probability); 545 binaryDictionary.flush(); 546 binaryDictionary.close(); 547 548 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 549 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 550 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 551 assertEquals(probability, binaryDictionary.getFrequency("bcde")); 552 binaryDictionary.close(); 553 554 dictFile.delete(); 555 } 556 557 public void testFlushWithGCDictionary() { 558 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 559 testFlushWithGCDictionary(formatVersion); 560 } 561 } 562 563 private void testFlushWithGCDictionary(final int formatVersion) { 564 File dictFile = null; 565 try { 566 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 567 } catch (IOException e) { 568 fail("IOException while writing an initial dictionary : " + e); 569 } 570 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 571 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 572 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 573 574 final int unigramProbability = 100; 575 final int bigramProbability = 150; 576 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 577 addUnigramWord(binaryDictionary, "abb", unigramProbability); 578 addUnigramWord(binaryDictionary, "bcc", unigramProbability); 579 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 580 addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 581 addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 582 addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 583 binaryDictionary.flushWithGC(); 584 binaryDictionary.close(); 585 586 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 587 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 588 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 589 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 590 assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); 591 assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); 592 if (canCheckBigramProbability(formatVersion)) { 593 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); 594 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); 595 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); 596 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); 597 } 598 assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); 599 assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); 600 assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); 601 binaryDictionary.flushWithGC(); 602 binaryDictionary.close(); 603 604 dictFile.delete(); 605 } 606 607 public void testAddBigramWordsAndFlashWithGC() { 608 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 609 testAddBigramWordsAndFlashWithGC(formatVersion); 610 } 611 } 612 613 // TODO: Evaluate performance of GC 614 private void testAddBigramWordsAndFlashWithGC(final int formatVersion) { 615 final int wordCount = 100; 616 final int bigramCount = 1000; 617 final int codePointSetSize = 30; 618 final long seed = System.currentTimeMillis(); 619 final Random random = new Random(seed); 620 621 File dictFile = null; 622 try { 623 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 624 } catch (IOException e) { 625 fail("IOException while writing an initial dictionary : " + e); 626 } 627 628 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 629 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 630 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 631 632 final ArrayList<String> words = new ArrayList<>(); 633 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 634 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 635 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 636 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 637 638 for (int i = 0; i < wordCount; ++i) { 639 final String word = CodePointUtils.generateWord(random, codePointSet); 640 words.add(word); 641 final int unigramProbability = random.nextInt(0xFF); 642 unigramProbabilities.put(word, unigramProbability); 643 addUnigramWord(binaryDictionary, word, unigramProbability); 644 } 645 646 for (int i = 0; i < bigramCount; i++) { 647 final String word0 = words.get(random.nextInt(wordCount)); 648 final String word1 = words.get(random.nextInt(wordCount)); 649 if (TextUtils.equals(word0, word1)) { 650 continue; 651 } 652 final Pair<String, String> bigram = new Pair<>(word0, word1); 653 bigramWords.add(bigram); 654 final int unigramProbability = unigramProbabilities.get(word1); 655 final int bigramProbability = 656 unigramProbability + random.nextInt(0xFF - unigramProbability); 657 bigramProbabilities.put(bigram, bigramProbability); 658 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 659 } 660 661 binaryDictionary.flushWithGC(); 662 binaryDictionary.close(); 663 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 664 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 665 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 666 667 668 for (final Pair<String, String> bigram : bigramWords) { 669 final int bigramProbability = bigramProbabilities.get(bigram); 670 assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 671 isValidBigram(binaryDictionary, bigram.first, bigram.second)); 672 if (canCheckBigramProbability(formatVersion)) { 673 assertEquals(bigramProbability, 674 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 675 } 676 } 677 678 dictFile.delete(); 679 } 680 681 public void testRandomOperationsAndFlashWithGC() { 682 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 683 testRandomOperationsAndFlashWithGC(formatVersion); 684 } 685 } 686 687 private void testRandomOperationsAndFlashWithGC(final int formatVersion) { 688 final int flashWithGCIterationCount = 50; 689 final int operationCountInEachIteration = 200; 690 final int initialUnigramCount = 100; 691 final float addUnigramProb = 0.5f; 692 final float addBigramProb = 0.8f; 693 final float removeBigramProb = 0.2f; 694 final int codePointSetSize = 30; 695 696 final long seed = System.currentTimeMillis(); 697 final Random random = new Random(seed); 698 699 File dictFile = null; 700 try { 701 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 702 } catch (IOException e) { 703 fail("IOException while writing an initial dictionary : " + e); 704 } 705 706 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 707 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 708 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 709 final ArrayList<String> words = new ArrayList<>(); 710 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 711 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 712 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 713 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 714 for (int i = 0; i < initialUnigramCount; ++i) { 715 final String word = CodePointUtils.generateWord(random, codePointSet); 716 words.add(word); 717 final int unigramProbability = random.nextInt(0xFF); 718 unigramProbabilities.put(word, unigramProbability); 719 addUnigramWord(binaryDictionary, word, unigramProbability); 720 } 721 binaryDictionary.flushWithGC(); 722 binaryDictionary.close(); 723 724 for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { 725 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 726 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 727 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 728 for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { 729 // Add unigram. 730 if (random.nextFloat() < addUnigramProb) { 731 final String word = CodePointUtils.generateWord(random, codePointSet); 732 words.add(word); 733 final int unigramProbability = random.nextInt(0xFF); 734 unigramProbabilities.put(word, unigramProbability); 735 addUnigramWord(binaryDictionary, word, unigramProbability); 736 } 737 // Add bigram. 738 if (random.nextFloat() < addBigramProb && words.size() > 2) { 739 final int word0Index = random.nextInt(words.size()); 740 int word1Index = random.nextInt(words.size() - 1); 741 if (word0Index <= word1Index) { 742 word1Index++; 743 } 744 final String word0 = words.get(word0Index); 745 final String word1 = words.get(word1Index); 746 if (TextUtils.equals(word0, word1)) { 747 continue; 748 } 749 final int unigramProbability = unigramProbabilities.get(word1); 750 final int bigramProbability = 751 unigramProbability + random.nextInt(0xFF - unigramProbability); 752 final Pair<String, String> bigram = new Pair<>(word0, word1); 753 bigramWords.add(bigram); 754 bigramProbabilities.put(bigram, bigramProbability); 755 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 756 } 757 // Remove bigram. 758 if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { 759 final int bigramIndex = random.nextInt(bigramWords.size()); 760 final Pair<String, String> bigram = bigramWords.get(bigramIndex); 761 bigramWords.remove(bigramIndex); 762 bigramProbabilities.remove(bigram); 763 removeBigramEntry(binaryDictionary, bigram.first, bigram.second); 764 } 765 } 766 767 // Test whether the all unigram operations are collectlly handled. 768 for (int i = 0; i < words.size(); i++) { 769 final String word = words.get(i); 770 final int unigramProbability = unigramProbabilities.get(word); 771 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 772 } 773 // Test whether the all bigram operations are collectlly handled. 774 for (int i = 0; i < bigramWords.size(); i++) { 775 final Pair<String, String> bigram = bigramWords.get(i); 776 final int probability; 777 if (bigramProbabilities.containsKey(bigram)) { 778 final int bigramProbability = bigramProbabilities.get(bigram); 779 probability = bigramProbability; 780 } else { 781 probability = Dictionary.NOT_A_PROBABILITY; 782 } 783 784 if (canCheckBigramProbability(formatVersion)) { 785 assertEquals(probability, 786 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 787 } 788 assertEquals(probability != Dictionary.NOT_A_PROBABILITY, 789 isValidBigram(binaryDictionary, bigram.first, bigram.second)); 790 } 791 binaryDictionary.flushWithGC(); 792 binaryDictionary.close(); 793 } 794 795 dictFile.delete(); 796 } 797 798 public void testAddManyUnigramsAndFlushWithGC() { 799 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 800 testAddManyUnigramsAndFlushWithGC(formatVersion); 801 } 802 } 803 804 private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) { 805 final int flashWithGCIterationCount = 3; 806 final int codePointSetSize = 50; 807 808 final long seed = System.currentTimeMillis(); 809 final Random random = new Random(seed); 810 811 File dictFile = null; 812 try { 813 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 814 } catch (IOException e) { 815 fail("IOException while writing an initial dictionary : " + e); 816 } 817 818 final ArrayList<String> words = new ArrayList<>(); 819 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 820 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 821 822 BinaryDictionary binaryDictionary; 823 for (int i = 0; i < flashWithGCIterationCount; i++) { 824 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 825 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 826 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 827 while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 828 final String word = CodePointUtils.generateWord(random, codePointSet); 829 words.add(word); 830 final int unigramProbability = random.nextInt(0xFF); 831 unigramProbabilities.put(word, unigramProbability); 832 addUnigramWord(binaryDictionary, word, unigramProbability); 833 } 834 835 for (int j = 0; j < words.size(); j++) { 836 final String word = words.get(j); 837 final int unigramProbability = unigramProbabilities.get(word); 838 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 839 } 840 841 binaryDictionary.flushWithGC(); 842 binaryDictionary.close(); 843 } 844 845 dictFile.delete(); 846 } 847 848 public void testUnigramAndBigramCount() { 849 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 850 testUnigramAndBigramCount(formatVersion); 851 } 852 } 853 854 private void testUnigramAndBigramCount(final int formatVersion) { 855 final int flashWithGCIterationCount = 10; 856 final int codePointSetSize = 50; 857 final int unigramCountPerIteration = 1000; 858 final int bigramCountPerIteration = 2000; 859 final long seed = System.currentTimeMillis(); 860 final Random random = new Random(seed); 861 862 File dictFile = null; 863 try { 864 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 865 } catch (IOException e) { 866 fail("IOException while writing an initial dictionary : " + e); 867 } 868 869 final ArrayList<String> words = new ArrayList<>(); 870 final HashSet<Pair<String, String>> bigrams = new HashSet<>(); 871 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 872 873 BinaryDictionary binaryDictionary; 874 for (int i = 0; i < flashWithGCIterationCount; i++) { 875 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 876 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 877 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 878 for (int j = 0; j < unigramCountPerIteration; j++) { 879 final String word = CodePointUtils.generateWord(random, codePointSet); 880 words.add(word); 881 final int unigramProbability = random.nextInt(0xFF); 882 addUnigramWord(binaryDictionary, word, unigramProbability); 883 } 884 for (int j = 0; j < bigramCountPerIteration; j++) { 885 final String word0 = words.get(random.nextInt(words.size())); 886 final String word1 = words.get(random.nextInt(words.size())); 887 if (TextUtils.equals(word0, word1)) { 888 continue; 889 } 890 bigrams.add(new Pair<>(word0, word1)); 891 final int bigramProbability = random.nextInt(0xF); 892 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 893 } 894 assertEquals(new HashSet<>(words).size(), Integer.parseInt( 895 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 896 assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( 897 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); 898 binaryDictionary.flushWithGC(); 899 assertEquals(new HashSet<>(words).size(), Integer.parseInt( 900 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 901 assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( 902 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); 903 binaryDictionary.close(); 904 } 905 906 dictFile.delete(); 907 } 908 909 public void testAddMultipleDictionaryEntries() { 910 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 911 testAddMultipleDictionaryEntries(formatVersion); 912 } 913 } 914 915 private void testAddMultipleDictionaryEntries(final int formatVersion) { 916 final int codePointSetSize = 20; 917 final int lmParamCount = 1000; 918 final double bigramContinueRate = 0.9; 919 final long seed = System.currentTimeMillis(); 920 final Random random = new Random(seed); 921 922 File dictFile = null; 923 try { 924 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 925 } catch (IOException e) { 926 fail("IOException while writing an initial dictionary : " + e); 927 } 928 929 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 930 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 931 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 932 933 final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount]; 934 String prevWord = null; 935 for (int i = 0; i < languageModelParams.length; i++) { 936 final String word = CodePointUtils.generateWord(random, codePointSet); 937 final int probability = random.nextInt(0xFF); 938 final int bigramProbability = probability + random.nextInt(0xFF - probability); 939 unigramProbabilities.put(word, probability); 940 if (prevWord == null) { 941 languageModelParams[i] = new LanguageModelParam(word, probability, 942 BinaryDictionary.NOT_A_VALID_TIMESTAMP); 943 } else { 944 languageModelParams[i] = new LanguageModelParam(prevWord, word, probability, 945 bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP); 946 bigramProbabilities.put(new Pair<>(prevWord, word), 947 bigramProbability); 948 } 949 prevWord = (random.nextDouble() < bigramContinueRate) ? word : null; 950 } 951 952 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 953 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 954 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 955 binaryDictionary.addMultipleDictionaryEntries(languageModelParams); 956 957 for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) { 958 assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey())); 959 } 960 961 for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { 962 final String word0 = entry.getKey().first; 963 final String word1 = entry.getKey().second; 964 final int bigramProbability = entry.getValue(); 965 assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 966 isValidBigram(binaryDictionary, word0, word1)); 967 if (canCheckBigramProbability(formatVersion)) { 968 assertEquals(bigramProbability, 969 getBigramProbability(binaryDictionary, word0, word1)); 970 } 971 } 972 } 973 974 public void testGetWordProperties() { 975 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 976 testGetWordProperties(formatVersion); 977 } 978 } 979 980 private void testGetWordProperties(final int formatVersion) { 981 final long seed = System.currentTimeMillis(); 982 final Random random = new Random(seed); 983 final int UNIGRAM_COUNT = 1000; 984 final int BIGRAM_COUNT = 1000; 985 final int codePointSetSize = 20; 986 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 987 988 File dictFile = null; 989 try { 990 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 991 } catch (IOException e) { 992 fail("IOException while writing an initial dictionary : " + e); 993 } 994 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 995 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 996 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 997 998 final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", 999 false /* isBeginningOfSentence */); 1000 assertFalse(invalidWordProperty.isValid()); 1001 1002 final ArrayList<String> words = new ArrayList<>(); 1003 final HashMap<String, Integer> wordProbabilities = new HashMap<>(); 1004 final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); 1005 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 1006 1007 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1008 final String word = CodePointUtils.generateWord(random, codePointSet); 1009 final int unigramProbability = random.nextInt(0xFF); 1010 final boolean isNotAWord = random.nextBoolean(); 1011 final boolean isBlacklisted = random.nextBoolean(); 1012 // TODO: Add tests for historical info. 1013 binaryDictionary.addUnigramEntry(word, unigramProbability, 1014 null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, 1015 false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, 1016 BinaryDictionary.NOT_A_VALID_TIMESTAMP); 1017 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1018 binaryDictionary.flushWithGC(); 1019 } 1020 words.add(word); 1021 wordProbabilities.put(word, unigramProbability); 1022 final WordProperty wordProperty = binaryDictionary.getWordProperty(word, 1023 false /* isBeginningOfSentence */); 1024 assertEquals(word, wordProperty.mWord); 1025 assertTrue(wordProperty.isValid()); 1026 assertEquals(isNotAWord, wordProperty.mIsNotAWord); 1027 assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); 1028 assertEquals(false, wordProperty.mHasBigrams); 1029 assertEquals(false, wordProperty.mHasShortcuts); 1030 assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); 1031 assertTrue(wordProperty.mShortcutTargets.isEmpty()); 1032 } 1033 1034 for (int i = 0; i < BIGRAM_COUNT; i++) { 1035 final int word0Index = random.nextInt(wordProbabilities.size()); 1036 final int word1Index = random.nextInt(wordProbabilities.size()); 1037 if (word0Index == word1Index) { 1038 continue; 1039 } 1040 final String word0 = words.get(word0Index); 1041 final String word1 = words.get(word1Index); 1042 final int unigramProbability = wordProbabilities.get(word1); 1043 final int bigramProbability = 1044 unigramProbability + random.nextInt(0xFF - unigramProbability); 1045 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 1046 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1047 binaryDictionary.flushWithGC(); 1048 } 1049 if (!bigrams.containsKey(word0)) { 1050 final HashSet<String> bigramWord1s = new HashSet<>(); 1051 bigrams.put(word0, bigramWord1s); 1052 } 1053 bigrams.get(word0).add(word1); 1054 bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability); 1055 } 1056 1057 for (int i = 0; i < words.size(); i++) { 1058 final String word0 = words.get(i); 1059 if (!bigrams.containsKey(word0)) { 1060 continue; 1061 } 1062 final HashSet<String> bigramWord1s = bigrams.get(word0); 1063 final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, 1064 false /* isBeginningOfSentence */); 1065 assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); 1066 for (int j = 0; j < wordProperty.mBigrams.size(); j++) { 1067 final String word1 = wordProperty.mBigrams.get(j).mWord; 1068 assertTrue(bigramWord1s.contains(word1)); 1069 if (canCheckBigramProbability(formatVersion)) { 1070 final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); 1071 assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); 1072 } 1073 } 1074 } 1075 } 1076 1077 public void testIterateAllWords() { 1078 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1079 testIterateAllWords(formatVersion); 1080 } 1081 } 1082 1083 private void testIterateAllWords(final int formatVersion) { 1084 final long seed = System.currentTimeMillis(); 1085 final Random random = new Random(seed); 1086 final int UNIGRAM_COUNT = 1000; 1087 final int BIGRAM_COUNT = 1000; 1088 final int codePointSetSize = 20; 1089 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 1090 1091 File dictFile = null; 1092 try { 1093 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1094 } catch (IOException e) { 1095 fail("IOException while writing an initial dictionary : " + e); 1096 } 1097 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1098 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1099 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1100 1101 final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", 1102 false /* isBeginningOfSentence */); 1103 assertFalse(invalidWordProperty.isValid()); 1104 1105 final ArrayList<String> words = new ArrayList<>(); 1106 final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>(); 1107 final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); 1108 final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = 1109 new HashMap<>(); 1110 1111 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1112 final String word = CodePointUtils.generateWord(random, codePointSet); 1113 final int unigramProbability = random.nextInt(0xFF); 1114 addUnigramWord(binaryDictionary, word, unigramProbability); 1115 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1116 binaryDictionary.flushWithGC(); 1117 } 1118 words.add(word); 1119 wordProbabilitiesToCheckLater.put(word, unigramProbability); 1120 } 1121 1122 for (int i = 0; i < BIGRAM_COUNT; i++) { 1123 final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size()); 1124 final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size()); 1125 if (word0Index == word1Index) { 1126 continue; 1127 } 1128 final String word0 = words.get(word0Index); 1129 final String word1 = words.get(word1Index); 1130 final int unigramProbability = wordProbabilitiesToCheckLater.get(word1); 1131 final int bigramProbability = 1132 unigramProbability + random.nextInt(0xFF - unigramProbability); 1133 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 1134 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1135 binaryDictionary.flushWithGC(); 1136 } 1137 if (!bigrams.containsKey(word0)) { 1138 final HashSet<String> bigramWord1s = new HashSet<>(); 1139 bigrams.put(word0, bigramWord1s); 1140 } 1141 bigrams.get(word0).add(word1); 1142 bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability); 1143 } 1144 1145 final HashSet<String> wordSet = new HashSet<>(words); 1146 final HashSet<Pair<String, String>> bigramSet = 1147 new HashSet<>(bigramProbabilitiesToCheckLater.keySet()); 1148 int token = 0; 1149 do { 1150 final BinaryDictionary.GetNextWordPropertyResult result = 1151 binaryDictionary.getNextWordProperty(token); 1152 final WordProperty wordProperty = result.mWordProperty; 1153 final String word0 = wordProperty.mWord; 1154 assertEquals((int)wordProbabilitiesToCheckLater.get(word0), 1155 wordProperty.mProbabilityInfo.mProbability); 1156 wordSet.remove(word0); 1157 final HashSet<String> bigramWord1s = bigrams.get(word0); 1158 for (int j = 0; j < wordProperty.mBigrams.size(); j++) { 1159 final String word1 = wordProperty.mBigrams.get(j).mWord; 1160 assertTrue(bigramWord1s.contains(word1)); 1161 final Pair<String, String> bigram = new Pair<>(word0, word1); 1162 if (canCheckBigramProbability(formatVersion)) { 1163 final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); 1164 assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); 1165 } 1166 bigramSet.remove(bigram); 1167 } 1168 token = result.mNextToken; 1169 } while (token != 0); 1170 assertTrue(wordSet.isEmpty()); 1171 assertTrue(bigramSet.isEmpty()); 1172 } 1173 1174 public void testAddShortcuts() { 1175 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1176 testAddShortcuts(formatVersion); 1177 } 1178 } 1179 1180 private void testAddShortcuts(final int formatVersion) { 1181 File dictFile = null; 1182 try { 1183 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1184 } catch (IOException e) { 1185 fail("IOException while writing an initial dictionary : " + e); 1186 } 1187 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1188 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1189 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1190 1191 final int unigramProbability = 100; 1192 final int shortcutProbability = 10; 1193 binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", 1194 shortcutProbability, false /* isBeginningOfSentence */, 1195 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); 1196 WordProperty wordProperty = binaryDictionary.getWordProperty("aaa", 1197 false /* isBeginningOfSentence */); 1198 assertEquals(1, wordProperty.mShortcutTargets.size()); 1199 assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); 1200 assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); 1201 final int updatedShortcutProbability = 2; 1202 binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", 1203 updatedShortcutProbability, false /* isBeginningOfSentence */, 1204 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); 1205 wordProperty = binaryDictionary.getWordProperty("aaa", 1206 false /* isBeginningOfSentence */); 1207 assertEquals(1, wordProperty.mShortcutTargets.size()); 1208 assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); 1209 assertEquals(updatedShortcutProbability, 1210 wordProperty.mShortcutTargets.get(0).getProbability()); 1211 binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", 1212 shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, 1213 false /* isBlacklisted */, 0 /* timestamp */); 1214 final HashMap<String, Integer> shortcutTargets = new HashMap<>(); 1215 shortcutTargets.put("zzz", updatedShortcutProbability); 1216 shortcutTargets.put("yyy", shortcutProbability); 1217 wordProperty = binaryDictionary.getWordProperty("aaa", 1218 false /* isBeginningOfSentence */); 1219 assertEquals(2, wordProperty.mShortcutTargets.size()); 1220 for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 1221 assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); 1222 assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), 1223 shortcutTarget.getProbability()); 1224 shortcutTargets.remove(shortcutTarget.mWord); 1225 } 1226 shortcutTargets.put("zzz", updatedShortcutProbability); 1227 shortcutTargets.put("yyy", shortcutProbability); 1228 binaryDictionary.flushWithGC(); 1229 wordProperty = binaryDictionary.getWordProperty("aaa", 1230 false /* isBeginningOfSentence */); 1231 assertEquals(2, wordProperty.mShortcutTargets.size()); 1232 for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 1233 assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); 1234 assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), 1235 shortcutTarget.getProbability()); 1236 shortcutTargets.remove(shortcutTarget.mWord); 1237 } 1238 } 1239 1240 public void testAddManyShortcuts() { 1241 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1242 testAddManyShortcuts(formatVersion); 1243 } 1244 } 1245 1246 private void testAddManyShortcuts(final int formatVersion) { 1247 final long seed = System.currentTimeMillis(); 1248 final Random random = new Random(seed); 1249 final int UNIGRAM_COUNT = 1000; 1250 final int SHORTCUT_COUNT = 10000; 1251 final int codePointSetSize = 20; 1252 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 1253 1254 final ArrayList<String> words = new ArrayList<>(); 1255 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 1256 final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>(); 1257 1258 File dictFile = null; 1259 try { 1260 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1261 } catch (IOException e) { 1262 fail("IOException while writing an initial dictionary : " + e); 1263 } 1264 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1265 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1266 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1267 1268 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1269 final String word = CodePointUtils.generateWord(random, codePointSet); 1270 final int unigramProbability = random.nextInt(0xFF); 1271 addUnigramWord(binaryDictionary, word, unigramProbability); 1272 words.add(word); 1273 unigramProbabilities.put(word, unigramProbability); 1274 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1275 binaryDictionary.flushWithGC(); 1276 } 1277 } 1278 for (int i = 0; i < SHORTCUT_COUNT; i++) { 1279 final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet); 1280 final int shortcutProbability = random.nextInt(0xF); 1281 final String word = words.get(random.nextInt(words.size())); 1282 final int unigramProbability = unigramProbabilities.get(word); 1283 binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, 1284 shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, 1285 false /* isBlacklisted */, 0 /* timestamp */); 1286 if (shortcutTargets.containsKey(word)) { 1287 final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); 1288 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); 1289 } else { 1290 final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>(); 1291 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); 1292 shortcutTargets.put(word, shortcutTargetsOfWord); 1293 } 1294 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1295 binaryDictionary.flushWithGC(); 1296 } 1297 } 1298 1299 for (final String word : words) { 1300 final WordProperty wordProperty = binaryDictionary.getWordProperty(word, 1301 false /* isBeginningOfSentence */); 1302 assertEquals((int)unigramProbabilities.get(word), 1303 wordProperty.mProbabilityInfo.mProbability); 1304 if (!shortcutTargets.containsKey(word)) { 1305 // The word does not have shortcut targets. 1306 continue; 1307 } 1308 assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size()); 1309 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 1310 final String targetCodePonts = shortcutTarget.mWord; 1311 assertEquals((int)shortcutTargets.get(word).get(targetCodePonts), 1312 shortcutTarget.getProbability()); 1313 } 1314 } 1315 } 1316 1317 public void testDictMigration() { 1318 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1319 testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); 1320 } 1321 } 1322 1323 private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { 1324 File dictFile = null; 1325 try { 1326 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); 1327 } catch (IOException e) { 1328 fail("IOException while writing an initial dictionary : " + e); 1329 } 1330 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1331 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1332 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1333 final int unigramProbability = 100; 1334 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 1335 addUnigramWord(binaryDictionary, "bbb", unigramProbability); 1336 final int bigramProbability = 150; 1337 addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); 1338 final int shortcutProbability = 10; 1339 binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, 1340 false /* isBeginningOfSentence */, false /* isNotAWord */, 1341 false /* isBlacklisted */, 0 /* timestamp */); 1342 binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, 1343 Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, 1344 true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); 1345 binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE, 1346 "aaa", bigramProbability, 0 /* timestamp */); 1347 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 1348 assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); 1349 assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); 1350 assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); 1351 assertTrue(binaryDictionary.migrateTo(toFormatVersion)); 1352 assertTrue(binaryDictionary.isValidDictionary()); 1353 assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); 1354 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 1355 assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); 1356 if (canCheckBigramProbability(toFormatVersion)) { 1357 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb")); 1358 assertEquals(bigramProbability, binaryDictionary.getNgramProbability( 1359 PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa")); 1360 } 1361 assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); 1362 WordProperty wordProperty = binaryDictionary.getWordProperty("ccc", 1363 false /* isBeginningOfSentence */); 1364 assertEquals(1, wordProperty.mShortcutTargets.size()); 1365 assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); 1366 wordProperty = binaryDictionary.getWordProperty("ddd", 1367 false /* isBeginningOfSentence */); 1368 assertTrue(wordProperty.mIsBlacklistEntry); 1369 assertTrue(wordProperty.mIsNotAWord); 1370 } 1371 1372 public void testLargeDictMigration() { 1373 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1374 testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); 1375 } 1376 } 1377 1378 private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) { 1379 final int UNIGRAM_COUNT = 3000; 1380 final int BIGRAM_COUNT = 3000; 1381 final int codePointSetSize = 50; 1382 final long seed = System.currentTimeMillis(); 1383 final Random random = new Random(seed); 1384 1385 File dictFile = null; 1386 try { 1387 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); 1388 } catch (IOException e) { 1389 fail("IOException while writing an initial dictionary : " + e); 1390 } 1391 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1392 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1393 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1394 1395 final ArrayList<String> words = new ArrayList<>(); 1396 final ArrayList<Pair<String, String>> bigrams = new ArrayList<>(); 1397 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 1398 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 1399 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 1400 1401 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1402 final String word = CodePointUtils.generateWord(random, codePointSet); 1403 final int unigramProbability = random.nextInt(0xFF); 1404 addUnigramWord(binaryDictionary, word, unigramProbability); 1405 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1406 binaryDictionary.flushWithGC(); 1407 } 1408 words.add(word); 1409 unigramProbabilities.put(word, unigramProbability); 1410 } 1411 1412 for (int i = 0; i < BIGRAM_COUNT; i++) { 1413 final int word0Index = random.nextInt(words.size()); 1414 final int word1Index = random.nextInt(words.size()); 1415 if (word0Index == word1Index) { 1416 continue; 1417 } 1418 final String word0 = words.get(word0Index); 1419 final String word1 = words.get(word1Index); 1420 final int unigramProbability = unigramProbabilities.get(word1); 1421 final int bigramProbability = 1422 random.nextInt(0xFF - unigramProbability) + unigramProbability; 1423 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 1424 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1425 binaryDictionary.flushWithGC(); 1426 } 1427 final Pair<String, String> bigram = new Pair<>(word0, word1); 1428 bigrams.add(bigram); 1429 bigramProbabilities.put(bigram, bigramProbability); 1430 } 1431 assertTrue(binaryDictionary.migrateTo(toFormatVersion)); 1432 1433 for (final String word : words) { 1434 assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word)); 1435 } 1436 assertEquals(unigramProbabilities.size(), Integer.parseInt( 1437 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 1438 1439 for (final Pair<String, String> bigram : bigrams) { 1440 if (canCheckBigramProbability(toFormatVersion)) { 1441 assertEquals((int)bigramProbabilities.get(bigram), 1442 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 1443 } 1444 assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second)); 1445 } 1446 assertEquals(bigramProbabilities.size(), Integer.parseInt( 1447 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); 1448 } 1449 1450 public void testBeginningOfSentence() { 1451 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1452 if (supportsBeginningOfSentence(formatVersion)) { 1453 testBeginningOfSentence(formatVersion); 1454 } 1455 } 1456 } 1457 1458 private void testBeginningOfSentence(final int formatVersion) { 1459 File dictFile = null; 1460 try { 1461 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1462 } catch (IOException e) { 1463 fail("IOException while writing an initial dictionary : " + e); 1464 } 1465 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1466 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1467 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1468 final int dummyProbability = 0; 1469 final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; 1470 final int bigramProbability = 200; 1471 addUnigramWord(binaryDictionary, "aaa", dummyProbability); 1472 binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, 1473 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1474 assertEquals(bigramProbability, 1475 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); 1476 binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, 1477 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1478 addUnigramWord(binaryDictionary, "bbb", dummyProbability); 1479 binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability, 1480 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1481 binaryDictionary.flushWithGC(); 1482 assertEquals(bigramProbability, 1483 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); 1484 assertEquals(bigramProbability, 1485 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb")); 1486 } 1487 1488 public void testGetMaxFrequencyOfExactMatches() { 1489 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1490 testGetMaxFrequencyOfExactMatches(formatVersion); 1491 } 1492 } 1493 1494 private void testGetMaxFrequencyOfExactMatches(final int formatVersion) { 1495 File dictFile = null; 1496 try { 1497 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1498 } catch (IOException e) { 1499 fail("IOException while writing an initial dictionary : " + e); 1500 } 1501 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1502 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1503 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1504 addUnigramWord(binaryDictionary, "abc", 10); 1505 addUnigramWord(binaryDictionary, "aBc", 15); 1506 assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1507 addUnigramWord(binaryDictionary, "ab'c", 20); 1508 assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1509 addUnigramWord(binaryDictionary, "a-b-c", 25); 1510 assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1511 addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30); 1512 assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1513 addUnigramWord(binaryDictionary, "ab c", 255); 1514 assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1515 } 1516 } 1517