1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.makedict; 18 19 import android.test.AndroidTestCase; 20 import android.test.MoreAsserts; 21 import android.test.suitebuilder.annotation.LargeTest; 22 import android.util.Log; 23 import android.util.SparseArray; 24 25 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; 26 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; 27 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; 28 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 29 import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; 30 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; 31 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; 32 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; 33 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; 34 import com.android.inputmethod.latin.utils.CollectionUtils; 35 36 import java.io.File; 37 import java.io.FileInputStream; 38 import java.io.IOException; 39 import java.util.ArrayList; 40 import java.util.Arrays; 41 import java.util.HashMap; 42 import java.util.HashSet; 43 import java.util.List; 44 import java.util.Map.Entry; 45 import java.util.Random; 46 import java.util.Set; 47 import java.util.TreeMap; 48 49 /** 50 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils. 51 */ 52 @LargeTest 53 public class BinaryDictDecoderEncoderTests extends AndroidTestCase { 54 private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName(); 55 private static final int DEFAULT_MAX_UNIGRAMS = 100; 56 private static final int DEFAULT_CODE_POINT_SET_SIZE = 50; 57 private static final int UNIGRAM_FREQ = 10; 58 private static final int BIGRAM_FREQ = 50; 59 private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; 60 private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50; 61 private static final int NUM_OF_SHORTCUTS = 5; 62 63 private static final int USE_BYTE_ARRAY = 1; 64 private static final int USE_BYTE_BUFFER = 2; 65 66 private static final ArrayList<String> sWords = CollectionUtils.newArrayList(); 67 private static final SparseArray<List<Integer>> sEmptyBigrams = 68 CollectionUtils.newSparseArray(); 69 private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray(); 70 private static final SparseArray<List<Integer>> sChainBigrams = 71 CollectionUtils.newSparseArray(); 72 private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap(); 73 74 private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); 75 private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = 76 new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */); 77 private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE = 78 new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */); 79 private static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE = 80 new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */); 81 private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE = 82 new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */); 83 private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP = 84 new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */, 85 true /* hasTimestamp */); 86 87 private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 88 89 public BinaryDictDecoderEncoderTests() { 90 this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); 91 } 92 93 public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) { 94 super(); 95 Log.e(TAG, "Testing dictionary: seed is " + seed); 96 final Random random = new Random(seed); 97 sWords.clear(); 98 final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, 99 random); 100 generateWords(maxUnigrams, random, codePointSet); 101 102 for (int i = 0; i < sWords.size(); ++i) { 103 sChainBigrams.put(i, new ArrayList<Integer>()); 104 if (i > 0) { 105 sChainBigrams.get(i - 1).add(i); 106 } 107 } 108 109 sStarBigrams.put(0, new ArrayList<Integer>()); 110 // MAX - 1 because we added one above already 111 final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1); 112 for (int i = 1; i < maxBigrams; ++i) { 113 sStarBigrams.get(0).add(i); 114 } 115 116 sShortcuts.clear(); 117 for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) { 118 final int from = Math.abs(random.nextInt()) % sWords.size(); 119 sShortcuts.put(sWords.get(from), new ArrayList<String>()); 120 for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) { 121 final int to = Math.abs(random.nextInt()) % sWords.size(); 122 sShortcuts.get(sWords.get(from)).add(sWords.get(to)); 123 } 124 } 125 } 126 127 private DictEncoder getDictEncoder(final File file, final FormatOptions formatOptions) { 128 if (formatOptions.mVersion == FormatSpec.VERSION4) { 129 return new Ver4DictEncoder(getContext().getCacheDir()); 130 } else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) { 131 return new Ver3DictEncoder(file); 132 } else { 133 throw new RuntimeException("The format option has a wrong version : " 134 + formatOptions.mVersion); 135 } 136 } 137 138 private void generateWords(final int number, final Random random, final int[] codePointSet) { 139 final Set<String> wordSet = CollectionUtils.newHashSet(); 140 while (wordSet.size() < number) { 141 wordSet.add(CodePointUtils.generateWord(random, codePointSet)); 142 } 143 sWords.addAll(wordSet); 144 } 145 146 /** 147 * Adds unigrams to the dictionary. 148 */ 149 private void addUnigrams(final int number, final FusionDictionary dict, 150 final List<String> words, final HashMap<String, List<String>> shortcutMap) { 151 for (int i = 0; i < number; ++i) { 152 final String word = words.get(i); 153 final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); 154 if (shortcutMap != null && shortcutMap.containsKey(word)) { 155 for (final String shortcut : shortcutMap.get(word)) { 156 shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); 157 } 158 } 159 dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts, 160 false /* isNotAWord */); 161 } 162 } 163 164 private void addBigrams(final FusionDictionary dict, 165 final List<String> words, 166 final SparseArray<List<Integer>> bigrams) { 167 for (int i = 0; i < bigrams.size(); ++i) { 168 final int w1 = bigrams.keyAt(i); 169 for (int w2 : bigrams.valueAt(i)) { 170 dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ); 171 } 172 } 173 } 174 175 // The following is useful to dump the dictionary into a textual file, but it can't compile 176 // on-device, so it's commented out. 177 // private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename) 178 // throws IOException { 179 // com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined( 180 // new java.io.FileWriter(new File(filename)), dict); 181 // } 182 183 private long timeWritingDictToFile(final File file, final FusionDictionary dict, 184 final FormatSpec.FormatOptions formatOptions) { 185 186 long now = -1, diff = -1; 187 188 try { 189 final DictEncoder dictEncoder = getDictEncoder(file, formatOptions); 190 191 now = System.currentTimeMillis(); 192 // If you need to dump the dict to a textual file, uncomment the line below and the 193 // function above 194 // dumpToCombinedFileForDebug(file, "/tmp/foo"); 195 dictEncoder.writeDictionary(dict, formatOptions); 196 diff = System.currentTimeMillis() - now; 197 } catch (IOException e) { 198 Log.e(TAG, "IO exception while writing file", e); 199 } catch (UnsupportedFormatException e) { 200 Log.e(TAG, "UnsupportedFormatException", e); 201 } 202 203 return diff; 204 } 205 206 private void checkDictionary(final FusionDictionary dict, final List<String> words, 207 final SparseArray<List<Integer>> bigrams, 208 final HashMap<String, List<String>> shortcutMap) { 209 assertNotNull(dict); 210 211 // check unigram 212 for (final String word : words) { 213 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); 214 assertNotNull(ptNode); 215 } 216 217 // check bigram 218 for (int i = 0; i < bigrams.size(); ++i) { 219 final int w1 = bigrams.keyAt(i); 220 for (final int w2 : bigrams.valueAt(i)) { 221 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, 222 words.get(w1)); 223 assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2))); 224 } 225 } 226 227 // check shortcut 228 if (shortcutMap != null) { 229 for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) { 230 assertTrue(words.contains(entry.getKey())); 231 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, 232 entry.getKey()); 233 for (final String word : entry.getValue()) { 234 assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, 235 ptNode.getShortcut(word)); 236 } 237 } 238 } 239 } 240 241 private String outputOptions(final int bufferType, 242 final FormatSpec.FormatOptions formatOptions) { 243 String result = " : buffer type = " 244 + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); 245 result += " : version = " + formatOptions.mVersion; 246 return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate; 247 } 248 249 private DictionaryOptions getDictionaryOptions(final String id, final String version) { 250 final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(), 251 false, false); 252 options.mAttributes.put("version", version); 253 options.mAttributes.put("dictionary", id); 254 return options; 255 } 256 257 private File setUpDictionaryFile(final String name, final String version) { 258 File file = null; 259 try { 260 file = new File(getContext().getCacheDir(), name + "." + version 261 + TEST_DICT_FILE_EXTENSION); 262 file.createNewFile(); 263 } catch (IOException e) { 264 // do nothing 265 } 266 assertTrue("Failed to create the dictionary file.", file.exists()); 267 return file; 268 } 269 270 private DictDecoder getDictDecoder(final File file, final int bufferType, 271 final FormatOptions formatOptions, final DictionaryOptions dictOptions) { 272 if (formatOptions.mVersion == FormatSpec.VERSION4) { 273 final FileHeader header = new FileHeader(0, dictOptions, formatOptions); 274 return FormatSpec.getDictDecoder(new File(getContext().getCacheDir(), 275 header.getId() + "." + header.getVersion()), bufferType); 276 } else { 277 return FormatSpec.getDictDecoder(file, bufferType); 278 } 279 } 280 // Tests for readDictionaryBinary and writeDictionaryBinary 281 282 private long timeReadingAndCheckDict(final File file, final List<String> words, 283 final SparseArray<List<Integer>> bigrams, 284 final HashMap<String, List<String>> shortcutMap, final int bufferType, 285 final FormatOptions formatOptions, final DictionaryOptions dictOptions) { 286 long now, diff = -1; 287 288 FusionDictionary dict = null; 289 try { 290 final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions, 291 dictOptions); 292 now = System.currentTimeMillis(); 293 dict = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */); 294 diff = System.currentTimeMillis() - now; 295 } catch (IOException e) { 296 Log.e(TAG, "IOException while reading dictionary", e); 297 } catch (UnsupportedFormatException e) { 298 Log.e(TAG, "Unsupported format", e); 299 } 300 301 checkDictionary(dict, words, bigrams, shortcutMap); 302 return diff; 303 } 304 305 // Tests for readDictionaryBinary and writeDictionaryBinary 306 private String runReadAndWrite(final List<String> words, 307 final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts, 308 final int bufferType, final FormatSpec.FormatOptions formatOptions, 309 final String message) { 310 311 final String dictName = "runReadAndWrite"; 312 final String dictVersion = Long.toString(System.currentTimeMillis()); 313 final File file = setUpDictionaryFile(dictName, dictVersion); 314 315 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 316 getDictionaryOptions(dictName, dictVersion)); 317 addUnigrams(words.size(), dict, words, shortcuts); 318 addBigrams(dict, words, bigrams); 319 checkDictionary(dict, words, bigrams, shortcuts); 320 321 final long write = timeWritingDictToFile(file, dict, formatOptions); 322 final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType, 323 formatOptions, dict.mOptions); 324 325 return "PROF: read=" + read + "ms, write=" + write + "ms :" + message 326 + " : " + outputOptions(bufferType, formatOptions); 327 } 328 329 private void runReadAndWriteTests(final List<String> results, final int bufferType, 330 final FormatSpec.FormatOptions formatOptions) { 331 results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType, 332 formatOptions, "unigram")); 333 results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType, 334 formatOptions, "chain")); 335 results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType, 336 formatOptions, "star")); 337 results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions, 338 "unigram with shortcuts")); 339 results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions, 340 "chain with shortcuts")); 341 results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions, 342 "star with shortcuts")); 343 } 344 345 // Unit test for CharEncoding.readString and CharEncoding.writeString. 346 public void testCharEncoding() { 347 // the max length of a word in sWords is less than 50. 348 // See generateWords. 349 final byte[] buffer = new byte[50 * 3]; 350 final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); 351 for (final String word : sWords) { 352 Log.d("testReadAndWriteString", "write : " + word); 353 Arrays.fill(buffer, (byte)0); 354 CharEncoding.writeString(buffer, 0, word); 355 dictBuffer.position(0); 356 final String str = CharEncoding.readString(dictBuffer); 357 assertEquals(word, str); 358 } 359 } 360 361 public void testReadAndWriteWithByteBuffer() { 362 final List<String> results = CollectionUtils.newArrayList(); 363 364 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2); 365 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); 366 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); 367 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); 368 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); 369 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); 370 371 for (final String result : results) { 372 Log.d(TAG, result); 373 } 374 } 375 376 public void testReadAndWriteWithByteArray() { 377 final List<String> results = CollectionUtils.newArrayList(); 378 379 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2); 380 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); 381 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); 382 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); 383 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); 384 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); 385 386 for (final String result : results) { 387 Log.d(TAG, result); 388 } 389 } 390 391 // Tests for readUnigramsAndBigramsBinary 392 393 private void checkWordMap(final List<String> expectedWords, 394 final SparseArray<List<Integer>> expectedBigrams, 395 final TreeMap<Integer, String> resultWords, 396 final TreeMap<Integer, Integer> resultFrequencies, 397 final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams) { 398 // check unigrams 399 final Set<String> actualWordsSet = new HashSet<String>(resultWords.values()); 400 final Set<String> expectedWordsSet = new HashSet<String>(expectedWords); 401 assertEquals(actualWordsSet, expectedWordsSet); 402 403 for (int freq : resultFrequencies.values()) { 404 assertEquals(freq, UNIGRAM_FREQ); 405 } 406 407 // check bigrams 408 final HashMap<String, List<String>> expBigrams = new HashMap<String, List<String>>(); 409 for (int i = 0; i < expectedBigrams.size(); ++i) { 410 final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); 411 for (int w2 : expectedBigrams.valueAt(i)) { 412 if (expBigrams.get(word1) == null) { 413 expBigrams.put(word1, new ArrayList<String>()); 414 } 415 expBigrams.get(word1).add(expectedWords.get(w2)); 416 } 417 } 418 419 final HashMap<String, List<String>> actBigrams = new HashMap<String, List<String>>(); 420 for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { 421 final String word1 = resultWords.get(entry.getKey()); 422 final int unigramFreq = resultFrequencies.get(entry.getKey()); 423 for (PendingAttribute attr : entry.getValue()) { 424 final String word2 = resultWords.get(attr.mAddress); 425 if (actBigrams.get(word1) == null) { 426 actBigrams.put(word1, new ArrayList<String>()); 427 } 428 actBigrams.get(word1).add(word2); 429 430 final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( 431 unigramFreq, attr.mFrequency); 432 assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); 433 } 434 } 435 436 assertEquals(actBigrams, expBigrams); 437 } 438 439 private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, 440 final SparseArray<List<Integer>> bigrams, final int bufferType, 441 final FormatOptions formatOptions, final DictionaryOptions dictOptions) { 442 FileInputStream inStream = null; 443 444 final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap(); 445 final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = 446 CollectionUtils.newTreeMap(); 447 final TreeMap<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); 448 449 long now = -1, diff = -1; 450 try { 451 final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions, 452 dictOptions); 453 now = System.currentTimeMillis(); 454 dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); 455 diff = System.currentTimeMillis() - now; 456 } catch (IOException e) { 457 Log.e(TAG, "IOException", e); 458 } catch (UnsupportedFormatException e) { 459 Log.e(TAG, "UnsupportedFormatException", e); 460 } finally { 461 if (inStream != null) { 462 try { 463 inStream.close(); 464 } catch (IOException e) { 465 // do nothing 466 } 467 } 468 } 469 470 checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); 471 return diff; 472 } 473 474 private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words, 475 final SparseArray<List<Integer>> bigrams, final int bufferType, 476 final FormatSpec.FormatOptions formatOptions, final String message) { 477 final String dictName = "runReadUnigrams"; 478 final String dictVersion = Long.toString(System.currentTimeMillis()); 479 final File file = setUpDictionaryFile(dictName, dictVersion); 480 481 // making the dictionary from lists of words. 482 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 483 getDictionaryOptions(dictName, dictVersion)); 484 addUnigrams(words.size(), dict, words, null /* shortcutMap */); 485 addBigrams(dict, words, bigrams); 486 487 timeWritingDictToFile(file, dict, formatOptions); 488 489 long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, 490 formatOptions, dict.mOptions); 491 long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, 492 bufferType, formatOptions, dict.mOptions); 493 494 return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap 495 + " : " + message + " : " + outputOptions(bufferType, formatOptions); 496 } 497 498 private void runReadUnigramsAndBigramsTests(final ArrayList<String> results, 499 final int bufferType, final FormatSpec.FormatOptions formatOptions) { 500 results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, 501 formatOptions, "unigram")); 502 results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, 503 formatOptions, "chain")); 504 results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType, 505 formatOptions, "star")); 506 } 507 508 public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { 509 final ArrayList<String> results = CollectionUtils.newArrayList(); 510 511 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2); 512 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); 513 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); 514 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); 515 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); 516 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, 517 VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); 518 519 for (final String result : results) { 520 Log.d(TAG, result); 521 } 522 } 523 524 public void testReadUnigramsAndBigramsBinaryWithByteArray() { 525 final ArrayList<String> results = CollectionUtils.newArrayList(); 526 527 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2); 528 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); 529 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); 530 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); 531 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); 532 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, 533 VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); 534 535 for (final String result : results) { 536 Log.d(TAG, result); 537 } 538 } 539 540 // Tests for getTerminalPosition 541 private String getWordFromBinary(final DictDecoder dictDecoder, final int address) { 542 if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0); 543 544 FileHeader fileHeader = null; 545 try { 546 fileHeader = dictDecoder.readHeader(); 547 } catch (IOException e) { 548 return null; 549 } catch (UnsupportedFormatException e) { 550 return null; 551 } 552 if (fileHeader == null) return null; 553 return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize, 554 address, fileHeader.mFormatOptions).mWord; 555 } 556 557 private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, 558 final boolean contained) { 559 long diff = -1; 560 int position = -1; 561 try { 562 final long now = System.nanoTime(); 563 position = dictDecoder.getTerminalPosition(word); 564 diff = System.nanoTime() - now; 565 } catch (IOException e) { 566 Log.e(TAG, "IOException while getTerminalPosition", e); 567 } catch (UnsupportedFormatException e) { 568 Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e); 569 } 570 571 assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); 572 if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word); 573 return diff; 574 } 575 576 private void runGetTerminalPosition(final ArrayList<String> words, 577 final SparseArray<List<Integer>> bigrams, final int bufferType, 578 final FormatOptions formatOptions, final String message) { 579 final String dictName = "testGetTerminalPosition"; 580 final String dictVersion = Long.toString(System.currentTimeMillis()); 581 final File file = setUpDictionaryFile(dictName, dictVersion); 582 583 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 584 getDictionaryOptions(dictName, dictVersion)); 585 addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); 586 addBigrams(dict, words, bigrams); 587 timeWritingDictToFile(file, dict, formatOptions); 588 589 final DictDecoder dictDecoder = getDictDecoder(file, DictDecoder.USE_BYTEARRAY, 590 formatOptions, dict.mOptions); 591 try { 592 dictDecoder.openDictBuffer(); 593 } catch (IOException e) { 594 // ignore 595 Log.e(TAG, "IOException while opening the buffer", e); 596 } 597 assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); 598 599 try { 600 // too long word 601 final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; 602 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord)); 603 604 // null 605 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null)); 606 607 // empty string 608 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition("")); 609 } catch (IOException e) { 610 } catch (UnsupportedFormatException e) { 611 } 612 613 // Test a word that is contained within the dictionary. 614 long sum = 0; 615 for (int i = 0; i < sWords.size(); ++i) { 616 final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true); 617 sum += time == -1 ? 0 : time; 618 } 619 Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message 620 + " : " + outputOptions(bufferType, formatOptions)); 621 622 // Test a word that isn't contained within the dictionary. 623 final Random random = new Random((int)System.currentTimeMillis()); 624 final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, 625 random); 626 for (int i = 0; i < 1000; ++i) { 627 final String word = CodePointUtils.generateWord(random, codePointSet); 628 if (sWords.indexOf(word) != -1) continue; 629 checkGetTerminalPosition(dictDecoder, word, false); 630 } 631 } 632 633 private void runGetTerminalPositionTests(final int bufferType, 634 final FormatOptions formatOptions) { 635 runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram"); 636 } 637 638 public void testGetTerminalPosition() { 639 final ArrayList<String> results = CollectionUtils.newArrayList(); 640 641 runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION2); 642 runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); 643 runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); 644 runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); 645 runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); 646 runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); 647 648 runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION2); 649 runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); 650 runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); 651 runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); 652 runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); 653 runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); 654 655 for (final String result : results) { 656 Log.d(TAG, result); 657 } 658 } 659 660 private void runTestDeleteWord(final FormatOptions formatOptions) { 661 final String dictName = "testDeleteWord"; 662 final String dictVersion = Long.toString(System.currentTimeMillis()); 663 final File file = setUpDictionaryFile(dictName, dictVersion); 664 665 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 666 new FusionDictionary.DictionaryOptions( 667 new HashMap<String, String>(), false, false)); 668 addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); 669 timeWritingDictToFile(file, dict, formatOptions); 670 671 final DictUpdater dictUpdater; 672 if (formatOptions.mVersion == 3) { 673 dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER); 674 } else if (formatOptions.mVersion == 4) { 675 dictUpdater = new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER); 676 } else { 677 throw new RuntimeException("DictUpdater for version " + formatOptions.mVersion 678 + " doesn't exist."); 679 } 680 681 try { 682 MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, 683 dictUpdater.getTerminalPosition(sWords.get(0))); 684 dictUpdater.deleteWord(sWords.get(0)); 685 assertEquals(FormatSpec.NOT_VALID_WORD, 686 dictUpdater.getTerminalPosition(sWords.get(0))); 687 688 MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, 689 dictUpdater.getTerminalPosition(sWords.get(5))); 690 dictUpdater.deleteWord(sWords.get(5)); 691 assertEquals(FormatSpec.NOT_VALID_WORD, 692 dictUpdater.getTerminalPosition(sWords.get(5))); 693 } catch (IOException e) { 694 } catch (UnsupportedFormatException e) { 695 } 696 } 697 698 public void testDeleteWord() { 699 runTestDeleteWord(VERSION3_WITH_DYNAMIC_UPDATE); 700 runTestDeleteWord(VERSION4_WITH_DYNAMIC_UPDATE); 701 } 702 } 703