Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import android.test.AndroidTestCase;
     20 import android.test.MoreAsserts;
     21 import android.test.suitebuilder.annotation.LargeTest;
     22 import android.util.Log;
     23 import android.util.SparseArray;
     24 
     25 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
     26 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
     27 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
     28 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     29 import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
     30 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
     31 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
     32 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
     33 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
     34 import com.android.inputmethod.latin.utils.CollectionUtils;
     35 
     36 import java.io.File;
     37 import java.io.FileInputStream;
     38 import java.io.IOException;
     39 import java.util.ArrayList;
     40 import java.util.Arrays;
     41 import java.util.HashMap;
     42 import java.util.HashSet;
     43 import java.util.List;
     44 import java.util.Map.Entry;
     45 import java.util.Random;
     46 import java.util.Set;
     47 import java.util.TreeMap;
     48 
     49 /**
     50  * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
     51  */
     52 @LargeTest
     53 public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
     54     private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
     55     private static final int DEFAULT_MAX_UNIGRAMS = 100;
     56     private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
     57     private static final int UNIGRAM_FREQ = 10;
     58     private static final int BIGRAM_FREQ = 50;
     59     private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
     60     private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
     61     private static final int NUM_OF_SHORTCUTS = 5;
     62 
     63     private static final int USE_BYTE_ARRAY = 1;
     64     private static final int USE_BYTE_BUFFER = 2;
     65 
     66     private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
     67     private static final SparseArray<List<Integer>> sEmptyBigrams =
     68             CollectionUtils.newSparseArray();
     69     private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray();
     70     private static final SparseArray<List<Integer>> sChainBigrams =
     71             CollectionUtils.newSparseArray();
     72     private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap();
     73 
     74     private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
     75     private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
     76             new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */);
     77     private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
     78             new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);
     79     private static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE =
     80             new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */);
     81     private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE =
     82             new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */);
     83     private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP =
     84             new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */,
     85                     true /* hasTimestamp */);
     86 
     87     private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
     88 
     89     public BinaryDictDecoderEncoderTests() {
     90         this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
     91     }
     92 
     93     public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
     94         super();
     95         Log.e(TAG, "Testing dictionary: seed is " + seed);
     96         final Random random = new Random(seed);
     97         sWords.clear();
     98         final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
     99                 random);
    100         generateWords(maxUnigrams, random, codePointSet);
    101 
    102         for (int i = 0; i < sWords.size(); ++i) {
    103             sChainBigrams.put(i, new ArrayList<Integer>());
    104             if (i > 0) {
    105                 sChainBigrams.get(i - 1).add(i);
    106             }
    107         }
    108 
    109         sStarBigrams.put(0, new ArrayList<Integer>());
    110         // MAX - 1 because we added one above already
    111         final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1);
    112         for (int i = 1; i < maxBigrams; ++i) {
    113             sStarBigrams.get(0).add(i);
    114         }
    115 
    116         sShortcuts.clear();
    117         for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
    118             final int from = Math.abs(random.nextInt()) % sWords.size();
    119             sShortcuts.put(sWords.get(from), new ArrayList<String>());
    120             for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
    121                 final int to = Math.abs(random.nextInt()) % sWords.size();
    122                 sShortcuts.get(sWords.get(from)).add(sWords.get(to));
    123             }
    124         }
    125     }
    126 
    127     private DictEncoder getDictEncoder(final File file, final FormatOptions formatOptions) {
    128         if (formatOptions.mVersion == FormatSpec.VERSION4) {
    129             return new Ver4DictEncoder(getContext().getCacheDir());
    130         } else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) {
    131             return new Ver3DictEncoder(file);
    132         } else {
    133             throw new RuntimeException("The format option has a wrong version : "
    134                     + formatOptions.mVersion);
    135         }
    136     }
    137 
    138     private void generateWords(final int number, final Random random, final int[] codePointSet) {
    139         final Set<String> wordSet = CollectionUtils.newHashSet();
    140         while (wordSet.size() < number) {
    141             wordSet.add(CodePointUtils.generateWord(random, codePointSet));
    142         }
    143         sWords.addAll(wordSet);
    144     }
    145 
    146     /**
    147      * Adds unigrams to the dictionary.
    148      */
    149     private void addUnigrams(final int number, final FusionDictionary dict,
    150             final List<String> words, final HashMap<String, List<String>> shortcutMap) {
    151         for (int i = 0; i < number; ++i) {
    152             final String word = words.get(i);
    153             final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
    154             if (shortcutMap != null && shortcutMap.containsKey(word)) {
    155                 for (final String shortcut : shortcutMap.get(word)) {
    156                     shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ));
    157                 }
    158             }
    159             dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts,
    160                     false /* isNotAWord */);
    161         }
    162     }
    163 
    164     private void addBigrams(final FusionDictionary dict,
    165             final List<String> words,
    166             final SparseArray<List<Integer>> bigrams) {
    167         for (int i = 0; i < bigrams.size(); ++i) {
    168             final int w1 = bigrams.keyAt(i);
    169             for (int w2 : bigrams.valueAt(i)) {
    170                 dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ);
    171             }
    172         }
    173     }
    174 
    175 //    The following is useful to dump the dictionary into a textual file, but it can't compile
    176 //    on-device, so it's commented out.
    177 //    private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
    178 //            throws IOException {
    179 //        com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
    180 //                new java.io.FileWriter(new File(filename)), dict);
    181 //    }
    182 
    183     private long timeWritingDictToFile(final File file, final FusionDictionary dict,
    184             final FormatSpec.FormatOptions formatOptions) {
    185 
    186         long now = -1, diff = -1;
    187 
    188         try {
    189             final DictEncoder dictEncoder = getDictEncoder(file, formatOptions);
    190 
    191             now = System.currentTimeMillis();
    192             // If you need to dump the dict to a textual file, uncomment the line below and the
    193             // function above
    194             // dumpToCombinedFileForDebug(file, "/tmp/foo");
    195             dictEncoder.writeDictionary(dict, formatOptions);
    196             diff = System.currentTimeMillis() - now;
    197         } catch (IOException e) {
    198             Log.e(TAG, "IO exception while writing file", e);
    199         } catch (UnsupportedFormatException e) {
    200             Log.e(TAG, "UnsupportedFormatException", e);
    201         }
    202 
    203         return diff;
    204     }
    205 
    206     private void checkDictionary(final FusionDictionary dict, final List<String> words,
    207             final SparseArray<List<Integer>> bigrams,
    208             final HashMap<String, List<String>> shortcutMap) {
    209         assertNotNull(dict);
    210 
    211         // check unigram
    212         for (final String word : words) {
    213             final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
    214             assertNotNull(ptNode);
    215         }
    216 
    217         // check bigram
    218         for (int i = 0; i < bigrams.size(); ++i) {
    219             final int w1 = bigrams.keyAt(i);
    220             for (final int w2 : bigrams.valueAt(i)) {
    221                 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
    222                         words.get(w1));
    223                 assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2)));
    224             }
    225         }
    226 
    227         // check shortcut
    228         if (shortcutMap != null) {
    229             for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) {
    230                 assertTrue(words.contains(entry.getKey()));
    231                 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
    232                         entry.getKey());
    233                 for (final String word : entry.getValue()) {
    234                     assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
    235                             ptNode.getShortcut(word));
    236                 }
    237             }
    238         }
    239     }
    240 
    241     private String outputOptions(final int bufferType,
    242             final FormatSpec.FormatOptions formatOptions) {
    243         String result = " : buffer type = "
    244                 + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
    245         result += " : version = " + formatOptions.mVersion;
    246         return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate;
    247     }
    248 
    249     private DictionaryOptions getDictionaryOptions(final String id, final String version) {
    250         final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(),
    251                 false, false);
    252         options.mAttributes.put("version", version);
    253         options.mAttributes.put("dictionary", id);
    254         return options;
    255     }
    256 
    257     private File setUpDictionaryFile(final String name, final String version) {
    258         File file = null;
    259         try {
    260             file = new File(getContext().getCacheDir(), name + "." + version
    261                     + TEST_DICT_FILE_EXTENSION);
    262             file.createNewFile();
    263         } catch (IOException e) {
    264             // do nothing
    265         }
    266         assertTrue("Failed to create the dictionary file.", file.exists());
    267         return file;
    268     }
    269 
    270     private DictDecoder getDictDecoder(final File file, final int bufferType,
    271             final FormatOptions formatOptions, final DictionaryOptions dictOptions) {
    272         if (formatOptions.mVersion == FormatSpec.VERSION4) {
    273             final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
    274             return FormatSpec.getDictDecoder(new File(getContext().getCacheDir(),
    275                     header.getId() + "." + header.getVersion()), bufferType);
    276         } else {
    277             return FormatSpec.getDictDecoder(file, bufferType);
    278         }
    279     }
    280     // Tests for readDictionaryBinary and writeDictionaryBinary
    281 
    282     private long timeReadingAndCheckDict(final File file, final List<String> words,
    283             final SparseArray<List<Integer>> bigrams,
    284             final HashMap<String, List<String>> shortcutMap, final int bufferType,
    285             final FormatOptions formatOptions, final DictionaryOptions dictOptions) {
    286         long now, diff = -1;
    287 
    288         FusionDictionary dict = null;
    289         try {
    290             final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions,
    291                     dictOptions);
    292             now = System.currentTimeMillis();
    293             dict = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */);
    294             diff  = System.currentTimeMillis() - now;
    295         } catch (IOException e) {
    296             Log.e(TAG, "IOException while reading dictionary", e);
    297         } catch (UnsupportedFormatException e) {
    298             Log.e(TAG, "Unsupported format", e);
    299         }
    300 
    301         checkDictionary(dict, words, bigrams, shortcutMap);
    302         return diff;
    303     }
    304 
    305     // Tests for readDictionaryBinary and writeDictionaryBinary
    306     private String runReadAndWrite(final List<String> words,
    307             final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts,
    308             final int bufferType, final FormatSpec.FormatOptions formatOptions,
    309             final String message) {
    310 
    311         final String dictName = "runReadAndWrite";
    312         final String dictVersion = Long.toString(System.currentTimeMillis());
    313         final File file = setUpDictionaryFile(dictName, dictVersion);
    314 
    315         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    316                 getDictionaryOptions(dictName, dictVersion));
    317         addUnigrams(words.size(), dict, words, shortcuts);
    318         addBigrams(dict, words, bigrams);
    319         checkDictionary(dict, words, bigrams, shortcuts);
    320 
    321         final long write = timeWritingDictToFile(file, dict, formatOptions);
    322         final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType,
    323                 formatOptions, dict.mOptions);
    324 
    325         return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
    326                 + " : " + outputOptions(bufferType, formatOptions);
    327     }
    328 
    329     private void runReadAndWriteTests(final List<String> results, final int bufferType,
    330             final FormatSpec.FormatOptions formatOptions) {
    331         results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType,
    332                 formatOptions, "unigram"));
    333         results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType,
    334                 formatOptions, "chain"));
    335         results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
    336                 formatOptions, "star"));
    337         results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
    338                 "unigram with shortcuts"));
    339         results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
    340                 "chain with shortcuts"));
    341         results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
    342                 "star with shortcuts"));
    343     }
    344 
    345     // Unit test for CharEncoding.readString and CharEncoding.writeString.
    346     public void testCharEncoding() {
    347         // the max length of a word in sWords is less than 50.
    348         // See generateWords.
    349         final byte[] buffer = new byte[50 * 3];
    350         final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
    351         for (final String word : sWords) {
    352             Log.d("testReadAndWriteString", "write : " + word);
    353             Arrays.fill(buffer, (byte)0);
    354             CharEncoding.writeString(buffer, 0, word);
    355             dictBuffer.position(0);
    356             final String str = CharEncoding.readString(dictBuffer);
    357             assertEquals(word, str);
    358         }
    359     }
    360 
    361     public void testReadAndWriteWithByteBuffer() {
    362         final List<String> results = CollectionUtils.newArrayList();
    363 
    364         runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2);
    365         runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE);
    366         runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE);
    367         runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE);
    368         runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE);
    369         runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
    370 
    371         for (final String result : results) {
    372             Log.d(TAG, result);
    373         }
    374     }
    375 
    376     public void testReadAndWriteWithByteArray() {
    377         final List<String> results = CollectionUtils.newArrayList();
    378 
    379         runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2);
    380         runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);
    381         runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE);
    382         runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE);
    383         runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE);
    384         runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
    385 
    386         for (final String result : results) {
    387             Log.d(TAG, result);
    388         }
    389     }
    390 
    391     // Tests for readUnigramsAndBigramsBinary
    392 
    393     private void checkWordMap(final List<String> expectedWords,
    394             final SparseArray<List<Integer>> expectedBigrams,
    395             final TreeMap<Integer, String> resultWords,
    396             final TreeMap<Integer, Integer> resultFrequencies,
    397             final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams) {
    398         // check unigrams
    399         final Set<String> actualWordsSet = new HashSet<String>(resultWords.values());
    400         final Set<String> expectedWordsSet = new HashSet<String>(expectedWords);
    401         assertEquals(actualWordsSet, expectedWordsSet);
    402 
    403         for (int freq : resultFrequencies.values()) {
    404             assertEquals(freq, UNIGRAM_FREQ);
    405         }
    406 
    407         // check bigrams
    408         final HashMap<String, List<String>> expBigrams = new HashMap<String, List<String>>();
    409         for (int i = 0; i < expectedBigrams.size(); ++i) {
    410             final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
    411             for (int w2 : expectedBigrams.valueAt(i)) {
    412                 if (expBigrams.get(word1) == null) {
    413                     expBigrams.put(word1, new ArrayList<String>());
    414                 }
    415                 expBigrams.get(word1).add(expectedWords.get(w2));
    416             }
    417         }
    418 
    419         final HashMap<String, List<String>> actBigrams = new HashMap<String, List<String>>();
    420         for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
    421             final String word1 = resultWords.get(entry.getKey());
    422             final int unigramFreq = resultFrequencies.get(entry.getKey());
    423             for (PendingAttribute attr : entry.getValue()) {
    424                 final String word2 = resultWords.get(attr.mAddress);
    425                 if (actBigrams.get(word1) == null) {
    426                     actBigrams.put(word1, new ArrayList<String>());
    427                 }
    428                 actBigrams.get(word1).add(word2);
    429 
    430                 final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
    431                         unigramFreq, attr.mFrequency);
    432                 assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
    433             }
    434         }
    435 
    436         assertEquals(actBigrams, expBigrams);
    437     }
    438 
    439     private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words,
    440             final SparseArray<List<Integer>> bigrams, final int bufferType,
    441             final FormatOptions formatOptions, final DictionaryOptions dictOptions) {
    442         FileInputStream inStream = null;
    443 
    444         final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap();
    445         final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams =
    446                 CollectionUtils.newTreeMap();
    447         final TreeMap<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
    448 
    449         long now = -1, diff = -1;
    450         try {
    451             final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions,
    452                     dictOptions);
    453             now = System.currentTimeMillis();
    454             dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
    455             diff = System.currentTimeMillis() - now;
    456         } catch (IOException e) {
    457             Log.e(TAG, "IOException", e);
    458         } catch (UnsupportedFormatException e) {
    459             Log.e(TAG, "UnsupportedFormatException", e);
    460         } finally {
    461             if (inStream != null) {
    462                 try {
    463                     inStream.close();
    464                 } catch (IOException e) {
    465                     // do nothing
    466                 }
    467             }
    468         }
    469 
    470         checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams);
    471         return diff;
    472     }
    473 
    474     private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
    475             final SparseArray<List<Integer>> bigrams, final int bufferType,
    476             final FormatSpec.FormatOptions formatOptions, final String message) {
    477         final String dictName = "runReadUnigrams";
    478         final String dictVersion = Long.toString(System.currentTimeMillis());
    479         final File file = setUpDictionaryFile(dictName, dictVersion);
    480 
    481         // making the dictionary from lists of words.
    482         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    483                 getDictionaryOptions(dictName, dictVersion));
    484         addUnigrams(words.size(), dict, words, null /* shortcutMap */);
    485         addBigrams(dict, words, bigrams);
    486 
    487         timeWritingDictToFile(file, dict, formatOptions);
    488 
    489         long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType,
    490                 formatOptions, dict.mOptions);
    491         long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */,
    492                 bufferType, formatOptions, dict.mOptions);
    493 
    494         return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
    495                 + " : " + message + " : " + outputOptions(bufferType, formatOptions);
    496     }
    497 
    498     private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
    499             final int bufferType, final FormatSpec.FormatOptions formatOptions) {
    500         results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
    501                 formatOptions, "unigram"));
    502         results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
    503                 formatOptions, "chain"));
    504         results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
    505                 formatOptions, "star"));
    506     }
    507 
    508     public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
    509         final ArrayList<String> results = CollectionUtils.newArrayList();
    510 
    511         runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2);
    512         runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE);
    513         runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE);
    514         runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE);
    515         runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE);
    516         runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER,
    517                 VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
    518 
    519         for (final String result : results) {
    520             Log.d(TAG, result);
    521         }
    522     }
    523 
    524     public void testReadUnigramsAndBigramsBinaryWithByteArray() {
    525         final ArrayList<String> results = CollectionUtils.newArrayList();
    526 
    527         runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2);
    528         runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);
    529         runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE);
    530         runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE);
    531         runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE);
    532         runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY,
    533                 VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
    534 
    535         for (final String result : results) {
    536             Log.d(TAG, result);
    537         }
    538     }
    539 
    540     // Tests for getTerminalPosition
    541     private String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
    542         if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);
    543 
    544         FileHeader fileHeader = null;
    545         try {
    546             fileHeader = dictDecoder.readHeader();
    547         } catch (IOException e) {
    548             return null;
    549         } catch (UnsupportedFormatException e) {
    550             return null;
    551         }
    552         if (fileHeader == null) return null;
    553         return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
    554                 address, fileHeader.mFormatOptions).mWord;
    555     }
    556 
    557     private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
    558             final boolean contained) {
    559         long diff = -1;
    560         int position = -1;
    561         try {
    562             final long now = System.nanoTime();
    563             position = dictDecoder.getTerminalPosition(word);
    564             diff = System.nanoTime() - now;
    565         } catch (IOException e) {
    566             Log.e(TAG, "IOException while getTerminalPosition", e);
    567         } catch (UnsupportedFormatException e) {
    568             Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
    569         }
    570 
    571         assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
    572         if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
    573         return diff;
    574     }
    575 
    576     private void runGetTerminalPosition(final ArrayList<String> words,
    577             final SparseArray<List<Integer>> bigrams, final int bufferType,
    578             final FormatOptions formatOptions, final String message) {
    579         final String dictName = "testGetTerminalPosition";
    580         final String dictVersion = Long.toString(System.currentTimeMillis());
    581         final File file = setUpDictionaryFile(dictName, dictVersion);
    582 
    583         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    584                 getDictionaryOptions(dictName, dictVersion));
    585         addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
    586         addBigrams(dict, words, bigrams);
    587         timeWritingDictToFile(file, dict, formatOptions);
    588 
    589         final DictDecoder dictDecoder = getDictDecoder(file, DictDecoder.USE_BYTEARRAY,
    590                 formatOptions, dict.mOptions);
    591         try {
    592             dictDecoder.openDictBuffer();
    593         } catch (IOException e) {
    594             // ignore
    595             Log.e(TAG, "IOException while opening the buffer", e);
    596         }
    597         assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
    598 
    599         try {
    600             // too long word
    601             final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
    602             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));
    603 
    604             // null
    605             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));
    606 
    607             // empty string
    608             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
    609         } catch (IOException e) {
    610         } catch (UnsupportedFormatException e) {
    611         }
    612 
    613         // Test a word that is contained within the dictionary.
    614         long sum = 0;
    615         for (int i = 0; i < sWords.size(); ++i) {
    616             final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true);
    617             sum += time == -1 ? 0 : time;
    618         }
    619         Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message
    620                 + " : " + outputOptions(bufferType, formatOptions));
    621 
    622         // Test a word that isn't contained within the dictionary.
    623         final Random random = new Random((int)System.currentTimeMillis());
    624         final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
    625                 random);
    626         for (int i = 0; i < 1000; ++i) {
    627             final String word = CodePointUtils.generateWord(random, codePointSet);
    628             if (sWords.indexOf(word) != -1) continue;
    629             checkGetTerminalPosition(dictDecoder, word, false);
    630         }
    631     }
    632 
    633     private void runGetTerminalPositionTests(final int bufferType,
    634             final FormatOptions formatOptions) {
    635         runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
    636     }
    637 
    638     public void testGetTerminalPosition() {
    639         final ArrayList<String> results = CollectionUtils.newArrayList();
    640 
    641         runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION2);
    642         runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);
    643         runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE);
    644         runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE);
    645         runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE);
    646         runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
    647 
    648         runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION2);
    649         runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE);
    650         runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE);
    651         runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE);
    652         runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE);
    653         runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
    654 
    655         for (final String result : results) {
    656             Log.d(TAG, result);
    657         }
    658     }
    659 
    660     private void runTestDeleteWord(final FormatOptions formatOptions) {
    661         final String dictName = "testDeleteWord";
    662         final String dictVersion = Long.toString(System.currentTimeMillis());
    663         final File file = setUpDictionaryFile(dictName, dictVersion);
    664 
    665         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    666                 new FusionDictionary.DictionaryOptions(
    667                         new HashMap<String, String>(), false, false));
    668         addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
    669         timeWritingDictToFile(file, dict, formatOptions);
    670 
    671         final DictUpdater dictUpdater;
    672         if (formatOptions.mVersion == 3) {
    673             dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
    674         } else if (formatOptions.mVersion == 4) {
    675             dictUpdater = new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
    676         } else {
    677             throw new RuntimeException("DictUpdater for version " + formatOptions.mVersion
    678                     + " doesn't exist.");
    679         }
    680 
    681         try {
    682             MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
    683                     dictUpdater.getTerminalPosition(sWords.get(0)));
    684             dictUpdater.deleteWord(sWords.get(0));
    685             assertEquals(FormatSpec.NOT_VALID_WORD,
    686                     dictUpdater.getTerminalPosition(sWords.get(0)));
    687 
    688             MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
    689                     dictUpdater.getTerminalPosition(sWords.get(5)));
    690             dictUpdater.deleteWord(sWords.get(5));
    691             assertEquals(FormatSpec.NOT_VALID_WORD,
    692                     dictUpdater.getTerminalPosition(sWords.get(5)));
    693         } catch (IOException e) {
    694         } catch (UnsupportedFormatException e) {
    695         }
    696     }
    697 
    698     public void testDeleteWord() {
    699         runTestDeleteWord(VERSION3_WITH_DYNAMIC_UPDATE);
    700         runTestDeleteWord(VERSION4_WITH_DYNAMIC_UPDATE);
    701     }
    702 }
    703