Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import android.test.AndroidTestCase;
     20 import android.test.suitebuilder.annotation.LargeTest;
     21 import android.util.Log;
     22 import android.util.Pair;
     23 import android.util.SparseArray;
     24 
     25 import com.android.inputmethod.latin.BinaryDictionary;
     26 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
     27 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
     28 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     29 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
     30 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
     31 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
     32 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
     33 
     34 import java.io.File;
     35 import java.io.IOException;
     36 import java.util.ArrayList;
     37 import java.util.Arrays;
     38 import java.util.HashMap;
     39 import java.util.HashSet;
     40 import java.util.List;
     41 import java.util.Locale;
     42 import java.util.Map.Entry;
     43 import java.util.Random;
     44 import java.util.Set;
     45 import java.util.TreeMap;
     46 
     47 /**
     48  * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
     49  */
     50 @LargeTest
     51 public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
    // Tag used for every log line emitted by this test class.
    private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
    // Number of unigrams generated when the no-argument constructor is used.
    private static final int DEFAULT_MAX_UNIGRAMS = 300;
    // Sizes of the code point sets requested from CodePointUtils.generateCodePointSet.
    private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
    private static final int LARGE_CODE_POINT_SET_SIZE = 300;
    // Probability given to every unigram and every shortcut target added to a test dictionary.
    private static final int UNIGRAM_FREQ = 10;
    // Probability given to every bigram added to a test dictionary.
    private static final int BIGRAM_FREQ = 50;
    // A reconstructed bigram frequency must differ from BIGRAM_FREQ by less than this amount.
    private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
    // Number of shortcut sources drawn at random, and number of targets drawn for each source.
    private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
    private static final int NUM_OF_SHORTCUTS = 5;

    // Shared fixtures. They are static but filled in by the instance constructor.
    // Words generated from the default-size code point set.
    private static final ArrayList<String> sWords = new ArrayList<>();
    // Words generated from the large code point set.
    private static final ArrayList<String> sWordsWithVariousCodePoints = new ArrayList<>();
    // Bigram maps: key is the index of the first word, value lists the indices of second words.
    // sEmptyBigrams is never populated in the code visible here.
    private static final SparseArray<List<Integer>> sEmptyBigrams = new SparseArray<>();
    // Word 0 is linked to many other words.
    private static final SparseArray<List<Integer>> sStarBigrams = new SparseArray<>();
    // Word i is linked to word i + 1.
    private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>();
    // Maps a source word to the list of its shortcut targets.
    private static final HashMap<String, List<String>> sShortcuts = new HashMap<>();
     68 
     69     public BinaryDictDecoderEncoderTests() {
     70         this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
     71     }
     72 
     73     public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
     74         super();
     75         BinaryDictionaryUtils.setCurrentTimeForTest(0);
     76         Log.e(TAG, "Testing dictionary: seed is " + seed);
     77         final Random random = new Random(seed);
     78         sWords.clear();
     79         sWordsWithVariousCodePoints.clear();
     80         generateWords(maxUnigrams, random);
     81 
     82         for (int i = 0; i < sWords.size(); ++i) {
     83             sChainBigrams.put(i, new ArrayList<Integer>());
     84             if (i > 0) {
     85                 sChainBigrams.get(i - 1).add(i);
     86             }
     87         }
     88 
     89         sStarBigrams.put(0, new ArrayList<Integer>());
     90         // MAX - 1 because we added one above already
     91         final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1);
     92         for (int i = 1; i < maxBigrams; ++i) {
     93             sStarBigrams.get(0).add(i);
     94         }
     95 
     96         sShortcuts.clear();
     97         for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
     98             final int from = Math.abs(random.nextInt()) % sWords.size();
     99             sShortcuts.put(sWords.get(from), new ArrayList<String>());
    100             for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
    101                 final int to = Math.abs(random.nextInt()) % sWords.size();
    102                 sShortcuts.get(sWords.get(from)).add(sWords.get(to));
    103             }
    104         }
    105     }
    106 
    107     @Override
    108     protected void setUp() throws Exception {
    109         super.setUp();
    110         BinaryDictionaryUtils.setCurrentTimeForTest(0);
    111     }
    112 
    113     @Override
    114     protected void tearDown() throws Exception {
    115         // Quit test mode.
    116         BinaryDictionaryUtils.setCurrentTimeForTest(-1);
    117         super.tearDown();
    118     }
    119 
    120     private void generateWords(final int number, final Random random) {
    121         final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
    122                 random);
    123         final Set<String> wordSet = new HashSet<>();
    124         while (wordSet.size() < number) {
    125             wordSet.add(CodePointUtils.generateWord(random, codePointSet));
    126         }
    127         sWords.addAll(wordSet);
    128 
    129         final int[] largeCodePointSet = CodePointUtils.generateCodePointSet(
    130                 LARGE_CODE_POINT_SET_SIZE, random);
    131         wordSet.clear();
    132         while (wordSet.size() < number) {
    133             wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet));
    134         }
    135         sWordsWithVariousCodePoints.addAll(wordSet);
    136     }
    137 
    138     /**
    139      * Adds unigrams to the dictionary.
    140      */
    141     private void addUnigrams(final int number, final FusionDictionary dict,
    142             final List<String> words, final HashMap<String, List<String>> shortcutMap) {
    143         for (int i = 0; i < number; ++i) {
    144             final String word = words.get(i);
    145             final ArrayList<WeightedString> shortcuts = new ArrayList<>();
    146             if (shortcutMap != null && shortcutMap.containsKey(word)) {
    147                 for (final String shortcut : shortcutMap.get(word)) {
    148                     shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ));
    149                 }
    150             }
    151             dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ),
    152                     (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */);
    153         }
    154     }
    155 
    156     private void addBigrams(final FusionDictionary dict,
    157             final List<String> words,
    158             final SparseArray<List<Integer>> bigrams) {
    159         for (int i = 0; i < bigrams.size(); ++i) {
    160             final int w1 = bigrams.keyAt(i);
    161             for (int w2 : bigrams.valueAt(i)) {
    162                 dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ));
    163             }
    164         }
    165     }
    166 
    167 //    The following is useful to dump the dictionary into a textual file, but it can't compile
    168 //    on-device, so it's commented out.
    169 //    private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
    170 //            throws IOException {
    171 //        com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
    172 //                new java.io.FileWriter(new File(filename)), dict);
    173 //    }
    174 
    175     private long timeWritingDictToFile(final File file, final FusionDictionary dict,
    176             final FormatSpec.FormatOptions formatOptions) {
    177 
    178         long now = -1, diff = -1;
    179 
    180         try {
    181             final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
    182 
    183             now = System.currentTimeMillis();
    184             // If you need to dump the dict to a textual file, uncomment the line below and the
    185             // function above
    186             // dumpToCombinedFileForDebug(file, "/tmp/foo");
    187             dictEncoder.writeDictionary(dict, formatOptions);
    188             diff = System.currentTimeMillis() - now;
    189         } catch (IOException e) {
    190             Log.e(TAG, "IO exception while writing file", e);
    191         } catch (UnsupportedFormatException e) {
    192             Log.e(TAG, "UnsupportedFormatException", e);
    193         }
    194 
    195         return diff;
    196     }
    197 
    198     private void checkDictionary(final FusionDictionary dict, final List<String> words,
    199             final SparseArray<List<Integer>> bigrams,
    200             final HashMap<String, List<String>> shortcutMap) {
    201         assertNotNull(dict);
    202 
    203         // check unigram
    204         for (final String word : words) {
    205             final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
    206             assertNotNull(ptNode);
    207         }
    208 
    209         // check bigram
    210         for (int i = 0; i < bigrams.size(); ++i) {
    211             final int w1 = bigrams.keyAt(i);
    212             for (final int w2 : bigrams.valueAt(i)) {
    213                 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
    214                         words.get(w1));
    215                 assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2)));
    216             }
    217         }
    218 
    219         // check shortcut
    220         if (shortcutMap != null) {
    221             for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) {
    222                 assertTrue(words.contains(entry.getKey()));
    223                 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
    224                         entry.getKey());
    225                 for (final String word : entry.getValue()) {
    226                     assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
    227                             ptNode.getShortcut(word));
    228                 }
    229             }
    230         }
    231     }
    232 
    233     private String outputOptions(final int bufferType,
    234             final FormatSpec.FormatOptions formatOptions) {
    235         String result = " : buffer type = "
    236                 + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
    237         return result + " : version = " + formatOptions.mVersion;
    238     }
    239 
    240     // Tests for readDictionaryBinary and writeDictionaryBinary
    241 
    242     private long timeReadingAndCheckDict(final File file, final List<String> words,
    243             final SparseArray<List<Integer>> bigrams,
    244             final HashMap<String, List<String>> shortcutMap, final int bufferType) {
    245         long now, diff = -1;
    246 
    247         FusionDictionary dict = null;
    248         try {
    249             final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
    250                     bufferType);
    251             now = System.currentTimeMillis();
    252             dict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
    253             diff  = System.currentTimeMillis() - now;
    254         } catch (IOException e) {
    255             Log.e(TAG, "IOException while reading dictionary", e);
    256         } catch (UnsupportedFormatException e) {
    257             Log.e(TAG, "Unsupported format", e);
    258         }
    259 
    260         checkDictionary(dict, words, bigrams, shortcutMap);
    261         return diff;
    262     }
    263 
    264     // Tests for readDictionaryBinary and writeDictionaryBinary
    265     private String runReadAndWrite(final List<String> words,
    266             final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts,
    267             final int bufferType, final FormatSpec.FormatOptions formatOptions,
    268             final String message) {
    269 
    270         final String dictName = "runReadAndWrite";
    271         final String dictVersion = Long.toString(System.currentTimeMillis());
    272         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
    273                 getContext().getCacheDir());
    274 
    275         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    276                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
    277         addUnigrams(words.size(), dict, words, shortcuts);
    278         addBigrams(dict, words, bigrams);
    279         checkDictionary(dict, words, bigrams, shortcuts);
    280 
    281         final long write = timeWritingDictToFile(file, dict, formatOptions);
    282         final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType);
    283 
    284         return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
    285                 + " : " + outputOptions(bufferType, formatOptions);
    286     }
    287 
    288     private void runReadAndWriteTests(final List<String> results, final int bufferType,
    289             final FormatSpec.FormatOptions formatOptions) {
    290         results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType,
    291                 formatOptions, "unigram"));
    292         results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType,
    293                 formatOptions, "chain"));
    294         results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
    295                 formatOptions, "star"));
    296         results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
    297                 "unigram with shortcuts"));
    298         results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
    299                 "chain with shortcuts"));
    300         results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
    301                 "star with shortcuts"));
    302         results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams,
    303                 null /* shortcuts */, bufferType, formatOptions,
    304                 "unigram with various code points"));
    305     }
    306 
    307     // Unit test for CharEncoding.readString and CharEncoding.writeString.
    308     public void testCharEncoding() {
    309         // the max length of a word in sWords is less than 50.
    310         // See generateWords.
    311         final byte[] buffer = new byte[50 * 3];
    312         final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
    313         for (final String word : sWords) {
    314             Arrays.fill(buffer, (byte) 0);
    315             CharEncoding.writeString(buffer, 0, word);
    316             dictBuffer.position(0);
    317             final String str = CharEncoding.readString(dictBuffer);
    318             assertEquals(word, str);
    319         }
    320     }
    321 
    322     public void testReadAndWriteWithByteBuffer() {
    323         final List<String> results = new ArrayList<>();
    324 
    325         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
    326                 BinaryDictUtils.VERSION2_OPTIONS);
    327         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
    328                 BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
    329         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
    330                 BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
    331         for (final String result : results) {
    332             Log.d(TAG, result);
    333         }
    334     }
    335 
    336     public void testReadAndWriteWithByteArray() {
    337         final List<String> results = new ArrayList<>();
    338 
    339         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
    340                 BinaryDictUtils.VERSION2_OPTIONS);
    341         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
    342                 BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
    343         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
    344                 BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
    345 
    346         for (final String result : results) {
    347             Log.d(TAG, result);
    348         }
    349     }
    350 
    351     // Tests for readUnigramsAndBigramsBinary
    352 
    353     private void checkWordMap(final List<String> expectedWords,
    354             final SparseArray<List<Integer>> expectedBigrams,
    355             final TreeMap<Integer, String> resultWords,
    356             final TreeMap<Integer, Integer> resultFrequencies,
    357             final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams,
    358             final boolean checkProbability) {
    359         // check unigrams
    360         final Set<String> actualWordsSet = new HashSet<>(resultWords.values());
    361         final Set<String> expectedWordsSet = new HashSet<>(expectedWords);
    362         assertEquals(actualWordsSet, expectedWordsSet);
    363         if (checkProbability) {
    364             for (int freq : resultFrequencies.values()) {
    365                 assertEquals(freq, UNIGRAM_FREQ);
    366             }
    367         }
    368 
    369         // check bigrams
    370         final HashMap<String, Set<String>> expBigrams = new HashMap<>();
    371         for (int i = 0; i < expectedBigrams.size(); ++i) {
    372             final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
    373             for (int w2 : expectedBigrams.valueAt(i)) {
    374                 if (expBigrams.get(word1) == null) {
    375                     expBigrams.put(word1, new HashSet<String>());
    376                 }
    377                 expBigrams.get(word1).add(expectedWords.get(w2));
    378             }
    379         }
    380 
    381         final HashMap<String, Set<String>> actBigrams = new HashMap<>();
    382         for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
    383             final String word1 = resultWords.get(entry.getKey());
    384             final int unigramFreq = resultFrequencies.get(entry.getKey());
    385             for (PendingAttribute attr : entry.getValue()) {
    386                 final String word2 = resultWords.get(attr.mAddress);
    387                 if (actBigrams.get(word1) == null) {
    388                     actBigrams.put(word1, new HashSet<String>());
    389                 }
    390                 actBigrams.get(word1).add(word2);
    391 
    392                 if (checkProbability) {
    393                     final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
    394                             unigramFreq, attr.mFrequency);
    395                     assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
    396                 }
    397             }
    398         }
    399         assertEquals(actBigrams, expBigrams);
    400     }
    401 
    402     private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words,
    403             final SparseArray<List<Integer>> bigrams, final int bufferType,
    404             final boolean checkProbability) {
    405         final TreeMap<Integer, String> resultWords = new TreeMap<>();
    406         final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = new TreeMap<>();
    407         final TreeMap<Integer, Integer> resultFreqs = new TreeMap<>();
    408 
    409         long now = -1, diff = -1;
    410         try {
    411             final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
    412                     bufferType);
    413             now = System.currentTimeMillis();
    414             dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
    415             diff = System.currentTimeMillis() - now;
    416         } catch (IOException e) {
    417             Log.e(TAG, "IOException", e);
    418         } catch (UnsupportedFormatException e) {
    419             Log.e(TAG, "UnsupportedFormatException", e);
    420         }
    421 
    422         checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability);
    423         return diff;
    424     }
    425 
    426     private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
    427             final SparseArray<List<Integer>> bigrams, final int bufferType,
    428             final FormatSpec.FormatOptions formatOptions, final String message) {
    429         final String dictName = "runReadUnigrams";
    430         final String dictVersion = Long.toString(System.currentTimeMillis());
    431         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
    432                 getContext().getCacheDir());
    433 
    434         // making the dictionary from lists of words.
    435         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    436                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
    437         addUnigrams(words.size(), dict, words, null /* shortcutMap */);
    438         addBigrams(dict, words, bigrams);
    439 
    440         timeWritingDictToFile(file, dict, formatOptions);
    441 
    442         // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the
    443         // probability when there's a timestamp for the entry.
    444         // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native.
    445         long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType,
    446                 !formatOptions.mHasTimestamp /* checkProbability */);
    447         long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */,
    448                 bufferType);
    449 
    450         return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
    451                 + " : " + message + " : " + outputOptions(bufferType, formatOptions);
    452     }
    453 
    454     private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
    455             final int bufferType, final FormatSpec.FormatOptions formatOptions) {
    456         results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
    457                 formatOptions, "unigram"));
    458         results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
    459                 formatOptions, "chain"));
    460         results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
    461                 formatOptions, "star"));
    462     }
    463 
    464     public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
    465         final ArrayList<String> results = new ArrayList<>();
    466 
    467         runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
    468                 BinaryDictUtils.VERSION2_OPTIONS);
    469 
    470         for (final String result : results) {
    471             Log.d(TAG, result);
    472         }
    473     }
    474 
    475     public void testReadUnigramsAndBigramsBinaryWithByteArray() {
    476         final ArrayList<String> results = new ArrayList<>();
    477 
    478         runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
    479                 BinaryDictUtils.VERSION2_OPTIONS);
    480 
    481         for (final String result : results) {
    482             Log.d(TAG, result);
    483         }
    484     }
    485 
    486     // Tests for getTerminalPosition
    487     private String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
    488         if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);
    489 
    490         DictionaryHeader fileHeader = null;
    491         try {
    492             fileHeader = dictDecoder.readHeader();
    493         } catch (IOException e) {
    494             return null;
    495         } catch (UnsupportedFormatException e) {
    496             return null;
    497         }
    498         if (fileHeader == null) return null;
    499         return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
    500                 address).mWord;
    501     }
    502 
    503     private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
    504             final boolean contained) {
    505         long diff = -1;
    506         int position = -1;
    507         try {
    508             final long now = System.nanoTime();
    509             position = dictDecoder.getTerminalPosition(word);
    510             diff = System.nanoTime() - now;
    511         } catch (IOException e) {
    512             Log.e(TAG, "IOException while getTerminalPosition", e);
    513         } catch (UnsupportedFormatException e) {
    514             Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
    515         }
    516 
    517         assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
    518         if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
    519         return diff;
    520     }
    521 
    522     private void runGetTerminalPosition(final ArrayList<String> words,
    523             final SparseArray<List<Integer>> bigrams, final int bufferType,
    524             final FormatOptions formatOptions, final String message) {
    525         final String dictName = "testGetTerminalPosition";
    526         final String dictVersion = Long.toString(System.currentTimeMillis());
    527         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
    528                 getContext().getCacheDir());
    529 
    530         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    531                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
    532         addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
    533         addBigrams(dict, words, bigrams);
    534         timeWritingDictToFile(file, dict, formatOptions);
    535 
    536         final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
    537                 DictDecoder.USE_BYTEARRAY);
    538         try {
    539             dictDecoder.openDictBuffer();
    540         } catch (IOException e) {
    541             Log.e(TAG, "IOException while opening the buffer", e);
    542         } catch (UnsupportedFormatException e) {
    543             Log.e(TAG, "IOException while opening the buffer", e);
    544         }
    545         assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
    546 
    547         try {
    548             // too long word
    549             final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
    550             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));
    551 
    552             // null
    553             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));
    554 
    555             // empty string
    556             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
    557         } catch (IOException e) {
    558         } catch (UnsupportedFormatException e) {
    559         }
    560 
    561         // Test a word that is contained within the dictionary.
    562         long sum = 0;
    563         for (int i = 0; i < sWords.size(); ++i) {
    564             final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true);
    565             sum += time == -1 ? 0 : time;
    566         }
    567         Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message
    568                 + " : " + outputOptions(bufferType, formatOptions));
    569 
    570         // Test a word that isn't contained within the dictionary.
    571         final Random random = new Random((int)System.currentTimeMillis());
    572         final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
    573                 random);
    574         for (int i = 0; i < 1000; ++i) {
    575             final String word = CodePointUtils.generateWord(random, codePointSet);
    576             if (sWords.indexOf(word) != -1) continue;
    577             checkGetTerminalPosition(dictDecoder, word, false);
    578         }
    579     }
    580 
    581     private void runGetTerminalPositionTests(final int bufferType,
    582             final FormatOptions formatOptions) {
    583         runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
    584     }
    585 
    586     public void testGetTerminalPosition() {
    587         final ArrayList<String> results = new ArrayList<>();
    588 
    589         runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
    590                 BinaryDictUtils.VERSION2_OPTIONS);
    591         runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
    592                 BinaryDictUtils.VERSION2_OPTIONS);
    593 
    594         for (final String result : results) {
    595             Log.d(TAG, result);
    596         }
    597     }
    598 
    599     public void testVer2DictGetWordProperty() {
    600         final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
    601         final ArrayList<String> words = sWords;
    602         final HashMap<String, List<String>> shortcuts = sShortcuts;
    603         final String dictName = "testGetWordProperty";
    604         final String dictVersion = Long.toString(System.currentTimeMillis());
    605         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    606                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
    607         addUnigrams(words.size(), dict, words, shortcuts);
    608         addBigrams(dict, words, sEmptyBigrams);
    609         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
    610                 getContext().getCacheDir());
    611         file.delete();
    612         timeWritingDictToFile(file, dict, formatOptions);
    613         final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
    614                 0 /* offset */, file.length(), true /* useFullEditDistance */,
    615                 Locale.ENGLISH, dictName, false /* isUpdatable */);
    616         for (final String word : words) {
    617             final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
    618                     false /* isBeginningOfSentence */);
    619             assertEquals(word, wordProperty.mWord);
    620             assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
    621             if (shortcuts.containsKey(word)) {
    622                 assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size());
    623                 final List<String> shortcutList = shortcuts.get(word);
    624                 assertTrue(wordProperty.mHasShortcuts);
    625                 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
    626                     assertTrue(shortcutList.contains(shortcutTarget.mWord));
    627                     assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
    628                     shortcutList.remove(shortcutTarget.mWord);
    629                 }
    630                 assertTrue(shortcutList.isEmpty());
    631             }
    632         }
    633     }
    634 
    635     public void testVer2DictIteration() {
    636         final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
    637         final ArrayList<String> words = sWords;
    638         final HashMap<String, List<String>> shortcuts = sShortcuts;
    639         final SparseArray<List<Integer>> bigrams = sEmptyBigrams;
    640         final String dictName = "testGetWordProperty";
    641         final String dictVersion = Long.toString(System.currentTimeMillis());
    642         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    643                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
    644         addUnigrams(words.size(), dict, words, shortcuts);
    645         addBigrams(dict, words, bigrams);
    646         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
    647                 getContext().getCacheDir());
    648         timeWritingDictToFile(file, dict, formatOptions);
    649         Log.d(TAG, file.getAbsolutePath());
    650         final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
    651                 0 /* offset */, file.length(), true /* useFullEditDistance */,
    652                 Locale.ENGLISH, dictName, false /* isUpdatable */);
    653 
    654         final HashSet<String> wordSet = new HashSet<>(words);
    655         final HashSet<Pair<String, String>> bigramSet = new HashSet<>();
    656 
    657         for (int i = 0; i < words.size(); i++) {
    658             final List<Integer> bigramList = bigrams.get(i);
    659             if (bigramList != null) {
    660                 for (final Integer word1Index : bigramList) {
    661                     final String word1 = words.get(word1Index);
    662                     bigramSet.add(new Pair<>(words.get(i), word1));
    663                 }
    664             }
    665         }
    666         int token = 0;
    667         do {
    668             final BinaryDictionary.GetNextWordPropertyResult result =
    669                     binaryDictionary.getNextWordProperty(token);
    670             final WordProperty wordProperty = result.mWordProperty;
    671             final String word0 = wordProperty.mWord;
    672             assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability);
    673             wordSet.remove(word0);
    674             if (shortcuts.containsKey(word0)) {
    675                 assertEquals(shortcuts.get(word0).size(), wordProperty.mShortcutTargets.size());
    676                 final List<String> shortcutList = shortcuts.get(word0);
    677                 assertNotNull(wordProperty.mShortcutTargets);
    678                 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
    679                     assertTrue(shortcutList.contains(shortcutTarget.mWord));
    680                     assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
    681                     shortcutList.remove(shortcutTarget.mWord);
    682                 }
    683                 assertTrue(shortcutList.isEmpty());
    684             }
    685             for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
    686                 final String word1 = wordProperty.mBigrams.get(j).mWord;
    687                 final Pair<String, String> bigram = new Pair<>(word0, word1);
    688                 assertTrue(bigramSet.contains(bigram));
    689                 bigramSet.remove(bigram);
    690             }
    691             token = result.mNextToken;
    692         } while (token != 0);
    693         assertTrue(wordSet.isEmpty());
    694         assertTrue(bigramSet.isEmpty());
    695     }
    696 }
    697