Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import android.test.AndroidTestCase;
     20 import android.test.MoreAsserts;
     21 import android.test.suitebuilder.annotation.LargeTest;
     22 import android.util.Log;
     23 
     24 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
     25 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
     26 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
     27 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
     28 import com.android.inputmethod.latin.utils.CollectionUtils;
     29 
     30 import java.io.File;
     31 import java.io.IOException;
     32 import java.util.ArrayList;
     33 import java.util.HashMap;
     34 import java.util.Random;
     35 
     36 @LargeTest
     37 public class BinaryDictIOUtilsTests extends AndroidTestCase {
     38     private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
     39     private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
     40             new FormatSpec.FormatOptions(3, true);
     41 
     42     private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
     43     public static final int DEFAULT_MAX_UNIGRAMS = 1500;
     44     private final int mMaxUnigrams;
     45 
     46     private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
     47 
     48     private static final int VERSION3 = 3;
     49     private static final int VERSION4 = 4;
     50 
     51     private static final String[] CHARACTERS = {
     52         "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
     53         "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
     54         "\u00FC" /*  */, "\u00E2" /*  */, "\u00F1" /*  */, // accented characters
     55         "\u4E9C" /*  */, "\u4F0A" /*  */, "\u5B87" /*  */, // kanji
     56         "\uD841\uDE28" /*  */, "\uD840\uDC0B" /*  */, "\uD861\uDED7" /*  */ // surrogate pair
     57     };
     58 
     59     public BinaryDictIOUtilsTests() {
     60         // 1500 is the default max unigrams
     61         this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
     62     }
     63 
     64     public BinaryDictIOUtilsTests(final long seed, final int maxUnigrams) {
     65         super();
     66         Log.d(TAG, "Seed for test is " + seed + ", maxUnigrams is " + maxUnigrams);
     67         mMaxUnigrams = maxUnigrams;
     68         final Random random = new Random(seed);
     69         sWords.clear();
     70         for (int i = 0; i < maxUnigrams; ++i) {
     71             sWords.add(generateWord(random.nextInt()));
     72         }
     73     }
     74 
     75     // Utilities for test
     76     private String generateWord(final int value) {
     77         final int lengthOfChars = CHARACTERS.length;
     78         StringBuilder builder = new StringBuilder("");
     79         long lvalue = Math.abs((long)value);
     80         while (lvalue > 0) {
     81             builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
     82             lvalue /= lengthOfChars;
     83         }
     84         if (builder.toString().equals("")) return "a";
     85         return builder.toString();
     86     }
     87 
     88     private static void printPtNode(final PtNodeInfo info) {
     89         Log.d(TAG, "    PtNode at " + info.mOriginalAddress);
     90         Log.d(TAG, "        flags = " + info.mFlags);
     91         Log.d(TAG, "        parentAddress = " + info.mParentAddress);
     92         Log.d(TAG, "        characters = " + new String(info.mCharacters, 0,
     93                 info.mCharacters.length));
     94         if (info.mFrequency != -1) Log.d(TAG, "        frequency = " + info.mFrequency);
     95         if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
     96             Log.d(TAG, "        children address = no children address");
     97         } else {
     98             Log.d(TAG, "        children address = " + info.mChildrenAddress);
     99         }
    100         if (info.mShortcutTargets != null) {
    101             for (final WeightedString ws : info.mShortcutTargets) {
    102                 Log.d(TAG, "        shortcuts = " + ws.mWord);
    103             }
    104         }
    105         if (info.mBigrams != null) {
    106             for (final PendingAttribute attr : info.mBigrams) {
    107                 Log.d(TAG, "        bigram = " + attr.mAddress);
    108             }
    109         }
    110         Log.d(TAG, "    end address = " + info.mEndAddress);
    111     }
    112 
    113     private static void printNode(final Ver3DictDecoder dictDecoder,
    114             final FormatSpec.FormatOptions formatOptions) {
    115         final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
    116         Log.d(TAG, "Node at " + dictBuffer.position());
    117         final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
    118         Log.d(TAG, "    ptNodeCount = " + count);
    119         for (int i = 0; i < count; ++i) {
    120             final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(),
    121                     formatOptions);
    122             printPtNode(currentInfo);
    123         }
    124         if (formatOptions.mSupportsDynamicUpdate) {
    125             final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
    126             Log.d(TAG, "    forwardLinkAddress = " + forwardLinkAddress);
    127         }
    128     }
    129 
    130     @SuppressWarnings("unused")
    131     private static void printBinaryFile(final Ver3DictDecoder dictDecoder)
    132             throws IOException, UnsupportedFormatException {
    133         final FileHeader fileHeader = dictDecoder.readHeader();
    134         final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
    135         while (dictBuffer.position() < dictBuffer.limit()) {
    136             printNode(dictDecoder, fileHeader.mFormatOptions);
    137         }
    138     }
    139 
    140     private int getWordPosition(final File file, final String word) {
    141         int position = FormatSpec.NOT_VALID_WORD;
    142 
    143         try {
    144             final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file,
    145                     DictDecoder.USE_READONLY_BYTEBUFFER);
    146             position = dictDecoder.getTerminalPosition(word);
    147         } catch (IOException e) {
    148         } catch (UnsupportedFormatException e) {
    149         }
    150         return position;
    151     }
    152 
    153     /**
    154      * Find a word using the DictDecoder.
    155      *
    156      * @param dictDecoder the dict decoder
    157      * @param word the word searched
    158      * @return the found ptNodeInfo
    159      * @throws IOException
    160      * @throws UnsupportedFormatException
    161      */
    162     private static PtNodeInfo findWordByBinaryDictReader(final DictDecoder dictDecoder,
    163             final String word) throws IOException, UnsupportedFormatException {
    164         int position = dictDecoder.getTerminalPosition(word);
    165         if (position != FormatSpec.NOT_VALID_WORD) {
    166             dictDecoder.setPosition(0);
    167             final FileHeader header = dictDecoder.readHeader();
    168             dictDecoder.setPosition(position);
    169             return dictDecoder.readPtNode(position, header.mFormatOptions);
    170         }
    171         return null;
    172     }
    173 
    174     private PtNodeInfo findWordFromFile(final File file, final String word) {
    175         final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
    176         PtNodeInfo info = null;
    177         try {
    178             dictDecoder.openDictBuffer();
    179             info = findWordByBinaryDictReader(dictDecoder, word);
    180         } catch (IOException e) {
    181         } catch (UnsupportedFormatException e) {
    182         }
    183         return info;
    184     }
    185 
    186     // return amount of time to insert a word
    187     private long insertAndCheckWord(final File file, final String word, final int frequency,
    188             final boolean exist, final ArrayList<WeightedString> bigrams,
    189             final ArrayList<WeightedString> shortcuts, final int formatVersion) {
    190         long amountOfTime = -1;
    191         try {
    192             final DictUpdater dictUpdater;
    193             if (formatVersion == VERSION3) {
    194                 dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
    195             } else {
    196                 throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't"
    197                         + " exist.");
    198             }
    199 
    200             if (!exist) {
    201                 assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
    202             }
    203             final long now = System.nanoTime();
    204             dictUpdater.insertWord(word, frequency, bigrams, shortcuts, false, false);
    205             amountOfTime = System.nanoTime() - now;
    206             MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
    207         } catch (IOException e) {
    208             Log.e(TAG, "Raised an IOException while inserting a word", e);
    209         } catch (UnsupportedFormatException e) {
    210             Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e);
    211         }
    212         return amountOfTime;
    213     }
    214 
    215     private void deleteWord(final File file, final String word, final int formatVersion) {
    216         try {
    217             final DictUpdater dictUpdater;
    218             if (formatVersion == VERSION3) {
    219                 dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
    220             } else {
    221                 throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't"
    222                         + " exist.");
    223             }
    224             dictUpdater.deleteWord(word);
    225         } catch (IOException e) {
    226         } catch (UnsupportedFormatException e) {
    227         }
    228     }
    229 
    230     private void checkReverseLookup(final File file, final String word, final int position) {
    231 
    232         try {
    233             final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
    234             final FileHeader fileHeader = dictDecoder.readHeader();
    235             assertEquals(word,
    236                     BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
    237                             position, fileHeader.mFormatOptions).mWord);
    238         } catch (IOException e) {
    239             Log.e(TAG, "Raised an IOException while looking up a word", e);
    240         } catch (UnsupportedFormatException e) {
    241             Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e);
    242         }
    243     }
    244 
    245     private void runTestInsertWord(final int formatVersion) {
    246         File file = null;
    247         try {
    248             file = File.createTempFile("testInsertWord", TEST_DICT_FILE_EXTENSION,
    249                     getContext().getCacheDir());
    250         } catch (IOException e) {
    251             fail("IOException while creating temporary file: " + e);
    252         }
    253 
    254         // set an initial dictionary.
    255         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    256                 new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
    257         dict.add("abcd", 10, null, false);
    258 
    259         try {
    260             final DictEncoder dictEncoder = new Ver3DictEncoder(file);
    261             dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
    262         } catch (IOException e) {
    263             fail("IOException while writing an initial dictionary : " + e);
    264         } catch (UnsupportedFormatException e) {
    265             fail("UnsupportedFormatException while writing an initial dictionary : " + e);
    266         }
    267 
    268         MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
    269         insertAndCheckWord(file, "abcde", 10, false, null, null, formatVersion);
    270 
    271         insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatVersion);
    272         checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
    273 
    274         insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatVersion);
    275         checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
    276 
    277         // update the existing word.
    278         insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatVersion);
    279 
    280         // split 1
    281         insertAndCheckWord(file, "ab", 20, false, null, null, formatVersion);
    282 
    283         // split 2
    284         insertAndCheckWord(file, "ami", 30, false, null, null, formatVersion);
    285 
    286         deleteWord(file, "ami", formatVersion);
    287         assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));
    288 
    289         insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatVersion);
    290 
    291         deleteWord(file, "abcd", formatVersion);
    292         assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
    293     }
    294 
    295     public void testInsertWord() {
    296         runTestInsertWord(VERSION3);
    297     }
    298 
    299     private void runTestInsertWordWithBigrams(final int formatVersion) {
    300         File file = null;
    301         try {
    302             file = File.createTempFile("testInsertWordWithBigrams", TEST_DICT_FILE_EXTENSION,
    303                     getContext().getCacheDir());
    304         } catch (IOException e) {
    305             fail("IOException while creating temporary file: " + e);
    306         }
    307 
    308         // set an initial dictionary.
    309         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    310                 new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
    311         dict.add("abcd", 10, null, false);
    312         dict.add("efgh", 15, null, false);
    313 
    314         try {
    315             final DictEncoder dictEncoder = new Ver3DictEncoder(file);
    316             dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
    317         } catch (IOException e) {
    318             fail("IOException while writing an initial dictionary : " + e);
    319         } catch (UnsupportedFormatException e) {
    320             fail("UnsupportedFormatException while writing an initial dictionary : " + e);
    321         }
    322 
    323         final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
    324         banana.add(new WeightedString("banana", 10));
    325 
    326         insertAndCheckWord(file, "banana", 0, false, null, null, formatVersion);
    327         insertAndCheckWord(file, "recursive", 60, true, banana, null, formatVersion);
    328 
    329         final PtNodeInfo info = findWordFromFile(file, "recursive");
    330         int bananaPos = getWordPosition(file, "banana");
    331         assertNotNull(info.mBigrams);
    332         assertEquals(info.mBigrams.size(), 1);
    333         assertEquals(info.mBigrams.get(0).mAddress, bananaPos);
    334     }
    335 
    336     public void testInsertWordWithBigrams() {
    337         runTestInsertWordWithBigrams(VERSION3);
    338     }
    339 
    340     private void runTestRandomWords(final int formatVersion) {
    341         File file = null;
    342         try {
    343             file = File.createTempFile("testRandomWord", TEST_DICT_FILE_EXTENSION,
    344                     getContext().getCacheDir());
    345         } catch (IOException e) {
    346         }
    347         assertNotNull(file);
    348 
    349         // set an initial dictionary.
    350         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    351                 new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
    352                         false));
    353         dict.add("initial", 10, null, false);
    354 
    355         try {
    356             final DictEncoder dictEncoder = new Ver3DictEncoder(file);
    357             dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
    358         } catch (IOException e) {
    359             assertTrue(false);
    360         } catch (UnsupportedFormatException e) {
    361             assertTrue(false);
    362         }
    363 
    364         long maxTimeToInsert = 0, sum = 0;
    365         long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert.
    366         int cnt = 0;
    367         for (final String word : sWords) {
    368             final long diff = insertAndCheckWord(file, word,
    369                     cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null, formatVersion);
    370             maxTimeToInsert = Math.max(maxTimeToInsert, diff);
    371             minTimeToInsert = Math.min(minTimeToInsert, diff);
    372             sum += diff;
    373             cnt++;
    374         }
    375         cnt = 0;
    376         for (final String word : sWords) {
    377             MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
    378         }
    379 
    380         Log.d(TAG, "Test version " + formatVersion);
    381         Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
    382         Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
    383         Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms.");
    384     }
    385 
    386     public void testRandomWords() {
    387         runTestRandomWords(VERSION3);
    388     }
    389 }
    390