/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import android.test.AndroidTestCase;
import android.test.MoreAsserts;
import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log;

import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;

/**
 * Tests that insert, update, delete and look up words in a binary dictionary file through
 * {@link DictUpdater} and {@link DictDecoder}.
 *
 * Every test writes a small initial dictionary with {@link Ver3DictEncoder} into a temporary
 * file in the context's cache directory, mutates that file in place and reads it back.
 */
@LargeTest
public class BinaryDictIOUtilsTests extends AndroidTestCase {
    private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
    private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
            new FormatSpec.FormatOptions(3, true);

    public static final int DEFAULT_MAX_UNIGRAMS = 1500;

    // The random words are kept per instance: a static list would be overwritten by every
    // constructor call, so the words a test uses could stop matching the seed it logged.
    private final ArrayList<String> mWords = CollectionUtils.newArrayList();
    private final int mMaxUnigrams;

    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";

    private static final int VERSION3 = 3;
    // Not referenced by any test in this file; only VERSION3 has a DictUpdater here.
    private static final int VERSION4 = 4;

    // Alphabet used by generateWord(). The code points are written as escapes so the file
    // stays valid regardless of the source encoding.
    private static final String[] CHARACTERS = {
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
        "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters
        "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji
        "\uD841\uDE28" /* U+20628 */, "\uD840\uDC0B" /* U+2000B */,
        "\uD861\uDED7" /* U+286D7 */ // surrogate pair
    };

    /**
     * Creates the test with a time-based seed and {@link #DEFAULT_MAX_UNIGRAMS} random words.
     */
    public BinaryDictIOUtilsTests() {
        this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
    }

    /**
     * Creates the test and generates its random word list.
     *
     * @param seed the seed of the random generator; it is logged so a run can be reproduced
     * @param maxUnigrams the number of random words to generate
     */
    public BinaryDictIOUtilsTests(final long seed, final int maxUnigrams) {
        super();
        Log.d(TAG, "Seed for test is " + seed + ", maxUnigrams is " + maxUnigrams);
        mMaxUnigrams = maxUnigrams;
        final Random random = new Random(seed);
        for (int i = 0; i < maxUnigrams; ++i) {
            mWords.add(generateWord(random.nextInt()));
        }
    }

    // Utilities for test

    /**
     * Builds a word from an integer by writing its absolute value in base
     * {@code CHARACTERS.length}, least significant digit first, using {@link #CHARACTERS} as
     * the digits.
     *
     * @param value any integer
     * @return the generated word, or "a" when {@code value} is 0
     */
    private static String generateWord(final int value) {
        final int base = CHARACTERS.length;
        final StringBuilder builder = new StringBuilder();
        // Widen before taking the absolute value so Integer.MIN_VALUE does not stay negative.
        long remaining = Math.abs((long)value);
        while (remaining > 0) {
            builder.append(CHARACTERS[(int)(remaining % base)]);
            remaining /= base;
        }
        if (builder.length() == 0) return "a";
        return builder.toString();
    }

    /**
     * Logs the fields of a PtNode at debug level.
     *
     * @param info the PtNode to dump
     */
    private static void printPtNode(final PtNodeInfo info) {
        Log.d(TAG, " PtNode at " + info.mOriginalAddress);
        Log.d(TAG, " flags = " + info.mFlags);
        Log.d(TAG, " parentAddress = " + info.mParentAddress);
        Log.d(TAG, " characters = " + new String(info.mCharacters, 0,
                info.mCharacters.length));
        if (info.mFrequency != -1) Log.d(TAG, " frequency = " + info.mFrequency);
        if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
            Log.d(TAG, " children address = no children address");
        } else {
            Log.d(TAG, " children address = " + info.mChildrenAddress);
        }
        if (info.mShortcutTargets != null) {
            for (final WeightedString ws : info.mShortcutTargets) {
                Log.d(TAG, " shortcuts = " + ws.mWord);
            }
        }
        if (info.mBigrams != null) {
            for (final PendingAttribute attr : info.mBigrams) {
                Log.d(TAG, " bigram = " + attr.mAddress);
            }
        }
        Log.d(TAG, " end address = " + info.mEndAddress);
    }

    /**
     * Logs the node array at the decoder's current buffer position: the PtNode count, each
     * PtNode, and the forward link address when the format supports dynamic updates.
     *
     * @param dictDecoder the decoder whose buffer is read
     * @param formatOptions the format options of the file being read
     */
    private static void printNode(final Ver3DictDecoder dictDecoder,
            final FormatSpec.FormatOptions formatOptions) {
        final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
        Log.d(TAG, "Node at " + dictBuffer.position());
        final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
        Log.d(TAG, " ptNodeCount = " + count);
        for (int i = 0; i < count; ++i) {
            final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(),
                    formatOptions);
            printPtNode(currentInfo);
        }
        if (formatOptions.mSupportsDynamicUpdate) {
            final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
            Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
        }
    }

    /**
     * Debugging helper that logs every node array from the end of the header to the end of
     * the buffer. It is not called by any test.
     *
     * @param dictDecoder the decoder of the file to dump
     * @throws IOException
     * @throws UnsupportedFormatException
     */
    @SuppressWarnings("unused")
    private static void printBinaryFile(final Ver3DictDecoder dictDecoder)
            throws IOException, UnsupportedFormatException {
        final FileHeader fileHeader = dictDecoder.readHeader();
        final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
        while (dictBuffer.position() < dictBuffer.limit()) {
            printNode(dictDecoder, fileHeader.mFormatOptions);
        }
    }

    /**
     * Looks up the terminal position of a word in a dictionary file.
     *
     * @param file the dictionary file
     * @param word the word searched
     * @return the position reported by the decoder, or {@link FormatSpec#NOT_VALID_WORD} when
     *         the lookup raised an exception (the exception is logged)
     */
    private int getWordPosition(final File file, final String word) {
        int position = FormatSpec.NOT_VALID_WORD;

        try {
            final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file,
                    DictDecoder.USE_READONLY_BYTEBUFFER);
            position = dictDecoder.getTerminalPosition(word);
        } catch (IOException e) {
            Log.e(TAG, "Raised an IOException while looking up a word position", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word"
                    + " position", e);
        }
        return position;
    }

    /**
     * Find a word using the DictDecoder.
     *
     * @param dictDecoder the dict decoder
     * @param word the word searched
     * @return the found ptNodeInfo, or null if the decoder reports the word as not valid
     * @throws IOException
     * @throws UnsupportedFormatException
     */
    private static PtNodeInfo findWordByBinaryDictReader(final DictDecoder dictDecoder,
            final String word) throws IOException, UnsupportedFormatException {
        final int position = dictDecoder.getTerminalPosition(word);
        if (position == FormatSpec.NOT_VALID_WORD) {
            return null;
        }
        // Re-read the header from the start of the file to obtain the format options.
        dictDecoder.setPosition(0);
        final FileHeader header = dictDecoder.readHeader();
        dictDecoder.setPosition(position);
        return dictDecoder.readPtNode(position, header.mFormatOptions);
    }

    /**
     * Opens a dictionary file and reads the PtNode of a word.
     *
     * @param file the dictionary file
     * @param word the word searched
     * @return the PtNode of the word, or null when the word is not found or when reading
     *         raised an exception (the exception is logged)
     */
    private PtNodeInfo findWordFromFile(final File file, final String word) {
        final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
        PtNodeInfo info = null;
        try {
            dictDecoder.openDictBuffer();
            info = findWordByBinaryDictReader(dictDecoder, word);
        } catch (IOException e) {
            Log.e(TAG, "Raised an IOException while finding a word", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Raised an UnsupportedFormatException error while finding a word", e);
        }
        return info;
    }

    /**
     * Creates the updater matching a format version.
     *
     * @param file the dictionary file to update
     * @param formatVersion the format version; only {@link #VERSION3} is supported
     * @return a new updater backed by a writable byte buffer
     * @throws RuntimeException if there is no updater for {@code formatVersion}
     */
    private static DictUpdater newDictUpdater(final File file, final int formatVersion)
            throws IOException, UnsupportedFormatException {
        if (formatVersion == VERSION3) {
            return new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
        }
        throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't"
                + " exist.");
    }

    /**
     * Inserts a word into the dictionary file and asserts that it can be found afterwards.
     *
     * @param file the dictionary file
     * @param word the word to insert
     * @param frequency the frequency of the word
     * @param exist when false, first asserts that the word is not yet in the file
     * @param bigrams the bigrams of the word, may be null
     * @param shortcuts the shortcuts of the word, may be null
     * @param formatVersion the format version of the file
     * @return the time spent in insertWord in nanoseconds, or -1 if an exception was raised
     *         (the exception is logged)
     */
    private long insertAndCheckWord(final File file, final String word, final int frequency,
            final boolean exist, final ArrayList<WeightedString> bigrams,
            final ArrayList<WeightedString> shortcuts, final int formatVersion) {
        long amountOfTime = -1;
        try {
            final DictUpdater dictUpdater = newDictUpdater(file, formatVersion);

            if (!exist) {
                assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
            }
            final long now = System.nanoTime();
            dictUpdater.insertWord(word, frequency, bigrams, shortcuts, false, false);
            amountOfTime = System.nanoTime() - now;
            MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
        } catch (IOException e) {
            Log.e(TAG, "Raised an IOException while inserting a word", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e);
        }
        return amountOfTime;
    }

    /**
     * Deletes a word from the dictionary file. Exceptions are logged, not propagated; callers
     * verify the deletion with {@link #getWordPosition(File, String)}.
     *
     * @param file the dictionary file
     * @param word the word to delete
     * @param formatVersion the format version of the file
     */
    private void deleteWord(final File file, final String word, final int formatVersion) {
        try {
            final DictUpdater dictUpdater = newDictUpdater(file, formatVersion);
            dictUpdater.deleteWord(word);
        } catch (IOException e) {
            Log.e(TAG, "Raised an IOException while deleting a word", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Raised an UnsupportedFormatException error while deleting a word", e);
        }
    }

    /**
     * Asserts that the word read back from a position in the file equals the expected word.
     *
     * @param file the dictionary file
     * @param word the expected word
     * @param position the position to read the word from
     */
    private void checkReverseLookup(final File file, final String word, final int position) {
        try {
            final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
            final FileHeader fileHeader = dictDecoder.readHeader();
            assertEquals(word,
                    BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
                            position, fileHeader.mFormatOptions).mWord);
        } catch (IOException e) {
            Log.e(TAG, "Raised an IOException while looking up a word", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e);
        }
    }

    /**
     * Creates a temporary dictionary file in the context's cache directory, failing the test
     * if the file cannot be created.
     *
     * @param prefix the prefix of the file name
     * @return the created file, never null
     */
    private File createTempDictFile(final String prefix) {
        File file = null;
        try {
            file = File.createTempFile(prefix, TEST_DICT_FILE_EXTENSION,
                    getContext().getCacheDir());
        } catch (IOException e) {
            fail("IOException while creating temporary file: " + e);
        }
        assertNotNull(file);
        return file;
    }

    /**
     * Removes a temporary dictionary file so test runs do not pile up files in the cache
     * directory. A failed deletion is only logged.
     *
     * @param file the file to delete, may be null
     */
    private static void deleteTempDictFile(final File file) {
        if (file != null && file.exists() && !file.delete()) {
            Log.w(TAG, "Failed to delete temporary dictionary file " + file);
        }
    }

    /**
     * @return a new dictionary with no words and an empty attribute map
     */
    private static FusionDictionary newEmptyDictionary() {
        return new FusionDictionary(new PtNodeArray(),
                new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
                        false));
    }

    /**
     * Writes a dictionary to a file with {@link Ver3DictEncoder} and {@link #FORMAT_OPTIONS},
     * failing the test if writing raises an exception.
     *
     * @param file the destination file
     * @param dict the dictionary to write
     */
    private static void writeInitialDictionary(final File file, final FusionDictionary dict) {
        try {
            final DictEncoder dictEncoder = new Ver3DictEncoder(file);
            dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
        } catch (IOException e) {
            fail("IOException while writing an initial dictionary : " + e);
        } catch (UnsupportedFormatException e) {
            fail("UnsupportedFormatException while writing an initial dictionary : " + e);
        }
    }

    /**
     * Inserts, updates and deletes a fixed sequence of words sharing prefixes with "abcd" and
     * checks the file after each step.
     *
     * @param formatVersion the format version of the updater to use
     */
    private void runTestInsertWord(final int formatVersion) {
        final File file = createTempDictFile("testInsertWord");
        try {
            // set an initial dictionary.
            final FusionDictionary dict = newEmptyDictionary();
            dict.add("abcd", 10, null, false);
            writeInitialDictionary(file, dict);

            MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
                    getWordPosition(file, "abcd"));
            insertAndCheckWord(file, "abcde", 10, false, null, null, formatVersion);

            insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatVersion);
            checkReverseLookup(file, "abcdefghijklmn",
                    getWordPosition(file, "abcdefghijklmn"));

            insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatVersion);
            checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));

            // update the existing word.
            insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatVersion);

            // split 1
            insertAndCheckWord(file, "ab", 20, false, null, null, formatVersion);

            // split 2
            insertAndCheckWord(file, "ami", 30, false, null, null, formatVersion);

            deleteWord(file, "ami", formatVersion);
            assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));

            insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatVersion);

            deleteWord(file, "abcd", formatVersion);
            assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
        } finally {
            deleteTempDictFile(file);
        }
    }

    public void testInsertWord() {
        runTestInsertWord(VERSION3);
    }

    /**
     * Inserts a word carrying one bigram and checks that the stored bigram points at the
     * position of the bigram's target word.
     *
     * @param formatVersion the format version of the updater to use
     */
    private void runTestInsertWordWithBigrams(final int formatVersion) {
        final File file = createTempDictFile("testInsertWordWithBigrams");
        try {
            // set an initial dictionary.
            final FusionDictionary dict = newEmptyDictionary();
            dict.add("abcd", 10, null, false);
            dict.add("efgh", 15, null, false);
            writeInitialDictionary(file, dict);

            final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
            banana.add(new WeightedString("banana", 10));

            insertAndCheckWord(file, "banana", 0, false, null, null, formatVersion);
            insertAndCheckWord(file, "recursive", 60, true, banana, null, formatVersion);

            final PtNodeInfo info = findWordFromFile(file, "recursive");
            final int bananaPos = getWordPosition(file, "banana");
            // findWordFromFile returns null on failure; fail clearly instead of with an NPE.
            assertNotNull("\"recursive\" was not found in the dictionary file", info);
            assertNotNull(info.mBigrams);
            assertEquals(1, info.mBigrams.size());
            assertEquals(bananaPos, info.mBigrams.get(0).mAddress);
        } finally {
            deleteTempDictFile(file);
        }
    }

    public void testInsertWordWithBigrams() {
        runTestInsertWordWithBigrams(VERSION3);
    }

    /**
     * Inserts every generated random word, checks that all of them can be found afterwards and
     * logs the maximum, minimum and average insertion time.
     *
     * @param formatVersion the format version of the updater to use
     */
    private void runTestRandomWords(final int formatVersion) {
        final File file = createTempDictFile("testRandomWord");
        try {
            // set an initial dictionary.
            final FusionDictionary dict = newEmptyDictionary();
            dict.add("initial", 10, null, false);
            writeInitialDictionary(file, dict);

            long maxTimeToInsert = 0;
            // Start from the largest value so that any measured time replaces it.
            long minTimeToInsert = Long.MAX_VALUE;
            long sum = 0;
            int cnt = 0;
            for (final String word : mWords) {
                final long diff = insertAndCheckWord(file, word,
                        cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null,
                        formatVersion);
                maxTimeToInsert = Math.max(maxTimeToInsert, diff);
                minTimeToInsert = Math.min(minTimeToInsert, diff);
                sum += diff;
                cnt++;
            }
            for (final String word : mWords) {
                MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
                        getWordPosition(file, word));
            }

            Log.d(TAG, "Test version " + formatVersion);
            Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
            Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
            Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms.");
        } finally {
            deleteTempDictFile(file);
        }
    }

    public void testRandomWords() {
        runTestRandomWords(VERSION3);
    }
}