/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import android.test.AndroidTestCase;
import android.test.MoreAsserts;
import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log;

import com.android.inputmethod.latin.CollectionUtils;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.ByteBufferWrapper;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;

/**
 * Tests for {@link BinaryDictIOUtils}: looking up, inserting and deleting words in a binary
 * dictionary file that is memory-mapped from the application's cache directory.
 *
 * <p>Helper methods never swallow I/O or format errors: any such error fails the running test,
 * so that a test cannot pass merely because the code under test was never reached.
 */
@LargeTest
public class BinaryDictIOUtilsTests extends AndroidTestCase {
    private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
    // Format options used for every dictionary written by these tests.
    // NOTE(review): the second argument presumably enables dynamic updates - confirm against
    // FormatSpec.FormatOptions.
    private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
            new FormatSpec.FormatOptions(3, true);
    // Number of pseudo-random words generated for testRandomWords().
    private static final int MAX_UNIGRAMS = 1500;

    // Pseudo-random words; rebuilt with a fixed seed each time the test class is instantiated.
    private static final ArrayList<String> sWords = CollectionUtils.newArrayList();

    // Alphabet from which the random words are built.
    private static final String[] CHARACTERS = {
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
        "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters
        "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji
        "\uD841\uDE28" /* U+20628 */, "\uD840\uDC0B" /* U+2000B */,
        "\uD861\uDeD7" /* U+286D7 */ // surrogate pair
    };

    /**
     * Fills {@link #sWords} with {@link #MAX_UNIGRAMS} words derived from a fixed-seed
     * {@link Random}, so every instance of the test sees the same word list.
     */
    public BinaryDictIOUtilsTests() {
        super();
        final Random random = new Random(123456);
        sWords.clear();
        for (int i = 0; i < MAX_UNIGRAMS; ++i) {
            sWords.add(generateWord(random.nextInt()));
        }
    }

    // Utilities for test

    /**
     * Builds a word by writing the absolute value of {@code value} in base
     * {@code CHARACTERS.length}, least significant digit first, using {@link #CHARACTERS} as
     * the digits.
     *
     * @param value the number to encode; may be negative
     * @return the encoded word, or {@code "a"} when {@code value} is zero
     */
    private static String generateWord(final int value) {
        final int lengthOfChars = CHARACTERS.length;
        final StringBuilder builder = new StringBuilder();
        // Widen before taking the absolute value so Integer.MIN_VALUE does not overflow.
        long lvalue = Math.abs((long) value);
        while (lvalue > 0) {
            builder.append(CHARACTERS[(int) (lvalue % lengthOfChars)]);
            lvalue /= lengthOfChars;
        }
        if (builder.length() == 0) {
            return "a";
        }
        return builder.toString();
    }

    /**
     * Closes {@code closeable} if it is non-null. Errors on close are deliberately ignored:
     * by the time this is called the test has already made its assertions.
     */
    private static void closeQuietly(final Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (IOException ignored) {
            // Best effort: nothing useful can be done if closing fails.
        }
    }

    /**
     * Creates an empty {@code .dict} temporary file in the context's cache directory, failing
     * the test if the file cannot be created.
     *
     * @param prefix the file name prefix
     * @return the newly created file
     */
    private File createTempDictFile(final String prefix) {
        try {
            return File.createTempFile(prefix, ".dict", getContext().getCacheDir());
        } catch (IOException e) {
            fail("IOException while creating temporary file: " + e);
            return null; // Not reached: fail() always throws.
        }
    }

    /**
     * Writes {@code dict} to {@code file} using {@link #FORMAT_OPTIONS}, failing the test on
     * any error. The output stream is closed even when writing fails.
     */
    private static void writeInitialDictionary(final File file, final FusionDictionary dict) {
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(file);
            BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
            out.close();
        } catch (IOException e) {
            fail("IOException while writing an initial dictionary : " + e);
        } catch (UnsupportedFormatException e) {
            fail("UnsupportedFormatException while writing an initial dictionary : " + e);
        } finally {
            closeQuietly(out);
        }
    }

    /** Debugging aid: logs the fields of a single character group. */
    private static void printCharGroup(final CharGroupInfo info) {
        Log.d(TAG, " CharGroup at " + info.mOriginalAddress);
        Log.d(TAG, " flags = " + info.mFlags);
        Log.d(TAG, " parentAddress = " + info.mParentAddress);
        Log.d(TAG, " characters = " + new String(info.mCharacters, 0,
                info.mCharacters.length));
        if (info.mFrequency != -1) Log.d(TAG, " frequency = " + info.mFrequency);
        if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
            Log.d(TAG, " children address = no children address");
        } else {
            Log.d(TAG, " children address = " + info.mChildrenAddress);
        }
        if (info.mShortcutTargets != null) {
            for (final WeightedString ws : info.mShortcutTargets) {
                Log.d(TAG, " shortcuts = " + ws.mWord);
            }
        }
        if (info.mBigrams != null) {
            for (final PendingAttribute attr : info.mBigrams) {
                Log.d(TAG, " bigram = " + attr.mAddress);
            }
        }
        Log.d(TAG, " end address = " + info.mEndAddress);
    }

    /**
     * Debugging aid: logs the node at the buffer's current position, i.e. its character groups
     * followed by the forward link address when the format supports dynamic updates.
     */
    private static void printNode(final FusionDictionaryBufferInterface buffer,
            final FormatSpec.FormatOptions formatOptions) {
        Log.d(TAG, "Node at " + buffer.position());
        final int count = BinaryDictInputOutput.readCharGroupCount(buffer);
        Log.d(TAG, " charGroupCount = " + count);
        for (int i = 0; i < count; ++i) {
            final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
                    buffer.position(), formatOptions);
            printCharGroup(currentInfo);
        }
        if (formatOptions.mSupportsDynamicUpdate) {
            final int forwardLinkAddress = buffer.readUnsignedInt24();
            Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
        }
    }

    /** Debugging aid: reads the header, then logs every node until the buffer limit. */
    private static void printBinaryFile(final FusionDictionaryBufferInterface buffer)
            throws IOException, UnsupportedFormatException {
        final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
        while (buffer.position() < buffer.limit()) {
            printNode(buffer, header.mFormatOptions);
        }
    }

    /**
     * Returns what {@link BinaryDictIOUtils#getTerminalPosition} reports for {@code word} in
     * {@code file}. The tests treat {@link FormatSpec#NOT_VALID_WORD} as "word not present".
     * Fails the test if the file cannot be read or parsed.
     */
    private int getWordPosition(final File file, final String word) {
        int position = FormatSpec.NOT_VALID_WORD;
        FileInputStream inStream = null;
        try {
            inStream = new FileInputStream(file);
            final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
                    inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
            position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
        } catch (IOException e) {
            fail("IOException while looking up the position of \"" + word + "\": " + e);
        } catch (UnsupportedFormatException e) {
            fail("UnsupportedFormatException while looking up the position of \"" + word
                    + "\": " + e);
        } finally {
            closeQuietly(inStream);
        }
        return position;
    }

    /**
     * Returns what {@link BinaryDictIOUtils#findWordFromBuffer} reports for {@code word} in
     * {@code file}. Fails the test if the file cannot be read or parsed.
     */
    private CharGroupInfo findWordFromFile(final File file, final String word) {
        FileInputStream inStream = null;
        CharGroupInfo info = null;
        try {
            inStream = new FileInputStream(file);
            final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
                    inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
            info = BinaryDictIOUtils.findWordFromBuffer(buffer, word);
        } catch (IOException e) {
            fail("IOException while finding \"" + word + "\": " + e);
        } catch (UnsupportedFormatException e) {
            fail("UnsupportedFormatException while finding \"" + word + "\": " + e);
        } finally {
            closeQuietly(inStream);
        }
        return info;
    }

    /**
     * Inserts {@code word} into the dictionary file and asserts that it can be found afterwards.
     *
     * @param file the dictionary file to update
     * @param word the word to insert
     * @param frequency the frequency passed to {@link BinaryDictIOUtils#insertWord}
     * @param exist whether the word is expected to be in the dictionary already; when
     *        {@code false} the method first asserts that the word is absent
     * @param bigrams bigrams passed to {@code insertWord}, may be {@code null}
     * @param shortcuts shortcuts passed to {@code insertWord}, may be {@code null}
     * @return the time spent inside {@code insertWord}, in nanoseconds
     */
    private long insertAndCheckWord(final File file, final String word, final int frequency,
            final boolean exist, final ArrayList<WeightedString> bigrams,
            final ArrayList<WeightedString> shortcuts) {
        RandomAccessFile raFile = null;
        BufferedOutputStream outStream = null;
        long amountOfTime = -1;
        try {
            raFile = new RandomAccessFile(file, "rw");
            final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
                    raFile.getChannel().map(
                            FileChannel.MapMode.READ_WRITE, 0, file.length()));
            // Opened in append mode: insertWord receives both the mapped buffer and this
            // stream.
            outStream = new BufferedOutputStream(new FileOutputStream(file, true));

            if (!exist) {
                assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
            }
            final long now = System.nanoTime();
            // NOTE(review): the meaning of the two trailing boolean flags is not visible from
            // this file - confirm against BinaryDictIOUtils.insertWord.
            BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, bigrams, shortcuts,
                    false, false);
            amountOfTime = System.nanoTime() - now;
            // Flush before re-reading the file so the lookup sees the appended data.
            outStream.flush();
            MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
        } catch (IOException e) {
            fail("IOException while inserting \"" + word + "\": " + e);
        } catch (UnsupportedFormatException e) {
            fail("UnsupportedFormatException while inserting \"" + word + "\": " + e);
        } finally {
            closeQuietly(outStream);
            closeQuietly(raFile);
        }
        return amountOfTime;
    }

    /**
     * Calls {@link BinaryDictIOUtils#deleteWord} for {@code word} on a read-write mapping of
     * {@code file}. Fails the test if the file cannot be read or parsed.
     */
    private void deleteWord(final File file, final String word) {
        RandomAccessFile raFile = null;
        try {
            raFile = new RandomAccessFile(file, "rw");
            final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
                    raFile.getChannel().map(
                            FileChannel.MapMode.READ_WRITE, 0, file.length()));
            BinaryDictIOUtils.deleteWord(buffer, word);
        } catch (IOException e) {
            fail("IOException while deleting \"" + word + "\": " + e);
        } catch (UnsupportedFormatException e) {
            fail("UnsupportedFormatException while deleting \"" + word + "\": " + e);
        } finally {
            closeQuietly(raFile);
        }
    }

    /**
     * Asserts that reading the word stored at {@code position} yields {@code word}.
     *
     * @param file the dictionary file
     * @param word the expected word
     * @param position the position as returned by {@link #getWordPosition}; the header size is
     *        subtracted from it before it is handed to {@code getWordAtAddress}
     */
    private void checkReverseLookup(final File file, final String word, final int position) {
        FileInputStream inStream = null;
        try {
            inStream = new FileInputStream(file);
            final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
                    inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
            final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
            assertEquals(word, BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize,
                    position - header.mHeaderSize, header.mFormatOptions).mWord);
        } catch (IOException e) {
            fail("IOException during reverse lookup of \"" + word + "\": " + e);
        } catch (UnsupportedFormatException e) {
            fail("UnsupportedFormatException during reverse lookup of \"" + word + "\": " + e);
        } finally {
            closeQuietly(inStream);
        }
    }

    /** Inserts, updates and deletes a handful of words sharing prefixes with each other. */
    public void testInsertWord() {
        final File file = createTempDictFile("testInsertWord");

        // set an initial dictionary.
        final FusionDictionary dict = new FusionDictionary(new Node(),
                new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
                        false));
        dict.add("abcd", 10, null, false);
        writeInitialDictionary(file, dict);

        MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
        insertAndCheckWord(file, "abcde", 10, false, null, null);

        insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null);
        checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));

        insertAndCheckWord(file, "abcdabcd", 10, false, null, null);
        checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));

        // update the existing word.
        insertAndCheckWord(file, "abcdabcd", 15, true, null, null);

        // split 1
        insertAndCheckWord(file, "ab", 20, false, null, null);

        // split 2
        insertAndCheckWord(file, "ami", 30, false, null, null);

        deleteWord(file, "ami");
        assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));

        insertAndCheckWord(file, "abcdabfg", 30, false, null, null);

        deleteWord(file, "abcd");
        assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
    }

    /** Inserts a word with one bigram and checks the bigram points at the target word. */
    public void testInsertWordWithBigrams() {
        final File file = createTempDictFile("testInsertWordWithBigrams");

        // set an initial dictionary.
        final FusionDictionary dict = new FusionDictionary(new Node(),
                new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
                        false));
        dict.add("abcd", 10, null, false);
        dict.add("efgh", 15, null, false);
        writeInitialDictionary(file, dict);

        final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
        banana.add(new WeightedString("banana", 10));

        insertAndCheckWord(file, "banana", 0, false, null, null);
        insertAndCheckWord(file, "recursive", 60, true, banana, null);

        final CharGroupInfo info = findWordFromFile(file, "recursive");
        final int bananaPos = getWordPosition(file, "banana");
        assertNotNull(info);
        assertNotNull(info.mBigrams);
        assertEquals(1, info.mBigrams.size());
        assertEquals(bananaPos, info.mBigrams.get(0).mAddress);
    }

    /**
     * Inserts every word of {@link #sWords}, checks that all of them can be found afterwards
     * and logs the maximum, minimum and average insertion time.
     */
    public void testRandomWords() {
        final File file = createTempDictFile("testRandomWord");
        assertNotNull(file);

        // set an initial dictionary.
        final FusionDictionary dict = new FusionDictionary(new Node(),
                new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
                        false));
        dict.add("initial", 10, null, false);
        writeInitialDictionary(file, dict);

        long maxTimeToInsert = 0, sum = 0;
        // Start from the largest possible value so the first measurement always replaces it.
        long minTimeToInsert = Long.MAX_VALUE;
        int cnt = 0;
        for (final String word : sWords) {
            final long diff = insertAndCheckWord(file, word,
                    cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null);
            maxTimeToInsert = Math.max(maxTimeToInsert, diff);
            minTimeToInsert = Math.min(minTimeToInsert, diff);
            sum += diff;
            cnt++;
        }
        // Check again once everything is inserted: later insertions must not lose earlier words.
        for (final String word : sWords) {
            MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
        }

        Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
        Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
        Log.d(TAG, "avg = " + ((double)sum/MAX_UNIGRAMS/1000000) + " ms.");
    }
}