Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import android.test.AndroidTestCase;
     20 import android.test.MoreAsserts;
     21 import android.test.suitebuilder.annotation.LargeTest;
     22 import android.util.Log;
     23 
     24 import com.android.inputmethod.latin.CollectionUtils;
     25 import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.ByteBufferWrapper;
     26 import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
     27 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
     28 import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
     29 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
     30 
     31 import java.io.BufferedOutputStream;
     32 import java.io.File;
     33 import java.io.FileInputStream;
     34 import java.io.FileOutputStream;
     35 import java.io.IOException;
     36 import java.io.RandomAccessFile;
     37 import java.nio.channels.FileChannel;
     38 import java.util.ArrayList;
     39 import java.util.HashMap;
     40 import java.util.Random;
     41 
     42 @LargeTest
     43 public class BinaryDictIOUtilsTests  extends AndroidTestCase {
     44     private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
     45     private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
     46             new FormatSpec.FormatOptions(3, true);
     47     private static final int MAX_UNIGRAMS = 1500;
     48 
     49     private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
     50 
     51     private static final String[] CHARACTERS = {
     52         "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
     53         "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
     54         "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters
     55         "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji
     56         "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDeD7" /* 𨛗 */ // surrogate pair
     57     };
     58 
     59     public BinaryDictIOUtilsTests() {
     60         super();
     61         final Random random = new Random(123456);
     62         sWords.clear();
     63         for (int i = 0; i < MAX_UNIGRAMS; ++i) {
     64             sWords.add(generateWord(random.nextInt()));
     65         }
     66     }
     67 
     68     // Utilities for test
     69     private String generateWord(final int value) {
     70         final int lengthOfChars = CHARACTERS.length;
     71         StringBuilder builder = new StringBuilder("");
     72         long lvalue = Math.abs((long)value);
     73         while (lvalue > 0) {
     74             builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
     75             lvalue /= lengthOfChars;
     76         }
     77         if (builder.toString().equals("")) return "a";
     78         return builder.toString();
     79     }
     80 
     81     private static void printCharGroup(final CharGroupInfo info) {
     82         Log.d(TAG, "    CharGroup at " + info.mOriginalAddress);
     83         Log.d(TAG, "        flags = " + info.mFlags);
     84         Log.d(TAG, "        parentAddress = " + info.mParentAddress);
     85         Log.d(TAG, "        characters = " + new String(info.mCharacters, 0,
     86                 info.mCharacters.length));
     87         if (info.mFrequency != -1) Log.d(TAG, "        frequency = " + info.mFrequency);
     88         if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
     89             Log.d(TAG, "        children address = no children address");
     90         } else {
     91             Log.d(TAG, "        children address = " + info.mChildrenAddress);
     92         }
     93         if (info.mShortcutTargets != null) {
     94             for (final WeightedString ws : info.mShortcutTargets) {
     95                 Log.d(TAG, "        shortcuts = " + ws.mWord);
     96             }
     97         }
     98         if (info.mBigrams != null) {
     99             for (final PendingAttribute attr : info.mBigrams) {
    100                 Log.d(TAG, "        bigram = " + attr.mAddress);
    101             }
    102         }
    103         Log.d(TAG, "    end address = " + info.mEndAddress);
    104     }
    105 
    106     private static void printNode(final FusionDictionaryBufferInterface buffer,
    107             final FormatSpec.FormatOptions formatOptions) {
    108         Log.d(TAG, "Node at " + buffer.position());
    109         final int count = BinaryDictInputOutput.readCharGroupCount(buffer);
    110         Log.d(TAG, "    charGroupCount = " + count);
    111         for (int i = 0; i < count; ++i) {
    112             final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
    113                     buffer.position(), formatOptions);
    114             printCharGroup(currentInfo);
    115         }
    116         if (formatOptions.mSupportsDynamicUpdate) {
    117             final int forwardLinkAddress = buffer.readUnsignedInt24();
    118             Log.d(TAG, "    forwardLinkAddress = " + forwardLinkAddress);
    119         }
    120     }
    121 
    122     private static void printBinaryFile(final FusionDictionaryBufferInterface buffer)
    123             throws IOException, UnsupportedFormatException {
    124         FileHeader header = BinaryDictInputOutput.readHeader(buffer);
    125         while (buffer.position() < buffer.limit()) {
    126             printNode(buffer, header.mFormatOptions);
    127         }
    128     }
    129 
    130     private int getWordPosition(final File file, final String word) {
    131         int position = FormatSpec.NOT_VALID_WORD;
    132         FileInputStream inStream = null;
    133         try {
    134             inStream = new FileInputStream(file);
    135             final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
    136                     inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
    137             position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
    138         } catch (IOException e) {
    139         } catch (UnsupportedFormatException e) {
    140         } finally {
    141             if (inStream != null) {
    142                 try {
    143                     inStream.close();
    144                 } catch (IOException e) {
    145                     // do nothing
    146                 }
    147             }
    148         }
    149         return position;
    150     }
    151 
    152     private CharGroupInfo findWordFromFile(final File file, final String word) {
    153         FileInputStream inStream = null;
    154         CharGroupInfo info = null;
    155         try {
    156             inStream = new FileInputStream(file);
    157             final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
    158                     inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
    159             info = BinaryDictIOUtils.findWordFromBuffer(buffer, word);
    160         } catch (IOException e) {
    161         } catch (UnsupportedFormatException e) {
    162         } finally {
    163             if (inStream != null) {
    164                 try {
    165                     inStream.close();
    166                 } catch (IOException e) {
    167                     // do nothing
    168                 }
    169             }
    170         }
    171         return info;
    172     }
    173 
    174     // return amount of time to insert a word
    175     private long insertAndCheckWord(final File file, final String word, final int frequency,
    176             final boolean exist, final ArrayList<WeightedString> bigrams,
    177             final ArrayList<WeightedString> shortcuts) {
    178         RandomAccessFile raFile = null;
    179         BufferedOutputStream outStream = null;
    180         FusionDictionaryBufferInterface buffer = null;
    181         long amountOfTime = -1;
    182         try {
    183             raFile = new RandomAccessFile(file, "rw");
    184             buffer = new ByteBufferWrapper(raFile.getChannel().map(
    185                     FileChannel.MapMode.READ_WRITE, 0, file.length()));
    186             outStream = new BufferedOutputStream(new FileOutputStream(file, true));
    187 
    188             if (!exist) {
    189                 assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
    190             }
    191             final long now = System.nanoTime();
    192             BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, bigrams, shortcuts,
    193                     false, false);
    194             amountOfTime = System.nanoTime() - now;
    195             outStream.flush();
    196             MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
    197             outStream.close();
    198             raFile.close();
    199         } catch (IOException e) {
    200         } catch (UnsupportedFormatException e) {
    201         } finally {
    202             if (outStream != null) {
    203                 try {
    204                     outStream.close();
    205                 } catch (IOException e) {
    206                     // do nothing
    207                 }
    208             }
    209             if (raFile != null) {
    210                 try {
    211                     raFile.close();
    212                 } catch (IOException e) {
    213                     // do nothing
    214                 }
    215             }
    216         }
    217         return amountOfTime;
    218     }
    219 
    220     private void deleteWord(final File file, final String word) {
    221         RandomAccessFile raFile = null;
    222         FusionDictionaryBufferInterface buffer = null;
    223         try {
    224             raFile = new RandomAccessFile(file, "rw");
    225             buffer = new ByteBufferWrapper(raFile.getChannel().map(
    226                     FileChannel.MapMode.READ_WRITE, 0, file.length()));
    227             BinaryDictIOUtils.deleteWord(buffer, word);
    228         } catch (IOException e) {
    229         } catch (UnsupportedFormatException e) {
    230         } finally {
    231             if (raFile != null) {
    232                 try {
    233                     raFile.close();
    234                 } catch (IOException e) {
    235                     // do nothing
    236                 }
    237             }
    238         }
    239     }
    240 
    241     private void checkReverseLookup(final File file, final String word, final int position) {
    242         FileInputStream inStream = null;
    243         try {
    244             inStream = new FileInputStream(file);
    245             final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
    246                     inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
    247             final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
    248             assertEquals(word, BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize,
    249                     position - header.mHeaderSize, header.mFormatOptions).mWord);
    250         } catch (IOException e) {
    251         } catch (UnsupportedFormatException e) {
    252         } finally {
    253             if (inStream != null) {
    254                 try {
    255                     inStream.close();
    256                 } catch (IOException e) {
    257                     // do nothing
    258                 }
    259             }
    260         }
    261     }
    262 
    263     public void testInsertWord() {
    264         File file = null;
    265         try {
    266             file = File.createTempFile("testInsertWord", ".dict", getContext().getCacheDir());
    267         } catch (IOException e) {
    268             fail("IOException while creating temporary file: " + e);
    269         }
    270 
    271         // set an initial dictionary.
    272         final FusionDictionary dict = new FusionDictionary(new Node(),
    273                 new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
    274         dict.add("abcd", 10, null, false);
    275 
    276         try {
    277             final FileOutputStream out = new FileOutputStream(file);
    278             BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
    279             out.close();
    280         } catch (IOException e) {
    281             fail("IOException while writing an initial dictionary : " + e);
    282         } catch (UnsupportedFormatException e) {
    283             fail("UnsupportedFormatException while writing an initial dictionary : " + e);
    284         }
    285 
    286         MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
    287         insertAndCheckWord(file, "abcde", 10, false, null, null);
    288 
    289         insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null);
    290         checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
    291 
    292         insertAndCheckWord(file, "abcdabcd", 10, false, null, null);
    293         checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
    294 
    295         // update the existing word.
    296         insertAndCheckWord(file, "abcdabcd", 15, true, null, null);
    297 
    298         // split 1
    299         insertAndCheckWord(file, "ab", 20, false, null, null);
    300 
    301         // split 2
    302         insertAndCheckWord(file, "ami", 30, false, null, null);
    303 
    304         deleteWord(file, "ami");
    305         assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));
    306 
    307         insertAndCheckWord(file, "abcdabfg", 30, false, null, null);
    308 
    309         deleteWord(file, "abcd");
    310         assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
    311     }
    312 
    313     public void testInsertWordWithBigrams() {
    314         File file = null;
    315         try {
    316             file = File.createTempFile("testInsertWordWithBigrams", ".dict",
    317                     getContext().getCacheDir());
    318         } catch (IOException e) {
    319             fail("IOException while creating temporary file: " + e);
    320         }
    321 
    322         // set an initial dictionary.
    323         final FusionDictionary dict = new FusionDictionary(new Node(),
    324                 new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
    325         dict.add("abcd", 10, null, false);
    326         dict.add("efgh", 15, null, false);
    327 
    328         try {
    329             final FileOutputStream out = new FileOutputStream(file);
    330             BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
    331             out.close();
    332         } catch (IOException e) {
    333             fail("IOException while writing an initial dictionary : " + e);
    334         } catch (UnsupportedFormatException e) {
    335             fail("UnsupportedFormatException while writing an initial dictionary : " + e);
    336         }
    337 
    338         final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
    339         banana.add(new WeightedString("banana", 10));
    340 
    341         insertAndCheckWord(file, "banana", 0, false, null, null);
    342         insertAndCheckWord(file, "recursive", 60, true, banana, null);
    343 
    344         final CharGroupInfo info = findWordFromFile(file, "recursive");
    345         int bananaPos = getWordPosition(file, "banana");
    346         assertNotNull(info.mBigrams);
    347         assertEquals(info.mBigrams.size(), 1);
    348         assertEquals(info.mBigrams.get(0).mAddress, bananaPos);
    349     }
    350 
    351     public void testRandomWords() {
    352         File file = null;
    353         try {
    354             file = File.createTempFile("testRandomWord", ".dict", getContext().getCacheDir());
    355         } catch (IOException e) {
    356         }
    357         assertNotNull(file);
    358 
    359         // set an initial dictionary.
    360         final FusionDictionary dict = new FusionDictionary(new Node(),
    361                 new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
    362                         false));
    363         dict.add("initial", 10, null, false);
    364 
    365         try {
    366             final FileOutputStream out = new FileOutputStream(file);
    367             BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
    368             out.close();
    369         } catch (IOException e) {
    370             assertTrue(false);
    371         } catch (UnsupportedFormatException e) {
    372             assertTrue(false);
    373         }
    374 
    375         long maxTimeToInsert = 0, sum = 0;
    376         long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert.
    377         int cnt = 0;
    378         for (final String word : sWords) {
    379             final long diff = insertAndCheckWord(file, word,
    380                     cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null);
    381             maxTimeToInsert = Math.max(maxTimeToInsert, diff);
    382             minTimeToInsert = Math.min(minTimeToInsert, diff);
    383             sum += diff;
    384             cnt++;
    385         }
    386         cnt = 0;
    387         for (final String word : sWords) {
    388             MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
    389         }
    390 
    391         Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
    392         Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
    393         Log.d(TAG, "avg = " + ((double)sum/MAX_UNIGRAMS/1000000) + " ms.");
    394     }
    395 }
    396