Home | History | Annotate | Download | only in dicttool
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.dicttool;
     18 
     19 import com.android.inputmethod.latin.common.CodePointUtils;
     20 import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils;
     21 import com.android.inputmethod.latin.dicttool.Compress;
     22 import com.android.inputmethod.latin.dicttool.Crypt;
     23 import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
     24 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
     25 import com.android.inputmethod.latin.makedict.BinaryDictUtils;
     26 import com.android.inputmethod.latin.makedict.DictDecoder;
     27 import com.android.inputmethod.latin.makedict.DictEncoder;
     28 import com.android.inputmethod.latin.makedict.DictionaryHeader;
     29 import com.android.inputmethod.latin.makedict.FormatSpec;
     30 import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
     31 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     32 import com.android.inputmethod.latin.makedict.FusionDictionary;
     33 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
     34 import com.android.inputmethod.latin.makedict.ProbabilityInfo;
     35 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
     36 import com.android.inputmethod.latin.makedict.Ver2DictEncoder;
     37 
     38 import junit.framework.TestCase;
     39 
     40 import java.io.BufferedOutputStream;
     41 import java.io.File;
     42 import java.io.FileOutputStream;
     43 import java.io.IOException;
     44 import java.io.OutputStream;
     45 import java.util.ArrayList;
     46 import java.util.HashMap;
     47 import java.util.HashSet;
     48 import java.util.Random;
     49 import java.util.Set;
     50 
     51 /**
     52  * Unit tests for BinaryDictOffdeviceUtils
     53  */
     54 public class BinaryDictOffdeviceUtilsTests extends TestCase {
     55     private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
     56     private static final int CODE_POINT_SET_SIZE = 300;
     57     final Random mRandom;
     58     private static final ArrayList<String> sWords = new ArrayList<>();
     59 
     60     public BinaryDictOffdeviceUtilsTests(final long seed, final int maxUnigrams) {
     61         super();
     62         mRandom = new Random(seed);
     63         sWords.clear();
     64         generateWords(maxUnigrams, mRandom);
     65     }
     66 
     67     private static void generateWords(final int maxUnigrams, final Random random) {
     68         final int[] codePointSet = CodePointUtils.generateCodePointSet(
     69                 CODE_POINT_SET_SIZE, random);
     70         final Set<String> wordSet = new HashSet<>();
     71         while (wordSet.size() < maxUnigrams) {
     72             wordSet.add(CodePointUtils.generateWord(random, codePointSet));
     73         }
     74         sWords.addAll(wordSet);
     75     }
     76 
     77     public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
     78         final String VERSION = "1";
     79         final String LOCALE = "test";
     80         final String ID = "main:test";
     81 
     82         // Create a thrice-compressed dictionary file.
     83         final DictionaryOptions testOptions = new DictionaryOptions(new HashMap<String, String>());
     84         testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_VERSION_KEY, VERSION);
     85         testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, LOCALE);
     86         testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_ID_KEY, ID);
     87         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), testOptions);
     88         dict.add("foo", new ProbabilityInfo(TEST_FREQ), null, false /* isNotAWord */,
     89                 false /* isPossiblyOffensive */);
     90         dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */,
     91                 false /* isPossiblyOffensive */);
     92         dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */,
     93                 false /* isPossiblyOffensive */);
     94         dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */,
     95                 false /* isPossiblyOffensive */);
     96         dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */,
     97                 false /* isPossiblyOffensive */);
     98 
     99         final File dst = File.createTempFile("testGetRawDict", ".tmp");
    100         dst.deleteOnExit();
    101         try (final OutputStream out = Compress.getCompressedStream(
    102                 new BufferedOutputStream(new FileOutputStream(dst)))) {
    103             final DictEncoder dictEncoder = new Ver2DictEncoder(out);
    104             dictEncoder.writeDictionary(dict, new FormatOptions(FormatSpec.VERSION202, false));
    105         }
    106 
    107         // Test for an actually compressed dictionary and its contents
    108         final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodeSpec =
    109                 BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
    110         assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain());
    111         final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mResult, 0,
    112                 decodeSpec.mResult.length());
    113         final FusionDictionary resultDict =
    114                 dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
    115         assertEquals("Wrong version attribute", VERSION, resultDict.mOptions.mAttributes.get(
    116                 DictionaryHeader.DICTIONARY_VERSION_KEY));
    117         assertEquals("Wrong locale attribute", LOCALE, resultDict.mOptions.mAttributes.get(
    118                 DictionaryHeader.DICTIONARY_LOCALE_KEY));
    119         assertEquals("Wrong id attribute", ID, resultDict.mOptions.mAttributes.get(
    120                 DictionaryHeader.DICTIONARY_ID_KEY));
    121         assertEquals("Dictionary can't be read back correctly",
    122                 FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getProbability(),
    123                 TEST_FREQ);
    124     }
    125 
    126     public void testGetRawDictFails() throws IOException {
    127         // Randomly create some 4k file containing garbage
    128         final File dst = File.createTempFile("testGetRawDict", ".tmp");
    129         dst.deleteOnExit();
    130         try (final OutputStream out = new BufferedOutputStream(new FileOutputStream(dst))) {
    131             for (int i = 0; i < 1024; ++i) {
    132                 out.write(0x12345678);
    133             }
    134         }
    135 
    136         // Test that a random data file actually fails
    137         assertNull("Wrongly identified data file",
    138                 BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst));
    139 
    140         final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
    141         gzDst.deleteOnExit();
    142         try (final OutputStream gzOut = Compress.getCompressedStream(
    143                 new BufferedOutputStream(new FileOutputStream(gzDst)))) {
    144             for (int i = 0; i < 1024; ++i) {
    145                 gzOut.write(0x12345678);
    146             }
    147         }
    148 
    149         // Test that a compressed random data file actually fails
    150         assertNull("Wrongly identified data file",
    151                 BinaryDictOffdeviceUtils.getRawDictionaryOrNull(gzDst));
    152     }
    153 
    154     public void runTestHeaderReaderProcessorWithOneSpec(final boolean compress, final boolean crypt)
    155             throws IOException, UnsupportedFormatException {
    156         final String dictName = "testHeaderReaderProcessor";
    157         final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
    158         final int MAX_NUMBER_OF_OPTIONS_TO_ADD = 5;
    159         final HashMap<String, String> options = new HashMap<>();
    160         // Required attributes
    161         options.put("dictionary", "main:en_US");
    162         options.put("locale", "en_US");
    163         options.put("version", Integer.toString(mRandom.nextInt()));
    164         // Add some random options for test
    165         final int numberOfOptionsToAdd = mRandom.nextInt() % (MAX_NUMBER_OF_OPTIONS_TO_ADD + 1);
    166         for (int i = 0; i < numberOfOptionsToAdd; ++i) {
    167             options.put(sWords.get(2 * i), sWords.get(2 * 1 + 1));
    168         }
    169         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
    170                 new DictionaryOptions(options));
    171 
    172         for (int i = 0; i < sWords.size(); ++i) {
    173             final String word = sWords.get(i);
    174             dict.add(word, new ProbabilityInfo(TEST_FREQ), null /* shortcuts */,
    175                     false /* isNotAWord */, false /* isPossiblyOffensive */);
    176         }
    177 
    178         File file = File.createTempFile(dictName, ".tmp");
    179         final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
    180         dictEncoder.writeDictionary(dict, formatOptions);
    181 
    182         if (compress) {
    183             final File rawFile = file;
    184             file = File.createTempFile(dictName + ".compress", ".tmp");
    185             final Compress.Compressor compressCommand = new Compress.Compressor();
    186             compressCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
    187             compressCommand.run();
    188         }
    189         if (crypt) {
    190             final File rawFile = file;
    191             file = File.createTempFile(dictName + ".crypt", ".tmp");
    192             final Crypt.Encrypter cryptCommand = new Crypt.Encrypter();
    193             cryptCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
    194             cryptCommand.run();
    195         }
    196 
    197         final DecoderChainSpec<DictionaryHeader> spec =
    198                 BinaryDictOffdeviceUtils.decodeDictionaryForProcess(file,
    199                         new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
    200         assertNotNull("Can't decode a dictionary we just wrote : " + file, spec);
    201         final DictionaryHeader header = spec.mResult;
    202         assertEquals("raw" + (crypt ? " > encryption" : "") + (compress ? " > compression" : ""),
    203                 spec.describeChain());
    204         assertEquals(header.mDictionaryOptions.mAttributes, options);
    205     }
    206 
    207     public void testHeaderReaderProcessor() throws IOException, UnsupportedFormatException {
    208         runTestHeaderReaderProcessorWithOneSpec(false /* compress */, false /* crypt */);
    209         runTestHeaderReaderProcessorWithOneSpec(true /* compress */, false /* crypt */);
    210         runTestHeaderReaderProcessorWithOneSpec(true /* compress */, true /* crypt */);
    211     }
    212 }
    213