Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
     20 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     21 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
     22 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
     23 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
     24 
     25 import java.io.File;
     26 import java.io.FileNotFoundException;
     27 import java.io.FileOutputStream;
     28 import java.io.IOException;
     29 import java.io.OutputStream;
     30 import java.util.ArrayList;
     31 import java.util.Iterator;
     32 
     33 /**
     34  * An implementation of DictEncoder for version 3 binary dictionary.
     35  */
     36 public class Ver3DictEncoder implements DictEncoder {
     37 
     38     private final File mDictFile;
     39     private OutputStream mOutStream;
     40     private byte[] mBuffer;
     41     private int mPosition;
     42 
     43     public Ver3DictEncoder(final File dictFile) {
     44         mDictFile = dictFile;
     45         mOutStream = null;
     46         mBuffer = null;
     47     }
     48 
     49     // This constructor is used only by BinaryDictOffdeviceUtilsTests.
     50     // If you want to use this in the production code, you should consider keeping consistency of
     51     // the interface of Ver3DictDecoder by using factory.
     52     public Ver3DictEncoder(final OutputStream outStream) {
     53         mDictFile = null;
     54         mOutStream = outStream;
     55     }
     56 
     57     private void openStream() throws FileNotFoundException {
     58         mOutStream = new FileOutputStream(mDictFile);
     59     }
     60 
     61     private void close() throws IOException {
     62         if (mOutStream != null) {
     63             mOutStream.close();
     64             mOutStream = null;
     65         }
     66     }
     67 
     68     @Override
     69     public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
     70             throws IOException, UnsupportedFormatException {
     71         if (formatOptions.mVersion > FormatSpec.VERSION3) {
     72             throw new UnsupportedFormatException(
     73                     "The given format options has wrong version number : "
     74                     + formatOptions.mVersion);
     75         }
     76 
     77         if (mOutStream == null) {
     78             openStream();
     79         }
     80         BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions);
     81 
     82         // Addresses are limited to 3 bytes, but since addresses can be relative to each node
     83         // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding
     84         // the order of the PtNode arrays becomes a quite complicated problem, because though the
     85         // dictionary itself does not have a size limit, each node array must still be within 16MB
     86         // of all its children and parents. As long as this is ensured, the dictionary file may
     87         // grow to any size.
     88 
     89         // Leave the choice of the optimal node order to the flattenTree function.
     90         MakedictLog.i("Flattening the tree...");
     91         ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
     92 
     93         MakedictLog.i("Computing addresses...");
     94         BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions);
     95         MakedictLog.i("Checking PtNode array...");
     96         if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
     97 
     98         // Create a buffer that matches the final dictionary size.
     99         final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
    100         final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
    101         mBuffer = new byte[bufferSize];
    102 
    103         MakedictLog.i("Writing file...");
    104 
    105         for (PtNodeArray nodeArray : flatNodes) {
    106             BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions);
    107         }
    108         if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
    109         mOutStream.write(mBuffer, 0, mPosition);
    110 
    111         MakedictLog.i("Done");
    112         close();
    113     }
    114 
    115     @Override
    116     public void setPosition(final int position) {
    117         if (mBuffer == null || position < 0 || position >= mBuffer.length) return;
    118         mPosition = position;
    119     }
    120 
    121     @Override
    122     public int getPosition() {
    123         return mPosition;
    124     }
    125 
    126     @Override
    127     public void writePtNodeCount(final int ptNodeCount) {
    128         final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
    129         if (countSize != 1 && countSize != 2) {
    130             throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
    131         }
    132         mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, ptNodeCount,
    133                 countSize);
    134     }
    135 
    136     private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) {
    137         final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
    138         mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
    139                 BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions),
    140                 FormatSpec.PTNODE_FLAGS_SIZE);
    141     }
    142 
    143     private void writeParentPosition(final int parentPosition, final PtNode ptNode,
    144             final FormatOptions formatOptions) {
    145         if (parentPosition == FormatSpec.NO_PARENT_ADDRESS) {
    146             mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
    147                     parentPosition, formatOptions);
    148         } else {
    149             mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
    150                     parentPosition - ptNode.mCachedAddressAfterUpdate, formatOptions);
    151         }
    152     }
    153 
    154     private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
    155         mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition);
    156         if (hasSeveralChars) {
    157             mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
    158         }
    159     }
    160 
    161     private void writeFrequency(final int frequency) {
    162         if (frequency >= 0) {
    163             mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency,
    164                     FormatSpec.PTNODE_FREQUENCY_SIZE);
    165         }
    166     }
    167 
    168     private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) {
    169         final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
    170         if (formatOptions.mSupportsDynamicUpdate) {
    171             mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition,
    172                     childrenPos);
    173         } else {
    174             mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
    175                     childrenPos);
    176         }
    177     }
    178 
    179     /**
    180      * Write a shortcut attributes list to mBuffer.
    181      *
    182      * @param shortcuts the shortcut attributes list.
    183      */
    184     private void writeShortcuts(final ArrayList<WeightedString> shortcuts) {
    185         if (null == shortcuts || shortcuts.isEmpty()) return;
    186 
    187         final int indexOfShortcutByteSize = mPosition;
    188         mPosition += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
    189         final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
    190         while (shortcutIterator.hasNext()) {
    191             final WeightedString target = shortcutIterator.next();
    192             final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
    193                     shortcutIterator.hasNext(),
    194                     target.mFrequency);
    195             mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
    196                     FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
    197             final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
    198             mPosition += shortcutShift;
    199         }
    200         final int shortcutByteSize = mPosition - indexOfShortcutByteSize;
    201         if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
    202             throw new RuntimeException("Shortcut list too large");
    203         }
    204         BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize,
    205                 FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
    206     }
    207 
    208     /**
    209      * Write a bigram attributes list to mBuffer.
    210      *
    211      * @param bigrams the bigram attributes list.
    212      * @param dict the dictionary the node array is a part of (for relative offsets).
    213      */
    214     private void writeBigrams(final ArrayList<WeightedString> bigrams,
    215             final FusionDictionary dict) {
    216         if (bigrams == null) return;
    217 
    218         final Iterator<WeightedString> bigramIterator = bigrams.iterator();
    219         while (bigramIterator.hasNext()) {
    220             final WeightedString bigram = bigramIterator.next();
    221             final PtNode target =
    222                     FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
    223             final int addressOfBigram = target.mCachedAddressAfterUpdate;
    224             final int unigramFrequencyForThisWord = target.mFrequency;
    225             final int offset = addressOfBigram
    226                     - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
    227             final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
    228                     offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
    229             mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
    230                     FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
    231             mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
    232                     Math.abs(offset));
    233         }
    234     }
    235 
    236     @Override
    237     public void writeForwardLinkAddress(final int forwardLinkAddress) {
    238         mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, forwardLinkAddress,
    239                 FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
    240     }
    241 
    242     @Override
    243     public void writePtNode(final PtNode ptNode, final int parentPosition,
    244             final FormatOptions formatOptions, final FusionDictionary dict) {
    245         writePtNodeFlags(ptNode, formatOptions);
    246         writeParentPosition(parentPosition, ptNode, formatOptions);
    247         writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
    248         writeFrequency(ptNode.mFrequency);
    249         writeChildrenPosition(ptNode, formatOptions);
    250         writeShortcuts(ptNode.mShortcutTargets);
    251         writeBigrams(ptNode.mBigrams, dict);
    252     }
    253 }
    254