/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * An implementation of DictEncoder for version 3 binary dictionary.
 *
 * The header is written straight to the output stream; the PtNode arrays are serialized into
 * an in-memory buffer ({@code mBuffer}) at the cursor {@code mPosition}, and the buffer is then
 * written to the stream in a single call.
 */
public class Ver3DictEncoder implements DictEncoder {

    // Destination file; null when the encoder was built around an existing stream.
    private final File mDictFile;
    // Destination stream; opened lazily from mDictFile when null, and reset to null by close().
    private OutputStream mOutStream;
    // Buffer receiving the serialized PtNode arrays; allocated by writeDictionary().
    private byte[] mBuffer;
    // Current write cursor inside mBuffer.
    private int mPosition;

    /**
     * Creates an encoder that writes to the given file. The file is only opened when
     * {@link #writeDictionary} is called.
     *
     * @param dictFile the file to write the dictionary to.
     */
    public Ver3DictEncoder(final File dictFile) {
        mDictFile = dictFile;
        mOutStream = null;
        mBuffer = null;
    }

    // This constructor is used only by BinaryDictOffdeviceUtilsTests.
    // If you want to use this in the production code, you should consider keeping consistency of
    // the interface of Ver3DictDecoder by using factory.
    public Ver3DictEncoder(final OutputStream outStream) {
        mDictFile = null;
        mOutStream = outStream;
    }

    /**
     * Opens a file output stream on mDictFile and stores it in mOutStream.
     *
     * @throws FileNotFoundException if the file cannot be opened for writing.
     */
    private void openStream() throws FileNotFoundException {
        mOutStream = new FileOutputStream(mDictFile);
    }

    /**
     * Closes the output stream if there is one, and forgets it. Calling this with no open
     * stream does nothing.
     *
     * @throws IOException if closing the stream fails.
     */
    private void close() throws IOException {
        if (mOutStream != null) {
            mOutStream.close();
            mOutStream = null;
        }
    }

    /**
     * Writes the given dictionary to the output of this encoder.
     *
     * The output stream is closed when this method returns, whether it completes normally or
     * throws after the stream has been acquired. A failed version check happens before the
     * stream is touched and leaves it as it was.
     *
     * @param dict the dictionary to write.
     * @param formatOptions the format options to write the dictionary with.
     * @throws IOException if opening, writing to or closing the output fails.
     * @throws UnsupportedFormatException if the requested version is above FormatSpec.VERSION3.
     */
    @Override
    public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
            throws IOException, UnsupportedFormatException {
        if (formatOptions.mVersion > FormatSpec.VERSION3) {
            throw new UnsupportedFormatException(
                    "The given format options has wrong version number : "
                    + formatOptions.mVersion);
        }

        if (mOutStream == null) {
            openStream();
        }
        // From here on the stream is held by this call: make sure it is released even if any
        // of the steps below throws, so that a failed write does not leak the file descriptor.
        try {
            BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions);

            // Addresses are limited to 3 bytes, but since addresses can be relative to each node
            // array, the structure itself is not limited to 16MB. However, if it is over 16MB
            // deciding the order of the PtNode arrays becomes a quite complicated problem,
            // because though the dictionary itself does not have a size limit, each node array
            // must still be within 16MB of all its children and parents. As long as this is
            // ensured, the dictionary file may grow to any size.

            // Leave the choice of the optimal node order to the flattenTree function.
            MakedictLog.i("Flattening the tree...");
            final ArrayList<PtNodeArray> flatNodes =
                    BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);

            MakedictLog.i("Computing addresses...");
            BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions);
            MakedictLog.i("Checking PtNode array...");
            if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);

            // Create a buffer that matches the final dictionary size.
            final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
            final int bufferSize =
                    lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
            mBuffer = new byte[bufferSize];
            // A fresh buffer starts with a fresh cursor; without this, reusing the encoder
            // would start from wherever the previous call left off.
            mPosition = 0;

            MakedictLog.i("Writing file...");

            for (final PtNodeArray nodeArray : flatNodes) {
                BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray,
                        formatOptions);
            }
            if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
            mOutStream.write(mBuffer, 0, mPosition);

            MakedictLog.i("Done");
        } finally {
            close();
        }
    }

    /**
     * Moves the write cursor. The request is silently ignored when there is no buffer yet or
     * when the position is outside of the buffer.
     *
     * @param position the new position in the buffer.
     */
    @Override
    public void setPosition(final int position) {
        if (mBuffer == null || position < 0 || position >= mBuffer.length) return;
        mPosition = position;
    }

    /**
     * @return the current write cursor.
     */
    @Override
    public int getPosition() {
        return mPosition;
    }

    /**
     * Writes a PtNode count at the current position and advances the cursor.
     *
     * @param ptNodeCount the count to write.
     * @throws RuntimeException if the size computed for the count is neither 1 nor 2 bytes.
     */
    @Override
    public void writePtNodeCount(final int ptNodeCount) {
        final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
        if (countSize != 1 && countSize != 2) {
            throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
        }
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, ptNodeCount,
                countSize);
    }

    /**
     * Writes the flags of a PtNode at the current position and advances the cursor.
     *
     * @param ptNode the node whose flags are written.
     * @param formatOptions the format options in use.
     */
    private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) {
        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
                BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions),
                FormatSpec.PTNODE_FLAGS_SIZE);
    }

    /**
     * Writes the parent address of a PtNode and advances the cursor.
     *
     * FormatSpec.NO_PARENT_ADDRESS is passed through untouched; any other value is passed as an
     * offset relative to the cached address of the node.
     *
     * @param parentPosition the position of the parent, or FormatSpec.NO_PARENT_ADDRESS.
     * @param ptNode the node whose parent address is written.
     * @param formatOptions the format options in use.
     */
    private void writeParentPosition(final int parentPosition, final PtNode ptNode,
            final FormatOptions formatOptions) {
        if (parentPosition == FormatSpec.NO_PARENT_ADDRESS) {
            mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
                    parentPosition, formatOptions);
        } else {
            mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
                    parentPosition - ptNode.mCachedAddressAfterUpdate, formatOptions);
        }
    }

    /**
     * Writes the characters of a PtNode and advances the cursor.
     *
     * @param codePoints the code points to write.
     * @param hasSeveralChars whether to append FormatSpec.PTNODE_CHARACTERS_TERMINATOR after
     *        the characters.
     */
    private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
        mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition);
        if (hasSeveralChars) {
            mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
        }
    }

    /**
     * Writes a frequency and advances the cursor. Nothing is written for a negative frequency.
     *
     * @param frequency the frequency to write.
     */
    private void writeFrequency(final int frequency) {
        // NOTE(review): a negative frequency presumably marks a non-terminal node - confirm.
        if (frequency >= 0) {
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency,
                    FormatSpec.PTNODE_FREQUENCY_SIZE);
        }
    }

    /**
     * Writes the children position of a PtNode and advances the cursor, using the signed
     * variant when the format supports dynamic updates.
     *
     * @param ptNode the node whose children position is written.
     * @param formatOptions the format options in use.
     */
    private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) {
        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
        // Unlike writeUIntToBuffer, the value returned by these helpers is added to the cursor.
        if (formatOptions.mSupportsDynamicUpdate) {
            mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition,
                    childrenPos);
        } else {
            mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
                    childrenPos);
        }
    }

    /**
     * Write a shortcut attributes list to mBuffer.
     *
     * Nothing is written for a null or empty list. Otherwise room is reserved for the size of
     * the list, the shortcuts are written, and the size (which includes the size field itself)
     * is filled in afterwards.
     *
     * @param shortcuts the shortcut attributes list.
     * @throws RuntimeException if the written list is larger than
     *         FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE.
     */
    private void writeShortcuts(final ArrayList<WeightedString> shortcuts) {
        if (null == shortcuts || shortcuts.isEmpty()) return;

        // Skip the size field for now; it can only be filled in once the list is written.
        final int indexOfShortcutByteSize = mPosition;
        mPosition += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
        final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
        while (shortcutIterator.hasNext()) {
            final WeightedString target = shortcutIterator.next();
            // hasNext() is evaluated after next(), so it tells whether more shortcuts follow.
            final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
                    shortcutIterator.hasNext(),
                    target.mFrequency);
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
                    FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
            mPosition += shortcutShift;
        }
        final int shortcutByteSize = mPosition - indexOfShortcutByteSize;
        if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
            throw new RuntimeException("Shortcut list too large");
        }
        // Written at the reserved index; the cursor is deliberately left where it is.
        BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize,
                FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
    }

    /**
     * Write a bigram attributes list to mBuffer.
     *
     * Nothing is written for a null list.
     *
     * @param bigrams the bigram attributes list.
     * @param dict the dictionary the node array is a part of (for relative offsets).
     */
    private void writeBigrams(final ArrayList<WeightedString> bigrams,
            final FusionDictionary dict) {
        if (bigrams == null) return;

        final Iterator<WeightedString> bigramIterator = bigrams.iterator();
        while (bigramIterator.hasNext()) {
            final WeightedString bigram = bigramIterator.next();
            // NOTE(review): target is dereferenced without a null check; confirm that
            // findWordInTree cannot return null for a word listed as a bigram.
            final PtNode target =
                    FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
            final int addressOfBigram = target.mCachedAddressAfterUpdate;
            final int unigramFrequencyForThisWord = target.mFrequency;
            // The offset is relative to the position right after the attribute flags.
            final int offset = addressOfBigram
                    - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
                    offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
                    FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            // Only the magnitude is written here; presumably the sign is carried by the flags
            // built from the signed offset above - TODO confirm in makeBigramFlags.
            mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
                    Math.abs(offset));
        }
    }

    /**
     * Writes a forward link address at the current position and advances the cursor.
     *
     * @param forwardLinkAddress the address to write.
     */
    @Override
    public void writeForwardLinkAddress(final int forwardLinkAddress) {
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, forwardLinkAddress,
                FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
    }

    /**
     * Writes a whole PtNode at the current position: flags, parent position, characters,
     * frequency, children position, shortcuts and bigrams, in that order.
     *
     * @param ptNode the node to write.
     * @param parentPosition the position of the parent, or FormatSpec.NO_PARENT_ADDRESS.
     * @param formatOptions the format options in use.
     * @param dict the dictionary the node is a part of (used to resolve bigram targets).
     */
    @Override
    public void writePtNode(final PtNode ptNode, final int parentPosition,
            final FormatOptions formatOptions, final FusionDictionary dict) {
        writePtNodeFlags(ptNode, formatOptions);
        writeParentPosition(parentPosition, ptNode, formatOptions);
        writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
        writeFrequency(ptNode.mFrequency);
        writeChildrenPosition(ptNode, formatOptions);
        writeShortcuts(ptNode.mShortcutTargets);
        writeBigrams(ptNode.mBigrams, dict);
    }
}