/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * An implementation of DictEncoder for version 2 binary dictionary.
 *
 * <p>The header is written straight to the output stream. The PtNode arrays are first assembled
 * in an in-memory buffer ({@code mBuffer}), tracked by a write cursor ({@code mPosition}), and
 * the buffer is then written to the stream in one call.
 *
 * <p>The output stream is always closed at the end of {@link #writeDictionary}. An encoder built
 * from a {@link File} reopens the file on the next call; an encoder built from an
 * {@link OutputStream} has no file to reopen and is therefore effectively single-use.
 */
@UsedForTesting
public class Ver2DictEncoder implements DictEncoder {

    /** Destination file, or null when the encoder was given a stream directly. */
    private final File mDictFile;
    /** Destination stream; null until opened and again after it has been closed. */
    private OutputStream mOutStream;
    /** Buffer holding the encoded PtNode arrays; allocated by {@link #writeDictionary}. */
    private byte[] mBuffer;
    /** Current write offset into {@code mBuffer}. */
    private int mPosition;

    /**
     * Creates an encoder that writes to the given file. The file is opened lazily, when
     * {@link #writeDictionary} is called.
     *
     * @param dictFile the file to write the dictionary to.
     */
    @UsedForTesting
    public Ver2DictEncoder(final File dictFile) {
        mDictFile = dictFile;
        mOutStream = null;
        mBuffer = null;
    }

    // This constructor is used only by BinaryDictOffdeviceUtilsTests.
    // If you want to use this in the production code, you should consider keeping consistency of
    // the interface of Ver3DictDecoder by using factory.
    /**
     * Creates an encoder that writes to the given stream. The stream is closed by
     * {@link #writeDictionary} once writing has been attempted.
     *
     * @param outStream the stream to write the dictionary to.
     */
    @UsedForTesting
    public Ver2DictEncoder(final OutputStream outStream) {
        mDictFile = null;
        mOutStream = outStream;
    }

    /**
     * Opens a file output stream on {@code mDictFile} and stores it in {@code mOutStream}.
     *
     * @throws FileNotFoundException if the file cannot be opened for writing.
     */
    private void openStream() throws FileNotFoundException {
        mOutStream = new FileOutputStream(mDictFile);
    }

    /**
     * Closes the output stream, if any, and clears the reference so that a later call opens a
     * fresh one.
     *
     * @throws IOException if closing the stream fails.
     */
    private void close() throws IOException {
        if (mOutStream != null) {
            mOutStream.close();
            mOutStream = null;
        }
    }

    /**
     * Writes the given dictionary in version 2 binary format.
     *
     * <p>The output stream is closed when this method returns, whether it completes normally or
     * throws after the stream has been opened. A format-version rejection happens before the
     * stream is touched and leaves it as it was.
     *
     * @param dict the dictionary to write.
     * @param formatOptions the format options; its version must not exceed
     *        {@code FormatSpec.VERSION2}.
     * @throws IOException if opening, writing to or closing the output fails.
     * @throws UnsupportedFormatException if the requested format version is greater than
     *         {@code FormatSpec.VERSION2}.
     */
    @Override
    public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
            throws IOException, UnsupportedFormatException {
        if (formatOptions.mVersion > FormatSpec.VERSION2) {
            throw new UnsupportedFormatException(
                    "The given format options has wrong version number : "
                    + formatOptions.mVersion);
        }

        if (mOutStream == null) {
            openStream();
        }
        // From here on the stream is open: make sure it is released even if encoding fails, so
        // that the file handle does not leak and a stale stream is not reused by a later call.
        try {
            BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions);

            // Addresses are limited to 3 bytes, but since addresses can be relative to each node
            // array, the structure itself is not limited to 16MB. However, if it is over 16MB
            // deciding the order of the PtNode arrays becomes a quite complicated problem,
            // because though the dictionary itself does not have a size limit, each node array
            // must still be within 16MB of all its children and parents. As long as this is
            // ensured, the dictionary file may grow to any size.

            // Leave the choice of the optimal node order to the flattenTree function.
            MakedictLog.i("Flattening the tree...");
            final ArrayList<PtNodeArray> flatNodes =
                    BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);

            MakedictLog.i("Computing addresses...");
            BinaryDictEncoderUtils.computeAddresses(dict, flatNodes);
            MakedictLog.i("Checking PtNode array...");
            if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);

            // Create a buffer that matches the final dictionary size.
            final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
            final int bufferSize =
                    lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
            mBuffer = new byte[bufferSize];
            // Start from the beginning of the fresh buffer so that the position left over from
            // a previous call cannot carry into this one.
            mPosition = 0;

            MakedictLog.i("Writing file...");

            for (final PtNodeArray nodeArray : flatNodes) {
                BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray);
            }
            if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
            // Only the bytes up to the current position are written out.
            mOutStream.write(mBuffer, 0, mPosition);

            MakedictLog.i("Done");
        } finally {
            close();
        }
    }

    /**
     * Moves the write cursor. The request is silently ignored when no buffer has been allocated
     * yet or when the position falls outside the buffer.
     *
     * @param position the new offset into the buffer.
     */
    @Override
    public void setPosition(final int position) {
        if (mBuffer == null || position < 0 || position >= mBuffer.length) return;
        mPosition = position;
    }

    /**
     * @return the current offset of the write cursor in the buffer.
     */
    @Override
    public int getPosition() {
        return mPosition;
    }

    /**
     * Writes the PtNode count of a node array at the current position, on one or two bytes. When
     * two bytes are needed, the count is combined with
     * {@code FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG}.
     *
     * @param ptNodeCount the number of PtNodes in the array.
     * @throws RuntimeException if the computed count size is neither 1 nor 2.
     */
    @Override
    public void writePtNodeCount(final int ptNodeCount) {
        final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
        if (countSize != 1 && countSize != 2) {
            throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize);
        }
        final int encodedPtNodeCount = (countSize == 2) ?
                (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount;
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, encodedPtNodeCount,
                countSize);
    }

    /**
     * Writes the flags of a PtNode at the current position.
     *
     * @param ptNode the PtNode whose flags are written.
     */
    private void writePtNodeFlags(final PtNode ptNode) {
        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode);
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
                BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos),
                FormatSpec.PTNODE_FLAGS_SIZE);
    }

    /**
     * Writes the characters of a PtNode at the current position, followed by
     * {@code FormatSpec.PTNODE_CHARACTERS_TERMINATOR} when the node has several characters.
     *
     * @param codePoints the code points to write.
     * @param hasSeveralChars whether a terminator must follow the characters.
     */
    private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
        mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition);
        if (hasSeveralChars) {
            mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
        }
    }

    /**
     * Writes a frequency at the current position. Nothing is written for a negative frequency.
     *
     * @param frequency the frequency to write.
     */
    private void writeFrequency(final int frequency) {
        if (frequency >= 0) {
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency,
                    FormatSpec.PTNODE_FREQUENCY_SIZE);
        }
    }

    /**
     * Writes the children position of a PtNode at the current position.
     *
     * @param ptNode the PtNode whose children position is written.
     */
    private void writeChildrenPosition(final PtNode ptNode) {
        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode);
        // Unlike writeUIntToBuffer, the result of this helper is added to the position.
        mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
                childrenPos);
    }

    /**
     * Write a shortcut attributes list to mBuffer.
     *
     * <p>The list is preceded by a size field, which is written last because its value (the
     * byte size of the list, size field included) is only known once all entries are written.
     *
     * @param shortcuts the shortcut attributes list. Nothing is written if null or empty.
     * @throws RuntimeException if the list is larger than
     *         {@code FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE}.
     */
    private void writeShortcuts(final ArrayList<WeightedString> shortcuts) {
        if (null == shortcuts || shortcuts.isEmpty()) return;

        // Reserve room for the size field and come back to fill it in after the entries.
        final int indexOfShortcutByteSize = mPosition;
        mPosition += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
        final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
        while (shortcutIterator.hasNext()) {
            final WeightedString target = shortcutIterator.next();
            // hasNext() is evaluated after next(), so it tells whether more entries follow.
            final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
                    shortcutIterator.hasNext(),
                    target.getProbability());
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
                    FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
            mPosition += shortcutShift;
        }
        final int shortcutByteSize = mPosition - indexOfShortcutByteSize;
        if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
            throw new RuntimeException("Shortcut list too large");
        }
        // The return value is deliberately ignored: the cursor is already past the list.
        BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize,
                FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
    }

    /**
     * Write a bigram attributes list to mBuffer.
     *
     * @param bigrams the bigram attributes list. Nothing is written if null.
     * @param dict the dictionary the node array is a part of (for relative offsets).
     */
    private void writeBigrams(final ArrayList<WeightedString> bigrams,
            final FusionDictionary dict) {
        if (bigrams == null) return;

        final Iterator<WeightedString> bigramIterator = bigrams.iterator();
        while (bigramIterator.hasNext()) {
            final WeightedString bigram = bigramIterator.next();
            // NOTE(review): target is dereferenced without a null check; presumably every
            // bigram word is guaranteed to be present in the dictionary - confirm.
            final PtNode target =
                    FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
            final int addressOfBigram = target.mCachedAddressAfterUpdate;
            final int unigramFrequencyForThisWord = target.getProbability();
            // The offset is relative to the position right after the attribute flags.
            final int offset = addressOfBigram
                    - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
                    offset, bigram.getProbability(), unigramFrequencyForThisWord, bigram.mWord);
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
                    FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            // Only the magnitude is written here; the signed offset was passed to
            // makeBigramFlags, which presumably records its direction - confirm.
            mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
                    Math.abs(offset));
        }
    }

    /**
     * Writes a forward link address at the current position.
     *
     * @param forwardLinkAddress the address to write.
     */
    @Override
    public void writeForwardLinkAddress(final int forwardLinkAddress) {
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, forwardLinkAddress,
                FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
    }

    /**
     * Writes a complete PtNode at the current position: flags, characters, frequency, children
     * position, shortcuts and bigrams, in that order.
     *
     * @param ptNode the PtNode to write.
     * @param dict the dictionary the PtNode belongs to, used to resolve bigram targets.
     */
    @Override
    public void writePtNode(final PtNode ptNode, final FusionDictionary dict) {
        writePtNodeFlags(ptNode);
        writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
        writeFrequency(ptNode.getProbability());
        writeChildrenPosition(ptNode);
        writeShortcuts(ptNode.mShortcutTargets);
        writeBigrams(ptNode.mBigrams, dict);
    }
}