1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.makedict; 18 19 import com.android.inputmethod.annotations.UsedForTesting; 20 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; 21 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; 22 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; 23 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 24 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; 25 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; 26 import com.android.inputmethod.latin.utils.CollectionUtils; 27 28 import android.util.Log; 29 30 import java.io.File; 31 import java.io.FileNotFoundException; 32 import java.io.IOException; 33 import java.util.ArrayList; 34 import java.util.Arrays; 35 36 /** 37 * An implementation of binary dictionary decoder for version 4 binary dictionary. 38 */ 39 @UsedForTesting 40 public class Ver4DictDecoder extends AbstractDictDecoder { 41 private static final String TAG = Ver4DictDecoder.class.getSimpleName(); 42 43 private static final int FILETYPE_TRIE = 1; 44 private static final int FILETYPE_FREQUENCY = 2; 45 private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3; 46 private static final int FILETYPE_BIGRAM_FREQ = 4; 47 private static final int FILETYPE_SHORTCUT = 5; 48 49 private final File mDictDirectory; 50 private final DictionaryBufferFactory mBufferFactory; 51 protected DictBuffer mDictBuffer; 52 private DictBuffer mFrequencyBuffer; 53 private DictBuffer mTerminalAddressTableBuffer; 54 private DictBuffer mBigramBuffer; 55 private DictBuffer mShortcutBuffer; 56 private SparseTable mBigramAddressTable; 57 private SparseTable mShortcutAddressTable; 58 59 @UsedForTesting 60 /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { 61 mDictDirectory = dictDirectory; 62 mDictBuffer = mFrequencyBuffer = null; 63 64 if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) { 65 mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory(); 66 } else if ((factoryFlag & MASK_DICTBUFFER) == USE_BYTEARRAY) { 67 mBufferFactory = new DictionaryBufferFromByteArrayFactory(); 68 } else if ((factoryFlag & MASK_DICTBUFFER) == USE_WRITABLE_BYTEBUFFER) { 69 mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory(); 70 } else { 71 mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory(); 72 } 73 } 74 75 @UsedForTesting 76 /* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) { 77 mDictDirectory = dictDirectory; 78 mBufferFactory = factory; 79 mDictBuffer = mFrequencyBuffer = null; 80 } 81 82 private File getFile(final int fileType) { 83 if (fileType == FILETYPE_TRIE) { 84 return new File(mDictDirectory, 85 mDictDirectory.getName() + FormatSpec.TRIE_FILE_EXTENSION); 86 } else if (fileType == FILETYPE_FREQUENCY) { 87 return new File(mDictDirectory, 88 mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION); 89 } else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) { 90 return new File(mDictDirectory, 91 mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); 92 } else if (fileType == FILETYPE_BIGRAM_FREQ) { 93 return new File(mDictDirectory, 94 mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION 95 + FormatSpec.BIGRAM_FREQ_CONTENT_ID); 96 } else if (fileType == FILETYPE_SHORTCUT) { 97 return new File(mDictDirectory, 98 mDictDirectory.getName() + FormatSpec.SHORTCUT_FILE_EXTENSION 99 + FormatSpec.SHORTCUT_CONTENT_ID); 100 } else { 101 throw new RuntimeException("Unsupported kind of file : " + fileType); 102 } 103 } 104 105 @Override 106 public void openDictBuffer() throws FileNotFoundException, IOException { 107 mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE)); 108 mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); 109 mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( 110 getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); 111 mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ)); 112 loadBigramAddressSparseTable(); 113 mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT)); 114 loadShortcutAddressSparseTable(); 115 } 116 117 @Override 118 public boolean isDictBufferOpen() { 119 return mDictBuffer != null; 120 } 121 122 /* package */ DictBuffer getDictBuffer() { 123 return mDictBuffer; 124 } 125 126 @Override 127 public FileHeader readHeader() throws IOException, UnsupportedFormatException { 128 if (mDictBuffer == null) { 129 openDictBuffer(); 130 } 131 final FileHeader header = super.readHeader(mDictBuffer); 132 final int version = header.mFormatOptions.mVersion; 133 if (version != 4) { 134 throw new UnsupportedFormatException("File header has a wrong version : " + version); 135 } 136 return header; 137 } 138 139 private void loadBigramAddressSparseTable() throws IOException { 140 final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() 141 + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); 142 final File freqsFile = new File(mDictDirectory, mDictDirectory.getName() 143 + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX 144 + FormatSpec.BIGRAM_FREQ_CONTENT_ID); 145 mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { freqsFile }, 146 FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); 147 } 148 149 // TODO: Let's have something like SparseTableContentsReader in this class. 150 private void loadShortcutAddressSparseTable() throws IOException { 151 final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() 152 + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); 153 final File contentFile = new File(mDictDirectory, mDictDirectory.getName() 154 + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX 155 + FormatSpec.SHORTCUT_CONTENT_ID); 156 mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile, 157 new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); 158 } 159 160 protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { 161 protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { 162 frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); 163 return frequencyBuffer.readUnsignedByte(); 164 } 165 166 protected static int readTerminalId(final DictBuffer dictBuffer) { 167 return dictBuffer.readInt(); 168 } 169 } 170 171 private ArrayList<WeightedString> readShortcuts(final int terminalId) { 172 if (mShortcutAddressTable.get(0, terminalId) == SparseTable.NOT_EXIST) return null; 173 174 final ArrayList<WeightedString> ret = CollectionUtils.newArrayList(); 175 final int posOfShortcuts = mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX, 176 terminalId); 177 mShortcutBuffer.position(posOfShortcuts); 178 while (true) { 179 final int flags = mShortcutBuffer.readUnsignedByte(); 180 final String word = CharEncoding.readString(mShortcutBuffer); 181 ret.add(new WeightedString(word, 182 flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); 183 if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; 184 } 185 return ret; 186 } 187 188 // TODO: Make this buffer thread safe. 189 // TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH. 190 private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; 191 @Override 192 public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) { 193 int addressPointer = ptNodePos; 194 final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); 195 addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; 196 197 final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options); 198 if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { 199 addressPointer += FormatSpec.PARENT_ADDRESS_SIZE; 200 } 201 202 final int characters[]; 203 if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { 204 int index = 0; 205 int character = CharEncoding.readChar(mDictBuffer); 206 addressPointer += CharEncoding.getCharSize(character); 207 while (FormatSpec.INVALID_CHARACTER != character 208 && index < FormatSpec.MAX_WORD_LENGTH) { 209 mCharacterBuffer[index++] = character; 210 character = CharEncoding.readChar(mDictBuffer); 211 addressPointer += CharEncoding.getCharSize(character); 212 } 213 characters = Arrays.copyOfRange(mCharacterBuffer, 0, index); 214 } else { 215 final int character = CharEncoding.readChar(mDictBuffer); 216 addressPointer += CharEncoding.getCharSize(character); 217 characters = new int[] { character }; 218 } 219 final int terminalId; 220 if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { 221 terminalId = PtNodeReader.readTerminalId(mDictBuffer); 222 addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE; 223 } else { 224 terminalId = PtNode.NOT_A_TERMINAL; 225 } 226 227 final int frequency; 228 if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { 229 frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId); 230 } else { 231 frequency = PtNode.NOT_A_TERMINAL; 232 } 233 int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); 234 if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { 235 childrenAddress += addressPointer; 236 } 237 addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); 238 final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId); 239 240 final ArrayList<PendingAttribute> bigrams; 241 if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { 242 bigrams = new ArrayList<PendingAttribute>(); 243 final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId); 244 mBigramBuffer.position(posOfBigrams); 245 while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { 246 // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, 247 // remaining bigram entries are ignored. 248 final int bigramFlags = mBigramBuffer.readUnsignedByte(); 249 final int targetTerminalId = mBigramBuffer.readUnsignedInt24(); 250 mTerminalAddressTableBuffer.position( 251 targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); 252 final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24(); 253 bigrams.add(new PendingAttribute( 254 bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, 255 targetAddress)); 256 if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; 257 } 258 if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { 259 throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size() 260 + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); 261 } 262 } else { 263 bigrams = null; 264 } 265 return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency, 266 parentAddress, childrenAddress, shortcutTargets, bigrams); 267 } 268 269 private void deleteDictFiles() { 270 final File[] files = mDictDirectory.listFiles(); 271 for (int i = 0; i < files.length; ++i) { 272 files[i].delete(); 273 } 274 } 275 276 @Override 277 public FusionDictionary readDictionaryBinary(final FusionDictionary dict, 278 final boolean deleteDictIfBroken) 279 throws FileNotFoundException, IOException, UnsupportedFormatException { 280 if (mDictBuffer == null) { 281 openDictBuffer(); 282 } 283 try { 284 return BinaryDictDecoderUtils.readDictionaryBinary(this, dict); 285 } catch (IOException e) { 286 Log.e(TAG, "The dictionary " + mDictDirectory.getName() + " is broken.", e); 287 if (deleteDictIfBroken) { 288 deleteDictFiles(); 289 } 290 throw e; 291 } catch (UnsupportedFormatException e) { 292 Log.e(TAG, "The dictionary " + mDictDirectory.getName() + " is broken.", e); 293 if (deleteDictIfBroken) { 294 deleteDictFiles(); 295 } 296 throw e; 297 } 298 } 299 300 @Override 301 public void setPosition(int newPos) { 302 mDictBuffer.position(newPos); 303 } 304 305 @Override 306 public int getPosition() { 307 return mDictBuffer.position(); 308 } 309 310 @Override 311 public int readPtNodeCount() { 312 return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer); 313 } 314 315 @Override 316 public boolean readAndFollowForwardLink() { 317 final int nextAddress = mDictBuffer.readUnsignedInt24(); 318 if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) { 319 mDictBuffer.position(nextAddress); 320 return true; 321 } 322 return false; 323 } 324 325 @Override 326 public boolean hasNextPtNodeArray() { 327 return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS; 328 } 329 330 @Override 331 public void skipPtNode(final FormatOptions formatOptions) { 332 final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); 333 PtNodeReader.readParentAddress(mDictBuffer, formatOptions); 334 BinaryDictIOUtils.skipString(mDictBuffer, 335 (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); 336 if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readTerminalId(mDictBuffer); 337 PtNodeReader.readChildrenAddress(mDictBuffer, flags, formatOptions); 338 } 339 } 340