/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils;

import android.util.Log;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;

/**
 * An implementation of binary dictionary decoder for version 4 binary dictionary.
 *
 * A version 4 dictionary is a directory; every file this decoder opens is named after that
 * directory plus an extension taken from {@link FormatSpec}. The trie, the frequencies, the
 * terminal address table, the bigrams and the shortcuts each live in their own buffer.
 */
@UsedForTesting
public class Ver4DictDecoder extends AbstractDictDecoder {
    private static final String TAG = Ver4DictDecoder.class.getSimpleName();

    // Selectors understood by getFile().
    private static final int FILETYPE_TRIE = 1;
    private static final int FILETYPE_FREQUENCY = 2;
    private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
    private static final int FILETYPE_BIGRAM_FREQ = 4;
    private static final int FILETYPE_SHORTCUT = 5;

    private final File mDictDirectory;
    private final DictionaryBufferFactory mBufferFactory;
    // All buffers and tables below stay null until openDictBuffer() has run.
    protected DictBuffer mDictBuffer;
    private DictBuffer mFrequencyBuffer;
    private DictBuffer mTerminalAddressTableBuffer;
    private DictBuffer mBigramBuffer;
    private DictBuffer mShortcutBuffer;
    private SparseTable mBigramAddressTable;
    private SparseTable mShortcutAddressTable;

    /**
     * Creates a decoder whose buffer factory is selected by a flag.
     *
     * @param dictDirectory the directory that holds the dictionary files.
     * @param factoryFlag masked with MASK_DICTBUFFER and compared against
     *        USE_READONLY_BYTEBUFFER, USE_BYTEARRAY and USE_WRITABLE_BYTEBUFFER; any other
     *        value selects the read-only byte buffer factory.
     */
    @UsedForTesting
    /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
        mDictDirectory = dictDirectory;
        mDictBuffer = mFrequencyBuffer = null;

        if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
            mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
        } else if ((factoryFlag & MASK_DICTBUFFER) == USE_BYTEARRAY) {
            mBufferFactory = new DictionaryBufferFromByteArrayFactory();
        } else if ((factoryFlag & MASK_DICTBUFFER) == USE_WRITABLE_BYTEBUFFER) {
            mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory();
        } else {
            // Unknown flag: fall back to the read-only byte buffer factory.
            mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
        }
    }

    /**
     * Creates a decoder that uses the supplied buffer factory.
     *
     * @param dictDirectory the directory that holds the dictionary files.
     * @param factory the factory used to load every file of the dictionary.
     */
    @UsedForTesting
    /* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
        mDictDirectory = dictDirectory;
        mBufferFactory = factory;
        mDictBuffer = mFrequencyBuffer = null;
    }

    /**
     * Builds the path of one of the dictionary files.
     *
     * @param fileType one of the FILETYPE_* constants of this class.
     * @return the file inside the dictionary directory, named after the directory.
     * @throws RuntimeException if {@code fileType} is not a known FILETYPE_* value.
     */
    private File getFile(final int fileType) {
        if (fileType == FILETYPE_TRIE) {
            return new File(mDictDirectory,
                    mDictDirectory.getName() + FormatSpec.TRIE_FILE_EXTENSION);
        } else if (fileType == FILETYPE_FREQUENCY) {
            return new File(mDictDirectory,
                    mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION);
        } else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) {
            return new File(mDictDirectory,
                    mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
        } else if (fileType == FILETYPE_BIGRAM_FREQ) {
            return new File(mDictDirectory,
                    mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION
                            + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
        } else if (fileType == FILETYPE_SHORTCUT) {
            return new File(mDictDirectory,
                    mDictDirectory.getName() + FormatSpec.SHORTCUT_FILE_EXTENSION
                            + FormatSpec.SHORTCUT_CONTENT_ID);
        } else {
            throw new RuntimeException("Unsupported kind of file : " + fileType);
        }
    }

    /**
     * Loads every buffer and address table of the dictionary through the buffer factory.
     *
     * NOTE(review): the trie buffer is assigned first, so if a later file fails to load,
     * {@link #isDictBufferOpen()} already reports true while the other buffers are still null.
     */
    @Override
    public void openDictBuffer() throws FileNotFoundException, IOException {
        mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE));
        mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
        mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
                getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
        mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ));
        loadBigramAddressSparseTable();
        mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
        loadShortcutAddressSparseTable();
    }

    /** Returns whether the trie buffer has been assigned. */
    @Override
    public boolean isDictBufferOpen() {
        return mDictBuffer != null;
    }

    /** Returns the trie buffer, or null if it has not been opened yet. */
    /* package */ DictBuffer getDictBuffer() {
        return mDictBuffer;
    }

    /**
     * Reads the file header from the trie buffer, opening the buffers first if necessary.
     *
     * @return the decoded header.
     * @throws UnsupportedFormatException if the version found in the header is not 4.
     */
    @Override
    public FileHeader readHeader() throws IOException, UnsupportedFormatException {
        if (mDictBuffer == null) {
            openDictBuffer();
        }
        final FileHeader header = super.readHeader(mDictBuffer);
        final int version = header.mFormatOptions.mVersion;
        if (version != 4) {
            throw new UnsupportedFormatException("File header has a wrong version : " + version);
        }
        return header;
    }

    /** Loads the sparse table that maps a terminal id to its position in the bigram buffer. */
    private void loadBigramAddressSparseTable() throws IOException {
        final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
                + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
        final File freqsFile = new File(mDictDirectory, mDictDirectory.getName()
                + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
                + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
        mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { freqsFile },
                FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
    }

    // TODO: Let's have something like SparseTableContentsReader in this class.
    /** Loads the sparse table that maps a terminal id to its position in the shortcut buffer. */
    private void loadShortcutAddressSparseTable() throws IOException {
        final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
                + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
        final File contentFile = new File(mDictDirectory, mDictDirectory.getName()
                + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
                + FormatSpec.SHORTCUT_CONTENT_ID);
        // NOTE(review): this path is built from exactly the same components as contentFile, so
        // both content tables are read from the same file. Confirm whether a dedicated
        // timestamp content id was intended here.
        final File timestampsFile = new File(mDictDirectory, mDictDirectory.getName()
                + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
                + FormatSpec.SHORTCUT_CONTENT_ID);
        mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile,
                new File[] { contentFile, timestampsFile },
                FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
    }

    /** Version 4 specific readers, added on top of the shared PtNode readers. */
    protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
        /**
         * Reads the frequency stored for a terminal.
         *
         * @param frequencyBuffer the buffer holding one fixed-size entry per terminal.
         * @param terminalId the index of the entry to read.
         * @return the unsigned byte found one byte into the entry (presumably right after a
         *         flags byte; confirm against the format specification).
         */
        protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
            frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
            return frequencyBuffer.readUnsignedByte();
        }

        /** Reads a terminal id, stored as an int, at the current position of the buffer. */
        protected static int readTerminalId(final DictBuffer dictBuffer) {
            return dictBuffer.readInt();
        }
    }

    /**
     * Reads the shortcut targets attached to a terminal.
     *
     * @param terminalId the terminal whose shortcuts are wanted.
     * @return the shortcuts with their frequencies, or null when the address table has no
     *         entry for this terminal.
     */
    private ArrayList<WeightedString> readShortcuts(final int terminalId) {
        // NOTE(review): the existence probe uses a literal 0 while the lookup below uses
        // FormatSpec.SHORTCUT_CONTENT_INDEX; the two agree only if that constant is 0.
        if (mShortcutAddressTable.get(0, terminalId) == SparseTable.NOT_EXIST) return null;

        final ArrayList<WeightedString> ret = CollectionUtils.newArrayList();
        final int posOfShortcuts = mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX,
                terminalId);
        mShortcutBuffer.position(posOfShortcuts);
        // Entries are chained: each one carries a flag telling whether another one follows.
        while (true) {
            final int flags = mShortcutBuffer.readUnsignedByte();
            final String word = CharEncoding.readString(mShortcutBuffer);
            ret.add(new WeightedString(word,
                    flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
            if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
        }
        return ret;
    }

    // TODO: Make this buffer thread safe.
    // TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH.
    private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];

    /**
     * Decodes the PtNode found at the current position of the trie buffer.
     *
     * NOTE(review): this method never seeks to {@code ptNodePos}; the value is only used for
     * address arithmetic, so the trie buffer is assumed to be positioned there already.
     *
     * @param ptNodePos the address of the PtNode, used as the origin of the address counter.
     * @param options the format options of the dictionary being read.
     * @return the decoded node. Its bigram list is null when the node has no bigram flag.
     * @throws RuntimeException if the number of bigrams read reaches
     *         FormatSpec.MAX_BIGRAMS_IN_A_PTNODE.
     */
    @Override
    public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) {
        // addressPointer mirrors the number of bytes consumed from the trie buffer.
        int addressPointer = ptNodePos;
        final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
        addressPointer += FormatSpec.PTNODE_FLAGS_SIZE;

        final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options);
        if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
            addressPointer += FormatSpec.PARENT_ADDRESS_SIZE;
        }

        final int characters[];
        if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
            // Characters are read until INVALID_CHARACTER shows up or the scratch buffer is
            // full. The character that ends the loop is consumed and counted, not stored.
            int index = 0;
            int character = CharEncoding.readChar(mDictBuffer);
            addressPointer += CharEncoding.getCharSize(character);
            while (FormatSpec.INVALID_CHARACTER != character
                    && index < FormatSpec.MAX_WORD_LENGTH) {
                mCharacterBuffer[index++] = character;
                character = CharEncoding.readChar(mDictBuffer);
                addressPointer += CharEncoding.getCharSize(character);
            }
            characters = Arrays.copyOfRange(mCharacterBuffer, 0, index);
        } else {
            final int character = CharEncoding.readChar(mDictBuffer);
            addressPointer += CharEncoding.getCharSize(character);
            characters = new int[] { character };
        }
        final int terminalId;
        if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
            terminalId = PtNodeReader.readTerminalId(mDictBuffer);
            addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
        } else {
            terminalId = PtNode.NOT_A_TERMINAL;
        }

        final int frequency;
        if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
            frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId);
        } else {
            frequency = PtNode.NOT_A_TERMINAL;
        }
        int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
        if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
            // The stored value is an offset from the position of the children address field.
            childrenAddress += addressPointer;
        }
        addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
        // NOTE(review): this lookup also runs for non-terminal nodes, with
        // PtNode.NOT_A_TERMINAL as the id; it relies on the sparse table handling that value.
        final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);

        final ArrayList<PendingAttribute> bigrams;
        if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
            bigrams = new ArrayList<PendingAttribute>();
            final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId);
            mBigramBuffer.position(posOfBigrams);
            while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
                // Reading stops once FormatSpec.MAX_BIGRAMS_IN_A_PTNODE entries have been
                // collected; the check after the loop then rejects the node.
                final int bigramFlags = mBigramBuffer.readUnsignedByte();
                final int targetTerminalId = mBigramBuffer.readUnsignedInt24();
                // Resolve the target terminal id into an address through the address table.
                mTerminalAddressTableBuffer.position(
                        targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
                final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24();
                bigrams.add(new PendingAttribute(
                        bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
                        targetAddress));
                if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
            }
            // NOTE(review): this also fires when exactly MAX_BIGRAMS_IN_A_PTNODE entries were
            // read and the last one had no "has next" flag. Confirm whether the limit is
            // meant to be exclusive.
            if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
                throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
                        + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
            }
        } else {
            bigrams = null;
        }
        return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
                parentAddress, childrenAddress, shortcutTargets, bigrams);
    }

    /**
     * Deletes every file directly inside the dictionary directory, on a best-effort basis.
     *
     * {@link File#listFiles()} returns null when the path is not a directory or cannot be
     * read. That case is logged and skipped, so that a failed cleanup never replaces the
     * exception the caller is about to rethrow.
     */
    private void deleteDictFiles() {
        final File[] files = mDictDirectory.listFiles();
        if (files == null) {
            Log.e(TAG, "Cannot list the files of " + mDictDirectory.getAbsolutePath());
            return;
        }
        for (final File file : files) {
            if (!file.delete()) {
                Log.w(TAG, "Failed to delete " + file.getAbsolutePath());
            }
        }
    }

    /** Logs a decoding failure and, when requested, removes the files of the dictionary. */
    private void handleBrokenDictionary(final Exception cause, final boolean deleteDictIfBroken) {
        Log.e(TAG, "The dictionary " + mDictDirectory.getName() + " is broken.", cause);
        if (deleteDictIfBroken) {
            deleteDictFiles();
        }
    }

    /**
     * Reads the whole dictionary, opening the buffers first if necessary.
     *
     * @param dict passed through to BinaryDictDecoderUtils.readDictionaryBinary.
     * @param deleteDictIfBroken whether to delete the dictionary files when decoding fails.
     * @return the dictionary produced by BinaryDictDecoderUtils.readDictionaryBinary.
     * @throws IOException rethrown after logging (and optional cleanup) if decoding fails.
     * @throws UnsupportedFormatException rethrown after logging (and optional cleanup).
     */
    @Override
    public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
            final boolean deleteDictIfBroken)
            throws FileNotFoundException, IOException, UnsupportedFormatException {
        if (mDictBuffer == null) {
            openDictBuffer();
        }
        try {
            return BinaryDictDecoderUtils.readDictionaryBinary(this, dict);
        } catch (IOException e) {
            handleBrokenDictionary(e, deleteDictIfBroken);
            throw e;
        } catch (UnsupportedFormatException e) {
            handleBrokenDictionary(e, deleteDictIfBroken);
            throw e;
        }
    }

    /** Moves the trie buffer to the given position. */
    @Override
    public void setPosition(int newPos) {
        mDictBuffer.position(newPos);
    }

    /** Returns the current position of the trie buffer. */
    @Override
    public int getPosition() {
        return mDictBuffer.position();
    }

    /** Reads a PtNode count at the current position of the trie buffer. */
    @Override
    public int readPtNodeCount() {
        return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer);
    }

    /**
     * Reads a 3-byte forward link and jumps to it when it lies inside the trie buffer.
     *
     * @return true if the buffer was repositioned, false if the link was out of range. The
     *         three bytes of the link are consumed in both cases.
     */
    @Override
    public boolean readAndFollowForwardLink() {
        final int nextAddress = mDictBuffer.readUnsignedInt24();
        if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) {
            mDictBuffer.position(nextAddress);
            return true;
        }
        return false;
    }

    /** Returns whether the trie buffer position differs from NO_FORWARD_LINK_ADDRESS. */
    @Override
    public boolean hasNextPtNodeArray() {
        return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS;
    }

    /**
     * Advances the trie buffer past one PtNode without building its contents.
     *
     * @param formatOptions the format options of the dictionary being read.
     */
    @Override
    public void skipPtNode(final FormatOptions formatOptions) {
        final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
        PtNodeReader.readParentAddress(mDictBuffer, formatOptions);
        BinaryDictIOUtils.skipString(mDictBuffer,
                (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
        if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
            // The terminal id is only present on terminal nodes.
            PtNodeReader.readTerminalId(mDictBuffer);
        }
        PtNodeReader.readChildrenAddress(mDictBuffer, flags, formatOptions);
    }
}