1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.makedict; 18 19 import com.android.inputmethod.annotations.UsedForTesting; 20 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; 21 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; 22 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; 23 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 24 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; 25 26 import java.io.IOException; 27 import java.util.ArrayList; 28 import java.util.HashMap; 29 import java.util.TreeMap; 30 31 /** 32 * A base class of the binary dictionary decoder. 33 */ 34 public abstract class AbstractDictDecoder implements DictDecoder { 35 protected FileHeader readHeader(final DictBuffer dictBuffer) 36 throws IOException, UnsupportedFormatException { 37 if (dictBuffer == null) { 38 openDictBuffer(); 39 } 40 41 final int version = HeaderReader.readVersion(dictBuffer); 42 if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION 43 || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { 44 throw new UnsupportedFormatException("Unsupported version : " + version); 45 } 46 // TODO: Remove this field. 47 final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer); 48 49 final int headerSize = HeaderReader.readHeaderSize(dictBuffer); 50 51 if (headerSize < 0) { 52 throw new UnsupportedFormatException("header size can't be negative."); 53 } 54 55 final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer, 56 headerSize); 57 58 final FileHeader header = new FileHeader(headerSize, 59 new FusionDictionary.DictionaryOptions(attributes, 60 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), 61 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), 62 new FormatOptions(version, 63 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE), 64 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG))); 65 return header; 66 } 67 68 @Override @UsedForTesting 69 public int getTerminalPosition(final String word) 70 throws IOException, UnsupportedFormatException { 71 if (!isDictBufferOpen()) { 72 openDictBuffer(); 73 } 74 return BinaryDictIOUtils.getTerminalPosition(this, word); 75 } 76 77 @Override @UsedForTesting 78 public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words, 79 final TreeMap<Integer, Integer> frequencies, 80 final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams) 81 throws IOException, UnsupportedFormatException { 82 if (!isDictBufferOpen()) { 83 openDictBuffer(); 84 } 85 BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); 86 } 87 88 /** 89 * A utility class for reading a file header. 90 */ 91 protected static class HeaderReader { 92 protected static int readVersion(final DictBuffer dictBuffer) 93 throws IOException, UnsupportedFormatException { 94 return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); 95 } 96 97 protected static int readOptionFlags(final DictBuffer dictBuffer) { 98 return dictBuffer.readUnsignedShort(); 99 } 100 101 protected static int readHeaderSize(final DictBuffer dictBuffer) { 102 return dictBuffer.readInt(); 103 } 104 105 protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer, 106 final int headerSize) { 107 final HashMap<String, String> attributes = new HashMap<String, String>(); 108 while (dictBuffer.position() < headerSize) { 109 // We can avoid an infinite loop here since dictBuffer.position() is always 110 // increased by calling CharEncoding.readString. 111 final String key = CharEncoding.readString(dictBuffer); 112 final String value = CharEncoding.readString(dictBuffer); 113 attributes.put(key, value); 114 } 115 dictBuffer.position(headerSize); 116 return attributes; 117 } 118 } 119 120 /** 121 * A utility class for reading a PtNode. 122 */ 123 protected static class PtNodeReader { 124 protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { 125 return dictBuffer.readUnsignedByte(); 126 } 127 128 protected static int readParentAddress(final DictBuffer dictBuffer, 129 final FormatOptions formatOptions) { 130 if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { 131 return BinaryDictDecoderUtils.readSInt24(dictBuffer); 132 } else { 133 return FormatSpec.NO_PARENT_ADDRESS; 134 } 135 } 136 137 protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, 138 final FormatOptions formatOptions) { 139 if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { 140 final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); 141 if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; 142 return address; 143 } else { 144 switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { 145 case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: 146 return dictBuffer.readUnsignedByte(); 147 case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: 148 return dictBuffer.readUnsignedShort(); 149 case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: 150 return dictBuffer.readUnsignedInt24(); 151 case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: 152 default: 153 return FormatSpec.NO_CHILDREN_ADDRESS; 154 } 155 } 156 } 157 158 // Reads shortcuts and returns the read length. 159 protected static int readShortcut(final DictBuffer dictBuffer, 160 final ArrayList<WeightedString> shortcutTargets) { 161 final int pointerBefore = dictBuffer.position(); 162 dictBuffer.readUnsignedShort(); // skip the size 163 while (true) { 164 final int targetFlags = dictBuffer.readUnsignedByte(); 165 final String word = CharEncoding.readString(dictBuffer); 166 shortcutTargets.add(new WeightedString(word, 167 targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); 168 if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; 169 } 170 return dictBuffer.position() - pointerBefore; 171 } 172 173 protected static int readBigramAddresses(final DictBuffer dictBuffer, 174 final ArrayList<PendingAttribute> bigrams, final int baseAddress) { 175 int readLength = 0; 176 int bigramCount = 0; 177 while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { 178 final int bigramFlags = dictBuffer.readUnsignedByte(); 179 ++readLength; 180 final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) 181 ? 1 : -1; 182 int bigramAddress = baseAddress + readLength; 183 switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { 184 case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: 185 bigramAddress += sign * dictBuffer.readUnsignedByte(); 186 readLength += 1; 187 break; 188 case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: 189 bigramAddress += sign * dictBuffer.readUnsignedShort(); 190 readLength += 2; 191 break; 192 case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: 193 bigramAddress += sign * dictBuffer.readUnsignedInt24(); 194 readLength += 3; 195 break; 196 default: 197 throw new RuntimeException("Has bigrams with no address"); 198 } 199 bigrams.add(new PendingAttribute( 200 bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, 201 bigramAddress)); 202 if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; 203 } 204 return readLength; 205 } 206 } 207 } 208