1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.makedict; 18 19 import com.android.inputmethod.annotations.UsedForTesting; 20 import com.android.inputmethod.latin.Constants; 21 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; 22 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; 23 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; 24 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 25 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; 26 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; 27 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; 28 29 import java.io.File; 30 import java.io.FileInputStream; 31 import java.io.FileNotFoundException; 32 import java.io.IOException; 33 import java.io.OutputStream; 34 import java.util.ArrayList; 35 import java.util.Iterator; 36 import java.util.Map; 37 import java.util.Stack; 38 39 public final class BinaryDictIOUtils { 40 private static final boolean DBG = false; 41 42 private BinaryDictIOUtils() { 43 // This utility class is not publicly instantiable. 44 } 45 46 private static final class Position { 47 public static final int NOT_READ_PTNODE_COUNT = -1; 48 49 public int mAddress; 50 public int mNumOfPtNode; 51 public int mPosition; 52 public int mLength; 53 54 public Position(int address, int length) { 55 mAddress = address; 56 mLength = length; 57 mNumOfPtNode = NOT_READ_PTNODE_COUNT; 58 } 59 } 60 61 /** 62 * Retrieves all node arrays without recursive call. 63 */ 64 private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder, 65 final int headerSize, final Map<Integer, String> words, 66 final Map<Integer, Integer> frequencies, 67 final Map<Integer, ArrayList<PendingAttribute>> bigrams, 68 final FormatOptions formatOptions) { 69 int[] pushedChars = new int[FormatSpec.MAX_WORD_LENGTH + 1]; 70 71 Stack<Position> stack = new Stack<Position>(); 72 int index = 0; 73 74 Position initPos = new Position(headerSize, 0); 75 stack.push(initPos); 76 77 while (!stack.empty()) { 78 Position p = stack.peek(); 79 80 if (DBG) { 81 MakedictLog.d("read: address=" + p.mAddress + ", numOfPtNode=" + 82 p.mNumOfPtNode + ", position=" + p.mPosition + ", length=" + p.mLength); 83 } 84 85 if (dictDecoder.getPosition() != p.mAddress) dictDecoder.setPosition(p.mAddress); 86 if (index != p.mLength) index = p.mLength; 87 88 if (p.mNumOfPtNode == Position.NOT_READ_PTNODE_COUNT) { 89 p.mNumOfPtNode = dictDecoder.readPtNodeCount(); 90 p.mAddress += getPtNodeCountSize(p.mNumOfPtNode); 91 p.mPosition = 0; 92 } 93 if (p.mNumOfPtNode == 0) { 94 stack.pop(); 95 continue; 96 } 97 PtNodeInfo info = dictDecoder.readPtNode(p.mAddress, formatOptions); 98 for (int i = 0; i < info.mCharacters.length; ++i) { 99 pushedChars[index++] = info.mCharacters[i]; 100 } 101 p.mPosition++; 102 103 final boolean isMovedPtNode = isMovedPtNode(info.mFlags, 104 formatOptions); 105 final boolean isDeletedPtNode = isDeletedPtNode(info.mFlags, 106 formatOptions); 107 if (!isMovedPtNode && !isDeletedPtNode 108 && info.mFrequency != FusionDictionary.PtNode.NOT_A_TERMINAL) {// found word 109 words.put(info.mOriginalAddress, new String(pushedChars, 0, index)); 110 frequencies.put(info.mOriginalAddress, info.mFrequency); 111 if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams); 112 } 113 114 if (p.mPosition == p.mNumOfPtNode) { 115 if (formatOptions.mSupportsDynamicUpdate) { 116 final boolean hasValidForwardLinkAddress = 117 dictDecoder.readAndFollowForwardLink(); 118 if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) { 119 // The node array has a forward link. 120 p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT; 121 p.mAddress = dictDecoder.getPosition(); 122 } else { 123 stack.pop(); 124 } 125 } else { 126 stack.pop(); 127 } 128 } else { 129 // The Ptnode array has more PtNodes. 130 p.mAddress = dictDecoder.getPosition(); 131 } 132 133 if (!isMovedPtNode && hasChildrenAddress(info.mChildrenAddress)) { 134 final Position childrenPos = new Position(info.mChildrenAddress, index); 135 stack.push(childrenPos); 136 } 137 } 138 } 139 140 /** 141 * Reads unigrams and bigrams from the binary file. 142 * Doesn't store a full memory representation of the dictionary. 143 * 144 * @param dictDecoder the dict decoder. 145 * @param words the map to store the address as a key and the word as a value. 146 * @param frequencies the map to store the address as a key and the frequency as a value. 147 * @param bigrams the map to store the address as a key and the list of address as a value. 148 * @throws IOException if the file can't be read. 149 * @throws UnsupportedFormatException if the format of the file is not recognized. 150 */ 151 /* package */ static void readUnigramsAndBigramsBinary(final DictDecoder dictDecoder, 152 final Map<Integer, String> words, final Map<Integer, Integer> frequencies, 153 final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, 154 UnsupportedFormatException { 155 // Read header 156 final FileHeader header = dictDecoder.readHeader(); 157 readUnigramsAndBigramsBinaryInner(dictDecoder, header.mHeaderSize, words, 158 frequencies, bigrams, header.mFormatOptions); 159 } 160 161 /** 162 * Gets the address of the last PtNode of the exact matching word in the dictionary. 163 * If no match is found, returns NOT_VALID_WORD. 164 * 165 * @param dictDecoder the dict decoder. 166 * @param word the word we search for. 167 * @return the address of the terminal node. 168 * @throws IOException if the file can't be read. 169 * @throws UnsupportedFormatException if the format of the file is not recognized. 170 */ 171 @UsedForTesting 172 /* package */ static int getTerminalPosition(final DictDecoder dictDecoder, 173 final String word) throws IOException, UnsupportedFormatException { 174 if (word == null) return FormatSpec.NOT_VALID_WORD; 175 dictDecoder.setPosition(0); 176 177 final FileHeader header = dictDecoder.readHeader(); 178 int wordPos = 0; 179 final int wordLen = word.codePointCount(0, word.length()); 180 for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { 181 if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD; 182 183 do { 184 final int ptNodeCount = dictDecoder.readPtNodeCount(); 185 boolean foundNextPtNode = false; 186 for (int i = 0; i < ptNodeCount; ++i) { 187 final int ptNodePos = dictDecoder.getPosition(); 188 final PtNodeInfo currentInfo = dictDecoder.readPtNode(ptNodePos, 189 header.mFormatOptions); 190 final boolean isMovedNode = isMovedPtNode(currentInfo.mFlags, 191 header.mFormatOptions); 192 final boolean isDeletedNode = isDeletedPtNode(currentInfo.mFlags, 193 header.mFormatOptions); 194 if (isMovedNode) continue; 195 boolean same = true; 196 for (int p = 0, j = word.offsetByCodePoints(0, wordPos); 197 p < currentInfo.mCharacters.length; 198 ++p, j = word.offsetByCodePoints(j, 1)) { 199 if (wordPos + p >= wordLen 200 || word.codePointAt(j) != currentInfo.mCharacters[p]) { 201 same = false; 202 break; 203 } 204 } 205 206 if (same) { 207 // found the PtNode matches the word. 208 if (wordPos + currentInfo.mCharacters.length == wordLen) { 209 if (currentInfo.mFrequency == PtNode.NOT_A_TERMINAL 210 || isDeletedNode) { 211 return FormatSpec.NOT_VALID_WORD; 212 } else { 213 return ptNodePos; 214 } 215 } 216 wordPos += currentInfo.mCharacters.length; 217 if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) { 218 return FormatSpec.NOT_VALID_WORD; 219 } 220 foundNextPtNode = true; 221 dictDecoder.setPosition(currentInfo.mChildrenAddress); 222 break; 223 } 224 } 225 226 // If we found the next PtNode, it is under the file pointer. 227 // But if not, we are at the end of this node array so we expect to have 228 // a forward link address that we need to consult and possibly resume 229 // search on the next node array in the linked list. 230 if (foundNextPtNode) break; 231 if (!header.mFormatOptions.mSupportsDynamicUpdate) { 232 return FormatSpec.NOT_VALID_WORD; 233 } 234 235 final boolean hasValidForwardLinkAddress = 236 dictDecoder.readAndFollowForwardLink(); 237 if (!hasValidForwardLinkAddress || !dictDecoder.hasNextPtNodeArray()) { 238 return FormatSpec.NOT_VALID_WORD; 239 } 240 } while(true); 241 } 242 return FormatSpec.NOT_VALID_WORD; 243 } 244 245 /** 246 * @return the size written, in bytes. Always 3 bytes. 247 */ 248 static int writeSInt24ToBuffer(final DictBuffer dictBuffer, 249 final int value) { 250 final int absValue = Math.abs(value); 251 dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF)); 252 dictBuffer.put((byte)((absValue >> 8) & 0xFF)); 253 dictBuffer.put((byte)(absValue & 0xFF)); 254 return 3; 255 } 256 257 /** 258 * @return the size written, in bytes. Always 3 bytes. 259 */ 260 static int writeSInt24ToStream(final OutputStream destination, final int value) 261 throws IOException { 262 final int absValue = Math.abs(value); 263 destination.write((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF)); 264 destination.write((byte)((absValue >> 8) & 0xFF)); 265 destination.write((byte)(absValue & 0xFF)); 266 return 3; 267 } 268 269 /** 270 * @return the size written, in bytes. 1, 2, or 3 bytes. 271 */ 272 private static int writeVariableAddress(final OutputStream destination, final int value) 273 throws IOException { 274 switch (BinaryDictEncoderUtils.getByteSize(value)) { 275 case 1: 276 destination.write((byte)value); 277 break; 278 case 2: 279 destination.write((byte)(0xFF & (value >> 8))); 280 destination.write((byte)(0xFF & value)); 281 break; 282 case 3: 283 destination.write((byte)(0xFF & (value >> 16))); 284 destination.write((byte)(0xFF & (value >> 8))); 285 destination.write((byte)(0xFF & value)); 286 break; 287 } 288 return BinaryDictEncoderUtils.getByteSize(value); 289 } 290 291 static void skipString(final DictBuffer dictBuffer, 292 final boolean hasMultipleChars) { 293 if (hasMultipleChars) { 294 int character = CharEncoding.readChar(dictBuffer); 295 while (character != FormatSpec.INVALID_CHARACTER) { 296 character = CharEncoding.readChar(dictBuffer); 297 } 298 } else { 299 CharEncoding.readChar(dictBuffer); 300 } 301 } 302 303 /** 304 * Write a string to a stream. 305 * 306 * @param destination the stream to write. 307 * @param word the string to be written. 308 * @return the size written, in bytes. 309 * @throws IOException 310 */ 311 private static int writeString(final OutputStream destination, final String word) 312 throws IOException { 313 int size = 0; 314 final int length = word.length(); 315 for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { 316 final int codePoint = word.codePointAt(i); 317 if (CharEncoding.getCharSize(codePoint) == 1) { 318 destination.write((byte)codePoint); 319 size++; 320 } else { 321 destination.write((byte)(0xFF & (codePoint >> 16))); 322 destination.write((byte)(0xFF & (codePoint >> 8))); 323 destination.write((byte)(0xFF & codePoint)); 324 size += 3; 325 } 326 } 327 destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR); 328 size += FormatSpec.PTNODE_TERMINATOR_SIZE; 329 return size; 330 } 331 332 /** 333 * Write a PtNode to an output stream from a PtNodeInfo. 334 * A PtNode is an in-memory representation of a node in the patricia trie. 335 * A PtNode info is a container for low-level information about how the 336 * PtNode is stored in the binary format. 337 * 338 * @param destination the stream to write. 339 * @param info the PtNode info to be written. 340 * @return the size written, in bytes. 341 */ 342 private static int writePtNode(final OutputStream destination, final PtNodeInfo info) 343 throws IOException { 344 int size = FormatSpec.PTNODE_FLAGS_SIZE; 345 destination.write((byte)info.mFlags); 346 final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ? 347 FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress; 348 size += writeSInt24ToStream(destination, parentOffset); 349 350 for (int i = 0; i < info.mCharacters.length; ++i) { 351 if (CharEncoding.getCharSize(info.mCharacters[i]) == 1) { 352 destination.write((byte)info.mCharacters[i]); 353 size++; 354 } else { 355 size += writeSInt24ToStream(destination, info.mCharacters[i]); 356 } 357 } 358 if (info.mCharacters.length > 1) { 359 destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR); 360 size++; 361 } 362 363 if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { 364 destination.write((byte)info.mFrequency); 365 size++; 366 } 367 368 if (DBG) { 369 MakedictLog.d("writePtNode origin=" + info.mOriginalAddress + ", size=" + size 370 + ", child=" + info.mChildrenAddress + ", characters =" 371 + new String(info.mCharacters, 0, info.mCharacters.length)); 372 } 373 final int childrenOffset = info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS ? 374 0 : info.mChildrenAddress - (info.mOriginalAddress + size); 375 writeSInt24ToStream(destination, childrenOffset); 376 size += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; 377 378 if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) { 379 final int shortcutListSize = 380 BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets); 381 destination.write((byte)(shortcutListSize >> 8)); 382 destination.write((byte)(shortcutListSize & 0xFF)); 383 size += 2; 384 final Iterator<WeightedString> shortcutIterator = info.mShortcutTargets.iterator(); 385 while (shortcutIterator.hasNext()) { 386 final WeightedString target = shortcutIterator.next(); 387 destination.write((byte)BinaryDictEncoderUtils.makeShortcutFlags( 388 shortcutIterator.hasNext(), target.mFrequency)); 389 size++; 390 size += writeString(destination, target.mWord); 391 } 392 } 393 394 if (info.mBigrams != null) { 395 // TODO: Consolidate this code with the code that computes the size of the bigram list 396 // in BinaryDictEncoderUtils#computeActualNodeArraySize 397 for (int i = 0; i < info.mBigrams.size(); ++i) { 398 399 final int bigramFrequency = info.mBigrams.get(i).mFrequency; 400 int bigramFlags = (i < info.mBigrams.size() - 1) 401 ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0; 402 size++; 403 final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress 404 + size); 405 bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0; 406 switch (BinaryDictEncoderUtils.getByteSize(bigramOffset)) { 407 case 1: 408 bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE; 409 break; 410 case 2: 411 bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES; 412 break; 413 case 3: 414 bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES; 415 break; 416 } 417 bigramFlags |= bigramFrequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY; 418 destination.write((byte)bigramFlags); 419 size += writeVariableAddress(destination, Math.abs(bigramOffset)); 420 } 421 } 422 return size; 423 } 424 425 /** 426 * Compute the size of the PtNode. 427 */ 428 static int computePtNodeSize(final PtNodeInfo info, final FormatOptions formatOptions) { 429 int size = FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE 430 + BinaryDictEncoderUtils.getPtNodeCharactersSize(info.mCharacters) 431 + getChildrenAddressSize(info.mFlags, formatOptions); 432 if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { 433 size += FormatSpec.PTNODE_FREQUENCY_SIZE; 434 } 435 if (info.mShortcutTargets != null && !info.mShortcutTargets.isEmpty()) { 436 size += BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets); 437 } 438 if (info.mBigrams != null) { 439 for (final PendingAttribute attr : info.mBigrams) { 440 size += FormatSpec.PTNODE_FLAGS_SIZE; 441 size += BinaryDictEncoderUtils.getByteSize(attr.mAddress); 442 } 443 } 444 return size; 445 } 446 447 /** 448 * Write a node array to the stream. 449 * 450 * @param destination the stream to write. 451 * @param infos an array of PtNodeInfo to be written. 452 * @return the size written, in bytes. 453 * @throws IOException 454 */ 455 static int writeNodes(final OutputStream destination, final PtNodeInfo[] infos) 456 throws IOException { 457 int size = getPtNodeCountSize(infos.length); 458 switch (getPtNodeCountSize(infos.length)) { 459 case 1: 460 destination.write((byte)infos.length); 461 break; 462 case 2: 463 final int encodedPtNodeCount = 464 infos.length | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG; 465 destination.write((byte)(encodedPtNodeCount >> 8)); 466 destination.write((byte)(encodedPtNodeCount & 0xFF)); 467 break; 468 default: 469 throw new RuntimeException("Invalid node count size."); 470 } 471 for (final PtNodeInfo info : infos) size += writePtNode(destination, info); 472 writeSInt24ToStream(destination, FormatSpec.NO_FORWARD_LINK_ADDRESS); 473 return size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; 474 } 475 476 private static final int HEADER_READING_BUFFER_SIZE = 16384; 477 /** 478 * Convenience method to read the header of a binary file. 479 * 480 * This is quite resource intensive - don't call when performance is critical. 481 * 482 * @param file The file to read. 483 * @param offset The offset in the file where to start reading the data. 484 * @param length The length of the data file. 485 */ 486 private static FileHeader getDictionaryFileHeader( 487 final File file, final long offset, final long length) 488 throws FileNotFoundException, IOException, UnsupportedFormatException { 489 final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE]; 490 final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, 491 new DictDecoder.DictionaryBufferFactory() { 492 @Override 493 public DictBuffer getDictionaryBuffer(File file) 494 throws FileNotFoundException, IOException { 495 final FileInputStream inStream = new FileInputStream(file); 496 try { 497 inStream.skip(offset); 498 inStream.read(buffer); 499 return new ByteArrayDictBuffer(buffer); 500 } finally { 501 inStream.close(); 502 } 503 } 504 } 505 ); 506 return dictDecoder.readHeader(); 507 } 508 509 public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset, 510 final long length) { 511 try { 512 final FileHeader header = getDictionaryFileHeader(file, offset, length); 513 return header; 514 } catch (UnsupportedFormatException e) { 515 return null; 516 } catch (IOException e) { 517 return null; 518 } 519 } 520 521 /** 522 * Helper method to hide the actual value of the no children address. 523 */ 524 public static boolean hasChildrenAddress(final int address) { 525 return FormatSpec.NO_CHILDREN_ADDRESS != address; 526 } 527 528 /** 529 * Helper method to check whether the node is moved. 530 */ 531 public static boolean isMovedPtNode(final int flags, final FormatOptions options) { 532 return options.mSupportsDynamicUpdate 533 && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED); 534 } 535 536 /** 537 * Helper method to check whether the dictionary can be updated dynamically. 538 */ 539 public static boolean supportsDynamicUpdate(final FormatOptions options) { 540 return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_DYNAMIC_UPDATE 541 && options.mSupportsDynamicUpdate; 542 } 543 544 /** 545 * Helper method to check whether the node is deleted. 546 */ 547 public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) { 548 return formatOptions.mSupportsDynamicUpdate 549 && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED); 550 } 551 552 /** 553 * Compute the binary size of the node count 554 * @param count the node count 555 * @return the size of the node count, either 1 or 2 bytes. 556 */ 557 public static int getPtNodeCountSize(final int count) { 558 if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= count) { 559 return 1; 560 } else if (FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY >= count) { 561 return 2; 562 } else { 563 throw new RuntimeException("Can't have more than " 564 + FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY + " PtNode in a PtNodeArray (found " 565 + count + ")"); 566 } 567 } 568 569 static int getChildrenAddressSize(final int optionFlags, 570 final FormatOptions formatOptions) { 571 if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; 572 switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { 573 case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: 574 return 1; 575 case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: 576 return 2; 577 case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: 578 return 3; 579 case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: 580 default: 581 return 0; 582 } 583 } 584 585 /** 586 * Calculate bigram frequency from compressed value 587 * 588 * @param unigramFrequency 589 * @param bigramFrequency compressed frequency 590 * @return approximate bigram frequency 591 */ 592 public static int reconstructBigramFrequency(final int unigramFrequency, 593 final int bigramFrequency) { 594 final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency) 595 / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY); 596 final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f); 597 return (int)resultFreqFloat; 598 } 599 } 600