Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import com.android.inputmethod.annotations.UsedForTesting;
     20 import com.android.inputmethod.latin.Constants;
     21 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
     22 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
     23 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
     24 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     25 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
     26 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
     27 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
     28 
     29 import java.io.File;
     30 import java.io.FileInputStream;
     31 import java.io.FileNotFoundException;
     32 import java.io.IOException;
     33 import java.io.OutputStream;
     34 import java.util.ArrayList;
     35 import java.util.Iterator;
     36 import java.util.Map;
     37 import java.util.Stack;
     38 
     39 public final class BinaryDictIOUtils {
     40     private static final boolean DBG = false;
     41 
    private BinaryDictIOUtils() {
        // This class only holds static helpers; the private constructor keeps code outside
        // this class from instantiating it.
    }
     45 
     46     private static final class Position {
     47         public static final int NOT_READ_PTNODE_COUNT = -1;
     48 
     49         public int mAddress;
     50         public int mNumOfPtNode;
     51         public int mPosition;
     52         public int mLength;
     53 
     54         public Position(int address, int length) {
     55             mAddress = address;
     56             mLength = length;
     57             mNumOfPtNode = NOT_READ_PTNODE_COUNT;
     58         }
     59     }
     60 
     61     /**
     62      * Retrieves all node arrays without recursive call.
     63      */
     64     private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder,
     65             final int headerSize, final Map<Integer, String> words,
     66             final Map<Integer, Integer> frequencies,
     67             final Map<Integer, ArrayList<PendingAttribute>> bigrams,
     68             final FormatOptions formatOptions) {
     69         int[] pushedChars = new int[FormatSpec.MAX_WORD_LENGTH + 1];
     70 
     71         Stack<Position> stack = new Stack<Position>();
     72         int index = 0;
     73 
     74         Position initPos = new Position(headerSize, 0);
     75         stack.push(initPos);
     76 
     77         while (!stack.empty()) {
     78             Position p = stack.peek();
     79 
     80             if (DBG) {
     81                 MakedictLog.d("read: address=" + p.mAddress + ", numOfPtNode=" +
     82                         p.mNumOfPtNode + ", position=" + p.mPosition + ", length=" + p.mLength);
     83             }
     84 
     85             if (dictDecoder.getPosition() != p.mAddress) dictDecoder.setPosition(p.mAddress);
     86             if (index != p.mLength) index = p.mLength;
     87 
     88             if (p.mNumOfPtNode == Position.NOT_READ_PTNODE_COUNT) {
     89                 p.mNumOfPtNode = dictDecoder.readPtNodeCount();
     90                 p.mAddress += getPtNodeCountSize(p.mNumOfPtNode);
     91                 p.mPosition = 0;
     92             }
     93             if (p.mNumOfPtNode == 0) {
     94                 stack.pop();
     95                 continue;
     96             }
     97             PtNodeInfo info = dictDecoder.readPtNode(p.mAddress, formatOptions);
     98             for (int i = 0; i < info.mCharacters.length; ++i) {
     99                 pushedChars[index++] = info.mCharacters[i];
    100             }
    101             p.mPosition++;
    102 
    103             final boolean isMovedPtNode = isMovedPtNode(info.mFlags,
    104                     formatOptions);
    105             final boolean isDeletedPtNode = isDeletedPtNode(info.mFlags,
    106                     formatOptions);
    107             if (!isMovedPtNode && !isDeletedPtNode
    108                     && info.mFrequency != FusionDictionary.PtNode.NOT_A_TERMINAL) {// found word
    109                 words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
    110                 frequencies.put(info.mOriginalAddress, info.mFrequency);
    111                 if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
    112             }
    113 
    114             if (p.mPosition == p.mNumOfPtNode) {
    115                 if (formatOptions.mSupportsDynamicUpdate) {
    116                     final boolean hasValidForwardLinkAddress =
    117                             dictDecoder.readAndFollowForwardLink();
    118                     if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
    119                         // The node array has a forward link.
    120                         p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT;
    121                         p.mAddress = dictDecoder.getPosition();
    122                     } else {
    123                         stack.pop();
    124                     }
    125                 } else {
    126                     stack.pop();
    127                 }
    128             } else {
    129                 // The Ptnode array has more PtNodes.
    130                 p.mAddress = dictDecoder.getPosition();
    131             }
    132 
    133             if (!isMovedPtNode && hasChildrenAddress(info.mChildrenAddress)) {
    134                 final Position childrenPos = new Position(info.mChildrenAddress, index);
    135                 stack.push(childrenPos);
    136             }
    137         }
    138     }
    139 
    140     /**
    141      * Reads unigrams and bigrams from the binary file.
    142      * Doesn't store a full memory representation of the dictionary.
    143      *
    144      * @param dictDecoder the dict decoder.
    145      * @param words the map to store the address as a key and the word as a value.
    146      * @param frequencies the map to store the address as a key and the frequency as a value.
    147      * @param bigrams the map to store the address as a key and the list of address as a value.
    148      * @throws IOException if the file can't be read.
    149      * @throws UnsupportedFormatException if the format of the file is not recognized.
    150      */
    151     /* package */ static void readUnigramsAndBigramsBinary(final DictDecoder dictDecoder,
    152             final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
    153             final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
    154             UnsupportedFormatException {
    155         // Read header
    156         final FileHeader header = dictDecoder.readHeader();
    157         readUnigramsAndBigramsBinaryInner(dictDecoder, header.mHeaderSize, words,
    158                 frequencies, bigrams, header.mFormatOptions);
    159     }
    160 
    161     /**
    162      * Gets the address of the last PtNode of the exact matching word in the dictionary.
    163      * If no match is found, returns NOT_VALID_WORD.
    164      *
    165      * @param dictDecoder the dict decoder.
    166      * @param word the word we search for.
    167      * @return the address of the terminal node.
    168      * @throws IOException if the file can't be read.
    169      * @throws UnsupportedFormatException if the format of the file is not recognized.
    170      */
    171     @UsedForTesting
    172     /* package */ static int getTerminalPosition(final DictDecoder dictDecoder,
    173             final String word) throws IOException, UnsupportedFormatException {
    174         if (word == null) return FormatSpec.NOT_VALID_WORD;
    175         dictDecoder.setPosition(0);
    176 
    177         final FileHeader header = dictDecoder.readHeader();
    178         int wordPos = 0;
    179         final int wordLen = word.codePointCount(0, word.length());
    180         for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
    181             if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
    182 
    183             do {
    184                 final int ptNodeCount = dictDecoder.readPtNodeCount();
    185                 boolean foundNextPtNode = false;
    186                 for (int i = 0; i < ptNodeCount; ++i) {
    187                     final int ptNodePos = dictDecoder.getPosition();
    188                     final PtNodeInfo currentInfo = dictDecoder.readPtNode(ptNodePos,
    189                             header.mFormatOptions);
    190                     final boolean isMovedNode = isMovedPtNode(currentInfo.mFlags,
    191                             header.mFormatOptions);
    192                     final boolean isDeletedNode = isDeletedPtNode(currentInfo.mFlags,
    193                             header.mFormatOptions);
    194                     if (isMovedNode) continue;
    195                     boolean same = true;
    196                     for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
    197                             p < currentInfo.mCharacters.length;
    198                             ++p, j = word.offsetByCodePoints(j, 1)) {
    199                         if (wordPos + p >= wordLen
    200                                 || word.codePointAt(j) != currentInfo.mCharacters[p]) {
    201                             same = false;
    202                             break;
    203                         }
    204                     }
    205 
    206                     if (same) {
    207                         // found the PtNode matches the word.
    208                         if (wordPos + currentInfo.mCharacters.length == wordLen) {
    209                             if (currentInfo.mFrequency == PtNode.NOT_A_TERMINAL
    210                                     || isDeletedNode) {
    211                                 return FormatSpec.NOT_VALID_WORD;
    212                             } else {
    213                                 return ptNodePos;
    214                             }
    215                         }
    216                         wordPos += currentInfo.mCharacters.length;
    217                         if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
    218                             return FormatSpec.NOT_VALID_WORD;
    219                         }
    220                         foundNextPtNode = true;
    221                         dictDecoder.setPosition(currentInfo.mChildrenAddress);
    222                         break;
    223                     }
    224                 }
    225 
    226                 // If we found the next PtNode, it is under the file pointer.
    227                 // But if not, we are at the end of this node array so we expect to have
    228                 // a forward link address that we need to consult and possibly resume
    229                 // search on the next node array in the linked list.
    230                 if (foundNextPtNode) break;
    231                 if (!header.mFormatOptions.mSupportsDynamicUpdate) {
    232                     return FormatSpec.NOT_VALID_WORD;
    233                 }
    234 
    235                 final boolean hasValidForwardLinkAddress =
    236                         dictDecoder.readAndFollowForwardLink();
    237                 if (!hasValidForwardLinkAddress || !dictDecoder.hasNextPtNodeArray()) {
    238                     return FormatSpec.NOT_VALID_WORD;
    239                 }
    240             } while(true);
    241         }
    242         return FormatSpec.NOT_VALID_WORD;
    243     }
    244 
    245     /**
    246      * @return the size written, in bytes. Always 3 bytes.
    247      */
    248     static int writeSInt24ToBuffer(final DictBuffer dictBuffer,
    249             final int value) {
    250         final int absValue = Math.abs(value);
    251         dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
    252         dictBuffer.put((byte)((absValue >> 8) & 0xFF));
    253         dictBuffer.put((byte)(absValue & 0xFF));
    254         return 3;
    255     }
    256 
    257     /**
    258      * @return the size written, in bytes. Always 3 bytes.
    259      */
    260     static int writeSInt24ToStream(final OutputStream destination, final int value)
    261             throws IOException {
    262         final int absValue = Math.abs(value);
    263         destination.write((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
    264         destination.write((byte)((absValue >> 8) & 0xFF));
    265         destination.write((byte)(absValue & 0xFF));
    266         return 3;
    267     }
    268 
    269     /**
    270      * @return the size written, in bytes. 1, 2, or 3 bytes.
    271      */
    272     private static int writeVariableAddress(final OutputStream destination, final int value)
    273             throws IOException {
    274         switch (BinaryDictEncoderUtils.getByteSize(value)) {
    275         case 1:
    276             destination.write((byte)value);
    277             break;
    278         case 2:
    279             destination.write((byte)(0xFF & (value >> 8)));
    280             destination.write((byte)(0xFF & value));
    281             break;
    282         case 3:
    283             destination.write((byte)(0xFF & (value >> 16)));
    284             destination.write((byte)(0xFF & (value >> 8)));
    285             destination.write((byte)(0xFF & value));
    286             break;
    287         }
    288         return BinaryDictEncoderUtils.getByteSize(value);
    289     }
    290 
    291     static void skipString(final DictBuffer dictBuffer,
    292             final boolean hasMultipleChars) {
    293         if (hasMultipleChars) {
    294             int character = CharEncoding.readChar(dictBuffer);
    295             while (character != FormatSpec.INVALID_CHARACTER) {
    296                 character = CharEncoding.readChar(dictBuffer);
    297             }
    298         } else {
    299             CharEncoding.readChar(dictBuffer);
    300         }
    301     }
    302 
    303     /**
    304      * Write a string to a stream.
    305      *
    306      * @param destination the stream to write.
    307      * @param word the string to be written.
    308      * @return the size written, in bytes.
    309      * @throws IOException
    310      */
    311     private static int writeString(final OutputStream destination, final String word)
    312             throws IOException {
    313         int size = 0;
    314         final int length = word.length();
    315         for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
    316             final int codePoint = word.codePointAt(i);
    317             if (CharEncoding.getCharSize(codePoint) == 1) {
    318                 destination.write((byte)codePoint);
    319                 size++;
    320             } else {
    321                 destination.write((byte)(0xFF & (codePoint >> 16)));
    322                 destination.write((byte)(0xFF & (codePoint >> 8)));
    323                 destination.write((byte)(0xFF & codePoint));
    324                 size += 3;
    325             }
    326         }
    327         destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
    328         size += FormatSpec.PTNODE_TERMINATOR_SIZE;
    329         return size;
    330     }
    331 
    332     /**
    333      * Write a PtNode to an output stream from a PtNodeInfo.
    334      * A PtNode is an in-memory representation of a node in the patricia trie.
    335      * A PtNode info is a container for low-level information about how the
    336      * PtNode is stored in the binary format.
    337      *
    338      * @param destination the stream to write.
    339      * @param info the PtNode info to be written.
    340      * @return the size written, in bytes.
    341      */
    342     private static int writePtNode(final OutputStream destination, final PtNodeInfo info)
    343             throws IOException {
    344         int size = FormatSpec.PTNODE_FLAGS_SIZE;
    345         destination.write((byte)info.mFlags);
    346         final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ?
    347                 FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress;
    348         size += writeSInt24ToStream(destination, parentOffset);
    349 
    350         for (int i = 0; i < info.mCharacters.length; ++i) {
    351             if (CharEncoding.getCharSize(info.mCharacters[i]) == 1) {
    352                 destination.write((byte)info.mCharacters[i]);
    353                 size++;
    354             } else {
    355                 size += writeSInt24ToStream(destination, info.mCharacters[i]);
    356             }
    357         }
    358         if (info.mCharacters.length > 1) {
    359             destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
    360             size++;
    361         }
    362 
    363         if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
    364             destination.write((byte)info.mFrequency);
    365             size++;
    366         }
    367 
    368         if (DBG) {
    369             MakedictLog.d("writePtNode origin=" + info.mOriginalAddress + ", size=" + size
    370                     + ", child=" + info.mChildrenAddress + ", characters ="
    371                     + new String(info.mCharacters, 0, info.mCharacters.length));
    372         }
    373         final int childrenOffset = info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS ?
    374                 0 : info.mChildrenAddress - (info.mOriginalAddress + size);
    375         writeSInt24ToStream(destination, childrenOffset);
    376         size += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
    377 
    378         if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) {
    379             final int shortcutListSize =
    380                     BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets);
    381             destination.write((byte)(shortcutListSize >> 8));
    382             destination.write((byte)(shortcutListSize & 0xFF));
    383             size += 2;
    384             final Iterator<WeightedString> shortcutIterator = info.mShortcutTargets.iterator();
    385             while (shortcutIterator.hasNext()) {
    386                 final WeightedString target = shortcutIterator.next();
    387                 destination.write((byte)BinaryDictEncoderUtils.makeShortcutFlags(
    388                         shortcutIterator.hasNext(), target.mFrequency));
    389                 size++;
    390                 size += writeString(destination, target.mWord);
    391             }
    392         }
    393 
    394         if (info.mBigrams != null) {
    395             // TODO: Consolidate this code with the code that computes the size of the bigram list
    396             //        in BinaryDictEncoderUtils#computeActualNodeArraySize
    397             for (int i = 0; i < info.mBigrams.size(); ++i) {
    398 
    399                 final int bigramFrequency = info.mBigrams.get(i).mFrequency;
    400                 int bigramFlags = (i < info.mBigrams.size() - 1)
    401                         ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0;
    402                 size++;
    403                 final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress
    404                         + size);
    405                 bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0;
    406                 switch (BinaryDictEncoderUtils.getByteSize(bigramOffset)) {
    407                 case 1:
    408                     bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE;
    409                     break;
    410                 case 2:
    411                     bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES;
    412                     break;
    413                 case 3:
    414                     bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES;
    415                     break;
    416                 }
    417                 bigramFlags |= bigramFrequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
    418                 destination.write((byte)bigramFlags);
    419                 size += writeVariableAddress(destination, Math.abs(bigramOffset));
    420             }
    421         }
    422         return size;
    423     }
    424 
    425     /**
    426      * Compute the size of the PtNode.
    427      */
    428     static int computePtNodeSize(final PtNodeInfo info, final FormatOptions formatOptions) {
    429         int size = FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE
    430                 + BinaryDictEncoderUtils.getPtNodeCharactersSize(info.mCharacters)
    431                 + getChildrenAddressSize(info.mFlags, formatOptions);
    432         if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
    433             size += FormatSpec.PTNODE_FREQUENCY_SIZE;
    434         }
    435         if (info.mShortcutTargets != null && !info.mShortcutTargets.isEmpty()) {
    436             size += BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets);
    437         }
    438         if (info.mBigrams != null) {
    439             for (final PendingAttribute attr : info.mBigrams) {
    440                 size += FormatSpec.PTNODE_FLAGS_SIZE;
    441                 size += BinaryDictEncoderUtils.getByteSize(attr.mAddress);
    442             }
    443         }
    444         return size;
    445     }
    446 
    447     /**
    448      * Write a node array to the stream.
    449      *
    450      * @param destination the stream to write.
    451      * @param infos an array of PtNodeInfo to be written.
    452      * @return the size written, in bytes.
    453      * @throws IOException
    454      */
    455     static int writeNodes(final OutputStream destination, final PtNodeInfo[] infos)
    456             throws IOException {
    457         int size = getPtNodeCountSize(infos.length);
    458         switch (getPtNodeCountSize(infos.length)) {
    459             case 1:
    460                 destination.write((byte)infos.length);
    461                 break;
    462             case 2:
    463                 final int encodedPtNodeCount =
    464                         infos.length | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
    465                 destination.write((byte)(encodedPtNodeCount >> 8));
    466                 destination.write((byte)(encodedPtNodeCount & 0xFF));
    467                 break;
    468             default:
    469                 throw new RuntimeException("Invalid node count size.");
    470         }
    471         for (final PtNodeInfo info : infos) size += writePtNode(destination, info);
    472         writeSInt24ToStream(destination, FormatSpec.NO_FORWARD_LINK_ADDRESS);
    473         return size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
    474     }
    475 
    476     private static final int HEADER_READING_BUFFER_SIZE = 16384;
    477     /**
    478      * Convenience method to read the header of a binary file.
    479      *
    480      * This is quite resource intensive - don't call when performance is critical.
    481      *
    482      * @param file The file to read.
    483      * @param offset The offset in the file where to start reading the data.
    484      * @param length The length of the data file.
    485      */
    486     private static FileHeader getDictionaryFileHeader(
    487             final File file, final long offset, final long length)
    488             throws FileNotFoundException, IOException, UnsupportedFormatException {
    489         final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
    490         final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file,
    491                 new DictDecoder.DictionaryBufferFactory() {
    492                     @Override
    493                     public DictBuffer getDictionaryBuffer(File file)
    494                             throws FileNotFoundException, IOException {
    495                         final FileInputStream inStream = new FileInputStream(file);
    496                         try {
    497                             inStream.skip(offset);
    498                             inStream.read(buffer);
    499                             return new ByteArrayDictBuffer(buffer);
    500                         } finally {
    501                             inStream.close();
    502                         }
    503                     }
    504                 }
    505         );
    506         return dictDecoder.readHeader();
    507     }
    508 
    509     public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
    510             final long length) {
    511         try {
    512             final FileHeader header = getDictionaryFileHeader(file, offset, length);
    513             return header;
    514         } catch (UnsupportedFormatException e) {
    515             return null;
    516         } catch (IOException e) {
    517             return null;
    518         }
    519     }
    520 
    521     /**
    522      * Helper method to hide the actual value of the no children address.
    523      */
    524     public static boolean hasChildrenAddress(final int address) {
    525         return FormatSpec.NO_CHILDREN_ADDRESS != address;
    526     }
    527 
    528     /**
    529      * Helper method to check whether the node is moved.
    530      */
    531     public static boolean isMovedPtNode(final int flags, final FormatOptions options) {
    532         return options.mSupportsDynamicUpdate
    533                 && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED);
    534     }
    535 
    536     /**
    537      * Helper method to check whether the dictionary can be updated dynamically.
    538      */
    539     public static boolean supportsDynamicUpdate(final FormatOptions options) {
    540         return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_DYNAMIC_UPDATE
    541                 && options.mSupportsDynamicUpdate;
    542     }
    543 
    544     /**
    545      * Helper method to check whether the node is deleted.
    546      */
    547     public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) {
    548         return formatOptions.mSupportsDynamicUpdate
    549                 && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED);
    550     }
    551 
    552     /**
    553      * Compute the binary size of the node count
    554      * @param count the node count
    555      * @return the size of the node count, either 1 or 2 bytes.
    556      */
    557     public static int getPtNodeCountSize(final int count) {
    558         if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= count) {
    559             return 1;
    560         } else if (FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY >= count) {
    561             return 2;
    562         } else {
    563             throw new RuntimeException("Can't have more than "
    564                     + FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY + " PtNode in a PtNodeArray (found "
    565                     + count + ")");
    566         }
    567     }
    568 
    569     static int getChildrenAddressSize(final int optionFlags,
    570             final FormatOptions formatOptions) {
    571         if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
    572         switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
    573             case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
    574                 return 1;
    575             case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
    576                 return 2;
    577             case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
    578                 return 3;
    579             case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
    580             default:
    581                 return 0;
    582         }
    583     }
    584 
    585     /**
    586      * Calculate bigram frequency from compressed value
    587      *
    588      * @param unigramFrequency
    589      * @param bigramFrequency compressed frequency
    590      * @return approximate bigram frequency
    591      */
    592     public static int reconstructBigramFrequency(final int unigramFrequency,
    593             final int bigramFrequency) {
    594         final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
    595                 / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
    596         final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
    597         return (int)resultFreqFloat;
    598     }
    599 }
    600