Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Map;
import java.util.TreeMap;
     36 
     37 /**
     38  * Decodes binary files for a FusionDictionary.
     39  *
     40  * All the methods in this class are static.
     41  *
     42  * TODO: Remove calls from classes except Ver3DictDecoder
     43  * TODO: Move this file to makedict/internal.
     44  * TODO: Rename this class to DictDecoderUtils.
     45  */
     46 public final class BinaryDictDecoderUtils {
     47 
     48     private static final boolean DBG = MakedictLog.DBG;
     49 
     50     private BinaryDictDecoderUtils() {
     51         // This utility class is not publicly instantiable.
     52     }
     53 
     54     private static final int MAX_JUMPS = 12;
     55 
     56     @UsedForTesting
     57     public interface DictBuffer {
     58         public int readUnsignedByte();
     59         public int readUnsignedShort();
     60         public int readUnsignedInt24();
     61         public int readInt();
     62         public int position();
     63         public void position(int newPosition);
     64         public void put(final byte b);
     65         public int limit();
     66         @UsedForTesting
     67         public int capacity();
     68     }
     69 
     70     public static final class ByteBufferDictBuffer implements DictBuffer {
     71         private ByteBuffer mBuffer;
     72 
     73         public ByteBufferDictBuffer(final ByteBuffer buffer) {
     74             mBuffer = buffer;
     75         }
     76 
     77         @Override
     78         public int readUnsignedByte() {
     79             return mBuffer.get() & 0xFF;
     80         }
     81 
     82         @Override
     83         public int readUnsignedShort() {
     84             return mBuffer.getShort() & 0xFFFF;
     85         }
     86 
     87         @Override
     88         public int readUnsignedInt24() {
     89             final int retval = readUnsignedByte();
     90             return (retval << 16) + readUnsignedShort();
     91         }
     92 
     93         @Override
     94         public int readInt() {
     95             return mBuffer.getInt();
     96         }
     97 
     98         @Override
     99         public int position() {
    100             return mBuffer.position();
    101         }
    102 
    103         @Override
    104         public void position(int newPos) {
    105             mBuffer.position(newPos);
    106         }
    107 
    108         @Override
    109         public void put(final byte b) {
    110             mBuffer.put(b);
    111         }
    112 
    113         @Override
    114         public int limit() {
    115             return mBuffer.limit();
    116         }
    117 
    118         @Override
    119         public int capacity() {
    120             return mBuffer.capacity();
    121         }
    122     }
    123 
    124     /**
    125      * A class grouping utility function for our specific character encoding.
    126      */
    127     static final class CharEncoding {
    128         private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
    129         private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;
    130 
    131         /**
    132          * Helper method to find out whether this code fits on one byte
    133          */
    134         private static boolean fitsOnOneByte(final int character) {
    135             return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE
    136                     && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
    137         }
    138 
    139         /**
    140          * Compute the size of a character given its character code.
    141          *
    142          * Char format is:
    143          * 1 byte = bbbbbbbb match
    144          * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
    145          * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
    146          *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
    147          *       00011111 would be outside unicode.
    148          * else: iso-latin-1 code
    149          * This allows for the whole unicode range to be encoded, including chars outside of
    150          * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
    151          * characters which should never happen anyway (and still work, but take 3 bytes).
    152          *
    153          * @param character the character code.
    154          * @return the size in binary encoded-form, either 1 or 3 bytes.
    155          */
    156         static int getCharSize(final int character) {
    157             // See char encoding in FusionDictionary.java
    158             if (fitsOnOneByte(character)) return 1;
    159             if (FormatSpec.INVALID_CHARACTER == character) return 1;
    160             return 3;
    161         }
    162 
    163         /**
    164          * Compute the byte size of a character array.
    165          */
    166         static int getCharArraySize(final int[] chars) {
    167             int size = 0;
    168             for (int character : chars) size += getCharSize(character);
    169             return size;
    170         }
    171 
    172         /**
    173          * Writes a char array to a byte buffer.
    174          *
    175          * @param codePoints the code point array to write.
    176          * @param buffer the byte buffer to write to.
    177          * @param index the index in buffer to write the character array to.
    178          * @return the index after the last character.
    179          */
    180         static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
    181             for (int codePoint : codePoints) {
    182                 if (1 == getCharSize(codePoint)) {
    183                     buffer[index++] = (byte)codePoint;
    184                 } else {
    185                     buffer[index++] = (byte)(0xFF & (codePoint >> 16));
    186                     buffer[index++] = (byte)(0xFF & (codePoint >> 8));
    187                     buffer[index++] = (byte)(0xFF & codePoint);
    188                 }
    189             }
    190             return index;
    191         }
    192 
    193         /**
    194          * Writes a string with our character format to a byte buffer.
    195          *
    196          * This will also write the terminator byte.
    197          *
    198          * @param buffer the byte buffer to write to.
    199          * @param origin the offset to write from.
    200          * @param word the string to write.
    201          * @return the size written, in bytes.
    202          */
    203         static int writeString(final byte[] buffer, final int origin,
    204                 final String word) {
    205             final int length = word.length();
    206             int index = origin;
    207             for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
    208                 final int codePoint = word.codePointAt(i);
    209                 if (1 == getCharSize(codePoint)) {
    210                     buffer[index++] = (byte)codePoint;
    211                 } else {
    212                     buffer[index++] = (byte)(0xFF & (codePoint >> 16));
    213                     buffer[index++] = (byte)(0xFF & (codePoint >> 8));
    214                     buffer[index++] = (byte)(0xFF & codePoint);
    215                 }
    216             }
    217             buffer[index++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
    218             return index - origin;
    219         }
    220 
    221         /**
    222          * Writes a string with our character format to an OutputStream.
    223          *
    224          * This will also write the terminator byte.
    225          *
    226          * @param buffer the OutputStream to write to.
    227          * @param word the string to write.
    228          */
    229         static void writeString(final OutputStream buffer, final String word) throws IOException {
    230             final int length = word.length();
    231             for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
    232                 final int codePoint = word.codePointAt(i);
    233                 if (1 == getCharSize(codePoint)) {
    234                     buffer.write((byte) codePoint);
    235                 } else {
    236                     buffer.write((byte) (0xFF & (codePoint >> 16)));
    237                     buffer.write((byte) (0xFF & (codePoint >> 8)));
    238                     buffer.write((byte) (0xFF & codePoint));
    239                 }
    240             }
    241             buffer.write(FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
    242         }
    243 
    244         /**
    245          * Reads a string from a DictBuffer. This is the converse of the above method.
    246          */
    247         static String readString(final DictBuffer dictBuffer) {
    248             final StringBuilder s = new StringBuilder();
    249             int character = readChar(dictBuffer);
    250             while (character != FormatSpec.INVALID_CHARACTER) {
    251                 s.appendCodePoint(character);
    252                 character = readChar(dictBuffer);
    253             }
    254             return s.toString();
    255         }
    256 
    257         /**
    258          * Reads a character from the buffer.
    259          *
    260          * This follows the character format documented earlier in this source file.
    261          *
    262          * @param dictBuffer the buffer, positioned over an encoded character.
    263          * @return the character code.
    264          */
    265         static int readChar(final DictBuffer dictBuffer) {
    266             int character = dictBuffer.readUnsignedByte();
    267             if (!fitsOnOneByte(character)) {
    268                 if (FormatSpec.PTNODE_CHARACTERS_TERMINATOR == character) {
    269                     return FormatSpec.INVALID_CHARACTER;
    270                 }
    271                 character <<= 16;
    272                 character += dictBuffer.readUnsignedShort();
    273             }
    274             return character;
    275         }
    276     }
    277 
    278     // Input methods: Read a binary dictionary to memory.
    279     // readDictionaryBinary is the public entry point for them.
    280 
    281     static int readSInt24(final DictBuffer dictBuffer) {
    282         final int retval = dictBuffer.readUnsignedInt24();
    283         final int sign = ((retval & FormatSpec.MSB24) != 0) ? -1 : 1;
    284         return sign * (retval & FormatSpec.SINT24_MAX);
    285     }
    286 
    287     static int readChildrenAddress(final DictBuffer dictBuffer,
    288             final int optionFlags, final FormatOptions options) {
    289         if (options.mSupportsDynamicUpdate) {
    290             final int address = dictBuffer.readUnsignedInt24();
    291             if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
    292             if ((address & FormatSpec.MSB24) != 0) {
    293                 return -(address & FormatSpec.SINT24_MAX);
    294             } else {
    295                 return address;
    296             }
    297         }
    298         switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
    299             case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
    300                 return dictBuffer.readUnsignedByte();
    301             case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
    302                 return dictBuffer.readUnsignedShort();
    303             case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
    304                 return dictBuffer.readUnsignedInt24();
    305             case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
    306             default:
    307                 return FormatSpec.NO_CHILDREN_ADDRESS;
    308         }
    309     }
    310 
    311     static int readParentAddress(final DictBuffer dictBuffer,
    312             final FormatOptions formatOptions) {
    313         if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
    314             final int parentAddress = dictBuffer.readUnsignedInt24();
    315             final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? -1 : 1;
    316             return sign * (parentAddress & FormatSpec.SINT24_MAX);
    317         } else {
    318             return FormatSpec.NO_PARENT_ADDRESS;
    319         }
    320     }
    321 
    322     /**
    323      * Reads and returns the PtNode count out of a buffer and forwards the pointer.
    324      */
    325     /* package */ static int readPtNodeCount(final DictBuffer dictBuffer) {
    326         final int msb = dictBuffer.readUnsignedByte();
    327         if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= msb) {
    328             return msb;
    329         } else {
    330             return ((FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT & msb) << 8)
    331                     + dictBuffer.readUnsignedByte();
    332         }
    333     }
    334 
    335     /**
    336      * Finds, as a string, the word at the position passed as an argument.
    337      *
    338      * @param dictDecoder the dict decoder.
    339      * @param headerSize the size of the header.
    340      * @param pos the position to seek.
    341      * @param formatOptions file format options.
    342      * @return the word with its frequency, as a weighted string.
    343      */
    344     /* package for tests */ static WeightedString getWordAtPosition(final DictDecoder dictDecoder,
    345             final int headerSize, final int pos, final FormatOptions formatOptions) {
    346         final WeightedString result;
    347         final int originalPos = dictDecoder.getPosition();
    348         dictDecoder.setPosition(pos);
    349 
    350         if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
    351             result = getWordAtPositionWithParentAddress(dictDecoder, pos, formatOptions);
    352         } else {
    353             result = getWordAtPositionWithoutParentAddress(dictDecoder, headerSize, pos,
    354                     formatOptions);
    355         }
    356 
    357         dictDecoder.setPosition(originalPos);
    358         return result;
    359     }
    360 
    361     @SuppressWarnings("unused")
    362     private static WeightedString getWordAtPositionWithParentAddress(final DictDecoder dictDecoder,
    363             final int pos, final FormatOptions options) {
    364         int currentPos = pos;
    365         int frequency = Integer.MIN_VALUE;
    366         final StringBuilder builder = new StringBuilder();
    367         // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
    368         for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
    369             PtNodeInfo currentInfo;
    370             int loopCounter = 0;
    371             do {
    372                 dictDecoder.setPosition(currentPos);
    373                 currentInfo = dictDecoder.readPtNode(currentPos, options);
    374                 if (BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags, options)) {
    375                     currentPos = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
    376                 }
    377                 if (DBG && loopCounter++ > MAX_JUMPS) {
    378                     MakedictLog.d("Too many jumps - probably a bug");
    379                 }
    380             } while (BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags, options));
    381             if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency;
    382             builder.insert(0,
    383                     new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
    384             if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
    385             currentPos = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
    386         }
    387         return new WeightedString(builder.toString(), frequency);
    388     }
    389 
    390     private static WeightedString getWordAtPositionWithoutParentAddress(
    391             final DictDecoder dictDecoder, final int headerSize, final int pos,
    392             final FormatOptions options) {
    393         dictDecoder.setPosition(headerSize);
    394         final int count = dictDecoder.readPtNodeCount();
    395         int groupPos = headerSize + BinaryDictIOUtils.getPtNodeCountSize(count);
    396         final StringBuilder builder = new StringBuilder();
    397         WeightedString result = null;
    398 
    399         PtNodeInfo last = null;
    400         for (int i = count - 1; i >= 0; --i) {
    401             PtNodeInfo info = dictDecoder.readPtNode(groupPos, options);
    402             groupPos = info.mEndAddress;
    403             if (info.mOriginalAddress == pos) {
    404                 builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
    405                 result = new WeightedString(builder.toString(), info.mFrequency);
    406                 break; // and return
    407             }
    408             if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
    409                 if (info.mChildrenAddress > pos) {
    410                     if (null == last) continue;
    411                     builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
    412                     dictDecoder.setPosition(last.mChildrenAddress);
    413                     i = dictDecoder.readPtNodeCount();
    414                     groupPos = last.mChildrenAddress + BinaryDictIOUtils.getPtNodeCountSize(i);
    415                     last = null;
    416                     continue;
    417                 }
    418                 last = info;
    419             }
    420             if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
    421                 builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
    422                 dictDecoder.setPosition(last.mChildrenAddress);
    423                 i = dictDecoder.readPtNodeCount();
    424                 groupPos = last.mChildrenAddress + BinaryDictIOUtils.getPtNodeCountSize(i);
    425                 last = null;
    426                 continue;
    427             }
    428         }
    429         return result;
    430     }
    431 
    432     /**
    433      * Reads a single node array from a buffer.
    434      *
    435      * This methods reads the file at the current position. A node array is fully expected to start
    436      * at the current position.
    437      * This will recursively read other node arrays into the structure, populating the reverse
    438      * maps on the fly and using them to keep track of already read nodes.
    439      *
    440      * @param dictDecoder the dict decoder, correctly positioned at the start of a node array.
    441      * @param headerSize the size, in bytes, of the file header.
    442      * @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
    443      * @param reversePtNodeMap a mapping from addresses to already read PtNodes.
    444      * @param options file format options.
    445      * @return the read node array with all his children already read.
    446      */
    447     private static PtNodeArray readNodeArray(final DictDecoder dictDecoder,
    448             final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
    449             final Map<Integer, PtNode> reversePtNodeMap, final FormatOptions options)
    450             throws IOException {
    451         final ArrayList<PtNode> nodeArrayContents = new ArrayList<PtNode>();
    452         final int nodeArrayOriginPos = dictDecoder.getPosition();
    453 
    454         do { // Scan the linked-list node.
    455             final int nodeArrayHeadPos = dictDecoder.getPosition();
    456             final int count = dictDecoder.readPtNodeCount();
    457             int groupOffsetPos = nodeArrayHeadPos + BinaryDictIOUtils.getPtNodeCountSize(count);
    458             for (int i = count; i > 0; --i) { // Scan the array of PtNode.
    459                 PtNodeInfo info = dictDecoder.readPtNode(groupOffsetPos, options);
    460                 if (BinaryDictIOUtils.isMovedPtNode(info.mFlags, options)) continue;
    461                 ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
    462                 ArrayList<WeightedString> bigrams = null;
    463                 if (null != info.mBigrams) {
    464                     bigrams = new ArrayList<WeightedString>();
    465                     for (PendingAttribute bigram : info.mBigrams) {
    466                         final WeightedString word = getWordAtPosition(dictDecoder, headerSize,
    467                                 bigram.mAddress, options);
    468                         final int reconstructedFrequency =
    469                                 BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency,
    470                                         bigram.mFrequency);
    471                         bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
    472                     }
    473                 }
    474                 if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
    475                     PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
    476                     if (null == children) {
    477                         final int currentPosition = dictDecoder.getPosition();
    478                         dictDecoder.setPosition(info.mChildrenAddress);
    479                         children = readNodeArray(dictDecoder, headerSize, reverseNodeArrayMap,
    480                                 reversePtNodeMap, options);
    481                         dictDecoder.setPosition(currentPosition);
    482                     }
    483                     nodeArrayContents.add(
    484                             new PtNode(info.mCharacters, shortcutTargets, bigrams,
    485                                     info.mFrequency,
    486                                     0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
    487                                     0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
    488                 } else {
    489                     nodeArrayContents.add(
    490                             new PtNode(info.mCharacters, shortcutTargets, bigrams,
    491                                     info.mFrequency,
    492                                     0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
    493                                     0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
    494                 }
    495                 groupOffsetPos = info.mEndAddress;
    496             }
    497 
    498             // reach the end of the array.
    499             if (options.mSupportsDynamicUpdate) {
    500                 final boolean hasValidForwardLink = dictDecoder.readAndFollowForwardLink();
    501                 if (!hasValidForwardLink) break;
    502             }
    503         } while (options.mSupportsDynamicUpdate && dictDecoder.hasNextPtNodeArray());
    504 
    505         final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
    506         nodeArray.mCachedAddressBeforeUpdate = nodeArrayOriginPos;
    507         nodeArray.mCachedAddressAfterUpdate = nodeArrayOriginPos;
    508         reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
    509         return nodeArray;
    510     }
    511 
    512     /**
    513      * Helper function to get the binary format version from the header.
    514      * @throws IOException
    515      */
    516     private static int getFormatVersion(final DictBuffer dictBuffer)
    517             throws IOException {
    518         final int magic = dictBuffer.readInt();
    519         if (FormatSpec.MAGIC_NUMBER == magic) return dictBuffer.readUnsignedShort();
    520         return FormatSpec.NOT_A_VERSION_NUMBER;
    521     }
    522 
    523     /**
    524      * Helper function to get and validate the binary format version.
    525      * @throws UnsupportedFormatException
    526      * @throws IOException
    527      */
    528     static int checkFormatVersion(final DictBuffer dictBuffer)
    529             throws IOException, UnsupportedFormatException {
    530         final int version = getFormatVersion(dictBuffer);
    531         if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
    532                 || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
    533             throw new UnsupportedFormatException("This file has version " + version
    534                     + ", but this implementation does not support versions above "
    535                     + FormatSpec.MAXIMUM_SUPPORTED_VERSION);
    536         }
    537         return version;
    538     }
    539 
    540     /**
    541      * Reads a buffer and returns the memory representation of the dictionary.
    542      *
    543      * This high-level method takes a buffer and reads its contents, populating a
    544      * FusionDictionary structure. The optional dict argument is an existing dictionary to
    545      * which words from the buffer should be added. If it is null, a new dictionary is created.
    546      *
    547      * @param dictDecoder the dict decoder.
    548      * @param dict an optional dictionary to add words to, or null.
    549      * @return the created (or merged) dictionary.
    550      */
    551     @UsedForTesting
    552     /* package */ static FusionDictionary readDictionaryBinary(final DictDecoder dictDecoder,
    553             final FusionDictionary dict) throws IOException, UnsupportedFormatException {
    554         // Read header
    555         final FileHeader fileHeader = dictDecoder.readHeader();
    556 
    557         Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
    558         Map<Integer, PtNode> reversePtNodeMapping = new TreeMap<Integer, PtNode>();
    559         final PtNodeArray root = readNodeArray(dictDecoder, fileHeader.mHeaderSize,
    560                 reverseNodeArrayMapping, reversePtNodeMapping, fileHeader.mFormatOptions);
    561 
    562         FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
    563         if (null != dict) {
    564             for (final Word w : dict) {
    565                 if (w.mIsBlacklistEntry) {
    566                     newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
    567                 } else {
    568                     newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
    569                 }
    570             }
    571             for (final Word w : dict) {
    572                 // By construction a binary dictionary may not have bigrams pointing to
    573                 // words that are not also registered as unigrams so we don't have to avoid
    574                 // them explicitly here.
    575                 for (final WeightedString bigram : w.mBigrams) {
    576                     newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
    577                 }
    578             }
    579         }
    580 
    581         return newDict;
    582     }
    583 
    584     /**
    585      * Helper method to pass a file name instead of a File object to isBinaryDictionary.
    586      */
    587     public static boolean isBinaryDictionary(final String filename) {
    588         final File file = new File(filename);
    589         return isBinaryDictionary(file);
    590     }
    591 
    592     /**
    593      * Basic test to find out whether the file is a binary dictionary or not.
    594      *
    595      * Concretely this only tests the magic number.
    596      *
    597      * @param file The file to test.
    598      * @return true if it's a binary dictionary, false otherwise
    599      */
    600     public static boolean isBinaryDictionary(final File file) {
    601         FileInputStream inStream = null;
    602         try {
    603             inStream = new FileInputStream(file);
    604             final ByteBuffer buffer = inStream.getChannel().map(
    605                     FileChannel.MapMode.READ_ONLY, 0, file.length());
    606             final int version = getFormatVersion(new ByteBufferDictBuffer(buffer));
    607             return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
    608                     && version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
    609         } catch (FileNotFoundException e) {
    610             return false;
    611         } catch (IOException e) {
    612             return false;
    613         } finally {
    614             if (inStream != null) {
    615                 try {
    616                     inStream.close();
    617                 } catch (IOException e) {
    618                     // do nothing
    619                 }
    620             }
    621         }
    622     }
    623 }
    624