Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import com.android.inputmethod.annotations.UsedForTesting;
     20 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
     21 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
     22 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
     23 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     24 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
     25 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
     26 import com.android.inputmethod.latin.utils.JniUtils;
     27 
     28 import android.util.Log;
     29 
     30 import java.io.File;
     31 import java.io.FileNotFoundException;
     32 import java.io.IOException;
     33 import java.util.ArrayList;
     34 import java.util.Arrays;
     35 
     36 /**
     37  * An implementation of DictDecoder for version 3 binary dictionary.
     38  */
     39 @UsedForTesting
     40 public class Ver3DictDecoder extends AbstractDictDecoder {
     41     private static final String TAG = Ver3DictDecoder.class.getSimpleName();
     42 
     43     static {
     44         JniUtils.loadNativeLibrary();
     45     }
     46 
     47     // TODO: implement something sensical instead of just a phony method
     48     private static native int doNothing();
     49 
     50     protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
     51         private static int readFrequency(final DictBuffer dictBuffer) {
     52             return dictBuffer.readUnsignedByte();
     53         }
     54     }
     55 
     56     protected final File mDictionaryBinaryFile;
     57     private final DictionaryBufferFactory mBufferFactory;
     58     protected DictBuffer mDictBuffer;
     59 
     60     /* package */ Ver3DictDecoder(final File file, final int factoryFlag) {
     61         mDictionaryBinaryFile = file;
     62         mDictBuffer = null;
     63 
     64         if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
     65             mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
     66         } else if ((factoryFlag  & MASK_DICTBUFFER) == USE_BYTEARRAY) {
     67             mBufferFactory = new DictionaryBufferFromByteArrayFactory();
     68         } else if ((factoryFlag & MASK_DICTBUFFER) == USE_WRITABLE_BYTEBUFFER) {
     69             mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory();
     70         } else {
     71             mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
     72         }
     73     }
     74 
     75     /* package */ Ver3DictDecoder(final File file, final DictionaryBufferFactory factory) {
     76         mDictionaryBinaryFile = file;
     77         mBufferFactory = factory;
     78     }
     79 
     80     @Override
     81     public void openDictBuffer() throws FileNotFoundException, IOException {
     82         mDictBuffer = mBufferFactory.getDictionaryBuffer(mDictionaryBinaryFile);
     83     }
     84 
     85     @Override
     86     public boolean isDictBufferOpen() {
     87         return mDictBuffer != null;
     88     }
     89 
     90     /* package */ DictBuffer getDictBuffer() {
     91         return mDictBuffer;
     92     }
     93 
     94     @UsedForTesting
     95     /* package */ DictBuffer openAndGetDictBuffer() throws FileNotFoundException, IOException {
     96         openDictBuffer();
     97         return getDictBuffer();
     98     }
     99 
    100     @Override
    101     public FileHeader readHeader() throws IOException, UnsupportedFormatException {
    102         if (mDictBuffer == null) {
    103             openDictBuffer();
    104         }
    105         final FileHeader header = super.readHeader(mDictBuffer);
    106         final int version = header.mFormatOptions.mVersion;
    107         if (!(version >= 2 && version <= 3)) {
    108           throw new UnsupportedFormatException("File header has a wrong version : " + version);
    109         }
    110         return header;
    111     }
    112 
    113     // TODO: Make this buffer multi thread safe.
    114     private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
    115     @Override
    116     public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) {
    117         int addressPointer = ptNodePos;
    118         final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
    119         addressPointer += FormatSpec.PTNODE_FLAGS_SIZE;
    120 
    121         final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options);
    122         if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
    123             addressPointer += FormatSpec.PARENT_ADDRESS_SIZE;
    124         }
    125 
    126         final int characters[];
    127         if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
    128             int index = 0;
    129             int character = CharEncoding.readChar(mDictBuffer);
    130             addressPointer += CharEncoding.getCharSize(character);
    131             while (FormatSpec.INVALID_CHARACTER != character) {
    132                 // FusionDictionary is making sure that the length of the word is smaller than
    133                 // MAX_WORD_LENGTH.
    134                 // So we'll never write past the end of mCharacterBuffer.
    135                 mCharacterBuffer[index++] = character;
    136                 character = CharEncoding.readChar(mDictBuffer);
    137                 addressPointer += CharEncoding.getCharSize(character);
    138             }
    139             characters = Arrays.copyOfRange(mCharacterBuffer, 0, index);
    140         } else {
    141             final int character = CharEncoding.readChar(mDictBuffer);
    142             addressPointer += CharEncoding.getCharSize(character);
    143             characters = new int[] { character };
    144         }
    145         final int frequency;
    146         if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
    147             frequency = PtNodeReader.readFrequency(mDictBuffer);
    148             addressPointer += FormatSpec.PTNODE_FREQUENCY_SIZE;
    149         } else {
    150             frequency = PtNode.NOT_A_TERMINAL;
    151         }
    152         int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
    153         if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
    154             childrenAddress += addressPointer;
    155         }
    156         addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
    157         final ArrayList<WeightedString> shortcutTargets;
    158         if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
    159             // readShortcut will add shortcuts to shortcutTargets.
    160             shortcutTargets = new ArrayList<WeightedString>();
    161             addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
    162         } else {
    163             shortcutTargets = null;
    164         }
    165 
    166         final ArrayList<PendingAttribute> bigrams;
    167         if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
    168             bigrams = new ArrayList<PendingAttribute>();
    169             addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
    170                     addressPointer);
    171             if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
    172                 throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
    173                         + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
    174             }
    175         } else {
    176             bigrams = null;
    177         }
    178         return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
    179                 parentAddress, childrenAddress, shortcutTargets, bigrams);
    180     }
    181 
    182     @Override
    183     public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
    184             final boolean deleteDictIfBroken)
    185             throws FileNotFoundException, IOException, UnsupportedFormatException {
    186         if (mDictBuffer == null) {
    187             openDictBuffer();
    188         }
    189         try {
    190             return BinaryDictDecoderUtils.readDictionaryBinary(this, dict);
    191         } catch (IOException e) {
    192             Log.e(TAG, "The dictionary " + mDictionaryBinaryFile.getName() + " is broken.", e);
    193             if (deleteDictIfBroken && !mDictionaryBinaryFile.delete()) {
    194                 Log.e(TAG, "Failed to delete the broken dictionary.");
    195             }
    196             throw e;
    197         } catch (UnsupportedFormatException e) {
    198             Log.e(TAG, "The dictionary " + mDictionaryBinaryFile.getName() + " is broken.", e);
    199             if (deleteDictIfBroken && !mDictionaryBinaryFile.delete()) {
    200                 Log.e(TAG, "Failed to delete the broken dictionary.");
    201             }
    202             throw e;
    203         }
    204     }
    205 
    206     @Override
    207     public void setPosition(int newPos) {
    208         mDictBuffer.position(newPos);
    209     }
    210 
    211     @Override
    212     public int getPosition() {
    213         return mDictBuffer.position();
    214     }
    215 
    216     @Override
    217     public int readPtNodeCount() {
    218         return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer);
    219     }
    220 
    221     @Override
    222     public boolean readAndFollowForwardLink() {
    223         final int nextAddress = mDictBuffer.readUnsignedInt24();
    224         if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) {
    225             mDictBuffer.position(nextAddress);
    226             return true;
    227         }
    228         return false;
    229     }
    230 
    231     @Override
    232     public boolean hasNextPtNodeArray() {
    233         return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS;
    234     }
    235 
    236     @Override
    237     public void skipPtNode(final FormatOptions formatOptions) {
    238         final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
    239         PtNodeReader.readParentAddress(mDictBuffer, formatOptions);
    240         BinaryDictIOUtils.skipString(mDictBuffer,
    241                 (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
    242         PtNodeReader.readChildrenAddress(mDictBuffer, flags, formatOptions);
    243         if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readFrequency(mDictBuffer);
    244         if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
    245             final int shortcutsSize = mDictBuffer.readUnsignedShort();
    246             mDictBuffer.position(mDictBuffer.position() + shortcutsSize
    247                     - FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
    248         }
    249         if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
    250             int bigramCount = 0;
    251             while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
    252                 final int bigramFlags = mDictBuffer.readUnsignedByte();
    253                 switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
    254                     case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
    255                         mDictBuffer.readUnsignedByte();
    256                         break;
    257                     case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
    258                         mDictBuffer.readUnsignedShort();
    259                         break;
    260                     case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
    261                         mDictBuffer.readUnsignedInt24();
    262                         break;
    263                 }
    264                 if ((bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT) == 0) break;
    265             }
    266             if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
    267                 throw new RuntimeException("Too many bigrams in a PtNode.");
    268             }
    269         }
    270     }
    271 }
    272