Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import com.android.inputmethod.annotations.UsedForTesting;
     20 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
     21 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
     22 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     23 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
     24 
     25 import java.io.File;
     26 import java.io.FileInputStream;
     27 import java.io.FileNotFoundException;
     28 import java.io.IOException;
     29 import java.io.RandomAccessFile;
     30 import java.nio.ByteBuffer;
     31 import java.nio.channels.FileChannel;
     32 import java.util.ArrayList;
     33 import java.util.TreeMap;
     34 
     35 /**
     36  * An interface of binary dictionary decoders.
     37  */
     38 public interface DictDecoder {
     39 
     40     /**
     41      * Reads and returns the file header.
     42      */
     43     public FileHeader readHeader() throws IOException, UnsupportedFormatException;
     44 
     45     /**
     46      * Reads PtNode from nodeAddress.
     47      * @param ptNodePos the position of PtNode.
     48      * @param formatOptions the format options.
     49      * @return PtNodeInfo.
     50      */
     51     public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
     52 
     53     /**
     54      * Reads a buffer and returns the memory representation of the dictionary.
     55      *
     56      * This high-level method takes a buffer and reads its contents, populating a
     57      * FusionDictionary structure. The optional dict argument is an existing dictionary to
     58      * which words from the buffer should be added. If it is null, a new dictionary is created.
     59      *
     60      * @param dict an optional dictionary to add words to, or null.
     61      * @param deleteDictIfBroken a flag indicating whether this method should remove the broken
     62      * dictionary or not.
     63      * @return the created (or merged) dictionary.
     64      */
     65     @UsedForTesting
     66     public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
     67             final boolean deleteDictIfBroken)
     68                     throws FileNotFoundException, IOException, UnsupportedFormatException;
     69 
     70     /**
     71      * Gets the address of the last PtNode of the exact matching word in the dictionary.
     72      * If no match is found, returns NOT_VALID_WORD.
     73      *
     74      * @param word the word we search for.
     75      * @return the address of the terminal node.
     76      * @throws IOException if the file can't be read.
     77      * @throws UnsupportedFormatException if the format of the file is not recognized.
     78      */
     79     @UsedForTesting
     80     public int getTerminalPosition(final String word)
     81             throws IOException, UnsupportedFormatException;
     82 
     83     /**
     84      * Reads unigrams and bigrams from the binary file.
     85      * Doesn't store a full memory representation of the dictionary.
     86      *
     87      * @param words the map to store the address as a key and the word as a value.
     88      * @param frequencies the map to store the address as a key and the frequency as a value.
     89      * @param bigrams the map to store the address as a key and the list of address as a value.
     90      * @throws IOException if the file can't be read.
     91      * @throws UnsupportedFormatException if the format of the file is not recognized.
     92      */
     93     @UsedForTesting
     94     public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
     95             final TreeMap<Integer, Integer> frequencies,
     96             final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
     97                 throws IOException, UnsupportedFormatException;
     98 
     99     /**
    100      * Sets the position of the buffer to the given value.
    101      *
    102      * @param newPos the new position
    103      */
    104     public void setPosition(final int newPos);
    105 
    106     /**
    107      * Gets the position of the buffer.
    108      *
    109      * @return the position
    110      */
    111     public int getPosition();
    112 
    113     /**
    114      * Reads and returns the PtNode count out of a buffer and forwards the pointer.
    115      */
    116     public int readPtNodeCount();
    117 
    118     /**
    119      * Reads the forward link and advances the position.
    120      *
    121      * @return true if this method moves the file pointer, false otherwise.
    122      */
    123     public boolean readAndFollowForwardLink();
    124     public boolean hasNextPtNodeArray();
    125 
    126     /**
    127      * Opens the dictionary file and makes DictBuffer.
    128      */
    129     @UsedForTesting
    130     public void openDictBuffer() throws FileNotFoundException, IOException;
    131     @UsedForTesting
    132     public boolean isDictBufferOpen();
    133 
    134     // Constants for DictionaryBufferFactory.
    135     public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
    136     public static final int USE_BYTEARRAY = 0x02000000;
    137     public static final int USE_WRITABLE_BYTEBUFFER = 0x03000000;
    138     public static final int MASK_DICTBUFFER = 0x0F000000;
    139 
    140     public interface DictionaryBufferFactory {
    141         public DictBuffer getDictionaryBuffer(final File file)
    142                 throws FileNotFoundException, IOException;
    143     }
    144 
    145     /**
    146      * Creates DictionaryBuffer using a ByteBuffer
    147      *
    148      * This class uses less memory than DictionaryBufferFromByteArrayFactory,
    149      * but doesn't perform as fast.
    150      * When operating on a big dictionary, this class is preferred.
    151      */
    152     public static final class DictionaryBufferFromReadOnlyByteBufferFactory
    153             implements DictionaryBufferFactory {
    154         @Override
    155         public DictBuffer getDictionaryBuffer(final File file)
    156                 throws FileNotFoundException, IOException {
    157             FileInputStream inStream = null;
    158             ByteBuffer buffer = null;
    159             try {
    160                 inStream = new FileInputStream(file);
    161                 buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
    162                         0, file.length());
    163             } finally {
    164                 if (inStream != null) {
    165                     inStream.close();
    166                 }
    167             }
    168             if (buffer != null) {
    169                 return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
    170             }
    171             return null;
    172         }
    173     }
    174 
    175     /**
    176      * Creates DictionaryBuffer using a byte array
    177      *
    178      * This class performs faster than other classes, but consumes more memory.
    179      * When operating on a small dictionary, this class is preferred.
    180      */
    181     public static final class DictionaryBufferFromByteArrayFactory
    182             implements DictionaryBufferFactory {
    183         @Override
    184         public DictBuffer getDictionaryBuffer(final File file)
    185                 throws FileNotFoundException, IOException {
    186             FileInputStream inStream = null;
    187             try {
    188                 inStream = new FileInputStream(file);
    189                 final byte[] array = new byte[(int) file.length()];
    190                 inStream.read(array);
    191                 return new ByteArrayDictBuffer(array);
    192             } finally {
    193                 if (inStream != null) {
    194                     inStream.close();
    195                 }
    196             }
    197         }
    198     }
    199 
    200     /**
    201      * Creates DictionaryBuffer using a writable ByteBuffer and a RandomAccessFile.
    202      *
    203      * This class doesn't perform as fast as other classes,
    204      * but this class is the only option available for destructive operations (insert or delete)
    205      * on a dictionary.
    206      */
    207     @UsedForTesting
    208     public static final class DictionaryBufferFromWritableByteBufferFactory
    209             implements DictionaryBufferFactory {
    210         @Override
    211         public DictBuffer getDictionaryBuffer(final File file)
    212                 throws FileNotFoundException, IOException {
    213             RandomAccessFile raFile = null;
    214             ByteBuffer buffer = null;
    215             try {
    216                 raFile = new RandomAccessFile(file, "rw");
    217                 buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length());
    218             } finally {
    219                 if (raFile != null) {
    220                     raFile.close();
    221                 }
    222             }
    223             if (buffer != null) {
    224                 return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
    225             }
    226             return null;
    227         }
    228     }
    229 
    230     public void skipPtNode(final FormatOptions formatOptions);
    231 }
    232