Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import com.android.inputmethod.annotations.UsedForTesting;
     20 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
     21 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
     22 
     23 import java.io.File;
     24 import java.io.FileInputStream;
     25 import java.io.FileNotFoundException;
     26 import java.io.IOException;
     27 import java.io.RandomAccessFile;
     28 import java.nio.ByteBuffer;
     29 import java.nio.channels.FileChannel;
     30 import java.util.ArrayList;
     31 import java.util.TreeMap;
     32 
     33 /**
     34  * An interface of binary dictionary decoders.
     35  */
     36 // TODO: Straighten out responsibility for the buffer's file pointer.
     37 public interface DictDecoder {
     38 
     39     /**
     40      * Reads and returns the file header.
     41      */
     42     public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException;
     43 
     44     /**
     45      * Reads PtNode from ptNodePos.
     46      * @param ptNodePos the position of PtNode.
     47      * @return PtNodeInfo.
     48      */
     49     public PtNodeInfo readPtNode(final int ptNodePos);
     50 
     51     /**
     52      * Reads a buffer and returns the memory representation of the dictionary.
     53      *
     54      * This high-level method takes a buffer and reads its contents, populating a
     55      * FusionDictionary structure.
     56      *
     57      * @param deleteDictIfBroken a flag indicating whether this method should remove the broken
     58      * dictionary or not.
     59      * @return the created dictionary.
     60      */
     61     @UsedForTesting
     62     public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken)
     63             throws FileNotFoundException, IOException, UnsupportedFormatException;
     64 
     65     /**
     66      * Gets the address of the last PtNode of the exact matching word in the dictionary.
     67      * If no match is found, returns NOT_VALID_WORD.
     68      *
     69      * @param word the word we search for.
     70      * @return the address of the terminal node.
     71      * @throws IOException if the file can't be read.
     72      * @throws UnsupportedFormatException if the format of the file is not recognized.
     73      */
     74     @UsedForTesting
     75     public int getTerminalPosition(final String word)
     76             throws IOException, UnsupportedFormatException;
     77 
     78     /**
     79      * Reads unigrams and bigrams from the binary file.
     80      * Doesn't store a full memory representation of the dictionary.
     81      *
     82      * @param words the map to store the address as a key and the word as a value.
     83      * @param frequencies the map to store the address as a key and the frequency as a value.
     84      * @param bigrams the map to store the address as a key and the list of address as a value.
     85      * @throws IOException if the file can't be read.
     86      * @throws UnsupportedFormatException if the format of the file is not recognized.
     87      */
     88     @UsedForTesting
     89     public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
     90             final TreeMap<Integer, Integer> frequencies,
     91             final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
     92                 throws IOException, UnsupportedFormatException;
     93 
     94     /**
     95      * Sets the position of the buffer to the given value.
     96      *
     97      * @param newPos the new position
     98      */
     99     public void setPosition(final int newPos);
    100 
    101     /**
    102      * Gets the position of the buffer.
    103      *
    104      * @return the position
    105      */
    106     public int getPosition();
    107 
    108     /**
    109      * Reads and returns the PtNode count out of a buffer and forwards the pointer.
    110      */
    111     public int readPtNodeCount();
    112 
    113     /**
    114      * Opens the dictionary file and makes DictBuffer.
    115      */
    116     @UsedForTesting
    117     public void openDictBuffer() throws FileNotFoundException, IOException,
    118             UnsupportedFormatException;
    119     @UsedForTesting
    120     public boolean isDictBufferOpen();
    121 
    122     // Constants for DictionaryBufferFactory.
    123     public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
    124     public static final int USE_BYTEARRAY = 0x02000000;
    125     public static final int USE_WRITABLE_BYTEBUFFER = 0x03000000;
    126     public static final int MASK_DICTBUFFER = 0x0F000000;
    127 
    128     public interface DictionaryBufferFactory {
    129         public DictBuffer getDictionaryBuffer(final File file)
    130                 throws FileNotFoundException, IOException;
    131     }
    132 
    133     /**
    134      * Creates DictionaryBuffer using a ByteBuffer
    135      *
    136      * This class uses less memory than DictionaryBufferFromByteArrayFactory,
    137      * but doesn't perform as fast.
    138      * When operating on a big dictionary, this class is preferred.
    139      */
    140     public static final class DictionaryBufferFromReadOnlyByteBufferFactory
    141             implements DictionaryBufferFactory {
    142         @Override
    143         public DictBuffer getDictionaryBuffer(final File file)
    144                 throws FileNotFoundException, IOException {
    145             FileInputStream inStream = null;
    146             ByteBuffer buffer = null;
    147             try {
    148                 inStream = new FileInputStream(file);
    149                 buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
    150                         0, file.length());
    151             } finally {
    152                 if (inStream != null) {
    153                     inStream.close();
    154                 }
    155             }
    156             if (buffer != null) {
    157                 return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
    158             }
    159             return null;
    160         }
    161     }
    162 
    163     /**
    164      * Creates DictionaryBuffer using a byte array
    165      *
    166      * This class performs faster than other classes, but consumes more memory.
    167      * When operating on a small dictionary, this class is preferred.
    168      */
    169     public static final class DictionaryBufferFromByteArrayFactory
    170             implements DictionaryBufferFactory {
    171         @Override
    172         public DictBuffer getDictionaryBuffer(final File file)
    173                 throws FileNotFoundException, IOException {
    174             FileInputStream inStream = null;
    175             try {
    176                 inStream = new FileInputStream(file);
    177                 final byte[] array = new byte[(int) file.length()];
    178                 inStream.read(array);
    179                 return new ByteArrayDictBuffer(array);
    180             } finally {
    181                 if (inStream != null) {
    182                     inStream.close();
    183                 }
    184             }
    185         }
    186     }
    187 
    188     /**
    189      * Creates DictionaryBuffer using a writable ByteBuffer and a RandomAccessFile.
    190      *
    191      * This class doesn't perform as fast as other classes,
    192      * but this class is the only option available for destructive operations (insert or delete)
    193      * on a dictionary.
    194      */
    195     @UsedForTesting
    196     public static final class DictionaryBufferFromWritableByteBufferFactory
    197             implements DictionaryBufferFactory {
    198         @Override
    199         public DictBuffer getDictionaryBuffer(final File file)
    200                 throws FileNotFoundException, IOException {
    201             RandomAccessFile raFile = null;
    202             ByteBuffer buffer = null;
    203             try {
    204                 raFile = new RandomAccessFile(file, "rw");
    205                 buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length());
    206             } finally {
    207                 if (raFile != null) {
    208                     raFile.close();
    209                 }
    210             }
    211             if (buffer != null) {
    212                 return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
    213             }
    214             return null;
    215         }
    216     }
    217 
    218     /**
    219      * @return whether this decoder has a valid binary dictionary that it can decode.
    220      */
    221     public boolean hasValidRawBinaryDictionary();
    222 }
    223