Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.util.Log;
     20 
     21 import com.android.inputmethod.annotations.UsedForTesting;
     22 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
     23 import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
     24 import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
     25 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     26 import com.android.inputmethod.latin.makedict.FusionDictionary;
     27 import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
     28 import com.android.inputmethod.latin.makedict.PendingAttribute;
     29 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
     30 
     31 import java.io.IOException;
     32 import java.io.OutputStream;
     33 import java.util.ArrayList;
     34 import java.util.HashMap;
     35 import java.util.Map;
     36 
     37 /**
     38  * Reads and writes Binary files for a UserHistoryDictionary.
     39  *
     40  * All the methods in this class are static.
     41  */
     42 public final class UserHistoryDictIOUtils {
     43     private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
     44     private static final boolean DEBUG = false;
     45 
     46     public interface OnAddWordListener {
     47         public void setUnigram(final String word, final String shortcutTarget, final int frequency);
     48         public void setBigram(final String word1, final String word2, final int frequency);
     49     }
     50 
     51     @UsedForTesting
     52     public interface BigramDictionaryInterface {
     53         public int getFrequency(final String word1, final String word2);
     54     }
     55 
     56     public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
     57         private byte[] mBuffer;
     58         private int mPosition;
     59 
     60         public ByteArrayWrapper(final byte[] buffer) {
     61             mBuffer = buffer;
     62             mPosition = 0;
     63         }
     64 
     65         @Override
     66         public int readUnsignedByte() {
     67             return mBuffer[mPosition++] & 0xFF;
     68         }
     69 
     70         @Override
     71         public int readUnsignedShort() {
     72             final int retval = readUnsignedByte();
     73             return (retval << 8) + readUnsignedByte();
     74         }
     75 
     76         @Override
     77         public int readUnsignedInt24() {
     78             final int retval = readUnsignedShort();
     79             return (retval << 8) + readUnsignedByte();
     80         }
     81 
     82         @Override
     83         public int readInt() {
     84             final int retval = readUnsignedShort();
     85             return (retval << 16) + readUnsignedShort();
     86         }
     87 
     88         @Override
     89         public int position() {
     90             return mPosition;
     91         }
     92 
     93         @Override
     94         public void position(int position) {
     95             mPosition = position;
     96         }
     97 
     98         @Override
     99         public void put(final byte b) {
    100             mBuffer[mPosition++] = b;
    101         }
    102 
    103         @Override
    104         public int limit() {
    105             return mBuffer.length - 1;
    106         }
    107 
    108         @Override
    109         public int capacity() {
    110             return mBuffer.length;
    111         }
    112     }
    113 
    114     /**
    115      * Writes dictionary to file.
    116      */
    117     public static void writeDictionaryBinary(final OutputStream destination,
    118             final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
    119             final FormatOptions formatOptions) {
    120         final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
    121         try {
    122             BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions);
    123             Log.d(TAG, "end writing");
    124         } catch (IOException e) {
    125             Log.e(TAG, "IO exception while writing file", e);
    126         } catch (UnsupportedFormatException e) {
    127             Log.e(TAG, "Unsupported format", e);
    128         }
    129     }
    130 
    131     /**
    132      * Constructs a new FusionDictionary from BigramDictionaryInterface.
    133      */
    134     @UsedForTesting
    135     static FusionDictionary constructFusionDictionary(
    136             final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
    137         final FusionDictionary fusionDict = new FusionDictionary(new Node(),
    138                 new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
    139                         false));
    140         int profTotal = 0;
    141         for (final String word1 : bigrams.keySet()) {
    142             final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
    143             for (final String word2 : word1Bigrams.keySet()) {
    144                 final int freq = dict.getFrequency(word1, word2);
    145                 if (freq == -1) {
    146                     // don't add this bigram.
    147                     continue;
    148                 }
    149                 if (DEBUG) {
    150                     if (word1 == null) {
    151                         Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
    152                     } else {
    153                         Log.d(TAG, "add bigram: " + word1
    154                                 + "," + word2 + "," + Integer.toString(freq));
    155                     }
    156                     profTotal++;
    157                 }
    158                 if (word1 == null) { // unigram
    159                     fusionDict.add(word2, freq, null, false /* isNotAWord */);
    160                 } else { // bigram
    161                     if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) {
    162                         fusionDict.add(word1, 2, null, false /* isNotAWord */);
    163                     }
    164                     fusionDict.setBigram(word1, word2, freq);
    165                 }
    166                 bigrams.updateBigram(word1, word2, (byte)freq);
    167             }
    168         }
    169         if (DEBUG) {
    170             Log.d(TAG, "add " + profTotal + "words");
    171         }
    172         return fusionDict;
    173     }
    174 
    175     /**
    176      * Reads dictionary from file.
    177      */
    178     public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer,
    179             final OnAddWordListener dict) {
    180         final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
    181         final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
    182         final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
    183         try {
    184             BinaryDictIOUtils.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies,
    185                     bigrams);
    186         } catch (IOException e) {
    187             Log.e(TAG, "IO exception while reading file", e);
    188         } catch (UnsupportedFormatException e) {
    189             Log.e(TAG, "Unsupported format", e);
    190         } catch (ArrayIndexOutOfBoundsException e) {
    191             Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
    192         }
    193         addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
    194     }
    195 
    196     /**
    197      * Adds all unigrams and bigrams in maps to OnAddWordListener.
    198      */
    199     @UsedForTesting
    200     static void addWordsFromWordMap(final Map<Integer, String> unigrams,
    201             final Map<Integer, Integer> frequencies,
    202             final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) {
    203         for (Map.Entry<Integer, String> entry : unigrams.entrySet()) {
    204             final String word1 = entry.getValue();
    205             final int unigramFrequency = frequencies.get(entry.getKey());
    206             to.setUnigram(word1, null, unigramFrequency);
    207             final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
    208             if (attrList != null) {
    209                 for (final PendingAttribute attr : attrList) {
    210                     final String word2 = unigrams.get(attr.mAddress);
    211                     if (word1 == null || word2 == null) {
    212                         Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
    213                         continue;
    214                     }
    215                     to.setBigram(word1, word2,
    216                             BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency,
    217                                     attr.mFrequency));
    218                 }
    219             }
    220         }
    221 
    222     }
    223 }
    224