Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.util.Log;
     20 
     21 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
     22 import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
     23 import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
     24 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
     25 import com.android.inputmethod.latin.makedict.FusionDictionary;
     26 import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
     27 import com.android.inputmethod.latin.makedict.PendingAttribute;
     28 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
     29 
     30 import java.io.IOException;
     31 import java.io.OutputStream;
     32 import java.util.ArrayList;
     33 import java.util.HashMap;
     34 import java.util.Map;
     35 
     36 /**
     37  * Reads and writes Binary files for a UserHistoryDictionary.
     38  *
     39  * All the methods in this class are static.
     40  */
     41 public final class UserHistoryDictIOUtils {
     42     private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
     43     private static final boolean DEBUG = false;
     44 
     45     public interface OnAddWordListener {
     46         public void setUnigram(final String word, final String shortcutTarget, final int frequency);
     47         public void setBigram(final String word1, final String word2, final int frequency);
     48     }
     49 
     50     public interface BigramDictionaryInterface {
     51         public int getFrequency(final String word1, final String word2);
     52     }
     53 
     54     public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
     55         private byte[] mBuffer;
     56         private int mPosition;
     57 
     58         public ByteArrayWrapper(final byte[] buffer) {
     59             mBuffer = buffer;
     60             mPosition = 0;
     61         }
     62 
     63         @Override
     64         public int readUnsignedByte() {
     65             return ((int)mBuffer[mPosition++]) & 0xFF;
     66         }
     67 
     68         @Override
     69         public int readUnsignedShort() {
     70             final int retval = readUnsignedByte();
     71             return (retval << 8) + readUnsignedByte();
     72         }
     73 
     74         @Override
     75         public int readUnsignedInt24() {
     76             final int retval = readUnsignedShort();
     77             return (retval << 8) + readUnsignedByte();
     78         }
     79 
     80         @Override
     81         public int readInt() {
     82             final int retval = readUnsignedShort();
     83             return (retval << 16) + readUnsignedShort();
     84         }
     85 
     86         @Override
     87         public int position() {
     88             return mPosition;
     89         }
     90 
     91         @Override
     92         public void position(int position) {
     93             mPosition = position;
     94         }
     95 
     96         @Override
     97         public void put(final byte b) {
     98             mBuffer[mPosition++] = b;
     99         }
    100 
    101         @Override
    102         public int limit() {
    103             return mBuffer.length - 1;
    104         }
    105 
    106         @Override
    107         public int capacity() {
    108             return mBuffer.length;
    109         }
    110     }
    111 
    112     /**
    113      * Writes dictionary to file.
    114      */
    115     public static void writeDictionaryBinary(final OutputStream destination,
    116             final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
    117             final FormatOptions formatOptions) {
    118         final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
    119         try {
    120             BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions);
    121             Log.d(TAG, "end writing");
    122         } catch (IOException e) {
    123             Log.e(TAG, "IO exception while writing file: " + e);
    124         } catch (UnsupportedFormatException e) {
    125             Log.e(TAG, "Unsupported fomat: " + e);
    126         }
    127     }
    128 
    129     /**
    130      * Constructs a new FusionDictionary from BigramDictionaryInterface.
    131      */
    132     /* packages for test */ static FusionDictionary constructFusionDictionary(
    133             final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
    134         final FusionDictionary fusionDict = new FusionDictionary(new Node(),
    135                 new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
    136                         false));
    137         int profTotal = 0;
    138         for (final String word1 : bigrams.keySet()) {
    139             final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
    140             for (final String word2 : word1Bigrams.keySet()) {
    141                 final int freq = dict.getFrequency(word1, word2);
    142                 if (freq == -1) {
    143                     // don't add this bigram.
    144                     continue;
    145                 }
    146                 if (DEBUG) {
    147                     if (word1 == null) {
    148                         Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
    149                     } else {
    150                         Log.d(TAG, "add bigram: " + word1
    151                                 + "," + word2 + "," + Integer.toString(freq));
    152                     }
    153                     profTotal++;
    154                 }
    155                 if (word1 == null) { // unigram
    156                     fusionDict.add(word2, freq, null, false /* isNotAWord */);
    157                 } else { // bigram
    158                     if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) {
    159                         fusionDict.add(word1, 2, null, false /* isNotAWord */);
    160                     }
    161                     fusionDict.setBigram(word1, word2, freq);
    162                 }
    163                 bigrams.updateBigram(word1, word2, (byte)freq);
    164             }
    165         }
    166         if (DEBUG) {
    167             Log.d(TAG, "add " + profTotal + "words");
    168         }
    169         return fusionDict;
    170     }
    171 
    172     /**
    173      * Reads dictionary from file.
    174      */
    175     public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer,
    176             final OnAddWordListener dict) {
    177         final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
    178         final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
    179         final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
    180         try {
    181             BinaryDictIOUtils.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies,
    182                     bigrams);
    183         } catch (IOException e) {
    184             Log.e(TAG, "IO exception while reading file: " + e);
    185         } catch (UnsupportedFormatException e) {
    186             Log.e(TAG, "Unsupported format: " + e);
    187         } catch (ArrayIndexOutOfBoundsException e) {
    188             Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file: " + e);
    189         }
    190         addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
    191     }
    192 
    193     /**
    194      * Adds all unigrams and bigrams in maps to OnAddWordListener.
    195      */
    196     /* package for test */ static void addWordsFromWordMap(final Map<Integer, String> unigrams,
    197             final Map<Integer, Integer> frequencies,
    198             final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) {
    199         for (Map.Entry<Integer, String> entry : unigrams.entrySet()) {
    200             final String word1 = entry.getValue();
    201             final int unigramFrequency = frequencies.get(entry.getKey());
    202             to.setUnigram(word1, null, unigramFrequency);
    203             final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
    204             if (attrList != null) {
    205                 for (final PendingAttribute attr : attrList) {
    206                     to.setBigram(word1, unigrams.get(attr.mAddress),
    207                             BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency,
    208                                     attr.mFrequency));
    209                 }
    210             }
    211         }
    212 
    213     }
    214 }