Home | History | Annotate | Download | only in makedict
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.makedict;
     18 
     19 import com.android.inputmethod.annotations.UsedForTesting;
     20 import com.android.inputmethod.latin.BinaryDictionary;
     21 import com.android.inputmethod.latin.utils.CombinedFormatUtils;
     22 import com.android.inputmethod.latin.utils.StringUtils;
     23 
     24 import java.util.ArrayList;
     25 import java.util.Arrays;
     26 
     27 /**
     28  * Utility class for a word with a probability.
     29  *
     30  * This is chiefly used to iterate a dictionary.
     31  */
     32 public final class WordProperty implements Comparable<WordProperty> {
     33     public final String mWord;
     34     public final ProbabilityInfo mProbabilityInfo;
     35     public final ArrayList<WeightedString> mShortcutTargets;
     36     public final ArrayList<WeightedString> mBigrams;
     37     // TODO: Support mIsBeginningOfSentence.
     38     public final boolean mIsBeginningOfSentence;
     39     public final boolean mIsNotAWord;
     40     public final boolean mIsBlacklistEntry;
     41     public final boolean mHasShortcuts;
     42     public final boolean mHasBigrams;
     43 
     44     private int mHashCode = 0;
     45 
     46     @UsedForTesting
     47     public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
     48             final ArrayList<WeightedString> shortcutTargets,
     49             final ArrayList<WeightedString> bigrams,
     50             final boolean isNotAWord, final boolean isBlacklistEntry) {
     51         mWord = word;
     52         mProbabilityInfo = probabilityInfo;
     53         mShortcutTargets = shortcutTargets;
     54         mBigrams = bigrams;
     55         mIsBeginningOfSentence = false;
     56         mIsNotAWord = isNotAWord;
     57         mIsBlacklistEntry = isBlacklistEntry;
     58         mHasBigrams = bigrams != null && !bigrams.isEmpty();
     59         mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
     60     }
     61 
     62     private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
     63       return new ProbabilityInfo(
     64               probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
     65               probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
     66               probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
     67               probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
     68     }
     69 
     70     // Construct word property using information from native code.
     71     // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
     72     public WordProperty(final int[] codePoints, final boolean isNotAWord,
     73             final boolean isBlacklisted, final boolean hasBigram, final boolean hasShortcuts,
     74             final boolean isBeginningOfSentence, final int[] probabilityInfo,
     75             final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
     76             final ArrayList<int[]> shortcutTargets,
     77             final ArrayList<Integer> shortcutProbabilities) {
     78         mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
     79         mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
     80         mShortcutTargets = new ArrayList<>();
     81         mBigrams = new ArrayList<>();
     82         mIsBeginningOfSentence = isBeginningOfSentence;
     83         mIsNotAWord = isNotAWord;
     84         mIsBlacklistEntry = isBlacklisted;
     85         mHasShortcuts = hasShortcuts;
     86         mHasBigrams = hasBigram;
     87 
     88         final int bigramTargetCount = bigramTargets.size();
     89         for (int i = 0; i < bigramTargetCount; i++) {
     90             final String bigramTargetString =
     91                     StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
     92             mBigrams.add(new WeightedString(bigramTargetString,
     93                     createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))));
     94         }
     95 
     96         final int shortcutTargetCount = shortcutTargets.size();
     97         for (int i = 0; i < shortcutTargetCount; i++) {
     98             final String shortcutTargetString =
     99                     StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i));
    100             mShortcutTargets.add(
    101                     new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
    102         }
    103     }
    104 
    105     public int getProbability() {
    106         return mProbabilityInfo.mProbability;
    107     }
    108 
    109     private static int computeHashCode(WordProperty word) {
    110         return Arrays.hashCode(new Object[] {
    111                 word.mWord,
    112                 word.mProbabilityInfo,
    113                 word.mShortcutTargets.hashCode(),
    114                 word.mBigrams.hashCode(),
    115                 word.mIsNotAWord,
    116                 word.mIsBlacklistEntry
    117         });
    118     }
    119 
    120     /**
    121      * Three-way comparison.
    122      *
    123      * A Word x is greater than a word y if x has a higher frequency. If they have the same
    124      * frequency, they are sorted in lexicographic order.
    125      */
    126     @Override
    127     public int compareTo(final WordProperty w) {
    128         if (getProbability() < w.getProbability()) return 1;
    129         if (getProbability() > w.getProbability()) return -1;
    130         return mWord.compareTo(w.mWord);
    131     }
    132 
    133     /**
    134      * Equality test.
    135      *
    136      * Words are equal if they have the same frequency, the same spellings, and the same
    137      * attributes.
    138      */
    139     @Override
    140     public boolean equals(Object o) {
    141         if (o == this) return true;
    142         if (!(o instanceof WordProperty)) return false;
    143         WordProperty w = (WordProperty)o;
    144         return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
    145                 && mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams)
    146                 && mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
    147                 && mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams;
    148     }
    149 
    150     @Override
    151     public int hashCode() {
    152         if (mHashCode == 0) {
    153             mHashCode = computeHashCode(this);
    154         }
    155         return mHashCode;
    156     }
    157 
    158     @UsedForTesting
    159     public boolean isValid() {
    160         return getProbability() != BinaryDictionary.NOT_A_PROBABILITY;
    161     }
    162 
    163     @Override
    164     public String toString() {
    165         return CombinedFormatUtils.formatWordProperty(this);
    166     }
    167 }
    168