1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.makedict; 18 19 import com.android.inputmethod.annotations.UsedForTesting; 20 import com.android.inputmethod.latin.BinaryDictionary; 21 import com.android.inputmethod.latin.Dictionary; 22 import com.android.inputmethod.latin.NgramContext; 23 import com.android.inputmethod.latin.NgramContext.WordInfo; 24 import com.android.inputmethod.latin.common.StringUtils; 25 import com.android.inputmethod.latin.utils.CombinedFormatUtils; 26 27 import java.util.ArrayList; 28 import java.util.Arrays; 29 30 import javax.annotation.Nullable; 31 32 /** 33 * Utility class for a word with a probability. 34 * 35 * This is chiefly used to iterate a dictionary. 36 */ 37 public final class WordProperty implements Comparable<WordProperty> { 38 public final String mWord; 39 public final ProbabilityInfo mProbabilityInfo; 40 public final ArrayList<NgramProperty> mNgrams; 41 // TODO: Support mIsBeginningOfSentence. 42 public final boolean mIsBeginningOfSentence; 43 public final boolean mIsNotAWord; 44 public final boolean mIsPossiblyOffensive; 45 public final boolean mHasNgrams; 46 47 private int mHashCode = 0; 48 49 // TODO: Support n-gram. 50 @UsedForTesting 51 public WordProperty(final String word, final ProbabilityInfo probabilityInfo, 52 @Nullable final ArrayList<WeightedString> bigrams, 53 final boolean isNotAWord, final boolean isPossiblyOffensive) { 54 mWord = word; 55 mProbabilityInfo = probabilityInfo; 56 if (null == bigrams) { 57 mNgrams = null; 58 } else { 59 mNgrams = new ArrayList<>(); 60 final NgramContext ngramContext = new NgramContext(new WordInfo(mWord)); 61 for (final WeightedString bigramTarget : bigrams) { 62 mNgrams.add(new NgramProperty(bigramTarget, ngramContext)); 63 } 64 } 65 mIsBeginningOfSentence = false; 66 mIsNotAWord = isNotAWord; 67 mIsPossiblyOffensive = isPossiblyOffensive; 68 mHasNgrams = bigrams != null && !bigrams.isEmpty(); 69 } 70 71 private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) { 72 return new ProbabilityInfo( 73 probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX], 74 probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX], 75 probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX], 76 probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]); 77 } 78 79 // Construct word property using information from native code. 80 // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY. 81 public WordProperty(final int[] codePoints, final boolean isNotAWord, 82 final boolean isPossiblyOffensive, final boolean hasBigram, 83 final boolean isBeginningOfSentence, final int[] probabilityInfo, 84 final ArrayList<int[][]> ngramPrevWordsArray, 85 final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray, 86 final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo) { 87 mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); 88 mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo); 89 final ArrayList<NgramProperty> ngrams = new ArrayList<>(); 90 mIsBeginningOfSentence = isBeginningOfSentence; 91 mIsNotAWord = isNotAWord; 92 mIsPossiblyOffensive = isPossiblyOffensive; 93 mHasNgrams = hasBigram; 94 95 final int relatedNgramCount = ngramTargets.size(); 96 for (int i = 0; i < relatedNgramCount; i++) { 97 final String ngramTargetString = 98 StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i)); 99 final WeightedString ngramTarget = new WeightedString(ngramTargetString, 100 createProbabilityInfoFromArray(ngramProbabilityInfo.get(i))); 101 final int[][] prevWords = ngramPrevWordsArray.get(i); 102 final boolean[] isBeginningOfSentenceArray = 103 ngramPrevWordIsBeginningOfSentenceArray.get(i); 104 final WordInfo[] wordInfoArray = new WordInfo[prevWords.length]; 105 for (int j = 0; j < prevWords.length; j++) { 106 wordInfoArray[j] = isBeginningOfSentenceArray[j] 107 ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO 108 : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray( 109 prevWords[j])); 110 } 111 final NgramContext ngramContext = new NgramContext(wordInfoArray); 112 ngrams.add(new NgramProperty(ngramTarget, ngramContext)); 113 } 114 mNgrams = ngrams.isEmpty() ? null : ngrams; 115 } 116 117 // TODO: Remove 118 @UsedForTesting 119 public ArrayList<WeightedString> getBigrams() { 120 if (null == mNgrams) { 121 return null; 122 } 123 final ArrayList<WeightedString> bigrams = new ArrayList<>(); 124 for (final NgramProperty ngram : mNgrams) { 125 if (ngram.mNgramContext.getPrevWordCount() == 1) { 126 bigrams.add(ngram.mTargetWord); 127 } 128 } 129 return bigrams; 130 } 131 132 public int getProbability() { 133 return mProbabilityInfo.mProbability; 134 } 135 136 private static int computeHashCode(WordProperty word) { 137 return Arrays.hashCode(new Object[] { 138 word.mWord, 139 word.mProbabilityInfo, 140 word.mNgrams, 141 word.mIsNotAWord, 142 word.mIsPossiblyOffensive 143 }); 144 } 145 146 /** 147 * Three-way comparison. 148 * 149 * A Word x is greater than a word y if x has a higher frequency. If they have the same 150 * frequency, they are sorted in lexicographic order. 151 */ 152 @Override 153 public int compareTo(final WordProperty w) { 154 if (getProbability() < w.getProbability()) return 1; 155 if (getProbability() > w.getProbability()) return -1; 156 return mWord.compareTo(w.mWord); 157 } 158 159 /** 160 * Equality test. 161 * 162 * Words are equal if they have the same frequency, the same spellings, and the same 163 * attributes. 164 */ 165 @Override 166 public boolean equals(Object o) { 167 if (o == this) return true; 168 if (!(o instanceof WordProperty)) return false; 169 WordProperty w = (WordProperty)o; 170 return mProbabilityInfo.equals(w.mProbabilityInfo) 171 && mWord.equals(w.mWord) && equals(mNgrams, w.mNgrams) 172 && mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive 173 && mHasNgrams == w.mHasNgrams; 174 } 175 176 // TDOO: Have a utility method like java.util.Objects.equals. 177 private static <T> boolean equals(final ArrayList<T> a, final ArrayList<T> b) { 178 if (null == a) { 179 return null == b; 180 } 181 return a.equals(b); 182 } 183 184 @Override 185 public int hashCode() { 186 if (mHashCode == 0) { 187 mHashCode = computeHashCode(this); 188 } 189 return mHashCode; 190 } 191 192 @UsedForTesting 193 public boolean isValid() { 194 return getProbability() != Dictionary.NOT_A_PROBABILITY; 195 } 196 197 @Override 198 public String toString() { 199 return CombinedFormatUtils.formatWordProperty(this); 200 } 201 } 202