/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.ArrayList;
import java.util.Arrays;

/**
 * Utility class for a word with a probability.
 *
 * This is chiefly used to iterate a dictionary.
 */
public final class WordProperty implements Comparable<WordProperty> {
    public final String mWord;
    public final ProbabilityInfo mProbabilityInfo;
    // May be null when the instance was built with the testing constructor.
    public final ArrayList<WeightedString> mShortcutTargets;
    // May be null when the instance was built with the testing constructor.
    public final ArrayList<WeightedString> mBigrams;
    // TODO: Support mIsBeginningOfSentence.
    public final boolean mIsBeginningOfSentence;
    public final boolean mIsNotAWord;
    public final boolean mIsBlacklistEntry;
    public final boolean mHasShortcuts;
    public final boolean mHasBigrams;

    // Lazily computed hash code; 0 means "not computed yet".
    private int mHashCode = 0;

    /**
     * Creates a word property from already-built Java objects.
     *
     * {@link #mIsBeginningOfSentence} is always set to false by this constructor.
     * {@link #mHasShortcuts} and {@link #mHasBigrams} are derived from the lists: each is true
     * only when the corresponding list is non-null and non-empty.
     *
     * @param word the word itself.
     * @param probabilityInfo the probability information of the word.
     * @param shortcutTargets the shortcut targets of the word; may be null.
     * @param bigrams the bigrams of the word; may be null.
     * @param isNotAWord stored as-is in {@link #mIsNotAWord}.
     * @param isBlacklistEntry stored as-is in {@link #mIsBlacklistEntry}.
     */
    @UsedForTesting
    public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
            final ArrayList<WeightedString> shortcutTargets,
            final ArrayList<WeightedString> bigrams,
            final boolean isNotAWord, final boolean isBlacklistEntry) {
        mWord = word;
        mProbabilityInfo = probabilityInfo;
        mShortcutTargets = shortcutTargets;
        mBigrams = bigrams;
        mIsBeginningOfSentence = false;
        mIsNotAWord = isNotAWord;
        mIsBlacklistEntry = isBlacklistEntry;
        mHasBigrams = bigrams != null && !bigrams.isEmpty();
        mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
    }

    /**
     * Builds a {@link ProbabilityInfo} from a flat int array, reading the probability,
     * timestamp, level and count at the indices defined by {@link BinaryDictionary}.
     */
    private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
        return new ProbabilityInfo(
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
    }

    // Construct word property using information from native code.
    // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
    //
    // The word and every target are decoded from code point arrays with
    // StringUtils.getStringFromNullTerminatedCodePointArray(). bigramTargets and
    // bigramProbabilityInfo are read in parallel (entry i of one goes with entry i of the other),
    // and so are shortcutTargets and shortcutProbabilities. Unlike the testing constructor, the
    // hasBigram / hasShortcuts flags are taken from the arguments as-is and the resulting lists
    // are never null.
    public WordProperty(final int[] codePoints, final boolean isNotAWord,
            final boolean isBlacklisted, final boolean hasBigram, final boolean hasShortcuts,
            final boolean isBeginningOfSentence, final int[] probabilityInfo,
            final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
            final ArrayList<int[]> shortcutTargets,
            final ArrayList<Integer> shortcutProbabilities) {
        mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
        mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
        mShortcutTargets = new ArrayList<>();
        mBigrams = new ArrayList<>();
        mIsBeginningOfSentence = isBeginningOfSentence;
        mIsNotAWord = isNotAWord;
        mIsBlacklistEntry = isBlacklisted;
        mHasShortcuts = hasShortcuts;
        mHasBigrams = hasBigram;

        final int bigramTargetCount = bigramTargets.size();
        for (int i = 0; i < bigramTargetCount; i++) {
            final String bigramTargetString =
                    StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
            mBigrams.add(new WeightedString(bigramTargetString,
                    createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))));
        }

        final int shortcutTargetCount = shortcutTargets.size();
        for (int i = 0; i < shortcutTargetCount; i++) {
            final String shortcutTargetString =
                    StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i));
            mShortcutTargets.add(
                    new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
        }
    }

    /**
     * @return the probability stored in {@link #mProbabilityInfo}.
     */
    public int getProbability() {
        return mProbabilityInfo.mProbability;
    }

    /**
     * Returns the hash code of the list, or 0 when the list is null.
     *
     * The testing constructor accepts null lists, so hashing must not dereference them blindly.
     */
    private static int hashCodeOrZero(final ArrayList<WeightedString> list) {
        return list == null ? 0 : list.hashCode();
    }

    /**
     * Null-safe list equality: two null lists are equal, a null list never equals a non-null one.
     */
    private static boolean listsEqual(final ArrayList<WeightedString> a,
            final ArrayList<WeightedString> b) {
        return a == null ? b == null : a.equals(b);
    }

    /**
     * Combines the word, its probability info, both target lists and the not-a-word / blacklist
     * flags into a single hash code.
     */
    private static int computeHashCode(WordProperty word) {
        return Arrays.hashCode(new Object[] {
                word.mWord,
                word.mProbabilityInfo,
                hashCodeOrZero(word.mShortcutTargets),
                hashCodeOrZero(word.mBigrams),
                word.mIsNotAWord,
                word.mIsBlacklistEntry
        });
    }

    /**
     * Three-way comparison.
     *
     * Words are ordered by descending probability: a word with a higher probability sorts before
     * (compares as less than) a word with a lower one. If they have the same probability, they
     * are sorted in lexicographic order of the word.
     */
    @Override
    public int compareTo(final WordProperty w) {
        if (getProbability() < w.getProbability()) return 1;
        if (getProbability() > w.getProbability()) return -1;
        return mWord.compareTo(w.mWord);
    }

    /**
     * Equality test.
     *
     * Words are equal if they have the same probability info, the same spellings, and the same
     * attributes (shortcut targets, bigrams, not-a-word / blacklist flags and the has-bigrams /
     * has-shortcuts flags).
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) return true;
        if (!(o instanceof WordProperty)) return false;
        WordProperty w = (WordProperty)o;
        return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
                && listsEqual(mShortcutTargets, w.mShortcutTargets)
                && listsEqual(mBigrams, w.mBigrams)
                && mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
                // Compare each flag with its counterpart; mixing the two flags made words
                // without shortcuts or bigrams unequal to identical copies of themselves.
                && mHasBigrams == w.mHasBigrams && mHasShortcuts == w.mHasShortcuts;
    }

    /**
     * Returns the hash code, computing it on first use and caching it in {@link #mHashCode}.
     * A computed value of 0 is indistinguishable from "not computed" and is simply recomputed.
     */
    @Override
    public int hashCode() {
        if (mHashCode == 0) {
            mHashCode = computeHashCode(this);
        }
        return mHashCode;
    }

    /**
     * @return true unless the probability is {@link BinaryDictionary#NOT_A_PROBABILITY}.
     */
    @UsedForTesting
    public boolean isValid() {
        return getProbability() != BinaryDictionary.NOT_A_PROBABILITY;
    }

    /**
     * @return the representation produced by {@link CombinedFormatUtils#formatWordProperty}.
     */
    @Override
    public String toString() {
        return CombinedFormatUtils.formatWordProperty(this);
    }
}