Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.text.TextUtils;
     20 
     21 import com.android.inputmethod.annotations.UsedForTesting;
     22 import com.android.inputmethod.latin.common.StringUtils;
     23 import com.android.inputmethod.latin.define.DecoderSpecificConstants;
     24 
     25 import java.util.ArrayList;
     26 import java.util.Arrays;
     27 
     28 import javax.annotation.Nonnull;
     29 
     30 /**
     31  * Class to represent information of previous words. This class is used to add n-gram entries
     32  * into binary dictionaries, to get predictions, and to get suggestions.
     33  */
     34 public class NgramContext {
     35     @Nonnull
     36     public static final NgramContext EMPTY_PREV_WORDS_INFO =
     37             new NgramContext(WordInfo.EMPTY_WORD_INFO);
     38     @Nonnull
     39     public static final NgramContext BEGINNING_OF_SENTENCE =
     40             new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
     41 
     42     public static final String BEGINNING_OF_SENTENCE_TAG = "<S>";
     43 
     44     public static final String CONTEXT_SEPARATOR = " ";
     45 
     46     public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) {
     47         return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO);
     48     }
     49 
     50     /**
     51      * Word information used to represent previous words information.
     52      */
     53     public static class WordInfo {
     54         @Nonnull
     55         public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
     56         @Nonnull
     57         public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();
     58 
     59         // This is an empty char sequence when mIsBeginningOfSentence is true.
     60         public final CharSequence mWord;
     61         // TODO: Have sentence separator.
     62         // Whether the current context is beginning of sentence or not. This is true when composing
     63         // at the beginning of an input field or composing a word after a sentence separator.
     64         public final boolean mIsBeginningOfSentence;
     65 
     66         // Beginning of sentence.
     67         private WordInfo() {
     68             mWord = "";
     69             mIsBeginningOfSentence = true;
     70         }
     71 
     72         public WordInfo(final CharSequence word) {
     73             mWord = word;
     74             mIsBeginningOfSentence = false;
     75         }
     76 
     77         public boolean isValid() {
     78             return mWord != null;
     79         }
     80 
     81         @Override
     82         public int hashCode() {
     83             return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
     84         }
     85 
     86         @Override
     87         public boolean equals(Object o) {
     88             if (this == o) return true;
     89             if (!(o instanceof WordInfo)) return false;
     90             final WordInfo wordInfo = (WordInfo)o;
     91             if (mWord == null || wordInfo.mWord == null) {
     92                 return mWord == wordInfo.mWord
     93                         && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
     94             }
     95             return TextUtils.equals(mWord, wordInfo.mWord)
     96                     && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
     97         }
     98     }
     99 
    100     // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
    101     // have any context for that previous word including the "beginning of sentence context" - we
    102     // just don't know what to predict using the information. An example of that is after a comma.
    103     // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
    104     // WordComposer was reset and before starting a new composing word, but we should never be
    105     // calling getSuggetions* in this situation.
    106     private final WordInfo[] mPrevWordsInfo;
    107     private final int mPrevWordsCount;
    108 
    109     private final int mMaxPrevWordCount;
    110 
    111     // Construct from the previous word information.
    112     public NgramContext(final WordInfo... prevWordsInfo) {
    113         this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo);
    114     }
    115 
    116     public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) {
    117         mPrevWordsInfo = prevWordsInfo;
    118         mPrevWordsCount = prevWordsInfo.length;
    119         mMaxPrevWordCount = maxPrevWordCount;
    120     }
    121 
    122     /**
    123      * Create next prevWordsInfo using current prevWordsInfo.
    124      */
    125     @Nonnull
    126     public NgramContext getNextNgramContext(final WordInfo wordInfo) {
    127         final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1);
    128         final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
    129         prevWordsInfo[0] = wordInfo;
    130         System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
    131         return new NgramContext(mMaxPrevWordCount, prevWordsInfo);
    132     }
    133 
    134 
    135     /**
    136      * Extracts the previous words context.
    137      *
    138      * @return a String with the previous words separated by white space.
    139      */
    140     public String extractPrevWordsContext() {
    141         final ArrayList<String> terms = new ArrayList<>();
    142         for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
    143             if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
    144                 final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
    145                 if (wordInfo.mIsBeginningOfSentence) {
    146                     terms.add(BEGINNING_OF_SENTENCE_TAG);
    147                 } else {
    148                     final String term = wordInfo.mWord.toString();
    149                     if (!term.isEmpty()) {
    150                         terms.add(term);
    151                     }
    152                 }
    153             }
    154         }
    155         return TextUtils.join(CONTEXT_SEPARATOR, terms);
    156     }
    157 
    158     /**
    159      * Extracts the previous words context.
    160      *
    161      * @return a String array with the previous words.
    162      */
    163     public String[] extractPrevWordsContextArray() {
    164         final ArrayList<String> prevTermList = new ArrayList<>();
    165         for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
    166             if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
    167                 final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
    168                 if (wordInfo.mIsBeginningOfSentence) {
    169                     prevTermList.add(BEGINNING_OF_SENTENCE_TAG);
    170                 } else {
    171                     final String term = wordInfo.mWord.toString();
    172                     if (!term.isEmpty()) {
    173                         prevTermList.add(term);
    174                     }
    175                 }
    176             }
    177         }
    178         final String[] contextStringArray = prevTermList.toArray(new String[prevTermList.size()]);
    179         return contextStringArray;
    180     }
    181 
    182     public boolean isValid() {
    183         return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
    184     }
    185 
    186     public boolean isBeginningOfSentenceContext() {
    187         return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
    188     }
    189 
    190     // n is 1-indexed.
    191     // TODO: Remove
    192     public CharSequence getNthPrevWord(final int n) {
    193         if (n <= 0 || n > mPrevWordsCount) {
    194             return null;
    195         }
    196         return mPrevWordsInfo[n - 1].mWord;
    197     }
    198 
    199     // n is 1-indexed.
    200     @UsedForTesting
    201     public boolean isNthPrevWordBeginningOfSentence(final int n) {
    202         if (n <= 0 || n > mPrevWordsCount) {
    203             return false;
    204         }
    205         return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
    206     }
    207 
    208     public void outputToArray(final int[][] codePointArrays,
    209             final boolean[] isBeginningOfSentenceArray) {
    210         for (int i = 0; i < mPrevWordsCount; i++) {
    211             final WordInfo wordInfo = mPrevWordsInfo[i];
    212             if (wordInfo == null || !wordInfo.isValid()) {
    213                 codePointArrays[i] = new int[0];
    214                 isBeginningOfSentenceArray[i] = false;
    215                 continue;
    216             }
    217             codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
    218             isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
    219         }
    220     }
    221 
    222     public int getPrevWordCount() {
    223         return mPrevWordsCount;
    224     }
    225 
    226     @Override
    227     public int hashCode() {
    228         int hashValue = 0;
    229         for (final WordInfo wordInfo : mPrevWordsInfo) {
    230             if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
    231                 break;
    232             }
    233             hashValue ^= wordInfo.hashCode();
    234         }
    235         return hashValue;
    236     }
    237 
    238     @Override
    239     public boolean equals(Object o) {
    240         if (this == o) return true;
    241         if (!(o instanceof NgramContext)) return false;
    242         final NgramContext prevWordsInfo = (NgramContext)o;
    243 
    244         final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
    245         for (int i = 0; i < minLength; i++) {
    246             if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) {
    247                 return false;
    248             }
    249         }
    250         final WordInfo[] longerWordsInfo;
    251         final int longerWordsInfoCount;
    252         if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
    253             longerWordsInfo = mPrevWordsInfo;
    254             longerWordsInfoCount = mPrevWordsCount;
    255         } else {
    256             longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
    257             longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
    258         }
    259         for (int i = minLength; i < longerWordsInfoCount; i++) {
    260             if (longerWordsInfo[i] != null
    261                     && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) {
    262                 return false;
    263             }
    264         }
    265         return true;
    266     }
    267 
    268     @Override
    269     public String toString() {
    270         final StringBuffer builder = new StringBuffer();
    271         for (int i = 0; i < mPrevWordsCount; i++) {
    272             final WordInfo wordInfo = mPrevWordsInfo[i];
    273             builder.append("PrevWord[");
    274             builder.append(i);
    275             builder.append("]: ");
    276             if (wordInfo == null) {
    277                 builder.append("null. ");
    278                 continue;
    279             }
    280             if (!wordInfo.isValid()) {
    281                 builder.append("Empty. ");
    282                 continue;
    283             }
    284             builder.append(wordInfo.mWord);
    285             builder.append(", isBeginningOfSentence: ");
    286             builder.append(wordInfo.mIsBeginningOfSentence);
    287             builder.append(". ");
    288         }
    289         return builder.toString();
    290     }
    291 }
    292