1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2014, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package android.icu.text; 11 12 import java.text.CharacterIterator; 13 import java.util.BitSet; 14 15 import android.icu.impl.CharacterIteration; 16 17 abstract class DictionaryBreakEngine implements LanguageBreakEngine { 18 19 /* Helper class for improving readability of the Thai/Lao/Khmer word break 20 * algorithm. 21 */ 22 static class PossibleWord { 23 // List size, limited by the maximum number of words in the dictionary 24 // that form a nested sequence. 25 private final static int POSSIBLE_WORD_LIST_MAX = 20; 26 //list of word candidate lengths, in increasing length order 27 private int lengths[]; 28 private int count[]; // Count of candidates 29 private int prefix; // The longest match with a dictionary word 30 private int offset; // Offset in the text of these candidates 31 private int mark; // The preferred candidate's offset 32 private int current; // The candidate we're currently looking at 33 34 // Default constructor 35 public PossibleWord() { 36 lengths = new int[POSSIBLE_WORD_LIST_MAX]; 37 count = new int[1]; // count needs to be an array of 1 so that it can be pass as reference 38 offset = -1; 39 } 40 41 // Fill the list of candidates if needed, select the longest, and return the number found 42 public int candidates(CharacterIterator fIter, DictionaryMatcher dict, int rangeEnd) { 43 int start = fIter.getIndex(); 44 if (start != offset) { 45 offset = start; 46 prefix = dict.matches(fIter, rangeEnd - start, lengths, count, lengths.length); 47 // Dictionary leaves text after longest prefix, not longest word. Back up. 48 if (count[0] <= 0) { 49 fIter.setIndex(start); 50 } 51 } 52 if (count[0] > 0) { 53 fIter.setIndex(start + lengths[count[0]-1]); 54 } 55 current = count[0] - 1; 56 mark = current; 57 return count[0]; 58 } 59 60 // Select the currently marked candidate, point after it in the text, and invalidate self 61 public int acceptMarked(CharacterIterator fIter) { 62 fIter.setIndex(offset + lengths[mark]); 63 return lengths[mark]; 64 } 65 66 // Backup from the current candidate to the next shorter one; return true if that exists 67 // and point the text after it 68 public boolean backUp(CharacterIterator fIter) { 69 if (current > 0) { 70 fIter.setIndex(offset + lengths[--current]); 71 return true; 72 } 73 return false; 74 } 75 76 // Return the longest prefix this candidate location shares with a dictionary word 77 public int longestPrefix() { 78 return prefix; 79 } 80 81 // Mark the current candidate as the one we like 82 public void markCurrent() { 83 mark = current; 84 } 85 } 86 87 /** 88 * A deque-like structure holding raw ints. 89 * Partial, limited implementation, only what is needed by the dictionary implementation. 90 * For internal use only. 91 * @hide draft / provisional / internal are hidden on Android 92 */ 93 static class DequeI implements Cloneable { 94 private int[] data = new int[50]; 95 private int lastIdx = 4; // or base of stack. Index of element. 96 private int firstIdx = 4; // or Top of Stack. Index of element + 1. 97 98 @Override 99 public Object clone() throws CloneNotSupportedException { 100 DequeI result = (DequeI)super.clone(); 101 result.data = data.clone(); 102 return result; 103 } 104 105 int size() { 106 return firstIdx - lastIdx; 107 } 108 109 boolean isEmpty() { 110 return size() == 0; 111 } 112 113 private void grow() { 114 int[] newData = new int[data.length * 2]; 115 System.arraycopy(data, 0, newData, 0, data.length); 116 data = newData; 117 } 118 119 void offer(int v) { 120 // Note that the actual use cases of offer() add at most one element. 121 // We make no attempt to handle more than a few. 122 assert lastIdx > 0; 123 data[--lastIdx] = v; 124 } 125 126 void push(int v) { 127 if (firstIdx >= data.length) { 128 grow(); 129 } 130 data[firstIdx++] = v; 131 } 132 133 int pop() { 134 assert size() > 0; 135 return data[--firstIdx]; 136 } 137 138 int peek() { 139 assert size() > 0; 140 return data[firstIdx - 1]; 141 } 142 143 int peekLast() { 144 assert size() > 0; 145 return data[lastIdx]; 146 } 147 148 int pollLast() { 149 assert size() > 0; 150 return data[lastIdx++]; 151 } 152 153 boolean contains(int v) { 154 for (int i=lastIdx; i< firstIdx; i++) { 155 if (data[i] == v) { 156 return true; 157 } 158 } 159 return false; 160 } 161 162 int elementAt(int i) { 163 assert i < size(); 164 return data[lastIdx + i]; 165 } 166 167 void removeAllElements() { 168 lastIdx = firstIdx = 4; 169 } 170 } 171 172 UnicodeSet fSet = new UnicodeSet(); 173 private BitSet fTypes = new BitSet(32); 174 175 /** 176 * @param breakTypes The types of break iterators that can use this engine. 177 * For example, BreakIterator.KIND_LINE 178 */ 179 public DictionaryBreakEngine(Integer... breakTypes) { 180 for (Integer type: breakTypes) { 181 fTypes.set(type); 182 } 183 } 184 185 @Override 186 public boolean handles(int c, int breakType) { 187 return fTypes.get(breakType) && // this type can use us 188 fSet.contains(c); // we recognize the character 189 } 190 191 @Override 192 public int findBreaks(CharacterIterator text, int startPos, int endPos, 193 int breakType, DequeI foundBreaks) { 194 int result = 0; 195 196 // Find the span of characters included in the set. 197 // The span to break begins at the current position int the text, and 198 // extends towards the start or end of the text, depending on 'reverse'. 199 200 int start = text.getIndex(); 201 int current; 202 int rangeStart; 203 int rangeEnd; 204 int c = CharacterIteration.current32(text); 205 while ((current = text.getIndex()) < endPos && fSet.contains(c)) { 206 CharacterIteration.next32(text); 207 c = CharacterIteration.current32(text); 208 } 209 rangeStart = start; 210 rangeEnd = current; 211 212 // if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) { 213 // TODO: Why does icu4c have this? 214 result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); 215 text.setIndex(current); 216 217 return result; 218 } 219 220 void setCharacters(UnicodeSet set) { 221 fSet = new UnicodeSet(set); 222 fSet.compact(); 223 } 224 225 /** 226 * <p>Divide up a range of known dictionary characters handled by this break engine.</p> 227 * 228 * @param text A UText representing the text 229 * @param rangeStart The start of the range of dictionary characters 230 * @param rangeEnd The end of the range of dictionary characters 231 * @param foundBreaks Output of break positions. Positions are pushed. 232 * Pre-existing contents of the output stack are unaltered. 233 * @return The number of breaks found 234 */ 235 abstract int divideUpDictionaryRange(CharacterIterator text, 236 int rangeStart, 237 int rangeEnd, 238 DequeI foundBreaks ); 239 } 240