1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2014, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package android.icu.text; 11 12 import java.text.CharacterIterator; 13 14 import android.icu.impl.CharacterIteration; 15 16 abstract class DictionaryBreakEngine implements LanguageBreakEngine { 17 18 /* Helper class for improving readability of the Thai/Lao/Khmer word break 19 * algorithm. 20 */ 21 static class PossibleWord { 22 // List size, limited by the maximum number of words in the dictionary 23 // that form a nested sequence. 24 private final static int POSSIBLE_WORD_LIST_MAX = 20; 25 //list of word candidate lengths, in increasing length order 26 private int lengths[]; 27 private int count[]; // Count of candidates 28 private int prefix; // The longest match with a dictionary word 29 private int offset; // Offset in the text of these candidates 30 private int mark; // The preferred candidate's offset 31 private int current; // The candidate we're currently looking at 32 33 // Default constructor 34 public PossibleWord() { 35 lengths = new int[POSSIBLE_WORD_LIST_MAX]; 36 count = new int[1]; // count needs to be an array of 1 so that it can be pass as reference 37 offset = -1; 38 } 39 40 // Fill the list of candidates if needed, select the longest, and return the number found 41 public int candidates(CharacterIterator fIter, DictionaryMatcher dict, int rangeEnd) { 42 int start = fIter.getIndex(); 43 if (start != offset) { 44 offset = start; 45 prefix = dict.matches(fIter, rangeEnd - start, lengths, count, lengths.length); 46 // Dictionary leaves text after longest prefix, not longest word. Back up. 47 if (count[0] <= 0) { 48 fIter.setIndex(start); 49 } 50 } 51 if (count[0] > 0) { 52 fIter.setIndex(start + lengths[count[0]-1]); 53 } 54 current = count[0] - 1; 55 mark = current; 56 return count[0]; 57 } 58 59 // Select the currently marked candidate, point after it in the text, and invalidate self 60 public int acceptMarked(CharacterIterator fIter) { 61 fIter.setIndex(offset + lengths[mark]); 62 return lengths[mark]; 63 } 64 65 // Backup from the current candidate to the next shorter one; return true if that exists 66 // and point the text after it 67 public boolean backUp(CharacterIterator fIter) { 68 if (current > 0) { 69 fIter.setIndex(offset + lengths[--current]); 70 return true; 71 } 72 return false; 73 } 74 75 // Return the longest prefix this candidate location shares with a dictionary word 76 public int longestPrefix() { 77 return prefix; 78 } 79 80 // Mark the current candidate as the one we like 81 public void markCurrent() { 82 mark = current; 83 } 84 } 85 86 /** 87 * A deque-like structure holding raw ints. 88 * Partial, limited implementation, only what is needed by the dictionary implementation. 89 * For internal use only. 90 * @hide draft / provisional / internal are hidden on Android 91 */ 92 static class DequeI implements Cloneable { 93 private int[] data = new int[50]; 94 private int lastIdx = 4; // or base of stack. Index of element. 95 private int firstIdx = 4; // or Top of Stack. Index of element + 1. 96 97 @Override 98 public Object clone() throws CloneNotSupportedException { 99 DequeI result = (DequeI)super.clone(); 100 result.data = data.clone(); 101 return result; 102 } 103 104 int size() { 105 return firstIdx - lastIdx; 106 } 107 108 boolean isEmpty() { 109 return size() == 0; 110 } 111 112 private void grow() { 113 int[] newData = new int[data.length * 2]; 114 System.arraycopy(data, 0, newData, 0, data.length); 115 data = newData; 116 } 117 118 void offer(int v) { 119 // Note that the actual use cases of offer() add at most one element. 120 // We make no attempt to handle more than a few. 121 assert lastIdx > 0; 122 data[--lastIdx] = v; 123 } 124 125 void push(int v) { 126 if (firstIdx >= data.length) { 127 grow(); 128 } 129 data[firstIdx++] = v; 130 } 131 132 int pop() { 133 assert size() > 0; 134 return data[--firstIdx]; 135 } 136 137 int peek() { 138 assert size() > 0; 139 return data[firstIdx - 1]; 140 } 141 142 int peekLast() { 143 assert size() > 0; 144 return data[lastIdx]; 145 } 146 147 int pollLast() { 148 assert size() > 0; 149 return data[lastIdx++]; 150 } 151 152 boolean contains(int v) { 153 for (int i=lastIdx; i< firstIdx; i++) { 154 if (data[i] == v) { 155 return true; 156 } 157 } 158 return false; 159 } 160 161 int elementAt(int i) { 162 assert i < size(); 163 return data[lastIdx + i]; 164 } 165 166 void removeAllElements() { 167 lastIdx = firstIdx = 4; 168 } 169 } 170 171 UnicodeSet fSet = new UnicodeSet(); 172 173 /** 174 * Constructor 175 */ 176 public DictionaryBreakEngine() { 177 } 178 179 @Override 180 public boolean handles(int c) { 181 return fSet.contains(c); // we recognize the character 182 } 183 184 @Override 185 public int findBreaks(CharacterIterator text, int startPos, int endPos, 186 DequeI foundBreaks) { 187 int result = 0; 188 189 // Find the span of characters included in the set. 190 // The span to break begins at the current position int the text, and 191 // extends towards the start or end of the text, depending on 'reverse'. 192 193 int start = text.getIndex(); 194 int current; 195 int rangeStart; 196 int rangeEnd; 197 int c = CharacterIteration.current32(text); 198 while ((current = text.getIndex()) < endPos && fSet.contains(c)) { 199 CharacterIteration.next32(text); 200 c = CharacterIteration.current32(text); 201 } 202 rangeStart = start; 203 rangeEnd = current; 204 205 result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); 206 text.setIndex(current); 207 208 return result; 209 } 210 211 void setCharacters(UnicodeSet set) { 212 fSet = new UnicodeSet(set); 213 fSet.compact(); 214 } 215 216 /** 217 * <p>Divide up a range of known dictionary characters handled by this break engine.</p> 218 * 219 * @param text A UText representing the text 220 * @param rangeStart The start of the range of dictionary characters 221 * @param rangeEnd The end of the range of dictionary characters 222 * @param foundBreaks Output of break positions. Positions are pushed. 223 * Pre-existing contents of the output stack are unaltered. 224 * @return The number of breaks found 225 */ 226 abstract int divideUpDictionaryRange(CharacterIterator text, 227 int rangeStart, 228 int rangeEnd, 229 DequeI foundBreaks ); 230 } 231