1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2014, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.text; 10 11 import java.util.Iterator; 12 13 /** 14 * UnicodeSetIterator iterates over the contents of a UnicodeSet. It 15 * iterates over either code points or code point ranges. After all 16 * code points or ranges have been returned, it returns the 17 * multicharacter strings of the UnicodSet, if any. 18 * 19 * <p>To iterate over code points and multicharacter strings, 20 * use a loop like this: 21 * <pre> 22 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { 23 * processString(it.getString()); 24 * } 25 * </pre> 26 * 27 * <p>To iterate over code point ranges, use a loop like this: 28 * <pre> 29 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.nextRange();) { 30 * if (it.codepoint != UnicodeSetIterator.IS_STRING) { 31 * processCodepointRange(it.codepoint, it.codepointEnd); 32 * } else { 33 * processString(it.getString()); 34 * } 35 * } 36 * </pre> 37 * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification. 38 * Do not alter the UnicodeSet while iterating. 39 * @author M. Davis 40 * @stable ICU 2.0 41 */ 42 public class UnicodeSetIterator { 43 44 /** 45 * Value of <tt>codepoint</tt> if the iterator points to a string. 46 * If <tt>codepoint == IS_STRING</tt>, then examine 47 * <tt>string</tt> for the current iteration result. 48 * @stable ICU 2.0 49 */ 50 public static int IS_STRING = -1; 51 52 /** 53 * Current code point, or the special value <tt>IS_STRING</tt>, if 54 * the iterator points to a string. 55 * @stable ICU 2.0 56 */ 57 public int codepoint; 58 59 /** 60 * When iterating over ranges using <tt>nextRange()</tt>, 61 * <tt>codepointEnd</tt> contains the inclusive end of the 62 * iteration range, if <tt>codepoint != IS_STRING</tt>. If 63 * iterating over code points using <tt>next()</tt>, or if 64 * <tt>codepoint == IS_STRING</tt>, then the value of 65 * <tt>codepointEnd</tt> is undefined. 66 * @stable ICU 2.0 67 */ 68 public int codepointEnd; 69 70 /** 71 * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points 72 * to the current string. If <tt>codepoint != IS_STRING</tt>, the 73 * value of <tt>string</tt> is undefined. 74 * @stable ICU 2.0 75 */ 76 public String string; 77 78 /** 79 * Create an iterator over the given set. 80 * @param set set to iterate over 81 * @stable ICU 2.0 82 */ 83 public UnicodeSetIterator(UnicodeSet set) { 84 reset(set); 85 } 86 87 /** 88 * Create an iterator over nothing. <tt>next()</tt> and 89 * <tt>nextRange()</tt> return false. This is a convenience 90 * constructor allowing the target to be set later. 91 * @stable ICU 2.0 92 */ 93 public UnicodeSetIterator() { 94 reset(new UnicodeSet()); 95 } 96 97 /** 98 * Returns the next element in the set, either a single code point 99 * or a string. If there are no more elements in the set, return 100 * false. If <tt>codepoint == IS_STRING</tt>, the value is a 101 * string in the <tt>string</tt> field. Otherwise the value is a 102 * single code point in the <tt>codepoint</tt> field. 103 * 104 * <p>The order of iteration is all code points in sorted order, 105 * followed by all strings sorted order. <tt>codepointEnd</tt> is 106 * undefined after calling this method. <tt>string</tt> is 107 * undefined unless <tt>codepoint == IS_STRING</tt>. Do not mix 108 * calls to <tt>next()</tt> and <tt>nextRange()</tt> without 109 * calling <tt>reset()</tt> between them. The results of doing so 110 * are undefined. 111 * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification. 112 * Do not alter the UnicodeSet while iterating. 113 * @return true if there was another element in the set and this 114 * object contains the element. 115 * @stable ICU 2.0 116 */ 117 public boolean next() { 118 if (nextElement <= endElement) { 119 codepoint = codepointEnd = nextElement++; 120 return true; 121 } 122 if (range < endRange) { 123 loadRange(++range); 124 codepoint = codepointEnd = nextElement++; 125 return true; 126 } 127 128 // stringIterator == null iff there are no string elements remaining 129 130 if (stringIterator == null) { 131 return false; 132 } 133 codepoint = IS_STRING; // signal that value is actually a string 134 string = stringIterator.next(); 135 if (!stringIterator.hasNext()) { 136 stringIterator = null; 137 } 138 return true; 139 } 140 141 /** 142 * Returns the next element in the set, either a code point range 143 * or a string. If there are no more elements in the set, return 144 * false. If <tt>codepoint == IS_STRING</tt>, the value is a 145 * string in the <tt>string</tt> field. Otherwise the value is a 146 * range of one or more code points from <tt>codepoint</tt> to 147 * <tt>codepointeEnd</tt> inclusive. 148 * 149 * <p>The order of iteration is all code points ranges in sorted 150 * order, followed by all strings sorted order. Ranges are 151 * disjoint and non-contiguous. <tt>string</tt> is undefined 152 * unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to 153 * <tt>next()</tt> and <tt>nextRange()</tt> without calling 154 * <tt>reset()</tt> between them. The results of doing so are 155 * undefined. 156 * 157 * @return true if there was another element in the set and this 158 * object contains the element. 159 * @stable ICU 2.0 160 */ 161 public boolean nextRange() { 162 if (nextElement <= endElement) { 163 codepointEnd = endElement; 164 codepoint = nextElement; 165 nextElement = endElement+1; 166 return true; 167 } 168 if (range < endRange) { 169 loadRange(++range); 170 codepointEnd = endElement; 171 codepoint = nextElement; 172 nextElement = endElement+1; 173 return true; 174 } 175 176 // stringIterator == null iff there are no string elements remaining 177 178 if (stringIterator == null) { 179 return false; 180 } 181 codepoint = IS_STRING; // signal that value is actually a string 182 string = stringIterator.next(); 183 if (!stringIterator.hasNext()) { 184 stringIterator = null; 185 } 186 return true; 187 } 188 189 /** 190 * Sets this iterator to visit the elements of the given set and 191 * resets it to the start of that set. The iterator is valid only 192 * so long as <tt>set</tt> is valid. 193 * @param uset the set to iterate over. 194 * @stable ICU 2.0 195 */ 196 public void reset(UnicodeSet uset) { 197 set = uset; 198 reset(); 199 } 200 201 /** 202 * Resets this iterator to the start of the set. 203 * @stable ICU 2.0 204 */ 205 public void reset() { 206 endRange = set.getRangeCount() - 1; 207 range = 0; 208 endElement = -1; 209 nextElement = 0; 210 if (endRange >= 0) { 211 loadRange(range); 212 } 213 stringIterator = null; 214 if (set.strings != null) { 215 stringIterator = set.strings.iterator(); 216 if (!stringIterator.hasNext()) { 217 stringIterator = null; 218 } 219 } 220 } 221 222 /** 223 * Gets the current string from the iterator. Only use after calling next(), not nextRange(). 224 * @stable ICU 4.0 225 */ 226 public String getString() { 227 if (codepoint != IS_STRING) { 228 return UTF16.valueOf(codepoint); 229 } 230 return string; 231 } 232 233 // ======================= PRIVATES =========================== 234 235 private UnicodeSet set; 236 private int endRange = 0; 237 private int range = 0; 238 239 /** 240 * @internal 241 * @deprecated This API is ICU internal only. 242 */ 243 @Deprecated 244 public UnicodeSet getSet() { 245 return set; 246 } 247 248 /** 249 * @internal 250 * @deprecated This API is ICU internal only. 251 */ 252 @Deprecated 253 protected int endElement; 254 /** 255 * @internal 256 * @deprecated This API is ICU internal only. 257 */ 258 @Deprecated 259 protected int nextElement; 260 private Iterator<String> stringIterator = null; 261 262 /** 263 * Invariant: stringIterator is null when there are no (more) strings remaining 264 */ 265 266 /** 267 * @internal 268 * @deprecated This API is ICU internal only. 269 */ 270 @Deprecated 271 protected void loadRange(int aRange) { 272 nextElement = set.getRangeStart(aRange); 273 endElement = set.getRangeEnd(aRange); 274 } 275 } 276