1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ********************************************************************** 6 * Copyright (c) 2002-2007, International Business Machines Corporation 7 * and others. All Rights Reserved. 8 ********************************************************************** 9 * Date Name Description 10 * 01/14/2002 aliu Creation. 11 ********************************************************************** 12 */ 13 14 package android.icu.text; 15 import android.icu.impl.Utility; 16 17 /** 18 * A replacer that produces static text as its output. The text may 19 * contain transliterator stand-in characters that represent nested 20 * UnicodeReplacer objects, making it possible to encode a tree of 21 * replacers in a StringReplacer. A StringReplacer that contains such 22 * stand-ins is called a <em>complex</em> StringReplacer. A complex 23 * StringReplacer has a slower processing loop than a non-complex one. 24 * @author Alan Liu 25 */ 26 class StringReplacer implements UnicodeReplacer { 27 28 /** 29 * Output text, possibly containing stand-in characters that 30 * represent nested UnicodeReplacers. 31 */ 32 private String output; 33 34 /** 35 * Cursor position. Value is ignored if hasCursor is false. 36 */ 37 private int cursorPos; 38 39 /** 40 * True if this object outputs a cursor position. 41 */ 42 private boolean hasCursor; 43 44 /** 45 * A complex object contains nested replacers and requires more 46 * complex processing. StringReplacers are initially assumed to 47 * be complex. If no nested replacers are seen during processing, 48 * then isComplex is set to false, and future replacements are 49 * short circuited for better performance. 50 */ 51 private boolean isComplex; 52 53 /** 54 * Object that translates stand-in characters in 'output' to 55 * UnicodeReplacer objects. 56 */ 57 private final RuleBasedTransliterator.Data data; 58 59 /** 60 * Construct a StringReplacer that sets the emits the given output 61 * text and sets the cursor to the given position. 62 * @param theOutput text that will replace input text when the 63 * replace() method is called. May contain stand-in characters 64 * that represent nested replacers. 65 * @param theCursorPos cursor position that will be returned by 66 * the replace() method 67 * @param theData transliterator context object that translates 68 * stand-in characters to UnicodeReplacer objects 69 */ 70 public StringReplacer(String theOutput, 71 int theCursorPos, 72 RuleBasedTransliterator.Data theData) { 73 output = theOutput; 74 cursorPos = theCursorPos; 75 hasCursor = true; 76 data = theData; 77 isComplex = true; 78 } 79 80 /** 81 * Construct a StringReplacer that sets the emits the given output 82 * text and does not modify the cursor. 83 * @param theOutput text that will replace input text when the 84 * replace() method is called. May contain stand-in characters 85 * that represent nested replacers. 86 * @param theData transliterator context object that translates 87 * stand-in characters to UnicodeReplacer objects 88 */ 89 public StringReplacer(String theOutput, 90 RuleBasedTransliterator.Data theData) { 91 output = theOutput; 92 cursorPos = 0; 93 hasCursor = false; 94 data = theData; 95 isComplex = true; 96 } 97 98 //= public static UnicodeReplacer valueOf(String output, 99 //= int cursorPos, 100 //= RuleBasedTransliterator.Data data) { 101 //= if (output.length() == 1) { 102 //= char c = output.charAt(0); 103 //= UnicodeReplacer r = data.lookupReplacer(c); 104 //= if (r != null) { 105 //= return r; 106 //= } 107 //= } 108 //= return new StringReplacer(output, cursorPos, data); 109 //= } 110 111 /** 112 * UnicodeReplacer API 113 */ 114 @Override 115 public int replace(Replaceable text, 116 int start, 117 int limit, 118 int[] cursor) { 119 int outLen; 120 int newStart = 0; 121 122 // NOTE: It should be possible to _always_ run the complex 123 // processing code; just slower. If not, then there is a bug 124 // in the complex processing code. 125 126 // Simple (no nested replacers) Processing Code : 127 if (!isComplex) { 128 text.replace(start, limit, output); 129 outLen = output.length(); 130 131 // Setup default cursor position (for cursorPos within output) 132 newStart = cursorPos; 133 } 134 135 // Complex (nested replacers) Processing Code : 136 else { 137 /* When there are segments to be copied, use the Replaceable.copy() 138 * API in order to retain out-of-band data. Copy everything to the 139 * end of the string, then copy them back over the key. This preserves 140 * the integrity of indices into the key and surrounding context while 141 * generating the output text. 142 */ 143 StringBuffer buf = new StringBuffer(); 144 int oOutput; // offset into 'output' 145 isComplex = false; 146 147 // The temporary buffer starts at tempStart, and extends 148 // to destLimit + tempExtra. The start of the buffer has a single 149 // character from before the key. This provides style 150 // data when addition characters are filled into the 151 // temporary buffer. If there is nothing to the left, use 152 // the non-character U+FFFF, which Replaceable subclasses 153 // should treat specially as a "no-style character." 154 // destStart points to the point after the style context 155 // character, so it is tempStart+1 or tempStart+2. 156 int tempStart = text.length(); // start of temp buffer 157 int destStart = tempStart; // copy new text to here 158 if (start > 0) { 159 int len = UTF16.getCharCount(text.char32At(start-1)); 160 text.copy(start-len, start, tempStart); 161 destStart += len; 162 } else { 163 text.replace(tempStart, tempStart, "\uFFFF"); 164 destStart++; 165 } 166 int destLimit = destStart; 167 int tempExtra = 0; // temp chars after destLimit 168 169 for (oOutput=0; oOutput<output.length(); ) { 170 if (oOutput == cursorPos) { 171 // Record the position of the cursor 172 newStart = buf.length() + destLimit - destStart; // relative to start 173 // the buf.length() was inserted for bug 5789 174 // the problem is that if we are accumulating into a buffer (when r == null below) 175 // then the actual length of the text at that point needs to add the buf length. 176 // there was an alternative suggested in #5789, but that looks like it won't work 177 // if we have accumulated some stuff in the dest part AND have a non-zero buffer. 178 } 179 int c = UTF16.charAt(output, oOutput); 180 181 // When we are at the last position copy the right style 182 // context character into the temporary buffer. We don't 183 // do this before because it will provide an incorrect 184 // right context for previous replace() operations. 185 int nextIndex = oOutput + UTF16.getCharCount(c); 186 if (nextIndex == output.length()) { 187 tempExtra = UTF16.getCharCount(text.char32At(limit)); 188 text.copy(limit, limit+tempExtra, destLimit); 189 } 190 191 UnicodeReplacer r = data.lookupReplacer(c); 192 if (r == null) { 193 // Accumulate straight (non-segment) text. 194 UTF16.append(buf, c); 195 } else { 196 isComplex = true; 197 198 // Insert any accumulated straight text. 199 if (buf.length() > 0) { 200 text.replace(destLimit, destLimit, buf.toString()); 201 destLimit += buf.length(); 202 buf.setLength(0); 203 } 204 205 // Delegate output generation to replacer object 206 int len = r.replace(text, destLimit, destLimit, cursor); 207 destLimit += len; 208 } 209 oOutput = nextIndex; 210 } 211 // Insert any accumulated straight text. 212 if (buf.length() > 0) { 213 text.replace(destLimit, destLimit, buf.toString()); 214 destLimit += buf.length(); 215 } 216 if (oOutput == cursorPos) { 217 // Record the position of the cursor 218 newStart = destLimit - destStart; // relative to start 219 } 220 221 outLen = destLimit - destStart; 222 223 // Copy new text to start, and delete it 224 text.copy(destStart, destLimit, start); 225 text.replace(tempStart + outLen, destLimit + tempExtra + outLen, ""); 226 227 // Delete the old text (the key) 228 text.replace(start + outLen, limit + outLen, ""); 229 } 230 231 if (hasCursor) { 232 // Adjust the cursor for positions outside the key. These 233 // refer to code points rather than code units. If cursorPos 234 // is within the output string, then use newStart, which has 235 // already been set above. 236 if (cursorPos < 0) { 237 newStart = start; 238 int n = cursorPos; 239 // Outside the output string, cursorPos counts code points 240 while (n < 0 && newStart > 0) { 241 newStart -= UTF16.getCharCount(text.char32At(newStart-1)); 242 ++n; 243 } 244 newStart += n; 245 } else if (cursorPos > output.length()) { 246 newStart = start + outLen; 247 int n = cursorPos - output.length(); 248 // Outside the output string, cursorPos counts code points 249 while (n > 0 && newStart < text.length()) { 250 newStart += UTF16.getCharCount(text.char32At(newStart)); 251 --n; 252 } 253 newStart += n; 254 } else { 255 // Cursor is within output string. It has been set up above 256 // to be relative to start. 257 newStart += start; 258 } 259 260 cursor[0] = newStart; 261 } 262 263 return outLen; 264 } 265 266 /** 267 * UnicodeReplacer API 268 */ 269 @Override 270 public String toReplacerPattern(boolean escapeUnprintable) { 271 StringBuffer rule = new StringBuffer(); 272 StringBuffer quoteBuf = new StringBuffer(); 273 274 int cursor = cursorPos; 275 276 // Handle a cursor preceding the output 277 if (hasCursor && cursor < 0) { 278 while (cursor++ < 0) { 279 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); 280 } 281 // Fall through and append '|' below 282 } 283 284 for (int i=0; i<output.length(); ++i) { 285 if (hasCursor && i == cursor) { 286 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf); 287 } 288 char c = output.charAt(i); // Ok to use 16-bits here 289 290 UnicodeReplacer r = data.lookupReplacer(c); 291 if (r == null) { 292 Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf); 293 } else { 294 StringBuffer buf = new StringBuffer(" "); 295 buf.append(r.toReplacerPattern(escapeUnprintable)); 296 buf.append(' '); 297 Utility.appendToRule(rule, buf.toString(), 298 true, escapeUnprintable, quoteBuf); 299 } 300 } 301 302 // Handle a cursor after the output. Use > rather than >= because 303 // if cursor == output.length() it is at the end of the output, 304 // which is the default position, so we need not emit it. 305 if (hasCursor && cursor > output.length()) { 306 cursor -= output.length(); 307 while (cursor-- > 0) { 308 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); 309 } 310 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf); 311 } 312 // Flush quoteBuf out to result 313 Utility.appendToRule(rule, -1, 314 true, escapeUnprintable, quoteBuf); 315 316 return rule.toString(); 317 } 318 319 /** 320 * Union the set of all characters that may output by this object 321 * into the given set. 322 * @param toUnionTo the set into which to union the output characters 323 */ 324 @Override 325 public void addReplacementSetTo(UnicodeSet toUnionTo) { 326 int ch; 327 for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) { 328 ch = UTF16.charAt(output, i); 329 UnicodeReplacer r = data.lookupReplacer(ch); 330 if (r == null) { 331 toUnionTo.add(ch); 332 } else { 333 r.addReplacementSetTo(toUnionTo); 334 } 335 } 336 } 337 } 338 339 //eof 340