Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 **********************************************************************
      6 *   Copyright (c) 2002-2007, International Business Machines Corporation
      7 *   and others.  All Rights Reserved.
      8 **********************************************************************
      9 *   Date        Name        Description
     10 *   01/14/2002  aliu        Creation.
     11 **********************************************************************
     12 */
     13 
     14 package android.icu.text;
     15 import android.icu.impl.Utility;
     16 
     17 /**
     18  * A replacer that produces static text as its output.  The text may
     19  * contain transliterator stand-in characters that represent nested
     20  * UnicodeReplacer objects, making it possible to encode a tree of
     21  * replacers in a StringReplacer.  A StringReplacer that contains such
     22  * stand-ins is called a <em>complex</em> StringReplacer.  A complex
     23  * StringReplacer has a slower processing loop than a non-complex one.
     24  * @author Alan Liu
     25  */
     26 class StringReplacer implements UnicodeReplacer {
     27 
     28     /**
     29      * Output text, possibly containing stand-in characters that
     30      * represent nested UnicodeReplacers.
     31      */
     32     private String output;
     33 
     34     /**
     35      * Cursor position.  Value is ignored if hasCursor is false.
     36      */
     37     private int cursorPos;
     38 
     39     /**
     40      * True if this object outputs a cursor position.
     41      */
     42     private boolean hasCursor;
     43 
     44     /**
     45      * A complex object contains nested replacers and requires more
     46      * complex processing.  StringReplacers are initially assumed to
     47      * be complex.  If no nested replacers are seen during processing,
     48      * then isComplex is set to false, and future replacements are
     49      * short circuited for better performance.
     50      */
     51     private boolean isComplex;
     52 
     53     /**
     54      * Object that translates stand-in characters in 'output' to
     55      * UnicodeReplacer objects.
     56      */
     57     private final RuleBasedTransliterator.Data data;
     58 
     59     /**
     60      * Construct a StringReplacer that sets the emits the given output
     61      * text and sets the cursor to the given position.
     62      * @param theOutput text that will replace input text when the
     63      * replace() method is called.  May contain stand-in characters
     64      * that represent nested replacers.
     65      * @param theCursorPos cursor position that will be returned by
     66      * the replace() method
     67      * @param theData transliterator context object that translates
     68      * stand-in characters to UnicodeReplacer objects
     69      */
     70     public StringReplacer(String theOutput,
     71                           int theCursorPos,
     72                           RuleBasedTransliterator.Data theData) {
     73         output = theOutput;
     74         cursorPos = theCursorPos;
     75         hasCursor = true;
     76         data = theData;
     77         isComplex = true;
     78     }
     79 
     80     /**
     81      * Construct a StringReplacer that sets the emits the given output
     82      * text and does not modify the cursor.
     83      * @param theOutput text that will replace input text when the
     84      * replace() method is called.  May contain stand-in characters
     85      * that represent nested replacers.
     86      * @param theData transliterator context object that translates
     87      * stand-in characters to UnicodeReplacer objects
     88      */
     89     public StringReplacer(String theOutput,
     90                           RuleBasedTransliterator.Data theData) {
     91         output = theOutput;
     92         cursorPos = 0;
     93         hasCursor = false;
     94         data = theData;
     95         isComplex = true;
     96     }
     97 
     98 //=    public static UnicodeReplacer valueOf(String output,
     99 //=                                          int cursorPos,
    100 //=                                          RuleBasedTransliterator.Data data) {
    101 //=        if (output.length() == 1) {
    102 //=            char c = output.charAt(0);
    103 //=            UnicodeReplacer r = data.lookupReplacer(c);
    104 //=            if (r != null) {
    105 //=                return r;
    106 //=            }
    107 //=        }
    108 //=        return new StringReplacer(output, cursorPos, data);
    109 //=    }
    110 
    111     /**
    112      * UnicodeReplacer API
    113      */
    114     @Override
    115     public int replace(Replaceable text,
    116                        int start,
    117                        int limit,
    118                        int[] cursor) {
    119         int outLen;
    120         int newStart = 0;
    121 
    122         // NOTE: It should be possible to _always_ run the complex
    123         // processing code; just slower.  If not, then there is a bug
    124         // in the complex processing code.
    125 
    126         // Simple (no nested replacers) Processing Code :
    127         if (!isComplex) {
    128             text.replace(start, limit, output);
    129             outLen = output.length();
    130 
    131             // Setup default cursor position (for cursorPos within output)
    132             newStart = cursorPos;
    133         }
    134 
    135         // Complex (nested replacers) Processing Code :
    136         else {
    137             /* When there are segments to be copied, use the Replaceable.copy()
    138              * API in order to retain out-of-band data.  Copy everything to the
    139              * end of the string, then copy them back over the key.  This preserves
    140              * the integrity of indices into the key and surrounding context while
    141              * generating the output text.
    142              */
    143             StringBuffer buf = new StringBuffer();
    144             int oOutput; // offset into 'output'
    145             isComplex = false;
    146 
    147             // The temporary buffer starts at tempStart, and extends
    148             // to destLimit + tempExtra.  The start of the buffer has a single
    149             // character from before the key.  This provides style
    150             // data when addition characters are filled into the
    151             // temporary buffer.  If there is nothing to the left, use
    152             // the non-character U+FFFF, which Replaceable subclasses
    153             // should treat specially as a "no-style character."
    154             // destStart points to the point after the style context
    155             // character, so it is tempStart+1 or tempStart+2.
    156             int tempStart = text.length(); // start of temp buffer
    157             int destStart = tempStart; // copy new text to here
    158             if (start > 0) {
    159                 int len = UTF16.getCharCount(text.char32At(start-1));
    160                 text.copy(start-len, start, tempStart);
    161                 destStart += len;
    162             } else {
    163                 text.replace(tempStart, tempStart, "\uFFFF");
    164                 destStart++;
    165             }
    166             int destLimit = destStart;
    167             int tempExtra = 0; // temp chars after destLimit
    168 
    169             for (oOutput=0; oOutput<output.length(); ) {
    170                 if (oOutput == cursorPos) {
    171                     // Record the position of the cursor
    172                     newStart = buf.length() + destLimit - destStart; // relative to start
    173                     // the buf.length() was inserted for bug 5789
    174                     // the problem is that if we are accumulating into a buffer (when r == null below)
    175                     // then the actual length of the text at that point needs to add the buf length.
    176                     // there was an alternative suggested in #5789, but that looks like it won't work
    177                     // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
    178                 }
    179                 int c = UTF16.charAt(output, oOutput);
    180 
    181                 // When we are at the last position copy the right style
    182                 // context character into the temporary buffer.  We don't
    183                 // do this before because it will provide an incorrect
    184                 // right context for previous replace() operations.
    185                 int nextIndex = oOutput + UTF16.getCharCount(c);
    186                 if (nextIndex == output.length()) {
    187                     tempExtra = UTF16.getCharCount(text.char32At(limit));
    188                     text.copy(limit, limit+tempExtra, destLimit);
    189                 }
    190 
    191                 UnicodeReplacer r = data.lookupReplacer(c);
    192                 if (r == null) {
    193                     // Accumulate straight (non-segment) text.
    194                     UTF16.append(buf, c);
    195                 } else {
    196                     isComplex = true;
    197 
    198                     // Insert any accumulated straight text.
    199                     if (buf.length() > 0) {
    200                         text.replace(destLimit, destLimit, buf.toString());
    201                         destLimit += buf.length();
    202                         buf.setLength(0);
    203                     }
    204 
    205                     // Delegate output generation to replacer object
    206                     int len = r.replace(text, destLimit, destLimit, cursor);
    207                     destLimit += len;
    208                 }
    209                 oOutput = nextIndex;
    210             }
    211             // Insert any accumulated straight text.
    212             if (buf.length() > 0) {
    213                 text.replace(destLimit, destLimit, buf.toString());
    214                 destLimit += buf.length();
    215             }
    216             if (oOutput == cursorPos) {
    217                 // Record the position of the cursor
    218                 newStart = destLimit - destStart; // relative to start
    219             }
    220 
    221             outLen = destLimit - destStart;
    222 
    223             // Copy new text to start, and delete it
    224             text.copy(destStart, destLimit, start);
    225             text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");
    226 
    227             // Delete the old text (the key)
    228             text.replace(start + outLen, limit + outLen, "");
    229         }
    230 
    231         if (hasCursor) {
    232             // Adjust the cursor for positions outside the key.  These
    233             // refer to code points rather than code units.  If cursorPos
    234             // is within the output string, then use newStart, which has
    235             // already been set above.
    236             if (cursorPos < 0) {
    237                 newStart = start;
    238                 int n = cursorPos;
    239                 // Outside the output string, cursorPos counts code points
    240                 while (n < 0 && newStart > 0) {
    241                     newStart -= UTF16.getCharCount(text.char32At(newStart-1));
    242                     ++n;
    243                 }
    244                 newStart += n;
    245             } else if (cursorPos > output.length()) {
    246                 newStart = start + outLen;
    247                 int n = cursorPos - output.length();
    248                 // Outside the output string, cursorPos counts code points
    249                 while (n > 0 && newStart < text.length()) {
    250                     newStart += UTF16.getCharCount(text.char32At(newStart));
    251                     --n;
    252                 }
    253                 newStart += n;
    254             } else {
    255                 // Cursor is within output string.  It has been set up above
    256                 // to be relative to start.
    257                 newStart += start;
    258             }
    259 
    260             cursor[0] = newStart;
    261         }
    262 
    263         return outLen;
    264     }
    265 
    266     /**
    267      * UnicodeReplacer API
    268      */
    269     @Override
    270     public String toReplacerPattern(boolean escapeUnprintable) {
    271         StringBuffer rule = new StringBuffer();
    272         StringBuffer quoteBuf = new StringBuffer();
    273 
    274         int cursor = cursorPos;
    275 
    276         // Handle a cursor preceding the output
    277         if (hasCursor && cursor < 0) {
    278             while (cursor++ < 0) {
    279                 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
    280             }
    281             // Fall through and append '|' below
    282         }
    283 
    284         for (int i=0; i<output.length(); ++i) {
    285             if (hasCursor && i == cursor) {
    286                 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
    287             }
    288             char c = output.charAt(i); // Ok to use 16-bits here
    289 
    290             UnicodeReplacer r = data.lookupReplacer(c);
    291             if (r == null) {
    292                 Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
    293             } else {
    294                 StringBuffer buf = new StringBuffer(" ");
    295                 buf.append(r.toReplacerPattern(escapeUnprintable));
    296                 buf.append(' ');
    297                 Utility.appendToRule(rule, buf.toString(),
    298                                      true, escapeUnprintable, quoteBuf);
    299             }
    300         }
    301 
    302         // Handle a cursor after the output.  Use > rather than >= because
    303         // if cursor == output.length() it is at the end of the output,
    304         // which is the default position, so we need not emit it.
    305         if (hasCursor && cursor > output.length()) {
    306             cursor -= output.length();
    307             while (cursor-- > 0) {
    308                 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
    309             }
    310             Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
    311         }
    312         // Flush quoteBuf out to result
    313         Utility.appendToRule(rule, -1,
    314                              true, escapeUnprintable, quoteBuf);
    315 
    316         return rule.toString();
    317     }
    318 
    319     /**
    320      * Union the set of all characters that may output by this object
    321      * into the given set.
    322      * @param toUnionTo the set into which to union the output characters
    323      */
    324     @Override
    325     public void addReplacementSetTo(UnicodeSet toUnionTo) {
    326         int ch;
    327         for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) {
    328             ch = UTF16.charAt(output, i);
    329             UnicodeReplacer r = data.lookupReplacer(ch);
    330             if (r == null) {
    331                 toUnionTo.add(ch);
    332             } else {
    333                 r.addReplacementSetTo(toUnionTo);
    334             }
    335         }
    336     }
    337 }
    338 
    339 //eof
    340