/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2010-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* FCDUTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp
*
* C++ version created on: 2010oct27
* created by: Markus W. Scherer
*/

     15 package android.icu.impl.coll;
     16 
     17 import android.icu.impl.Normalizer2Impl;
     18 
     19 /**
     20  * Incrementally checks the input text for FCD and normalizes where necessary.
     21  * @hide Only a subset of ICU is exposed in Android
     22  */
     23 public final class FCDUTF16CollationIterator extends UTF16CollationIterator {
     24     /**
     25      * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}.
     26      */
     27     public FCDUTF16CollationIterator(CollationData d) {
     28         super(d);
     29         nfcImpl = d.nfcImpl;
     30     }
     31 
     32     public FCDUTF16CollationIterator(CollationData data, boolean numeric, CharSequence s, int p) {
     33         super(data, numeric, s, p);
     34         rawSeq = s;
     35         segmentStart = p;
     36         rawLimit = s.length();
     37         nfcImpl = data.nfcImpl;
     38         checkDir = 1;
     39     }
     40 
     41     @Override
     42     public boolean equals(Object other) {
     43         // Skip the UTF16CollationIterator and call its parent.
     44         if (!(other instanceof CollationIterator)
     45             || !((CollationIterator)this).equals(other)
     46             || !(other instanceof FCDUTF16CollationIterator))
     47         {
     48             return false;
     49         }
     50         FCDUTF16CollationIterator o = (FCDUTF16CollationIterator)other;
     51         // Compare the iterator state but not the text: Assume that the caller does that.
     52         if (checkDir != o.checkDir) {
     53             return false;
     54         }
     55         if (checkDir == 0 && (seq == rawSeq) != (o.seq == o.rawSeq)) {
     56             return false;
     57         }
     58         if (checkDir != 0 || seq == rawSeq) {
     59             return (pos - rawStart) == (o.pos - /*o.*/ rawStart);
     60         }
     61         else {
     62             return (segmentStart - rawStart) == (o.segmentStart - /*o.*/ rawStart) &&
     63                     (pos - start) == (o.pos - o.start);
     64         }
     65     }
     66 
     67     @Override
     68     public int hashCode() {
     69         assert false : "hashCode not designed";
     70         return 42; // any arbitrary constant will do
     71     }
     72 
     73     @Override
     74     public void resetToOffset(int newOffset) {
     75         reset();
     76         seq = rawSeq;
     77         start = segmentStart = pos = rawStart + newOffset;
     78         limit = rawLimit;
     79         checkDir = 1;
     80     }
     81 
     82     @Override
     83     public int getOffset() {
     84         if(checkDir != 0 || seq == rawSeq) {
     85             return pos - rawStart;
     86         } else if(pos == start) {
     87             return segmentStart - rawStart;
     88         } else {
     89             return segmentLimit - rawStart;
     90         }
     91     }
     92 
     93     @Override
     94     public void setText(boolean numeric, CharSequence s, int p) {
     95         super.setText(numeric, s, p);
     96         rawSeq = s;
     97         segmentStart = p;
     98         rawLimit = limit = s.length();
     99         checkDir = 1;
    100     }
    101 
    102     @Override
    103     public int nextCodePoint() {
    104         char c;
    105         for(;;) {
    106             if(checkDir > 0) {
    107                 if(pos == limit) {
    108                     return Collation.SENTINEL_CP;
    109                 }
    110                 c = seq.charAt(pos++);
    111                 if(CollationFCD.hasTccc(c)) {
    112                     if(CollationFCD.maybeTibetanCompositeVowel(c) ||
    113                             (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
    114                         --pos;
    115                         nextSegment();
    116                         c = seq.charAt(pos++);
    117                     }
    118                 }
    119                 break;
    120             } else if(checkDir == 0 && pos != limit) {
    121                 c = seq.charAt(pos++);
    122                 break;
    123             } else {
    124                 switchToForward();
    125             }
    126         }
    127         char trail;
    128         if(Character.isHighSurrogate(c) && pos != limit &&
    129                 Character.isLowSurrogate(trail = seq.charAt(pos))) {
    130             ++pos;
    131             return Character.toCodePoint(c, trail);
    132         } else {
    133             return c;
    134         }
    135     }
    136 
    137     @Override
    138     public int previousCodePoint() {
    139         char c;
    140         for(;;) {
    141             if(checkDir < 0) {
    142                 if(pos == start) {
    143                     return Collation.SENTINEL_CP;
    144                 }
    145                 c = seq.charAt(--pos);
    146                 if(CollationFCD.hasLccc(c)) {
    147                     if(CollationFCD.maybeTibetanCompositeVowel(c) ||
    148                             (pos != start && CollationFCD.hasTccc(seq.charAt(pos - 1)))) {
    149                         ++pos;
    150                         previousSegment();
    151                         c = seq.charAt(--pos);
    152                     }
    153                 }
    154                 break;
    155             } else if(checkDir == 0 && pos != start) {
    156                 c = seq.charAt(--pos);
    157                 break;
    158             } else {
    159                 switchToBackward();
    160             }
    161         }
    162         char lead;
    163         if(Character.isLowSurrogate(c) && pos != start &&
    164                 Character.isHighSurrogate(lead = seq.charAt(pos - 1))) {
    165             --pos;
    166             return Character.toCodePoint(lead, c);
    167         } else {
    168             return c;
    169         }
    170     }
    171 
    172     @Override
    173     protected long handleNextCE32() {
    174         char c;
    175         for(;;) {
    176             if(checkDir > 0) {
    177                 if(pos == limit) {
    178                     return NO_CP_AND_CE32;
    179                 }
    180                 c = seq.charAt(pos++);
    181                 if(CollationFCD.hasTccc(c)) {
    182                     if(CollationFCD.maybeTibetanCompositeVowel(c) ||
    183                             (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) {
    184                         --pos;
    185                         nextSegment();
    186                         c = seq.charAt(pos++);
    187                     }
    188                 }
    189                 break;
    190             } else if(checkDir == 0 && pos != limit) {
    191                 c = seq.charAt(pos++);
    192                 break;
    193             } else {
    194                 switchToForward();
    195             }
    196         }
    197         return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c));
    198     }
    199 
    200     /* boolean foundNULTerminator(); */
    201 
    202     @Override
    203     protected void forwardNumCodePoints(int num) {
    204         // Specify the class to avoid a virtual-function indirection.
    205         // In Java, we would declare this class final.
    206         while(num > 0 && nextCodePoint() >= 0) {
    207             --num;
    208         }
    209     }
    210 
    211     @Override
    212     protected void backwardNumCodePoints(int num) {
    213         // Specify the class to avoid a virtual-function indirection.
    214         // In Java, we would declare this class final.
    215         while(num > 0 && previousCodePoint() >= 0) {
    216             --num;
    217         }
    218     }
    219 
    220     /**
    221      * Switches to forward checking if possible.
    222      * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
    223      * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
    224      */
    225     private void switchToForward() {
    226         assert((checkDir < 0 && seq == rawSeq) || (checkDir == 0 && pos == limit));
    227         if(checkDir < 0) {
    228             // Turn around from backward checking.
    229             start = segmentStart = pos;
    230             if(pos == segmentLimit) {
    231                 limit = rawLimit;
    232                 checkDir = 1;  // Check forward.
    233             } else {  // pos < segmentLimit
    234                 checkDir = 0;  // Stay in FCD segment.
    235             }
    236         } else {
    237             // Reached the end of the FCD segment.
    238             if(seq == rawSeq) {
    239                 // The input text segment is FCD, extend it forward.
    240             } else {
    241                 // The input text segment needed to be normalized.
    242                 // Switch to checking forward from it.
    243                 seq = rawSeq;
    244                 pos = start = segmentStart = segmentLimit;
    245                 // Note: If this segment is at the end of the input text,
    246                 // then it might help to return false to indicate that, so that
    247                 // we do not have to re-check and normalize when we turn around and go backwards.
    248                 // However, that would complicate the call sites for an optimization of an unusual case.
    249             }
    250             limit = rawLimit;
    251             checkDir = 1;
    252         }
    253     }
    254 
    255     /**
    256      * Extend the FCD text segment forward or normalize around pos.
    257      * To be called when checkDir > 0 && pos != limit.
    258      * Returns with checkDir == 0 and pos != limit.
    259      */
    260     private void nextSegment() {
    261         assert(checkDir > 0 && seq == rawSeq && pos != limit);
    262         // The input text [segmentStart..pos[ passes the FCD check.
    263         int p = pos;
    264         int prevCC = 0;
    265         for(;;) {
    266             // Fetch the next character's fcd16 value.
    267             int q = p;
    268             int c = Character.codePointAt(seq, p);
    269             p += Character.charCount(c);
    270             int fcd16 = nfcImpl.getFCD16(c);
    271             int leadCC = fcd16 >> 8;
    272             if(leadCC == 0 && q != pos) {
    273                 // FCD boundary before the [q, p[ character.
    274                 limit = segmentLimit = q;
    275                 break;
    276             }
    277             if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
    278                 // Fails FCD check. Find the next FCD boundary and normalize.
    279                 do {
    280                     q = p;
    281                     if(p == rawLimit) { break; }
    282                     c = Character.codePointAt(seq, p);
    283                     p += Character.charCount(c);
    284                 } while(nfcImpl.getFCD16(c) > 0xff);
    285                 normalize(pos, q);
    286                 pos = start;
    287                 break;
    288             }
    289             prevCC = fcd16 & 0xff;
    290             if(p == rawLimit || prevCC == 0) {
    291                 // FCD boundary after the last character.
    292                 limit = segmentLimit = p;
    293                 break;
    294             }
    295         }
    296         assert(pos != limit);
    297         checkDir = 0;
    298     }
    299 
    300     /**
    301      * Switches to backward checking.
    302      * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
    303      * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
    304      */
    305     private void switchToBackward() {
    306         assert((checkDir > 0 && seq == rawSeq) || (checkDir == 0 && pos == start));
    307         if(checkDir > 0) {
    308             // Turn around from forward checking.
    309             limit = segmentLimit = pos;
    310             if(pos == segmentStart) {
    311                 start = rawStart;
    312                 checkDir = -1;  // Check backward.
    313             } else {  // pos > segmentStart
    314                 checkDir = 0;  // Stay in FCD segment.
    315             }
    316         } else {
    317             // Reached the start of the FCD segment.
    318             if(seq == rawSeq) {
    319                 // The input text segment is FCD, extend it backward.
    320             } else {
    321                 // The input text segment needed to be normalized.
    322                 // Switch to checking backward from it.
    323                 seq = rawSeq;
    324                 pos = limit = segmentLimit = segmentStart;
    325             }
    326             start = rawStart;
    327             checkDir = -1;
    328         }
    329     }
    330 
    331     /**
    332      * Extend the FCD text segment backward or normalize around pos.
    333      * To be called when checkDir < 0 && pos != start.
    334      * Returns with checkDir == 0 and pos != start.
    335      */
    336     private void previousSegment() {
    337         assert(checkDir < 0 && seq == rawSeq && pos != start);
    338         // The input text [pos..segmentLimit[ passes the FCD check.
    339         int p = pos;
    340         int nextCC = 0;
    341         for(;;) {
    342             // Fetch the previous character's fcd16 value.
    343             int q = p;
    344             int c = Character.codePointBefore(seq, p);
    345             p -= Character.charCount(c);
    346             int fcd16 = nfcImpl.getFCD16(c);
    347             int trailCC = fcd16 & 0xff;
    348             if(trailCC == 0 && q != pos) {
    349                 // FCD boundary after the [p, q[ character.
    350                 start = segmentStart = q;
    351                 break;
    352             }
    353             if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
    354                                 CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) {
    355                 // Fails FCD check. Find the previous FCD boundary and normalize.
    356                 do {
    357                     q = p;
    358                     if(fcd16 <= 0xff || p == rawStart) { break; }
    359                     c = Character.codePointBefore(seq, p);
    360                     p -= Character.charCount(c);
    361                 } while((fcd16 = nfcImpl.getFCD16(c)) != 0);
    362                 normalize(q, pos);
    363                 pos = limit;
    364                 break;
    365             }
    366             nextCC = fcd16 >> 8;
    367             if(p == rawStart || nextCC == 0) {
    368                 // FCD boundary before the following character.
    369                 start = segmentStart = p;
    370                 break;
    371             }
    372         }
    373         assert(pos != start);
    374         checkDir = 0;
    375     }
    376 
    377     private void normalize(int from, int to) {
    378         if(normalized == null) {
    379             normalized = new StringBuilder();
    380         }
    381         // NFD without argument checking.
    382         nfcImpl.decompose(rawSeq, from, to, normalized, to - from);
    383         // Switch collation processing into the FCD buffer
    384         // with the result of normalizing [segmentStart, segmentLimit[.
    385         segmentStart = from;
    386         segmentLimit = to;
    387         seq = normalized;
    388         start = 0;
    389         limit = start + normalized.length();
    390     }
    391 
    392     // Text pointers: The input text is rawSeq[rawStart, rawLimit[.
    393     // (In C++, these are const UChar * pointers.
    394     // In Java, we use CharSequence rawSeq and the parent class' seq
    395     // together with int indexes.)
    396     //
    397     // checkDir > 0:
    398     //
    399     // The input text rawSeq[segmentStart..pos[ passes the FCD check.
    400     // Moving forward checks incrementally.
    401     // segmentLimit is undefined. seq == rawSeq. limit == rawLimit.
    402     //
    403     // checkDir < 0:
    404     // The input text rawSeq[pos..segmentLimit[ passes the FCD check.
    405     // Moving backward checks incrementally.
    406     // segmentStart is undefined. seq == rawSeq. start == rawStart.
    407     //
    408     // checkDir == 0:
    409     //
    410     // The input text rawSeq[segmentStart..segmentLimit[ is being processed.
    411     // These pointers are at FCD boundaries.
    412     // Either this text segment already passes the FCD check
    413     // and seq==rawSeq && segmentStart==start<=pos<=limit==segmentLimit,
    414     // or the current segment had to be normalized so that
    415     // rawSeq[segmentStart..segmentLimit[ turned into the normalized string,
    416     // corresponding to seq==normalized && 0==start<=pos<=limit==start+normalized.length().
    417     private CharSequence rawSeq;
    418     private static final int rawStart = 0;
    419     private int segmentStart;
    420     private int segmentLimit;
    421     private int rawLimit;
    422 
    423     private final Normalizer2Impl nfcImpl;
    424     private StringBuilder normalized;
    425     // Direction of incremental FCD check. See comments before rawStart.
    426     private int checkDir;
    427 }
    428