Home | History | Annotate | Download | only in i18n
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2010-2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * utf16collationiterator.h
      9 *
     10 * created on: 2010oct27
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #ifndef __UTF16COLLATIONITERATOR_H__
     15 #define __UTF16COLLATIONITERATOR_H__
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_COLLATION
     20 
     21 #include "cmemory.h"
     22 #include "collation.h"
     23 #include "collationdata.h"
     24 #include "collationiterator.h"
     25 #include "normalizer2impl.h"
     26 
     27 U_NAMESPACE_BEGIN
     28 
     29 /**
     30  * UTF-16 collation element and character iterator.
     31  * Handles normalized UTF-16 text inline, with length or NUL-terminated.
     32  * Unnormalized text is handled by a subclass.
     33  */
     34 class U_I18N_API UTF16CollationIterator : public CollationIterator {
     35 public:
            // Constructs an iterator over UTF-16 text.
            // d and numeric are forwarded unchanged to the CollationIterator base.
            // s, p and lim initialize start, pos and limit respectively;
            // lim may be NULL for NUL-terminated text (see the member comment
            // at the end of this class).
     36     UTF16CollationIterator(const CollationData *d, UBool numeric,
     37                            const UChar *s, const UChar *p, const UChar *lim)
     38             : CollationIterator(d, numeric),
     39               start(s), pos(p), limit(lim) {}
     40 
            // Constructs from another iterator plus a different text pointer.
            // Defined out of line; presumably copies other's state and rebases
            // the text pointers onto newText -- confirm in the implementation file.
     41     UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText);
     42 
     43     virtual ~UTF16CollationIterator();
     44 
            // Equality against any CollationIterator. Defined out of line;
            // the exact comparison criteria are not visible in this header.
     45     virtual UBool operator==(const CollationIterator &other) const;
     46 
            // Repositions the iterator. Defined out of line; newOffset is
            // presumably a UTF-16 code unit offset relative to start -- confirm.
     47     virtual void resetToOffset(int32_t newOffset);
     48 
            // Reports the current position as an offset. Defined out of line;
            // presumably the counterpart of resetToOffset() -- confirm.
     49     virtual int32_t getOffset() const;
     50 
            // Re-targets this iterator at new text:
            // calls reset(), then sets both start and pos to s and limit to lim,
            // so iteration begins at the first code unit of the new text.
     51     void setText(const UChar *s, const UChar *lim) {
     52         reset();
     53         start = pos = s;
     54         limit = lim;
     55     }
     56 
            // Code point access in the forward and backward directions.
            // Both take a UErrorCode reference; bodies are defined out of line.
     57     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
     58 
     59     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
     60 
     61 protected:
     62     // Copy constructor only for subclasses which set the pointers.
            // Copies only the CollationIterator base state; start, pos and limit
            // are all set to NULL, so the subclass must assign them before use.
     63     UTF16CollationIterator(const UTF16CollationIterator &other)
     64             : CollationIterator(other),
     65               start(NULL), pos(NULL), limit(NULL) {}
     66 
            // Virtual hooks, defined out of line. They are presumably overrides of
            // CollationIterator members (the base class is not visible here) and
            // are redeclared by FCDUTF16CollationIterator where it needs
            // different behavior.
     67     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
     68 
     69     virtual UChar handleGetTrailSurrogate();
     70 
     71     virtual UBool foundNULTerminator();
     72 
     73     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     74 
     75     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     76 
     77     // UTF-16 string pointers.
     78     // limit can be NULL for NUL-terminated strings.
            // These are plain const UChar pointers supplied by the caller;
            // nothing in this header allocates or frees the text they point to.
     79     const UChar *start, *pos, *limit;
     80 };
     81 
     82 /**
     83  * Incrementally checks the input text for FCD and normalizes where necessary.
     84  */
     85 class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
     86 public:
            // Constructs an FCD-checking iterator over UTF-16 text.
            // data, numeric, s, p and lim are forwarded to UTF16CollationIterator.
            // The raw text bounds are recorded as rawStart = s and rawLimit = lim.
            // The iterator starts in forward-checking mode: checkDir = 1,
            // segmentStart = p, and segmentLimit = NULL (segmentLimit is documented
            // in the member comments as undefined while checkDir > 0).
            // data is dereferenced here (data->nfcImpl), so it must not be NULL.
     87     FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
     88                               const UChar *s, const UChar *p, const UChar *lim)
     89             : UTF16CollationIterator(data, numeric, s, p, lim),
     90               rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim),
     91               nfcImpl(data->nfcImpl),
     92               checkDir(1) {}
     93 
            // Constructs from another iterator plus a different text pointer.
            // Defined out of line; presumably copies other's state and rebases
            // the text pointers onto newText -- confirm in the implementation file.
     94     FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText);
     95 
     96     virtual ~FCDUTF16CollationIterator();
     97 
            // The following public virtuals redeclare the same-named members of
            // UTF16CollationIterator. Their bodies are defined out of line;
            // presumably they add the incremental FCD check and normalization
            // described in the member comments -- confirm in the implementation.
     98     virtual UBool operator==(const CollationIterator &other) const;
     99 
    100     virtual void resetToOffset(int32_t newOffset);
    101 
    102     virtual int32_t getOffset() const;
    103 
    104     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
    105 
    106     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
    107 
    108 protected:
            // Protected hooks redeclared from UTF16CollationIterator; defined out of
            // line. Note that handleGetTrailSurrogate() is not redeclared here, so
            // the UTF16CollationIterator version is the one inherited.
    109     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
    110 
    111     virtual UBool foundNULTerminator();
    112 
    113     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
    114 
    115     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
    116 
    117 private:
    118     /**
    119      * Switches to forward checking if possible.
    120      * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
    121      * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
    122      */
    123     void switchToForward();
    124 
    125     /**
    126      * Extend the FCD text segment forward or normalize around pos.
    127      * To be called when checkDir > 0 && pos != limit.
    128      * @return TRUE if success, checkDir == 0 and pos != limit
    129      */
    130     UBool nextSegment(UErrorCode &errorCode);
    131 
    132     /**
    133      * Switches to backward checking.
    134      * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
    135      * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
    136      */
    137     void switchToBackward();
    138 
    139     /**
    140      * Extend the FCD text segment backward or normalize around pos.
    141      * To be called when checkDir < 0 && pos != start.
    142      * @return TRUE if success, checkDir == 0 and pos != start
    143      */
    144     UBool previousSegment(UErrorCode &errorCode);
    145 
            // Helper taking a text range [from, to[ and an error code; defined out
            // of line. Presumably normalizes that range into the `normalized` member
            // and points start/pos/limit at the result, returning whether it
            // succeeded -- confirm in the implementation file.
    146     UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode);
    147 
    148     // Text pointers: The input text is [rawStart, rawLimit[
    149     // where rawLimit can be NULL for NUL-terminated text.
    150     //
    151     // checkDir > 0:
    152     //
    153     // The input text [segmentStart..pos[ passes the FCD check.
    154     // Moving forward checks incrementally.
    155     // segmentLimit is undefined. limit == rawLimit.
    156     //
    157     // checkDir < 0:
    158     // The input text [pos..segmentLimit[ passes the FCD check.
    159     // Moving backward checks incrementally.
    160     // segmentStart is undefined, start == rawStart.
    161     //
    162     // checkDir == 0:
    163     //
    164     // The input text [segmentStart..segmentLimit[ is being processed.
    165     // These pointers are at FCD boundaries.
    166     // Either this text segment already passes the FCD check
    167     // and segmentStart==start<=pos<=limit==segmentLimit,
    168     // or the current segment had to be normalized so that
    169     // [segmentStart..segmentLimit[ turned into the normalized string,
    170     // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
    171     const UChar *rawStart;
    172     const UChar *segmentStart;
    173     const UChar *segmentLimit;
    174     // rawLimit==NULL for a NUL-terminated string.
    175     const UChar *rawLimit;
    176 
            // Bound once in the constructor to data->nfcImpl; being a reference
            // member, it refers to that same object for the iterator's lifetime.
    177     const Normalizer2Impl &nfcImpl;
            // Holds the normalized form of the current segment when the segment
            // had to be normalized (the second checkDir == 0 case described above).
    178     UnicodeString normalized;
    179     // Direction of incremental FCD check. See comments before rawStart.
            // Initialized to 1 (forward checking) by the constructor.
    180     int8_t checkDir;
    181 };
    182 
    183 U_NAMESPACE_END
    184 
    185 #endif  // !UCONFIG_NO_COLLATION
    186 #endif  // __UTF16COLLATIONITERATOR_H__
    187