Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2010-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * utf16collationiterator.h
      7 *
      8 * created on: 2010oct27
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #ifndef __UTF16COLLATIONITERATOR_H__
     13 #define __UTF16COLLATIONITERATOR_H__
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_COLLATION
     18 
     19 #include "cmemory.h"
     20 #include "collation.h"
     21 #include "collationdata.h"
     22 #include "collationiterator.h"
     23 #include "normalizer2impl.h"
     24 
     25 U_NAMESPACE_BEGIN
     26 
     27 /**
     28  * UTF-16 collation element and character iterator.
     29  * Handles normalized UTF-16 text inline, with length or NUL-terminated.
     30  * Unnormalized text is handled by a subclass (see FCDUTF16CollationIterator).
     31  */
     32 class U_I18N_API UTF16CollationIterator : public CollationIterator {
     33 public:
     34     UTF16CollationIterator(const CollationData *d, UBool numeric,
     35                            const UChar *s, const UChar *p, const UChar *lim)
     36             : CollationIterator(d, numeric),  // d and numeric are forwarded to the base class unchanged
     37               start(s), pos(p), limit(lim) {}  // s = text start, p = initial position, lim = limit
     38     // Copy-style constructor taking a replacement text pointer; defined out of line.
     39     UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText);
     40 
     41     virtual ~UTF16CollationIterator();
     42     // Equality test against any CollationIterator; defined out of line.
     43     virtual UBool operator==(const CollationIterator &other) const;
     44     // NOTE(review): newOffset is presumably a UChar index relative to start -- confirm in the .cpp.
     45     virtual void resetToOffset(int32_t newOffset);
     46     // Returns the current offset as an int32_t; defined out of line.
     47     virtual int32_t getOffset() const;
     48     // Replaces the text: calls reset(), then points start and pos at s and limit at lim.
     49     void setText(const UChar *s, const UChar *lim) {
     50         reset();
     51         start = pos = s;  // iteration restarts at the beginning of the new text
     52         limit = lim;
     53     }
     54     // Code point accessors; failures are reported through errorCode. Defined out of line.
     55     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
     56     // NOTE(review): the value returned at either end of the text is not visible here -- check the .cpp.
     57     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
     58 
     59 protected:
     60     // Copy constructor only for subclasses which set the pointers.
     61     UTF16CollationIterator(const UTF16CollationIterator &other)
     62             : CollationIterator(other),
     63               start(NULL), pos(NULL), limit(NULL) {}  // pointers are left NULL here, not copied from other
     64     // Virtual hooks, defined out of line. FCDUTF16CollationIterator redeclares all except handleGetTrailSurrogate().
     65     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
     66 
     67     virtual UChar handleGetTrailSurrogate();
     68 
     69     virtual UBool foundNULTerminator();
     70     // NOTE(review): presumably move pos by num code points forward/backward -- confirm in the .cpp.
     71     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     72 
     73     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     74 
     75     // UTF-16 string pointers.
     76     // limit can be NULL for NUL-terminated strings.
     77     const UChar *start, *pos, *limit;
     78 };
     79 
     80 /**
     81  * Incrementally checks the input text for FCD and normalizes where necessary.
     82  */
     83 class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
     84 public:
     85     FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
     86                               const UChar *s, const UChar *p, const UChar *lim)
     87             : UTF16CollationIterator(data, numeric, s, p, lim),
     88               rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim),  // raw text is [s, lim[; first segment starts at p
     89               nfcImpl(data->nfcImpl),  // data is dereferenced here, so it must not be NULL
     90               checkDir(1) {}  // begin in forward-checking mode (the checkDir > 0 state described below)
     91     // Copy-style constructor taking a replacement text pointer; defined out of line.
     92     FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText);
     93 
     94     virtual ~FCDUTF16CollationIterator();
     95     // The public virtuals below redeclare the UTF16CollationIterator ones with identical signatures.
     96     virtual UBool operator==(const CollationIterator &other) const;
     97 
     98     virtual void resetToOffset(int32_t newOffset);
     99     // NOTE(review): presumably an offset into the raw input text, not the normalized buffer -- confirm in the .cpp.
    100     virtual int32_t getOffset() const;
    101 
    102     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
    103 
    104     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
    105 
    106 protected:
    107     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
    108     // Note: handleGetTrailSurrogate() is not redeclared here, so the base-class version is inherited.
    109     virtual UBool foundNULTerminator();
    110 
    111     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
    112 
    113     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
    114 
    115 private:
    116     /**
    117      * Switches to forward checking if possible.
    118      * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
    119      * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
    120      */
    121     void switchToForward();
    122 
    123     /**
    124      * Extend the FCD text segment forward or normalize around pos.
    125      * To be called when checkDir > 0 && pos != limit.
    126      * @return TRUE if success, checkDir == 0 and pos != limit
    127      */
    128     UBool nextSegment(UErrorCode &errorCode);
    129 
    130     /**
    131      * Switches to backward checking.
    132      * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
    133      * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
    134      */
    135     void switchToBackward();
    136 
    137     /**
    138      * Extend the FCD text segment backward or normalize around pos.
    139      * To be called when checkDir < 0 && pos != start.
    140      * @return TRUE if success, checkDir == 0 and pos != start
    141      */
    142     UBool previousSegment(UErrorCode &errorCode);
    143     // NOTE(review): presumably normalizes the raw text [from, to[ into `normalized` and repoints start/pos/limit -- confirm in the .cpp.
    144     UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode);
    145 
    146     // Text pointers: The input text is [rawStart, rawLimit[
    147     // where rawLimit can be NULL for NUL-terminated text.
    148     //
    149     // checkDir > 0:
    150     //
    151     // The input text [segmentStart..pos[ passes the FCD check.
    152     // Moving forward checks incrementally.
    153     // segmentLimit is undefined. limit == rawLimit.
    154     //
    155     // checkDir < 0:
    156     // The input text [pos..segmentLimit[ passes the FCD check.
    157     // Moving backward checks incrementally.
    158     // segmentStart is undefined, start == rawStart.
    159     //
    160     // checkDir == 0:
    161     //
    162     // The input text [segmentStart..segmentLimit[ is being processed.
    163     // These pointers are at FCD boundaries.
    164     // Either this text segment already passes the FCD check
    165     // and segmentStart==start<=pos<=limit==segmentLimit,
    166     // or the current segment had to be normalized so that
    167     // [segmentStart..segmentLimit[ turned into the normalized string,
    168     // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
    169     const UChar *rawStart;
    170     const UChar *segmentStart;
    171     const UChar *segmentLimit;
    172     // rawLimit==NULL for a NUL-terminated string.
    173     const UChar *rawLimit;
    174     // Bound to data->nfcImpl by the inline constructor above; held by reference, not copied.
    175     const Normalizer2Impl &nfcImpl;
    176     UnicodeString normalized;  // holds the normalized string for the checkDir == 0 "had to be normalized" case above
    177     // Direction of incremental FCD check. See comments before rawStart.
    178     int8_t checkDir;
    179 };
    180 
    181 U_NAMESPACE_END
    182 
    183 #endif  // !UCONFIG_NO_COLLATION
    184 #endif  // __UTF16COLLATIONITERATOR_H__
    185