Home | History | Annotate | Download | only in i18n
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2012-2016, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * uitercollationiterator.h
      9 *
     10 * created on: 2012sep23 (from utf16collationiterator.h)
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #ifndef __UITERCOLLATIONITERATOR_H__
     15 #define __UITERCOLLATIONITERATOR_H__
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_COLLATION
     20 
     21 #include "unicode/uiter.h"
     22 #include "cmemory.h"
     23 #include "collation.h"
     24 #include "collationdata.h"
     25 #include "collationiterator.h"
     26 #include "normalizer2impl.h"
     27 
     28 U_NAMESPACE_BEGIN
     29 
     30 /**
     31  * UCharIterator-based collation element and character iterator.
     32  * Handles normalized text inline, with length or NUL-terminated.
     33  * Unnormalized text is handled by a subclass.
     34  */
     35 class U_I18N_API UIterCollationIterator : public CollationIterator {
     36 public:
     37     UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
     38             : CollationIterator(d, numeric), iter(ui) {}
     39 
     40     virtual ~UIterCollationIterator();
     41 
     42     virtual void resetToOffset(int32_t newOffset);
     43 
     44     virtual int32_t getOffset() const;
     45 
     46     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
     47 
     48     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
     49 
     50 protected:
     51     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
     52 
     53     virtual UChar handleGetTrailSurrogate();
     54 
     55     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     56 
     57     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     58 
     59     UCharIterator &iter;
     60 };
     61 
     62 /**
     63  * Incrementally checks the input text for FCD and normalizes where necessary.
     64  */
     65 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
     66 public:
     67     FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
     68             : UIterCollationIterator(data, numeric, ui),
     69               state(ITER_CHECK_FWD), start(startIndex),
     70               nfcImpl(data->nfcImpl) {}
     71 
     72     virtual ~FCDUIterCollationIterator();
     73 
     74     virtual void resetToOffset(int32_t newOffset);
     75 
     76     virtual int32_t getOffset() const;
     77 
     78     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
     79 
     80     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
     81 
     82 protected:
     83     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
     84 
     85     virtual UChar handleGetTrailSurrogate();
     86 
     87     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     88 
     89     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     90 
     91 private:
     92     /**
     93      * Switches to forward checking if possible.
     94      */
     95     void switchToForward();
     96 
     97     /**
     98      * Extends the FCD text segment forward or normalizes around pos.
     99      * @return TRUE if success
    100      */
    101     UBool nextSegment(UErrorCode &errorCode);
    102 
    103     /**
    104      * Switches to backward checking.
    105      */
    106     void switchToBackward();
    107 
    108     /**
    109      * Extends the FCD text segment backward or normalizes around pos.
    110      * @return TRUE if success
    111      */
    112     UBool previousSegment(UErrorCode &errorCode);
    113 
    114     UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
    115 
    116     enum State {
    117         /**
    118          * The input text [start..(iter index)[ passes the FCD check.
    119          * Moving forward checks incrementally.
    120          * pos & limit are undefined.
    121          */
    122         ITER_CHECK_FWD,
    123         /**
    124          * The input text [(iter index)..limit[ passes the FCD check.
    125          * Moving backward checks incrementally.
    126          * start & pos are undefined.
    127          */
    128         ITER_CHECK_BWD,
    129         /**
    130          * The input text [start..limit[ passes the FCD check.
    131          * pos tracks the current text index.
    132          */
    133         ITER_IN_FCD_SEGMENT,
    134         /**
    135          * The input text [start..limit[ failed the FCD check and was normalized.
    136          * pos tracks the current index in the normalized string.
    137          * The text iterator is at the limit index.
    138          */
    139         IN_NORM_ITER_AT_LIMIT,
    140         /**
    141          * The input text [start..limit[ failed the FCD check and was normalized.
    142          * pos tracks the current index in the normalized string.
    143          * The text iterator is at the start index.
    144          */
    145         IN_NORM_ITER_AT_START
    146     };
    147 
    148     State state;
    149 
    150     int32_t start;
    151     int32_t pos;
    152     int32_t limit;
    153 
    154     const Normalizer2Impl &nfcImpl;
    155     UnicodeString normalized;
    156 };
    157 
    158 U_NAMESPACE_END
    159 
    160 #endif  // !UCONFIG_NO_COLLATION
    161 #endif  // __UITERCOLLATIONITERATOR_H__
    162