Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2012-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * uitercollationiterator.h
      7 *
      8 * created on: 2012sep23 (from utf16collationiterator.h)
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #ifndef __UITERCOLLATIONITERATOR_H__
     13 #define __UITERCOLLATIONITERATOR_H__
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_COLLATION
     18 
     19 #include "unicode/uiter.h"
     20 #include "cmemory.h"
     21 #include "collation.h"
     22 #include "collationdata.h"
     23 #include "normalizer2impl.h"
     24 
     25 U_NAMESPACE_BEGIN
     26 
     27 /**
     28  * UCharIterator-based collation element and character iterator.
     29  * Handles normalized text inline, with length or NUL-terminated.
     30  * Unnormalized text is handled by a subclass.
         *
         * Derives from CollationIterator and works on a caller-supplied
         * UCharIterator, which it holds by reference (see the iter member).
         * Only the constructor is defined in this header; every other member
         * function is declared here and defined out of line.
     31  */
     32 class U_I18N_API UIterCollationIterator : public CollationIterator {
     33 public:
            /**
             * Forwards d and numeric to the CollationIterator constructor and
             * binds the iter member to ui.
             * The UCharIterator is stored by reference, not copied, so it must
             * stay valid for as long as this object uses it.
             * @param d       passed through to CollationIterator
             * @param numeric passed through to CollationIterator
             * @param ui      the UCharIterator this object is bound to
             */
     34     UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
     35             : CollationIterator(d, numeric), iter(ui) {}
     36 
            /** Virtual destructor; defined out of line. */
     37     virtual ~UIterCollationIterator();
     38 
            // Public virtual interface: declarations only, the definitions are not
            // part of this header.
            // NOTE(review): presumably these override the same-named
            // CollationIterator virtuals -- confirm against the base class.
     39     virtual void resetToOffset(int32_t newOffset);
     40 
     41     virtual int32_t getOffset() const;
     42 
     43     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
     44 
     45     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
     46 
     47 protected:
            // Protected virtual hooks: declarations only, defined out of line.
            // NOTE(review): presumably the low-level text-access hooks that
            // CollationIterator calls -- confirm against the base class.
     48     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
     49 
     50     virtual UChar handleGetTrailSurrogate();
     51 
     52     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     53 
     54     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     55 
            /**
             * Reference to the UCharIterator passed to the constructor.
             * Protected, so subclasses can access it directly.
             */
     56     UCharIterator &iter;
     57 };
     58 
     59 /**
     60  * Incrementally checks the input text for FCD and normalizes where necessary.
         *
         * Extends UIterCollationIterator with a small state machine (see State)
         * and the start/pos/limit indexes whose meaning depends on that state.
         * Only the constructor is defined in this header; every other member
         * function is declared here and defined out of line.
     61  */
     62 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
     63 public:
            /**
             * Forwards data, numeric and ui to the UIterCollationIterator
             * constructor, sets the initial state to ITER_CHECK_FWD with
             * start = startIndex, and binds nfcImpl to data->nfcImpl.
             *
             * data is dereferenced here, so it must not be NULL.
             * pos and limit are not initialized by this constructor; the initial
             * state ITER_CHECK_FWD documents them as undefined.
             * normalized is default-constructed.
             *
             * @param data       passed to the base class; also supplies nfcImpl
             * @param numeric    passed through to the base class
             * @param ui         passed through to the base class
             * @param startIndex initial value of start
             */
     64     FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
     65             : UIterCollationIterator(data, numeric, ui),
     66               state(ITER_CHECK_FWD), start(startIndex),
     67               nfcImpl(data->nfcImpl) {}
     68 
            /** Virtual destructor; defined out of line. */
     69     virtual ~FCDUIterCollationIterator();
     70 
            // Redeclares the public virtual functions of UIterCollationIterator
            // with identical signatures, so these override the base versions.
            // Definitions are not part of this header.
     71     virtual void resetToOffset(int32_t newOffset);
     72 
     73     virtual int32_t getOffset() const;
     74 
     75     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
     76 
     77     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
     78 
     79 protected:
            // Redeclares the protected virtual hooks of UIterCollationIterator
            // with identical signatures; defined out of line.
     80     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
     81 
     82     virtual UChar handleGetTrailSurrogate();
     83 
     84     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     85 
     86     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     87 
     88 private:
     89     /**
     90      * Switches to forward checking if possible.
     91      */
     92     void switchToForward();
     93 
     94     /**
     95      * Extends the FCD text segment forward or normalizes around pos.
     96      * @return TRUE if success
     97      */
     98     UBool nextSegment(UErrorCode &errorCode);
     99 
    100     /**
    101      * Switches to backward checking.
    102      */
    103     void switchToBackward();
    104 
    105     /**
    106      * Extends the FCD text segment backward or normalizes around pos.
    107      * @return TRUE if success
    108      */
    109     UBool previousSegment(UErrorCode &errorCode);
    110 
            /**
             * NOTE(review): presumably normalizes s into the normalized member
             * and returns TRUE on success, like the segment functions above --
             * confirm against the implementation.
             */
    111     UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
    112 
            /**
             * Iteration states. Each enumerator documents which of start, pos
             * and limit are meaningful while the iterator is in that state.
             * In the descriptions, [a..b[ is the half-open range from a
             * (inclusive) to b (exclusive).
             */
    113     enum State {
    114         /**
    115          * The input text [start..(iter index)[ passes the FCD check.
    116          * Moving forward checks incrementally.
    117          * pos & limit are undefined.
    118          */
    119         ITER_CHECK_FWD,
    120         /**
    121          * The input text [(iter index)..limit[ passes the FCD check.
    122          * Moving backward checks incrementally.
    123          * start & pos are undefined.
    124          */
    125         ITER_CHECK_BWD,
    126         /**
    127          * The input text [start..limit[ passes the FCD check.
    128          * pos tracks the current text index.
    129          */
    130         ITER_IN_FCD_SEGMENT,
    131         /**
    132          * The input text [start..limit[ failed the FCD check and was normalized.
    133          * pos tracks the current index in the normalized string.
    134          * The text iterator is at the limit index.
    135          */
    136         IN_NORM_ITER_AT_LIMIT,
    137         /**
    138          * The input text [start..limit[ failed the FCD check and was normalized.
    139          * pos tracks the current index in the normalized string.
    140          * The text iterator is at the start index.
    141          */
    142         IN_NORM_ITER_AT_START
    143     };
    144 
            /** Current state; set to ITER_CHECK_FWD by the constructor. */
    145     State state;
    146 
            // Segment indexes. Which of them are defined, and what pos indexes
            // into, depends on state; see the State enumerators.
            // Only start is initialized by the constructor.
    147     int32_t start;
    148     int32_t pos;
    149     int32_t limit;
    150 
            /** Bound to data->nfcImpl by the constructor. */
    151     const Normalizer2Impl &nfcImpl;
            /**
             * NOTE(review): presumably the "normalized string" that pos indexes
             * in the IN_NORM_* states -- confirm against the implementation.
             */
    152     UnicodeString normalized;
    153 };
    154 
    155 U_NAMESPACE_END
    156 
    157 #endif  // !UCONFIG_NO_COLLATION
    158 #endif  // __UITERCOLLATIONITERATOR_H__
    159