1 /* 2 ******************************************************************************* 3 * Copyright (C) 2012-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * uitercollationiterator.h 7 * 8 * created on: 2012sep23 (from utf16collationiterator.h) 9 * created by: Markus W. Scherer 10 */ 11 12 #ifndef __UITERCOLLATIONITERATOR_H__ 13 #define __UITERCOLLATIONITERATOR_H__ 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_COLLATION 18 19 #include "unicode/uiter.h" 20 #include "cmemory.h" 21 #include "collation.h" 22 #include "collationdata.h" 23 #include "normalizer2impl.h" 24 25 U_NAMESPACE_BEGIN 26 27 /** 28 * UCharIterator-based collation element and character iterator. 29 * Handles normalized text inline, with length or NUL-terminated. 30 * Unnormalized text is handled by a subclass. 31 */ 32 class U_I18N_API UIterCollationIterator : public CollationIterator { 33 public: 34 UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui) 35 : CollationIterator(d, numeric), iter(ui) {} 36 37 virtual ~UIterCollationIterator(); 38 39 virtual void resetToOffset(int32_t newOffset); 40 41 virtual int32_t getOffset() const; 42 43 virtual UChar32 nextCodePoint(UErrorCode &errorCode); 44 45 virtual UChar32 previousCodePoint(UErrorCode &errorCode); 46 47 protected: 48 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 49 50 virtual UChar handleGetTrailSurrogate(); 51 52 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 53 54 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 55 56 UCharIterator &iter; 57 }; 58 59 /** 60 * Incrementally checks the input text for FCD and normalizes where necessary. 61 */ 62 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator { 63 public: 64 FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex) 65 : UIterCollationIterator(data, numeric, ui), 66 state(ITER_CHECK_FWD), start(startIndex), 67 nfcImpl(data->nfcImpl) {} 68 69 virtual ~FCDUIterCollationIterator(); 70 71 virtual void resetToOffset(int32_t newOffset); 72 73 virtual int32_t getOffset() const; 74 75 virtual UChar32 nextCodePoint(UErrorCode &errorCode); 76 77 virtual UChar32 previousCodePoint(UErrorCode &errorCode); 78 79 protected: 80 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 81 82 virtual UChar handleGetTrailSurrogate(); 83 84 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 85 86 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 87 88 private: 89 /** 90 * Switches to forward checking if possible. 91 */ 92 void switchToForward(); 93 94 /** 95 * Extends the FCD text segment forward or normalizes around pos. 96 * @return TRUE if success 97 */ 98 UBool nextSegment(UErrorCode &errorCode); 99 100 /** 101 * Switches to backward checking. 102 */ 103 void switchToBackward(); 104 105 /** 106 * Extends the FCD text segment backward or normalizes around pos. 107 * @return TRUE if success 108 */ 109 UBool previousSegment(UErrorCode &errorCode); 110 111 UBool normalize(const UnicodeString &s, UErrorCode &errorCode); 112 113 enum State { 114 /** 115 * The input text [start..(iter index)[ passes the FCD check. 116 * Moving forward checks incrementally. 117 * pos & limit are undefined. 118 */ 119 ITER_CHECK_FWD, 120 /** 121 * The input text [(iter index)..limit[ passes the FCD check. 122 * Moving backward checks incrementally. 123 * start & pos are undefined. 124 */ 125 ITER_CHECK_BWD, 126 /** 127 * The input text [start..limit[ passes the FCD check. 128 * pos tracks the current text index. 129 */ 130 ITER_IN_FCD_SEGMENT, 131 /** 132 * The input text [start..limit[ failed the FCD check and was normalized. 133 * pos tracks the current index in the normalized string. 134 * The text iterator is at the limit index. 135 */ 136 IN_NORM_ITER_AT_LIMIT, 137 /** 138 * The input text [start..limit[ failed the FCD check and was normalized. 139 * pos tracks the current index in the normalized string. 140 * The text iterator is at the start index. 141 */ 142 IN_NORM_ITER_AT_START 143 }; 144 145 State state; 146 147 int32_t start; 148 int32_t pos; 149 int32_t limit; 150 151 const Normalizer2Impl &nfcImpl; 152 UnicodeString normalized; 153 }; 154 155 U_NAMESPACE_END 156 157 #endif // !UCONFIG_NO_COLLATION 158 #endif // __UITERCOLLATIONITERATOR_H__ 159