1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: unorm_it.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003jan21 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __UNORM_IT_H__ 18 #define __UNORM_IT_H__ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION 23 24 #include "unicode/uiter.h" 25 #include "unicode/unorm.h" 26 27 /** 28 * Normalizing UCharIterator wrapper. 29 * This internal API basically duplicates the functionality of the C++ Normalizer 30 * but 31 * - it actually implements a character iterator (UCharIterator) 32 * with few restrictions (see unorm_setIter()) 33 * - it supports UCharIterator getState()/setState() 34 * - it uses lower-level APIs and buffers more text and states, 35 * hopefully resulting in higher performance 36 * 37 * Usage example: 38 * \code 39 * function(UCharIterator *srcIter) { 40 * UNormIterator *uni; 41 * UCharIterator *iter; 42 * UErrorCode errorCode; 43 * 44 * errorCode=U_ZERO_ERROR; 45 * uni=unorm_openIter(&errorCode); 46 * if(U_FAILURE(errorCode)) { 47 * // report error 48 * return; 49 * } 50 * 51 * iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode); 52 * if(U_FAILURE(errorCode)) { 53 * // report error 54 * } else { 55 * // use iter to iterate over the canonically ordered 56 * // version of srcIter's text 57 * uint32_t state; 58 * 59 * ... 60 * 61 * state=uiter_getState(iter); 62 * if(state!=UITER_NO_STATE) { 63 * // use valid state, store it, use iter some more 64 * ... 65 * 66 * // later restore iter to the saved state: 67 * uiter_setState(iter, state, &errorCode); 68 * 69 * ... 70 * } 71 * 72 * ... 73 * } 74 * unorm_closeIter(uni); 75 * } 76 * \endcode 77 * 78 * See also the ICU test suites. 79 * 80 * @internal 81 */ 82 struct UNormIterator; 83 typedef struct UNormIterator UNormIterator; 84 85 /** 86 * Size of a stack buffer to hold a UNormIterator, see the stackMem parameter 87 * of unorm_openIter(). 88 * 89 * @internal 90 */ 91 #define UNORM_ITER_SIZE 1024 92 93 /** 94 * Open a normalizing iterator. Must be closed later. 95 * Use unorm_setIter(). 96 * 97 * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold 98 * the UNormIterator if possible; can be NULL. 99 * @param stackMemSize Number of bytes at stackMem; can be 0, 100 * or should be >= UNORM_ITER_SIZE for a non-NULL stackMem. 101 * @param pErrorCode ICU error code 102 * @return an allocated and pre-initialized UNormIterator 103 * @internal 104 */ 105 U_CAPI UNormIterator * U_EXPORT2 106 unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode); 107 108 /** 109 * Close a normalizing iterator. 110 * 111 * @param uni UNormIterator from unorm_openIter() 112 * @internal 113 */ 114 U_CAPI void U_EXPORT2 115 unorm_closeIter(UNormIterator *uni); 116 117 /** 118 * Set a UCharIterator and a normalization mode for the normalizing iterator 119 * to wrap. The normalizing iterator will read from the character iterator, 120 * normalize the text, and in turn deliver it with its own wrapper UCharIterator 121 * interface which it returns. 122 * 123 * The source iterator remains at its current position through the unorm_setIter() 124 * call but will be used and moved as soon as the 125 * the returned normalizing iterator is. 126 * 127 * The returned interface pointer is valid for as long as the normalizing iterator 128 * is open and until another unorm_setIter() call is made on it. 129 * 130 * The normalizing iterator's UCharIterator interface has the following properties: 131 * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX 132 * - getState() will return UITER_NO_STATE for unknown states for positions 133 * that are not at normalization boundaries 134 * 135 * @param uni UNormIterator from unorm_openIter() 136 * @param iter The source text UCharIterator to be wrapped. It is aliases into the normalizing iterator. 137 * Must support getState() and setState(). 138 * @param mode The normalization mode. 139 * @param pErrorCode ICU error code 140 * @return an alias to the normalizing iterator's UCharIterator interface 141 * @internal 142 */ 143 U_CAPI UCharIterator * U_EXPORT2 144 unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode); 145 146 #endif /* uconfig.h switches */ 147 148 #endif 149