1 /* 2 ******************************************************************************* 3 * Copyright (C) 2001-2009, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File ucoleitr.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/15/2001 synwee Modified all methods to process its own function 13 * instead of calling the equivalent c++ api (coleitr.h) 14 *******************************************************************************/ 15 16 #ifndef UCOLEITR_H 17 #define UCOLEITR_H 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_COLLATION 22 23 /** 24 * This indicates an error has occured during processing or if no more CEs is 25 * to be returned. 26 * @stable ICU 2.0 27 */ 28 #define UCOL_NULLORDER ((int32_t)0xFFFFFFFF) 29 30 /** 31 * This indicates an error has occured during processing or there are no more CEs 32 * to be returned. 33 * 34 * @internal 35 */ 36 #define UCOL_PROCESSED_NULLORDER ((int64_t)U_INT64_MAX) 37 38 #include "unicode/ucol.h" 39 40 /** 41 * The UCollationElements struct. 42 * For usage in C programs. 43 * @stable ICU 2.0 44 */ 45 typedef struct UCollationElements UCollationElements; 46 47 /** 48 * \file 49 * \brief C API: UCollationElements 50 * 51 * The UCollationElements API is used as an iterator to walk through each 52 * character of an international string. Use the iterator to return the 53 * ordering priority of the positioned character. The ordering priority of a 54 * character, which we refer to as a key, defines how a character is collated 55 * in the given collation object. 56 * For example, consider the following in Spanish: 57 * <pre> 58 * . "ca" -> the first key is key('c') and second key is key('a'). 59 * . "cha" -> the first key is key('ch') and second key is key('a'). 60 * </pre> 61 * And in German, 62 * <pre> 63 * . "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and 64 * . the third key is key('b'). 65 * </pre> 66 * <p>Example of the iterator usage: (without error checking) 67 * <pre> 68 * . void CollationElementIterator_Example() 69 * . { 70 * . UChar *s; 71 * . t_int32 order, primaryOrder; 72 * . UCollationElements *c; 73 * . UCollatorOld *coll; 74 * . UErrorCode success = U_ZERO_ERROR; 75 * . s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) ); 76 * . u_uastrcpy(s, "This is a test"); 77 * . coll = ucol_open(NULL, &success); 78 * . c = ucol_openElements(coll, str, u_strlen(str), &status); 79 * . order = ucol_next(c, &success); 80 * . ucol_reset(c); 81 * . order = ucol_prev(c, &success); 82 * . free(s); 83 * . ucol_close(coll); 84 * . ucol_closeElements(c); 85 * . } 86 * </pre> 87 * <p> 88 * ucol_next() returns the collation order of the next. 89 * ucol_prev() returns the collation order of the previous character. 90 * The Collation Element Iterator moves only in one direction between calls to 91 * ucol_reset. That is, ucol_next() and ucol_prev can not be inter-used. 92 * Whenever ucol_prev is to be called after ucol_next() or vice versa, 93 * ucol_reset has to be called first to reset the status, shifting pointers to 94 * either the end or the start of the string. Hence at the next call of 95 * ucol_prev or ucol_next, the first or last collation order will be returned. 96 * If a change of direction is done without a ucol_reset, the result is 97 * undefined. 98 * The result of a forward iterate (ucol_next) and reversed result of the 99 * backward iterate (ucol_prev) on the same string are equivalent, if 100 * collation orders with the value UCOL_IGNORABLE are ignored. 101 * Character based on the comparison level of the collator. A collation order 102 * consists of primary order, secondary order and tertiary order. The data 103 * type of the collation order is <strong>t_int32</strong>. 104 * 105 * @see UCollator 106 */ 107 108 /** 109 * Open the collation elements for a string. 110 * 111 * @param coll The collator containing the desired collation rules. 112 * @param text The text to iterate over. 113 * @param textLength The number of characters in text, or -1 if null-terminated 114 * @param status A pointer to an UErrorCode to receive any errors. 115 * @return a struct containing collation element information 116 * @stable ICU 2.0 117 */ 118 U_STABLE UCollationElements* U_EXPORT2 119 ucol_openElements(const UCollator *coll, 120 const UChar *text, 121 int32_t textLength, 122 UErrorCode *status); 123 124 125 /** 126 * get a hash code for a key... Not very useful! 127 * @param key the given key. 128 * @param length the size of the key array. 129 * @return the hash code. 130 * @stable ICU 2.0 131 */ 132 U_STABLE int32_t U_EXPORT2 133 ucol_keyHashCode(const uint8_t* key, int32_t length); 134 135 /** 136 * Close a UCollationElements. 137 * Once closed, a UCollationElements may no longer be used. 138 * @param elems The UCollationElements to close. 139 * @stable ICU 2.0 140 */ 141 U_STABLE void U_EXPORT2 142 ucol_closeElements(UCollationElements *elems); 143 144 /** 145 * Reset the collation elements to their initial state. 146 * This will move the 'cursor' to the beginning of the text. 147 * Property settings for collation will be reset to the current status. 148 * @param elems The UCollationElements to reset. 149 * @see ucol_next 150 * @see ucol_previous 151 * @stable ICU 2.0 152 */ 153 U_STABLE void U_EXPORT2 154 ucol_reset(UCollationElements *elems); 155 156 /** 157 * Set the collation elements to use implicit ordering for Han 158 * even if they've been tailored. This will also force Hangul 159 * syllables to be ordered by decomposing them to their component 160 * Jamo. 161 * 162 * @param elems The UCollationElements containing the text. 163 * @param status A pointer to a UErrorCode to reveive any errors. 164 * 165 * @internal 166 */ 167 U_INTERNAL void U_EXPORT2 168 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status); 169 170 /** 171 * Get the ordering priority of the next collation element in the text. 172 * A single character may contain more than one collation element. 173 * @param elems The UCollationElements containing the text. 174 * @param status A pointer to an UErrorCode to receive any errors. 175 * @return The next collation elements ordering, otherwise returns NULLORDER 176 * if an error has occured or if the end of string has been reached 177 * @stable ICU 2.0 178 */ 179 U_STABLE int32_t U_EXPORT2 180 ucol_next(UCollationElements *elems, UErrorCode *status); 181 182 /** 183 * Get the ordering priority of the previous collation element in the text. 184 * A single character may contain more than one collation element. 185 * Note that internally a stack is used to store buffered collation elements. 186 * It is very rare that the stack will overflow, however if such a case is 187 * encountered, the problem can be solved by increasing the size 188 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h. 189 * @param elems The UCollationElements containing the text. 190 * @param status A pointer to an UErrorCode to receive any errors. Noteably 191 * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack 192 * buffer has been exhausted. 193 * @return The previous collation elements ordering, otherwise returns 194 * NULLORDER if an error has occured or if the start of string has 195 * been reached. 196 * @stable ICU 2.0 197 */ 198 U_STABLE int32_t U_EXPORT2 199 ucol_previous(UCollationElements *elems, UErrorCode *status); 200 201 /** 202 * Get the processed ordering priority of the next collation element in the text. 203 * A single character may contain more than one collation element. 204 * 205 * @param elems The UCollationElements containing the text. 206 * @param ixLow a pointer to an int32_t to receive the iterator index before fetching the CE. 207 * @param ixHigh a pointer to an int32_t to receive the iterator index after fetching the CE. 208 * @param status A pointer to an UErrorCode to receive any errors. 209 * @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER 210 * if an error has occured or if the end of string has been reached 211 * 212 * @internal 213 */ 214 U_INTERNAL int64_t U_EXPORT2 215 ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status); 216 217 /** 218 * Get the processed ordering priority of the previous collation element in the text. 219 * A single character may contain more than one collation element. 220 * Note that internally a stack is used to store buffered collation elements. 221 * It is very rare that the stack will overflow, however if such a case is 222 * encountered, the problem can be solved by increasing the size 223 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h. 224 * 225 * @param elems The UCollationElements containing the text. 226 * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE 227 * @param ixHigh A pointer to an int32_t to receiver the iterator index before fetching the CE 228 * @param status A pointer to an UErrorCode to receive any errors. Noteably 229 * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack 230 * buffer has been exhausted. 231 * @return The previous collation elements ordering, otherwise returns 232 * UCOL_PROCESSED_NULLORDER if an error has occured or if the start of 233 * string has been reached. 234 * 235 * @internal 236 */ 237 U_INTERNAL int64_t U_EXPORT2 238 ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status); 239 240 /** 241 * Get the maximum length of any expansion sequences that end with the 242 * specified comparison order. 243 * This is useful for .... ? 244 * @param elems The UCollationElements containing the text. 245 * @param order A collation order returned by previous or next. 246 * @return maximum size of the expansion sequences ending with the collation 247 * element or 1 if collation element does not occur at the end of any 248 * expansion sequence 249 * @stable ICU 2.0 250 */ 251 U_STABLE int32_t U_EXPORT2 252 ucol_getMaxExpansion(const UCollationElements *elems, int32_t order); 253 254 /** 255 * Set the text containing the collation elements. 256 * Property settings for collation will remain the same. 257 * In order to reset the iterator to the current collation property settings, 258 * the API reset() has to be called. 259 * @param elems The UCollationElements to set. 260 * @param text The source text containing the collation elements. 261 * @param textLength The length of text, or -1 if null-terminated. 262 * @param status A pointer to an UErrorCode to receive any errors. 263 * @see ucol_getText 264 * @stable ICU 2.0 265 */ 266 U_STABLE void U_EXPORT2 267 ucol_setText( UCollationElements *elems, 268 const UChar *text, 269 int32_t textLength, 270 UErrorCode *status); 271 272 /** 273 * Get the offset of the current source character. 274 * This is an offset into the text of the character containing the current 275 * collation elements. 276 * @param elems The UCollationElements to query. 277 * @return The offset of the current source character. 278 * @see ucol_setOffset 279 * @stable ICU 2.0 280 */ 281 U_STABLE int32_t U_EXPORT2 282 ucol_getOffset(const UCollationElements *elems); 283 284 /** 285 * Set the offset of the current source character. 286 * This is an offset into the text of the character to be processed. 287 * Property settings for collation will remain the same. 288 * In order to reset the iterator to the current collation property settings, 289 * the API reset() has to be called. 290 * @param elems The UCollationElements to set. 291 * @param offset The desired character offset. 292 * @param status A pointer to an UErrorCode to receive any errors. 293 * @see ucol_getOffset 294 * @stable ICU 2.0 295 */ 296 U_STABLE void U_EXPORT2 297 ucol_setOffset(UCollationElements *elems, 298 int32_t offset, 299 UErrorCode *status); 300 301 /** 302 * Get the primary order of a collation order. 303 * @param order the collation order 304 * @return the primary order of a collation order. 305 * @stable ICU 2.6 306 */ 307 U_STABLE int32_t U_EXPORT2 308 ucol_primaryOrder (int32_t order); 309 310 /** 311 * Get the secondary order of a collation order. 312 * @param order the collation order 313 * @return the secondary order of a collation order. 314 * @stable ICU 2.6 315 */ 316 U_STABLE int32_t U_EXPORT2 317 ucol_secondaryOrder (int32_t order); 318 319 /** 320 * Get the tertiary order of a collation order. 321 * @param order the collation order 322 * @return the tertiary order of a collation order. 323 * @stable ICU 2.6 324 */ 325 U_STABLE int32_t U_EXPORT2 326 ucol_tertiaryOrder (int32_t order); 327 328 #endif /* #if !UCONFIG_NO_COLLATION */ 329 330 #endif 331