1 /* 2 ******************************************************************************* 3 * Copyright (C) 2001-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File ucoleitr.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/15/2001 synwee Modified all methods to process its own function 13 * instead of calling the equivalent c++ api (coleitr.h) 14 *******************************************************************************/ 15 16 #ifndef UCOLEITR_H 17 #define UCOLEITR_H 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_COLLATION 22 23 /** 24 * This indicates an error has occured during processing or if no more CEs is 25 * to be returned. 26 * @stable ICU 2.0 27 */ 28 #define UCOL_NULLORDER ((int32_t)0xFFFFFFFF) 29 30 #ifndef U_HIDE_INTERNAL_API 31 /** 32 * This indicates an error has occured during processing or there are no more CEs 33 * to be returned. 34 * 35 * @internal 36 */ 37 #define UCOL_PROCESSED_NULLORDER ((int64_t)U_INT64_MAX) 38 #endif /* U_HIDE_INTERNAL_API */ 39 40 #include "unicode/ucol.h" 41 42 /** 43 * The UCollationElements struct. 44 * For usage in C programs. 45 * @stable ICU 2.0 46 */ 47 typedef struct UCollationElements UCollationElements; 48 49 /** 50 * \file 51 * \brief C API: UCollationElements 52 * 53 * The UCollationElements API is used as an iterator to walk through each 54 * character of an international string. Use the iterator to return the 55 * ordering priority of the positioned character. The ordering priority of a 56 * character, which we refer to as a key, defines how a character is collated 57 * in the given collation object. 58 * For example, consider the following in Spanish: 59 * <pre> 60 * . "ca" -> the first key is key('c') and second key is key('a'). 61 * . "cha" -> the first key is key('ch') and second key is key('a'). 62 * </pre> 63 * And in German, 64 * <pre> 65 * . "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and 66 * . the third key is key('b'). 67 * </pre> 68 * <p>Example of the iterator usage: (without error checking) 69 * <pre> 70 * . void CollationElementIterator_Example() 71 * . { 72 * . UChar *s; 73 * . t_int32 order, primaryOrder; 74 * . UCollationElements *c; 75 * . UCollatorOld *coll; 76 * . UErrorCode success = U_ZERO_ERROR; 77 * . s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) ); 78 * . u_uastrcpy(s, "This is a test"); 79 * . coll = ucol_open(NULL, &success); 80 * . c = ucol_openElements(coll, str, u_strlen(str), &status); 81 * . order = ucol_next(c, &success); 82 * . ucol_reset(c); 83 * . order = ucol_prev(c, &success); 84 * . free(s); 85 * . ucol_close(coll); 86 * . ucol_closeElements(c); 87 * . } 88 * </pre> 89 * <p> 90 * ucol_next() returns the collation order of the next. 91 * ucol_prev() returns the collation order of the previous character. 92 * The Collation Element Iterator moves only in one direction between calls to 93 * ucol_reset. That is, ucol_next() and ucol_prev can not be inter-used. 94 * Whenever ucol_prev is to be called after ucol_next() or vice versa, 95 * ucol_reset has to be called first to reset the status, shifting pointers to 96 * either the end or the start of the string. Hence at the next call of 97 * ucol_prev or ucol_next, the first or last collation order will be returned. 98 * If a change of direction is done without a ucol_reset, the result is 99 * undefined. 100 * The result of a forward iterate (ucol_next) and reversed result of the 101 * backward iterate (ucol_prev) on the same string are equivalent, if 102 * collation orders with the value UCOL_IGNORABLE are ignored. 103 * Character based on the comparison level of the collator. A collation order 104 * consists of primary order, secondary order and tertiary order. The data 105 * type of the collation order is <strong>t_int32</strong>. 106 * 107 * @see UCollator 108 */ 109 110 /** 111 * Open the collation elements for a string. 112 * 113 * @param coll The collator containing the desired collation rules. 114 * @param text The text to iterate over. 115 * @param textLength The number of characters in text, or -1 if null-terminated 116 * @param status A pointer to an UErrorCode to receive any errors. 117 * @return a struct containing collation element information 118 * @stable ICU 2.0 119 */ 120 U_STABLE UCollationElements* U_EXPORT2 121 ucol_openElements(const UCollator *coll, 122 const UChar *text, 123 int32_t textLength, 124 UErrorCode *status); 125 126 127 /** 128 * get a hash code for a key... Not very useful! 129 * @param key the given key. 130 * @param length the size of the key array. 131 * @return the hash code. 132 * @stable ICU 2.0 133 */ 134 U_STABLE int32_t U_EXPORT2 135 ucol_keyHashCode(const uint8_t* key, int32_t length); 136 137 /** 138 * Close a UCollationElements. 139 * Once closed, a UCollationElements may no longer be used. 140 * @param elems The UCollationElements to close. 141 * @stable ICU 2.0 142 */ 143 U_STABLE void U_EXPORT2 144 ucol_closeElements(UCollationElements *elems); 145 146 /** 147 * Reset the collation elements to their initial state. 148 * This will move the 'cursor' to the beginning of the text. 149 * Property settings for collation will be reset to the current status. 150 * @param elems The UCollationElements to reset. 151 * @see ucol_next 152 * @see ucol_previous 153 * @stable ICU 2.0 154 */ 155 U_STABLE void U_EXPORT2 156 ucol_reset(UCollationElements *elems); 157 158 #ifndef U_HIDE_INTERNAL_API 159 /** 160 * Set the collation elements to use implicit ordering for Han 161 * even if they've been tailored. This will also force Hangul 162 * syllables to be ordered by decomposing them to their component 163 * Jamo. 164 * 165 * @param elems The UCollationElements containing the text. 166 * @param status A pointer to a UErrorCode to reveive any errors. 167 * 168 * @internal 169 */ 170 U_INTERNAL void U_EXPORT2 171 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status); 172 #endif /* U_HIDE_INTERNAL_API */ 173 174 /** 175 * Get the ordering priority of the next collation element in the text. 176 * A single character may contain more than one collation element. 177 * @param elems The UCollationElements containing the text. 178 * @param status A pointer to an UErrorCode to receive any errors. 179 * @return The next collation elements ordering, otherwise returns NULLORDER 180 * if an error has occured or if the end of string has been reached 181 * @stable ICU 2.0 182 */ 183 U_STABLE int32_t U_EXPORT2 184 ucol_next(UCollationElements *elems, UErrorCode *status); 185 186 /** 187 * Get the ordering priority of the previous collation element in the text. 188 * A single character may contain more than one collation element. 189 * Note that internally a stack is used to store buffered collation elements. 190 * It is very rare that the stack will overflow, however if such a case is 191 * encountered, the problem can be solved by increasing the size 192 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h. 193 * @param elems The UCollationElements containing the text. 194 * @param status A pointer to an UErrorCode to receive any errors. Noteably 195 * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack 196 * buffer has been exhausted. 197 * @return The previous collation elements ordering, otherwise returns 198 * NULLORDER if an error has occured or if the start of string has 199 * been reached. 200 * @stable ICU 2.0 201 */ 202 U_STABLE int32_t U_EXPORT2 203 ucol_previous(UCollationElements *elems, UErrorCode *status); 204 205 #ifndef U_HIDE_INTERNAL_API 206 /** 207 * Get the processed ordering priority of the next collation element in the text. 208 * A single character may contain more than one collation element. 209 * 210 * @param elems The UCollationElements containing the text. 211 * @param ixLow a pointer to an int32_t to receive the iterator index before fetching the CE. 212 * @param ixHigh a pointer to an int32_t to receive the iterator index after fetching the CE. 213 * @param status A pointer to an UErrorCode to receive any errors. 214 * @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER 215 * if an error has occured or if the end of string has been reached 216 * 217 * @internal 218 */ 219 U_INTERNAL int64_t U_EXPORT2 220 ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status); 221 222 /** 223 * Get the processed ordering priority of the previous collation element in the text. 224 * A single character may contain more than one collation element. 225 * Note that internally a stack is used to store buffered collation elements. 226 * It is very rare that the stack will overflow, however if such a case is 227 * encountered, the problem can be solved by increasing the size 228 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h. 229 * 230 * @param elems The UCollationElements containing the text. 231 * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE 232 * @param ixHigh A pointer to an int32_t to receiver the iterator index before fetching the CE 233 * @param status A pointer to an UErrorCode to receive any errors. Noteably 234 * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack 235 * buffer has been exhausted. 236 * @return The previous collation elements ordering, otherwise returns 237 * UCOL_PROCESSED_NULLORDER if an error has occured or if the start of 238 * string has been reached. 239 * 240 * @internal 241 */ 242 U_INTERNAL int64_t U_EXPORT2 243 ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status); 244 #endif /* U_HIDE_INTERNAL_API */ 245 246 /** 247 * Get the maximum length of any expansion sequences that end with the 248 * specified comparison order. 249 * This is useful for .... ? 250 * @param elems The UCollationElements containing the text. 251 * @param order A collation order returned by previous or next. 252 * @return maximum size of the expansion sequences ending with the collation 253 * element or 1 if collation element does not occur at the end of any 254 * expansion sequence 255 * @stable ICU 2.0 256 */ 257 U_STABLE int32_t U_EXPORT2 258 ucol_getMaxExpansion(const UCollationElements *elems, int32_t order); 259 260 /** 261 * Set the text containing the collation elements. 262 * Property settings for collation will remain the same. 263 * In order to reset the iterator to the current collation property settings, 264 * the API reset() has to be called. 265 * @param elems The UCollationElements to set. 266 * @param text The source text containing the collation elements. 267 * @param textLength The length of text, or -1 if null-terminated. 268 * @param status A pointer to an UErrorCode to receive any errors. 269 * @see ucol_getText 270 * @stable ICU 2.0 271 */ 272 U_STABLE void U_EXPORT2 273 ucol_setText( UCollationElements *elems, 274 const UChar *text, 275 int32_t textLength, 276 UErrorCode *status); 277 278 /** 279 * Get the offset of the current source character. 280 * This is an offset into the text of the character containing the current 281 * collation elements. 282 * @param elems The UCollationElements to query. 283 * @return The offset of the current source character. 284 * @see ucol_setOffset 285 * @stable ICU 2.0 286 */ 287 U_STABLE int32_t U_EXPORT2 288 ucol_getOffset(const UCollationElements *elems); 289 290 /** 291 * Set the offset of the current source character. 292 * This is an offset into the text of the character to be processed. 293 * Property settings for collation will remain the same. 294 * In order to reset the iterator to the current collation property settings, 295 * the API reset() has to be called. 296 * @param elems The UCollationElements to set. 297 * @param offset The desired character offset. 298 * @param status A pointer to an UErrorCode to receive any errors. 299 * @see ucol_getOffset 300 * @stable ICU 2.0 301 */ 302 U_STABLE void U_EXPORT2 303 ucol_setOffset(UCollationElements *elems, 304 int32_t offset, 305 UErrorCode *status); 306 307 /** 308 * Get the primary order of a collation order. 309 * @param order the collation order 310 * @return the primary order of a collation order. 311 * @stable ICU 2.6 312 */ 313 U_STABLE int32_t U_EXPORT2 314 ucol_primaryOrder (int32_t order); 315 316 /** 317 * Get the secondary order of a collation order. 318 * @param order the collation order 319 * @return the secondary order of a collation order. 320 * @stable ICU 2.6 321 */ 322 U_STABLE int32_t U_EXPORT2 323 ucol_secondaryOrder (int32_t order); 324 325 /** 326 * Get the tertiary order of a collation order. 327 * @param order the collation order 328 * @return the tertiary order of a collation order. 329 * @stable ICU 2.6 330 */ 331 U_STABLE int32_t U_EXPORT2 332 ucol_tertiaryOrder (int32_t order); 333 334 #endif /* #if !UCONFIG_NO_COLLATION */ 335 336 #endif 337