1 /* 2 ****************************************************************************** 3 * Copyright (C) 1996-2010, International Business Machines * 4 * Corporation and others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 8 /** 9 * \file 10 * \brief C++ API: Collation data used to compute minLengthInChars. 11 * \internal 12 */ 13 14 #ifndef COLL_DATA_H 15 #define COLL_DATA_H 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_COLLATION 20 21 #include "unicode/uobject.h" 22 #include "unicode/ucol.h" 23 24 U_NAMESPACE_BEGIN 25 26 /** 27 * The size of the internal buffer for the Collator's short description string. 28 * @internal ICU 4.0.1 technology preview 29 */ 30 #define KEY_BUFFER_SIZE 64 31 32 /** 33 * The size of the internal CE buffer in a <code>CEList</code> object 34 * @internal ICU 4.0.1 technology preview 35 */ 36 #define CELIST_BUFFER_SIZE 4 37 38 /** 39 * \def INSTRUMENT_CELIST 40 * Define this to enable the <code>CEList</code> objects to collect 41 * statistics. 42 * @internal ICU 4.0.1 technology preview 43 */ 44 //#define INSTRUMENT_CELIST 45 46 /** 47 * The size of the initial list in a <code>StringList</code> object. 48 * @internal ICU 4.0.1 technology preview 49 */ 50 #define STRING_LIST_BUFFER_SIZE 16 51 52 /** 53 * \def INSTRUMENT_STRING_LIST 54 * Define this to enable the <code>StringList</code> objects to 55 * collect statistics. 56 * @internal ICU 4.0.1 technology preview 57 */ 58 //#define INSTRUMENT_STRING_LIST 59 60 /** 61 * This object holds a list of CEs generated from a particular 62 * <code>UnicodeString</code> 63 * 64 * @internal ICU 4.0.1 technology preview 65 */ 66 class U_I18N_API CEList : public UObject 67 { 68 public: 69 /** 70 * Construct a <code>CEList</code> object. 71 * 72 * @param coll - the Collator used to collect the CEs. 73 * @param string - the string for which to collect the CEs. 74 * @param status - will be set if any errors occur. 75 * 76 * Note: if on return, status is set to an error code, 77 * the only safe thing to do with this object is to call 78 * the destructor. 79 * 80 * @internal ICU 4.0.1 technology preview 81 */ 82 CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status); 83 84 /** 85 * The destructor. 86 * @internal ICU 4.0.1 technology preview 87 */ 88 ~CEList(); 89 90 /** 91 * Return the number of CEs in the list. 92 * 93 * @return the number of CEs in the list. 94 * 95 * @internal ICU 4.0.1 technology preview 96 */ 97 int32_t size() const; 98 99 /** 100 * Get a particular CE from the list. 101 * 102 * @param index - the index of the CE to return 103 * 104 * @return the CE, or <code>0</code> if <code>index</code> is out of range 105 * 106 * @internal ICU 4.0.1 technology preview 107 */ 108 uint32_t get(int32_t index) const; 109 110 /** 111 * Check if the CEs in another <code>CEList</code> match the 112 * suffix of this list starting at a give offset. 113 * 114 * @param offset - the offset of the suffix 115 * @param other - the other <code>CEList</code> 116 * 117 * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise. 118 * 119 * @internal ICU 4.0.1 technology preview 120 */ 121 UBool matchesAt(int32_t offset, const CEList *other) const; 122 123 /** 124 * The index operator. 125 * 126 * @param index - the index 127 * 128 * @return a reference to the given CE in the list 129 * 130 * @internal ICU 4.0.1 technology preview 131 */ 132 uint32_t &operator[](int32_t index) const; 133 134 /** 135 * UObject glue... 136 * @internal ICU 4.0.1 technology preview 137 */ 138 virtual UClassID getDynamicClassID() const; 139 /** 140 * UObject glue... 141 * @internal ICU 4.0.1 technology preview 142 */ 143 static UClassID getStaticClassID(); 144 145 private: 146 void add(uint32_t ce, UErrorCode &status); 147 148 uint32_t ceBuffer[CELIST_BUFFER_SIZE]; 149 uint32_t *ces; 150 int32_t listMax; 151 int32_t listSize; 152 153 #ifdef INSTRUMENT_CELIST 154 static int32_t _active; 155 static int32_t _histogram[10]; 156 #endif 157 }; 158 159 /** 160 * StringList 161 * 162 * This object holds a list of <code>UnicodeString</code> objects. 163 * 164 * @internal ICU 4.0.1 technology preview 165 */ 166 class U_I18N_API StringList : public UObject 167 { 168 public: 169 /** 170 * Construct an empty <code>StringList</code> 171 * 172 * @param status - will be set if any errors occur. 173 * 174 * Note: if on return, status is set to an error code, 175 * the only safe thing to do with this object is to call 176 * the destructor. 177 * 178 * @internal ICU 4.0.1 technology preview 179 */ 180 StringList(UErrorCode &status); 181 182 /** 183 * The destructor. 184 * 185 * @internal ICU 4.0.1 technology preview 186 */ 187 ~StringList(); 188 189 /** 190 * Add a string to the list. 191 * 192 * @param string - the string to add 193 * @param status - will be set if any errors occur. 194 * 195 * @internal ICU 4.0.1 technology preview 196 */ 197 void add(const UnicodeString *string, UErrorCode &status); 198 199 /** 200 * Add an array of Unicode code points to the list. 201 * 202 * @param chars - the address of the array of code points 203 * @param count - the number of code points in the array 204 * @param status - will be set if any errors occur. 205 * 206 * @internal ICU 4.0.1 technology preview 207 */ 208 void add(const UChar *chars, int32_t count, UErrorCode &status); 209 210 /** 211 * Get a particular string from the list. 212 * 213 * @param index - the index of the string 214 * 215 * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code> 216 * if <code>index</code> is out of bounds. 217 * 218 * @internal ICU 4.0.1 technology preview 219 */ 220 const UnicodeString *get(int32_t index) const; 221 222 /** 223 * Get the number of stings in the list. 224 * 225 * @return the number of strings in the list. 226 * 227 * @internal ICU 4.0.1 technology preview 228 */ 229 int32_t size() const; 230 231 /** 232 * the UObject glue... 233 * @internal ICU 4.0.1 technology preview 234 */ 235 virtual UClassID getDynamicClassID() const; 236 /** 237 * the UObject glue... 238 * @internal ICU 4.0.1 technology preview 239 */ 240 static UClassID getStaticClassID(); 241 242 private: 243 UnicodeString *strings; 244 int32_t listMax; 245 int32_t listSize; 246 247 #ifdef INSTRUMENT_STRING_LIST 248 static int32_t _lists; 249 static int32_t _strings; 250 static int32_t _histogram[101]; 251 #endif 252 }; 253 254 /* 255 * Forward references to internal classes. 256 */ 257 class StringToCEsMap; 258 class CEToStringsMap; 259 class CollDataCache; 260 261 /** 262 * CollData 263 * 264 * This class holds the Collator-specific data needed to 265 * compute the length of the shortest string that can 266 * generate a partcular list of CEs. 267 * 268 * <code>CollData</code> objects are quite expensive to compute. Because 269 * of this, they are cached. When you call <code>CollData::open</code> it 270 * returns a reference counted cached object. When you call <code>CollData::close</code> 271 * the reference count on the object is decremented but the object is not deleted. 272 * 273 * If you do not need to reuse any unreferenced objects in the cache, you can call 274 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code> 275 * objects, you can call <code>CollData::freeCollDataCache</code> 276 * 277 * @internal ICU 4.0.1 technology preview 278 */ 279 class U_I18N_API CollData : public UObject 280 { 281 public: 282 /** 283 * Construct a <code>CollData</code> object. 284 * 285 * @param collator - the collator 286 * @param status - will be set if any errors occur. 287 * 288 * @return the <code>CollData</code> object. You must call 289 * <code>close</code> when you are done using the object. 290 * 291 * Note: if on return, status is set to an error code, 292 * the only safe thing to do with this object is to call 293 * <code>CollData::close</code>. 294 * 295 * @internal ICU 4.0.1 technology preview 296 */ 297 static CollData *open(UCollator *collator, UErrorCode &status); 298 299 /** 300 * Release a <code>CollData</code> object. 301 * 302 * @param collData - the object 303 * 304 * @internal ICU 4.0.1 technology preview 305 */ 306 static void close(CollData *collData); 307 308 /** 309 * Get the <code>UCollator</code> object used to create this object. 310 * The object returned may not be the exact object that was used to 311 * create this object, but it will have the same behavior. 312 * @internal ICU 4.0.1 technology preview 313 */ 314 UCollator *getCollator() const; 315 316 /** 317 * Get a list of all the strings which generate a list 318 * of CEs starting with a given CE. 319 * 320 * @param ce - the CE 321 * 322 * return a <code>StringList</code> object containing all 323 * the stirngs, or <code>NULL</code> if there are 324 * no such strings. 325 * 326 * @internal ICU 4.0.1 technology preview. 327 */ 328 const StringList *getStringList(int32_t ce) const; 329 330 /** 331 * Get a list of the CEs generated by a partcular stirng. 332 * 333 * @param string - the string 334 * 335 * @return a <code>CEList</code> object containt the CEs. You 336 * must call <code>freeCEList</code> when you are finished 337 * using the <code>CEList</code>/ 338 * 339 * @internal ICU 4.0.1 technology preview. 340 */ 341 const CEList *getCEList(const UnicodeString *string) const; 342 343 /** 344 * Release a <code>CEList</code> returned by <code>getCEList</code>. 345 * 346 * @param list - the <code>CEList</code> to free. 347 * 348 * @internal ICU 4.0.1 technology preview 349 */ 350 void freeCEList(const CEList *list); 351 352 /** 353 * Return the length of the shortest string that will generate 354 * the given list of CEs. 355 * 356 * @param ces - the CEs 357 * @param offset - the offset of the first CE in the list to use. 358 * 359 * @return the length of the shortest string. 360 * 361 * @internal ICU 4.0.1 technology preview 362 */ 363 int32_t minLengthInChars(const CEList *ces, int32_t offset) const; 364 365 366 /** 367 * Return the length of the shortest string that will generate 368 * the given list of CEs. 369 * 370 * Note: the algorithm used to do this computation is recursive. To 371 * limit the amount of recursion, a "history" list is used to record 372 * the best answer starting at a particular offset in the list of CEs. 373 * If the same offset is visited again during the recursion, the answer 374 * in the history list is used. 375 * 376 * @param ces - the CEs 377 * @param offset - the offset of the first CE in the list to use. 378 * @param history - the history list. Must be at least as long as 379 * the number of cEs in the <code>CEList</code> 380 * 381 * @return the length of the shortest string. 382 * 383 * @internal ICU 4.0.1 technology preview 384 */ 385 int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const; 386 387 /** 388 * UObject glue... 389 * @internal ICU 4.0.1 technology preview 390 */ 391 virtual UClassID getDynamicClassID() const; 392 /** 393 * UObject glue... 394 * @internal ICU 4.0.1 technology preview 395 */ 396 static UClassID getStaticClassID(); 397 398 /** 399 * <code>CollData</code> objects are expensive to compute, and so 400 * may be cached. This routine will free the cached objects and delete 401 * the cache. 402 * 403 * WARNING: Don't call this until you are have called <code>close</code> 404 * for each <code>CollData</code> object that you have used. also, 405 * DO NOT call this if another thread may be calling <code>flushCollDataCache</code> 406 * at the same time. 407 * 408 * @internal 4.0.1 technology preview 409 */ 410 static void freeCollDataCache(); 411 412 /** 413 * <code>CollData</code> objects are expensive to compute, and so 414 * may be cached. This routine will remove any unused <code>CollData</code> 415 * objects from the cache. 416 * 417 * @internal 4.0.1 technology preview 418 */ 419 static void flushCollDataCache(); 420 421 private: 422 friend class CollDataCache; 423 friend class CollDataCacheEntry; 424 425 CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); 426 ~CollData(); 427 428 CollData(); 429 430 static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength); 431 432 static CollDataCache *getCollDataCache(); 433 434 UCollator *coll; 435 StringToCEsMap *charsToCEList; 436 CEToStringsMap *ceToCharsStartingWith; 437 438 char keyBuffer[KEY_BUFFER_SIZE]; 439 char *key; 440 441 static CollDataCache *collDataCache; 442 443 uint32_t minHan; 444 uint32_t maxHan; 445 446 uint32_t jamoLimits[4]; 447 }; 448 449 U_NAMESPACE_END 450 451 #endif // #if !UCONFIG_NO_COLLATION 452 #endif // #ifndef COLL_DATA_H 453