1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2009, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: ucol_res.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * Description: 12 * This file contains dependencies that the collation run-time doesn't normally 13 * need. This mainly contains resource bundle usage and collation meta information 14 * 15 * Modification history 16 * Date Name Comments 17 * 1996-1999 various members of ICU team maintained C API for collation framework 18 * 02/16/2001 synwee Added internal method getPrevSpecialCE 19 * 03/01/2001 synwee Added maxexpansion functionality. 20 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant 21 * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp 22 */ 23 24 #include "unicode/utypes.h" 25 26 #if !UCONFIG_NO_COLLATION 27 #include "unicode/uloc.h" 28 #include "unicode/coll.h" 29 #include "unicode/tblcoll.h" 30 #include "unicode/caniter.h" 31 #include "unicode/ustring.h" 32 33 #include "ucol_bld.h" 34 #include "ucol_imp.h" 35 #include "ucol_tok.h" 36 #include "ucol_elm.h" 37 #include "uresimp.h" 38 #include "ustr_imp.h" 39 #include "cstring.h" 40 #include "umutex.h" 41 #include "ucln_in.h" 42 #include "ustrenum.h" 43 #include "putilimp.h" 44 #include "utracimp.h" 45 #include "cmemory.h" 46 #include "uenumimp.h" 47 #include "ulist.h" 48 49 U_NAMESPACE_USE 50 51 // static UCA. There is only one. Collators don't use it. 52 // It is referenced only in ucol_initUCA and ucol_cleanup 53 static UCollator* _staticUCA = NULL; 54 // static pointer to udata memory. Inited in ucol_initUCA 55 // used for cleanup in ucol_cleanup 56 static UDataMemory* UCA_DATA_MEM = NULL; 57 58 U_CDECL_BEGIN 59 static UBool U_CALLCONV 60 ucol_res_cleanup(void) 61 { 62 if (UCA_DATA_MEM) { 63 udata_close(UCA_DATA_MEM); 64 UCA_DATA_MEM = NULL; 65 } 66 if (_staticUCA) { 67 ucol_close(_staticUCA); 68 _staticUCA = NULL; 69 } 70 return TRUE; 71 } 72 73 static UBool U_CALLCONV 74 isAcceptableUCA(void * /*context*/, 75 const char * /*type*/, const char * /*name*/, 76 const UDataInfo *pInfo){ 77 /* context, type & name are intentionally not used */ 78 if( pInfo->size>=20 && 79 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 80 pInfo->charsetFamily==U_CHARSET_FAMILY && 81 pInfo->dataFormat[0]==UCA_DATA_FORMAT_0 && /* dataFormat="UCol" */ 82 pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 && 83 pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 && 84 pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 && 85 pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 && 86 pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1// && 87 //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 && 88 //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh 89 //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh 90 ) { 91 UVersionInfo UCDVersion; 92 u_getUnicodeVersion(UCDVersion); 93 return (UBool)(pInfo->dataVersion[0]==UCDVersion[0] 94 && pInfo->dataVersion[1]==UCDVersion[1]); 95 //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2] 96 //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]); 97 } else { 98 return FALSE; 99 } 100 } 101 U_CDECL_END 102 103 /* do not close UCA returned by ucol_initUCA! */ 104 UCollator * 105 ucol_initUCA(UErrorCode *status) { 106 if(U_FAILURE(*status)) { 107 return NULL; 108 } 109 UBool needsInit; 110 UMTX_CHECK(NULL, (_staticUCA == NULL), needsInit); 111 112 if(needsInit) { 113 UDataMemory *result = udata_openChoice(U_ICUDATA_COLL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status); 114 115 if(U_SUCCESS(*status)){ 116 UCollator *newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), NULL, NULL, status); 117 if(U_SUCCESS(*status)){ 118 // Initalize variables for implicit generation 119 uprv_uca_initImplicitConstants(status); 120 121 umtx_lock(NULL); 122 if(_staticUCA == NULL) { 123 UCA_DATA_MEM = result; 124 _staticUCA = newUCA; 125 newUCA = NULL; 126 result = NULL; 127 } 128 umtx_unlock(NULL); 129 130 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup); 131 if(newUCA != NULL) { 132 ucol_close(newUCA); 133 udata_close(result); 134 } 135 }else{ 136 ucol_close(newUCA); 137 udata_close(result); 138 } 139 } 140 else { 141 udata_close(result); 142 } 143 } 144 return _staticUCA; 145 } 146 147 U_CAPI void U_EXPORT2 148 ucol_forgetUCA(void) 149 { 150 _staticUCA = NULL; 151 UCA_DATA_MEM = NULL; 152 } 153 154 /****************************************************************************/ 155 /* Following are the open/close functions */ 156 /* */ 157 /****************************************************************************/ 158 static UCollator* 159 tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) { 160 int32_t rulesLen = 0; 161 const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status); 162 return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status); 163 } 164 165 166 // API in ucol_imp.h 167 168 U_CFUNC UCollator* 169 ucol_open_internal(const char *loc, 170 UErrorCode *status) 171 { 172 UErrorCode intStatus = U_ZERO_ERROR; 173 const UCollator* UCA = ucol_initUCA(status); 174 175 /* New version */ 176 if(U_FAILURE(*status)) return 0; 177 178 179 180 UCollator *result = NULL; 181 UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status); 182 183 /* we try to find stuff from keyword */ 184 UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status); 185 UResourceBundle *collElem = NULL; 186 char keyBuffer[256]; 187 // if there is a keyword, we pick it up and try to get elements 188 if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status) || 189 !uprv_strcmp(keyBuffer,"default")) { /* Treat 'zz@collation=default' as 'zz'. */ 190 // no keyword. we try to find the default setting, which will give us the keyword value 191 intStatus = U_ZERO_ERROR; 192 // finding default value does not affect collation fallback status 193 UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus); 194 if(U_SUCCESS(intStatus)) { 195 int32_t defaultKeyLen = 0; 196 const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus); 197 u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen); 198 keyBuffer[defaultKeyLen] = 0; 199 } else { 200 *status = U_INTERNAL_PROGRAM_ERROR; 201 return NULL; 202 } 203 ures_close(defaultColl); 204 } 205 collElem = ures_getByKeyWithFallback(collations, keyBuffer, collations, status); 206 collations = NULL; // We just reused the collations object as collElem. 207 208 UResourceBundle *binary = NULL; 209 210 if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */ 211 *status = U_USING_DEFAULT_WARNING; 212 result = ucol_initCollator(UCA->image, result, UCA, status); 213 if (U_FAILURE(*status)) { 214 goto clean; 215 } 216 // if we use UCA, real locale is root 217 ures_close(b); 218 b = ures_open(U_ICUDATA_COLL, "", status); 219 ures_close(collElem); 220 collElem = ures_open(U_ICUDATA_COLL, "", status); 221 if(U_FAILURE(*status)) { 222 goto clean; 223 } 224 result->hasRealData = FALSE; 225 } else if(U_SUCCESS(*status)) { 226 intStatus = U_ZERO_ERROR; 227 228 binary = ures_getByKey(collElem, "%%CollationBin", NULL, &intStatus); 229 230 if(intStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */ 231 binary = NULL; 232 result = tryOpeningFromRules(collElem, status); 233 if(U_FAILURE(*status)) { 234 goto clean; 235 } 236 } else if(U_SUCCESS(intStatus)) { /* otherwise, we'll pick a collation data that exists */ 237 int32_t len = 0; 238 const uint8_t *inData = ures_getBinary(binary, &len, status); 239 if(U_FAILURE(*status)) { 240 goto clean; 241 } 242 UCATableHeader *colData = (UCATableHeader *)inData; 243 if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 || 244 uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 || 245 colData->version[0] != UCOL_BUILDER_VERSION) 246 { 247 *status = U_DIFFERENT_UCA_VERSION; 248 result = tryOpeningFromRules(collElem, status); 249 } else { 250 if(U_FAILURE(*status)){ 251 goto clean; 252 } 253 if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) { 254 result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status); 255 if(U_FAILURE(*status)){ 256 goto clean; 257 } 258 result->hasRealData = TRUE; 259 } else { 260 result = ucol_initCollator(UCA->image, result, UCA, status); 261 ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status); 262 if(U_FAILURE(*status)){ 263 goto clean; 264 } 265 result->hasRealData = FALSE; 266 } 267 result->freeImageOnClose = FALSE; 268 } 269 } else { // !U_SUCCESS(binaryStatus) 270 if(U_SUCCESS(*status)) { 271 *status = intStatus; // propagate underlying error 272 } 273 goto clean; 274 } 275 intStatus = U_ZERO_ERROR; 276 result->rules = ures_getStringByKey(collElem, "Sequence", &result->rulesLength, &intStatus); 277 result->freeRulesOnClose = FALSE; 278 } else { /* There is another error, and we're just gonna clean up */ 279 goto clean; 280 } 281 282 intStatus = U_ZERO_ERROR; 283 result->ucaRules = ures_getStringByKey(b,"UCARules",NULL,&intStatus); 284 285 if(loc == NULL) { 286 loc = ures_getLocaleByType(b, ULOC_ACTUAL_LOCALE, status); 287 } 288 result->requestedLocale = uprv_strdup(loc); 289 /* test for NULL */ 290 if (result->requestedLocale == NULL) { 291 *status = U_MEMORY_ALLOCATION_ERROR; 292 goto clean; 293 } 294 loc = ures_getLocaleByType(collElem, ULOC_ACTUAL_LOCALE, status); 295 result->actualLocale = uprv_strdup(loc); 296 /* test for NULL */ 297 if (result->actualLocale == NULL) { 298 *status = U_MEMORY_ALLOCATION_ERROR; 299 goto clean; 300 } 301 loc = ures_getLocaleByType(b, ULOC_ACTUAL_LOCALE, status); 302 result->validLocale = uprv_strdup(loc); 303 /* test for NULL */ 304 if (result->validLocale == NULL) { 305 *status = U_MEMORY_ALLOCATION_ERROR; 306 goto clean; 307 } 308 309 ures_close(b); 310 ures_close(collElem); 311 ures_close(binary); 312 return result; 313 314 clean: 315 ures_close(b); 316 ures_close(collElem); 317 ures_close(binary); 318 ucol_close(result); 319 return NULL; 320 } 321 322 U_CAPI UCollator* 323 ucol_open(const char *loc, 324 UErrorCode *status) 325 { 326 U_NAMESPACE_USE 327 328 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN); 329 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc); 330 UCollator *result = NULL; 331 332 u_init(status); 333 #if !UCONFIG_NO_SERVICE 334 result = Collator::createUCollator(loc, status); 335 if (result == NULL) 336 #endif 337 { 338 result = ucol_open_internal(loc, status); 339 } 340 UTRACE_EXIT_PTR_STATUS(result, *status); 341 return result; 342 } 343 344 U_CAPI UCollator* U_EXPORT2 345 ucol_openRules( const UChar *rules, 346 int32_t rulesLength, 347 UColAttributeValue normalizationMode, 348 UCollationStrength strength, 349 UParseError *parseError, 350 UErrorCode *status) 351 { 352 UColTokenParser src; 353 UColAttributeValue norm; 354 UParseError tErr; 355 356 if(status == NULL || U_FAILURE(*status)){ 357 return 0; 358 } 359 360 u_init(status); 361 if (U_FAILURE(*status)) { 362 return NULL; 363 } 364 365 if(rules == NULL || rulesLength < -1) { 366 *status = U_ILLEGAL_ARGUMENT_ERROR; 367 return 0; 368 } 369 370 if(rulesLength == -1) { 371 rulesLength = u_strlen(rules); 372 } 373 374 if(parseError == NULL){ 375 parseError = &tErr; 376 } 377 378 switch(normalizationMode) { 379 case UCOL_OFF: 380 case UCOL_ON: 381 case UCOL_DEFAULT: 382 norm = normalizationMode; 383 break; 384 default: 385 *status = U_ILLEGAL_ARGUMENT_ERROR; 386 return 0; 387 } 388 389 UCollator *result = NULL; 390 UCATableHeader *table = NULL; 391 UCollator *UCA = ucol_initUCA(status); 392 393 if(U_FAILURE(*status)){ 394 return NULL; 395 } 396 397 ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status); 398 ucol_tok_assembleTokenList(&src,parseError, status); 399 400 if(U_FAILURE(*status)) { 401 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */ 402 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */ 403 /* so something might be done here... or on lower level */ 404 #ifdef UCOL_DEBUG 405 if(*status == U_ILLEGAL_ARGUMENT_ERROR) { 406 fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source); 407 } else { 408 fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source); 409 } 410 #endif 411 goto cleanup; 412 } 413 414 if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */ 415 /* also, if we wanted to remove some contractions, we should make a tailoring */ 416 table = ucol_assembleTailoringTable(&src, status); 417 if(U_SUCCESS(*status)) { 418 // builder version 419 table->version[0] = UCOL_BUILDER_VERSION; 420 // no tailoring information on this level 421 table->version[1] = table->version[2] = table->version[3] = 0; 422 // set UCD version 423 u_getUnicodeVersion(table->UCDVersion); 424 // set UCA version 425 uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)); 426 result = ucol_initCollator(table, 0, UCA, status); 427 if (U_FAILURE(*status)) { 428 goto cleanup; 429 } 430 result->hasRealData = TRUE; 431 result->freeImageOnClose = TRUE; 432 } 433 } else { /* no rules, but no error either */ 434 // must be only options 435 // We will init the collator from UCA 436 result = ucol_initCollator(UCA->image, 0, UCA, status); 437 // Check for null result 438 if (U_FAILURE(*status)) { 439 goto cleanup; 440 } 441 // And set only the options 442 UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet)); 443 /* test for NULL */ 444 if (opts == NULL) { 445 *status = U_MEMORY_ALLOCATION_ERROR; 446 goto cleanup; 447 } 448 uprv_memcpy(opts, src.opts, sizeof(UColOptionSet)); 449 ucol_setOptionsFromHeader(result, opts, status); 450 result->freeOptionsOnClose = TRUE; 451 result->hasRealData = FALSE; 452 result->freeImageOnClose = FALSE; 453 } 454 455 if(U_SUCCESS(*status)) { 456 UChar *newRules; 457 result->dataVersion[0] = UCOL_BUILDER_VERSION; 458 if(rulesLength > 0) { 459 newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR); 460 /* test for NULL */ 461 if (newRules == NULL) { 462 *status = U_MEMORY_ALLOCATION_ERROR; 463 goto cleanup; 464 } 465 uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR); 466 newRules[rulesLength]=0; 467 result->rules = newRules; 468 result->rulesLength = rulesLength; 469 result->freeRulesOnClose = TRUE; 470 } 471 result->ucaRules = NULL; 472 result->actualLocale = NULL; 473 result->validLocale = NULL; 474 result->requestedLocale = NULL; 475 ucol_setAttribute(result, UCOL_STRENGTH, strength, status); 476 ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status); 477 } else { 478 cleanup: 479 if(result != NULL) { 480 ucol_close(result); 481 } else { 482 if(table != NULL) { 483 uprv_free(table); 484 } 485 } 486 result = NULL; 487 } 488 489 ucol_tok_closeTokenList(&src); 490 491 return result; 492 } 493 494 U_CAPI int32_t U_EXPORT2 495 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) { 496 UErrorCode status = U_ZERO_ERROR; 497 int32_t len = 0; 498 int32_t UCAlen = 0; 499 const UChar* ucaRules = 0; 500 const UChar *rules = ucol_getRules(coll, &len); 501 if(delta == UCOL_FULL_RULES) { 502 /* take the UCA rules and append real rules at the end */ 503 /* UCA rules will be probably coming from the root RB */ 504 ucaRules = coll->ucaRules; 505 if (ucaRules) { 506 UCAlen = u_strlen(ucaRules); 507 } 508 /* 509 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status); 510 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status); 511 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status); 512 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status); 513 ures_close(uca); 514 ures_close(cresb); 515 */ 516 } 517 if(U_FAILURE(status)) { 518 return 0; 519 } 520 if(buffer!=0 && bufferLen>0){ 521 *buffer=0; 522 if(UCAlen > 0) { 523 u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen)); 524 } 525 if(len > 0 && bufferLen > UCAlen) { 526 u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen)); 527 } 528 } 529 return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status); 530 } 531 532 static const UChar _NUL = 0; 533 534 U_CAPI const UChar* U_EXPORT2 535 ucol_getRules( const UCollator *coll, 536 int32_t *length) 537 { 538 if(coll->rules != NULL) { 539 *length = coll->rulesLength; 540 return coll->rules; 541 } 542 else { 543 *length = 0; 544 return &_NUL; 545 } 546 } 547 548 U_CAPI UBool U_EXPORT2 549 ucol_equals(const UCollator *source, const UCollator *target) { 550 UErrorCode status = U_ZERO_ERROR; 551 // if pointers are equal, collators are equal 552 if(source == target) { 553 return TRUE; 554 } 555 int32_t i = 0, j = 0; 556 // if any of attributes are different, collators are not equal 557 for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { 558 if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) { 559 return FALSE; 560 } 561 } 562 563 int32_t sourceRulesLen = 0, targetRulesLen = 0; 564 const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen); 565 const UChar *targetRules = ucol_getRules(target, &targetRulesLen); 566 567 if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) { 568 // all the attributes are equal and the rules are equal - collators are equal 569 return(TRUE); 570 } 571 // hard part, need to construct tree from rules and see if they yield the same tailoring 572 UBool result = TRUE; 573 UParseError parseError; 574 UColTokenParser sourceParser, targetParser; 575 int32_t sourceListLen = 0, targetListLen = 0; 576 ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status); 577 ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status); 578 sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status); 579 targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status); 580 581 if(sourceListLen != targetListLen) { 582 // different number of resets 583 result = FALSE; 584 } else { 585 UColToken *sourceReset = NULL, *targetReset = NULL; 586 UChar *sourceResetString = NULL, *targetResetString = NULL; 587 int32_t sourceStringLen = 0, targetStringLen = 0; 588 for(i = 0; i < sourceListLen; i++) { 589 sourceReset = sourceParser.lh[i].reset; 590 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); 591 sourceStringLen = sourceReset->source >> 24; 592 for(j = 0; j < sourceListLen; j++) { 593 targetReset = targetParser.lh[j].reset; 594 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); 595 targetStringLen = targetReset->source >> 24; 596 if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) { 597 sourceReset = sourceParser.lh[i].first; 598 targetReset = targetParser.lh[j].first; 599 while(sourceReset != NULL && targetReset != NULL) { 600 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); 601 sourceStringLen = sourceReset->source >> 24; 602 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); 603 targetStringLen = targetReset->source >> 24; 604 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { 605 result = FALSE; 606 goto returnResult; 607 } 608 // probably also need to check the expansions 609 if(sourceReset->expansion) { 610 if(!targetReset->expansion) { 611 result = FALSE; 612 goto returnResult; 613 } else { 614 // compare expansions 615 sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF); 616 sourceStringLen = sourceReset->expansion >> 24; 617 targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF); 618 targetStringLen = targetReset->expansion >> 24; 619 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { 620 result = FALSE; 621 goto returnResult; 622 } 623 } 624 } else { 625 if(targetReset->expansion) { 626 result = FALSE; 627 goto returnResult; 628 } 629 } 630 sourceReset = sourceReset->next; 631 targetReset = targetReset->next; 632 } 633 if(sourceReset != targetReset) { // at least one is not NULL 634 // there are more tailored elements in one list 635 result = FALSE; 636 goto returnResult; 637 } 638 639 640 break; 641 } 642 } 643 // couldn't find the reset anchor, so the collators are not equal 644 if(j == sourceListLen) { 645 result = FALSE; 646 goto returnResult; 647 } 648 } 649 } 650 651 returnResult: 652 ucol_tok_closeTokenList(&sourceParser); 653 ucol_tok_closeTokenList(&targetParser); 654 return result; 655 656 } 657 658 U_CAPI int32_t U_EXPORT2 659 ucol_getDisplayName( const char *objLoc, 660 const char *dispLoc, 661 UChar *result, 662 int32_t resultLength, 663 UErrorCode *status) 664 { 665 U_NAMESPACE_USE 666 667 if(U_FAILURE(*status)) return -1; 668 UnicodeString dst; 669 if(!(result==NULL && resultLength==0)) { 670 // NULL destination for pure preflighting: empty dummy string 671 // otherwise, alias the destination buffer 672 dst.setTo(result, 0, resultLength); 673 } 674 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst); 675 return dst.extract(result, resultLength, *status); 676 } 677 678 U_CAPI const char* U_EXPORT2 679 ucol_getAvailable(int32_t index) 680 { 681 int32_t count = 0; 682 const Locale *loc = Collator::getAvailableLocales(count); 683 if (loc != NULL && index < count) { 684 return loc[index].getName(); 685 } 686 return NULL; 687 } 688 689 U_CAPI int32_t U_EXPORT2 690 ucol_countAvailable() 691 { 692 int32_t count = 0; 693 Collator::getAvailableLocales(count); 694 return count; 695 } 696 697 #if !UCONFIG_NO_SERVICE 698 U_CAPI UEnumeration* U_EXPORT2 699 ucol_openAvailableLocales(UErrorCode *status) { 700 U_NAMESPACE_USE 701 702 // This is a wrapper over Collator::getAvailableLocales() 703 if (U_FAILURE(*status)) { 704 return NULL; 705 } 706 StringEnumeration *s = Collator::getAvailableLocales(); 707 if (s == NULL) { 708 *status = U_MEMORY_ALLOCATION_ERROR; 709 return NULL; 710 } 711 return uenum_openFromStringEnumeration(s, status); 712 } 713 #endif 714 715 // Note: KEYWORDS[0] != RESOURCE_NAME - alan 716 717 static const char RESOURCE_NAME[] = "collations"; 718 719 static const char* const KEYWORDS[] = { "collation" }; 720 721 #define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0])) 722 723 U_CAPI UEnumeration* U_EXPORT2 724 ucol_getKeywords(UErrorCode *status) { 725 UEnumeration *result = NULL; 726 if (U_SUCCESS(*status)) { 727 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status); 728 } 729 return result; 730 } 731 732 U_CAPI UEnumeration* U_EXPORT2 733 ucol_getKeywordValues(const char *keyword, UErrorCode *status) { 734 if (U_FAILURE(*status)) { 735 return NULL; 736 } 737 // hard-coded to accept exactly one collation keyword 738 // modify if additional collation keyword is added later 739 if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) 740 { 741 *status = U_ILLEGAL_ARGUMENT_ERROR; 742 return NULL; 743 } 744 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status); 745 } 746 747 static const UEnumeration defaultKeywordValues = { 748 NULL, 749 NULL, 750 ulist_close_keyword_values_iterator, 751 ulist_count_keyword_values, 752 uenum_unextDefault, 753 ulist_next_keyword_value, 754 ulist_reset_keyword_values_iterator 755 }; 756 757 U_CAPI UEnumeration* U_EXPORT2 758 ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale, 759 UBool /*commonlyUsed*/, UErrorCode* status) { 760 /* Get the locale base name. */ 761 char localeBuffer[ULOC_FULLNAME_CAPACITY] = ""; 762 uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status); 763 764 /* Create the 2 lists 765 * -values is the temp location for the keyword values 766 * -results hold the actual list used by the UEnumeration object 767 */ 768 UList *values = ulist_createEmptyList(status); 769 UList *results = ulist_createEmptyList(status); 770 UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 771 if (U_FAILURE(*status) || en == NULL) { 772 if (en == NULL) { 773 *status = U_MEMORY_ALLOCATION_ERROR; 774 } else { 775 uprv_free(en); 776 } 777 ulist_deleteList(values); 778 ulist_deleteList(results); 779 return NULL; 780 } 781 782 memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); 783 en->context = results; 784 785 /* Open the resource bundle for collation with the given locale. */ 786 UResourceBundle bundle, collations, collres, defres; 787 ures_initStackObject(&bundle); 788 ures_initStackObject(&collations); 789 ures_initStackObject(&collres); 790 ures_initStackObject(&defres); 791 792 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); 793 794 while (U_SUCCESS(*status)) { 795 ures_getByKey(&bundle, RESOURCE_NAME, &collations, status); 796 ures_resetIterator(&collations); 797 while (U_SUCCESS(*status) && ures_hasNext(&collations)) { 798 ures_getNextResource(&collations, &collres, status); 799 const char *key = ures_getKey(&collres); 800 /* If the key is default, get the string and store it in results list only 801 * if results list is empty. 802 */ 803 if (uprv_strcmp(key, "default") == 0) { 804 if (ulist_getListSize(results) == 0) { 805 char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY); 806 int32_t defcollLength = ULOC_KEYWORDS_CAPACITY; 807 808 ures_getNextResource(&collres, &defres, status); 809 ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status); 810 811 ulist_addItemBeginList(results, defcoll, TRUE, status); 812 } 813 } else { 814 ulist_addItemEndList(values, key, FALSE, status); 815 } 816 } 817 818 /* If the locale is "" this is root so exit. */ 819 if (uprv_strlen(localeBuffer) == 0) { 820 break; 821 } 822 /* Get the parent locale and open a new resource bundle. */ 823 uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status); 824 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); 825 } 826 827 ures_close(&defres); 828 ures_close(&collres); 829 ures_close(&collations); 830 ures_close(&bundle); 831 832 if (U_SUCCESS(*status)) { 833 char *value = NULL; 834 ulist_resetList(values); 835 while ((value = (char *)ulist_getNext(values)) != NULL) { 836 if (!ulist_containsString(results, value, uprv_strlen(value))) { 837 ulist_addItemEndList(results, value, FALSE, status); 838 if (U_FAILURE(*status)) { 839 break; 840 } 841 } 842 } 843 } 844 845 ulist_deleteList(values); 846 847 if (U_FAILURE(*status)){ 848 uenum_close(en); 849 en = NULL; 850 } else { 851 ulist_resetList(results); 852 } 853 854 return en; 855 } 856 857 U_CAPI int32_t U_EXPORT2 858 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 859 const char* keyword, const char* locale, 860 UBool* isAvailable, UErrorCode* status) 861 { 862 // N.B.: Resource name is "collations" but keyword is "collation" 863 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL, 864 "collations", keyword, locale, 865 isAvailable, TRUE, status); 866 } 867 868 /* returns the locale name the collation data comes from */ 869 U_CAPI const char * U_EXPORT2 870 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { 871 return ucol_getLocaleByType(coll, type, status); 872 } 873 874 U_CAPI const char * U_EXPORT2 875 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { 876 const char *result = NULL; 877 if(status == NULL || U_FAILURE(*status)) { 878 return NULL; 879 } 880 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); 881 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); 882 883 switch(type) { 884 case ULOC_ACTUAL_LOCALE: 885 result = coll->actualLocale; 886 break; 887 case ULOC_VALID_LOCALE: 888 result = coll->validLocale; 889 break; 890 case ULOC_REQUESTED_LOCALE: 891 result = coll->requestedLocale; 892 break; 893 default: 894 *status = U_ILLEGAL_ARGUMENT_ERROR; 895 } 896 UTRACE_DATA1(UTRACE_INFO, "result = %s", result); 897 UTRACE_EXIT_STATUS(*status); 898 return result; 899 } 900 901 U_CFUNC void U_EXPORT2 902 ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt, char *actualLocaleToAdopt) 903 { 904 if (coll) { 905 if (coll->validLocale) { 906 uprv_free(coll->validLocale); 907 } 908 coll->validLocale = validLocaleToAdopt; 909 if (coll->requestedLocale) { // should always have 910 uprv_free(coll->requestedLocale); 911 } 912 coll->requestedLocale = requestedLocaleToAdopt; 913 if (coll->actualLocale) { 914 uprv_free(coll->actualLocale); 915 } 916 coll->actualLocale = actualLocaleToAdopt; 917 } 918 } 919 920 U_CAPI USet * U_EXPORT2 921 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) 922 { 923 U_NAMESPACE_USE 924 925 if(status == NULL || U_FAILURE(*status)) { 926 return NULL; 927 } 928 if(coll == NULL || coll->UCA == NULL) { 929 *status = U_ILLEGAL_ARGUMENT_ERROR; 930 return NULL; 931 } 932 UParseError parseError; 933 UColTokenParser src; 934 int32_t rulesLen = 0; 935 const UChar *rules = ucol_getRules(coll, &rulesLen); 936 UBool startOfRules = TRUE; 937 // we internally use the C++ class, for the following reasons: 938 // 1. we need to utilize canonical iterator, which is a C++ only class 939 // 2. canonical iterator returns UnicodeStrings - USet cannot take them 940 // 3. USet is internally really UnicodeSet, C is just a wrapper 941 UnicodeSet *tailored = new UnicodeSet(); 942 UnicodeString pattern; 943 UnicodeString empty; 944 CanonicalIterator it(empty, *status); 945 946 947 // The idea is to tokenize the rule set. For each non-reset token, 948 // we add all the canonicaly equivalent FCD sequences 949 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status); 950 while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) { 951 startOfRules = FALSE; 952 if(src.parsedToken.strength != UCOL_TOK_RESET) { 953 const UChar *stuff = src.source+(src.parsedToken.charsOffset); 954 it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status); 955 pattern = it.next(); 956 while(!pattern.isBogus()) { 957 if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) { 958 tailored->add(pattern); 959 } 960 pattern = it.next(); 961 } 962 } 963 } 964 ucol_tok_closeTokenList(&src); 965 return (USet *)tailored; 966 } 967 968 #endif /* #if !UCONFIG_NO_COLLATION */ 969