1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: usprep.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003jul2 14 * created by: Ram Viswanadha 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_IDNA 20 21 #include "unicode/usprep.h" 22 23 #include "unicode/unorm.h" 24 #include "unicode/ustring.h" 25 #include "unicode/uchar.h" 26 #include "unicode/uversion.h" 27 #include "umutex.h" 28 #include "cmemory.h" 29 #include "sprpimpl.h" 30 #include "ustr_imp.h" 31 #include "uhash.h" 32 #include "cstring.h" 33 #include "udataswp.h" 34 #include "ucln_cmn.h" 35 #include "ubidi_props.h" 36 37 U_NAMESPACE_USE 38 39 U_CDECL_BEGIN 40 41 /* 42 Static cache for already opened StringPrep profiles 43 */ 44 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 45 46 static UMTX usprepMutex = NULL; 47 48 /* format version of spp file */ 49 static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; 50 51 /* the Unicode version of the sprep data */ 52 static UVersionInfo dataVersion={ 0, 0, 0, 0 }; 53 54 /* Profile names must be aligned to UStringPrepProfileType */ 55 static const char *PROFILE_NAMES[] = { 56 "rfc3491", /* USPREP_RFC3491_NAMEPREP */ 57 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */ 58 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */ 59 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */ 60 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */ 61 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */ 62 "rfc3722", /* USPREP_RFC3722_ISCSI */ 63 "rfc3920node", /* USPREP_RFC3920_NODEPREP */ 64 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */ 65 "rfc4011", /* USPREP_RFC4011_MIB */ 66 "rfc4013", /* USPREP_RFC4013_SASLPREP */ 67 "rfc4505", /* USPREP_RFC4505_TRACE */ 68 "rfc4518", /* USPREP_RFC4518_LDAP */ 69 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */ 70 }; 71 72 static UBool U_CALLCONV 73 isSPrepAcceptable(void * /* context */, 74 const char * /* type */, 75 const char * /* name */, 76 const UDataInfo *pInfo) { 77 if( 78 pInfo->size>=20 && 79 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 80 pInfo->charsetFamily==U_CHARSET_FAMILY && 81 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 82 pInfo->dataFormat[1]==0x50 && 83 pInfo->dataFormat[2]==0x52 && 84 pInfo->dataFormat[3]==0x50 && 85 pInfo->formatVersion[0]==3 && 86 pInfo->formatVersion[2]==UTRIE_SHIFT && 87 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT 88 ) { 89 uprv_memcpy(formatVersion, pInfo->formatVersion, 4); 90 uprv_memcpy(dataVersion, pInfo->dataVersion, 4); 91 return TRUE; 92 } else { 93 return FALSE; 94 } 95 } 96 97 static int32_t U_CALLCONV 98 getSPrepFoldingOffset(uint32_t data) { 99 100 return (int32_t)data; 101 102 } 103 104 /* hashes an entry */ 105 static int32_t U_CALLCONV 106 hashEntry(const UHashTok parm) { 107 UStringPrepKey *b = (UStringPrepKey *)parm.pointer; 108 UHashTok namekey, pathkey; 109 namekey.pointer = b->name; 110 pathkey.pointer = b->path; 111 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey); 112 } 113 114 /* compares two entries */ 115 static UBool U_CALLCONV 116 compareEntries(const UHashTok p1, const UHashTok p2) { 117 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; 118 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; 119 UHashTok name1, name2, path1, path2; 120 name1.pointer = b1->name; 121 name2.pointer = b2->name; 122 path1.pointer = b1->path; 123 path2.pointer = b2->path; 124 return ((UBool)(uhash_compareChars(name1, name2) & 125 uhash_compareChars(path1, path2))); 126 } 127 128 static void 129 usprep_unload(UStringPrepProfile* data){ 130 udata_close(data->sprepData); 131 } 132 133 static int32_t 134 usprep_internal_flushCache(UBool noRefCount){ 135 UStringPrepProfile *profile = NULL; 136 UStringPrepKey *key = NULL; 137 int32_t pos = -1; 138 int32_t deletedNum = 0; 139 const UHashElement *e; 140 141 /* 142 * if shared data hasn't even been lazy evaluated yet 143 * return 0 144 */ 145 umtx_lock(&usprepMutex); 146 if (SHARED_DATA_HASHTABLE == NULL) { 147 umtx_unlock(&usprepMutex); 148 return 0; 149 } 150 151 /*creates an enumeration to iterate through every element in the table */ 152 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) 153 { 154 profile = (UStringPrepProfile *) e->value.pointer; 155 key = (UStringPrepKey *) e->key.pointer; 156 157 if ((noRefCount== FALSE && profile->refCount == 0) || 158 noRefCount== TRUE) { 159 deletedNum++; 160 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 161 162 /* unload the data */ 163 usprep_unload(profile); 164 165 if(key->name != NULL) { 166 uprv_free(key->name); 167 key->name=NULL; 168 } 169 if(key->path != NULL) { 170 uprv_free(key->path); 171 key->path=NULL; 172 } 173 uprv_free(profile); 174 uprv_free(key); 175 } 176 177 } 178 umtx_unlock(&usprepMutex); 179 180 return deletedNum; 181 } 182 183 /* Works just like ucnv_flushCache() 184 static int32_t 185 usprep_flushCache(){ 186 return usprep_internal_flushCache(FALSE); 187 } 188 */ 189 190 static UBool U_CALLCONV usprep_cleanup(void){ 191 if (SHARED_DATA_HASHTABLE != NULL) { 192 usprep_internal_flushCache(TRUE); 193 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 194 uhash_close(SHARED_DATA_HASHTABLE); 195 SHARED_DATA_HASHTABLE = NULL; 196 } 197 } 198 199 umtx_destroy(&usprepMutex); /* Don't worry about destroying the mutex even */ 200 /* if the hash table still exists. The mutex */ 201 /* will lazily re-init itself if needed. */ 202 return (SHARED_DATA_HASHTABLE == NULL); 203 } 204 U_CDECL_END 205 206 207 /** Initializes the cache for resources */ 208 static void 209 initCache(UErrorCode *status) { 210 UBool makeCache; 211 UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE == NULL), makeCache); 212 if(makeCache) { 213 UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status); 214 if (U_SUCCESS(*status)) { 215 umtx_lock(&usprepMutex); 216 if(SHARED_DATA_HASHTABLE == NULL) { 217 SHARED_DATA_HASHTABLE = newCache; 218 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); 219 newCache = NULL; 220 } 221 umtx_unlock(&usprepMutex); 222 } 223 if(newCache != NULL) { 224 uhash_close(newCache); 225 } 226 } 227 } 228 229 static UBool U_CALLCONV 230 loadData(UStringPrepProfile* profile, 231 const char* path, 232 const char* name, 233 const char* type, 234 UErrorCode* errorCode) { 235 /* load Unicode SPREP data from file */ 236 UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; 237 UDataMemory *dataMemory; 238 const int32_t *p=NULL; 239 const uint8_t *pb; 240 UVersionInfo normUnicodeVersion; 241 int32_t normUniVer, sprepUniVer, normCorrVer; 242 243 if(errorCode==NULL || U_FAILURE(*errorCode)) { 244 return 0; 245 } 246 247 /* open the data outside the mutex block */ 248 //TODO: change the path 249 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); 250 if(U_FAILURE(*errorCode)) { 251 return FALSE; 252 } 253 254 p=(const int32_t *)udata_getMemory(dataMemory); 255 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); 256 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); 257 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; 258 259 260 if(U_FAILURE(*errorCode)) { 261 udata_close(dataMemory); 262 return FALSE; 263 } 264 265 /* in the mutex block, set the data for this process */ 266 umtx_lock(&usprepMutex); 267 if(profile->sprepData==NULL) { 268 profile->sprepData=dataMemory; 269 dataMemory=NULL; 270 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); 271 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); 272 } else { 273 p=(const int32_t *)udata_getMemory(profile->sprepData); 274 } 275 umtx_unlock(&usprepMutex); 276 /* initialize some variables */ 277 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); 278 279 u_getUnicodeVersion(normUnicodeVersion); 280 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 281 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); 282 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 283 (dataVersion[2] << 8 ) + (dataVersion[3]); 284 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; 285 286 if(U_FAILURE(*errorCode)){ 287 udata_close(dataMemory); 288 return FALSE; 289 } 290 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ 291 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ 292 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ 293 ){ 294 *errorCode = U_INVALID_FORMAT_ERROR; 295 udata_close(dataMemory); 296 return FALSE; 297 } 298 profile->isDataLoaded = TRUE; 299 300 /* if a different thread set it first, then close the extra data */ 301 if(dataMemory!=NULL) { 302 udata_close(dataMemory); /* NULL if it was set correctly */ 303 } 304 305 306 return profile->isDataLoaded; 307 } 308 309 static UStringPrepProfile* 310 usprep_getProfile(const char* path, 311 const char* name, 312 UErrorCode *status){ 313 314 UStringPrepProfile* profile = NULL; 315 316 initCache(status); 317 318 if(U_FAILURE(*status)){ 319 return NULL; 320 } 321 322 UStringPrepKey stackKey; 323 /* 324 * const is cast way to save malloc, strcpy and free calls 325 * we use the passed in pointers for fetching the data from the 326 * hash table which is safe 327 */ 328 stackKey.name = (char*) name; 329 stackKey.path = (char*) path; 330 331 /* fetch the data from the cache */ 332 umtx_lock(&usprepMutex); 333 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 334 if(profile != NULL) { 335 profile->refCount++; 336 } 337 umtx_unlock(&usprepMutex); 338 339 if(profile == NULL) { 340 /* else load the data and put the data in the cache */ 341 LocalMemory<UStringPrepProfile> newProfile; 342 if(newProfile.allocateInsteadAndReset() == NULL) { 343 *status = U_MEMORY_ALLOCATION_ERROR; 344 return NULL; 345 } 346 347 /* load the data */ 348 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ 349 return NULL; 350 } 351 352 /* get the options */ 353 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); 354 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); 355 356 if(newProfile->checkBiDi) { 357 newProfile->bdp = ubidi_getSingleton(status); 358 if(U_FAILURE(*status)) { 359 usprep_unload(newProfile.getAlias()); 360 return NULL; 361 } 362 } 363 364 LocalMemory<UStringPrepKey> key; 365 LocalMemory<char> keyName; 366 LocalMemory<char> keyPath; 367 if( key.allocateInsteadAndReset() == NULL || 368 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL || 369 (path != NULL && 370 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL) 371 ) { 372 *status = U_MEMORY_ALLOCATION_ERROR; 373 usprep_unload(newProfile.getAlias()); 374 return NULL; 375 } 376 377 /* initialize the key members */ 378 key->name = keyName.orphan(); 379 uprv_strcpy(key->name, name); 380 if(path != NULL){ 381 key->path = keyPath.orphan(); 382 uprv_strcpy(key->path, path); 383 } 384 385 profile = newProfile.orphan(); 386 umtx_lock(&usprepMutex); 387 /* add the data object to the cache */ 388 profile->refCount = 1; 389 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); 390 umtx_unlock(&usprepMutex); 391 } 392 393 return profile; 394 } 395 396 U_CAPI UStringPrepProfile* U_EXPORT2 397 usprep_open(const char* path, 398 const char* name, 399 UErrorCode* status){ 400 401 if(status == NULL || U_FAILURE(*status)){ 402 return NULL; 403 } 404 405 /* initialize the profile struct members */ 406 return usprep_getProfile(path,name,status); 407 } 408 409 U_CAPI UStringPrepProfile* U_EXPORT2 410 usprep_openByType(UStringPrepProfileType type, 411 UErrorCode* status) { 412 if(status == NULL || U_FAILURE(*status)){ 413 return NULL; 414 } 415 int32_t index = (int32_t)type; 416 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) { 417 *status = U_ILLEGAL_ARGUMENT_ERROR; 418 return NULL; 419 } 420 return usprep_open(NULL, PROFILE_NAMES[index], status); 421 } 422 423 U_CAPI void U_EXPORT2 424 usprep_close(UStringPrepProfile* profile){ 425 if(profile==NULL){ 426 return; 427 } 428 429 umtx_lock(&usprepMutex); 430 /* decrement the ref count*/ 431 if(profile->refCount > 0){ 432 profile->refCount--; 433 } 434 umtx_unlock(&usprepMutex); 435 436 } 437 438 U_CFUNC void 439 uprv_syntaxError(const UChar* rules, 440 int32_t pos, 441 int32_t rulesLen, 442 UParseError* parseError){ 443 if(parseError == NULL){ 444 return; 445 } 446 parseError->offset = pos; 447 parseError->line = 0 ; // we are not using line numbers 448 449 // for pre-context 450 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); 451 int32_t limit = pos; 452 453 u_memcpy(parseError->preContext,rules+start,limit-start); 454 //null terminate the buffer 455 parseError->preContext[limit-start] = 0; 456 457 // for post-context; include error rules[pos] 458 start = pos; 459 limit = start + (U_PARSE_CONTEXT_LEN-1); 460 if (limit > rulesLen) { 461 limit = rulesLen; 462 } 463 if (start < rulesLen) { 464 u_memcpy(parseError->postContext,rules+start,limit-start); 465 } 466 //null terminate the buffer 467 parseError->postContext[limit-start]= 0; 468 } 469 470 471 static inline UStringPrepType 472 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ 473 474 UStringPrepType type; 475 if(trieWord == 0){ 476 /* 477 * Initial value stored in the mapping table 478 * just return USPREP_TYPE_LIMIT .. so that 479 * the source codepoint is copied to the destination 480 */ 481 type = USPREP_TYPE_LIMIT; 482 isIndex =FALSE; 483 value = 0; 484 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ 485 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); 486 isIndex =FALSE; 487 value = 0; 488 }else{ 489 /* get the type */ 490 type = USPREP_MAP; 491 /* ascertain if the value is index or delta */ 492 if(trieWord & 0x02){ 493 isIndex = TRUE; 494 value = trieWord >> 2; //mask off the lower 2 bits and shift 495 }else{ 496 isIndex = FALSE; 497 value = (int16_t)trieWord; 498 value = (value >> 2); 499 } 500 501 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ 502 type = USPREP_DELETE; 503 isIndex =FALSE; 504 value = 0; 505 } 506 } 507 return type; 508 } 509 510 511 512 static int32_t 513 usprep_map( const UStringPrepProfile* profile, 514 const UChar* src, int32_t srcLength, 515 UChar* dest, int32_t destCapacity, 516 int32_t options, 517 UParseError* parseError, 518 UErrorCode* status ){ 519 520 uint16_t result; 521 int32_t destIndex=0; 522 int32_t srcIndex; 523 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); 524 UStringPrepType type; 525 int16_t value; 526 UBool isIndex; 527 const int32_t* indexes = profile->indexes; 528 529 // no error checking the caller check for error and arguments 530 // no string length check the caller finds out the string length 531 532 for(srcIndex=0;srcIndex<srcLength;){ 533 UChar32 ch; 534 535 U16_NEXT(src,srcIndex,srcLength,ch); 536 537 result=0; 538 539 UTRIE_GET16(&profile->sprepTrie,ch,result); 540 541 type = getValues(result, value, isIndex); 542 543 // check if the source codepoint is unassigned 544 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ 545 546 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); 547 *status = U_STRINGPREP_UNASSIGNED_ERROR; 548 return 0; 549 550 }else if(type == USPREP_MAP){ 551 552 int32_t index, length; 553 554 if(isIndex){ 555 index = value; 556 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 557 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 558 length = 1; 559 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 560 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 561 length = 2; 562 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 563 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 564 length = 3; 565 }else{ 566 length = profile->mappingData[index++]; 567 568 } 569 570 /* copy mapping to destination */ 571 for(int32_t i=0; i< length; i++){ 572 if(destIndex < destCapacity ){ 573 dest[destIndex] = profile->mappingData[index+i]; 574 } 575 destIndex++; /* for pre-flighting */ 576 } 577 continue; 578 }else{ 579 // subtract the delta to arrive at the code point 580 ch -= value; 581 } 582 583 }else if(type==USPREP_DELETE){ 584 // just consume the codepoint and contine 585 continue; 586 } 587 //copy the code point into destination 588 if(ch <= 0xFFFF){ 589 if(destIndex < destCapacity ){ 590 dest[destIndex] = (UChar)ch; 591 } 592 destIndex++; 593 }else{ 594 if(destIndex+1 < destCapacity ){ 595 dest[destIndex] = U16_LEAD(ch); 596 dest[destIndex+1] = U16_TRAIL(ch); 597 } 598 destIndex +=2; 599 } 600 601 } 602 603 return u_terminateUChars(dest, destCapacity, destIndex, status); 604 } 605 606 607 static int32_t 608 usprep_normalize( const UChar* src, int32_t srcLength, 609 UChar* dest, int32_t destCapacity, 610 UErrorCode* status ){ 611 return unorm_normalize( 612 src, srcLength, 613 UNORM_NFKC, UNORM_UNICODE_3_2, 614 dest, destCapacity, 615 status); 616 } 617 618 619 /* 620 1) Map -- For each character in the input, check if it has a mapping 621 and, if so, replace it with its mapping. 622 623 2) Normalize -- Possibly normalize the result of step 1 using Unicode 624 normalization. 625 626 3) Prohibit -- Check for any characters that are not allowed in the 627 output. If any are found, return an error. 628 629 4) Check bidi -- Possibly check for right-to-left characters, and if 630 any are found, make sure that the whole string satisfies the 631 requirements for bidirectional strings. If the string does not 632 satisfy the requirements for bidirectional strings, return an 633 error. 634 [Unicode3.2] defines several bidirectional categories; each character 635 has one bidirectional category assigned to it. For the purposes of 636 the requirements below, an "RandALCat character" is a character that 637 has Unicode bidirectional categories "R" or "AL"; an "LCat character" 638 is a character that has Unicode bidirectional category "L". Note 639 640 641 that there are many characters which fall in neither of the above 642 definitions; Latin digits (<U+0030> through <U+0039>) are examples of 643 this because they have bidirectional category "EN". 644 645 In any profile that specifies bidirectional character handling, all 646 three of the following requirements MUST be met: 647 648 1) The characters in section 5.8 MUST be prohibited. 649 650 2) If a string contains any RandALCat character, the string MUST NOT 651 contain any LCat character. 652 653 3) If a string contains any RandALCat character, a RandALCat 654 character MUST be the first character of the string, and a 655 RandALCat character MUST be the last character of the string. 656 */ 657 658 #define MAX_STACK_BUFFER_SIZE 300 659 660 661 U_CAPI int32_t U_EXPORT2 662 usprep_prepare( const UStringPrepProfile* profile, 663 const UChar* src, int32_t srcLength, 664 UChar* dest, int32_t destCapacity, 665 int32_t options, 666 UParseError* parseError, 667 UErrorCode* status ){ 668 669 // check error status 670 if(status == NULL || U_FAILURE(*status)){ 671 return 0; 672 } 673 674 //check arguments 675 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 676 *status=U_ILLEGAL_ARGUMENT_ERROR; 677 return 0; 678 } 679 680 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE]; 681 UChar *b1 = b1Stack, *b2 = b2Stack; 682 int32_t b1Len, b2Len=0, 683 b1Capacity = MAX_STACK_BUFFER_SIZE , 684 b2Capacity = MAX_STACK_BUFFER_SIZE; 685 uint16_t result; 686 int32_t b2Index = 0; 687 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 688 UBool leftToRight=FALSE, rightToLeft=FALSE; 689 int32_t rtlPos =-1, ltrPos =-1; 690 691 //get the string length 692 if(srcLength == -1){ 693 srcLength = u_strlen(src); 694 } 695 // map 696 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status); 697 698 if(*status == U_BUFFER_OVERFLOW_ERROR){ 699 // redo processing of string 700 /* we do not have enough room so grow the buffer*/ 701 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 702 if(b1==NULL){ 703 *status = U_MEMORY_ALLOCATION_ERROR; 704 goto CLEANUP; 705 } 706 707 *status = U_ZERO_ERROR; // reset error 708 709 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status); 710 711 } 712 713 // normalize 714 if(profile->doNFKC == TRUE){ 715 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status); 716 717 if(*status == U_BUFFER_OVERFLOW_ERROR){ 718 // redo processing of string 719 /* we do not have enough room so grow the buffer*/ 720 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 721 if(b2==NULL){ 722 *status = U_MEMORY_ALLOCATION_ERROR; 723 goto CLEANUP; 724 } 725 726 *status = U_ZERO_ERROR; // reset error 727 728 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status); 729 730 } 731 732 }else{ 733 b2 = b1; 734 b2Len = b1Len; 735 } 736 737 738 if(U_FAILURE(*status)){ 739 goto CLEANUP; 740 } 741 742 UChar32 ch; 743 UStringPrepType type; 744 int16_t value; 745 UBool isIndex; 746 747 // Prohibit and checkBiDi in one pass 748 for(b2Index=0; b2Index<b2Len;){ 749 750 ch = 0; 751 752 U16_NEXT(b2, b2Index, b2Len, ch); 753 754 UTRIE_GET16(&profile->sprepTrie,ch,result); 755 756 type = getValues(result, value, isIndex); 757 758 if( type == USPREP_PROHIBITED || 759 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) 760 ){ 761 *status = U_STRINGPREP_PROHIBITED_ERROR; 762 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); 763 goto CLEANUP; 764 } 765 766 if(profile->checkBiDi) { 767 direction = ubidi_getClass(profile->bdp, ch); 768 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ 769 firstCharDir = direction; 770 } 771 if(direction == U_LEFT_TO_RIGHT){ 772 leftToRight = TRUE; 773 ltrPos = b2Index-1; 774 } 775 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 776 rightToLeft = TRUE; 777 rtlPos = b2Index-1; 778 } 779 } 780 } 781 if(profile->checkBiDi == TRUE){ 782 // satisfy 2 783 if( leftToRight == TRUE && rightToLeft == TRUE){ 784 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 785 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); 786 goto CLEANUP; 787 } 788 789 //satisfy 3 790 if( rightToLeft == TRUE && 791 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 792 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 793 ){ 794 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 795 uprv_syntaxError(b2, rtlPos, b2Len, parseError); 796 return FALSE; 797 } 798 } 799 if(b2Len>0 && b2Len <= destCapacity){ 800 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR); 801 } 802 803 CLEANUP: 804 if(b1!=b1Stack){ 805 uprv_free(b1); 806 b1=NULL; 807 } 808 809 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){ 810 uprv_free(b2); 811 b2=NULL; 812 } 813 return u_terminateUChars(dest, destCapacity, b2Len, status); 814 } 815 816 817 /* data swapping ------------------------------------------------------------ */ 818 819 U_CAPI int32_t U_EXPORT2 820 usprep_swap(const UDataSwapper *ds, 821 const void *inData, int32_t length, void *outData, 822 UErrorCode *pErrorCode) { 823 const UDataInfo *pInfo; 824 int32_t headerSize; 825 826 const uint8_t *inBytes; 827 uint8_t *outBytes; 828 829 const int32_t *inIndexes; 830 int32_t indexes[16]; 831 832 int32_t i, offset, count, size; 833 834 /* udata_swapDataHeader checks the arguments */ 835 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 836 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 837 return 0; 838 } 839 840 /* check data format and format version */ 841 pInfo=(const UDataInfo *)((const char *)inData+4); 842 if(!( 843 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 844 pInfo->dataFormat[1]==0x50 && 845 pInfo->dataFormat[2]==0x52 && 846 pInfo->dataFormat[3]==0x50 && 847 pInfo->formatVersion[0]==3 848 )) { 849 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", 850 pInfo->dataFormat[0], pInfo->dataFormat[1], 851 pInfo->dataFormat[2], pInfo->dataFormat[3], 852 pInfo->formatVersion[0]); 853 *pErrorCode=U_UNSUPPORTED_ERROR; 854 return 0; 855 } 856 857 inBytes=(const uint8_t *)inData+headerSize; 858 outBytes=(uint8_t *)outData+headerSize; 859 860 inIndexes=(const int32_t *)inBytes; 861 862 if(length>=0) { 863 length-=headerSize; 864 if(length<16*4) { 865 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", 866 length); 867 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 868 return 0; 869 } 870 } 871 872 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ 873 for(i=0; i<16; ++i) { 874 indexes[i]=udata_readInt32(ds, inIndexes[i]); 875 } 876 877 /* calculate the total length of the data */ 878 size= 879 16*4+ /* size of indexes[] */ 880 indexes[_SPREP_INDEX_TRIE_SIZE]+ 881 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 882 883 if(length>=0) { 884 if(length<size) { 885 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", 886 length); 887 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 888 return 0; 889 } 890 891 /* copy the data for inaccessible bytes */ 892 if(inBytes!=outBytes) { 893 uprv_memcpy(outBytes, inBytes, size); 894 } 895 896 offset=0; 897 898 /* swap the int32_t indexes[] */ 899 count=16*4; 900 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 901 offset+=count; 902 903 /* swap the UTrie */ 904 count=indexes[_SPREP_INDEX_TRIE_SIZE]; 905 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 906 offset+=count; 907 908 /* swap the uint16_t mappingTable[] */ 909 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 910 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 911 offset+=count; 912 } 913 914 return headerSize+size; 915 } 916 917 #endif /* #if !UCONFIG_NO_IDNA */ 918