1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: usprep.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003jul2 14 * created by: Ram Viswanadha 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_IDNA 20 21 #include "unicode/usprep.h" 22 23 #include "unicode/unorm.h" 24 #include "unicode/ustring.h" 25 #include "unicode/uchar.h" 26 #include "unicode/uversion.h" 27 #include "umutex.h" 28 #include "cmemory.h" 29 #include "sprpimpl.h" 30 #include "ustr_imp.h" 31 #include "uhash.h" 32 #include "cstring.h" 33 #include "udataswp.h" 34 #include "ucln_cmn.h" 35 #include "ubidi_props.h" 36 37 U_NAMESPACE_USE 38 39 U_CDECL_BEGIN 40 41 /* 42 Static cache for already opened StringPrep profiles 43 */ 44 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 45 46 static UMTX usprepMutex = NULL; 47 48 /* format version of spp file */ 49 static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; 50 51 /* the Unicode version of the sprep data */ 52 static UVersionInfo dataVersion={ 0, 0, 0, 0 }; 53 54 /* Profile names must be aligned to UStringPrepProfileType */ 55 static const char *PROFILE_NAMES[] = { 56 "rfc3491", /* USPREP_RFC3491_NAMEPREP */ 57 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */ 58 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */ 59 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */ 60 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */ 61 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */ 62 "rfc3722", /* USPREP_RFC3722_ISCSI */ 63 "rfc3920node", /* USPREP_RFC3920_NODEPREP */ 64 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */ 65 "rfc4011", /* USPREP_RFC4011_MIB */ 66 "rfc4013", /* USPREP_RFC4013_SASLPREP */ 67 "rfc4505", /* USPREP_RFC4505_TRACE */ 68 "rfc4518", /* USPREP_RFC4518_LDAP */ 69 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */ 70 }; 71 72 static UBool U_CALLCONV 73 isSPrepAcceptable(void * /* context */, 74 const char * /* type */, 75 const char * /* name */, 76 const UDataInfo *pInfo) { 77 if( 78 pInfo->size>=20 && 79 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 80 pInfo->charsetFamily==U_CHARSET_FAMILY && 81 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 82 pInfo->dataFormat[1]==0x50 && 83 pInfo->dataFormat[2]==0x52 && 84 pInfo->dataFormat[3]==0x50 && 85 pInfo->formatVersion[0]==3 && 86 pInfo->formatVersion[2]==UTRIE_SHIFT && 87 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT 88 ) { 89 uprv_memcpy(formatVersion, pInfo->formatVersion, 4); 90 uprv_memcpy(dataVersion, pInfo->dataVersion, 4); 91 return TRUE; 92 } else { 93 return FALSE; 94 } 95 } 96 97 static int32_t U_CALLCONV 98 getSPrepFoldingOffset(uint32_t data) { 99 100 return (int32_t)data; 101 102 } 103 104 /* hashes an entry */ 105 static int32_t U_CALLCONV 106 hashEntry(const UHashTok parm) { 107 UStringPrepKey *b = (UStringPrepKey *)parm.pointer; 108 UHashTok namekey, pathkey; 109 namekey.pointer = b->name; 110 pathkey.pointer = b->path; 111 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey); 112 } 113 114 /* compares two entries */ 115 static UBool U_CALLCONV 116 compareEntries(const UHashTok p1, const UHashTok p2) { 117 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; 118 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; 119 UHashTok name1, name2, path1, path2; 120 name1.pointer = b1->name; 121 name2.pointer = b2->name; 122 path1.pointer = b1->path; 123 path2.pointer = b2->path; 124 return ((UBool)(uhash_compareChars(name1, name2) & 125 uhash_compareChars(path1, path2))); 126 } 127 128 static void 129 usprep_unload(UStringPrepProfile* data){ 130 udata_close(data->sprepData); 131 } 132 133 static int32_t 134 usprep_internal_flushCache(UBool noRefCount){ 135 UStringPrepProfile *profile = NULL; 136 UStringPrepKey *key = NULL; 137 int32_t pos = -1; 138 int32_t deletedNum = 0; 139 const UHashElement *e; 140 141 /* 142 * if shared data hasn't even been lazy evaluated yet 143 * return 0 144 */ 145 umtx_lock(&usprepMutex); 146 if (SHARED_DATA_HASHTABLE == NULL) { 147 umtx_unlock(&usprepMutex); 148 return 0; 149 } 150 151 /*creates an enumeration to iterate through every element in the table */ 152 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) 153 { 154 profile = (UStringPrepProfile *) e->value.pointer; 155 key = (UStringPrepKey *) e->key.pointer; 156 157 if ((noRefCount== FALSE && profile->refCount == 0) || 158 noRefCount== TRUE) { 159 deletedNum++; 160 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 161 162 /* unload the data */ 163 usprep_unload(profile); 164 165 if(key->name != NULL) { 166 uprv_free(key->name); 167 key->name=NULL; 168 } 169 if(key->path != NULL) { 170 uprv_free(key->path); 171 key->path=NULL; 172 } 173 uprv_free(profile); 174 uprv_free(key); 175 } 176 177 } 178 umtx_unlock(&usprepMutex); 179 180 return deletedNum; 181 } 182 183 /* Works just like ucnv_flushCache() 184 static int32_t 185 usprep_flushCache(){ 186 return usprep_internal_flushCache(FALSE); 187 } 188 */ 189 190 static UBool U_CALLCONV usprep_cleanup(void){ 191 if (SHARED_DATA_HASHTABLE != NULL) { 192 usprep_internal_flushCache(TRUE); 193 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 194 uhash_close(SHARED_DATA_HASHTABLE); 195 SHARED_DATA_HASHTABLE = NULL; 196 } 197 } 198 199 umtx_destroy(&usprepMutex); /* Don't worry about destroying the mutex even */ 200 /* if the hash table still exists. The mutex */ 201 /* will lazily re-init itself if needed. */ 202 return (SHARED_DATA_HASHTABLE == NULL); 203 } 204 U_CDECL_END 205 206 207 /** Initializes the cache for resources */ 208 static void 209 initCache(UErrorCode *status) { 210 UBool makeCache; 211 UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE == NULL), makeCache); 212 if(makeCache) { 213 UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status); 214 if (U_SUCCESS(*status)) { 215 umtx_lock(&usprepMutex); 216 if(SHARED_DATA_HASHTABLE == NULL) { 217 SHARED_DATA_HASHTABLE = newCache; 218 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); 219 newCache = NULL; 220 } 221 umtx_unlock(&usprepMutex); 222 } 223 if(newCache != NULL) { 224 uhash_close(newCache); 225 } 226 } 227 } 228 229 static UBool U_CALLCONV 230 loadData(UStringPrepProfile* profile, 231 const char* path, 232 const char* name, 233 const char* type, 234 UErrorCode* errorCode) { 235 /* load Unicode SPREP data from file */ 236 UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; 237 UDataMemory *dataMemory; 238 const int32_t *p=NULL; 239 const uint8_t *pb; 240 UVersionInfo normUnicodeVersion; 241 int32_t normUniVer, sprepUniVer, normCorrVer; 242 243 if(errorCode==NULL || U_FAILURE(*errorCode)) { 244 return 0; 245 } 246 247 /* open the data outside the mutex block */ 248 //TODO: change the path 249 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); 250 if(U_FAILURE(*errorCode)) { 251 return FALSE; 252 } 253 254 p=(const int32_t *)udata_getMemory(dataMemory); 255 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); 256 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); 257 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; 258 259 260 if(U_FAILURE(*errorCode)) { 261 udata_close(dataMemory); 262 return FALSE; 263 } 264 265 /* in the mutex block, set the data for this process */ 266 umtx_lock(&usprepMutex); 267 if(profile->sprepData==NULL) { 268 profile->sprepData=dataMemory; 269 dataMemory=NULL; 270 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); 271 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); 272 } else { 273 p=(const int32_t *)udata_getMemory(profile->sprepData); 274 } 275 umtx_unlock(&usprepMutex); 276 /* initialize some variables */ 277 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); 278 279 u_getUnicodeVersion(normUnicodeVersion); 280 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 281 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); 282 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 283 (dataVersion[2] << 8 ) + (dataVersion[3]); 284 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; 285 286 if(U_FAILURE(*errorCode)){ 287 udata_close(dataMemory); 288 return FALSE; 289 } 290 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ 291 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ 292 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ 293 ){ 294 *errorCode = U_INVALID_FORMAT_ERROR; 295 udata_close(dataMemory); 296 return FALSE; 297 } 298 profile->isDataLoaded = TRUE; 299 300 /* if a different thread set it first, then close the extra data */ 301 if(dataMemory!=NULL) { 302 udata_close(dataMemory); /* NULL if it was set correctly */ 303 } 304 305 306 return profile->isDataLoaded; 307 } 308 309 static UStringPrepProfile* 310 usprep_getProfile(const char* path, 311 const char* name, 312 UErrorCode *status){ 313 314 UStringPrepProfile* profile = NULL; 315 316 initCache(status); 317 318 if(U_FAILURE(*status)){ 319 return NULL; 320 } 321 322 UStringPrepKey stackKey; 323 /* 324 * const is cast way to save malloc, strcpy and free calls 325 * we use the passed in pointers for fetching the data from the 326 * hash table which is safe 327 */ 328 stackKey.name = (char*) name; 329 stackKey.path = (char*) path; 330 331 /* fetch the data from the cache */ 332 umtx_lock(&usprepMutex); 333 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 334 if(profile != NULL) { 335 profile->refCount++; 336 } 337 umtx_unlock(&usprepMutex); 338 339 if(profile == NULL) { 340 /* else load the data and put the data in the cache */ 341 LocalMemory<UStringPrepProfile> newProfile; 342 if(newProfile.allocateInsteadAndReset() == NULL) { 343 *status = U_MEMORY_ALLOCATION_ERROR; 344 return NULL; 345 } 346 347 /* load the data */ 348 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ 349 return NULL; 350 } 351 352 /* get the options */ 353 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); 354 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); 355 356 if(newProfile->checkBiDi) { 357 newProfile->bdp = ubidi_getSingleton(); 358 } 359 360 LocalMemory<UStringPrepKey> key; 361 LocalMemory<char> keyName; 362 LocalMemory<char> keyPath; 363 if( key.allocateInsteadAndReset() == NULL || 364 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL || 365 (path != NULL && 366 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL) 367 ) { 368 *status = U_MEMORY_ALLOCATION_ERROR; 369 usprep_unload(newProfile.getAlias()); 370 return NULL; 371 } 372 373 umtx_lock(&usprepMutex); 374 // If another thread already inserted the same key/value, refcount and cleanup our thread data 375 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 376 if(profile != NULL) { 377 profile->refCount++; 378 usprep_unload(newProfile.getAlias()); 379 } 380 else { 381 /* initialize the key members */ 382 key->name = keyName.orphan(); 383 uprv_strcpy(key->name, name); 384 if(path != NULL){ 385 key->path = keyPath.orphan(); 386 uprv_strcpy(key->path, path); 387 } 388 profile = newProfile.orphan(); 389 390 /* add the data object to the cache */ 391 profile->refCount = 1; 392 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); 393 } 394 umtx_unlock(&usprepMutex); 395 } 396 397 return profile; 398 } 399 400 U_CAPI UStringPrepProfile* U_EXPORT2 401 usprep_open(const char* path, 402 const char* name, 403 UErrorCode* status){ 404 405 if(status == NULL || U_FAILURE(*status)){ 406 return NULL; 407 } 408 409 /* initialize the profile struct members */ 410 return usprep_getProfile(path,name,status); 411 } 412 413 U_CAPI UStringPrepProfile* U_EXPORT2 414 usprep_openByType(UStringPrepProfileType type, 415 UErrorCode* status) { 416 if(status == NULL || U_FAILURE(*status)){ 417 return NULL; 418 } 419 int32_t index = (int32_t)type; 420 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) { 421 *status = U_ILLEGAL_ARGUMENT_ERROR; 422 return NULL; 423 } 424 return usprep_open(NULL, PROFILE_NAMES[index], status); 425 } 426 427 U_CAPI void U_EXPORT2 428 usprep_close(UStringPrepProfile* profile){ 429 if(profile==NULL){ 430 return; 431 } 432 433 umtx_lock(&usprepMutex); 434 /* decrement the ref count*/ 435 if(profile->refCount > 0){ 436 profile->refCount--; 437 } 438 umtx_unlock(&usprepMutex); 439 440 } 441 442 U_CFUNC void 443 uprv_syntaxError(const UChar* rules, 444 int32_t pos, 445 int32_t rulesLen, 446 UParseError* parseError){ 447 if(parseError == NULL){ 448 return; 449 } 450 parseError->offset = pos; 451 parseError->line = 0 ; // we are not using line numbers 452 453 // for pre-context 454 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); 455 int32_t limit = pos; 456 457 u_memcpy(parseError->preContext,rules+start,limit-start); 458 //null terminate the buffer 459 parseError->preContext[limit-start] = 0; 460 461 // for post-context; include error rules[pos] 462 start = pos; 463 limit = start + (U_PARSE_CONTEXT_LEN-1); 464 if (limit > rulesLen) { 465 limit = rulesLen; 466 } 467 if (start < rulesLen) { 468 u_memcpy(parseError->postContext,rules+start,limit-start); 469 } 470 //null terminate the buffer 471 parseError->postContext[limit-start]= 0; 472 } 473 474 475 static inline UStringPrepType 476 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ 477 478 UStringPrepType type; 479 if(trieWord == 0){ 480 /* 481 * Initial value stored in the mapping table 482 * just return USPREP_TYPE_LIMIT .. so that 483 * the source codepoint is copied to the destination 484 */ 485 type = USPREP_TYPE_LIMIT; 486 isIndex =FALSE; 487 value = 0; 488 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ 489 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); 490 isIndex =FALSE; 491 value = 0; 492 }else{ 493 /* get the type */ 494 type = USPREP_MAP; 495 /* ascertain if the value is index or delta */ 496 if(trieWord & 0x02){ 497 isIndex = TRUE; 498 value = trieWord >> 2; //mask off the lower 2 bits and shift 499 }else{ 500 isIndex = FALSE; 501 value = (int16_t)trieWord; 502 value = (value >> 2); 503 } 504 505 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ 506 type = USPREP_DELETE; 507 isIndex =FALSE; 508 value = 0; 509 } 510 } 511 return type; 512 } 513 514 515 516 static int32_t 517 usprep_map( const UStringPrepProfile* profile, 518 const UChar* src, int32_t srcLength, 519 UChar* dest, int32_t destCapacity, 520 int32_t options, 521 UParseError* parseError, 522 UErrorCode* status ){ 523 524 uint16_t result; 525 int32_t destIndex=0; 526 int32_t srcIndex; 527 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); 528 UStringPrepType type; 529 int16_t value; 530 UBool isIndex; 531 const int32_t* indexes = profile->indexes; 532 533 // no error checking the caller check for error and arguments 534 // no string length check the caller finds out the string length 535 536 for(srcIndex=0;srcIndex<srcLength;){ 537 UChar32 ch; 538 539 U16_NEXT(src,srcIndex,srcLength,ch); 540 541 result=0; 542 543 UTRIE_GET16(&profile->sprepTrie,ch,result); 544 545 type = getValues(result, value, isIndex); 546 547 // check if the source codepoint is unassigned 548 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ 549 550 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); 551 *status = U_STRINGPREP_UNASSIGNED_ERROR; 552 return 0; 553 554 }else if(type == USPREP_MAP){ 555 556 int32_t index, length; 557 558 if(isIndex){ 559 index = value; 560 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 561 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 562 length = 1; 563 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 564 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 565 length = 2; 566 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 567 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 568 length = 3; 569 }else{ 570 length = profile->mappingData[index++]; 571 572 } 573 574 /* copy mapping to destination */ 575 for(int32_t i=0; i< length; i++){ 576 if(destIndex < destCapacity ){ 577 dest[destIndex] = profile->mappingData[index+i]; 578 } 579 destIndex++; /* for pre-flighting */ 580 } 581 continue; 582 }else{ 583 // subtract the delta to arrive at the code point 584 ch -= value; 585 } 586 587 }else if(type==USPREP_DELETE){ 588 // just consume the codepoint and contine 589 continue; 590 } 591 //copy the code point into destination 592 if(ch <= 0xFFFF){ 593 if(destIndex < destCapacity ){ 594 dest[destIndex] = (UChar)ch; 595 } 596 destIndex++; 597 }else{ 598 if(destIndex+1 < destCapacity ){ 599 dest[destIndex] = U16_LEAD(ch); 600 dest[destIndex+1] = U16_TRAIL(ch); 601 } 602 destIndex +=2; 603 } 604 605 } 606 607 return u_terminateUChars(dest, destCapacity, destIndex, status); 608 } 609 610 611 static int32_t 612 usprep_normalize( const UChar* src, int32_t srcLength, 613 UChar* dest, int32_t destCapacity, 614 UErrorCode* status ){ 615 return unorm_normalize( 616 src, srcLength, 617 UNORM_NFKC, UNORM_UNICODE_3_2, 618 dest, destCapacity, 619 status); 620 } 621 622 623 /* 624 1) Map -- For each character in the input, check if it has a mapping 625 and, if so, replace it with its mapping. 626 627 2) Normalize -- Possibly normalize the result of step 1 using Unicode 628 normalization. 629 630 3) Prohibit -- Check for any characters that are not allowed in the 631 output. If any are found, return an error. 632 633 4) Check bidi -- Possibly check for right-to-left characters, and if 634 any are found, make sure that the whole string satisfies the 635 requirements for bidirectional strings. If the string does not 636 satisfy the requirements for bidirectional strings, return an 637 error. 638 [Unicode3.2] defines several bidirectional categories; each character 639 has one bidirectional category assigned to it. For the purposes of 640 the requirements below, an "RandALCat character" is a character that 641 has Unicode bidirectional categories "R" or "AL"; an "LCat character" 642 is a character that has Unicode bidirectional category "L". Note 643 644 645 that there are many characters which fall in neither of the above 646 definitions; Latin digits (<U+0030> through <U+0039>) are examples of 647 this because they have bidirectional category "EN". 648 649 In any profile that specifies bidirectional character handling, all 650 three of the following requirements MUST be met: 651 652 1) The characters in section 5.8 MUST be prohibited. 653 654 2) If a string contains any RandALCat character, the string MUST NOT 655 contain any LCat character. 656 657 3) If a string contains any RandALCat character, a RandALCat 658 character MUST be the first character of the string, and a 659 RandALCat character MUST be the last character of the string. 660 */ 661 662 #define MAX_STACK_BUFFER_SIZE 300 663 664 665 U_CAPI int32_t U_EXPORT2 666 usprep_prepare( const UStringPrepProfile* profile, 667 const UChar* src, int32_t srcLength, 668 UChar* dest, int32_t destCapacity, 669 int32_t options, 670 UParseError* parseError, 671 UErrorCode* status ){ 672 673 // check error status 674 if(status == NULL || U_FAILURE(*status)){ 675 return 0; 676 } 677 678 //check arguments 679 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 680 *status=U_ILLEGAL_ARGUMENT_ERROR; 681 return 0; 682 } 683 684 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE]; 685 UChar *b1 = b1Stack, *b2 = b2Stack; 686 int32_t b1Len, b2Len=0, 687 b1Capacity = MAX_STACK_BUFFER_SIZE , 688 b2Capacity = MAX_STACK_BUFFER_SIZE; 689 uint16_t result; 690 int32_t b2Index = 0; 691 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 692 UBool leftToRight=FALSE, rightToLeft=FALSE; 693 int32_t rtlPos =-1, ltrPos =-1; 694 695 //get the string length 696 if(srcLength == -1){ 697 srcLength = u_strlen(src); 698 } 699 // map 700 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status); 701 702 if(*status == U_BUFFER_OVERFLOW_ERROR){ 703 // redo processing of string 704 /* we do not have enough room so grow the buffer*/ 705 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 706 if(b1==NULL){ 707 *status = U_MEMORY_ALLOCATION_ERROR; 708 goto CLEANUP; 709 } 710 711 *status = U_ZERO_ERROR; // reset error 712 713 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status); 714 715 } 716 717 // normalize 718 if(profile->doNFKC == TRUE){ 719 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status); 720 721 if(*status == U_BUFFER_OVERFLOW_ERROR){ 722 // redo processing of string 723 /* we do not have enough room so grow the buffer*/ 724 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 725 if(b2==NULL){ 726 *status = U_MEMORY_ALLOCATION_ERROR; 727 goto CLEANUP; 728 } 729 730 *status = U_ZERO_ERROR; // reset error 731 732 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status); 733 734 } 735 736 }else{ 737 b2 = b1; 738 b2Len = b1Len; 739 } 740 741 742 if(U_FAILURE(*status)){ 743 goto CLEANUP; 744 } 745 746 UChar32 ch; 747 UStringPrepType type; 748 int16_t value; 749 UBool isIndex; 750 751 // Prohibit and checkBiDi in one pass 752 for(b2Index=0; b2Index<b2Len;){ 753 754 ch = 0; 755 756 U16_NEXT(b2, b2Index, b2Len, ch); 757 758 UTRIE_GET16(&profile->sprepTrie,ch,result); 759 760 type = getValues(result, value, isIndex); 761 762 if( type == USPREP_PROHIBITED || 763 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) 764 ){ 765 *status = U_STRINGPREP_PROHIBITED_ERROR; 766 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); 767 goto CLEANUP; 768 } 769 770 if(profile->checkBiDi) { 771 direction = ubidi_getClass(profile->bdp, ch); 772 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ 773 firstCharDir = direction; 774 } 775 if(direction == U_LEFT_TO_RIGHT){ 776 leftToRight = TRUE; 777 ltrPos = b2Index-1; 778 } 779 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 780 rightToLeft = TRUE; 781 rtlPos = b2Index-1; 782 } 783 } 784 } 785 if(profile->checkBiDi == TRUE){ 786 // satisfy 2 787 if( leftToRight == TRUE && rightToLeft == TRUE){ 788 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 789 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); 790 goto CLEANUP; 791 } 792 793 //satisfy 3 794 if( rightToLeft == TRUE && 795 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 796 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 797 ){ 798 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 799 uprv_syntaxError(b2, rtlPos, b2Len, parseError); 800 return FALSE; 801 } 802 } 803 if(b2Len>0 && b2Len <= destCapacity){ 804 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR); 805 } 806 807 CLEANUP: 808 if(b1!=b1Stack){ 809 uprv_free(b1); 810 b1=NULL; 811 } 812 813 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){ 814 uprv_free(b2); 815 b2=NULL; 816 } 817 return u_terminateUChars(dest, destCapacity, b2Len, status); 818 } 819 820 821 /* data swapping ------------------------------------------------------------ */ 822 823 U_CAPI int32_t U_EXPORT2 824 usprep_swap(const UDataSwapper *ds, 825 const void *inData, int32_t length, void *outData, 826 UErrorCode *pErrorCode) { 827 const UDataInfo *pInfo; 828 int32_t headerSize; 829 830 const uint8_t *inBytes; 831 uint8_t *outBytes; 832 833 const int32_t *inIndexes; 834 int32_t indexes[16]; 835 836 int32_t i, offset, count, size; 837 838 /* udata_swapDataHeader checks the arguments */ 839 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 840 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 841 return 0; 842 } 843 844 /* check data format and format version */ 845 pInfo=(const UDataInfo *)((const char *)inData+4); 846 if(!( 847 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 848 pInfo->dataFormat[1]==0x50 && 849 pInfo->dataFormat[2]==0x52 && 850 pInfo->dataFormat[3]==0x50 && 851 pInfo->formatVersion[0]==3 852 )) { 853 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", 854 pInfo->dataFormat[0], pInfo->dataFormat[1], 855 pInfo->dataFormat[2], pInfo->dataFormat[3], 856 pInfo->formatVersion[0]); 857 *pErrorCode=U_UNSUPPORTED_ERROR; 858 return 0; 859 } 860 861 inBytes=(const uint8_t *)inData+headerSize; 862 outBytes=(uint8_t *)outData+headerSize; 863 864 inIndexes=(const int32_t *)inBytes; 865 866 if(length>=0) { 867 length-=headerSize; 868 if(length<16*4) { 869 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", 870 length); 871 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 872 return 0; 873 } 874 } 875 876 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ 877 for(i=0; i<16; ++i) { 878 indexes[i]=udata_readInt32(ds, inIndexes[i]); 879 } 880 881 /* calculate the total length of the data */ 882 size= 883 16*4+ /* size of indexes[] */ 884 indexes[_SPREP_INDEX_TRIE_SIZE]+ 885 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 886 887 if(length>=0) { 888 if(length<size) { 889 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", 890 length); 891 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 892 return 0; 893 } 894 895 /* copy the data for inaccessible bytes */ 896 if(inBytes!=outBytes) { 897 uprv_memcpy(outBytes, inBytes, size); 898 } 899 900 offset=0; 901 902 /* swap the int32_t indexes[] */ 903 count=16*4; 904 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 905 offset+=count; 906 907 /* swap the UTrie */ 908 count=indexes[_SPREP_INDEX_TRIE_SIZE]; 909 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 910 offset+=count; 911 912 /* swap the uint16_t mappingTable[] */ 913 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 914 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 915 offset+=count; 916 } 917 918 return headerSize+size; 919 } 920 921 #endif /* #if !UCONFIG_NO_IDNA */ 922