1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: usprep.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003jul2 16 * created by: Ram Viswanadha 17 */ 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_IDNA 22 23 #include "unicode/usprep.h" 24 25 #include "unicode/normalizer2.h" 26 #include "unicode/ustring.h" 27 #include "unicode/uchar.h" 28 #include "unicode/uversion.h" 29 #include "umutex.h" 30 #include "cmemory.h" 31 #include "sprpimpl.h" 32 #include "ustr_imp.h" 33 #include "uhash.h" 34 #include "cstring.h" 35 #include "udataswp.h" 36 #include "ucln_cmn.h" 37 #include "ubidi_props.h" 38 #include "uprops.h" 39 40 U_NAMESPACE_USE 41 42 U_CDECL_BEGIN 43 44 /* 45 Static cache for already opened StringPrep profiles 46 */ 47 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 48 static icu::UInitOnce gSharedDataInitOnce; 49 50 static UMutex usprepMutex = U_MUTEX_INITIALIZER; 51 52 /* format version of spp file */ 53 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; 54 55 /* the Unicode version of the sprep data */ 56 static UVersionInfo dataVersion={ 0, 0, 0, 0 }; 57 58 /* Profile names must be aligned to UStringPrepProfileType */ 59 static const char * const PROFILE_NAMES[] = { 60 "rfc3491", /* USPREP_RFC3491_NAMEPREP */ 61 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */ 62 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */ 63 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */ 64 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */ 65 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */ 66 "rfc3722", /* USPREP_RFC3722_ISCSI */ 67 "rfc3920node", /* USPREP_RFC3920_NODEPREP */ 68 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */ 69 "rfc4011", /* USPREP_RFC4011_MIB */ 70 "rfc4013", /* USPREP_RFC4013_SASLPREP */ 71 "rfc4505", /* USPREP_RFC4505_TRACE */ 72 "rfc4518", /* USPREP_RFC4518_LDAP */ 73 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */ 74 }; 75 76 static UBool U_CALLCONV 77 isSPrepAcceptable(void * /* context */, 78 const char * /* type */, 79 const char * /* name */, 80 const UDataInfo *pInfo) { 81 if( 82 pInfo->size>=20 && 83 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 84 pInfo->charsetFamily==U_CHARSET_FAMILY && 85 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 86 pInfo->dataFormat[1]==0x50 && 87 pInfo->dataFormat[2]==0x52 && 88 pInfo->dataFormat[3]==0x50 && 89 pInfo->formatVersion[0]==3 && 90 pInfo->formatVersion[2]==UTRIE_SHIFT && 91 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT 92 ) { 93 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4); 94 uprv_memcpy(dataVersion, pInfo->dataVersion, 4); 95 return TRUE; 96 } else { 97 return FALSE; 98 } 99 } 100 101 static int32_t U_CALLCONV 102 getSPrepFoldingOffset(uint32_t data) { 103 104 return (int32_t)data; 105 106 } 107 108 /* hashes an entry */ 109 static int32_t U_CALLCONV 110 hashEntry(const UHashTok parm) { 111 UStringPrepKey *b = (UStringPrepKey *)parm.pointer; 112 UHashTok namekey, pathkey; 113 namekey.pointer = b->name; 114 pathkey.pointer = b->path; 115 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey); 116 } 117 118 /* compares two entries */ 119 static UBool U_CALLCONV 120 compareEntries(const UHashTok p1, const UHashTok p2) { 121 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; 122 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; 123 UHashTok name1, name2, path1, path2; 124 name1.pointer = b1->name; 125 name2.pointer = b2->name; 126 path1.pointer = b1->path; 127 path2.pointer = b2->path; 128 return ((UBool)(uhash_compareChars(name1, name2) & 129 uhash_compareChars(path1, path2))); 130 } 131 132 static void 133 usprep_unload(UStringPrepProfile* data){ 134 udata_close(data->sprepData); 135 } 136 137 static int32_t 138 usprep_internal_flushCache(UBool noRefCount){ 139 UStringPrepProfile *profile = NULL; 140 UStringPrepKey *key = NULL; 141 int32_t pos = UHASH_FIRST; 142 int32_t deletedNum = 0; 143 const UHashElement *e; 144 145 /* 146 * if shared data hasn't even been lazy evaluated yet 147 * return 0 148 */ 149 umtx_lock(&usprepMutex); 150 if (SHARED_DATA_HASHTABLE == NULL) { 151 umtx_unlock(&usprepMutex); 152 return 0; 153 } 154 155 /*creates an enumeration to iterate through every element in the table */ 156 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) 157 { 158 profile = (UStringPrepProfile *) e->value.pointer; 159 key = (UStringPrepKey *) e->key.pointer; 160 161 if ((noRefCount== FALSE && profile->refCount == 0) || 162 noRefCount== TRUE) { 163 deletedNum++; 164 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 165 166 /* unload the data */ 167 usprep_unload(profile); 168 169 if(key->name != NULL) { 170 uprv_free(key->name); 171 key->name=NULL; 172 } 173 if(key->path != NULL) { 174 uprv_free(key->path); 175 key->path=NULL; 176 } 177 uprv_free(profile); 178 uprv_free(key); 179 } 180 181 } 182 umtx_unlock(&usprepMutex); 183 184 return deletedNum; 185 } 186 187 /* Works just like ucnv_flushCache() 188 static int32_t 189 usprep_flushCache(){ 190 return usprep_internal_flushCache(FALSE); 191 } 192 */ 193 194 static UBool U_CALLCONV usprep_cleanup(void){ 195 if (SHARED_DATA_HASHTABLE != NULL) { 196 usprep_internal_flushCache(TRUE); 197 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 198 uhash_close(SHARED_DATA_HASHTABLE); 199 SHARED_DATA_HASHTABLE = NULL; 200 } 201 } 202 gSharedDataInitOnce.reset(); 203 return (SHARED_DATA_HASHTABLE == NULL); 204 } 205 U_CDECL_END 206 207 208 /** Initializes the cache for resources */ 209 static void U_CALLCONV 210 createCache(UErrorCode &status) { 211 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status); 212 if (U_FAILURE(status)) { 213 SHARED_DATA_HASHTABLE = NULL; 214 } 215 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); 216 } 217 218 static void 219 initCache(UErrorCode *status) { 220 umtx_initOnce(gSharedDataInitOnce, &createCache, *status); 221 } 222 223 static UBool U_CALLCONV 224 loadData(UStringPrepProfile* profile, 225 const char* path, 226 const char* name, 227 const char* type, 228 UErrorCode* errorCode) { 229 /* load Unicode SPREP data from file */ 230 UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; 231 UDataMemory *dataMemory; 232 const int32_t *p=NULL; 233 const uint8_t *pb; 234 UVersionInfo normUnicodeVersion; 235 int32_t normUniVer, sprepUniVer, normCorrVer; 236 237 if(errorCode==NULL || U_FAILURE(*errorCode)) { 238 return 0; 239 } 240 241 /* open the data outside the mutex block */ 242 //TODO: change the path 243 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); 244 if(U_FAILURE(*errorCode)) { 245 return FALSE; 246 } 247 248 p=(const int32_t *)udata_getMemory(dataMemory); 249 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); 250 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); 251 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; 252 253 254 if(U_FAILURE(*errorCode)) { 255 udata_close(dataMemory); 256 return FALSE; 257 } 258 259 /* in the mutex block, set the data for this process */ 260 umtx_lock(&usprepMutex); 261 if(profile->sprepData==NULL) { 262 profile->sprepData=dataMemory; 263 dataMemory=NULL; 264 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); 265 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); 266 } else { 267 p=(const int32_t *)udata_getMemory(profile->sprepData); 268 } 269 umtx_unlock(&usprepMutex); 270 /* initialize some variables */ 271 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); 272 273 u_getUnicodeVersion(normUnicodeVersion); 274 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 275 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); 276 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 277 (dataVersion[2] << 8 ) + (dataVersion[3]); 278 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; 279 280 if(U_FAILURE(*errorCode)){ 281 udata_close(dataMemory); 282 return FALSE; 283 } 284 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ 285 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ 286 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ 287 ){ 288 *errorCode = U_INVALID_FORMAT_ERROR; 289 udata_close(dataMemory); 290 return FALSE; 291 } 292 profile->isDataLoaded = TRUE; 293 294 /* if a different thread set it first, then close the extra data */ 295 if(dataMemory!=NULL) { 296 udata_close(dataMemory); /* NULL if it was set correctly */ 297 } 298 299 300 return profile->isDataLoaded; 301 } 302 303 static UStringPrepProfile* 304 usprep_getProfile(const char* path, 305 const char* name, 306 UErrorCode *status){ 307 308 UStringPrepProfile* profile = NULL; 309 310 initCache(status); 311 312 if(U_FAILURE(*status)){ 313 return NULL; 314 } 315 316 UStringPrepKey stackKey; 317 /* 318 * const is cast way to save malloc, strcpy and free calls 319 * we use the passed in pointers for fetching the data from the 320 * hash table which is safe 321 */ 322 stackKey.name = (char*) name; 323 stackKey.path = (char*) path; 324 325 /* fetch the data from the cache */ 326 umtx_lock(&usprepMutex); 327 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 328 if(profile != NULL) { 329 profile->refCount++; 330 } 331 umtx_unlock(&usprepMutex); 332 333 if(profile == NULL) { 334 /* else load the data and put the data in the cache */ 335 LocalMemory<UStringPrepProfile> newProfile; 336 if(newProfile.allocateInsteadAndReset() == NULL) { 337 *status = U_MEMORY_ALLOCATION_ERROR; 338 return NULL; 339 } 340 341 /* load the data */ 342 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ 343 return NULL; 344 } 345 346 /* get the options */ 347 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); 348 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); 349 350 if(newProfile->checkBiDi) { 351 newProfile->bdp = ubidi_getSingleton(); 352 } 353 354 LocalMemory<UStringPrepKey> key; 355 LocalMemory<char> keyName; 356 LocalMemory<char> keyPath; 357 if( key.allocateInsteadAndReset() == NULL || 358 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL || 359 (path != NULL && 360 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL) 361 ) { 362 *status = U_MEMORY_ALLOCATION_ERROR; 363 usprep_unload(newProfile.getAlias()); 364 return NULL; 365 } 366 367 umtx_lock(&usprepMutex); 368 // If another thread already inserted the same key/value, refcount and cleanup our thread data 369 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 370 if(profile != NULL) { 371 profile->refCount++; 372 usprep_unload(newProfile.getAlias()); 373 } 374 else { 375 /* initialize the key members */ 376 key->name = keyName.orphan(); 377 uprv_strcpy(key->name, name); 378 if(path != NULL){ 379 key->path = keyPath.orphan(); 380 uprv_strcpy(key->path, path); 381 } 382 profile = newProfile.orphan(); 383 384 /* add the data object to the cache */ 385 profile->refCount = 1; 386 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); 387 } 388 umtx_unlock(&usprepMutex); 389 } 390 391 return profile; 392 } 393 394 U_CAPI UStringPrepProfile* U_EXPORT2 395 usprep_open(const char* path, 396 const char* name, 397 UErrorCode* status){ 398 399 if(status == NULL || U_FAILURE(*status)){ 400 return NULL; 401 } 402 403 /* initialize the profile struct members */ 404 return usprep_getProfile(path,name,status); 405 } 406 407 U_CAPI UStringPrepProfile* U_EXPORT2 408 usprep_openByType(UStringPrepProfileType type, 409 UErrorCode* status) { 410 if(status == NULL || U_FAILURE(*status)){ 411 return NULL; 412 } 413 int32_t index = (int32_t)type; 414 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) { 415 *status = U_ILLEGAL_ARGUMENT_ERROR; 416 return NULL; 417 } 418 return usprep_open(NULL, PROFILE_NAMES[index], status); 419 } 420 421 U_CAPI void U_EXPORT2 422 usprep_close(UStringPrepProfile* profile){ 423 if(profile==NULL){ 424 return; 425 } 426 427 umtx_lock(&usprepMutex); 428 /* decrement the ref count*/ 429 if(profile->refCount > 0){ 430 profile->refCount--; 431 } 432 umtx_unlock(&usprepMutex); 433 434 } 435 436 U_CFUNC void 437 uprv_syntaxError(const UChar* rules, 438 int32_t pos, 439 int32_t rulesLen, 440 UParseError* parseError){ 441 if(parseError == NULL){ 442 return; 443 } 444 parseError->offset = pos; 445 parseError->line = 0 ; // we are not using line numbers 446 447 // for pre-context 448 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); 449 int32_t limit = pos; 450 451 u_memcpy(parseError->preContext,rules+start,limit-start); 452 //null terminate the buffer 453 parseError->preContext[limit-start] = 0; 454 455 // for post-context; include error rules[pos] 456 start = pos; 457 limit = start + (U_PARSE_CONTEXT_LEN-1); 458 if (limit > rulesLen) { 459 limit = rulesLen; 460 } 461 if (start < rulesLen) { 462 u_memcpy(parseError->postContext,rules+start,limit-start); 463 } 464 //null terminate the buffer 465 parseError->postContext[limit-start]= 0; 466 } 467 468 469 static inline UStringPrepType 470 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ 471 472 UStringPrepType type; 473 if(trieWord == 0){ 474 /* 475 * Initial value stored in the mapping table 476 * just return USPREP_TYPE_LIMIT .. so that 477 * the source codepoint is copied to the destination 478 */ 479 type = USPREP_TYPE_LIMIT; 480 isIndex =FALSE; 481 value = 0; 482 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ 483 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); 484 isIndex =FALSE; 485 value = 0; 486 }else{ 487 /* get the type */ 488 type = USPREP_MAP; 489 /* ascertain if the value is index or delta */ 490 if(trieWord & 0x02){ 491 isIndex = TRUE; 492 value = trieWord >> 2; //mask off the lower 2 bits and shift 493 }else{ 494 isIndex = FALSE; 495 value = (int16_t)trieWord; 496 value = (value >> 2); 497 } 498 499 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ 500 type = USPREP_DELETE; 501 isIndex =FALSE; 502 value = 0; 503 } 504 } 505 return type; 506 } 507 508 // TODO: change to writing to UnicodeString not UChar * 509 static int32_t 510 usprep_map( const UStringPrepProfile* profile, 511 const UChar* src, int32_t srcLength, 512 UChar* dest, int32_t destCapacity, 513 int32_t options, 514 UParseError* parseError, 515 UErrorCode* status ){ 516 517 uint16_t result; 518 int32_t destIndex=0; 519 int32_t srcIndex; 520 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); 521 UStringPrepType type; 522 int16_t value; 523 UBool isIndex; 524 const int32_t* indexes = profile->indexes; 525 526 // no error checking the caller check for error and arguments 527 // no string length check the caller finds out the string length 528 529 for(srcIndex=0;srcIndex<srcLength;){ 530 UChar32 ch; 531 532 U16_NEXT(src,srcIndex,srcLength,ch); 533 534 result=0; 535 536 UTRIE_GET16(&profile->sprepTrie,ch,result); 537 538 type = getValues(result, value, isIndex); 539 540 // check if the source codepoint is unassigned 541 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ 542 543 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); 544 *status = U_STRINGPREP_UNASSIGNED_ERROR; 545 return 0; 546 547 }else if(type == USPREP_MAP){ 548 549 int32_t index, length; 550 551 if(isIndex){ 552 index = value; 553 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 554 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 555 length = 1; 556 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 557 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 558 length = 2; 559 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 560 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 561 length = 3; 562 }else{ 563 length = profile->mappingData[index++]; 564 565 } 566 567 /* copy mapping to destination */ 568 for(int32_t i=0; i< length; i++){ 569 if(destIndex < destCapacity ){ 570 dest[destIndex] = profile->mappingData[index+i]; 571 } 572 destIndex++; /* for pre-flighting */ 573 } 574 continue; 575 }else{ 576 // subtract the delta to arrive at the code point 577 ch -= value; 578 } 579 580 }else if(type==USPREP_DELETE){ 581 // just consume the codepoint and contine 582 continue; 583 } 584 //copy the code point into destination 585 if(ch <= 0xFFFF){ 586 if(destIndex < destCapacity ){ 587 dest[destIndex] = (UChar)ch; 588 } 589 destIndex++; 590 }else{ 591 if(destIndex+1 < destCapacity ){ 592 dest[destIndex] = U16_LEAD(ch); 593 dest[destIndex+1] = U16_TRAIL(ch); 594 } 595 destIndex +=2; 596 } 597 598 } 599 600 return u_terminateUChars(dest, destCapacity, destIndex, status); 601 } 602 603 /* 604 1) Map -- For each character in the input, check if it has a mapping 605 and, if so, replace it with its mapping. 606 607 2) Normalize -- Possibly normalize the result of step 1 using Unicode 608 normalization. 609 610 3) Prohibit -- Check for any characters that are not allowed in the 611 output. If any are found, return an error. 612 613 4) Check bidi -- Possibly check for right-to-left characters, and if 614 any are found, make sure that the whole string satisfies the 615 requirements for bidirectional strings. If the string does not 616 satisfy the requirements for bidirectional strings, return an 617 error. 618 [Unicode3.2] defines several bidirectional categories; each character 619 has one bidirectional category assigned to it. For the purposes of 620 the requirements below, an "RandALCat character" is a character that 621 has Unicode bidirectional categories "R" or "AL"; an "LCat character" 622 is a character that has Unicode bidirectional category "L". Note 623 624 625 that there are many characters which fall in neither of the above 626 definitions; Latin digits (<U+0030> through <U+0039>) are examples of 627 this because they have bidirectional category "EN". 628 629 In any profile that specifies bidirectional character handling, all 630 three of the following requirements MUST be met: 631 632 1) The characters in section 5.8 MUST be prohibited. 633 634 2) If a string contains any RandALCat character, the string MUST NOT 635 contain any LCat character. 636 637 3) If a string contains any RandALCat character, a RandALCat 638 character MUST be the first character of the string, and a 639 RandALCat character MUST be the last character of the string. 640 */ 641 U_CAPI int32_t U_EXPORT2 642 usprep_prepare( const UStringPrepProfile* profile, 643 const UChar* src, int32_t srcLength, 644 UChar* dest, int32_t destCapacity, 645 int32_t options, 646 UParseError* parseError, 647 UErrorCode* status ){ 648 649 // check error status 650 if(U_FAILURE(*status)){ 651 return 0; 652 } 653 654 //check arguments 655 if(profile==NULL || 656 (src==NULL ? srcLength!=0 : srcLength<-1) || 657 (dest==NULL ? destCapacity!=0 : destCapacity<0)) { 658 *status=U_ILLEGAL_ARGUMENT_ERROR; 659 return 0; 660 } 661 662 //get the string length 663 if(srcLength < 0){ 664 srcLength = u_strlen(src); 665 } 666 // map 667 UnicodeString s1; 668 UChar *b1 = s1.getBuffer(srcLength); 669 if(b1==NULL){ 670 *status = U_MEMORY_ALLOCATION_ERROR; 671 return 0; 672 } 673 int32_t b1Len = usprep_map(profile, src, srcLength, 674 b1, s1.getCapacity(), options, parseError, status); 675 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); 676 677 if(*status == U_BUFFER_OVERFLOW_ERROR){ 678 // redo processing of string 679 /* we do not have enough room so grow the buffer*/ 680 b1 = s1.getBuffer(b1Len); 681 if(b1==NULL){ 682 *status = U_MEMORY_ALLOCATION_ERROR; 683 return 0; 684 } 685 686 *status = U_ZERO_ERROR; // reset error 687 b1Len = usprep_map(profile, src, srcLength, 688 b1, s1.getCapacity(), options, parseError, status); 689 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); 690 } 691 if(U_FAILURE(*status)){ 692 return 0; 693 } 694 695 // normalize 696 UnicodeString s2; 697 if(profile->doNFKC){ 698 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status); 699 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status)); 700 if(U_FAILURE(*status)){ 701 return 0; 702 } 703 fn2.normalize(s1, s2, *status); 704 }else{ 705 s2.fastCopyFrom(s1); 706 } 707 if(U_FAILURE(*status)){ 708 return 0; 709 } 710 711 // Prohibit and checkBiDi in one pass 712 const UChar *b2 = s2.getBuffer(); 713 int32_t b2Len = s2.length(); 714 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 715 UBool leftToRight=FALSE, rightToLeft=FALSE; 716 int32_t rtlPos =-1, ltrPos =-1; 717 718 for(int32_t b2Index=0; b2Index<b2Len;){ 719 UChar32 ch = 0; 720 U16_NEXT(b2, b2Index, b2Len, ch); 721 722 uint16_t result; 723 UTRIE_GET16(&profile->sprepTrie,ch,result); 724 725 int16_t value; 726 UBool isIndex; 727 UStringPrepType type = getValues(result, value, isIndex); 728 729 if( type == USPREP_PROHIBITED || 730 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) 731 ){ 732 *status = U_STRINGPREP_PROHIBITED_ERROR; 733 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); 734 return 0; 735 } 736 737 if(profile->checkBiDi) { 738 direction = ubidi_getClass(profile->bdp, ch); 739 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ 740 firstCharDir = direction; 741 } 742 if(direction == U_LEFT_TO_RIGHT){ 743 leftToRight = TRUE; 744 ltrPos = b2Index-1; 745 } 746 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 747 rightToLeft = TRUE; 748 rtlPos = b2Index-1; 749 } 750 } 751 } 752 if(profile->checkBiDi == TRUE){ 753 // satisfy 2 754 if( leftToRight == TRUE && rightToLeft == TRUE){ 755 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 756 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); 757 return 0; 758 } 759 760 //satisfy 3 761 if( rightToLeft == TRUE && 762 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 763 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 764 ){ 765 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 766 uprv_syntaxError(b2, rtlPos, b2Len, parseError); 767 return FALSE; 768 } 769 } 770 return s2.extract(dest, destCapacity, *status); 771 } 772 773 774 /* data swapping ------------------------------------------------------------ */ 775 776 U_CAPI int32_t U_EXPORT2 777 usprep_swap(const UDataSwapper *ds, 778 const void *inData, int32_t length, void *outData, 779 UErrorCode *pErrorCode) { 780 const UDataInfo *pInfo; 781 int32_t headerSize; 782 783 const uint8_t *inBytes; 784 uint8_t *outBytes; 785 786 const int32_t *inIndexes; 787 int32_t indexes[16]; 788 789 int32_t i, offset, count, size; 790 791 /* udata_swapDataHeader checks the arguments */ 792 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 793 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 794 return 0; 795 } 796 797 /* check data format and format version */ 798 pInfo=(const UDataInfo *)((const char *)inData+4); 799 if(!( 800 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 801 pInfo->dataFormat[1]==0x50 && 802 pInfo->dataFormat[2]==0x52 && 803 pInfo->dataFormat[3]==0x50 && 804 pInfo->formatVersion[0]==3 805 )) { 806 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", 807 pInfo->dataFormat[0], pInfo->dataFormat[1], 808 pInfo->dataFormat[2], pInfo->dataFormat[3], 809 pInfo->formatVersion[0]); 810 *pErrorCode=U_UNSUPPORTED_ERROR; 811 return 0; 812 } 813 814 inBytes=(const uint8_t *)inData+headerSize; 815 outBytes=(uint8_t *)outData+headerSize; 816 817 inIndexes=(const int32_t *)inBytes; 818 819 if(length>=0) { 820 length-=headerSize; 821 if(length<16*4) { 822 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", 823 length); 824 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 825 return 0; 826 } 827 } 828 829 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ 830 for(i=0; i<16; ++i) { 831 indexes[i]=udata_readInt32(ds, inIndexes[i]); 832 } 833 834 /* calculate the total length of the data */ 835 size= 836 16*4+ /* size of indexes[] */ 837 indexes[_SPREP_INDEX_TRIE_SIZE]+ 838 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 839 840 if(length>=0) { 841 if(length<size) { 842 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", 843 length); 844 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 845 return 0; 846 } 847 848 /* copy the data for inaccessible bytes */ 849 if(inBytes!=outBytes) { 850 uprv_memcpy(outBytes, inBytes, size); 851 } 852 853 offset=0; 854 855 /* swap the int32_t indexes[] */ 856 count=16*4; 857 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 858 offset+=count; 859 860 /* swap the UTrie */ 861 count=indexes[_SPREP_INDEX_TRIE_SIZE]; 862 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 863 offset+=count; 864 865 /* swap the uint16_t mappingTable[] */ 866 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 867 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 868 //offset+=count; 869 } 870 871 return headerSize+size; 872 } 873 874 #endif /* #if !UCONFIG_NO_IDNA */ 875