1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: usprep.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003jul2 16 * created by: Ram Viswanadha 17 */ 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_IDNA 22 23 #include "unicode/usprep.h" 24 25 #include "unicode/normalizer2.h" 26 #include "unicode/ustring.h" 27 #include "unicode/uchar.h" 28 #include "unicode/uversion.h" 29 #include "umutex.h" 30 #include "cmemory.h" 31 #include "sprpimpl.h" 32 #include "ustr_imp.h" 33 #include "uhash.h" 34 #include "cstring.h" 35 #include "udataswp.h" 36 #include "ucln_cmn.h" 37 #include "ubidi_props.h" 38 #include "uprops.h" 39 40 U_NAMESPACE_USE 41 42 U_CDECL_BEGIN 43 44 /* 45 Static cache for already opened StringPrep profiles 46 */ 47 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 48 static icu::UInitOnce gSharedDataInitOnce; 49 50 static UMutex usprepMutex = U_MUTEX_INITIALIZER; 51 52 /* format version of spp file */ 53 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; 54 55 /* the Unicode version of the sprep data */ 56 static UVersionInfo dataVersion={ 0, 0, 0, 0 }; 57 58 /* Profile names must be aligned to UStringPrepProfileType */ 59 static const char * const PROFILE_NAMES[] = { 60 "rfc3491", /* USPREP_RFC3491_NAMEPREP */ 61 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */ 62 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */ 63 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */ 64 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */ 65 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */ 66 "rfc3722", /* USPREP_RFC3722_ISCSI */ 67 "rfc3920node", /* USPREP_RFC3920_NODEPREP */ 68 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */ 69 "rfc4011", /* USPREP_RFC4011_MIB */ 70 "rfc4013", /* USPREP_RFC4013_SASLPREP */ 71 "rfc4505", /* USPREP_RFC4505_TRACE */ 72 "rfc4518", /* USPREP_RFC4518_LDAP */ 73 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */ 74 }; 75 76 static UBool U_CALLCONV 77 isSPrepAcceptable(void * /* context */, 78 const char * /* type */, 79 const char * /* name */, 80 const UDataInfo *pInfo) { 81 if( 82 pInfo->size>=20 && 83 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 84 pInfo->charsetFamily==U_CHARSET_FAMILY && 85 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 86 pInfo->dataFormat[1]==0x50 && 87 pInfo->dataFormat[2]==0x52 && 88 pInfo->dataFormat[3]==0x50 && 89 pInfo->formatVersion[0]==3 && 90 pInfo->formatVersion[2]==UTRIE_SHIFT && 91 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT 92 ) { 93 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4); 94 uprv_memcpy(dataVersion, pInfo->dataVersion, 4); 95 return TRUE; 96 } else { 97 return FALSE; 98 } 99 } 100 101 static int32_t U_CALLCONV 102 getSPrepFoldingOffset(uint32_t data) { 103 104 return (int32_t)data; 105 106 } 107 108 /* hashes an entry */ 109 static int32_t U_CALLCONV 110 hashEntry(const UHashTok parm) { 111 UStringPrepKey *b = (UStringPrepKey *)parm.pointer; 112 UHashTok namekey, pathkey; 113 namekey.pointer = b->name; 114 pathkey.pointer = b->path; 115 uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) + 116 37u * static_cast<uint32_t>(uhash_hashChars(pathkey)); 117 return static_cast<int32_t>(unsignedHash); 118 } 119 120 /* compares two entries */ 121 static UBool U_CALLCONV 122 compareEntries(const UHashTok p1, const UHashTok p2) { 123 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; 124 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; 125 UHashTok name1, name2, path1, path2; 126 name1.pointer = b1->name; 127 name2.pointer = b2->name; 128 path1.pointer = b1->path; 129 path2.pointer = b2->path; 130 return ((UBool)(uhash_compareChars(name1, name2) & 131 uhash_compareChars(path1, path2))); 132 } 133 134 static void 135 usprep_unload(UStringPrepProfile* data){ 136 udata_close(data->sprepData); 137 } 138 139 static int32_t 140 usprep_internal_flushCache(UBool noRefCount){ 141 UStringPrepProfile *profile = NULL; 142 UStringPrepKey *key = NULL; 143 int32_t pos = UHASH_FIRST; 144 int32_t deletedNum = 0; 145 const UHashElement *e; 146 147 /* 148 * if shared data hasn't even been lazy evaluated yet 149 * return 0 150 */ 151 umtx_lock(&usprepMutex); 152 if (SHARED_DATA_HASHTABLE == NULL) { 153 umtx_unlock(&usprepMutex); 154 return 0; 155 } 156 157 /*creates an enumeration to iterate through every element in the table */ 158 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) 159 { 160 profile = (UStringPrepProfile *) e->value.pointer; 161 key = (UStringPrepKey *) e->key.pointer; 162 163 if ((noRefCount== FALSE && profile->refCount == 0) || 164 noRefCount== TRUE) { 165 deletedNum++; 166 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 167 168 /* unload the data */ 169 usprep_unload(profile); 170 171 if(key->name != NULL) { 172 uprv_free(key->name); 173 key->name=NULL; 174 } 175 if(key->path != NULL) { 176 uprv_free(key->path); 177 key->path=NULL; 178 } 179 uprv_free(profile); 180 uprv_free(key); 181 } 182 183 } 184 umtx_unlock(&usprepMutex); 185 186 return deletedNum; 187 } 188 189 /* Works just like ucnv_flushCache() 190 static int32_t 191 usprep_flushCache(){ 192 return usprep_internal_flushCache(FALSE); 193 } 194 */ 195 196 static UBool U_CALLCONV usprep_cleanup(void){ 197 if (SHARED_DATA_HASHTABLE != NULL) { 198 usprep_internal_flushCache(TRUE); 199 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 200 uhash_close(SHARED_DATA_HASHTABLE); 201 SHARED_DATA_HASHTABLE = NULL; 202 } 203 } 204 gSharedDataInitOnce.reset(); 205 return (SHARED_DATA_HASHTABLE == NULL); 206 } 207 U_CDECL_END 208 209 210 /** Initializes the cache for resources */ 211 static void U_CALLCONV 212 createCache(UErrorCode &status) { 213 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status); 214 if (U_FAILURE(status)) { 215 SHARED_DATA_HASHTABLE = NULL; 216 } 217 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); 218 } 219 220 static void 221 initCache(UErrorCode *status) { 222 umtx_initOnce(gSharedDataInitOnce, &createCache, *status); 223 } 224 225 static UBool U_CALLCONV 226 loadData(UStringPrepProfile* profile, 227 const char* path, 228 const char* name, 229 const char* type, 230 UErrorCode* errorCode) { 231 /* load Unicode SPREP data from file */ 232 UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; 233 UDataMemory *dataMemory; 234 const int32_t *p=NULL; 235 const uint8_t *pb; 236 UVersionInfo normUnicodeVersion; 237 int32_t normUniVer, sprepUniVer, normCorrVer; 238 239 if(errorCode==NULL || U_FAILURE(*errorCode)) { 240 return 0; 241 } 242 243 /* open the data outside the mutex block */ 244 //TODO: change the path 245 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); 246 if(U_FAILURE(*errorCode)) { 247 return FALSE; 248 } 249 250 p=(const int32_t *)udata_getMemory(dataMemory); 251 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); 252 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); 253 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; 254 255 256 if(U_FAILURE(*errorCode)) { 257 udata_close(dataMemory); 258 return FALSE; 259 } 260 261 /* in the mutex block, set the data for this process */ 262 umtx_lock(&usprepMutex); 263 if(profile->sprepData==NULL) { 264 profile->sprepData=dataMemory; 265 dataMemory=NULL; 266 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); 267 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); 268 } else { 269 p=(const int32_t *)udata_getMemory(profile->sprepData); 270 } 271 umtx_unlock(&usprepMutex); 272 /* initialize some variables */ 273 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); 274 275 u_getUnicodeVersion(normUnicodeVersion); 276 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 277 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); 278 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 279 (dataVersion[2] << 8 ) + (dataVersion[3]); 280 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; 281 282 if(U_FAILURE(*errorCode)){ 283 udata_close(dataMemory); 284 return FALSE; 285 } 286 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ 287 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ 288 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ 289 ){ 290 *errorCode = U_INVALID_FORMAT_ERROR; 291 udata_close(dataMemory); 292 return FALSE; 293 } 294 profile->isDataLoaded = TRUE; 295 296 /* if a different thread set it first, then close the extra data */ 297 if(dataMemory!=NULL) { 298 udata_close(dataMemory); /* NULL if it was set correctly */ 299 } 300 301 302 return profile->isDataLoaded; 303 } 304 305 static UStringPrepProfile* 306 usprep_getProfile(const char* path, 307 const char* name, 308 UErrorCode *status){ 309 310 UStringPrepProfile* profile = NULL; 311 312 initCache(status); 313 314 if(U_FAILURE(*status)){ 315 return NULL; 316 } 317 318 UStringPrepKey stackKey; 319 /* 320 * const is cast way to save malloc, strcpy and free calls 321 * we use the passed in pointers for fetching the data from the 322 * hash table which is safe 323 */ 324 stackKey.name = (char*) name; 325 stackKey.path = (char*) path; 326 327 /* fetch the data from the cache */ 328 umtx_lock(&usprepMutex); 329 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 330 if(profile != NULL) { 331 profile->refCount++; 332 } 333 umtx_unlock(&usprepMutex); 334 335 if(profile == NULL) { 336 /* else load the data and put the data in the cache */ 337 LocalMemory<UStringPrepProfile> newProfile; 338 if(newProfile.allocateInsteadAndReset() == NULL) { 339 *status = U_MEMORY_ALLOCATION_ERROR; 340 return NULL; 341 } 342 343 /* load the data */ 344 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ 345 return NULL; 346 } 347 348 /* get the options */ 349 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); 350 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); 351 352 LocalMemory<UStringPrepKey> key; 353 LocalMemory<char> keyName; 354 LocalMemory<char> keyPath; 355 if( key.allocateInsteadAndReset() == NULL || 356 keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL || 357 (path != NULL && 358 keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL) 359 ) { 360 *status = U_MEMORY_ALLOCATION_ERROR; 361 usprep_unload(newProfile.getAlias()); 362 return NULL; 363 } 364 365 umtx_lock(&usprepMutex); 366 // If another thread already inserted the same key/value, refcount and cleanup our thread data 367 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 368 if(profile != NULL) { 369 profile->refCount++; 370 usprep_unload(newProfile.getAlias()); 371 } 372 else { 373 /* initialize the key members */ 374 key->name = keyName.orphan(); 375 uprv_strcpy(key->name, name); 376 if(path != NULL){ 377 key->path = keyPath.orphan(); 378 uprv_strcpy(key->path, path); 379 } 380 profile = newProfile.orphan(); 381 382 /* add the data object to the cache */ 383 profile->refCount = 1; 384 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); 385 } 386 umtx_unlock(&usprepMutex); 387 } 388 389 return profile; 390 } 391 392 U_CAPI UStringPrepProfile* U_EXPORT2 393 usprep_open(const char* path, 394 const char* name, 395 UErrorCode* status){ 396 397 if(status == NULL || U_FAILURE(*status)){ 398 return NULL; 399 } 400 401 /* initialize the profile struct members */ 402 return usprep_getProfile(path,name,status); 403 } 404 405 U_CAPI UStringPrepProfile* U_EXPORT2 406 usprep_openByType(UStringPrepProfileType type, 407 UErrorCode* status) { 408 if(status == NULL || U_FAILURE(*status)){ 409 return NULL; 410 } 411 int32_t index = (int32_t)type; 412 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) { 413 *status = U_ILLEGAL_ARGUMENT_ERROR; 414 return NULL; 415 } 416 return usprep_open(NULL, PROFILE_NAMES[index], status); 417 } 418 419 U_CAPI void U_EXPORT2 420 usprep_close(UStringPrepProfile* profile){ 421 if(profile==NULL){ 422 return; 423 } 424 425 umtx_lock(&usprepMutex); 426 /* decrement the ref count*/ 427 if(profile->refCount > 0){ 428 profile->refCount--; 429 } 430 umtx_unlock(&usprepMutex); 431 432 } 433 434 U_CFUNC void 435 uprv_syntaxError(const UChar* rules, 436 int32_t pos, 437 int32_t rulesLen, 438 UParseError* parseError){ 439 if(parseError == NULL){ 440 return; 441 } 442 parseError->offset = pos; 443 parseError->line = 0 ; // we are not using line numbers 444 445 // for pre-context 446 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); 447 int32_t limit = pos; 448 449 u_memcpy(parseError->preContext,rules+start,limit-start); 450 //null terminate the buffer 451 parseError->preContext[limit-start] = 0; 452 453 // for post-context; include error rules[pos] 454 start = pos; 455 limit = start + (U_PARSE_CONTEXT_LEN-1); 456 if (limit > rulesLen) { 457 limit = rulesLen; 458 } 459 if (start < rulesLen) { 460 u_memcpy(parseError->postContext,rules+start,limit-start); 461 } 462 //null terminate the buffer 463 parseError->postContext[limit-start]= 0; 464 } 465 466 467 static inline UStringPrepType 468 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ 469 470 UStringPrepType type; 471 if(trieWord == 0){ 472 /* 473 * Initial value stored in the mapping table 474 * just return USPREP_TYPE_LIMIT .. so that 475 * the source codepoint is copied to the destination 476 */ 477 type = USPREP_TYPE_LIMIT; 478 isIndex =FALSE; 479 value = 0; 480 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ 481 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); 482 isIndex =FALSE; 483 value = 0; 484 }else{ 485 /* get the type */ 486 type = USPREP_MAP; 487 /* ascertain if the value is index or delta */ 488 if(trieWord & 0x02){ 489 isIndex = TRUE; 490 value = trieWord >> 2; //mask off the lower 2 bits and shift 491 }else{ 492 isIndex = FALSE; 493 value = (int16_t)trieWord; 494 value = (value >> 2); 495 } 496 497 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ 498 type = USPREP_DELETE; 499 isIndex =FALSE; 500 value = 0; 501 } 502 } 503 return type; 504 } 505 506 // TODO: change to writing to UnicodeString not UChar * 507 static int32_t 508 usprep_map( const UStringPrepProfile* profile, 509 const UChar* src, int32_t srcLength, 510 UChar* dest, int32_t destCapacity, 511 int32_t options, 512 UParseError* parseError, 513 UErrorCode* status ){ 514 515 uint16_t result; 516 int32_t destIndex=0; 517 int32_t srcIndex; 518 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); 519 UStringPrepType type; 520 int16_t value; 521 UBool isIndex; 522 const int32_t* indexes = profile->indexes; 523 524 // no error checking the caller check for error and arguments 525 // no string length check the caller finds out the string length 526 527 for(srcIndex=0;srcIndex<srcLength;){ 528 UChar32 ch; 529 530 U16_NEXT(src,srcIndex,srcLength,ch); 531 532 result=0; 533 534 UTRIE_GET16(&profile->sprepTrie,ch,result); 535 536 type = getValues(result, value, isIndex); 537 538 // check if the source codepoint is unassigned 539 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ 540 541 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); 542 *status = U_STRINGPREP_UNASSIGNED_ERROR; 543 return 0; 544 545 }else if(type == USPREP_MAP){ 546 547 int32_t index, length; 548 549 if(isIndex){ 550 index = value; 551 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 552 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 553 length = 1; 554 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 555 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 556 length = 2; 557 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 558 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 559 length = 3; 560 }else{ 561 length = profile->mappingData[index++]; 562 563 } 564 565 /* copy mapping to destination */ 566 for(int32_t i=0; i< length; i++){ 567 if(destIndex < destCapacity ){ 568 dest[destIndex] = profile->mappingData[index+i]; 569 } 570 destIndex++; /* for pre-flighting */ 571 } 572 continue; 573 }else{ 574 // subtract the delta to arrive at the code point 575 ch -= value; 576 } 577 578 }else if(type==USPREP_DELETE){ 579 // just consume the codepoint and contine 580 continue; 581 } 582 //copy the code point into destination 583 if(ch <= 0xFFFF){ 584 if(destIndex < destCapacity ){ 585 dest[destIndex] = (UChar)ch; 586 } 587 destIndex++; 588 }else{ 589 if(destIndex+1 < destCapacity ){ 590 dest[destIndex] = U16_LEAD(ch); 591 dest[destIndex+1] = U16_TRAIL(ch); 592 } 593 destIndex +=2; 594 } 595 596 } 597 598 return u_terminateUChars(dest, destCapacity, destIndex, status); 599 } 600 601 /* 602 1) Map -- For each character in the input, check if it has a mapping 603 and, if so, replace it with its mapping. 604 605 2) Normalize -- Possibly normalize the result of step 1 using Unicode 606 normalization. 607 608 3) Prohibit -- Check for any characters that are not allowed in the 609 output. If any are found, return an error. 610 611 4) Check bidi -- Possibly check for right-to-left characters, and if 612 any are found, make sure that the whole string satisfies the 613 requirements for bidirectional strings. If the string does not 614 satisfy the requirements for bidirectional strings, return an 615 error. 616 [Unicode3.2] defines several bidirectional categories; each character 617 has one bidirectional category assigned to it. For the purposes of 618 the requirements below, an "RandALCat character" is a character that 619 has Unicode bidirectional categories "R" or "AL"; an "LCat character" 620 is a character that has Unicode bidirectional category "L". Note 621 622 623 that there are many characters which fall in neither of the above 624 definitions; Latin digits (<U+0030> through <U+0039>) are examples of 625 this because they have bidirectional category "EN". 626 627 In any profile that specifies bidirectional character handling, all 628 three of the following requirements MUST be met: 629 630 1) The characters in section 5.8 MUST be prohibited. 631 632 2) If a string contains any RandALCat character, the string MUST NOT 633 contain any LCat character. 634 635 3) If a string contains any RandALCat character, a RandALCat 636 character MUST be the first character of the string, and a 637 RandALCat character MUST be the last character of the string. 638 */ 639 U_CAPI int32_t U_EXPORT2 640 usprep_prepare( const UStringPrepProfile* profile, 641 const UChar* src, int32_t srcLength, 642 UChar* dest, int32_t destCapacity, 643 int32_t options, 644 UParseError* parseError, 645 UErrorCode* status ){ 646 647 // check error status 648 if(U_FAILURE(*status)){ 649 return 0; 650 } 651 652 //check arguments 653 if(profile==NULL || 654 (src==NULL ? srcLength!=0 : srcLength<-1) || 655 (dest==NULL ? destCapacity!=0 : destCapacity<0)) { 656 *status=U_ILLEGAL_ARGUMENT_ERROR; 657 return 0; 658 } 659 660 //get the string length 661 if(srcLength < 0){ 662 srcLength = u_strlen(src); 663 } 664 // map 665 UnicodeString s1; 666 UChar *b1 = s1.getBuffer(srcLength); 667 if(b1==NULL){ 668 *status = U_MEMORY_ALLOCATION_ERROR; 669 return 0; 670 } 671 int32_t b1Len = usprep_map(profile, src, srcLength, 672 b1, s1.getCapacity(), options, parseError, status); 673 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); 674 675 if(*status == U_BUFFER_OVERFLOW_ERROR){ 676 // redo processing of string 677 /* we do not have enough room so grow the buffer*/ 678 b1 = s1.getBuffer(b1Len); 679 if(b1==NULL){ 680 *status = U_MEMORY_ALLOCATION_ERROR; 681 return 0; 682 } 683 684 *status = U_ZERO_ERROR; // reset error 685 b1Len = usprep_map(profile, src, srcLength, 686 b1, s1.getCapacity(), options, parseError, status); 687 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); 688 } 689 if(U_FAILURE(*status)){ 690 return 0; 691 } 692 693 // normalize 694 UnicodeString s2; 695 if(profile->doNFKC){ 696 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status); 697 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status)); 698 if(U_FAILURE(*status)){ 699 return 0; 700 } 701 fn2.normalize(s1, s2, *status); 702 }else{ 703 s2.fastCopyFrom(s1); 704 } 705 if(U_FAILURE(*status)){ 706 return 0; 707 } 708 709 // Prohibit and checkBiDi in one pass 710 const UChar *b2 = s2.getBuffer(); 711 int32_t b2Len = s2.length(); 712 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 713 UBool leftToRight=FALSE, rightToLeft=FALSE; 714 int32_t rtlPos =-1, ltrPos =-1; 715 716 for(int32_t b2Index=0; b2Index<b2Len;){ 717 UChar32 ch = 0; 718 U16_NEXT(b2, b2Index, b2Len, ch); 719 720 uint16_t result; 721 UTRIE_GET16(&profile->sprepTrie,ch,result); 722 723 int16_t value; 724 UBool isIndex; 725 UStringPrepType type = getValues(result, value, isIndex); 726 727 if( type == USPREP_PROHIBITED || 728 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) 729 ){ 730 *status = U_STRINGPREP_PROHIBITED_ERROR; 731 uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError); 732 return 0; 733 } 734 735 if(profile->checkBiDi) { 736 direction = ubidi_getClass(ch); 737 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ 738 firstCharDir = direction; 739 } 740 if(direction == U_LEFT_TO_RIGHT){ 741 leftToRight = TRUE; 742 ltrPos = b2Index-1; 743 } 744 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 745 rightToLeft = TRUE; 746 rtlPos = b2Index-1; 747 } 748 } 749 } 750 if(profile->checkBiDi == TRUE){ 751 // satisfy 2 752 if( leftToRight == TRUE && rightToLeft == TRUE){ 753 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 754 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); 755 return 0; 756 } 757 758 //satisfy 3 759 if( rightToLeft == TRUE && 760 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 761 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 762 ){ 763 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 764 uprv_syntaxError(b2, rtlPos, b2Len, parseError); 765 return FALSE; 766 } 767 } 768 return s2.extract(dest, destCapacity, *status); 769 } 770 771 772 /* data swapping ------------------------------------------------------------ */ 773 774 U_CAPI int32_t U_EXPORT2 775 usprep_swap(const UDataSwapper *ds, 776 const void *inData, int32_t length, void *outData, 777 UErrorCode *pErrorCode) { 778 const UDataInfo *pInfo; 779 int32_t headerSize; 780 781 const uint8_t *inBytes; 782 uint8_t *outBytes; 783 784 const int32_t *inIndexes; 785 int32_t indexes[16]; 786 787 int32_t i, offset, count, size; 788 789 /* udata_swapDataHeader checks the arguments */ 790 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 791 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 792 return 0; 793 } 794 795 /* check data format and format version */ 796 pInfo=(const UDataInfo *)((const char *)inData+4); 797 if(!( 798 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 799 pInfo->dataFormat[1]==0x50 && 800 pInfo->dataFormat[2]==0x52 && 801 pInfo->dataFormat[3]==0x50 && 802 pInfo->formatVersion[0]==3 803 )) { 804 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", 805 pInfo->dataFormat[0], pInfo->dataFormat[1], 806 pInfo->dataFormat[2], pInfo->dataFormat[3], 807 pInfo->formatVersion[0]); 808 *pErrorCode=U_UNSUPPORTED_ERROR; 809 return 0; 810 } 811 812 inBytes=(const uint8_t *)inData+headerSize; 813 outBytes=(uint8_t *)outData+headerSize; 814 815 inIndexes=(const int32_t *)inBytes; 816 817 if(length>=0) { 818 length-=headerSize; 819 if(length<16*4) { 820 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", 821 length); 822 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 823 return 0; 824 } 825 } 826 827 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ 828 for(i=0; i<16; ++i) { 829 indexes[i]=udata_readInt32(ds, inIndexes[i]); 830 } 831 832 /* calculate the total length of the data */ 833 size= 834 16*4+ /* size of indexes[] */ 835 indexes[_SPREP_INDEX_TRIE_SIZE]+ 836 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 837 838 if(length>=0) { 839 if(length<size) { 840 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", 841 length); 842 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 843 return 0; 844 } 845 846 /* copy the data for inaccessible bytes */ 847 if(inBytes!=outBytes) { 848 uprv_memcpy(outBytes, inBytes, size); 849 } 850 851 offset=0; 852 853 /* swap the int32_t indexes[] */ 854 count=16*4; 855 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 856 offset+=count; 857 858 /* swap the UTrie */ 859 count=indexes[_SPREP_INDEX_TRIE_SIZE]; 860 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 861 offset+=count; 862 863 /* swap the uint16_t mappingTable[] */ 864 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 865 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 866 //offset+=count; 867 } 868 869 return headerSize+size; 870 } 871 872 #endif /* #if !UCONFIG_NO_IDNA */ 873