1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2013, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: usprep.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003jul2 14 * created by: Ram Viswanadha 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_IDNA 20 21 #include "unicode/usprep.h" 22 23 #include "unicode/unorm.h" 24 #include "unicode/ustring.h" 25 #include "unicode/uchar.h" 26 #include "unicode/uversion.h" 27 #include "umutex.h" 28 #include "cmemory.h" 29 #include "sprpimpl.h" 30 #include "ustr_imp.h" 31 #include "uhash.h" 32 #include "cstring.h" 33 #include "udataswp.h" 34 #include "ucln_cmn.h" 35 #include "ubidi_props.h" 36 37 U_NAMESPACE_USE 38 39 U_CDECL_BEGIN 40 41 /* 42 Static cache for already opened StringPrep profiles 43 */ 44 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 45 static icu::UInitOnce gSharedDataInitOnce; 46 47 static UMutex usprepMutex = U_MUTEX_INITIALIZER; 48 49 /* format version of spp file */ 50 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; 51 52 /* the Unicode version of the sprep data */ 53 static UVersionInfo dataVersion={ 0, 0, 0, 0 }; 54 55 /* Profile names must be aligned to UStringPrepProfileType */ 56 static const char * const PROFILE_NAMES[] = { 57 "rfc3491", /* USPREP_RFC3491_NAMEPREP */ 58 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */ 59 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */ 60 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */ 61 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */ 62 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */ 63 "rfc3722", /* USPREP_RFC3722_ISCSI */ 64 "rfc3920node", /* USPREP_RFC3920_NODEPREP */ 65 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */ 66 "rfc4011", /* USPREP_RFC4011_MIB */ 67 "rfc4013", /* USPREP_RFC4013_SASLPREP */ 68 "rfc4505", /* USPREP_RFC4505_TRACE */ 69 "rfc4518", /* USPREP_RFC4518_LDAP */ 70 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */ 71 }; 72 73 static UBool U_CALLCONV 74 isSPrepAcceptable(void * /* context */, 75 const char * /* type */, 76 const char * /* name */, 77 const UDataInfo *pInfo) { 78 if( 79 pInfo->size>=20 && 80 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 81 pInfo->charsetFamily==U_CHARSET_FAMILY && 82 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 83 pInfo->dataFormat[1]==0x50 && 84 pInfo->dataFormat[2]==0x52 && 85 pInfo->dataFormat[3]==0x50 && 86 pInfo->formatVersion[0]==3 && 87 pInfo->formatVersion[2]==UTRIE_SHIFT && 88 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT 89 ) { 90 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4); 91 uprv_memcpy(dataVersion, pInfo->dataVersion, 4); 92 return TRUE; 93 } else { 94 return FALSE; 95 } 96 } 97 98 static int32_t U_CALLCONV 99 getSPrepFoldingOffset(uint32_t data) { 100 101 return (int32_t)data; 102 103 } 104 105 /* hashes an entry */ 106 static int32_t U_CALLCONV 107 hashEntry(const UHashTok parm) { 108 UStringPrepKey *b = (UStringPrepKey *)parm.pointer; 109 UHashTok namekey, pathkey; 110 namekey.pointer = b->name; 111 pathkey.pointer = b->path; 112 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey); 113 } 114 115 /* compares two entries */ 116 static UBool U_CALLCONV 117 compareEntries(const UHashTok p1, const UHashTok p2) { 118 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; 119 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; 120 UHashTok name1, name2, path1, path2; 121 name1.pointer = b1->name; 122 name2.pointer = b2->name; 123 path1.pointer = b1->path; 124 path2.pointer = b2->path; 125 return ((UBool)(uhash_compareChars(name1, name2) & 126 uhash_compareChars(path1, path2))); 127 } 128 129 static void 130 usprep_unload(UStringPrepProfile* data){ 131 udata_close(data->sprepData); 132 } 133 134 static int32_t 135 usprep_internal_flushCache(UBool noRefCount){ 136 UStringPrepProfile *profile = NULL; 137 UStringPrepKey *key = NULL; 138 int32_t pos = -1; 139 int32_t deletedNum = 0; 140 const UHashElement *e; 141 142 /* 143 * if shared data hasn't even been lazy evaluated yet 144 * return 0 145 */ 146 umtx_lock(&usprepMutex); 147 if (SHARED_DATA_HASHTABLE == NULL) { 148 umtx_unlock(&usprepMutex); 149 return 0; 150 } 151 152 /*creates an enumeration to iterate through every element in the table */ 153 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) 154 { 155 profile = (UStringPrepProfile *) e->value.pointer; 156 key = (UStringPrepKey *) e->key.pointer; 157 158 if ((noRefCount== FALSE && profile->refCount == 0) || 159 noRefCount== TRUE) { 160 deletedNum++; 161 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 162 163 /* unload the data */ 164 usprep_unload(profile); 165 166 if(key->name != NULL) { 167 uprv_free(key->name); 168 key->name=NULL; 169 } 170 if(key->path != NULL) { 171 uprv_free(key->path); 172 key->path=NULL; 173 } 174 uprv_free(profile); 175 uprv_free(key); 176 } 177 178 } 179 umtx_unlock(&usprepMutex); 180 181 return deletedNum; 182 } 183 184 /* Works just like ucnv_flushCache() 185 static int32_t 186 usprep_flushCache(){ 187 return usprep_internal_flushCache(FALSE); 188 } 189 */ 190 191 static UBool U_CALLCONV usprep_cleanup(void){ 192 if (SHARED_DATA_HASHTABLE != NULL) { 193 usprep_internal_flushCache(TRUE); 194 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 195 uhash_close(SHARED_DATA_HASHTABLE); 196 SHARED_DATA_HASHTABLE = NULL; 197 } 198 } 199 gSharedDataInitOnce.reset(); 200 return (SHARED_DATA_HASHTABLE == NULL); 201 } 202 U_CDECL_END 203 204 205 /** Initializes the cache for resources */ 206 static void U_CALLCONV 207 createCache(UErrorCode &status) { 208 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status); 209 if (U_FAILURE(status)) { 210 SHARED_DATA_HASHTABLE = NULL; 211 } 212 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); 213 } 214 215 static void 216 initCache(UErrorCode *status) { 217 umtx_initOnce(gSharedDataInitOnce, &createCache, *status); 218 } 219 220 static UBool U_CALLCONV 221 loadData(UStringPrepProfile* profile, 222 const char* path, 223 const char* name, 224 const char* type, 225 UErrorCode* errorCode) { 226 /* load Unicode SPREP data from file */ 227 UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; 228 UDataMemory *dataMemory; 229 const int32_t *p=NULL; 230 const uint8_t *pb; 231 UVersionInfo normUnicodeVersion; 232 int32_t normUniVer, sprepUniVer, normCorrVer; 233 234 if(errorCode==NULL || U_FAILURE(*errorCode)) { 235 return 0; 236 } 237 238 /* open the data outside the mutex block */ 239 //TODO: change the path 240 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); 241 if(U_FAILURE(*errorCode)) { 242 return FALSE; 243 } 244 245 p=(const int32_t *)udata_getMemory(dataMemory); 246 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); 247 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); 248 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; 249 250 251 if(U_FAILURE(*errorCode)) { 252 udata_close(dataMemory); 253 return FALSE; 254 } 255 256 /* in the mutex block, set the data for this process */ 257 umtx_lock(&usprepMutex); 258 if(profile->sprepData==NULL) { 259 profile->sprepData=dataMemory; 260 dataMemory=NULL; 261 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); 262 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); 263 } else { 264 p=(const int32_t *)udata_getMemory(profile->sprepData); 265 } 266 umtx_unlock(&usprepMutex); 267 /* initialize some variables */ 268 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); 269 270 u_getUnicodeVersion(normUnicodeVersion); 271 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 272 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); 273 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 274 (dataVersion[2] << 8 ) + (dataVersion[3]); 275 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; 276 277 if(U_FAILURE(*errorCode)){ 278 udata_close(dataMemory); 279 return FALSE; 280 } 281 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ 282 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ 283 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ 284 ){ 285 *errorCode = U_INVALID_FORMAT_ERROR; 286 udata_close(dataMemory); 287 return FALSE; 288 } 289 profile->isDataLoaded = TRUE; 290 291 /* if a different thread set it first, then close the extra data */ 292 if(dataMemory!=NULL) { 293 udata_close(dataMemory); /* NULL if it was set correctly */ 294 } 295 296 297 return profile->isDataLoaded; 298 } 299 300 static UStringPrepProfile* 301 usprep_getProfile(const char* path, 302 const char* name, 303 UErrorCode *status){ 304 305 UStringPrepProfile* profile = NULL; 306 307 initCache(status); 308 309 if(U_FAILURE(*status)){ 310 return NULL; 311 } 312 313 UStringPrepKey stackKey; 314 /* 315 * const is cast way to save malloc, strcpy and free calls 316 * we use the passed in pointers for fetching the data from the 317 * hash table which is safe 318 */ 319 stackKey.name = (char*) name; 320 stackKey.path = (char*) path; 321 322 /* fetch the data from the cache */ 323 umtx_lock(&usprepMutex); 324 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 325 if(profile != NULL) { 326 profile->refCount++; 327 } 328 umtx_unlock(&usprepMutex); 329 330 if(profile == NULL) { 331 /* else load the data and put the data in the cache */ 332 LocalMemory<UStringPrepProfile> newProfile; 333 if(newProfile.allocateInsteadAndReset() == NULL) { 334 *status = U_MEMORY_ALLOCATION_ERROR; 335 return NULL; 336 } 337 338 /* load the data */ 339 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ 340 return NULL; 341 } 342 343 /* get the options */ 344 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); 345 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); 346 347 if(newProfile->checkBiDi) { 348 newProfile->bdp = ubidi_getSingleton(); 349 } 350 351 LocalMemory<UStringPrepKey> key; 352 LocalMemory<char> keyName; 353 LocalMemory<char> keyPath; 354 if( key.allocateInsteadAndReset() == NULL || 355 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL || 356 (path != NULL && 357 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL) 358 ) { 359 *status = U_MEMORY_ALLOCATION_ERROR; 360 usprep_unload(newProfile.getAlias()); 361 return NULL; 362 } 363 364 umtx_lock(&usprepMutex); 365 // If another thread already inserted the same key/value, refcount and cleanup our thread data 366 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 367 if(profile != NULL) { 368 profile->refCount++; 369 usprep_unload(newProfile.getAlias()); 370 } 371 else { 372 /* initialize the key members */ 373 key->name = keyName.orphan(); 374 uprv_strcpy(key->name, name); 375 if(path != NULL){ 376 key->path = keyPath.orphan(); 377 uprv_strcpy(key->path, path); 378 } 379 profile = newProfile.orphan(); 380 381 /* add the data object to the cache */ 382 profile->refCount = 1; 383 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); 384 } 385 umtx_unlock(&usprepMutex); 386 } 387 388 return profile; 389 } 390 391 U_CAPI UStringPrepProfile* U_EXPORT2 392 usprep_open(const char* path, 393 const char* name, 394 UErrorCode* status){ 395 396 if(status == NULL || U_FAILURE(*status)){ 397 return NULL; 398 } 399 400 /* initialize the profile struct members */ 401 return usprep_getProfile(path,name,status); 402 } 403 404 U_CAPI UStringPrepProfile* U_EXPORT2 405 usprep_openByType(UStringPrepProfileType type, 406 UErrorCode* status) { 407 if(status == NULL || U_FAILURE(*status)){ 408 return NULL; 409 } 410 int32_t index = (int32_t)type; 411 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) { 412 *status = U_ILLEGAL_ARGUMENT_ERROR; 413 return NULL; 414 } 415 return usprep_open(NULL, PROFILE_NAMES[index], status); 416 } 417 418 U_CAPI void U_EXPORT2 419 usprep_close(UStringPrepProfile* profile){ 420 if(profile==NULL){ 421 return; 422 } 423 424 umtx_lock(&usprepMutex); 425 /* decrement the ref count*/ 426 if(profile->refCount > 0){ 427 profile->refCount--; 428 } 429 umtx_unlock(&usprepMutex); 430 431 } 432 433 U_CFUNC void 434 uprv_syntaxError(const UChar* rules, 435 int32_t pos, 436 int32_t rulesLen, 437 UParseError* parseError){ 438 if(parseError == NULL){ 439 return; 440 } 441 parseError->offset = pos; 442 parseError->line = 0 ; // we are not using line numbers 443 444 // for pre-context 445 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); 446 int32_t limit = pos; 447 448 u_memcpy(parseError->preContext,rules+start,limit-start); 449 //null terminate the buffer 450 parseError->preContext[limit-start] = 0; 451 452 // for post-context; include error rules[pos] 453 start = pos; 454 limit = start + (U_PARSE_CONTEXT_LEN-1); 455 if (limit > rulesLen) { 456 limit = rulesLen; 457 } 458 if (start < rulesLen) { 459 u_memcpy(parseError->postContext,rules+start,limit-start); 460 } 461 //null terminate the buffer 462 parseError->postContext[limit-start]= 0; 463 } 464 465 466 static inline UStringPrepType 467 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ 468 469 UStringPrepType type; 470 if(trieWord == 0){ 471 /* 472 * Initial value stored in the mapping table 473 * just return USPREP_TYPE_LIMIT .. so that 474 * the source codepoint is copied to the destination 475 */ 476 type = USPREP_TYPE_LIMIT; 477 isIndex =FALSE; 478 value = 0; 479 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ 480 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); 481 isIndex =FALSE; 482 value = 0; 483 }else{ 484 /* get the type */ 485 type = USPREP_MAP; 486 /* ascertain if the value is index or delta */ 487 if(trieWord & 0x02){ 488 isIndex = TRUE; 489 value = trieWord >> 2; //mask off the lower 2 bits and shift 490 }else{ 491 isIndex = FALSE; 492 value = (int16_t)trieWord; 493 value = (value >> 2); 494 } 495 496 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ 497 type = USPREP_DELETE; 498 isIndex =FALSE; 499 value = 0; 500 } 501 } 502 return type; 503 } 504 505 506 507 static int32_t 508 usprep_map( const UStringPrepProfile* profile, 509 const UChar* src, int32_t srcLength, 510 UChar* dest, int32_t destCapacity, 511 int32_t options, 512 UParseError* parseError, 513 UErrorCode* status ){ 514 515 uint16_t result; 516 int32_t destIndex=0; 517 int32_t srcIndex; 518 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); 519 UStringPrepType type; 520 int16_t value; 521 UBool isIndex; 522 const int32_t* indexes = profile->indexes; 523 524 // no error checking the caller check for error and arguments 525 // no string length check the caller finds out the string length 526 527 for(srcIndex=0;srcIndex<srcLength;){ 528 UChar32 ch; 529 530 U16_NEXT(src,srcIndex,srcLength,ch); 531 532 result=0; 533 534 UTRIE_GET16(&profile->sprepTrie,ch,result); 535 536 type = getValues(result, value, isIndex); 537 538 // check if the source codepoint is unassigned 539 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ 540 541 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); 542 *status = U_STRINGPREP_UNASSIGNED_ERROR; 543 return 0; 544 545 }else if(type == USPREP_MAP){ 546 547 int32_t index, length; 548 549 if(isIndex){ 550 index = value; 551 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 552 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 553 length = 1; 554 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 555 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 556 length = 2; 557 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 558 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 559 length = 3; 560 }else{ 561 length = profile->mappingData[index++]; 562 563 } 564 565 /* copy mapping to destination */ 566 for(int32_t i=0; i< length; i++){ 567 if(destIndex < destCapacity ){ 568 dest[destIndex] = profile->mappingData[index+i]; 569 } 570 destIndex++; /* for pre-flighting */ 571 } 572 continue; 573 }else{ 574 // subtract the delta to arrive at the code point 575 ch -= value; 576 } 577 578 }else if(type==USPREP_DELETE){ 579 // just consume the codepoint and contine 580 continue; 581 } 582 //copy the code point into destination 583 if(ch <= 0xFFFF){ 584 if(destIndex < destCapacity ){ 585 dest[destIndex] = (UChar)ch; 586 } 587 destIndex++; 588 }else{ 589 if(destIndex+1 < destCapacity ){ 590 dest[destIndex] = U16_LEAD(ch); 591 dest[destIndex+1] = U16_TRAIL(ch); 592 } 593 destIndex +=2; 594 } 595 596 } 597 598 return u_terminateUChars(dest, destCapacity, destIndex, status); 599 } 600 601 602 static int32_t 603 usprep_normalize( const UChar* src, int32_t srcLength, 604 UChar* dest, int32_t destCapacity, 605 UErrorCode* status ){ 606 return unorm_normalize( 607 src, srcLength, 608 UNORM_NFKC, UNORM_UNICODE_3_2, 609 dest, destCapacity, 610 status); 611 } 612 613 614 /* 615 1) Map -- For each character in the input, check if it has a mapping 616 and, if so, replace it with its mapping. 617 618 2) Normalize -- Possibly normalize the result of step 1 using Unicode 619 normalization. 620 621 3) Prohibit -- Check for any characters that are not allowed in the 622 output. If any are found, return an error. 623 624 4) Check bidi -- Possibly check for right-to-left characters, and if 625 any are found, make sure that the whole string satisfies the 626 requirements for bidirectional strings. If the string does not 627 satisfy the requirements for bidirectional strings, return an 628 error. 629 [Unicode3.2] defines several bidirectional categories; each character 630 has one bidirectional category assigned to it. For the purposes of 631 the requirements below, an "RandALCat character" is a character that 632 has Unicode bidirectional categories "R" or "AL"; an "LCat character" 633 is a character that has Unicode bidirectional category "L". Note 634 635 636 that there are many characters which fall in neither of the above 637 definitions; Latin digits (<U+0030> through <U+0039>) are examples of 638 this because they have bidirectional category "EN". 639 640 In any profile that specifies bidirectional character handling, all 641 three of the following requirements MUST be met: 642 643 1) The characters in section 5.8 MUST be prohibited. 644 645 2) If a string contains any RandALCat character, the string MUST NOT 646 contain any LCat character. 647 648 3) If a string contains any RandALCat character, a RandALCat 649 character MUST be the first character of the string, and a 650 RandALCat character MUST be the last character of the string. 651 */ 652 653 #define MAX_STACK_BUFFER_SIZE 300 654 655 656 U_CAPI int32_t U_EXPORT2 657 usprep_prepare( const UStringPrepProfile* profile, 658 const UChar* src, int32_t srcLength, 659 UChar* dest, int32_t destCapacity, 660 int32_t options, 661 UParseError* parseError, 662 UErrorCode* status ){ 663 664 // check error status 665 if(status == NULL || U_FAILURE(*status)){ 666 return 0; 667 } 668 669 //check arguments 670 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 671 *status=U_ILLEGAL_ARGUMENT_ERROR; 672 return 0; 673 } 674 675 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE]; 676 UChar *b1 = b1Stack, *b2 = b2Stack; 677 int32_t b1Len, b2Len=0, 678 b1Capacity = MAX_STACK_BUFFER_SIZE , 679 b2Capacity = MAX_STACK_BUFFER_SIZE; 680 uint16_t result; 681 int32_t b2Index = 0; 682 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 683 UBool leftToRight=FALSE, rightToLeft=FALSE; 684 int32_t rtlPos =-1, ltrPos =-1; 685 686 //get the string length 687 if(srcLength == -1){ 688 srcLength = u_strlen(src); 689 } 690 // map 691 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status); 692 693 if(*status == U_BUFFER_OVERFLOW_ERROR){ 694 // redo processing of string 695 /* we do not have enough room so grow the buffer*/ 696 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 697 if(b1==NULL){ 698 *status = U_MEMORY_ALLOCATION_ERROR; 699 goto CLEANUP; 700 } 701 702 *status = U_ZERO_ERROR; // reset error 703 704 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status); 705 706 } 707 708 // normalize 709 if(profile->doNFKC == TRUE){ 710 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status); 711 712 if(*status == U_BUFFER_OVERFLOW_ERROR){ 713 // redo processing of string 714 /* we do not have enough room so grow the buffer*/ 715 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 716 if(b2==NULL){ 717 *status = U_MEMORY_ALLOCATION_ERROR; 718 goto CLEANUP; 719 } 720 721 *status = U_ZERO_ERROR; // reset error 722 723 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status); 724 725 } 726 727 }else{ 728 b2 = b1; 729 b2Len = b1Len; 730 } 731 732 733 if(U_FAILURE(*status)){ 734 goto CLEANUP; 735 } 736 737 UChar32 ch; 738 UStringPrepType type; 739 int16_t value; 740 UBool isIndex; 741 742 // Prohibit and checkBiDi in one pass 743 for(b2Index=0; b2Index<b2Len;){ 744 745 ch = 0; 746 747 U16_NEXT(b2, b2Index, b2Len, ch); 748 749 UTRIE_GET16(&profile->sprepTrie,ch,result); 750 751 type = getValues(result, value, isIndex); 752 753 if( type == USPREP_PROHIBITED || 754 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) 755 ){ 756 *status = U_STRINGPREP_PROHIBITED_ERROR; 757 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); 758 goto CLEANUP; 759 } 760 761 if(profile->checkBiDi) { 762 direction = ubidi_getClass(profile->bdp, ch); 763 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ 764 firstCharDir = direction; 765 } 766 if(direction == U_LEFT_TO_RIGHT){ 767 leftToRight = TRUE; 768 ltrPos = b2Index-1; 769 } 770 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 771 rightToLeft = TRUE; 772 rtlPos = b2Index-1; 773 } 774 } 775 } 776 if(profile->checkBiDi == TRUE){ 777 // satisfy 2 778 if( leftToRight == TRUE && rightToLeft == TRUE){ 779 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 780 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); 781 goto CLEANUP; 782 } 783 784 //satisfy 3 785 if( rightToLeft == TRUE && 786 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 787 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 788 ){ 789 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 790 uprv_syntaxError(b2, rtlPos, b2Len, parseError); 791 return FALSE; 792 } 793 } 794 if(b2Len>0 && b2Len <= destCapacity){ 795 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR); 796 } 797 798 CLEANUP: 799 if(b1!=b1Stack){ 800 uprv_free(b1); 801 b1=NULL; 802 } 803 804 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){ 805 uprv_free(b2); 806 b2=NULL; 807 } 808 return u_terminateUChars(dest, destCapacity, b2Len, status); 809 } 810 811 812 /* data swapping ------------------------------------------------------------ */ 813 814 U_CAPI int32_t U_EXPORT2 815 usprep_swap(const UDataSwapper *ds, 816 const void *inData, int32_t length, void *outData, 817 UErrorCode *pErrorCode) { 818 const UDataInfo *pInfo; 819 int32_t headerSize; 820 821 const uint8_t *inBytes; 822 uint8_t *outBytes; 823 824 const int32_t *inIndexes; 825 int32_t indexes[16]; 826 827 int32_t i, offset, count, size; 828 829 /* udata_swapDataHeader checks the arguments */ 830 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 831 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 832 return 0; 833 } 834 835 /* check data format and format version */ 836 pInfo=(const UDataInfo *)((const char *)inData+4); 837 if(!( 838 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 839 pInfo->dataFormat[1]==0x50 && 840 pInfo->dataFormat[2]==0x52 && 841 pInfo->dataFormat[3]==0x50 && 842 pInfo->formatVersion[0]==3 843 )) { 844 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", 845 pInfo->dataFormat[0], pInfo->dataFormat[1], 846 pInfo->dataFormat[2], pInfo->dataFormat[3], 847 pInfo->formatVersion[0]); 848 *pErrorCode=U_UNSUPPORTED_ERROR; 849 return 0; 850 } 851 852 inBytes=(const uint8_t *)inData+headerSize; 853 outBytes=(uint8_t *)outData+headerSize; 854 855 inIndexes=(const int32_t *)inBytes; 856 857 if(length>=0) { 858 length-=headerSize; 859 if(length<16*4) { 860 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", 861 length); 862 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 863 return 0; 864 } 865 } 866 867 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ 868 for(i=0; i<16; ++i) { 869 indexes[i]=udata_readInt32(ds, inIndexes[i]); 870 } 871 872 /* calculate the total length of the data */ 873 size= 874 16*4+ /* size of indexes[] */ 875 indexes[_SPREP_INDEX_TRIE_SIZE]+ 876 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 877 878 if(length>=0) { 879 if(length<size) { 880 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", 881 length); 882 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 883 return 0; 884 } 885 886 /* copy the data for inaccessible bytes */ 887 if(inBytes!=outBytes) { 888 uprv_memcpy(outBytes, inBytes, size); 889 } 890 891 offset=0; 892 893 /* swap the int32_t indexes[] */ 894 count=16*4; 895 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 896 offset+=count; 897 898 /* swap the UTrie */ 899 count=indexes[_SPREP_INDEX_TRIE_SIZE]; 900 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 901 offset+=count; 902 903 /* swap the uint16_t mappingTable[] */ 904 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 905 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 906 offset+=count; 907 } 908 909 return headerSize+size; 910 } 911 912 #endif /* #if !UCONFIG_NO_IDNA */ 913