1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2012, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: usprep.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003jul2 14 * created by: Ram Viswanadha 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_IDNA 20 21 #include "unicode/usprep.h" 22 23 #include "unicode/unorm.h" 24 #include "unicode/ustring.h" 25 #include "unicode/uchar.h" 26 #include "unicode/uversion.h" 27 #include "umutex.h" 28 #include "cmemory.h" 29 #include "sprpimpl.h" 30 #include "ustr_imp.h" 31 #include "uhash.h" 32 #include "cstring.h" 33 #include "udataswp.h" 34 #include "ucln_cmn.h" 35 #include "ubidi_props.h" 36 37 U_NAMESPACE_USE 38 39 U_CDECL_BEGIN 40 41 /* 42 Static cache for already opened StringPrep profiles 43 */ 44 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 45 46 static UMutex usprepMutex = U_MUTEX_INITIALIZER; 47 48 /* format version of spp file */ 49 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; 50 51 /* the Unicode version of the sprep data */ 52 static UVersionInfo dataVersion={ 0, 0, 0, 0 }; 53 54 /* Profile names must be aligned to UStringPrepProfileType */ 55 static const char * const PROFILE_NAMES[] = { 56 "rfc3491", /* USPREP_RFC3491_NAMEPREP */ 57 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */ 58 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */ 59 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */ 60 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */ 61 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */ 62 "rfc3722", /* USPREP_RFC3722_ISCSI */ 63 "rfc3920node", /* USPREP_RFC3920_NODEPREP */ 64 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */ 65 "rfc4011", /* USPREP_RFC4011_MIB */ 66 "rfc4013", /* USPREP_RFC4013_SASLPREP */ 67 "rfc4505", /* USPREP_RFC4505_TRACE */ 68 "rfc4518", /* USPREP_RFC4518_LDAP */ 69 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */ 70 }; 71 72 static UBool U_CALLCONV 73 isSPrepAcceptable(void * /* context */, 74 const char * /* type */, 75 const char * /* name */, 76 const UDataInfo *pInfo) { 77 if( 78 pInfo->size>=20 && 79 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 80 pInfo->charsetFamily==U_CHARSET_FAMILY && 81 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 82 pInfo->dataFormat[1]==0x50 && 83 pInfo->dataFormat[2]==0x52 && 84 pInfo->dataFormat[3]==0x50 && 85 pInfo->formatVersion[0]==3 && 86 pInfo->formatVersion[2]==UTRIE_SHIFT && 87 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT 88 ) { 89 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4); 90 uprv_memcpy(dataVersion, pInfo->dataVersion, 4); 91 return TRUE; 92 } else { 93 return FALSE; 94 } 95 } 96 97 static int32_t U_CALLCONV 98 getSPrepFoldingOffset(uint32_t data) { 99 100 return (int32_t)data; 101 102 } 103 104 /* hashes an entry */ 105 static int32_t U_CALLCONV 106 hashEntry(const UHashTok parm) { 107 UStringPrepKey *b = (UStringPrepKey *)parm.pointer; 108 UHashTok namekey, pathkey; 109 namekey.pointer = b->name; 110 pathkey.pointer = b->path; 111 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey); 112 } 113 114 /* compares two entries */ 115 static UBool U_CALLCONV 116 compareEntries(const UHashTok p1, const UHashTok p2) { 117 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; 118 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; 119 UHashTok name1, name2, path1, path2; 120 name1.pointer = b1->name; 121 name2.pointer = b2->name; 122 path1.pointer = b1->path; 123 path2.pointer = b2->path; 124 return ((UBool)(uhash_compareChars(name1, name2) & 125 uhash_compareChars(path1, path2))); 126 } 127 128 static void 129 usprep_unload(UStringPrepProfile* data){ 130 udata_close(data->sprepData); 131 } 132 133 static int32_t 134 usprep_internal_flushCache(UBool noRefCount){ 135 UStringPrepProfile *profile = NULL; 136 UStringPrepKey *key = NULL; 137 int32_t pos = -1; 138 int32_t deletedNum = 0; 139 const UHashElement *e; 140 141 /* 142 * if shared data hasn't even been lazy evaluated yet 143 * return 0 144 */ 145 umtx_lock(&usprepMutex); 146 if (SHARED_DATA_HASHTABLE == NULL) { 147 umtx_unlock(&usprepMutex); 148 return 0; 149 } 150 151 /*creates an enumeration to iterate through every element in the table */ 152 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) 153 { 154 profile = (UStringPrepProfile *) e->value.pointer; 155 key = (UStringPrepKey *) e->key.pointer; 156 157 if ((noRefCount== FALSE && profile->refCount == 0) || 158 noRefCount== TRUE) { 159 deletedNum++; 160 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 161 162 /* unload the data */ 163 usprep_unload(profile); 164 165 if(key->name != NULL) { 166 uprv_free(key->name); 167 key->name=NULL; 168 } 169 if(key->path != NULL) { 170 uprv_free(key->path); 171 key->path=NULL; 172 } 173 uprv_free(profile); 174 uprv_free(key); 175 } 176 177 } 178 umtx_unlock(&usprepMutex); 179 180 return deletedNum; 181 } 182 183 /* Works just like ucnv_flushCache() 184 static int32_t 185 usprep_flushCache(){ 186 return usprep_internal_flushCache(FALSE); 187 } 188 */ 189 190 static UBool U_CALLCONV usprep_cleanup(void){ 191 if (SHARED_DATA_HASHTABLE != NULL) { 192 usprep_internal_flushCache(TRUE); 193 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 194 uhash_close(SHARED_DATA_HASHTABLE); 195 SHARED_DATA_HASHTABLE = NULL; 196 } 197 } 198 199 return (SHARED_DATA_HASHTABLE == NULL); 200 } 201 U_CDECL_END 202 203 204 /** Initializes the cache for resources */ 205 static void 206 initCache(UErrorCode *status) { 207 UBool makeCache; 208 UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE == NULL), makeCache); 209 if(makeCache) { 210 UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status); 211 if (U_SUCCESS(*status)) { 212 umtx_lock(&usprepMutex); 213 if(SHARED_DATA_HASHTABLE == NULL) { 214 SHARED_DATA_HASHTABLE = newCache; 215 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); 216 newCache = NULL; 217 } 218 umtx_unlock(&usprepMutex); 219 } 220 if(newCache != NULL) { 221 uhash_close(newCache); 222 } 223 } 224 } 225 226 static UBool U_CALLCONV 227 loadData(UStringPrepProfile* profile, 228 const char* path, 229 const char* name, 230 const char* type, 231 UErrorCode* errorCode) { 232 /* load Unicode SPREP data from file */ 233 UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; 234 UDataMemory *dataMemory; 235 const int32_t *p=NULL; 236 const uint8_t *pb; 237 UVersionInfo normUnicodeVersion; 238 int32_t normUniVer, sprepUniVer, normCorrVer; 239 240 if(errorCode==NULL || U_FAILURE(*errorCode)) { 241 return 0; 242 } 243 244 /* open the data outside the mutex block */ 245 //TODO: change the path 246 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); 247 if(U_FAILURE(*errorCode)) { 248 return FALSE; 249 } 250 251 p=(const int32_t *)udata_getMemory(dataMemory); 252 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); 253 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); 254 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; 255 256 257 if(U_FAILURE(*errorCode)) { 258 udata_close(dataMemory); 259 return FALSE; 260 } 261 262 /* in the mutex block, set the data for this process */ 263 umtx_lock(&usprepMutex); 264 if(profile->sprepData==NULL) { 265 profile->sprepData=dataMemory; 266 dataMemory=NULL; 267 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); 268 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); 269 } else { 270 p=(const int32_t *)udata_getMemory(profile->sprepData); 271 } 272 umtx_unlock(&usprepMutex); 273 /* initialize some variables */ 274 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); 275 276 u_getUnicodeVersion(normUnicodeVersion); 277 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 278 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); 279 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 280 (dataVersion[2] << 8 ) + (dataVersion[3]); 281 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; 282 283 if(U_FAILURE(*errorCode)){ 284 udata_close(dataMemory); 285 return FALSE; 286 } 287 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ 288 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ 289 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ 290 ){ 291 *errorCode = U_INVALID_FORMAT_ERROR; 292 udata_close(dataMemory); 293 return FALSE; 294 } 295 profile->isDataLoaded = TRUE; 296 297 /* if a different thread set it first, then close the extra data */ 298 if(dataMemory!=NULL) { 299 udata_close(dataMemory); /* NULL if it was set correctly */ 300 } 301 302 303 return profile->isDataLoaded; 304 } 305 306 static UStringPrepProfile* 307 usprep_getProfile(const char* path, 308 const char* name, 309 UErrorCode *status){ 310 311 UStringPrepProfile* profile = NULL; 312 313 initCache(status); 314 315 if(U_FAILURE(*status)){ 316 return NULL; 317 } 318 319 UStringPrepKey stackKey; 320 /* 321 * const is cast way to save malloc, strcpy and free calls 322 * we use the passed in pointers for fetching the data from the 323 * hash table which is safe 324 */ 325 stackKey.name = (char*) name; 326 stackKey.path = (char*) path; 327 328 /* fetch the data from the cache */ 329 umtx_lock(&usprepMutex); 330 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 331 if(profile != NULL) { 332 profile->refCount++; 333 } 334 umtx_unlock(&usprepMutex); 335 336 if(profile == NULL) { 337 /* else load the data and put the data in the cache */ 338 LocalMemory<UStringPrepProfile> newProfile; 339 if(newProfile.allocateInsteadAndReset() == NULL) { 340 *status = U_MEMORY_ALLOCATION_ERROR; 341 return NULL; 342 } 343 344 /* load the data */ 345 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ 346 return NULL; 347 } 348 349 /* get the options */ 350 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); 351 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); 352 353 if(newProfile->checkBiDi) { 354 newProfile->bdp = ubidi_getSingleton(); 355 } 356 357 LocalMemory<UStringPrepKey> key; 358 LocalMemory<char> keyName; 359 LocalMemory<char> keyPath; 360 if( key.allocateInsteadAndReset() == NULL || 361 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL || 362 (path != NULL && 363 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL) 364 ) { 365 *status = U_MEMORY_ALLOCATION_ERROR; 366 usprep_unload(newProfile.getAlias()); 367 return NULL; 368 } 369 370 umtx_lock(&usprepMutex); 371 // If another thread already inserted the same key/value, refcount and cleanup our thread data 372 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 373 if(profile != NULL) { 374 profile->refCount++; 375 usprep_unload(newProfile.getAlias()); 376 } 377 else { 378 /* initialize the key members */ 379 key->name = keyName.orphan(); 380 uprv_strcpy(key->name, name); 381 if(path != NULL){ 382 key->path = keyPath.orphan(); 383 uprv_strcpy(key->path, path); 384 } 385 profile = newProfile.orphan(); 386 387 /* add the data object to the cache */ 388 profile->refCount = 1; 389 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); 390 } 391 umtx_unlock(&usprepMutex); 392 } 393 394 return profile; 395 } 396 397 U_CAPI UStringPrepProfile* U_EXPORT2 398 usprep_open(const char* path, 399 const char* name, 400 UErrorCode* status){ 401 402 if(status == NULL || U_FAILURE(*status)){ 403 return NULL; 404 } 405 406 /* initialize the profile struct members */ 407 return usprep_getProfile(path,name,status); 408 } 409 410 U_CAPI UStringPrepProfile* U_EXPORT2 411 usprep_openByType(UStringPrepProfileType type, 412 UErrorCode* status) { 413 if(status == NULL || U_FAILURE(*status)){ 414 return NULL; 415 } 416 int32_t index = (int32_t)type; 417 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) { 418 *status = U_ILLEGAL_ARGUMENT_ERROR; 419 return NULL; 420 } 421 return usprep_open(NULL, PROFILE_NAMES[index], status); 422 } 423 424 U_CAPI void U_EXPORT2 425 usprep_close(UStringPrepProfile* profile){ 426 if(profile==NULL){ 427 return; 428 } 429 430 umtx_lock(&usprepMutex); 431 /* decrement the ref count*/ 432 if(profile->refCount > 0){ 433 profile->refCount--; 434 } 435 umtx_unlock(&usprepMutex); 436 437 } 438 439 U_CFUNC void 440 uprv_syntaxError(const UChar* rules, 441 int32_t pos, 442 int32_t rulesLen, 443 UParseError* parseError){ 444 if(parseError == NULL){ 445 return; 446 } 447 parseError->offset = pos; 448 parseError->line = 0 ; // we are not using line numbers 449 450 // for pre-context 451 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); 452 int32_t limit = pos; 453 454 u_memcpy(parseError->preContext,rules+start,limit-start); 455 //null terminate the buffer 456 parseError->preContext[limit-start] = 0; 457 458 // for post-context; include error rules[pos] 459 start = pos; 460 limit = start + (U_PARSE_CONTEXT_LEN-1); 461 if (limit > rulesLen) { 462 limit = rulesLen; 463 } 464 if (start < rulesLen) { 465 u_memcpy(parseError->postContext,rules+start,limit-start); 466 } 467 //null terminate the buffer 468 parseError->postContext[limit-start]= 0; 469 } 470 471 472 static inline UStringPrepType 473 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ 474 475 UStringPrepType type; 476 if(trieWord == 0){ 477 /* 478 * Initial value stored in the mapping table 479 * just return USPREP_TYPE_LIMIT .. so that 480 * the source codepoint is copied to the destination 481 */ 482 type = USPREP_TYPE_LIMIT; 483 isIndex =FALSE; 484 value = 0; 485 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ 486 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); 487 isIndex =FALSE; 488 value = 0; 489 }else{ 490 /* get the type */ 491 type = USPREP_MAP; 492 /* ascertain if the value is index or delta */ 493 if(trieWord & 0x02){ 494 isIndex = TRUE; 495 value = trieWord >> 2; //mask off the lower 2 bits and shift 496 }else{ 497 isIndex = FALSE; 498 value = (int16_t)trieWord; 499 value = (value >> 2); 500 } 501 502 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ 503 type = USPREP_DELETE; 504 isIndex =FALSE; 505 value = 0; 506 } 507 } 508 return type; 509 } 510 511 512 513 static int32_t 514 usprep_map( const UStringPrepProfile* profile, 515 const UChar* src, int32_t srcLength, 516 UChar* dest, int32_t destCapacity, 517 int32_t options, 518 UParseError* parseError, 519 UErrorCode* status ){ 520 521 uint16_t result; 522 int32_t destIndex=0; 523 int32_t srcIndex; 524 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); 525 UStringPrepType type; 526 int16_t value; 527 UBool isIndex; 528 const int32_t* indexes = profile->indexes; 529 530 // no error checking the caller check for error and arguments 531 // no string length check the caller finds out the string length 532 533 for(srcIndex=0;srcIndex<srcLength;){ 534 UChar32 ch; 535 536 U16_NEXT(src,srcIndex,srcLength,ch); 537 538 result=0; 539 540 UTRIE_GET16(&profile->sprepTrie,ch,result); 541 542 type = getValues(result, value, isIndex); 543 544 // check if the source codepoint is unassigned 545 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ 546 547 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); 548 *status = U_STRINGPREP_UNASSIGNED_ERROR; 549 return 0; 550 551 }else if(type == USPREP_MAP){ 552 553 int32_t index, length; 554 555 if(isIndex){ 556 index = value; 557 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 558 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 559 length = 1; 560 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 561 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 562 length = 2; 563 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 564 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 565 length = 3; 566 }else{ 567 length = profile->mappingData[index++]; 568 569 } 570 571 /* copy mapping to destination */ 572 for(int32_t i=0; i< length; i++){ 573 if(destIndex < destCapacity ){ 574 dest[destIndex] = profile->mappingData[index+i]; 575 } 576 destIndex++; /* for pre-flighting */ 577 } 578 continue; 579 }else{ 580 // subtract the delta to arrive at the code point 581 ch -= value; 582 } 583 584 }else if(type==USPREP_DELETE){ 585 // just consume the codepoint and contine 586 continue; 587 } 588 //copy the code point into destination 589 if(ch <= 0xFFFF){ 590 if(destIndex < destCapacity ){ 591 dest[destIndex] = (UChar)ch; 592 } 593 destIndex++; 594 }else{ 595 if(destIndex+1 < destCapacity ){ 596 dest[destIndex] = U16_LEAD(ch); 597 dest[destIndex+1] = U16_TRAIL(ch); 598 } 599 destIndex +=2; 600 } 601 602 } 603 604 return u_terminateUChars(dest, destCapacity, destIndex, status); 605 } 606 607 608 static int32_t 609 usprep_normalize( const UChar* src, int32_t srcLength, 610 UChar* dest, int32_t destCapacity, 611 UErrorCode* status ){ 612 return unorm_normalize( 613 src, srcLength, 614 UNORM_NFKC, UNORM_UNICODE_3_2, 615 dest, destCapacity, 616 status); 617 } 618 619 620 /* 621 1) Map -- For each character in the input, check if it has a mapping 622 and, if so, replace it with its mapping. 623 624 2) Normalize -- Possibly normalize the result of step 1 using Unicode 625 normalization. 626 627 3) Prohibit -- Check for any characters that are not allowed in the 628 output. If any are found, return an error. 629 630 4) Check bidi -- Possibly check for right-to-left characters, and if 631 any are found, make sure that the whole string satisfies the 632 requirements for bidirectional strings. If the string does not 633 satisfy the requirements for bidirectional strings, return an 634 error. 635 [Unicode3.2] defines several bidirectional categories; each character 636 has one bidirectional category assigned to it. For the purposes of 637 the requirements below, an "RandALCat character" is a character that 638 has Unicode bidirectional categories "R" or "AL"; an "LCat character" 639 is a character that has Unicode bidirectional category "L". Note 640 641 642 that there are many characters which fall in neither of the above 643 definitions; Latin digits (<U+0030> through <U+0039>) are examples of 644 this because they have bidirectional category "EN". 645 646 In any profile that specifies bidirectional character handling, all 647 three of the following requirements MUST be met: 648 649 1) The characters in section 5.8 MUST be prohibited. 650 651 2) If a string contains any RandALCat character, the string MUST NOT 652 contain any LCat character. 653 654 3) If a string contains any RandALCat character, a RandALCat 655 character MUST be the first character of the string, and a 656 RandALCat character MUST be the last character of the string. 657 */ 658 659 #define MAX_STACK_BUFFER_SIZE 300 660 661 662 U_CAPI int32_t U_EXPORT2 663 usprep_prepare( const UStringPrepProfile* profile, 664 const UChar* src, int32_t srcLength, 665 UChar* dest, int32_t destCapacity, 666 int32_t options, 667 UParseError* parseError, 668 UErrorCode* status ){ 669 670 // check error status 671 if(status == NULL || U_FAILURE(*status)){ 672 return 0; 673 } 674 675 //check arguments 676 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 677 *status=U_ILLEGAL_ARGUMENT_ERROR; 678 return 0; 679 } 680 681 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE]; 682 UChar *b1 = b1Stack, *b2 = b2Stack; 683 int32_t b1Len, b2Len=0, 684 b1Capacity = MAX_STACK_BUFFER_SIZE , 685 b2Capacity = MAX_STACK_BUFFER_SIZE; 686 uint16_t result; 687 int32_t b2Index = 0; 688 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 689 UBool leftToRight=FALSE, rightToLeft=FALSE; 690 int32_t rtlPos =-1, ltrPos =-1; 691 692 //get the string length 693 if(srcLength == -1){ 694 srcLength = u_strlen(src); 695 } 696 // map 697 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status); 698 699 if(*status == U_BUFFER_OVERFLOW_ERROR){ 700 // redo processing of string 701 /* we do not have enough room so grow the buffer*/ 702 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 703 if(b1==NULL){ 704 *status = U_MEMORY_ALLOCATION_ERROR; 705 goto CLEANUP; 706 } 707 708 *status = U_ZERO_ERROR; // reset error 709 710 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status); 711 712 } 713 714 // normalize 715 if(profile->doNFKC == TRUE){ 716 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status); 717 718 if(*status == U_BUFFER_OVERFLOW_ERROR){ 719 // redo processing of string 720 /* we do not have enough room so grow the buffer*/ 721 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 722 if(b2==NULL){ 723 *status = U_MEMORY_ALLOCATION_ERROR; 724 goto CLEANUP; 725 } 726 727 *status = U_ZERO_ERROR; // reset error 728 729 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status); 730 731 } 732 733 }else{ 734 b2 = b1; 735 b2Len = b1Len; 736 } 737 738 739 if(U_FAILURE(*status)){ 740 goto CLEANUP; 741 } 742 743 UChar32 ch; 744 UStringPrepType type; 745 int16_t value; 746 UBool isIndex; 747 748 // Prohibit and checkBiDi in one pass 749 for(b2Index=0; b2Index<b2Len;){ 750 751 ch = 0; 752 753 U16_NEXT(b2, b2Index, b2Len, ch); 754 755 UTRIE_GET16(&profile->sprepTrie,ch,result); 756 757 type = getValues(result, value, isIndex); 758 759 if( type == USPREP_PROHIBITED || 760 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) 761 ){ 762 *status = U_STRINGPREP_PROHIBITED_ERROR; 763 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); 764 goto CLEANUP; 765 } 766 767 if(profile->checkBiDi) { 768 direction = ubidi_getClass(profile->bdp, ch); 769 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ 770 firstCharDir = direction; 771 } 772 if(direction == U_LEFT_TO_RIGHT){ 773 leftToRight = TRUE; 774 ltrPos = b2Index-1; 775 } 776 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 777 rightToLeft = TRUE; 778 rtlPos = b2Index-1; 779 } 780 } 781 } 782 if(profile->checkBiDi == TRUE){ 783 // satisfy 2 784 if( leftToRight == TRUE && rightToLeft == TRUE){ 785 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 786 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); 787 goto CLEANUP; 788 } 789 790 //satisfy 3 791 if( rightToLeft == TRUE && 792 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 793 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 794 ){ 795 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 796 uprv_syntaxError(b2, rtlPos, b2Len, parseError); 797 return FALSE; 798 } 799 } 800 if(b2Len>0 && b2Len <= destCapacity){ 801 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR); 802 } 803 804 CLEANUP: 805 if(b1!=b1Stack){ 806 uprv_free(b1); 807 b1=NULL; 808 } 809 810 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){ 811 uprv_free(b2); 812 b2=NULL; 813 } 814 return u_terminateUChars(dest, destCapacity, b2Len, status); 815 } 816 817 818 /* data swapping ------------------------------------------------------------ */ 819 820 U_CAPI int32_t U_EXPORT2 821 usprep_swap(const UDataSwapper *ds, 822 const void *inData, int32_t length, void *outData, 823 UErrorCode *pErrorCode) { 824 const UDataInfo *pInfo; 825 int32_t headerSize; 826 827 const uint8_t *inBytes; 828 uint8_t *outBytes; 829 830 const int32_t *inIndexes; 831 int32_t indexes[16]; 832 833 int32_t i, offset, count, size; 834 835 /* udata_swapDataHeader checks the arguments */ 836 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 837 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 838 return 0; 839 } 840 841 /* check data format and format version */ 842 pInfo=(const UDataInfo *)((const char *)inData+4); 843 if(!( 844 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 845 pInfo->dataFormat[1]==0x50 && 846 pInfo->dataFormat[2]==0x52 && 847 pInfo->dataFormat[3]==0x50 && 848 pInfo->formatVersion[0]==3 849 )) { 850 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", 851 pInfo->dataFormat[0], pInfo->dataFormat[1], 852 pInfo->dataFormat[2], pInfo->dataFormat[3], 853 pInfo->formatVersion[0]); 854 *pErrorCode=U_UNSUPPORTED_ERROR; 855 return 0; 856 } 857 858 inBytes=(const uint8_t *)inData+headerSize; 859 outBytes=(uint8_t *)outData+headerSize; 860 861 inIndexes=(const int32_t *)inBytes; 862 863 if(length>=0) { 864 length-=headerSize; 865 if(length<16*4) { 866 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", 867 length); 868 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 869 return 0; 870 } 871 } 872 873 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ 874 for(i=0; i<16; ++i) { 875 indexes[i]=udata_readInt32(ds, inIndexes[i]); 876 } 877 878 /* calculate the total length of the data */ 879 size= 880 16*4+ /* size of indexes[] */ 881 indexes[_SPREP_INDEX_TRIE_SIZE]+ 882 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 883 884 if(length>=0) { 885 if(length<size) { 886 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", 887 length); 888 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 889 return 0; 890 } 891 892 /* copy the data for inaccessible bytes */ 893 if(inBytes!=outBytes) { 894 uprv_memcpy(outBytes, inBytes, size); 895 } 896 897 offset=0; 898 899 /* swap the int32_t indexes[] */ 900 count=16*4; 901 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 902 offset+=count; 903 904 /* swap the UTrie */ 905 count=indexes[_SPREP_INDEX_TRIE_SIZE]; 906 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 907 offset+=count; 908 909 /* swap the uint16_t mappingTable[] */ 910 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 911 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 912 offset+=count; 913 } 914 915 return headerSize+size; 916 } 917 918 #endif /* #if !UCONFIG_NO_IDNA */ 919