1 /* 2 ********************************************************************** 3 * Copyright (C) 2008-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8 #include "unicode/utypes.h" 9 #include "unicode/uspoof.h" 10 #include "unicode/uchar.h" 11 #include "unicode/uniset.h" 12 #include "unicode/utf16.h" 13 #include "utrie2.h" 14 #include "cmemory.h" 15 #include "cstring.h" 16 #include "identifier_info.h" 17 #include "scriptset.h" 18 #include "udatamem.h" 19 #include "umutex.h" 20 #include "udataswp.h" 21 #include "uassert.h" 22 #include "uspoof_impl.h" 23 24 #if !UCONFIG_NO_NORMALIZATION 25 26 27 U_NAMESPACE_BEGIN 28 29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl) 30 31 SpoofImpl::SpoofImpl(SpoofData *data, UErrorCode &status) : 32 fMagic(0), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) , 33 fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) { 34 if (U_FAILURE(status)) { 35 return; 36 } 37 fSpoofData = data; 38 fRestrictionLevel = USPOOF_HIGHLY_RESTRICTIVE; 39 40 UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff); 41 allowedCharsSet->freeze(); 42 fAllowedCharsSet = allowedCharsSet; 43 fAllowedLocales = uprv_strdup(""); 44 if (fAllowedCharsSet == NULL || fAllowedLocales == NULL) { 45 status = U_MEMORY_ALLOCATION_ERROR; 46 return; 47 } 48 fMagic = USPOOF_MAGIC; 49 } 50 51 52 SpoofImpl::SpoofImpl() : 53 fMagic(USPOOF_MAGIC), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) , 54 fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) { 55 UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff); 56 allowedCharsSet->freeze(); 57 fAllowedCharsSet = allowedCharsSet; 58 fAllowedLocales = uprv_strdup(""); 59 fRestrictionLevel = USPOOF_HIGHLY_RESTRICTIVE; 60 } 61 62 63 // Copy Constructor, used by the user level clone() function. 64 SpoofImpl::SpoofImpl(const SpoofImpl &src, UErrorCode &status) : 65 fMagic(0), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) , 66 fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) { 67 if (U_FAILURE(status)) { 68 return; 69 } 70 fMagic = src.fMagic; 71 fChecks = src.fChecks; 72 if (src.fSpoofData != NULL) { 73 fSpoofData = src.fSpoofData->addReference(); 74 } 75 fAllowedCharsSet = static_cast<const UnicodeSet *>(src.fAllowedCharsSet->clone()); 76 if (fAllowedCharsSet == NULL) { 77 status = U_MEMORY_ALLOCATION_ERROR; 78 } 79 fAllowedLocales = uprv_strdup(src.fAllowedLocales); 80 fRestrictionLevel = src.fRestrictionLevel; 81 } 82 83 SpoofImpl::~SpoofImpl() { 84 fMagic = 0; // head off application errors by preventing use of 85 // of deleted objects. 86 if (fSpoofData != NULL) { 87 fSpoofData->removeReference(); // Will delete if refCount goes to zero. 88 } 89 delete fAllowedCharsSet; 90 uprv_free((void *)fAllowedLocales); 91 delete fCachedIdentifierInfo; 92 } 93 94 // 95 // Incoming parameter check on Status and the SpoofChecker object 96 // received from the C API. 97 // 98 const SpoofImpl *SpoofImpl::validateThis(const USpoofChecker *sc, UErrorCode &status) { 99 if (U_FAILURE(status)) { 100 return NULL; 101 } 102 if (sc == NULL) { 103 status = U_ILLEGAL_ARGUMENT_ERROR; 104 return NULL; 105 } 106 SpoofImpl *This = (SpoofImpl *)sc; 107 if (This->fMagic != USPOOF_MAGIC || 108 This->fSpoofData == NULL) { 109 status = U_INVALID_FORMAT_ERROR; 110 return NULL; 111 } 112 if (!SpoofData::validateDataVersion(This->fSpoofData->fRawData, status)) { 113 return NULL; 114 } 115 return This; 116 } 117 118 SpoofImpl *SpoofImpl::validateThis(USpoofChecker *sc, UErrorCode &status) { 119 return const_cast<SpoofImpl *> 120 (SpoofImpl::validateThis(const_cast<const USpoofChecker *>(sc), status)); 121 } 122 123 124 125 //-------------------------------------------------------------------------------------- 126 // 127 // confusableLookup() This is the heart of the confusable skeleton generation 128 // implementation. 129 // 130 // Given a source character, produce the corresponding 131 // replacement character(s), appending them to the dest string. 132 // 133 //--------------------------------------------------------------------------------------- 134 int32_t SpoofImpl::confusableLookup(UChar32 inChar, int32_t tableMask, UnicodeString &dest) const { 135 136 // Binary search the spoof data key table for the inChar 137 int32_t *low = fSpoofData->fCFUKeys; 138 int32_t *mid = NULL; 139 int32_t *limit = low + fSpoofData->fRawData->fCFUKeysSize; 140 UChar32 midc; 141 do { 142 int32_t delta = ((int32_t)(limit-low))/2; 143 mid = low + delta; 144 midc = *mid & 0x1fffff; 145 if (inChar == midc) { 146 goto foundChar; 147 } else if (inChar < midc) { 148 limit = mid; 149 } else { 150 low = mid; 151 } 152 } while (low < limit-1); 153 mid = low; 154 midc = *mid & 0x1fffff; 155 if (inChar != midc) { 156 // Char not found. It maps to itself. 157 int i = 0; 158 dest.append(inChar); 159 return i; 160 } 161 foundChar: 162 int32_t keyFlags = *mid & 0xff000000; 163 if ((keyFlags & tableMask) == 0) { 164 // We found the right key char, but the entry doesn't pertain to the 165 // table we need. See if there is an adjacent key that does 166 if (keyFlags & USPOOF_KEY_MULTIPLE_VALUES) { 167 int32_t *altMid; 168 for (altMid = mid-1; (*altMid&0x00ffffff) == inChar; altMid--) { 169 keyFlags = *altMid & 0xff000000; 170 if (keyFlags & tableMask) { 171 mid = altMid; 172 goto foundKey; 173 } 174 } 175 for (altMid = mid+1; (*altMid&0x00ffffff) == inChar; altMid++) { 176 keyFlags = *altMid & 0xff000000; 177 if (keyFlags & tableMask) { 178 mid = altMid; 179 goto foundKey; 180 } 181 } 182 } 183 // No key entry for this char & table. 184 // The input char maps to itself. 185 int i = 0; 186 dest.append(inChar); 187 return i; 188 } 189 190 foundKey: 191 int32_t stringLen = USPOOF_KEY_LENGTH_FIELD(keyFlags) + 1; 192 int32_t keyTableIndex = (int32_t)(mid - fSpoofData->fCFUKeys); 193 194 // Value is either a UChar (for strings of length 1) or 195 // an index into the string table (for longer strings) 196 uint16_t value = fSpoofData->fCFUValues[keyTableIndex]; 197 if (stringLen == 1) { 198 dest.append((UChar)value); 199 return 1; 200 } 201 202 // String length of 4 from the above lookup is used for all strings of length >= 4. 203 // For these, get the real length from the string lengths table, 204 // which maps string table indexes to lengths. 205 // All strings of the same length are stored contiguously in the string table. 206 // 'value' from the lookup above is the starting index for the desired string. 207 208 int32_t ix; 209 if (stringLen == 4) { 210 int32_t stringLengthsLimit = fSpoofData->fRawData->fCFUStringLengthsSize; 211 for (ix = 0; ix < stringLengthsLimit; ix++) { 212 if (fSpoofData->fCFUStringLengths[ix].fLastString >= value) { 213 stringLen = fSpoofData->fCFUStringLengths[ix].fStrLength; 214 break; 215 } 216 } 217 U_ASSERT(ix < stringLengthsLimit); 218 } 219 220 U_ASSERT(value + stringLen <= fSpoofData->fRawData->fCFUStringTableLen); 221 UChar *src = &fSpoofData->fCFUStrings[value]; 222 dest.append(src, stringLen); 223 return stringLen; 224 } 225 226 227 //--------------------------------------------------------------------------------------- 228 // 229 // wholeScriptCheck() 230 // 231 // Input text is already normalized to NFD 232 // Return the set of scripts, each of which can represent something that is 233 // confusable with the input text. The script of the input text 234 // is included; input consisting of characters from a single script will 235 // always produce a result consisting of a set containing that script. 236 // 237 //--------------------------------------------------------------------------------------- 238 void SpoofImpl::wholeScriptCheck( 239 const UnicodeString &text, ScriptSet *result, UErrorCode &status) const { 240 241 UTrie2 *table = 242 (fChecks & USPOOF_ANY_CASE) ? fSpoofData->fAnyCaseTrie : fSpoofData->fLowerCaseTrie; 243 result->setAll(); 244 int32_t length = text.length(); 245 for (int32_t inputIdx=0; inputIdx < length;) { 246 UChar32 c = text.char32At(inputIdx); 247 inputIdx += U16_LENGTH(c); 248 uint32_t index = utrie2_get32(table, c); 249 if (index == 0) { 250 // No confusables in another script for this char. 251 // TODO: we should change the data to have sets with just the single script 252 // bit for the script of this char. Gets rid of this special case. 253 // Until then, grab the script from the char and intersect it with the set. 254 UScriptCode cpScript = uscript_getScript(c, &status); 255 U_ASSERT(cpScript > USCRIPT_INHERITED); 256 result->intersect(cpScript, status); 257 } else if (index == 1) { 258 // Script == Common or Inherited. Nothing to do. 259 } else { 260 result->intersect(fSpoofData->fScriptSets[index]); 261 } 262 } 263 } 264 265 266 void SpoofImpl::setAllowedLocales(const char *localesList, UErrorCode &status) { 267 UnicodeSet allowedChars; 268 UnicodeSet *tmpSet = NULL; 269 const char *locStart = localesList; 270 const char *locEnd = NULL; 271 const char *localesListEnd = localesList + uprv_strlen(localesList); 272 int32_t localeListCount = 0; // Number of locales provided by caller. 273 274 // Loop runs once per locale from the localesList, a comma separated list of locales. 275 do { 276 locEnd = uprv_strchr(locStart, ','); 277 if (locEnd == NULL) { 278 locEnd = localesListEnd; 279 } 280 while (*locStart == ' ') { 281 locStart++; 282 } 283 const char *trimmedEnd = locEnd-1; 284 while (trimmedEnd > locStart && *trimmedEnd == ' ') { 285 trimmedEnd--; 286 } 287 if (trimmedEnd <= locStart) { 288 break; 289 } 290 const char *locale = uprv_strndup(locStart, (int32_t)(trimmedEnd + 1 - locStart)); 291 localeListCount++; 292 293 // We have one locale from the locales list. 294 // Add the script chars for this locale to the accumulating set of allowed chars. 295 // If the locale is no good, we will be notified back via status. 296 addScriptChars(locale, &allowedChars, status); 297 uprv_free((void *)locale); 298 if (U_FAILURE(status)) { 299 break; 300 } 301 locStart = locEnd + 1; 302 } while (locStart < localesListEnd); 303 304 // If our caller provided an empty list of locales, we disable the allowed characters checking 305 if (localeListCount == 0) { 306 uprv_free((void *)fAllowedLocales); 307 fAllowedLocales = uprv_strdup(""); 308 tmpSet = new UnicodeSet(0, 0x10ffff); 309 if (fAllowedLocales == NULL || tmpSet == NULL) { 310 status = U_MEMORY_ALLOCATION_ERROR; 311 return; 312 } 313 tmpSet->freeze(); 314 delete fAllowedCharsSet; 315 fAllowedCharsSet = tmpSet; 316 fChecks &= ~USPOOF_CHAR_LIMIT; 317 return; 318 } 319 320 321 // Add all common and inherited characters to the set of allowed chars. 322 UnicodeSet tempSet; 323 tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status); 324 allowedChars.addAll(tempSet); 325 tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status); 326 allowedChars.addAll(tempSet); 327 328 // If anything went wrong, we bail out without changing 329 // the state of the spoof checker. 330 if (U_FAILURE(status)) { 331 return; 332 } 333 334 // Store the updated spoof checker state. 335 tmpSet = static_cast<UnicodeSet *>(allowedChars.clone()); 336 const char *tmpLocalesList = uprv_strdup(localesList); 337 if (tmpSet == NULL || tmpLocalesList == NULL) { 338 status = U_MEMORY_ALLOCATION_ERROR; 339 return; 340 } 341 uprv_free((void *)fAllowedLocales); 342 fAllowedLocales = tmpLocalesList; 343 tmpSet->freeze(); 344 delete fAllowedCharsSet; 345 fAllowedCharsSet = tmpSet; 346 fChecks |= USPOOF_CHAR_LIMIT; 347 } 348 349 350 const char * SpoofImpl::getAllowedLocales(UErrorCode &/*status*/) { 351 return fAllowedLocales; 352 } 353 354 355 // Given a locale (a language), add all the characters from all of the scripts used with that language 356 // to the allowedChars UnicodeSet 357 358 void SpoofImpl::addScriptChars(const char *locale, UnicodeSet *allowedChars, UErrorCode &status) { 359 UScriptCode scripts[30]; 360 361 int32_t numScripts = uscript_getCode(locale, scripts, sizeof(scripts)/sizeof(UScriptCode), &status); 362 if (U_FAILURE(status)) { 363 return; 364 } 365 if (status == U_USING_DEFAULT_WARNING) { 366 status = U_ILLEGAL_ARGUMENT_ERROR; 367 return; 368 } 369 UnicodeSet tmpSet; 370 int32_t i; 371 for (i=0; i<numScripts; i++) { 372 tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status); 373 allowedChars->addAll(tmpSet); 374 } 375 } 376 377 378 // Convert a text format hex number. Utility function used by builder code. Static. 379 // Input: UChar *string text. Output: a UChar32 380 // Input has been pre-checked, and will have no non-hex chars. 381 // The number must fall in the code point range of 0..0x10ffff 382 // Static Function. 383 UChar32 SpoofImpl::ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status) { 384 if (U_FAILURE(status)) { 385 return 0; 386 } 387 U_ASSERT(limit-start > 0); 388 uint32_t val = 0; 389 int i; 390 for (i=start; i<limit; i++) { 391 int digitVal = s[i] - 0x30; 392 if (digitVal>9) { 393 digitVal = 0xa + (s[i] - 0x41); // Upper Case 'A' 394 } 395 if (digitVal>15) { 396 digitVal = 0xa + (s[i] - 0x61); // Lower Case 'a' 397 } 398 U_ASSERT(digitVal <= 0xf); 399 val <<= 4; 400 val += digitVal; 401 } 402 if (val > 0x10ffff) { 403 status = U_PARSE_ERROR; 404 val = 0; 405 } 406 return (UChar32)val; 407 } 408 409 // IdentifierInfo Cache. IdentifierInfo objects are somewhat expensive to create. 410 // Maintain a one-element cache, which is sufficient to avoid repeatedly 411 // creating new ones unless we get multi-thread concurrency in spoof 412 // check operations, which should be statistically uncommon. 413 414 // These functions are used in place of new & delete of an IdentifierInfo. 415 // They will recycle the IdentifierInfo when possible. 416 // They are logically const, and used within const functions that must be thread safe. 417 IdentifierInfo *SpoofImpl::getIdentifierInfo(UErrorCode &status) const { 418 IdentifierInfo *returnIdInfo = NULL; 419 if (U_FAILURE(status)) { 420 return returnIdInfo; 421 } 422 SpoofImpl *nonConstThis = const_cast<SpoofImpl *>(this); 423 { 424 Mutex m; 425 returnIdInfo = nonConstThis->fCachedIdentifierInfo; 426 nonConstThis->fCachedIdentifierInfo = NULL; 427 } 428 if (returnIdInfo == NULL) { 429 returnIdInfo = new IdentifierInfo(status); 430 if (U_SUCCESS(status) && returnIdInfo == NULL) { 431 status = U_MEMORY_ALLOCATION_ERROR; 432 } 433 if (U_FAILURE(status) && returnIdInfo != NULL) { 434 delete returnIdInfo; 435 returnIdInfo = NULL; 436 } 437 } 438 return returnIdInfo; 439 } 440 441 442 void SpoofImpl::releaseIdentifierInfo(IdentifierInfo *idInfo) const { 443 if (idInfo != NULL) { 444 SpoofImpl *nonConstThis = const_cast<SpoofImpl *>(this); 445 { 446 Mutex m; 447 if (nonConstThis->fCachedIdentifierInfo == NULL) { 448 nonConstThis->fCachedIdentifierInfo = idInfo; 449 idInfo = NULL; 450 } 451 } 452 delete idInfo; 453 } 454 } 455 456 457 458 459 //---------------------------------------------------------------------------------------------- 460 // 461 // class SpoofData Implementation 462 // 463 //---------------------------------------------------------------------------------------------- 464 465 466 UBool SpoofData::validateDataVersion(const SpoofDataHeader *rawData, UErrorCode &status) { 467 if (U_FAILURE(status) || 468 rawData == NULL || 469 rawData->fMagic != USPOOF_MAGIC || 470 rawData->fFormatVersion[0] > 1 || 471 rawData->fFormatVersion[1] > 0) { 472 status = U_INVALID_FORMAT_ERROR; 473 return FALSE; 474 } 475 return TRUE; 476 } 477 478 // 479 // SpoofData::getDefault() - return a wrapper around the spoof data that is 480 // baked into the default ICU data. 481 // 482 SpoofData *SpoofData::getDefault(UErrorCode &status) { 483 // TODO: Cache it. Lazy create, keep until cleanup. 484 485 UDataMemory *udm = udata_open(NULL, "cfu", "confusables", &status); 486 if (U_FAILURE(status)) { 487 return NULL; 488 } 489 SpoofData *This = new SpoofData(udm, status); 490 if (U_FAILURE(status)) { 491 delete This; 492 return NULL; 493 } 494 if (This == NULL) { 495 status = U_MEMORY_ALLOCATION_ERROR; 496 } 497 return This; 498 } 499 500 501 SpoofData::SpoofData(UDataMemory *udm, UErrorCode &status) 502 { 503 reset(); 504 if (U_FAILURE(status)) { 505 return; 506 } 507 fRawData = reinterpret_cast<SpoofDataHeader *> 508 ((char *)(udm->pHeader) + udm->pHeader->dataHeader.headerSize); 509 fUDM = udm; 510 validateDataVersion(fRawData, status); 511 initPtrs(status); 512 } 513 514 515 SpoofData::SpoofData(const void *data, int32_t length, UErrorCode &status) 516 { 517 reset(); 518 if (U_FAILURE(status)) { 519 return; 520 } 521 if ((size_t)length < sizeof(SpoofDataHeader)) { 522 status = U_INVALID_FORMAT_ERROR; 523 return; 524 } 525 void *ncData = const_cast<void *>(data); 526 fRawData = static_cast<SpoofDataHeader *>(ncData); 527 if (length < fRawData->fLength) { 528 status = U_INVALID_FORMAT_ERROR; 529 return; 530 } 531 validateDataVersion(fRawData, status); 532 initPtrs(status); 533 } 534 535 536 // Spoof Data constructor for use from data builder. 537 // Initializes a new, empty data area that will be populated later. 538 SpoofData::SpoofData(UErrorCode &status) { 539 reset(); 540 if (U_FAILURE(status)) { 541 return; 542 } 543 fDataOwned = true; 544 fRefCount = 1; 545 546 // The spoof header should already be sized to be a multiple of 16 bytes. 547 // Just in case it's not, round it up. 548 uint32_t initialSize = (sizeof(SpoofDataHeader) + 15) & ~15; 549 U_ASSERT(initialSize == sizeof(SpoofDataHeader)); 550 551 fRawData = static_cast<SpoofDataHeader *>(uprv_malloc(initialSize)); 552 fMemLimit = initialSize; 553 if (fRawData == NULL) { 554 status = U_MEMORY_ALLOCATION_ERROR; 555 return; 556 } 557 uprv_memset(fRawData, 0, initialSize); 558 559 fRawData->fMagic = USPOOF_MAGIC; 560 fRawData->fFormatVersion[0] = 1; 561 fRawData->fFormatVersion[1] = 0; 562 fRawData->fFormatVersion[2] = 0; 563 fRawData->fFormatVersion[3] = 0; 564 initPtrs(status); 565 } 566 567 // reset() - initialize all fields. 568 // Should be updated if any new fields are added. 569 // Called by constructors to put things in a known initial state. 570 void SpoofData::reset() { 571 fRawData = NULL; 572 fDataOwned = FALSE; 573 fUDM = NULL; 574 fMemLimit = 0; 575 fRefCount = 1; 576 fCFUKeys = NULL; 577 fCFUValues = NULL; 578 fCFUStringLengths = NULL; 579 fCFUStrings = NULL; 580 fAnyCaseTrie = NULL; 581 fLowerCaseTrie = NULL; 582 fScriptSets = NULL; 583 } 584 585 586 // SpoofData::initPtrs() 587 // Initialize the pointers to the various sections of the raw data. 588 // 589 // This function is used both during the Trie building process (multiple 590 // times, as the individual data sections are added), and 591 // during the opening of a Spoof Checker from prebuilt data. 592 // 593 // The pointers for non-existent data sections (identified by an offset of 0) 594 // are set to NULL. 595 // 596 // Note: During building the data, adding each new data section 597 // reallocs the raw data area, which likely relocates it, which 598 // in turn requires reinitializing all of the pointers into it, hence 599 // multiple calls to this function during building. 600 // 601 void SpoofData::initPtrs(UErrorCode &status) { 602 fCFUKeys = NULL; 603 fCFUValues = NULL; 604 fCFUStringLengths = NULL; 605 fCFUStrings = NULL; 606 if (U_FAILURE(status)) { 607 return; 608 } 609 if (fRawData->fCFUKeys != 0) { 610 fCFUKeys = (int32_t *)((char *)fRawData + fRawData->fCFUKeys); 611 } 612 if (fRawData->fCFUStringIndex != 0) { 613 fCFUValues = (uint16_t *)((char *)fRawData + fRawData->fCFUStringIndex); 614 } 615 if (fRawData->fCFUStringLengths != 0) { 616 fCFUStringLengths = (SpoofStringLengthsElement *)((char *)fRawData + fRawData->fCFUStringLengths); 617 } 618 if (fRawData->fCFUStringTable != 0) { 619 fCFUStrings = (UChar *)((char *)fRawData + fRawData->fCFUStringTable); 620 } 621 622 if (fAnyCaseTrie == NULL && fRawData->fAnyCaseTrie != 0) { 623 fAnyCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 624 (char *)fRawData + fRawData->fAnyCaseTrie, fRawData->fAnyCaseTrieLength, NULL, &status); 625 } 626 if (fLowerCaseTrie == NULL && fRawData->fLowerCaseTrie != 0) { 627 fLowerCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 628 (char *)fRawData + fRawData->fLowerCaseTrie, fRawData->fLowerCaseTrieLength, NULL, &status); 629 } 630 631 if (fRawData->fScriptSets != 0) { 632 fScriptSets = (ScriptSet *)((char *)fRawData + fRawData->fScriptSets); 633 } 634 } 635 636 637 SpoofData::~SpoofData() { 638 utrie2_close(fAnyCaseTrie); 639 fAnyCaseTrie = NULL; 640 utrie2_close(fLowerCaseTrie); 641 fLowerCaseTrie = NULL; 642 if (fDataOwned) { 643 uprv_free(fRawData); 644 } 645 fRawData = NULL; 646 if (fUDM != NULL) { 647 udata_close(fUDM); 648 } 649 fUDM = NULL; 650 } 651 652 653 void SpoofData::removeReference() { 654 if (umtx_atomic_dec(&fRefCount) == 0) { 655 delete this; 656 } 657 } 658 659 660 SpoofData *SpoofData::addReference() { 661 umtx_atomic_inc(&fRefCount); 662 return this; 663 } 664 665 666 void *SpoofData::reserveSpace(int32_t numBytes, UErrorCode &status) { 667 if (U_FAILURE(status)) { 668 return NULL; 669 } 670 if (!fDataOwned) { 671 U_ASSERT(FALSE); 672 status = U_INTERNAL_PROGRAM_ERROR; 673 return NULL; 674 } 675 676 numBytes = (numBytes + 15) & ~15; // Round up to a multiple of 16 677 uint32_t returnOffset = fMemLimit; 678 fMemLimit += numBytes; 679 fRawData = static_cast<SpoofDataHeader *>(uprv_realloc(fRawData, fMemLimit)); 680 fRawData->fLength = fMemLimit; 681 uprv_memset((char *)fRawData + returnOffset, 0, numBytes); 682 initPtrs(status); 683 return (char *)fRawData + returnOffset; 684 } 685 686 687 U_NAMESPACE_END 688 689 U_NAMESPACE_USE 690 691 //----------------------------------------------------------------------------- 692 // 693 // uspoof_swap - byte swap and char encoding swap of spoof data 694 // 695 //----------------------------------------------------------------------------- 696 U_CAPI int32_t U_EXPORT2 697 uspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, 698 UErrorCode *status) { 699 700 if (status == NULL || U_FAILURE(*status)) { 701 return 0; 702 } 703 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { 704 *status=U_ILLEGAL_ARGUMENT_ERROR; 705 return 0; 706 } 707 708 // 709 // Check that the data header is for spoof data. 710 // (Header contents are defined in gencfu.cpp) 711 // 712 const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4); 713 if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="Cfu " */ 714 pInfo->dataFormat[1]==0x66 && 715 pInfo->dataFormat[2]==0x75 && 716 pInfo->dataFormat[3]==0x20 && 717 pInfo->formatVersion[0]==1 )) { 718 udata_printError(ds, "uspoof_swap(): data format %02x.%02x.%02x.%02x " 719 "(format version %02x %02x %02x %02x) is not recognized\n", 720 pInfo->dataFormat[0], pInfo->dataFormat[1], 721 pInfo->dataFormat[2], pInfo->dataFormat[3], 722 pInfo->formatVersion[0], pInfo->formatVersion[1], 723 pInfo->formatVersion[2], pInfo->formatVersion[3]); 724 *status=U_UNSUPPORTED_ERROR; 725 return 0; 726 } 727 728 // 729 // Swap the data header. (This is the generic ICU Data Header, not the uspoof Specific 730 // header). This swap also conveniently gets us 731 // the size of the ICU d.h., which lets us locate the start 732 // of the uspoof specific data. 733 // 734 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status); 735 736 737 // 738 // Get the Spoof Data Header, and check that it appears to be OK. 739 // 740 // 741 const uint8_t *inBytes =(const uint8_t *)inData+headerSize; 742 SpoofDataHeader *spoofDH = (SpoofDataHeader *)inBytes; 743 if (ds->readUInt32(spoofDH->fMagic) != USPOOF_MAGIC || 744 ds->readUInt32(spoofDH->fLength) < sizeof(SpoofDataHeader)) 745 { 746 udata_printError(ds, "uspoof_swap(): Spoof Data header is invalid.\n"); 747 *status=U_UNSUPPORTED_ERROR; 748 return 0; 749 } 750 751 // 752 // Prefight operation? Just return the size 753 // 754 int32_t spoofDataLength = ds->readUInt32(spoofDH->fLength); 755 int32_t totalSize = headerSize + spoofDataLength; 756 if (length < 0) { 757 return totalSize; 758 } 759 760 // 761 // Check that length passed in is consistent with length from Spoof data header. 762 // 763 if (length < totalSize) { 764 udata_printError(ds, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n", 765 spoofDataLength); 766 *status=U_INDEX_OUTOFBOUNDS_ERROR; 767 return 0; 768 } 769 770 771 // 772 // Swap the Data. Do the data itself first, then the Spoof Data Header, because 773 // we need to reference the header to locate the data, and an 774 // inplace swap of the header leaves it unusable. 775 // 776 uint8_t *outBytes = (uint8_t *)outData + headerSize; 777 SpoofDataHeader *outputDH = (SpoofDataHeader *)outBytes; 778 779 int32_t sectionStart; 780 int32_t sectionLength; 781 782 // 783 // If not swapping in place, zero out the output buffer before starting. 784 // Gaps may exist between the individual sections, and these must be zeroed in 785 // the output buffer. The simplest way to do that is to just zero the whole thing. 786 // 787 if (inBytes != outBytes) { 788 uprv_memset(outBytes, 0, spoofDataLength); 789 } 790 791 // Confusables Keys Section (fCFUKeys) 792 sectionStart = ds->readUInt32(spoofDH->fCFUKeys); 793 sectionLength = ds->readUInt32(spoofDH->fCFUKeysSize) * 4; 794 ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 795 796 // String Index Section 797 sectionStart = ds->readUInt32(spoofDH->fCFUStringIndex); 798 sectionLength = ds->readUInt32(spoofDH->fCFUStringIndexSize) * 2; 799 ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 800 801 // String Table Section 802 sectionStart = ds->readUInt32(spoofDH->fCFUStringTable); 803 sectionLength = ds->readUInt32(spoofDH->fCFUStringTableLen) * 2; 804 ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 805 806 // String Lengths Section 807 sectionStart = ds->readUInt32(spoofDH->fCFUStringLengths); 808 sectionLength = ds->readUInt32(spoofDH->fCFUStringLengthsSize) * 4; 809 ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 810 811 // Any Case Trie 812 sectionStart = ds->readUInt32(spoofDH->fAnyCaseTrie); 813 sectionLength = ds->readUInt32(spoofDH->fAnyCaseTrieLength); 814 utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 815 816 // Lower Case Trie 817 sectionStart = ds->readUInt32(spoofDH->fLowerCaseTrie); 818 sectionLength = ds->readUInt32(spoofDH->fLowerCaseTrieLength); 819 utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 820 821 // Script Sets. The data is an array of int32_t 822 sectionStart = ds->readUInt32(spoofDH->fScriptSets); 823 sectionLength = ds->readUInt32(spoofDH->fScriptSetsLength) * sizeof(ScriptSet); 824 ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 825 826 // And, last, swap the header itself. 827 // int32_t fMagic // swap this 828 // uint8_t fFormatVersion[4] // Do not swap this, just copy 829 // int32_t fLength and all the rest // Swap the rest, all is 32 bit stuff. 830 // 831 uint32_t magic = ds->readUInt32(spoofDH->fMagic); 832 ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic); 833 834 if (outputDH->fFormatVersion != spoofDH->fFormatVersion) { 835 uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion, sizeof(spoofDH->fFormatVersion)); 836 } 837 // swap starting at fLength 838 ds->swapArray32(ds, &spoofDH->fLength, sizeof(SpoofDataHeader)-8 /* minus magic and fFormatVersion[4] */, &outputDH->fLength, status); 839 840 return totalSize; 841 } 842 843 #endif 844 845 846