1 /* 2 *************************************************************************** 3 * Copyright (C) 2008-2013, International Business Machines Corporation 4 * and others. All Rights Reserved. 5 *************************************************************************** 6 * file name: uspoof.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2008Feb13 12 * created by: Andy Heninger 13 * 14 * Unicode Spoof Detection 15 */ 16 #include "unicode/utypes.h" 17 #include "unicode/normalizer2.h" 18 #include "unicode/uspoof.h" 19 #include "unicode/ustring.h" 20 #include "unicode/utf16.h" 21 #include "cmemory.h" 22 #include "cstring.h" 23 #include "identifier_info.h" 24 #include "mutex.h" 25 #include "scriptset.h" 26 #include "uassert.h" 27 #include "ucln_in.h" 28 #include "uspoof_impl.h" 29 #include "umutex.h" 30 31 32 #if !UCONFIG_NO_NORMALIZATION 33 34 U_NAMESPACE_USE 35 36 37 // 38 // Static Objects used by the spoof impl, their thread safe initialization and their cleanup. 39 // 40 static UnicodeSet *gInclusionSet = NULL; 41 static UnicodeSet *gRecommendedSet = NULL; 42 static const Normalizer2 *gNfdNormalizer = NULL; 43 static UMutex gInitMutex = U_MUTEX_INITIALIZER; 44 45 static UBool U_CALLCONV 46 uspoof_cleanup(void) { 47 delete gInclusionSet; 48 gInclusionSet = NULL; 49 delete gRecommendedSet; 50 gRecommendedSet = NULL; 51 gNfdNormalizer = NULL; 52 return TRUE; 53 } 54 55 static void initializeStatics() { 56 Mutex m(&gInitMutex); 57 UErrorCode status = U_ZERO_ERROR; 58 if (gInclusionSet == NULL) { 59 gInclusionSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\ 60 \\-.\\u00B7\\u05F3\\u05F4\\u0F0B\\u200C\\u200D\\u2019]"), status); 61 gRecommendedSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\ 62 [0-z\\u00C0-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\ 63 \\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B\\u021E\ 64 \\u021F\\u0226-\\u0233\\u02BB\\u02BC\\u02EC\\u0300-\\u0304\ 65 \\u0306-\\u030C\\u030F-\\u0311\\u0313\\u0314\\u031B\\u0323-\ 66 \\u0328\\u032D\\u032E\\u0330\\u0331\\u0335\\u0338\\u0339\ 67 \\u0342-\\u0345\\u037B-\\u03CE\\u03FC-\\u045F\\u048A-\\u0525\ 68 \\u0531-\\u0586\\u05D0-\\u05F2\\u0621-\\u063F\\u0641-\\u0655\ 69 \\u0660-\\u0669\\u0670-\\u068D\\u068F-\\u06D5\\u06E5\\u06E6\ 70 \\u06EE-\\u06FF\\u0750-\\u07B1\\u0901-\\u0939\\u093C-\\u094D\ 71 \\u0950\\u0960-\\u0972\\u0979-\\u0A4D\\u0A5C-\\u0A74\\u0A81-\ 72 \\u0B43\\u0B47-\\u0B61\\u0B66-\\u0C56\\u0C60\\u0C61\\u0C66-\ 73 \\u0CD6\\u0CE0-\\u0CEF\\u0D02-\\u0D28\\u0D2A-\\u0D39\\u0D3D-\ 74 \\u0D43\\u0D46-\\u0D4D\\u0D57-\\u0D61\\u0D66-\\u0D8E\\u0D91-\ 75 \\u0DA5\\u0DA7-\\u0DDE\\u0DF2\\u0E01-\\u0ED9\\u0F00\\u0F20-\ 76 \\u0F8B\\u0F90-\\u109D\\u10D0-\\u10F0\\u10F7-\\u10FA\\u1200-\ 77 \\u135A\\u135F\\u1380-\\u138F\\u1401-\\u167F\\u1780-\\u17A2\ 78 \\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7-\ 79 \\u17DC\\u17E0-\\u17E9\\u1810-\\u18A8\\u18AA-\\u18F5\\u1E00-\ 80 \\u1E99\\u1F00-\\u1FFC\\u2D30-\\u2D65\\u2D80-\\u2DDE\\u3005-\ 81 \\u3007\\u3041-\\u31B7\\u3400-\\u9FCB\\uA000-\\uA48C\\uA67F\ 82 \\uA717-\\uA71F\\uA788\\uAA60-\\uAA7B\\uAC00-\\uD7A3\\uFA0E-\ 83 \\uFA29\\U00020000-\ 84 \\U0002B734]-[[:Cn:][:nfkcqc=n:][:XIDC=n:]]]"), status); 85 gNfdNormalizer = Normalizer2::getNFDInstance(status); 86 } 87 ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup); 88 89 return; 90 } 91 92 93 U_CAPI USpoofChecker * U_EXPORT2 94 uspoof_open(UErrorCode *status) { 95 if (U_FAILURE(*status)) { 96 return NULL; 97 } 98 initializeStatics(); 99 SpoofImpl *si = new SpoofImpl(SpoofData::getDefault(*status), *status); 100 if (U_FAILURE(*status)) { 101 delete si; 102 si = NULL; 103 } 104 return reinterpret_cast<USpoofChecker *>(si); 105 } 106 107 108 U_CAPI USpoofChecker * U_EXPORT2 109 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, 110 UErrorCode *status) { 111 if (U_FAILURE(*status)) { 112 return NULL; 113 } 114 initializeStatics(); 115 SpoofData *sd = new SpoofData(data, length, *status); 116 SpoofImpl *si = new SpoofImpl(sd, *status); 117 if (U_FAILURE(*status)) { 118 delete sd; 119 delete si; 120 return NULL; 121 } 122 if (sd == NULL || si == NULL) { 123 *status = U_MEMORY_ALLOCATION_ERROR; 124 delete sd; 125 delete si; 126 return NULL; 127 } 128 129 if (pActualLength != NULL) { 130 *pActualLength = sd->fRawData->fLength; 131 } 132 return reinterpret_cast<USpoofChecker *>(si); 133 } 134 135 136 U_CAPI USpoofChecker * U_EXPORT2 137 uspoof_clone(const USpoofChecker *sc, UErrorCode *status) { 138 const SpoofImpl *src = SpoofImpl::validateThis(sc, *status); 139 if (src == NULL) { 140 return NULL; 141 } 142 SpoofImpl *result = new SpoofImpl(*src, *status); // copy constructor 143 if (U_FAILURE(*status)) { 144 delete result; 145 result = NULL; 146 } 147 return reinterpret_cast<USpoofChecker *>(result); 148 } 149 150 151 U_CAPI void U_EXPORT2 152 uspoof_close(USpoofChecker *sc) { 153 UErrorCode status = U_ZERO_ERROR; 154 SpoofImpl *This = SpoofImpl::validateThis(sc, status); 155 delete This; 156 } 157 158 159 U_CAPI void U_EXPORT2 160 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) { 161 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 162 if (This == NULL) { 163 return; 164 } 165 166 // Verify that the requested checks are all ones (bits) that 167 // are acceptable, known values. 168 if (checks & ~USPOOF_ALL_CHECKS) { 169 *status = U_ILLEGAL_ARGUMENT_ERROR; 170 return; 171 } 172 173 This->fChecks = checks; 174 } 175 176 177 U_CAPI int32_t U_EXPORT2 178 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) { 179 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 180 if (This == NULL) { 181 return 0; 182 } 183 return This->fChecks; 184 } 185 186 U_CAPI void U_EXPORT2 187 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) { 188 UErrorCode status = U_ZERO_ERROR; 189 SpoofImpl *This = SpoofImpl::validateThis(sc, status); 190 if (This != NULL) { 191 This->fRestrictionLevel = restrictionLevel; 192 } 193 } 194 195 U_CAPI URestrictionLevel U_EXPORT2 196 uspoof_getRestrictionLevel(const USpoofChecker *sc) { 197 UErrorCode status = U_ZERO_ERROR; 198 const SpoofImpl *This = SpoofImpl::validateThis(sc, status); 199 if (This == NULL) { 200 return USPOOF_UNRESTRICTIVE; 201 } 202 return This->fRestrictionLevel; 203 } 204 205 U_CAPI void U_EXPORT2 206 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) { 207 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 208 if (This == NULL) { 209 return; 210 } 211 This->setAllowedLocales(localesList, *status); 212 } 213 214 U_CAPI const char * U_EXPORT2 215 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) { 216 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 217 if (This == NULL) { 218 return NULL; 219 } 220 return This->getAllowedLocales(*status); 221 } 222 223 224 U_CAPI const USet * U_EXPORT2 225 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) { 226 const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status); 227 return result->toUSet(); 228 } 229 230 U_CAPI const UnicodeSet * U_EXPORT2 231 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) { 232 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 233 if (This == NULL) { 234 return NULL; 235 } 236 return This->fAllowedCharsSet; 237 } 238 239 240 U_CAPI void U_EXPORT2 241 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) { 242 const UnicodeSet *set = UnicodeSet::fromUSet(chars); 243 uspoof_setAllowedUnicodeSet(sc, set, status); 244 } 245 246 247 U_CAPI void U_EXPORT2 248 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) { 249 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 250 if (This == NULL) { 251 return; 252 } 253 if (chars->isBogus()) { 254 *status = U_ILLEGAL_ARGUMENT_ERROR; 255 return; 256 } 257 UnicodeSet *clonedSet = static_cast<UnicodeSet *>(chars->clone()); 258 if (clonedSet == NULL || clonedSet->isBogus()) { 259 *status = U_MEMORY_ALLOCATION_ERROR; 260 return; 261 } 262 clonedSet->freeze(); 263 delete This->fAllowedCharsSet; 264 This->fAllowedCharsSet = clonedSet; 265 This->fChecks |= USPOOF_CHAR_LIMIT; 266 } 267 268 269 U_CAPI int32_t U_EXPORT2 270 uspoof_check(const USpoofChecker *sc, 271 const UChar *id, int32_t length, 272 int32_t *position, 273 UErrorCode *status) { 274 275 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 276 if (This == NULL) { 277 return 0; 278 } 279 if (length < -1) { 280 *status = U_ILLEGAL_ARGUMENT_ERROR; 281 return 0; 282 } 283 UnicodeString idStr((length == -1), id, length); // Aliasing constructor. 284 int32_t result = uspoof_checkUnicodeString(sc, idStr, position, status); 285 return result; 286 } 287 288 289 U_CAPI int32_t U_EXPORT2 290 uspoof_checkUTF8(const USpoofChecker *sc, 291 const char *id, int32_t length, 292 int32_t *position, 293 UErrorCode *status) { 294 295 if (U_FAILURE(*status)) { 296 return 0; 297 } 298 UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : uprv_strlen(id))); 299 int32_t result = uspoof_checkUnicodeString(sc, idStr, position, status); 300 return result; 301 } 302 303 304 U_CAPI int32_t U_EXPORT2 305 uspoof_areConfusable(const USpoofChecker *sc, 306 const UChar *id1, int32_t length1, 307 const UChar *id2, int32_t length2, 308 UErrorCode *status) { 309 SpoofImpl::validateThis(sc, *status); 310 if (U_FAILURE(*status)) { 311 return 0; 312 } 313 if (length1 < -1 || length2 < -1) { 314 *status = U_ILLEGAL_ARGUMENT_ERROR; 315 return 0; 316 } 317 318 UnicodeString id1Str((length1==-1), id1, length1); // Aliasing constructor 319 UnicodeString id2Str((length2==-1), id2, length2); // Aliasing constructor 320 return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status); 321 } 322 323 324 U_CAPI int32_t U_EXPORT2 325 uspoof_areConfusableUTF8(const USpoofChecker *sc, 326 const char *id1, int32_t length1, 327 const char *id2, int32_t length2, 328 UErrorCode *status) { 329 SpoofImpl::validateThis(sc, *status); 330 if (U_FAILURE(*status)) { 331 return 0; 332 } 333 if (length1 < -1 || length2 < -1) { 334 *status = U_ILLEGAL_ARGUMENT_ERROR; 335 return 0; 336 } 337 UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : uprv_strlen(id1))); 338 UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : uprv_strlen(id2))); 339 int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status); 340 return results; 341 } 342 343 344 U_CAPI int32_t U_EXPORT2 345 uspoof_areConfusableUnicodeString(const USpoofChecker *sc, 346 const icu::UnicodeString &id1, 347 const icu::UnicodeString &id2, 348 UErrorCode *status) { 349 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 350 if (U_FAILURE(*status)) { 351 return 0; 352 } 353 // 354 // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable, 355 // and for definitions of the types (single, whole, mixed-script) of confusables. 356 357 // We only care about a few of the check flags. Ignore the others. 358 // If no tests relavant to this function have been specified, return an error. 359 // TODO: is this really the right thing to do? It's probably an error on the caller's part, 360 // but logically we would just return 0 (no error). 361 if ((This->fChecks & (USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | 362 USPOOF_WHOLE_SCRIPT_CONFUSABLE)) == 0) { 363 *status = U_INVALID_STATE_ERROR; 364 return 0; 365 } 366 int32_t flagsForSkeleton = This->fChecks & USPOOF_ANY_CASE; 367 368 int32_t result = 0; 369 IdentifierInfo *identifierInfo = This->getIdentifierInfo(*status); 370 if (U_FAILURE(*status)) { 371 return 0; 372 } 373 identifierInfo->setIdentifier(id1, *status); 374 int32_t id1ScriptCount = identifierInfo->getScriptCount(); 375 identifierInfo->setIdentifier(id2, *status); 376 int32_t id2ScriptCount = identifierInfo->getScriptCount(); 377 This->releaseIdentifierInfo(identifierInfo); 378 identifierInfo = NULL; 379 380 if (This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) { 381 UnicodeString id1Skeleton; 382 UnicodeString id2Skeleton; 383 if (id1ScriptCount <= 1 && id2ScriptCount <= 1) { 384 flagsForSkeleton |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 385 uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id1, id1Skeleton, status); 386 uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id2, id2Skeleton, status); 387 if (id1Skeleton == id2Skeleton) { 388 result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 389 } 390 } 391 } 392 393 if (result & USPOOF_SINGLE_SCRIPT_CONFUSABLE) { 394 // If the two inputs are single script confusable they cannot also be 395 // mixed or whole script confusable, according to the UAX39 definitions. 396 // So we can skip those tests. 397 return result; 398 } 399 400 // Two identifiers are whole script confusable if each is of a single script 401 // and they are mixed script confusable. 402 UBool possiblyWholeScriptConfusables = 403 id1ScriptCount <= 1 && id2ScriptCount <= 1 && (This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE); 404 405 // 406 // Mixed Script Check 407 // 408 if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) || possiblyWholeScriptConfusables ) { 409 // For getSkeleton(), resetting the USPOOF_SINGLE_SCRIPT_CONFUSABLE flag will get us 410 // the mixed script table skeleton, which is what we want. 411 // The Any Case / Lower Case bit in the skelton flags was set at the top of the function. 412 UnicodeString id1Skeleton; 413 UnicodeString id2Skeleton; 414 flagsForSkeleton &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE; 415 uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id1, id1Skeleton, status); 416 uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id2, id2Skeleton, status); 417 if (id1Skeleton == id2Skeleton) { 418 result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 419 if (possiblyWholeScriptConfusables) { 420 result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 421 } 422 } 423 } 424 425 return result; 426 } 427 428 429 430 431 U_CAPI int32_t U_EXPORT2 432 uspoof_checkUnicodeString(const USpoofChecker *sc, 433 const icu::UnicodeString &id, 434 int32_t *position, 435 UErrorCode *status) { 436 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 437 if (This == NULL) { 438 return 0; 439 } 440 int32_t result = 0; 441 442 IdentifierInfo *identifierInfo = NULL; 443 if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) { 444 identifierInfo = This->getIdentifierInfo(*status); 445 if (U_FAILURE(*status)) { 446 goto cleanupAndReturn; 447 } 448 identifierInfo->setIdentifier(id, *status); 449 identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet); 450 } 451 452 453 if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) { 454 URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status); 455 if (idRestrictionLevel > This->fRestrictionLevel) { 456 result |= USPOOF_RESTRICTION_LEVEL; 457 } 458 if (This->fChecks & USPOOF_AUX_INFO) { 459 result |= idRestrictionLevel; 460 } 461 } 462 463 if ((This->fChecks) & USPOOF_MIXED_NUMBERS) { 464 const UnicodeSet *numerics = identifierInfo->getNumerics(); 465 if (numerics->size() > 1) { 466 result |= USPOOF_MIXED_NUMBERS; 467 } 468 469 // TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier. 470 // We have no easy way to do the same in C. 471 // if (checkResult != null) { 472 // checkResult.numerics = numerics; 473 // } 474 } 475 476 477 if (This->fChecks & (USPOOF_CHAR_LIMIT)) { 478 int32_t i; 479 UChar32 c; 480 int32_t length = id.length(); 481 for (i=0; i<length ;) { 482 c = id.char32At(i); 483 i += U16_LENGTH(c); 484 if (!This->fAllowedCharsSet->contains(c)) { 485 result |= USPOOF_CHAR_LIMIT; 486 break; 487 } 488 } 489 } 490 491 if (This->fChecks & 492 (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) { 493 // These are the checks that need to be done on NFD input 494 UnicodeString nfdText; 495 gNfdNormalizer->normalize(id, nfdText, *status); 496 int32_t nfdLength = nfdText.length(); 497 498 if (This->fChecks & USPOOF_INVISIBLE) { 499 500 // scan for more than one occurence of the same non-spacing mark 501 // in a sequence of non-spacing marks. 502 int32_t i; 503 UChar32 c; 504 UChar32 firstNonspacingMark = 0; 505 UBool haveMultipleMarks = FALSE; 506 UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence. 507 508 for (i=0; i<nfdLength ;) { 509 c = nfdText.char32At(i); 510 i += U16_LENGTH(c); 511 if (u_charType(c) != U_NON_SPACING_MARK) { 512 firstNonspacingMark = 0; 513 if (haveMultipleMarks) { 514 marksSeenSoFar.clear(); 515 haveMultipleMarks = FALSE; 516 } 517 continue; 518 } 519 if (firstNonspacingMark == 0) { 520 firstNonspacingMark = c; 521 continue; 522 } 523 if (!haveMultipleMarks) { 524 marksSeenSoFar.add(firstNonspacingMark); 525 haveMultipleMarks = TRUE; 526 } 527 if (marksSeenSoFar.contains(c)) { 528 // report the error, and stop scanning. 529 // No need to find more than the first failure. 530 result |= USPOOF_INVISIBLE; 531 break; 532 } 533 marksSeenSoFar.add(c); 534 } 535 } 536 537 538 if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) { 539 // The basic test is the same for both whole and mixed script confusables. 540 // Compute the set of scripts that every input character has a confusable in. 541 // For this computation an input character is always considered to be 542 // confusable with itself in its own script. 543 // 544 // If the number of such scripts is two or more, and the input consisted of 545 // characters all from a single script, we have a whole script confusable. 546 // (The two scripts will be the original script and the one that is confusable) 547 // 548 // If the number of such scripts >= one, and the original input contained characters from 549 // more than one script, we have a mixed script confusable. (We can transform 550 // some of the characters, and end up with a visually similar string all in 551 // one script.) 552 553 if (identifierInfo == NULL) { 554 identifierInfo = This->getIdentifierInfo(*status); 555 if (U_FAILURE(*status)) { 556 goto cleanupAndReturn; 557 } 558 identifierInfo->setIdentifier(id, *status); 559 } 560 561 int32_t scriptCount = identifierInfo->getScriptCount(); 562 563 ScriptSet scripts; 564 This->wholeScriptCheck(nfdText, &scripts, *status); 565 int32_t confusableScriptCount = scripts.countMembers(); 566 //printf("confusableScriptCount = %d\n", confusableScriptCount); 567 568 if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) && 569 confusableScriptCount >= 2 && 570 scriptCount == 1) { 571 result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 572 } 573 574 if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) && 575 confusableScriptCount >= 1 && 576 scriptCount > 1) { 577 result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 578 } 579 } 580 } 581 582 cleanupAndReturn: 583 This->releaseIdentifierInfo(identifierInfo); 584 if (position != NULL) { 585 *position = 0; 586 } 587 return result; 588 } 589 590 591 U_CAPI int32_t U_EXPORT2 592 uspoof_getSkeleton(const USpoofChecker *sc, 593 uint32_t type, 594 const UChar *id, int32_t length, 595 UChar *dest, int32_t destCapacity, 596 UErrorCode *status) { 597 598 SpoofImpl::validateThis(sc, *status); 599 if (U_FAILURE(*status)) { 600 return 0; 601 } 602 if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL)) { 603 *status = U_ILLEGAL_ARGUMENT_ERROR; 604 return 0; 605 } 606 607 UnicodeString idStr((length==-1), id, length); // Aliasing constructor 608 UnicodeString destStr; 609 uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status); 610 destStr.extract(dest, destCapacity, *status); 611 return destStr.length(); 612 } 613 614 615 616 U_I18N_API UnicodeString & U_EXPORT2 617 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, 618 uint32_t type, 619 const UnicodeString &id, 620 UnicodeString &dest, 621 UErrorCode *status) { 622 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 623 if (U_FAILURE(*status)) { 624 return dest; 625 } 626 627 int32_t tableMask = 0; 628 switch (type) { 629 case 0: 630 tableMask = USPOOF_ML_TABLE_FLAG; 631 break; 632 case USPOOF_SINGLE_SCRIPT_CONFUSABLE: 633 tableMask = USPOOF_SL_TABLE_FLAG; 634 break; 635 case USPOOF_ANY_CASE: 636 tableMask = USPOOF_MA_TABLE_FLAG; 637 break; 638 case USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE: 639 tableMask = USPOOF_SA_TABLE_FLAG; 640 break; 641 default: 642 *status = U_ILLEGAL_ARGUMENT_ERROR; 643 return dest; 644 } 645 646 UnicodeString nfdId; 647 gNfdNormalizer->normalize(id, nfdId, *status); 648 649 // Apply the skeleton mapping to the NFD normalized input string 650 // Accumulate the skeleton, possibly unnormalized, in a UnicodeString. 651 int32_t inputIndex = 0; 652 UnicodeString skelStr; 653 int32_t normalizedLen = nfdId.length(); 654 for (inputIndex=0; inputIndex < normalizedLen; ) { 655 UChar32 c = nfdId.char32At(inputIndex); 656 inputIndex += U16_LENGTH(c); 657 This->confusableLookup(c, tableMask, skelStr); 658 } 659 660 gNfdNormalizer->normalize(skelStr, dest, *status); 661 return dest; 662 } 663 664 665 U_CAPI int32_t U_EXPORT2 666 uspoof_getSkeletonUTF8(const USpoofChecker *sc, 667 uint32_t type, 668 const char *id, int32_t length, 669 char *dest, int32_t destCapacity, 670 UErrorCode *status) { 671 SpoofImpl::validateThis(sc, *status); 672 if (U_FAILURE(*status)) { 673 return 0; 674 } 675 if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL)) { 676 *status = U_ILLEGAL_ARGUMENT_ERROR; 677 return 0; 678 } 679 680 UnicodeString srcStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : uprv_strlen(id))); 681 UnicodeString destStr; 682 uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status); 683 if (U_FAILURE(*status)) { 684 return 0; 685 } 686 687 int32_t lengthInUTF8 = 0; 688 u_strToUTF8(dest, destCapacity, &lengthInUTF8, 689 destStr.getBuffer(), destStr.length(), status); 690 return lengthInUTF8; 691 } 692 693 694 U_CAPI int32_t U_EXPORT2 695 uspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) { 696 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 697 if (This == NULL) { 698 U_ASSERT(U_FAILURE(*status)); 699 return 0; 700 } 701 int32_t dataSize = This->fSpoofData->fRawData->fLength; 702 if (capacity < dataSize) { 703 *status = U_BUFFER_OVERFLOW_ERROR; 704 return dataSize; 705 } 706 uprv_memcpy(buf, This->fSpoofData->fRawData, dataSize); 707 return dataSize; 708 } 709 710 U_CAPI const USet * U_EXPORT2 711 uspoof_getInclusionSet(UErrorCode *) { 712 initializeStatics(); 713 return gInclusionSet->toUSet(); 714 } 715 716 U_CAPI const USet * U_EXPORT2 717 uspoof_getRecommendedSet(UErrorCode *) { 718 initializeStatics(); 719 return gRecommendedSet->toUSet(); 720 } 721 722 U_I18N_API const UnicodeSet * U_EXPORT2 723 uspoof_getInclusionUnicodeSet(UErrorCode *) { 724 initializeStatics(); 725 return gInclusionSet; 726 } 727 728 U_I18N_API const UnicodeSet * U_EXPORT2 729 uspoof_getRecommendedUnicodeSet(UErrorCode *) { 730 initializeStatics(); 731 return gRecommendedSet; 732 } 733 734 735 736 #endif // !UCONFIG_NO_NORMALIZATION 737