1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2009-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File spooftest.c 9 * 10 *********************************************************************************/ 11 /*C API TEST for the uspoof Unicode Indentifier Spoofing and Security API */ 12 /** 13 * This is an API test for ICU spoof detection in plain C. It doesn't test very many cases, and doesn't 14 * try to test the full functionality. It just calls each function and verifies that it 15 * works on a basic level. 16 * 17 * More complete testing of spoof detection functionality is done with the C++ tests. 18 **/ 19 20 #include "unicode/utypes.h" 21 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION 22 23 #include <stdlib.h> 24 #include <stdio.h> 25 #include <string.h> 26 #include "unicode/uspoof.h" 27 #include "unicode/ustring.h" 28 #include "unicode/uset.h" 29 #include "cintltst.h" 30 31 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 32 log_err_status(status, "Failure at file %s, line %d, error = %s\n", __FILE__, __LINE__, u_errorName(status));}} 33 34 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 35 log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} 36 37 #define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \ 38 log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \ 39 __FILE__, __LINE__, #a, (a), #b, (b)); }} 40 41 #define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \ 42 log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \ 43 __FILE__, __LINE__, #a, (a), #b, (b)); }} 44 45 46 /* 47 * TEST_SETUP and TEST_TEARDOWN 48 * macros to handle the boilerplate around setting up test case. 49 * Put arbitrary test code between SETUP and TEARDOWN. 50 * "sc" is the ready-to-go SpoofChecker for use in the tests. 51 */ 52 #define TEST_SETUP { \ 53 UErrorCode status = U_ZERO_ERROR; \ 54 USpoofChecker *sc; \ 55 sc = uspoof_open(&status); \ 56 TEST_ASSERT_SUCCESS(status); \ 57 if (U_SUCCESS(status)){ 58 59 #define TEST_TEARDOWN \ 60 } \ 61 TEST_ASSERT_SUCCESS(status); \ 62 uspoof_close(sc); \ 63 } 64 65 66 static void TestUSpoofCAPI(void); 67 68 void addUSpoofTest(TestNode** root); 69 70 void addUSpoofTest(TestNode** root) 71 { 72 #if !UCONFIG_NO_FILE_IO 73 addTest(root, &TestUSpoofCAPI, "uspoof/TestUSpoofCAPI"); 74 #endif 75 } 76 77 /* 78 * Identifiers for verifying that spoof checking is minimally alive and working. 79 */ 80 const UChar goodLatin[] = {(UChar)0x75, (UChar)0x7a, 0}; /* "uz", all ASCII */ 81 /* (not confusable) */ 82 const UChar scMixed[] = {(UChar)0x73, (UChar)0x0441, 0}; /* "sc", with Cyrillic 'c' */ 83 /* (mixed script, confusable */ 84 85 const UChar scLatin[] = {(UChar)0x73, (UChar)0x63, 0}; /* "sc", plain ascii. */ 86 const UChar goodCyrl[] = {(UChar)0x438, (UChar)0x43B, 0}; /* Plain lower case Cyrillic letters, 87 no latin confusables */ 88 89 const UChar goodGreek[] = {(UChar)0x3c0, (UChar)0x3c6, 0}; /* Plain lower case Greek letters */ 90 91 const UChar lll_Latin_a[] = {(UChar)0x6c, (UChar)0x49, (UChar)0x31, 0}; /* lI1, all ASCII */ 92 93 /* Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA*/ 94 const UChar lll_Latin_b[] = {(UChar)0xff29, (UChar)0x217c, (UChar)0x196, 0}; 95 96 const UChar lll_Cyrl[] = {(UChar)0x0406, (UChar)0x04C0, (UChar)0x31, 0}; 97 98 /* The skeleton transform for all of thes 'lll' lookalikes is all lower case l. */ 99 const UChar lll_Skel[] = {(UChar)0x6c, (UChar)0x6c, (UChar)0x6c, 0}; 100 101 const UChar han_Hiragana[] = {(UChar)0x3086, (UChar)0x308A, (UChar)0x0020, (UChar)0x77F3, (UChar)0x7530, 0}; 102 103 /* Provide better code coverage */ 104 const char goodLatinUTF8[] = {0x75, 0x77, 0}; 105 /* 106 * Spoof Detction C API Tests 107 */ 108 static void TestUSpoofCAPI(void) { 109 110 /* 111 * basic uspoof_open(). 112 */ 113 { 114 USpoofChecker *sc; 115 UErrorCode status = U_ZERO_ERROR; 116 sc = uspoof_open(&status); 117 TEST_ASSERT_SUCCESS(status); 118 if (U_FAILURE(status)) { 119 /* If things are so broken that we can't even open a default spoof checker, */ 120 /* don't even try the rest of the tests. They would all fail. */ 121 return; 122 } 123 uspoof_close(sc); 124 } 125 126 127 128 /* 129 * Test Open from source rules. 130 */ 131 TEST_SETUP 132 const char *dataSrcDir; 133 char *fileName; 134 char *confusables; 135 int confusablesLength; 136 char *confusablesWholeScript; 137 int confusablesWholeScriptLength; 138 FILE *f; 139 UParseError pe; 140 int32_t errType; 141 USpoofChecker *rsc; 142 143 dataSrcDir = ctest_dataSrcDir(); 144 fileName = malloc(strlen(dataSrcDir) + 100); 145 strcpy(fileName, dataSrcDir); 146 strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt"); 147 f = fopen(fileName, "rb"); 148 TEST_ASSERT_NE(f, NULL); 149 confusables = malloc(3000000); 150 if (f != NULL) { 151 confusablesLength = fread(confusables, 1, 3000000, f); 152 fclose(f); 153 } 154 155 strcpy(fileName, dataSrcDir); 156 strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt"); 157 f = fopen(fileName, "rb"); 158 TEST_ASSERT_NE(f, NULL); 159 confusablesWholeScript = malloc(1000000); 160 if (f != NULL) { 161 confusablesWholeScriptLength = fread(confusablesWholeScript, 1, 1000000, f); 162 fclose(f); 163 } 164 165 rsc = uspoof_openFromSource(confusables, confusablesLength, 166 confusablesWholeScript, confusablesWholeScriptLength, 167 &errType, &pe, &status); 168 TEST_ASSERT_SUCCESS(status); 169 170 free(confusablesWholeScript); 171 free(confusables); 172 free(fileName); 173 uspoof_close(rsc); 174 /* printf("ParseError Line is %d\n", pe.line); */ 175 TEST_TEARDOWN; 176 177 178 /* 179 * openFromSerialized and serialize 180 */ 181 TEST_SETUP 182 int32_t serializedSize = 0; 183 int32_t actualLength = 0; 184 char *buf; 185 USpoofChecker *sc2; 186 int32_t checkResults; 187 188 189 serializedSize = uspoof_serialize(sc, NULL, 0, &status); 190 TEST_ASSERT_EQ(status, U_BUFFER_OVERFLOW_ERROR); 191 TEST_ASSERT(serializedSize > 0); 192 193 /* Serialize the default spoof checker */ 194 status = U_ZERO_ERROR; 195 buf = (char *)malloc(serializedSize + 10); 196 TEST_ASSERT(buf != NULL); 197 buf[serializedSize] = 42; 198 uspoof_serialize(sc, buf, serializedSize, &status); 199 TEST_ASSERT_SUCCESS(status); 200 TEST_ASSERT_EQ(42, buf[serializedSize]); 201 202 /* Create a new spoof checker from the freshly serialized data */ 203 sc2 = uspoof_openFromSerialized(buf, serializedSize+10, &actualLength, &status); 204 TEST_ASSERT_SUCCESS(status); 205 TEST_ASSERT_NE(NULL, sc2); 206 TEST_ASSERT_EQ(serializedSize, actualLength); 207 208 /* Verify that the new spoof checker at least wiggles */ 209 checkResults = uspoof_check(sc2, goodLatin, -1, NULL, &status); 210 TEST_ASSERT_SUCCESS(status); 211 TEST_ASSERT_EQ(0, checkResults); 212 213 checkResults = uspoof_check(sc2, scMixed, -1, NULL, &status); 214 TEST_ASSERT_SUCCESS(status); 215 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); 216 217 uspoof_close(sc2); 218 free(buf); 219 TEST_TEARDOWN; 220 221 222 223 /* 224 * Set & Get Check Flags 225 */ 226 TEST_SETUP 227 int32_t t; 228 uspoof_setChecks(sc, USPOOF_ALL_CHECKS, &status); 229 TEST_ASSERT_SUCCESS(status); 230 t = uspoof_getChecks(sc, &status); 231 TEST_ASSERT_EQ(t, USPOOF_ALL_CHECKS); 232 233 uspoof_setChecks(sc, 0, &status); 234 TEST_ASSERT_SUCCESS(status); 235 t = uspoof_getChecks(sc, &status); 236 TEST_ASSERT_EQ(0, t); 237 238 uspoof_setChecks(sc, 239 USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, 240 &status); 241 TEST_ASSERT_SUCCESS(status); 242 t = uspoof_getChecks(sc, &status); 243 TEST_ASSERT_SUCCESS(status); 244 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, t); 245 TEST_TEARDOWN; 246 247 /* 248 * get & setAllowedChars 249 */ 250 TEST_SETUP 251 USet *us; 252 const USet *uset; 253 254 uset = uspoof_getAllowedChars(sc, &status); 255 TEST_ASSERT_SUCCESS(status); 256 TEST_ASSERT(uset_isFrozen(uset)); 257 us = uset_open((UChar32)0x41, (UChar32)0x5A); /* [A-Z] */ 258 uspoof_setAllowedChars(sc, us, &status); 259 TEST_ASSERT_SUCCESS(status); 260 TEST_ASSERT_NE(us, uspoof_getAllowedChars(sc, &status)); 261 TEST_ASSERT(uset_equals(us, uspoof_getAllowedChars(sc, &status))); 262 TEST_ASSERT_SUCCESS(status); 263 uset_close(us); 264 TEST_TEARDOWN; 265 266 /* 267 * clone() 268 */ 269 270 TEST_SETUP 271 USpoofChecker *clone1 = NULL; 272 USpoofChecker *clone2 = NULL; 273 int32_t checkResults = 0; 274 275 clone1 = uspoof_clone(sc, &status); 276 TEST_ASSERT_SUCCESS(status); 277 TEST_ASSERT_NE(clone1, sc); 278 279 clone2 = uspoof_clone(clone1, &status); 280 TEST_ASSERT_SUCCESS(status); 281 TEST_ASSERT_NE(clone2, clone1); 282 283 uspoof_close(clone1); 284 285 /* Verify that the cloned spoof checker is alive */ 286 checkResults = uspoof_check(clone2, goodLatin, -1, NULL, &status); 287 TEST_ASSERT_SUCCESS(status); 288 TEST_ASSERT_EQ(0, checkResults); 289 290 checkResults = uspoof_check(clone2, scMixed, -1, NULL, &status); 291 TEST_ASSERT_SUCCESS(status); 292 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); 293 uspoof_close(clone2); 294 TEST_TEARDOWN; 295 296 /* 297 * basic uspoof_check() 298 */ 299 TEST_SETUP 300 int32_t result; 301 result = uspoof_check(sc, goodLatin, -1, NULL, &status); 302 TEST_ASSERT_SUCCESS(status); 303 TEST_ASSERT_EQ(0, result); 304 305 result = uspoof_check(sc, han_Hiragana, -1, NULL, &status); 306 TEST_ASSERT_SUCCESS(status); 307 TEST_ASSERT_EQ(0, result); 308 309 result = uspoof_check(sc, scMixed, -1, NULL, &status); 310 TEST_ASSERT_SUCCESS(status); 311 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, result); 312 TEST_TEARDOWN 313 314 315 /* 316 * get & set Checks 317 */ 318 TEST_SETUP 319 int32_t checks; 320 int32_t checks2; 321 int32_t checkResults; 322 323 checks = uspoof_getChecks(sc, &status); 324 TEST_ASSERT_SUCCESS(status); 325 TEST_ASSERT_EQ(USPOOF_ALL_CHECKS, checks); 326 327 checks &= ~(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE); 328 uspoof_setChecks(sc, checks, &status); 329 TEST_ASSERT_SUCCESS(status); 330 checks2 = uspoof_getChecks(sc, &status); 331 TEST_ASSERT_EQ(checks, checks2); 332 333 /* The checks that were disabled just above are the same ones that the "scMixed" test fails. 334 So with those tests gone checking that Identifier should now succeed */ 335 checkResults = uspoof_check(sc, scMixed, -1, NULL, &status); 336 TEST_ASSERT_SUCCESS(status); 337 TEST_ASSERT_EQ(0, checkResults); 338 TEST_TEARDOWN; 339 340 /* 341 * AllowedLoacles 342 */ 343 344 TEST_SETUP 345 const char *allowedLocales; 346 int32_t checkResults; 347 348 /* Default allowed locales list should be empty */ 349 allowedLocales = uspoof_getAllowedLocales(sc, &status); 350 TEST_ASSERT_SUCCESS(status); 351 TEST_ASSERT(strcmp("", allowedLocales) == 0) 352 353 /* Allow en and ru, which should enable Latin and Cyrillic only to pass */ 354 uspoof_setAllowedLocales(sc, "en, ru_RU", &status); 355 TEST_ASSERT_SUCCESS(status); 356 allowedLocales = uspoof_getAllowedLocales(sc, &status); 357 TEST_ASSERT_SUCCESS(status); 358 TEST_ASSERT(strstr(allowedLocales, "en") != NULL); 359 TEST_ASSERT(strstr(allowedLocales, "ru") != NULL); 360 361 /* Limit checks to USPOOF_CHAR_LIMIT. Some of the test data has whole script confusables also, 362 * which we don't want to see in this test. */ 363 uspoof_setChecks(sc, USPOOF_CHAR_LIMIT, &status); 364 TEST_ASSERT_SUCCESS(status); 365 366 checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status); 367 TEST_ASSERT_SUCCESS(status); 368 TEST_ASSERT_EQ(0, checkResults); 369 370 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status); 371 TEST_ASSERT_SUCCESS(status); 372 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults); 373 374 checkResults = uspoof_check(sc, goodCyrl, -1, NULL, &status); 375 TEST_ASSERT_SUCCESS(status); 376 TEST_ASSERT_EQ(0, checkResults); 377 378 /* Reset with an empty locale list, which should allow all characters to pass */ 379 uspoof_setAllowedLocales(sc, " ", &status); 380 TEST_ASSERT_SUCCESS(status); 381 382 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status); 383 TEST_ASSERT_SUCCESS(status); 384 TEST_ASSERT_EQ(0, checkResults); 385 TEST_TEARDOWN; 386 387 /* 388 * AllowedChars set/get the USet of allowed characters. 389 */ 390 TEST_SETUP 391 const USet *set; 392 USet *tmpSet; 393 int32_t checkResults; 394 395 /* By default, we should see no restriction; the USet should allow all characters. */ 396 set = uspoof_getAllowedChars(sc, &status); 397 TEST_ASSERT_SUCCESS(status); 398 tmpSet = uset_open(0, 0x10ffff); 399 TEST_ASSERT(uset_equals(tmpSet, set)); 400 401 /* Setting the allowed chars should enable the check. */ 402 uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CHAR_LIMIT, &status); 403 TEST_ASSERT_SUCCESS(status); 404 405 /* Remove a character that is in our good Latin test identifier from the allowed chars set. */ 406 uset_remove(tmpSet, goodLatin[1]); 407 uspoof_setAllowedChars(sc, tmpSet, &status); 408 TEST_ASSERT_SUCCESS(status); 409 uset_close(tmpSet); 410 411 /* Latin Identifier should now fail; other non-latin test cases should still be OK */ 412 checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status); 413 TEST_ASSERT_SUCCESS(status); 414 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults); 415 416 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status); 417 TEST_ASSERT_SUCCESS(status); 418 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults); 419 TEST_TEARDOWN; 420 421 /* 422 * check UTF-8 423 */ 424 TEST_SETUP 425 char utf8buf[200]; 426 int32_t checkResults; 427 int32_t position; 428 429 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodLatin, -1, &status); 430 TEST_ASSERT_SUCCESS(status); 431 position = 666; 432 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status); 433 TEST_ASSERT_SUCCESS(status); 434 TEST_ASSERT_EQ(0, checkResults); 435 TEST_ASSERT_EQ(666, position); 436 437 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodCyrl, -1, &status); 438 TEST_ASSERT_SUCCESS(status); 439 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status); 440 TEST_ASSERT_SUCCESS(status); 441 TEST_ASSERT_EQ(0, checkResults); 442 443 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, scMixed, -1, &status); 444 TEST_ASSERT_SUCCESS(status); 445 position = 666; 446 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status); 447 TEST_ASSERT_SUCCESS(status); 448 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_SINGLE_SCRIPT , checkResults); 449 TEST_ASSERT_EQ(2, position); 450 451 TEST_TEARDOWN; 452 453 /* 454 * uspoof_areConfusable() 455 */ 456 TEST_SETUP 457 int32_t checkResults; 458 459 checkResults = uspoof_areConfusable(sc, scLatin, -1, scMixed, -1, &status); 460 TEST_ASSERT_SUCCESS(status); 461 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); 462 463 checkResults = uspoof_areConfusable(sc, goodGreek, -1, scLatin, -1, &status); 464 TEST_ASSERT_SUCCESS(status); 465 TEST_ASSERT_EQ(0, checkResults); 466 467 checkResults = uspoof_areConfusable(sc, lll_Latin_a, -1, lll_Latin_b, -1, &status); 468 TEST_ASSERT_SUCCESS(status); 469 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults); 470 471 TEST_TEARDOWN; 472 473 /* 474 * areConfusableUTF8 475 */ 476 TEST_SETUP 477 int32_t checkResults; 478 char s1[200]; 479 char s2[200]; 480 481 482 u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status); 483 u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status); 484 TEST_ASSERT_SUCCESS(status); 485 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status); 486 TEST_ASSERT_SUCCESS(status); 487 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); 488 489 u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status); 490 u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status); 491 TEST_ASSERT_SUCCESS(status); 492 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status); 493 TEST_ASSERT_SUCCESS(status); 494 TEST_ASSERT_EQ(0, checkResults); 495 496 u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status); 497 u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status); 498 TEST_ASSERT_SUCCESS(status); 499 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status); 500 TEST_ASSERT_SUCCESS(status); 501 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults); 502 503 TEST_TEARDOWN; 504 505 506 /* 507 * getSkeleton 508 */ 509 510 TEST_SETUP 511 UChar dest[100]; 512 int32_t skelLength; 513 514 skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, dest, sizeof(dest)/sizeof(UChar), &status); 515 TEST_ASSERT_SUCCESS(status); 516 TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest)); 517 TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength); 518 519 skelLength = uspoof_getSkeletonUTF8(sc, USPOOF_ANY_CASE, goodLatinUTF8, -1, (char*)dest, 520 sizeof(dest)/sizeof(UChar), &status); 521 TEST_ASSERT_SUCCESS(status); 522 523 skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, NULL, 0, &status); 524 TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status); 525 TEST_ASSERT_EQ(3, skelLength); 526 status = U_ZERO_ERROR; 527 528 TEST_TEARDOWN; 529 } 530 531 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ 532