1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File spooftest.c 9 * 10 *********************************************************************************/ 11 /*C API TEST for the uspoof Unicode Indentifier Spoofing and Security API */ 12 /** 13 * This is an API test for ICU spoof detection in plain C. It doesn't test very many cases, and doesn't 14 * try to test the full functionality. It just calls each function and verifies that it 15 * works on a basic level. 16 * 17 * More complete testing of spoof detection functionality is done with the C++ tests. 18 **/ 19 20 #include "unicode/utypes.h" 21 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 22 23 #include <stdlib.h> 24 #include <stdio.h> 25 #include <string.h> 26 #include "unicode/uspoof.h" 27 #include "unicode/ustring.h" 28 #include "unicode/uset.h" 29 #include "cintltst.h" 30 31 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 32 log_err_status(status, "Failure at file %s, line %d, error = %s\n", __FILE__, __LINE__, u_errorName(status));}} 33 34 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 35 log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} 36 37 #define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \ 38 log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \ 39 __FILE__, __LINE__, #a, (a), #b, (b)); }} 40 41 #define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \ 42 log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \ 43 __FILE__, __LINE__, #a, (a), #b, (b)); }} 44 45 46 /* 47 * TEST_SETUP and TEST_TEARDOWN 48 * macros to handle the boilerplate around setting up test case. 49 * Put arbitrary test code between SETUP and TEARDOWN. 50 * "sc" is the ready-to-go SpoofChecker for use in the tests. 51 */ 52 #define TEST_SETUP { \ 53 UErrorCode status = U_ZERO_ERROR; \ 54 USpoofChecker *sc; \ 55 sc = uspoof_open(&status); \ 56 TEST_ASSERT_SUCCESS(status); \ 57 if (U_SUCCESS(status)){ 58 59 #define TEST_TEARDOWN \ 60 } \ 61 TEST_ASSERT_SUCCESS(status); \ 62 uspoof_close(sc); \ 63 } 64 65 66 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) { 67 char buf_inside_macro[120]; 68 int32_t len = (int32_t)strlen(expected); 69 UBool success; 70 if (nulTerm) { 71 u_austrncpy(buf_inside_macro, (actual), len+1); 72 buf_inside_macro[len+2] = 0; 73 success = (strcmp((expected), buf_inside_macro) == 0); 74 } else { 75 u_austrncpy(buf_inside_macro, (actual), len); 76 buf_inside_macro[len+1] = 0; 77 success = (strncmp((expected), buf_inside_macro, len) == 0); 78 } 79 if (success == FALSE) { 80 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", 81 file, line, (expected), buf_inside_macro); 82 } 83 } 84 85 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) 86 87 88 89 static void TestUSpoofCAPI(void); 90 91 void addUSpoofTest(TestNode** root); 92 93 void addUSpoofTest(TestNode** root) 94 { 95 addTest(root, &TestUSpoofCAPI, "uspoof/TestUSpoofCAPI"); 96 } 97 98 /* 99 * Identifiers for verifying that spoof checking is minimally alive and working. 100 */ 101 const UChar goodLatin[] = {(UChar)0x75, (UChar)0x77, 0}; /* "uw", all ASCII */ 102 /* (not confusable) */ 103 const UChar scMixed[] = {(UChar)0x73, (UChar)0x0441, 0}; /* "sc", with Cyrillic 'c' */ 104 /* (mixed script, confusable */ 105 106 const UChar scLatin[] = {(UChar)0x73, (UChar)0x63, 0}; /* "sc", plain ascii. */ 107 const UChar goodCyrl[] = {(UChar)0x438, (UChar)0x43B, 0}; /* Plain lower case Cyrillic letters, 108 no latin confusables */ 109 110 const UChar goodGreek[] = {(UChar)0x3c0, (UChar)0x3c6, 0}; /* Plain lower case Greek letters */ 111 112 const UChar lll_Latin_a[] = {(UChar)0x6c, (UChar)0x49, (UChar)0x31, 0}; /* lI1, all ASCII */ 113 114 /* Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA*/ 115 const UChar lll_Latin_b[] = {(UChar)0xff29, (UChar)0x217c, (UChar)0x196, 0}; 116 117 const UChar lll_Cyrl[] = {(UChar)0x0406, (UChar)0x04C0, (UChar)0x31, 0}; 118 119 /* The skeleton transform for all of thes 'lll' lookalikes is all ascii digit 1. */ 120 const UChar lll_Skel[] = {(UChar)0x31, (UChar)0x31, (UChar)0x31, 0}; 121 122 /* 123 * Spoof Detction C API Tests 124 */ 125 static void TestUSpoofCAPI(void) { 126 127 /* 128 * basic uspoof_open(). 129 */ 130 { 131 USpoofChecker *sc; 132 UErrorCode status = U_ZERO_ERROR; 133 sc = uspoof_open(&status); 134 TEST_ASSERT_SUCCESS(status); 135 if (U_FAILURE(status)) { 136 /* If things are so broken that we can't even open a default spoof checker, */ 137 /* don't even try the rest of the tests. They would all fail. */ 138 return; 139 } 140 uspoof_close(sc); 141 } 142 143 144 145 /* 146 * Test Open from source rules. 147 */ 148 TEST_SETUP 149 const char *dataSrcDir; 150 char *fileName; 151 char *confusables; 152 int confusablesLength; 153 char *confusablesWholeScript; 154 int confusablesWholeScriptLength; 155 FILE *f; 156 UParseError pe; 157 int32_t errType; 158 USpoofChecker *rsc; 159 160 dataSrcDir = ctest_dataSrcDir(); 161 fileName = malloc(strlen(dataSrcDir) + 100); 162 strcpy(fileName, dataSrcDir); 163 strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt"); 164 f = fopen(fileName, "r"); 165 TEST_ASSERT_NE(f, NULL); 166 confusables = malloc(3000000); 167 confusablesLength = fread(confusables, 1, 3000000, f); 168 fclose(f); 169 170 171 strcpy(fileName, dataSrcDir); 172 strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt"); 173 f = fopen(fileName, "r"); 174 TEST_ASSERT_NE(f, NULL); 175 confusablesWholeScript = malloc(1000000); 176 confusablesWholeScriptLength = fread(confusablesWholeScript, 1, 1000000, f); 177 fclose(f); 178 179 rsc = uspoof_openFromSource(confusables, confusablesLength, 180 confusablesWholeScript, confusablesWholeScriptLength, 181 &errType, &pe, &status); 182 TEST_ASSERT_SUCCESS(status); 183 184 free(confusablesWholeScript); 185 free(confusables); 186 free(fileName); 187 uspoof_close(rsc); 188 /* printf("ParseError Line is %d\n", pe.line); */ 189 TEST_TEARDOWN; 190 191 192 /* 193 * openFromSerialized and serialize 194 */ 195 TEST_SETUP 196 int32_t serializedSize = 0; 197 int32_t actualLength = 0; 198 char *buf; 199 USpoofChecker *sc2; 200 int32_t checkResults; 201 202 203 serializedSize = uspoof_serialize(sc, NULL, 0, &status); 204 TEST_ASSERT_EQ(status, U_BUFFER_OVERFLOW_ERROR); 205 TEST_ASSERT(serializedSize > 0); 206 207 /* Serialize the default spoof checker */ 208 status = U_ZERO_ERROR; 209 buf = (char *)malloc(serializedSize + 10); 210 TEST_ASSERT(buf != NULL); 211 buf[serializedSize] = 42; 212 uspoof_serialize(sc, buf, serializedSize, &status); 213 TEST_ASSERT_SUCCESS(status); 214 TEST_ASSERT_EQ(42, buf[serializedSize]); 215 216 /* Create a new spoof checker from the freshly serialized data */ 217 sc2 = uspoof_openFromSerialized(buf, serializedSize+10, &actualLength, &status); 218 TEST_ASSERT_SUCCESS(status); 219 TEST_ASSERT_NE(NULL, sc2); 220 TEST_ASSERT_EQ(serializedSize, actualLength); 221 222 /* Verify that the new spoof checker at least wiggles */ 223 checkResults = uspoof_check(sc2, goodLatin, -1, NULL, &status); 224 TEST_ASSERT_SUCCESS(status); 225 TEST_ASSERT_EQ(0, checkResults); 226 227 checkResults = uspoof_check(sc2, scMixed, -1, NULL, &status); 228 TEST_ASSERT_SUCCESS(status); 229 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); 230 231 uspoof_close(sc2); 232 free(buf); 233 TEST_TEARDOWN; 234 235 236 237 /* 238 * Set & Get Check Flags 239 */ 240 TEST_SETUP 241 int32_t t; 242 uspoof_setChecks(sc, USPOOF_ALL_CHECKS, &status); 243 TEST_ASSERT_SUCCESS(status); 244 t = uspoof_getChecks(sc, &status); 245 TEST_ASSERT_EQ(t, USPOOF_ALL_CHECKS); 246 247 uspoof_setChecks(sc, 0, &status); 248 TEST_ASSERT_SUCCESS(status); 249 t = uspoof_getChecks(sc, &status); 250 TEST_ASSERT_EQ(0, t); 251 252 uspoof_setChecks(sc, 253 USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, 254 &status); 255 TEST_ASSERT_SUCCESS(status); 256 t = uspoof_getChecks(sc, &status); 257 TEST_ASSERT_SUCCESS(status); 258 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, t); 259 TEST_TEARDOWN; 260 261 /* 262 * get & setAllowedChars 263 */ 264 TEST_SETUP 265 USet *us; 266 const USet *uset; 267 268 uset = uspoof_getAllowedChars(sc, &status); 269 TEST_ASSERT_SUCCESS(status); 270 TEST_ASSERT(uset_isFrozen(uset)); 271 us = uset_open((UChar32)0x41, (UChar32)0x5A); /* [A-Z] */ 272 uspoof_setAllowedChars(sc, us, &status); 273 TEST_ASSERT_SUCCESS(status); 274 TEST_ASSERT_NE(us, uspoof_getAllowedChars(sc, &status)); 275 TEST_ASSERT(uset_equals(us, uspoof_getAllowedChars(sc, &status))); 276 TEST_ASSERT_SUCCESS(status); 277 uset_close(us); 278 TEST_TEARDOWN; 279 280 /* 281 * clone() 282 */ 283 284 TEST_SETUP 285 USpoofChecker *clone1 = NULL; 286 USpoofChecker *clone2 = NULL; 287 int32_t checkResults = 0; 288 289 clone1 = uspoof_clone(sc, &status); 290 TEST_ASSERT_SUCCESS(status); 291 TEST_ASSERT_NE(clone1, sc); 292 293 clone2 = uspoof_clone(clone1, &status); 294 TEST_ASSERT_SUCCESS(status); 295 TEST_ASSERT_NE(clone2, clone1); 296 297 uspoof_close(clone1); 298 299 /* Verify that the cloned spoof checker is alive */ 300 checkResults = uspoof_check(clone2, goodLatin, -1, NULL, &status); 301 TEST_ASSERT_SUCCESS(status); 302 TEST_ASSERT_EQ(0, checkResults); 303 304 checkResults = uspoof_check(clone2, scMixed, -1, NULL, &status); 305 TEST_ASSERT_SUCCESS(status); 306 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); 307 uspoof_close(clone2); 308 TEST_TEARDOWN; 309 310 /* 311 * get & set Checks 312 */ 313 TEST_SETUP 314 int32_t checks; 315 int32_t checks2; 316 int32_t checkResults; 317 318 checks = uspoof_getChecks(sc, &status); 319 TEST_ASSERT_SUCCESS(status); 320 TEST_ASSERT_EQ(USPOOF_ALL_CHECKS, checks); 321 322 checks &= ~(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE); 323 uspoof_setChecks(sc, checks, &status); 324 TEST_ASSERT_SUCCESS(status); 325 checks2 = uspoof_getChecks(sc, &status); 326 TEST_ASSERT_EQ(checks, checks2); 327 328 /* The checks that were disabled just above are the same ones that the "scMixed" test fails. 329 So with those tests gone checking that Identifier should now succeed */ 330 checkResults = uspoof_check(sc, scMixed, -1, NULL, &status); 331 TEST_ASSERT_SUCCESS(status); 332 TEST_ASSERT_EQ(0, checkResults); 333 TEST_TEARDOWN; 334 335 /* 336 * AllowedLoacles 337 */ 338 339 TEST_SETUP 340 const char *allowedLocales; 341 int32_t checkResults; 342 343 /* Default allowed locales list should be empty */ 344 allowedLocales = uspoof_getAllowedLocales(sc, &status); 345 TEST_ASSERT_SUCCESS(status); 346 TEST_ASSERT(strcmp("", allowedLocales) == 0) 347 348 /* Allow en and ru, which should enable Latin and Cyrillic only to pass */ 349 uspoof_setAllowedLocales(sc, "en, ru_RU", &status); 350 TEST_ASSERT_SUCCESS(status); 351 allowedLocales = uspoof_getAllowedLocales(sc, &status); 352 TEST_ASSERT_SUCCESS(status); 353 TEST_ASSERT(strstr(allowedLocales, "en") != NULL); 354 TEST_ASSERT(strstr(allowedLocales, "ru") != NULL); 355 356 /* Limit checks to USPOOF_CHAR_LIMIT. Some of the test data has whole script confusables also, 357 * which we don't want to see in this test. */ 358 uspoof_setChecks(sc, USPOOF_CHAR_LIMIT, &status); 359 TEST_ASSERT_SUCCESS(status); 360 361 checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status); 362 TEST_ASSERT_SUCCESS(status); 363 TEST_ASSERT_EQ(0, checkResults); 364 365 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status); 366 TEST_ASSERT_SUCCESS(status); 367 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults); 368 369 checkResults = uspoof_check(sc, goodCyrl, -1, NULL, &status); 370 TEST_ASSERT_SUCCESS(status); 371 TEST_ASSERT_EQ(0, checkResults); 372 373 /* Reset with an empty locale list, which should allow all characters to pass */ 374 uspoof_setAllowedLocales(sc, " ", &status); 375 TEST_ASSERT_SUCCESS(status); 376 377 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status); 378 TEST_ASSERT_SUCCESS(status); 379 TEST_ASSERT_EQ(0, checkResults); 380 TEST_TEARDOWN; 381 382 /* 383 * AllowedChars set/get the USet of allowed characters. 384 */ 385 TEST_SETUP 386 const USet *set; 387 USet *tmpSet; 388 int32_t checkResults; 389 390 /* By default, we should see no restriction; the USet should allow all characters. */ 391 set = uspoof_getAllowedChars(sc, &status); 392 TEST_ASSERT_SUCCESS(status); 393 tmpSet = uset_open(0, 0x10ffff); 394 TEST_ASSERT(uset_equals(tmpSet, set)); 395 396 /* Setting the allowed chars should enable the check. */ 397 uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CHAR_LIMIT, &status); 398 TEST_ASSERT_SUCCESS(status); 399 400 /* Remove a character that is in our good Latin test identifier from the allowed chars set. */ 401 uset_remove(tmpSet, goodLatin[1]); 402 uspoof_setAllowedChars(sc, tmpSet, &status); 403 TEST_ASSERT_SUCCESS(status); 404 uset_close(tmpSet); 405 406 /* Latin Identifier should now fail; other non-latin test cases should still be OK */ 407 checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status); 408 TEST_ASSERT_SUCCESS(status); 409 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults); 410 411 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status); 412 TEST_ASSERT_SUCCESS(status); 413 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults); 414 TEST_TEARDOWN; 415 416 /* 417 * check UTF-8 418 */ 419 TEST_SETUP 420 char utf8buf[200]; 421 int32_t checkResults; 422 int32_t position; 423 424 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodLatin, -1, &status); 425 TEST_ASSERT_SUCCESS(status); 426 position = 666; 427 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status); 428 TEST_ASSERT_SUCCESS(status); 429 TEST_ASSERT_EQ(0, checkResults); 430 TEST_ASSERT_EQ(666, position); 431 432 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodCyrl, -1, &status); 433 TEST_ASSERT_SUCCESS(status); 434 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status); 435 TEST_ASSERT_SUCCESS(status); 436 TEST_ASSERT_EQ(0, checkResults); 437 438 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, scMixed, -1, &status); 439 TEST_ASSERT_SUCCESS(status); 440 position = 666; 441 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status); 442 TEST_ASSERT_SUCCESS(status); 443 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_SINGLE_SCRIPT , checkResults); 444 TEST_ASSERT_EQ(2, position); 445 446 TEST_TEARDOWN; 447 448 /* 449 * uspoof_areConfusable() 450 */ 451 TEST_SETUP 452 int32_t checkResults; 453 454 checkResults = uspoof_areConfusable(sc, scLatin, -1, scMixed, -1, &status); 455 TEST_ASSERT_SUCCESS(status); 456 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); 457 458 checkResults = uspoof_areConfusable(sc, goodGreek, -1, scLatin, -1, &status); 459 TEST_ASSERT_SUCCESS(status); 460 TEST_ASSERT_EQ(0, checkResults); 461 462 checkResults = uspoof_areConfusable(sc, lll_Latin_a, -1, lll_Latin_b, -1, &status); 463 TEST_ASSERT_SUCCESS(status); 464 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults); 465 466 TEST_TEARDOWN; 467 468 /* 469 * areConfusableUTF8 470 */ 471 TEST_SETUP 472 int32_t checkResults; 473 char s1[200]; 474 char s2[200]; 475 476 477 u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status); 478 u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status); 479 TEST_ASSERT_SUCCESS(status); 480 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status); 481 TEST_ASSERT_SUCCESS(status); 482 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); 483 484 u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status); 485 u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status); 486 TEST_ASSERT_SUCCESS(status); 487 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status); 488 TEST_ASSERT_SUCCESS(status); 489 TEST_ASSERT_EQ(0, checkResults); 490 491 u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status); 492 u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status); 493 TEST_ASSERT_SUCCESS(status); 494 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status); 495 TEST_ASSERT_SUCCESS(status); 496 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults); 497 498 TEST_TEARDOWN; 499 500 501 /* 502 * getSkeleton 503 */ 504 505 TEST_SETUP 506 UChar dest[100]; 507 int32_t skelLength; 508 509 skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, dest, sizeof(dest)/sizeof(UChar), &status); 510 TEST_ASSERT_SUCCESS(status); 511 TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest)); 512 TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength); 513 514 skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, NULL, 0, &status); 515 TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status); 516 TEST_ASSERT_EQ(3, skelLength); 517 status = U_ZERO_ERROR; 518 519 TEST_TEARDOWN; 520 } 521 522 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ 523