1 /* 2 * Internationalization test for CUPS. 3 * 4 * Copyright 2007-2014 by Apple Inc. 5 * Copyright 1997-2006 by Easy Software Products. 6 * 7 * These coded instructions, statements, and computer programs are the 8 * property of Apple Inc. and are protected by Federal copyright 9 * law. Distribution and use rights are outlined in the file "LICENSE.txt" 10 * which should have been included with this file. If this file is 11 * missing or damaged, see the license at "http://www.cups.org/". 12 * 13 * This file is subject to the Apple OS-Developed Software exception. 14 */ 15 16 /* 17 * Include necessary headers... 18 */ 19 20 #include "string-private.h" 21 #include "language-private.h" 22 #include <stdlib.h> 23 #include <time.h> 24 #include <unistd.h> 25 26 27 /* 28 * Local globals... 29 */ 30 31 static const char * const lang_encodings[] = 32 { /* Encoding strings */ 33 "us-ascii", "iso-8859-1", 34 "iso-8859-2", "iso-8859-3", 35 "iso-8859-4", "iso-8859-5", 36 "iso-8859-6", "iso-8859-7", 37 "iso-8859-8", "iso-8859-9", 38 "iso-8859-10", "utf-8", 39 "iso-8859-13", "iso-8859-14", 40 "iso-8859-15", "windows-874", 41 "windows-1250", "windows-1251", 42 "windows-1252", "windows-1253", 43 "windows-1254", "windows-1255", 44 "windows-1256", "windows-1257", 45 "windows-1258", "koi8-r", 46 "koi8-u", "iso-8859-11", 47 "iso-8859-16", "mac-roman", 48 "unknown", "unknown", 49 "unknown", "unknown", 50 "unknown", "unknown", 51 "unknown", "unknown", 52 "unknown", "unknown", 53 "unknown", "unknown", 54 "unknown", "unknown", 55 "unknown", "unknown", 56 "unknown", "unknown", 57 "unknown", "unknown", 58 "unknown", "unknown", 59 "unknown", "unknown", 60 "unknown", "unknown", 61 "unknown", "unknown", 62 "unknown", "unknown", 63 "unknown", "unknown", 64 "unknown", "unknown", 65 "windows-932", "windows-936", 66 "windows-949", "windows-950", 67 "windows-1361", "unknown", 68 "unknown", "unknown", 69 "unknown", "unknown", 70 "unknown", "unknown", 71 "unknown", "unknown", 72 "unknown", "unknown", 73 "unknown", "unknown", 74 "unknown", "unknown", 75 "unknown", "unknown", 76 "unknown", "unknown", 77 "unknown", "unknown", 78 "unknown", "unknown", 79 "unknown", "unknown", 80 "unknown", "unknown", 81 "unknown", "unknown", 82 "unknown", "unknown", 83 "unknown", "unknown", 84 "unknown", "unknown", 85 "unknown", "unknown", 86 "unknown", "unknown", 87 "unknown", "unknown", 88 "unknown", "unknown", 89 "unknown", "unknown", 90 "unknown", "unknown", 91 "unknown", "unknown", 92 "unknown", "unknown", 93 "unknown", "unknown", 94 "unknown", "unknown", 95 "unknown", "unknown", 96 "unknown", "unknown", 97 "euc-cn", "euc-jp", 98 "euc-kr", "euc-tw", 99 "jis-x0213" 100 }; 101 102 103 /* 104 * Local functions... 105 */ 106 107 static void print_utf8(const char *msg, const cups_utf8_t *src); 108 109 110 /* 111 * 'main()' - Main entry for internationalization test module. 112 */ 113 114 int /* O - Exit code */ 115 main(int argc, /* I - Argument Count */ 116 char *argv[]) /* I - Arguments */ 117 { 118 FILE *fp; /* File pointer */ 119 int count; /* File line counter */ 120 int status, /* Status of current test */ 121 errors; /* Error count */ 122 char line[1024]; /* File line source string */ 123 int len; /* Length (count) of string */ 124 char legsrc[1024], /* Legacy source string */ 125 legdest[1024], /* Legacy destination string */ 126 *legptr; /* Pointer into legacy string */ 127 cups_utf8_t utf8latin[] = /* UTF-8 Latin-1 source */ 128 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 }; 129 /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */ 130 cups_utf8_t utf8repla[] = /* UTF-8 Latin-1 replacement */ 131 { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 }; 132 /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */ 133 cups_utf8_t utf8greek[] = /* UTF-8 Greek source string */ 134 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 }; 135 /* "A != <ALPHA>." - use ISO 8859-7 */ 136 cups_utf8_t utf8japan[] = /* UTF-8 Japanese source */ 137 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 }; 138 /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */ 139 cups_utf8_t utf8taiwan[] = /* UTF-8 Chinese source */ 140 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 }; 141 /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */ 142 cups_utf8_t utf8dest[1024]; /* UTF-8 destination string */ 143 cups_utf32_t utf32dest[1024]; /* UTF-32 destination string */ 144 145 146 if (argc > 1) 147 { 148 int i; /* Looping var */ 149 cups_encoding_t encoding; /* Source encoding */ 150 151 152 if (argc != 3) 153 { 154 puts("Usage: ./testi18n [filename charset]"); 155 return (1); 156 } 157 158 if ((fp = fopen(argv[1], "rb")) == NULL) 159 { 160 perror(argv[1]); 161 return (1); 162 } 163 164 for (i = 0, encoding = CUPS_AUTO_ENCODING; 165 i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0])); 166 i ++) 167 if (!_cups_strcasecmp(lang_encodings[i], argv[2])) 168 { 169 encoding = (cups_encoding_t)i; 170 break; 171 } 172 173 if (encoding == CUPS_AUTO_ENCODING) 174 { 175 fprintf(stderr, "%s: Unknown character set!\n", argv[2]); 176 return (1); 177 } 178 179 while (fgets(line, sizeof(line), fp)) 180 { 181 if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0) 182 { 183 fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line); 184 return (1); 185 } 186 187 fputs((char *)utf8dest, stdout); 188 } 189 190 fclose(fp); 191 return (0); 192 } 193 194 /* 195 * Start with some conversion tests from a UTF-8 test file. 196 */ 197 198 errors = 0; 199 200 if ((fp = fopen("utf8demo.txt", "rb")) == NULL) 201 { 202 perror("utf8demo.txt"); 203 return (1); 204 } 205 206 /* 207 * cupsUTF8ToUTF32 208 */ 209 210 fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout); 211 212 for (count = 0, status = 0; fgets(line, sizeof(line), fp);) 213 { 214 count ++; 215 216 if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0) 217 { 218 printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count); 219 errors ++; 220 status = 1; 221 break; 222 } 223 } 224 225 if (!status) 226 puts("PASS"); 227 228 /* 229 * cupsUTF8ToCharset(CUPS_EUC_JP) 230 */ 231 232 fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout); 233 234 rewind(fp); 235 236 for (count = 0, status = 0; fgets(line, sizeof(line), fp);) 237 { 238 count ++; 239 240 len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP); 241 if (len < 0) 242 { 243 printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count); 244 errors ++; 245 status = 1; 246 break; 247 } 248 } 249 250 if (!status) 251 puts("PASS"); 252 253 fclose(fp); 254 255 /* 256 * Test UTF-8 to legacy charset (ISO 8859-1)... 257 */ 258 259 fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout); 260 261 legdest[0] = 0; 262 263 len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1); 264 if (len < 0) 265 { 266 printf("FAIL (len=%d)\n", len); 267 errors ++; 268 } 269 else 270 puts("PASS"); 271 272 /* 273 * cupsCharsetToUTF8 274 */ 275 276 fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout); 277 278 strlcpy(legsrc, legdest, sizeof(legsrc)); 279 280 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1); 281 if ((size_t)len != strlen((char *)utf8latin)) 282 { 283 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin)); 284 print_utf8(" utf8latin", utf8latin); 285 print_utf8(" utf8dest", utf8dest); 286 errors ++; 287 } 288 else if (memcmp(utf8latin, utf8dest, (size_t)len)) 289 { 290 puts("FAIL (results do not match)"); 291 print_utf8(" utf8latin", utf8latin); 292 print_utf8(" utf8dest", utf8dest); 293 errors ++; 294 } 295 else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0) 296 { 297 puts("FAIL (replacement characters do not work!)"); 298 errors ++; 299 } 300 else 301 puts("PASS"); 302 303 /* 304 * Test UTF-8 to/from legacy charset (ISO 8859-7)... 305 */ 306 307 fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout); 308 309 if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0) 310 { 311 puts("FAIL"); 312 errors ++; 313 } 314 else 315 { 316 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++); 317 318 if (*legptr) 319 { 320 puts("FAIL (unknown character)"); 321 errors ++; 322 } 323 else 324 puts("PASS"); 325 } 326 327 fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout); 328 329 strlcpy(legsrc, legdest, sizeof(legsrc)); 330 331 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7); 332 if ((size_t)len != strlen((char *)utf8greek)) 333 { 334 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek)); 335 print_utf8(" utf8greek", utf8greek); 336 print_utf8(" utf8dest", utf8dest); 337 errors ++; 338 } 339 else if (memcmp(utf8greek, utf8dest, (size_t)len)) 340 { 341 puts("FAIL (results do not match)"); 342 print_utf8(" utf8greek", utf8greek); 343 print_utf8(" utf8dest", utf8dest); 344 errors ++; 345 } 346 else 347 puts("PASS"); 348 349 /* 350 * Test UTF-8 to/from legacy charset (Windows 932)... 351 */ 352 353 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout); 354 355 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0) 356 { 357 puts("FAIL"); 358 errors ++; 359 } 360 else 361 { 362 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++); 363 364 if (*legptr) 365 { 366 puts("FAIL (unknown character)"); 367 errors ++; 368 } 369 else 370 puts("PASS"); 371 } 372 373 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout); 374 375 strlcpy(legsrc, legdest, sizeof(legsrc)); 376 377 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932); 378 if ((size_t)len != strlen((char *)utf8japan)) 379 { 380 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan)); 381 print_utf8(" utf8japan", utf8japan); 382 print_utf8(" utf8dest", utf8dest); 383 errors ++; 384 } 385 else if (memcmp(utf8japan, utf8dest, (size_t)len)) 386 { 387 puts("FAIL (results do not match)"); 388 print_utf8(" utf8japan", utf8japan); 389 print_utf8(" utf8dest", utf8dest); 390 errors ++; 391 } 392 else 393 puts("PASS"); 394 395 /* 396 * Test UTF-8 to/from legacy charset (EUC-JP)... 397 */ 398 399 fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout); 400 401 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0) 402 { 403 puts("FAIL"); 404 errors ++; 405 } 406 else 407 { 408 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++); 409 410 if (*legptr) 411 { 412 puts("FAIL (unknown character)"); 413 errors ++; 414 } 415 else 416 puts("PASS"); 417 } 418 419 #ifndef __linux 420 fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout); 421 422 strlcpy(legsrc, legdest, sizeof(legsrc)); 423 424 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP); 425 if ((size_t)len != strlen((char *)utf8japan)) 426 { 427 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan)); 428 print_utf8(" utf8japan", utf8japan); 429 print_utf8(" utf8dest", utf8dest); 430 errors ++; 431 } 432 else if (memcmp(utf8japan, utf8dest, (size_t)len)) 433 { 434 puts("FAIL (results do not match)"); 435 print_utf8(" utf8japan", utf8japan); 436 print_utf8(" utf8dest", utf8dest); 437 errors ++; 438 } 439 else 440 puts("PASS"); 441 #endif /* !__linux */ 442 443 /* 444 * Test UTF-8 to/from legacy charset (Windows 950)... 445 */ 446 447 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout); 448 449 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0) 450 { 451 puts("FAIL"); 452 errors ++; 453 } 454 else 455 { 456 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++); 457 458 if (*legptr) 459 { 460 puts("FAIL (unknown character)"); 461 errors ++; 462 } 463 else 464 puts("PASS"); 465 } 466 467 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout); 468 469 strlcpy(legsrc, legdest, sizeof(legsrc)); 470 471 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950); 472 if ((size_t)len != strlen((char *)utf8taiwan)) 473 { 474 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan)); 475 print_utf8(" utf8taiwan", utf8taiwan); 476 print_utf8(" utf8dest", utf8dest); 477 errors ++; 478 } 479 else if (memcmp(utf8taiwan, utf8dest, (size_t)len)) 480 { 481 puts("FAIL (results do not match)"); 482 print_utf8(" utf8taiwan", utf8taiwan); 483 print_utf8(" utf8dest", utf8dest); 484 errors ++; 485 } 486 else 487 puts("PASS"); 488 489 /* 490 * Test UTF-8 to/from legacy charset (EUC-TW)... 491 */ 492 493 fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout); 494 495 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0) 496 { 497 puts("FAIL"); 498 errors ++; 499 } 500 else 501 { 502 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++); 503 504 if (*legptr) 505 { 506 puts("FAIL (unknown character)"); 507 errors ++; 508 } 509 else 510 puts("PASS"); 511 } 512 513 fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout); 514 515 strlcpy(legsrc, legdest, sizeof(legsrc)); 516 517 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW); 518 if ((size_t)len != strlen((char *)utf8taiwan)) 519 { 520 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan)); 521 print_utf8(" utf8taiwan", utf8taiwan); 522 print_utf8(" utf8dest", utf8dest); 523 errors ++; 524 } 525 else if (memcmp(utf8taiwan, utf8dest, (size_t)len)) 526 { 527 puts("FAIL (results do not match)"); 528 print_utf8(" utf8taiwan", utf8taiwan); 529 print_utf8(" utf8dest", utf8dest); 530 errors ++; 531 } 532 else 533 puts("PASS"); 534 535 #if 0 536 /* 537 * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)... 538 */ 539 if (verbose) 540 printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n"); 541 len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024); 542 if (len < 0) 543 return (1); 544 if (verbose) 545 { 546 print_utf8(" utf8good ", utf8good); 547 print_utf32(" utf32dest", utf32dest); 548 } 549 memcpy(utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t)); 550 len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024); 551 if (len < 0) 552 return (1); 553 if (len != strlen ((char *) utf8good)) 554 return (1); 555 if (memcmp(utf8good, utf8dest, len) != 0) 556 return (1); 557 558 /* 559 * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)... 560 */ 561 if (verbose) 562 printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n"); 563 len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024); 564 if (len >= 0) 565 return (1); 566 if (verbose) 567 print_utf8(" utf8bad ", utf8bad); 568 569 /* 570 * Test _cupsCharmapFlush()... 571 */ 572 if (verbose) 573 printf("\ntesti18n: Testing _cupsCharmapFlush()...\n"); 574 _cupsCharmapFlush(); 575 return (0); 576 #endif /* 0 */ 577 578 return (errors > 0); 579 } 580 581 582 /* 583 * 'print_utf8()' - Print UTF-8 string with (optional) message. 584 */ 585 586 static void 587 print_utf8(const char *msg, /* I - Message String */ 588 const cups_utf8_t *src) /* I - UTF-8 Source String */ 589 { 590 const char *prefix; /* Prefix string */ 591 592 593 if (msg) 594 printf("%s:", msg); 595 596 for (prefix = " "; *src; src ++) 597 { 598 printf("%s%02x", prefix, *src); 599 600 if ((src[0] & 0x80) && (src[1] & 0x80)) 601 prefix = ""; 602 else 603 prefix = " "; 604 } 605 606 putchar('\n'); 607 } 608