Home | History | Annotate | Download | only in cups
      1 /*
      2  * Internationalization test for CUPS.
      3  *
      4  * Copyright 2007-2014 by Apple Inc.
      5  * Copyright 1997-2006 by Easy Software Products.
      6  *
      7  * These coded instructions, statements, and computer programs are the
      8  * property of Apple Inc. and are protected by Federal copyright
      9  * law.  Distribution and use rights are outlined in the file "LICENSE.txt"
     10  * which should have been included with this file.  If this file is
     11  * missing or damaged, see the license at "http://www.cups.org/".
     12  *
     13  * This file is subject to the Apple OS-Developed Software exception.
     14  */
     15 
     16 /*
     17  * Include necessary headers...
     18  */
     19 
     20 #include "string-private.h"
     21 #include "language-private.h"
     22 #include <stdlib.h>
     23 #include <time.h>
     24 #include <unistd.h>
     25 
     26 
     27 /*
     28  * Local globals...
     29  */
     30 
     31 static const char * const lang_encodings[] =
     32 			{		/* Encoding strings */
     33 			  "us-ascii",		"iso-8859-1",
     34 			  "iso-8859-2",		"iso-8859-3",
     35 			  "iso-8859-4",		"iso-8859-5",
     36 			  "iso-8859-6",		"iso-8859-7",
     37 			  "iso-8859-8",		"iso-8859-9",
     38 			  "iso-8859-10",	"utf-8",
     39 			  "iso-8859-13",	"iso-8859-14",
     40 			  "iso-8859-15",	"windows-874",
     41 			  "windows-1250",	"windows-1251",
     42 			  "windows-1252",	"windows-1253",
     43 			  "windows-1254",	"windows-1255",
     44 			  "windows-1256",	"windows-1257",
     45 			  "windows-1258",	"koi8-r",
     46 			  "koi8-u",		"iso-8859-11",
     47 			  "iso-8859-16",	"mac-roman",
     48 			  "unknown",		"unknown",
     49 			  "unknown",		"unknown",
     50 			  "unknown",		"unknown",
     51 			  "unknown",		"unknown",
     52 			  "unknown",		"unknown",
     53 			  "unknown",		"unknown",
     54 			  "unknown",		"unknown",
     55 			  "unknown",		"unknown",
     56 			  "unknown",		"unknown",
     57 			  "unknown",		"unknown",
     58 			  "unknown",		"unknown",
     59 			  "unknown",		"unknown",
     60 			  "unknown",		"unknown",
     61 			  "unknown",		"unknown",
     62 			  "unknown",		"unknown",
     63 			  "unknown",		"unknown",
     64 			  "unknown",		"unknown",
     65 			  "windows-932",	"windows-936",
     66 			  "windows-949",	"windows-950",
     67 			  "windows-1361",	"unknown",
     68 			  "unknown",		"unknown",
     69 			  "unknown",		"unknown",
     70 			  "unknown",		"unknown",
     71 			  "unknown",		"unknown",
     72 			  "unknown",		"unknown",
     73 			  "unknown",		"unknown",
     74 			  "unknown",		"unknown",
     75 			  "unknown",		"unknown",
     76 			  "unknown",		"unknown",
     77 			  "unknown",		"unknown",
     78 			  "unknown",		"unknown",
     79 			  "unknown",		"unknown",
     80 			  "unknown",		"unknown",
     81 			  "unknown",		"unknown",
     82 			  "unknown",		"unknown",
     83 			  "unknown",		"unknown",
     84 			  "unknown",		"unknown",
     85 			  "unknown",		"unknown",
     86 			  "unknown",		"unknown",
     87 			  "unknown",		"unknown",
     88 			  "unknown",		"unknown",
     89 			  "unknown",		"unknown",
     90 			  "unknown",		"unknown",
     91 			  "unknown",		"unknown",
     92 			  "unknown",		"unknown",
     93 			  "unknown",		"unknown",
     94 			  "unknown",		"unknown",
     95 			  "unknown",		"unknown",
     96 			  "unknown",		"unknown",
     97 			  "euc-cn",		"euc-jp",
     98 			  "euc-kr",		"euc-tw",
     99 			  "jis-x0213"
    100 			};
    101 
    102 
    103 /*
    104  * Local functions...
    105  */
    106 
    107 static void	print_utf8(const char *msg, const cups_utf8_t *src);
    108 
    109 
    110 /*
    111  * 'main()' - Main entry for internationalization test module.
    112  */
    113 
    114 int					/* O - Exit code */
    115 main(int  argc,				/* I - Argument Count */
    116      char *argv[])			/* I - Arguments */
    117 {
    118   FILE		*fp;			/* File pointer */
    119   int		count;			/* File line counter */
    120   int		status,			/* Status of current test */
    121 		errors;			/* Error count */
    122   char		line[1024];		/* File line source string */
    123   int		len;			/* Length (count) of string */
    124   char		legsrc[1024],		/* Legacy source string */
    125 		legdest[1024],		/* Legacy destination string */
    126 		*legptr;		/* Pointer into legacy string */
    127   cups_utf8_t	utf8latin[] =		/* UTF-8 Latin-1 source */
    128     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
    129     /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
    130   cups_utf8_t	utf8repla[] =		/* UTF-8 Latin-1 replacement */
    131     { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
    132     /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
    133   cups_utf8_t	utf8greek[] =		/* UTF-8 Greek source string */
    134     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
    135     /* "A != <ALPHA>." - use ISO 8859-7 */
    136   cups_utf8_t	utf8japan[] =		/* UTF-8 Japanese source */
    137     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
    138     /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
    139   cups_utf8_t	utf8taiwan[] =		/* UTF-8 Chinese source */
    140     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
    141     /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
    142   cups_utf8_t	utf8dest[1024];		/* UTF-8 destination string */
    143   cups_utf32_t	utf32dest[1024];	/* UTF-32 destination string */
    144 
    145 
    146   if (argc > 1)
    147   {
    148     int			i;		/* Looping var */
    149     cups_encoding_t	encoding;	/* Source encoding */
    150 
    151 
    152     if (argc != 3)
    153     {
    154       puts("Usage: ./testi18n [filename charset]");
    155       return (1);
    156     }
    157 
    158     if ((fp = fopen(argv[1], "rb")) == NULL)
    159     {
    160       perror(argv[1]);
    161       return (1);
    162     }
    163 
    164     for (i = 0, encoding = CUPS_AUTO_ENCODING;
    165          i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
    166 	 i ++)
    167       if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
    168       {
    169         encoding = (cups_encoding_t)i;
    170 	break;
    171       }
    172 
    173     if (encoding == CUPS_AUTO_ENCODING)
    174     {
    175       fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
    176       return (1);
    177     }
    178 
    179     while (fgets(line, sizeof(line), fp))
    180     {
    181       if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
    182       {
    183         fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
    184 	return (1);
    185       }
    186 
    187       fputs((char *)utf8dest, stdout);
    188     }
    189 
    190     fclose(fp);
    191     return (0);
    192   }
    193 
    194  /*
    195   * Start with some conversion tests from a UTF-8 test file.
    196   */
    197 
    198   errors = 0;
    199 
    200   if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
    201   {
    202     perror("utf8demo.txt");
    203     return (1);
    204   }
    205 
    206  /*
    207   * cupsUTF8ToUTF32
    208   */
    209 
    210   fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
    211 
    212   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
    213   {
    214     count ++;
    215 
    216     if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
    217     {
    218       printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
    219       errors ++;
    220       status = 1;
    221       break;
    222     }
    223   }
    224 
    225   if (!status)
    226     puts("PASS");
    227 
    228  /*
    229   * cupsUTF8ToCharset(CUPS_EUC_JP)
    230   */
    231 
    232   fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
    233 
    234   rewind(fp);
    235 
    236   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
    237   {
    238     count ++;
    239 
    240     len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
    241     if (len < 0)
    242     {
    243       printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
    244       errors ++;
    245       status = 1;
    246       break;
    247     }
    248   }
    249 
    250   if (!status)
    251     puts("PASS");
    252 
    253   fclose(fp);
    254 
    255  /*
    256   * Test UTF-8 to legacy charset (ISO 8859-1)...
    257   */
    258 
    259   fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
    260 
    261   legdest[0] = 0;
    262 
    263   len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
    264   if (len < 0)
    265   {
    266     printf("FAIL (len=%d)\n", len);
    267     errors ++;
    268   }
    269   else
    270     puts("PASS");
    271 
    272  /*
    273   * cupsCharsetToUTF8
    274   */
    275 
    276   fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
    277 
    278   strlcpy(legsrc, legdest, sizeof(legsrc));
    279 
    280   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
    281   if ((size_t)len != strlen((char *)utf8latin))
    282   {
    283     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
    284     print_utf8("    utf8latin", utf8latin);
    285     print_utf8("    utf8dest", utf8dest);
    286     errors ++;
    287   }
    288   else if (memcmp(utf8latin, utf8dest, (size_t)len))
    289   {
    290     puts("FAIL (results do not match)");
    291     print_utf8("    utf8latin", utf8latin);
    292     print_utf8("    utf8dest", utf8dest);
    293     errors ++;
    294   }
    295   else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
    296   {
    297     puts("FAIL (replacement characters do not work!)");
    298     errors ++;
    299   }
    300   else
    301     puts("PASS");
    302 
    303  /*
    304   * Test UTF-8 to/from legacy charset (ISO 8859-7)...
    305   */
    306 
    307   fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
    308 
    309   if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
    310   {
    311     puts("FAIL");
    312     errors ++;
    313   }
    314   else
    315   {
    316     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
    317 
    318     if (*legptr)
    319     {
    320       puts("FAIL (unknown character)");
    321       errors ++;
    322     }
    323     else
    324       puts("PASS");
    325   }
    326 
    327   fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
    328 
    329   strlcpy(legsrc, legdest, sizeof(legsrc));
    330 
    331   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
    332   if ((size_t)len != strlen((char *)utf8greek))
    333   {
    334     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
    335     print_utf8("    utf8greek", utf8greek);
    336     print_utf8("    utf8dest", utf8dest);
    337     errors ++;
    338   }
    339   else if (memcmp(utf8greek, utf8dest, (size_t)len))
    340   {
    341     puts("FAIL (results do not match)");
    342     print_utf8("    utf8greek", utf8greek);
    343     print_utf8("    utf8dest", utf8dest);
    344     errors ++;
    345   }
    346   else
    347     puts("PASS");
    348 
    349  /*
    350   * Test UTF-8 to/from legacy charset (Windows 932)...
    351   */
    352 
    353   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
    354 
    355   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
    356   {
    357     puts("FAIL");
    358     errors ++;
    359   }
    360   else
    361   {
    362     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
    363 
    364     if (*legptr)
    365     {
    366       puts("FAIL (unknown character)");
    367       errors ++;
    368     }
    369     else
    370       puts("PASS");
    371   }
    372 
    373   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
    374 
    375   strlcpy(legsrc, legdest, sizeof(legsrc));
    376 
    377   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
    378   if ((size_t)len != strlen((char *)utf8japan))
    379   {
    380     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
    381     print_utf8("    utf8japan", utf8japan);
    382     print_utf8("    utf8dest", utf8dest);
    383     errors ++;
    384   }
    385   else if (memcmp(utf8japan, utf8dest, (size_t)len))
    386   {
    387     puts("FAIL (results do not match)");
    388     print_utf8("    utf8japan", utf8japan);
    389     print_utf8("    utf8dest", utf8dest);
    390     errors ++;
    391   }
    392   else
    393     puts("PASS");
    394 
    395  /*
    396   * Test UTF-8 to/from legacy charset (EUC-JP)...
    397   */
    398 
    399   fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
    400 
    401   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
    402   {
    403     puts("FAIL");
    404     errors ++;
    405   }
    406   else
    407   {
    408     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
    409 
    410     if (*legptr)
    411     {
    412       puts("FAIL (unknown character)");
    413       errors ++;
    414     }
    415     else
    416       puts("PASS");
    417   }
    418 
    419 #ifndef __linux
    420   fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
    421 
    422   strlcpy(legsrc, legdest, sizeof(legsrc));
    423 
    424   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
    425   if ((size_t)len != strlen((char *)utf8japan))
    426   {
    427     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
    428     print_utf8("    utf8japan", utf8japan);
    429     print_utf8("    utf8dest", utf8dest);
    430     errors ++;
    431   }
    432   else if (memcmp(utf8japan, utf8dest, (size_t)len))
    433   {
    434     puts("FAIL (results do not match)");
    435     print_utf8("    utf8japan", utf8japan);
    436     print_utf8("    utf8dest", utf8dest);
    437     errors ++;
    438   }
    439   else
    440     puts("PASS");
    441 #endif /* !__linux */
    442 
    443  /*
    444   * Test UTF-8 to/from legacy charset (Windows 950)...
    445   */
    446 
    447   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
    448 
    449   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
    450   {
    451     puts("FAIL");
    452     errors ++;
    453   }
    454   else
    455   {
    456     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
    457 
    458     if (*legptr)
    459     {
    460       puts("FAIL (unknown character)");
    461       errors ++;
    462     }
    463     else
    464       puts("PASS");
    465   }
    466 
    467   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
    468 
    469   strlcpy(legsrc, legdest, sizeof(legsrc));
    470 
    471   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
    472   if ((size_t)len != strlen((char *)utf8taiwan))
    473   {
    474     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
    475     print_utf8("    utf8taiwan", utf8taiwan);
    476     print_utf8("    utf8dest", utf8dest);
    477     errors ++;
    478   }
    479   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
    480   {
    481     puts("FAIL (results do not match)");
    482     print_utf8("    utf8taiwan", utf8taiwan);
    483     print_utf8("    utf8dest", utf8dest);
    484     errors ++;
    485   }
    486   else
    487     puts("PASS");
    488 
    489  /*
    490   * Test UTF-8 to/from legacy charset (EUC-TW)...
    491   */
    492 
    493   fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
    494 
    495   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
    496   {
    497     puts("FAIL");
    498     errors ++;
    499   }
    500   else
    501   {
    502     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
    503 
    504     if (*legptr)
    505     {
    506       puts("FAIL (unknown character)");
    507       errors ++;
    508     }
    509     else
    510       puts("PASS");
    511   }
    512 
    513   fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
    514 
    515   strlcpy(legsrc, legdest, sizeof(legsrc));
    516 
    517   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
    518   if ((size_t)len != strlen((char *)utf8taiwan))
    519   {
    520     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
    521     print_utf8("    utf8taiwan", utf8taiwan);
    522     print_utf8("    utf8dest", utf8dest);
    523     errors ++;
    524   }
    525   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
    526   {
    527     puts("FAIL (results do not match)");
    528     print_utf8("    utf8taiwan", utf8taiwan);
    529     print_utf8("    utf8dest", utf8dest);
    530     errors ++;
    531   }
    532   else
    533     puts("PASS");
    534 
    535 #if 0
    536  /*
    537   * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
    538   */
    539   if (verbose)
    540     printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
    541   len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
    542   if (len < 0)
    543     return (1);
    544   if (verbose)
    545   {
    546     print_utf8(" utf8good ", utf8good);
    547     print_utf32(" utf32dest", utf32dest);
    548   }
    549   memcpy(utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
    550   len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
    551   if (len < 0)
    552     return (1);
    553   if (len != strlen ((char *) utf8good))
    554     return (1);
    555   if (memcmp(utf8good, utf8dest, len) != 0)
    556     return (1);
    557 
    558  /*
    559   * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
    560   */
    561   if (verbose)
    562     printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
    563   len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
    564   if (len >= 0)
    565     return (1);
    566   if (verbose)
    567     print_utf8(" utf8bad  ", utf8bad);
    568 
    569  /*
    570   * Test _cupsCharmapFlush()...
    571   */
    572   if (verbose)
    573     printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
    574   _cupsCharmapFlush();
    575   return (0);
    576 #endif /* 0 */
    577 
    578   return (errors > 0);
    579 }
    580 
    581 
    582 /*
    583  * 'print_utf8()' - Print UTF-8 string with (optional) message.
    584  */
    585 
    586 static void
    587 print_utf8(const char	     *msg,	/* I - Message String */
    588 	   const cups_utf8_t *src)	/* I - UTF-8 Source String */
    589 {
    590   const char	*prefix;		/* Prefix string */
    591 
    592 
    593   if (msg)
    594     printf("%s:", msg);
    595 
    596   for (prefix = " "; *src; src ++)
    597   {
    598     printf("%s%02x", prefix, *src);
    599 
    600     if ((src[0] & 0x80) && (src[1] & 0x80))
    601       prefix = "";
    602     else
    603       prefix = " ";
    604   }
    605 
    606   putchar('\n');
    607 }
    608