1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1999-2013, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 #include "unicode/utypes.h" 9 #include "unicode/ustring.h" 10 #include "unicode/ctest.h" 11 #include "unicode/ucnv.h" 12 13 void TestEuroRegression(void); 14 void addTestEuroRegression(TestNode** root); 15 16 #if !UCONFIG_NO_LEGACY_CONVERSION 17 void addTestEuroRegression(TestNode** root) 18 { 19 addTest(root, &TestEuroRegression, "tsconv/eurocreg/TestEuroRegression"); 20 } 21 22 /* 23 * The table below lists codepages that are supposed to have roundtrip mappings for 24 * the U+20AC Euro sign. 25 * 26 * Changes made 2000nov28 and marked as such are due to the following: 27 * 28 * After updating all ibm-*.ucm files with precise fallback indicators (|0, |1, |3), 29 * some of these codepages failed the Euro regression test. 30 * This means that the actuall mappings changed when only the preciseness of fallback 31 * mappings should have changed. 32 * My (Markus) suspicion is that some files got Euro sign mappings added manually, 33 * changing their contents compared to the NLTC (IBM Toronto codepage database) definition. 34 * Such changes are highly undesirable because they effectively define new codepages. 35 * Codepage mapping files with "ibm-*.ucm" should always exactly match the files 36 * from the IBM codepage database. 37 * (If there are several mappings with the same number, then we choose the 38 * default mappings with Private-Use Area assignments.) 39 * 40 * Also, in the past, some aliases were set such that e.g. cp850 became an alias for ibm-858. 41 * This followed the practice of OS/2 that uses the old codepage number 850 for the new 42 * codepage 858, with the main difference being the additional Euro sign. 43 * However, we have documented that the "cp" prefix should be used for Microsoft-compatible 44 * codepages, and Microsoft Windows 2000's codepage 850 does not contain a Euro sign mapping. 45 * Therefore, cp850 must not support the Euro sign. 46 * In these cases, I have changed the codepage name here to point to a newer codepage with the 47 * Euro sign, using its new name. 48 * I could not find such "updates" for codepages 1362 and 1363 - we might want to supply them later. 49 */ 50 51 static const char convertersToCheck[][15] = { 52 "cp1250", 53 "cp1251", 54 "cp1252", 55 "cp1254", 56 "cp1255", 57 "cp1256", 58 "cp1257", 59 "cp1258", 60 "ibm1140", 61 "ibm1142", 62 "ibm1143", 63 "ibm1144", 64 "ibm1145", 65 "ibm1146", 66 "ibm1147", 67 "ibm1148", 68 "ibm1149", 69 "ibm1153", 70 "ibm1154", 71 "ibm1155", 72 "ibm1156", 73 "ibm1157", 74 "ibm1158", 75 /*"ibm-1159",*/ /* removed 2003Apr17 */ 76 "ibm12712", 77 "ibm16804", 78 "ibm-1160", 79 "ibm-1162", 80 "ibm-1164", 81 82 "ibm-858", /* was "cp850" changed 2000nov28 */ 83 /* duplicate "cp850" removed 2000nov28 */ 84 /*"ibm-9049",*/ /* was "cp857" changed 2002nov25 */ 85 "ibm-12712", /* was "cp424" changed 2000nov28 */ 86 "ibm-4899", /* was "cp803" changed 2000nov28 */ 87 "ibm-867", /* was "cp862" changed 2002nov25 */ 88 "cp1258", 89 "windows-950", 90 "cp1253", 91 /* "cp819", 92 "cp13488",*/ 93 "ibm-4971", 94 /*"ibm-9061",*/ /* was "cp869" changed 2002nov25 */ 95 /* "cp813",*/ 96 /*"ibm-9044",*/ /* was "cp852" changed 2002nov25 */ 97 /*"ibm-872",*/ /* was "cp855" changed 2002nov25 */ 98 /*"ibm-808",*/ /* was "cp866" changed 2002nov25 */ 99 /* "cp1131", 100 "cp1125",*/ 101 "ibm-902", /* was "cp922" changed 2003jan08 */ 102 "ibm-901", /* was "cp921" changed 2003jan09 */ 103 /*"ibm-17248",*/ /* was "cp864" changed 2002nov25 */ 104 /*"cp1008", 105 "cp1046",*/ 106 /* "cp9066", 107 "cp1129",*/ 108 "ibm-5123", /* was "cp1027" changed 2003jan08 */ 109 /* "cp300",*/ 110 /* "cp4930",*/ 111 "ibm-1364", 112 /* "cp1362" removed 2000nov28 */ 113 "cp1363", 114 /* "cp1114", removed 2002jul3 115 "cp947", removed 2002jul3 */ 116 "gb18030", 117 ""}; 118 119 UBool isEuroAware(UConverter*); 120 121 void TestEuroRegression() 122 { 123 int32_t i=0; 124 125 do 126 { 127 UErrorCode err = U_ZERO_ERROR; 128 UConverter* myConv = ucnv_open(convertersToCheck[i], &err); 129 if (U_FAILURE(err)&&convertersToCheck[i][0]) 130 log_data_err("%s \tMISSING [%s]\n", convertersToCheck[i], u_errorName(err)); 131 else 132 { 133 if (isEuroAware(myConv)) 134 log_verbose("%s \tsupports euro\n", convertersToCheck[i]); 135 else 136 log_err("%s \tDOES NOT support euro\n", convertersToCheck[i]); 137 ucnv_close(myConv); 138 } 139 } while (convertersToCheck[++i][0]); 140 } 141 142 UBool isEuroAware(UConverter* myConv) 143 { 144 static const UChar euroString[2] = { 0x20AC, 0x0000 }; 145 char target[20]; 146 UChar euroBack[2]; 147 int32_t targetSize, euroBackSize; 148 UErrorCode err = U_ZERO_ERROR; 149 /*const char* myName = ucnv_getName(myConv, &err);*/ 150 151 targetSize = ucnv_fromUChars(myConv, 152 target, 153 sizeof(target), 154 euroString, 155 -1, 156 &err); 157 if (U_FAILURE(err)) 158 { 159 log_err("Failure Occured in ucnv_fromUChars euro roundtrip test\n"); 160 return FALSE; 161 } 162 euroBackSize = ucnv_toUChars(myConv, 163 euroBack, 164 2, 165 target, 166 targetSize, 167 &err); 168 (void)euroBackSize; /* Suppress set but not used warning. */ 169 if (U_FAILURE(err)) 170 { 171 log_err("Failure Occured in ucnv_toUChars euro roundtrip test\n"); 172 return FALSE; 173 } 174 if (u_strcmp(euroString, euroBack)) 175 { 176 /* log_err("%s FAILED Euro rountrip\n", myName);*/ 177 return FALSE; 178 } 179 else 180 { 181 /* log_verbose("%s PASSED Euro rountrip\n", myName);*/ 182 return TRUE; 183 } 184 185 } 186 #else 187 void addTestEuroRegression(TestNode** root) 188 { 189 /* test nothing... */ 190 } 191 #endif 192