1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2001-2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File custrtrn.C 9 * 10 * Modification History: 11 * Name Description 12 * Ram String transformations test 13 ********************************************************************************* 14 */ 15 /****************************************************************************/ 16 17 18 #include <stdlib.h> 19 #include <stdio.h> 20 #include <string.h> 21 #include "unicode/utypes.h" 22 #include "unicode/ustring.h" 23 #include "unicode/ures.h" 24 #include "ustr_imp.h" 25 #include "cintltst.h" 26 #include "cmemory.h" 27 #include "cstring.h" 28 #include "cwchar.h" 29 30 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 31 32 void addUCharTransformTest(TestNode** root); 33 34 static void Test_strToUTF32(void); 35 static void Test_strToUTF32_surrogates(void); 36 static void Test_strFromUTF32(void); 37 static void Test_strFromUTF32_surrogates(void); 38 static void Test_UChar_UTF8_API(void); 39 static void Test_FromUTF8(void); 40 static void Test_FromUTF8Lenient(void); 41 static void Test_UChar_WCHART_API(void); 42 static void Test_widestrs(void); 43 static void Test_WCHART_LongString(void); 44 static void Test_strToJavaModifiedUTF8(void); 45 static void Test_strFromJavaModifiedUTF8(void); 46 static void TestNullEmptySource(void); 47 48 void 49 addUCharTransformTest(TestNode** root) 50 { 51 addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32"); 52 addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates"); 53 addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32"); 54 addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates"); 55 addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API"); 56 addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8"); 57 addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient"); 58 addTest(root, &Test_UChar_WCHART_API, "custrtrn/Test_UChar_WCHART_API"); 59 addTest(root, &Test_widestrs, "custrtrn/Test_widestrs"); 60 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 61 addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString"); 62 #endif 63 addTest(root, &Test_strToJavaModifiedUTF8, "custrtrn/Test_strToJavaModifiedUTF8"); 64 addTest(root, &Test_strFromJavaModifiedUTF8, "custrtrn/Test_strFromJavaModifiedUTF8"); 65 addTest(root, &TestNullEmptySource, "custrtrn/TestNullEmptySource"); 66 } 67 68 static const UChar32 src32[]={ 69 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 70 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 71 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 72 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 73 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 74 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 75 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 76 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 77 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 78 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 79 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 80 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 81 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 82 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 83 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 84 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 85 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 86 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 87 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 88 /* test non-BMP code points */ 89 0x0002A699, 90 0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB, 91 0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7, 92 0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1, 93 0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0, 94 0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5, 95 96 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 97 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 98 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 99 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 100 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000 101 }; 102 103 static const UChar src16[] = { 104 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 105 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 106 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 107 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 108 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 109 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 110 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 111 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 112 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 113 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 114 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 115 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 116 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 117 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 118 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 119 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 120 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 121 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 122 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 123 124 /* test non-BMP code points */ 125 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 126 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 127 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 128 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 129 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 130 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 131 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 132 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 133 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 134 0xD869, 0xDED5, 135 136 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 137 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 138 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 139 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 140 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000 141 }; 142 143 144 static void Test_strToUTF32(void){ 145 UErrorCode err = U_ZERO_ERROR; 146 UChar32 u32Target[400]; 147 int32_t u32DestLen; 148 int i= 0; 149 150 /* first with length */ 151 u32DestLen = -2; 152 u_strToUTF32(u32Target, 0, &u32DestLen, src16, LENGTHOF(src16),&err); 153 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != LENGTHOF(src32)) { 154 log_err("u_strToUTF32(preflight with length): " 155 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", 156 (long)u32DestLen, (long)LENGTHOF(src32), u_errorName(err)); 157 return; 158 } 159 err = U_ZERO_ERROR; 160 u32DestLen = -2; 161 u_strToUTF32(u32Target, LENGTHOF(src32)+1, &u32DestLen, src16, LENGTHOF(src16),&err); 162 if(err != U_ZERO_ERROR || u32DestLen != LENGTHOF(src32)) { 163 log_err("u_strToUTF32(with length): " 164 "length %ld != %ld and %s != U_ZERO_ERROR\n", 165 (long)u32DestLen, (long)LENGTHOF(src32), u_errorName(err)); 166 return; 167 } 168 /*for(i=0; i< u32DestLen; i++){ 169 printf("0x%08X, ",uTarget[i]); 170 if(i%10==0){ 171 printf("\n"); 172 } 173 }*/ 174 for(i=0; i< LENGTHOF(src32); i++){ 175 if(u32Target[i] != src32[i]){ 176 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i); 177 } 178 } 179 if(u32Target[i] != 0){ 180 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i); 181 } 182 183 /* now NUL-terminated */ 184 u32DestLen = -2; 185 u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err); 186 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != LENGTHOF(src32)-1) { 187 log_err("u_strToUTF32(preflight with NUL-termination): " 188 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", 189 (long)u32DestLen, (long)LENGTHOF(src32)-1, u_errorName(err)); 190 return; 191 } 192 err = U_ZERO_ERROR; 193 u32DestLen = -2; 194 u_strToUTF32(u32Target, LENGTHOF(src32), &u32DestLen, src16, -1,&err); 195 if(err != U_ZERO_ERROR || u32DestLen != LENGTHOF(src32)-1) { 196 log_err("u_strToUTF32(with NUL-termination): " 197 "length %ld != %ld and %s != U_ZERO_ERROR\n", 198 (long)u32DestLen, (long)LENGTHOF(src32)-1, u_errorName(err)); 199 return; 200 } 201 202 for(i=0; i< LENGTHOF(src32); i++){ 203 if(u32Target[i] != src32[i]){ 204 log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]); 205 } 206 } 207 } 208 209 /* test unpaired surrogates */ 210 static void Test_strToUTF32_surrogates() { 211 UErrorCode err = U_ZERO_ERROR; 212 UChar32 u32Target[400]; 213 int32_t len16, u32DestLen; 214 int32_t numSubstitutions; 215 int i; 216 217 static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; 218 static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 }; 219 static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 }; 220 static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 }; 221 len16 = LENGTHOF(surr16); 222 for(i = 0; i < 4; ++i) { 223 err = U_ZERO_ERROR; 224 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err); 225 if(err != U_INVALID_CHAR_FOUND) { 226 log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n", 227 (long)i, u_errorName(err)); 228 return; 229 } 230 231 err = U_ZERO_ERROR; 232 u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err); 233 if(err != U_INVALID_CHAR_FOUND) { 234 log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n", 235 (long)i, u_errorName(err)); 236 return; 237 } 238 239 err = U_ZERO_ERROR; 240 u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err); 241 if(err != U_INVALID_CHAR_FOUND) { 242 log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", 243 (long)i, u_errorName(err)); 244 return; 245 } 246 247 err = U_ZERO_ERROR; 248 u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err); 249 if(err != U_INVALID_CHAR_FOUND) { 250 log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", 251 (long)i, u_errorName(err)); 252 return; 253 } 254 } 255 256 err = U_ZERO_ERROR; 257 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err); 258 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) { 259 log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 260 u_errorName(err)); 261 return; 262 } 263 264 err = U_ZERO_ERROR; 265 u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err); 266 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) { 267 log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 268 u_errorName(err)); 269 return; 270 } 271 272 err = U_ZERO_ERROR; 273 u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err); 274 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) { 275 log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 276 u_errorName(err)); 277 return; 278 } 279 280 err = U_ZERO_ERROR; 281 u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err); 282 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) { 283 log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 284 u_errorName(err)); 285 return; 286 } 287 288 /* with substitution character */ 289 numSubstitutions = -1; 290 err = U_ZERO_ERROR; 291 u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err); 292 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) { 293 log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 294 u_errorName(err)); 295 return; 296 } 297 298 err = U_ZERO_ERROR; 299 u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err); 300 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) { 301 log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 302 u_errorName(err)); 303 return; 304 } 305 306 err = U_ZERO_ERROR; 307 u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err); 308 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) { 309 log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 310 u_errorName(err)); 311 return; 312 } 313 314 err = U_ZERO_ERROR; 315 u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err); 316 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) { 317 log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 318 u_errorName(err)); 319 return; 320 } 321 } 322 323 static void Test_strFromUTF32(void){ 324 UErrorCode err = U_ZERO_ERROR; 325 UChar uTarget[400]; 326 int32_t uDestLen; 327 int i= 0; 328 329 /* first with length */ 330 uDestLen = -2; 331 u_strFromUTF32(uTarget,0,&uDestLen,src32,LENGTHOF(src32),&err); 332 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != LENGTHOF(src16)) { 333 log_err("u_strFromUTF32(preflight with length): " 334 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", 335 (long)uDestLen, (long)LENGTHOF(src16), u_errorName(err)); 336 return; 337 } 338 err = U_ZERO_ERROR; 339 uDestLen = -2; 340 u_strFromUTF32(uTarget, LENGTHOF(src16)+1,&uDestLen,src32,LENGTHOF(src32),&err); 341 if(err != U_ZERO_ERROR || uDestLen != LENGTHOF(src16)) { 342 log_err("u_strFromUTF32(with length): " 343 "length %ld != %ld and %s != U_ZERO_ERROR\n", 344 (long)uDestLen, (long)LENGTHOF(src16), u_errorName(err)); 345 return; 346 } 347 /*for(i=0; i< uDestLen; i++){ 348 printf("0x%04X, ",uTarget[i]); 349 if(i%10==0){ 350 printf("\n"); 351 } 352 }*/ 353 354 for(i=0; i< uDestLen; i++){ 355 if(uTarget[i] != src16[i]){ 356 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i); 357 } 358 } 359 if(uTarget[i] != 0){ 360 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i); 361 } 362 363 /* now NUL-terminated */ 364 uDestLen = -2; 365 u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err); 366 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != LENGTHOF(src16)-1) { 367 log_err("u_strFromUTF32(preflight with NUL-termination): " 368 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", 369 (long)uDestLen, (long)LENGTHOF(src16)-1, u_errorName(err)); 370 return; 371 } 372 err = U_ZERO_ERROR; 373 uDestLen = -2; 374 u_strFromUTF32(uTarget, LENGTHOF(src16),&uDestLen,src32,-1,&err); 375 if(err != U_ZERO_ERROR || uDestLen != LENGTHOF(src16)-1) { 376 log_err("u_strFromUTF32(with NUL-termination): " 377 "length %ld != %ld and %s != U_ZERO_ERROR\n", 378 (long)uDestLen, (long)LENGTHOF(src16)-1, u_errorName(err)); 379 return; 380 } 381 382 for(i=0; i< uDestLen; i++){ 383 if(uTarget[i] != src16[i]){ 384 log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]); 385 } 386 } 387 } 388 389 /* test surrogate code points */ 390 static void Test_strFromUTF32_surrogates() { 391 UErrorCode err = U_ZERO_ERROR; 392 UChar uTarget[400]; 393 int32_t len32, uDestLen; 394 int32_t numSubstitutions; 395 int i; 396 397 static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 }; 398 static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; 399 static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; 400 static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45, 401 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; 402 len32 = LENGTHOF(surr32); 403 for(i = 0; i < 6; ++i) { 404 err = U_ZERO_ERROR; 405 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err); 406 if(err != U_INVALID_CHAR_FOUND) { 407 log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n", 408 (long)i, u_errorName(err)); 409 return; 410 } 411 412 err = U_ZERO_ERROR; 413 u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err); 414 if(err != U_INVALID_CHAR_FOUND) { 415 log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n", 416 (long)i, u_errorName(err)); 417 return; 418 } 419 420 err = U_ZERO_ERROR; 421 u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err); 422 if(err != U_INVALID_CHAR_FOUND) { 423 log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", 424 (long)i, u_errorName(err)); 425 return; 426 } 427 428 err = U_ZERO_ERROR; 429 u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err); 430 if(err != U_INVALID_CHAR_FOUND) { 431 log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", 432 (long)i, u_errorName(err)); 433 return; 434 } 435 } 436 437 err = U_ZERO_ERROR; 438 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err); 439 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) { 440 log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 441 u_errorName(err)); 442 return; 443 } 444 445 err = U_ZERO_ERROR; 446 u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err); 447 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) { 448 log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 449 u_errorName(err)); 450 return; 451 } 452 453 err = U_ZERO_ERROR; 454 u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err); 455 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) { 456 log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 457 u_errorName(err)); 458 return; 459 } 460 461 err = U_ZERO_ERROR; 462 u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err); 463 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) { 464 log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 465 u_errorName(err)); 466 return; 467 } 468 469 /* with substitution character */ 470 numSubstitutions = -1; 471 err = U_ZERO_ERROR; 472 u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err); 473 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) { 474 log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 475 u_errorName(err)); 476 return; 477 } 478 479 err = U_ZERO_ERROR; 480 u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err); 481 if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) { 482 log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 483 u_errorName(err)); 484 return; 485 } 486 487 err = U_ZERO_ERROR; 488 u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err); 489 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) { 490 log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 491 u_errorName(err)); 492 return; 493 } 494 495 err = U_ZERO_ERROR; 496 u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err); 497 if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) { 498 log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 499 u_errorName(err)); 500 return; 501 } 502 } 503 504 static void Test_UChar_UTF8_API(void){ 505 506 UErrorCode err = U_ZERO_ERROR; 507 UChar uTemp[1]; 508 char u8Temp[1]; 509 UChar* uTarget=uTemp; 510 const char* u8Src; 511 int32_t u8SrcLen = 0; 512 int32_t uTargetLength = 0; 513 int32_t uDestLen=0; 514 const UChar* uSrc = src16; 515 int32_t uSrcLen = sizeof(src16)/2; 516 char* u8Target = u8Temp; 517 int32_t u8TargetLength =0; 518 int32_t u8DestLen =0; 519 UBool failed = FALSE; 520 int i= 0; 521 int32_t numSubstitutions; 522 523 { 524 /* preflight */ 525 u8Temp[0] = 0x12; 526 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); 527 if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){ 528 err = U_ZERO_ERROR; 529 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1)); 530 u8TargetLength = u8DestLen; 531 532 u8Target[u8TargetLength] = (char)0xfe; 533 u8DestLen = -1; 534 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); 535 if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){ 536 log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err)); 537 return; 538 } 539 540 } 541 else { 542 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); 543 } 544 failed = FALSE; 545 /*for(i=0; i< u8DestLen; i++){ 546 printf("0x%04X, ",u8Target[i]); 547 if(i%10==0){ 548 printf("\n"); 549 } 550 }*/ 551 /*for(i=0; i< u8DestLen; i++){ 552 if(u8Target[i] != src8[i]){ 553 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]); 554 failed =TRUE; 555 } 556 } 557 if(failed){ 558 log_err("u_strToUTF8() failed \n"); 559 }*/ 560 u8Src = u8Target; 561 u8SrcLen = u8DestLen; 562 563 /* preflight */ 564 uTemp[0] = 0x1234; 565 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); 566 if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){ 567 err = U_ZERO_ERROR; 568 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1)); 569 uTargetLength = uDestLen; 570 571 uTarget[uTargetLength] = 0xfff0; 572 uDestLen = -1; 573 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); 574 } 575 else { 576 log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n"); 577 } 578 /*for(i=0; i< uDestLen; i++){ 579 printf("0x%04X, ",uTarget[i]); 580 if(i%10==0){ 581 printf("\n"); 582 } 583 }*/ 584 585 if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) { 586 failed = TRUE; 587 } 588 for(i=0; i< uSrcLen; i++){ 589 if(uTarget[i] != src16[i]){ 590 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i); 591 failed =TRUE; 592 } 593 } 594 if(failed){ 595 log_err("error: u_strFromUTF8(after preflighting) failed\n"); 596 } 597 598 free(u8Target); 599 free(uTarget); 600 } 601 { 602 u8SrcLen = -1; 603 uTargetLength = 0; 604 uSrcLen =-1; 605 u8TargetLength=0; 606 failed = FALSE; 607 /* preflight */ 608 u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); 609 if(err == U_BUFFER_OVERFLOW_ERROR){ 610 err = U_ZERO_ERROR; 611 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1)); 612 u8TargetLength = u8DestLen; 613 614 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); 615 616 } 617 else { 618 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); 619 } 620 failed = FALSE; 621 /*for(i=0; i< u8DestLen; i++){ 622 printf("0x%04X, ",u8Target[i]); 623 if(i%10==0){ 624 printf("\n"); 625 } 626 }*/ 627 /*for(i=0; i< u8DestLen; i++){ 628 if(u8Target[i] != src8[i]){ 629 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]); 630 failed =TRUE; 631 } 632 } 633 if(failed){ 634 log_err("u_strToUTF8() failed \n"); 635 }*/ 636 u8Src = u8Target; 637 u8SrcLen = u8DestLen; 638 639 /* preflight */ 640 u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); 641 if(err == U_BUFFER_OVERFLOW_ERROR){ 642 err = U_ZERO_ERROR; 643 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1)); 644 uTargetLength = uDestLen; 645 646 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); 647 } 648 else { 649 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); 650 } 651 /*for(i=0; i< uDestLen; i++){ 652 printf("0x%04X, ",uTarget[i]); 653 if(i%10==0){ 654 printf("\n"); 655 } 656 }*/ 657 658 for(i=0; i< uSrcLen; i++){ 659 if(uTarget[i] != src16[i]){ 660 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i); 661 failed =TRUE; 662 } 663 } 664 if(failed){ 665 log_err("u_strToUTF8() failed \n"); 666 } 667 668 free(u8Target); 669 free(uTarget); 670 } 671 672 /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */ 673 { 674 static const UChar 675 withLead16[]={ 0x1800, 0xd89a, 0x0061 }, 676 withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 }, 677 withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */ 678 withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */ 679 static const uint8_t 680 withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 }, 681 withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 }, 682 withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */ 683 withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */ 684 UChar out16[10]; 685 char out8[10]; 686 687 if( 688 (err=U_ZERO_ERROR, u_strToUTF8(out8, LENGTHOF(out8), NULL, withLead16, LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) || 689 (err=U_ZERO_ERROR, u_strToUTF8(out8, LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) || 690 (err=U_ZERO_ERROR, u_strFromUTF8(out16, LENGTHOF(out16), NULL, (const char *)withLead8, LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) || 691 (err=U_ZERO_ERROR, u_strFromUTF8(out16, LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND) 692 ) { 693 log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n"); 694 } 695 696 /* test error handling with substitution characters */ 697 698 /* from UTF-8 with length */ 699 err=U_ZERO_ERROR; 700 numSubstitutions=-1; 701 out16[0]=0x55aa; 702 uDestLen=0; 703 u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, 704 (const char *)withTrail8, uprv_strlen((const char *)withTrail8), 705 0x50005, &numSubstitutions, 706 &err); 707 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) || 708 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) || 709 numSubstitutions!=1) { 710 log_err("error: u_strFromUTF8WithSub(length) failed\n"); 711 } 712 713 /* from UTF-8 with NUL termination */ 714 err=U_ZERO_ERROR; 715 numSubstitutions=-1; 716 out16[0]=0x55aa; 717 uDestLen=0; 718 u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, 719 (const char *)withTrail8, -1, 720 0xfffd, &numSubstitutions, 721 &err); 722 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) || 723 0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) || 724 numSubstitutions!=1) { 725 log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n"); 726 } 727 728 /* preflight from UTF-8 with NUL termination */ 729 err=U_ZERO_ERROR; 730 numSubstitutions=-1; 731 out16[0]=0x55aa; 732 uDestLen=0; 733 u_strFromUTF8WithSub(out16, 1, &uDestLen, 734 (const char *)withTrail8, -1, 735 0x50005, &numSubstitutions, 736 &err); 737 if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=1) { 738 log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n"); 739 } 740 741 /* to UTF-8 with length */ 742 err=U_ZERO_ERROR; 743 numSubstitutions=-1; 744 out8[0]=(char)0xf5; 745 u8DestLen=0; 746 u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, 747 withTrail16, u_strlen(withTrail16), 748 0xfffd, &numSubstitutions, 749 &err); 750 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) || 751 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) || 752 numSubstitutions!=1) { 753 log_err("error: u_strToUTF8WithSub(length) failed\n"); 754 } 755 756 /* to UTF-8 with NUL termination */ 757 err=U_ZERO_ERROR; 758 numSubstitutions=-1; 759 out8[0]=(char)0xf5; 760 u8DestLen=0; 761 u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, 762 withTrail16, -1, 763 0x1a, &numSubstitutions, 764 &err); 765 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8Sub1A) || 766 0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) || 767 numSubstitutions!=1) { 768 log_err("error: u_strToUTF8WithSub(NUL termination) failed\n"); 769 } 770 771 /* preflight to UTF-8 with NUL termination */ 772 err=U_ZERO_ERROR; 773 numSubstitutions=-1; 774 out8[0]=(char)0xf5; 775 u8DestLen=0; 776 u_strToUTF8WithSub(out8, 1, &u8DestLen, 777 withTrail16, -1, 778 0xfffd, &numSubstitutions, 779 &err); 780 if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) || 781 numSubstitutions!=1) { 782 log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n"); 783 } 784 785 /* test that numSubstitutions==0 if there are no substitutions */ 786 787 /* from UTF-8 with length (just first 3 bytes which are valid) */ 788 err=U_ZERO_ERROR; 789 numSubstitutions=-1; 790 out16[0]=0x55aa; 791 uDestLen=0; 792 u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, 793 (const char *)withTrail8, 3, 794 0x50005, &numSubstitutions, 795 &err); 796 if(U_FAILURE(err) || uDestLen!=1 || 797 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) || 798 numSubstitutions!=0) { 799 log_err("error: u_strFromUTF8WithSub(no subs) failed\n"); 800 } 801 802 /* to UTF-8 with length (just first UChar which is valid) */ 803 err=U_ZERO_ERROR; 804 numSubstitutions=-1; 805 out8[0]=(char)0xf5; 806 u8DestLen=0; 807 u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, 808 withTrail16, 1, 809 0xfffd, &numSubstitutions, 810 &err); 811 if(U_FAILURE(err) || u8DestLen!=3 || 812 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) || 813 numSubstitutions!=0) { 814 log_err("error: u_strToUTF8WithSub(no subs) failed\n"); 815 } 816 817 /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */ 818 819 /* from UTF-8 with length (just first 3 bytes which are valid) */ 820 err=U_ZERO_ERROR; 821 numSubstitutions=-1; 822 out16[0]=0x55aa; 823 uDestLen=0; 824 u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, 825 (const char *)withTrail8, 3, 826 U_SENTINEL, &numSubstitutions, 827 &err); 828 if(U_FAILURE(err) || uDestLen!=1 || 829 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) || 830 numSubstitutions!=0) { 831 log_err("error: u_strFromUTF8WithSub(no subchar) failed\n"); 832 } 833 834 /* to UTF-8 with length (just first UChar which is valid) */ 835 err=U_ZERO_ERROR; 836 numSubstitutions=-1; 837 out8[0]=(char)0xf5; 838 u8DestLen=0; 839 u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, 840 withTrail16, 1, 841 U_SENTINEL, &numSubstitutions, 842 &err); 843 if(U_FAILURE(err) || u8DestLen!=3 || 844 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) || 845 numSubstitutions!=0) { 846 log_err("error: u_strToUTF8WithSub(no subchar) failed\n"); 847 } 848 } 849 } 850 851 /* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */ 852 static UBool 853 equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) { 854 UChar c1, c2; 855 856 while(length>0) { 857 c1=*s++; 858 c2=*t++; 859 if(c1!=c2 && c2!=0xfffd) { 860 return FALSE; 861 } 862 --length; 863 } 864 return TRUE; 865 } 866 867 /* test u_strFromUTF8Lenient() */ 868 static void 869 Test_FromUTF8(void) { 870 /* 871 * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)" 872 */ 873 static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 }; 874 UChar dest[64]; 875 UChar *destPointer; 876 int32_t destLength; 877 UErrorCode errorCode; 878 879 /* 3 bytes input, one UChar output (U+095C) */ 880 errorCode=U_ZERO_ERROR; 881 destLength=-99; 882 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode); 883 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) { 884 log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n", 885 (long)destLength, u_errorName(errorCode)); 886 } 887 888 /* 4 bytes input, two UChars output (U+095C U+0000) */ 889 errorCode=U_ZERO_ERROR; 890 destLength=-99; 891 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode); 892 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) { 893 log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n", 894 (long)destLength, u_errorName(errorCode)); 895 } 896 897 /* NUL-terminated 3 bytes input, one UChar output (U+095C) */ 898 errorCode=U_ZERO_ERROR; 899 destLength=-99; 900 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode); 901 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) { 902 log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n", 903 (long)destLength, u_errorName(errorCode)); 904 } 905 906 /* 3 bytes input, one UChar output (U+095C), transform not just preflight */ 907 errorCode=U_ZERO_ERROR; 908 dest[0]=dest[1]=99; 909 destLength=-99; 910 destPointer=u_strFromUTF8(dest, LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode); 911 if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) { 912 log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n", 913 (long)destLength, u_errorName(errorCode)); 914 } 915 } 916 917 /* test u_strFromUTF8Lenient() */ 918 static void 919 Test_FromUTF8Lenient(void) { 920 /* 921 * Multiple input strings, each NUL-terminated. 922 * Terminate with a string starting with 0xff. 923 */ 924 static const uint8_t bytes[]={ 925 /* well-formed UTF-8 */ 926 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0xf0, 0xa0, 0x80, 0x80, 927 0x62, 0xc3, 0xa0, 0xe0, 0xa0, 0x81, 0xf0, 0xa0, 0x80, 0x81, 0, 928 929 /* various malformed sequences */ 930 0xc3, 0xc3, 0x9f, 0xc3, 0xa0, 0xe0, 0x80, 0x8a, 0xf0, 0x41, 0x42, 0x43, 0, 931 932 /* truncated input */ 933 0xc3, 0, 934 0xe0, 0, 935 0xe0, 0xa0, 0, 936 0xf0, 0, 937 0xf0, 0x90, 0, 938 0xf0, 0x90, 0x80, 0, 939 940 /* non-ASCII characters in the last few bytes */ 941 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0, 942 0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0, 943 944 /* empty string */ 945 0, 946 947 /* finish */ 948 0xff, 0 949 }; 950 951 /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */ 952 static const UChar uchars[]={ 953 0x61, 0xdf, 0x800, 0xd840, 0xdc00, 954 0x62, 0xe0, 0x801, 0xd840, 0xdc01, 0, 955 956 0xfffd, 0x9f, 0xe0, 0xa, 0xfffd, 0xfffd, 0, 957 958 0xfffd, 0, 959 0xfffd, 0, 960 0xfffd, 0, 961 0xfffd, 0, 962 0xfffd, 0, 963 0xfffd, 0, 964 965 0x61, 0xdf, 0x800, 0, 966 0x61, 0x800, 0xdf, 0, 967 968 0, 969 970 0 971 }; 972 973 UChar dest[64]; 974 const char *pb; 975 const UChar *pu, *pDest; 976 int32_t srcLength, destLength0, destLength; 977 int number; 978 UErrorCode errorCode; 979 980 /* verify checking for some illegal arguments */ 981 dest[0]=0x1234; 982 destLength=-1; 983 errorCode=U_ZERO_ERROR; 984 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode); 985 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) { 986 log_err("u_strFromUTF8Lenient(src=NULL) failed\n"); 987 } 988 989 dest[0]=0x1234; 990 destLength=-1; 991 errorCode=U_ZERO_ERROR; 992 pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode); 993 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 994 log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n"); 995 } 996 997 dest[0]=0x1234; 998 destLength=-1; 999 errorCode=U_MEMORY_ALLOCATION_ERROR; 1000 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode); 1001 if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) { 1002 log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n"); 1003 } 1004 1005 dest[0]=0x1234; 1006 destLength=-1; 1007 errorCode=U_MEMORY_ALLOCATION_ERROR; 1008 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, NULL); 1009 if(dest[0]!=0x1234) { 1010 log_err("u_strFromUTF8Lenient(pErrorCode=NULL) failed\n"); 1011 } 1012 1013 /* test normal behavior */ 1014 number=0; /* string number for log_err() */ 1015 1016 for(pb=(const char *)bytes, pu=uchars; 1017 *pb!=(char)0xff; 1018 pb+=srcLength+1, pu+=destLength0+1, ++number 1019 ) { 1020 srcLength=uprv_strlen(pb); 1021 destLength0=u_strlen(pu); 1022 1023 /* preflighting with NUL-termination */ 1024 dest[0]=0x1234; 1025 destLength=-1; 1026 errorCode=U_ZERO_ERROR; 1027 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode); 1028 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) || 1029 pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0 1030 ) { 1031 log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number); 1032 } 1033 1034 /* preflighting/some capacity with NUL-termination */ 1035 if(srcLength>0) { 1036 dest[destLength0-1]=0x1234; 1037 destLength=-1; 1038 errorCode=U_ZERO_ERROR; 1039 pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode); 1040 if (errorCode!=U_BUFFER_OVERFLOW_ERROR || 1041 dest[destLength0-1]!=0x1234 || destLength!=destLength0 1042 ) { 1043 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number); 1044 } 1045 } 1046 1047 /* conversion with NUL-termination, much capacity */ 1048 dest[0]=dest[destLength0]=0x1234; 1049 destLength=-1; 1050 errorCode=U_ZERO_ERROR; 1051 pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, -1, &errorCode); 1052 if (errorCode!=U_ZERO_ERROR || 1053 pDest!=dest || dest[destLength0]!=0 || 1054 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1055 ) { 1056 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number); 1057 } 1058 1059 /* conversion with NUL-termination, exact capacity */ 1060 dest[0]=dest[destLength0]=0x1234; 1061 destLength=-1; 1062 errorCode=U_ZERO_ERROR; 1063 pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode); 1064 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING || 1065 pDest!=dest || dest[destLength0]!=0x1234 || 1066 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1067 ) { 1068 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number); 1069 } 1070 1071 /* preflighting with length */ 1072 dest[0]=0x1234; 1073 destLength=-1; 1074 errorCode=U_ZERO_ERROR; 1075 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode); 1076 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) || 1077 pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength 1078 ) { 1079 log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number); 1080 } 1081 1082 /* preflighting/some capacity with length */ 1083 if(srcLength>0) { 1084 dest[srcLength-1]=0x1234; 1085 destLength=-1; 1086 errorCode=U_ZERO_ERROR; 1087 pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode); 1088 if (errorCode!=U_BUFFER_OVERFLOW_ERROR || 1089 dest[srcLength-1]!=0x1234 || destLength!=srcLength 1090 ) { 1091 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number); 1092 } 1093 } 1094 1095 /* conversion with length, much capacity */ 1096 dest[0]=dest[destLength0]=0x1234; 1097 destLength=-1; 1098 errorCode=U_ZERO_ERROR; 1099 pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, srcLength, &errorCode); 1100 if (errorCode!=U_ZERO_ERROR || 1101 pDest!=dest || dest[destLength0]!=0 || 1102 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1103 ) { 1104 log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number); 1105 } 1106 1107 /* conversion with length, srcLength capacity */ 1108 dest[0]=dest[srcLength]=dest[destLength0]=0x1234; 1109 destLength=-1; 1110 errorCode=U_ZERO_ERROR; 1111 pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode); 1112 if(srcLength==destLength0) { 1113 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING || 1114 pDest!=dest || dest[destLength0]!=0x1234 || 1115 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1116 ) { 1117 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number); 1118 } 1119 } else { 1120 if (errorCode!=U_ZERO_ERROR || 1121 pDest!=dest || dest[destLength0]!=0 || 1122 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1123 ) { 1124 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number); 1125 } 1126 } 1127 } 1128 } 1129 1130 static const uint16_t src16j[] = { 1131 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 1132 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 1133 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 1134 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 1135 0x0000, 1136 /* Test only ASCII */ 1137 1138 }; 1139 static const uint16_t src16WithNulls[] = { 1140 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0000, 1141 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 0x0000, 1142 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0000, 1143 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 0x0000, 1144 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000, 1145 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000, 1146 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000, 1147 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000, 1148 /* test only ASCII */ 1149 /* 1150 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 1151 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 1152 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1, 1153 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 1154 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 1155 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 1156 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 1157 0x0054, 0x0000 */ 1158 1159 }; 1160 static void Test_UChar_WCHART_API(void){ 1161 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 1162 UErrorCode err = U_ZERO_ERROR; 1163 const UChar* uSrc = src16j; 1164 int32_t uSrcLen = sizeof(src16j)/2; 1165 wchar_t* wDest = NULL; 1166 int32_t wDestLen = 0; 1167 int32_t reqLen= 0 ; 1168 UBool failed = FALSE; 1169 UChar* uDest = NULL; 1170 int32_t uDestLen = 0; 1171 int i =0; 1172 { 1173 /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */ 1174 if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) { 1175 log_err("u_strFromWCS() should return NULL with a bad argument\n"); 1176 } 1177 if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) { 1178 log_err("u_strToWCS() should return NULL with a bad argument\n"); 1179 } 1180 1181 /* NULL source & destination. */ 1182 err = U_ZERO_ERROR; 1183 u_strFromWCS(NULL,0,NULL,NULL,0,&err); 1184 if (err != U_STRING_NOT_TERMINATED_WARNING) { 1185 log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err)); 1186 } 1187 err = U_ZERO_ERROR; 1188 u_strToWCS(NULL,0,NULL,NULL,0,&err); 1189 if (err != U_STRING_NOT_TERMINATED_WARNING) { 1190 log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err)); 1191 } 1192 err = U_ZERO_ERROR; 1193 1194 /* pre-flight*/ 1195 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); 1196 1197 if(err == U_BUFFER_OVERFLOW_ERROR){ 1198 err=U_ZERO_ERROR; 1199 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); 1200 wDestLen = reqLen+1; 1201 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); 1202 } 1203 1204 /* pre-flight */ 1205 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); 1206 1207 1208 if(err == U_BUFFER_OVERFLOW_ERROR){ 1209 err =U_ZERO_ERROR; 1210 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); 1211 uDestLen = reqLen + 1; 1212 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); 1213 }else if(U_FAILURE(err)){ 1214 1215 log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err)); 1216 return; 1217 } 1218 1219 for(i=0; i< uSrcLen; i++){ 1220 if(uDest[i] != src16j[i]){ 1221 log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i); 1222 failed =TRUE; 1223 } 1224 } 1225 1226 if(U_FAILURE(err)){ 1227 failed = TRUE; 1228 } 1229 if(failed){ 1230 log_err("u_strToWCS() failed \n"); 1231 } 1232 free(wDest); 1233 free(uDest); 1234 1235 1236 /* test with embeded nulls */ 1237 uSrc = src16WithNulls; 1238 uSrcLen = sizeof(src16WithNulls)/2; 1239 wDestLen =0; 1240 uDestLen =0; 1241 wDest = NULL; 1242 uDest = NULL; 1243 /* pre-flight*/ 1244 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); 1245 1246 if(err == U_BUFFER_OVERFLOW_ERROR){ 1247 err=U_ZERO_ERROR; 1248 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); 1249 wDestLen = reqLen+1; 1250 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); 1251 } 1252 1253 /* pre-flight */ 1254 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); 1255 1256 if(err == U_BUFFER_OVERFLOW_ERROR){ 1257 err =U_ZERO_ERROR; 1258 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); 1259 uDestLen = reqLen + 1; 1260 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); 1261 } 1262 1263 if(!U_FAILURE(err)) { 1264 for(i=0; i< uSrcLen; i++){ 1265 if(uDest[i] != src16WithNulls[i]){ 1266 log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i); 1267 failed =TRUE; 1268 } 1269 } 1270 } 1271 1272 if(U_FAILURE(err)){ 1273 failed = TRUE; 1274 } 1275 if(failed){ 1276 log_err("u_strToWCS() failed \n"); 1277 } 1278 free(wDest); 1279 free(uDest); 1280 1281 } 1282 1283 { 1284 1285 uSrc = src16j; 1286 uSrcLen = sizeof(src16j)/2; 1287 wDestLen =0; 1288 uDestLen =0; 1289 wDest = NULL; 1290 uDest = NULL; 1291 wDestLen = 0; 1292 /* pre-flight*/ 1293 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err); 1294 1295 if(err == U_BUFFER_OVERFLOW_ERROR){ 1296 err=U_ZERO_ERROR; 1297 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); 1298 wDestLen = reqLen+1; 1299 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err); 1300 } 1301 uDestLen = 0; 1302 /* pre-flight */ 1303 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err); 1304 1305 if(err == U_BUFFER_OVERFLOW_ERROR){ 1306 err =U_ZERO_ERROR; 1307 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); 1308 uDestLen = reqLen + 1; 1309 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err); 1310 } 1311 1312 1313 if(!U_FAILURE(err)) { 1314 for(i=0; i< uSrcLen; i++){ 1315 if(uDest[i] != src16j[i]){ 1316 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i); 1317 failed =TRUE; 1318 } 1319 } 1320 } 1321 1322 if(U_FAILURE(err)){ 1323 failed = TRUE; 1324 } 1325 if(failed){ 1326 log_err("u_strToWCS() failed \n"); 1327 } 1328 free(wDest); 1329 free(uDest); 1330 } 1331 1332 /* 1333 * Test u_terminateWChars(). 1334 * All u_terminateXYZ() use the same implementation macro; 1335 * we test this function to improve API coverage. 1336 */ 1337 { 1338 wchar_t buffer[10]; 1339 1340 err=U_ZERO_ERROR; 1341 buffer[3]=0x20ac; 1342 wDestLen=u_terminateWChars(buffer, LENGTHOF(buffer), 3, &err); 1343 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) { 1344 log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n", 1345 u_errorName(err), wDestLen, buffer[3]); 1346 } 1347 1348 err=U_ZERO_ERROR; 1349 buffer[3]=0x20ac; 1350 wDestLen=u_terminateWChars(buffer, 3, 3, &err); 1351 if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) { 1352 log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n", 1353 u_errorName(err), wDestLen, buffer[3]); 1354 } 1355 1356 err=U_STRING_NOT_TERMINATED_WARNING; 1357 buffer[3]=0x20ac; 1358 wDestLen=u_terminateWChars(buffer, LENGTHOF(buffer), 3, &err); 1359 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) { 1360 log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n", 1361 u_errorName(err), wDestLen, buffer[3]); 1362 } 1363 1364 err=U_ZERO_ERROR; 1365 buffer[3]=0x20ac; 1366 wDestLen=u_terminateWChars(buffer, 2, 3, &err); 1367 if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) { 1368 log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n", 1369 u_errorName(err), wDestLen, buffer[3]); 1370 } 1371 } 1372 #else 1373 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); 1374 #endif 1375 } 1376 1377 static void Test_widestrs() 1378 { 1379 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 1380 wchar_t ws[100]; 1381 UChar rts[100]; 1382 int32_t wcap = sizeof(ws) / sizeof(*ws); 1383 int32_t wl; 1384 int32_t rtcap = sizeof(rts) / sizeof(*rts); 1385 int32_t rtl; 1386 wchar_t *wcs; 1387 UChar *cp; 1388 const char *errname; 1389 UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0}; 1390 int32_t ul = sizeof(ustr)/sizeof(*ustr) -1; 1391 char astr[100]; 1392 1393 UErrorCode err; 1394 1395 err = U_ZERO_ERROR; 1396 wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err); 1397 if (U_FAILURE(err)) { 1398 errname = u_errorName(err); 1399 log_err("test_widestrs: u_strToWCS error: %s!\n",errname); 1400 } 1401 if(ul!=wl){ 1402 log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl); 1403 } 1404 err = U_ZERO_ERROR; 1405 wl = (int32_t)uprv_wcslen(wcs); 1406 cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err); 1407 if (U_FAILURE(err)) { 1408 errname = u_errorName(err); 1409 fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname); 1410 } 1411 if(wl != rtl){ 1412 log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl); 1413 } 1414 #else 1415 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); 1416 #endif 1417 } 1418 1419 static void 1420 Test_WCHART_LongString(){ 1421 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 1422 UErrorCode status = U_ZERO_ERROR; 1423 const char* testdatapath=loadTestData(&status); 1424 UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status); 1425 int32_t strLen =0; 1426 const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status); 1427 const UChar* uSrc = str; 1428 int32_t uSrcLen = strLen; 1429 int32_t wDestLen =0, reqLen=0, i=0; 1430 int32_t uDestLen =0; 1431 wchar_t* wDest = NULL; 1432 UChar* uDest = NULL; 1433 UBool failed = FALSE; 1434 1435 log_verbose("Loaded string of %d UChars\n", uSrcLen); 1436 1437 if(U_FAILURE(status)){ 1438 log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status)); 1439 return; 1440 } 1441 1442 /* pre-flight*/ 1443 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status); 1444 1445 if(status == U_BUFFER_OVERFLOW_ERROR){ 1446 status=U_ZERO_ERROR; 1447 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); 1448 wDestLen = reqLen+1; 1449 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status); 1450 log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t)); 1451 } 1452 1453 { 1454 int j; 1455 for(j=0;j>=0&&j<reqLen;j++) { 1456 if(wDest[j]!=uSrc[j]) { 1457 log_verbose("Diff %04X vs %04X @ %d\n", wDest[j],uSrc[j],j); 1458 break; 1459 } 1460 } 1461 } 1462 1463 uDestLen = 0; 1464 /* pre-flight */ 1465 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status); 1466 if(status == U_BUFFER_OVERFLOW_ERROR){ 1467 status =U_ZERO_ERROR; 1468 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); 1469 u_memset(uDest,0xFFFF,reqLen+1); 1470 uDestLen = reqLen + 1; 1471 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status); 1472 log_verbose("Back to %d UChars\n", reqLen); 1473 } 1474 #if defined(U_WCHAR_IS_UTF16) 1475 log_verbose("U_WCHAR_IS_UTF16\n"); 1476 #elif defined(U_WCHAR_IS_UTF32) 1477 log_verbose("U_WCHAR_IS_UTF32\n"); 1478 #else 1479 log_verbose("U_WCHAR_IS_idunno (not UTF)\n"); 1480 #endif 1481 1482 if(reqLen!=uSrcLen) { 1483 log_err("Error: dest len is %d but expected src len %d\n", reqLen, uSrcLen); 1484 } 1485 1486 for(i=0; i< uSrcLen; i++){ 1487 if(uDest[i] != str[i]){ 1488 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", str[i], uDest[i],i); 1489 failed =TRUE; 1490 } 1491 } 1492 1493 if(U_FAILURE(status)){ 1494 failed = TRUE; 1495 } 1496 if(failed){ 1497 log_err("u_strToWCS() failed \n"); 1498 } 1499 free(wDest); 1500 free(uDest); 1501 /* close the bundle */ 1502 ures_close(theBundle); 1503 #else 1504 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); 1505 #endif 1506 } 1507 1508 static void Test_strToJavaModifiedUTF8() { 1509 static const UChar src[]={ 1510 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3, 1511 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003, 1512 0xd800, 0xdc00, 0xdc00, 0xd800, 0, 1513 0xdbff, 0xdfff, 1514 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f 1515 }; 1516 static const uint8_t expected[]={ 1517 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3, 1518 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83, 1519 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83, 1520 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80, 1521 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1522 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f 1523 }; 1524 static const UChar shortSrc[]={ 1525 0xe01, 0xe1, 0x61 1526 }; 1527 static const uint8_t shortExpected[]={ 1528 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61 1529 }; 1530 static const UChar asciiNul[]={ 1531 0x61, 0x62, 0x63, 0 1532 }; 1533 static const uint8_t asciiNulExpected[]={ 1534 0x61, 0x62, 0x63 1535 }; 1536 char dest[200]; 1537 char *p; 1538 int32_t length, expectedTerminatedLength; 1539 UErrorCode errorCode; 1540 1541 expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")- 1542 (const char *)expected); 1543 1544 errorCode=U_ZERO_ERROR; 1545 length=-5; 1546 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, 1547 src, LENGTHOF(src), &errorCode); 1548 if( U_FAILURE(errorCode) || p!=dest || 1549 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1550 dest[length]!=0 1551 ) { 1552 log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode)); 1553 } 1554 memset(dest, 0xff, sizeof(dest)); 1555 errorCode=U_ZERO_ERROR; 1556 length=-5; 1557 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, 1558 src, LENGTHOF(src), &errorCode); 1559 if( U_FAILURE(errorCode) || p!=dest || 1560 0!=memcmp(dest, expected, LENGTHOF(expected)) || 1561 dest[LENGTHOF(expected)]!=0 1562 ) { 1563 log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode)); 1564 } 1565 memset(dest, 0xff, sizeof(dest)); 1566 errorCode=U_ZERO_ERROR; 1567 length=-5; 1568 p=u_strToJavaModifiedUTF8(dest, LENGTHOF(expected), &length, 1569 src, LENGTHOF(src), &errorCode); 1570 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest || 1571 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1572 dest[length]!=(char)0xff 1573 ) { 1574 log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode)); 1575 } 1576 memset(dest, 0xff, sizeof(dest)); 1577 errorCode=U_ZERO_ERROR; 1578 length=-5; 1579 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode); 1580 if( U_FAILURE(errorCode) || p!=dest || 1581 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) || 1582 dest[length]!=0 1583 ) { 1584 log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode)); 1585 } 1586 memset(dest, 0xff, sizeof(dest)); 1587 errorCode=U_ZERO_ERROR; 1588 length=-5; 1589 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode); 1590 if( U_FAILURE(errorCode) || p!=dest || 1591 0!=memcmp(dest, expected, expectedTerminatedLength) || 1592 dest[expectedTerminatedLength]!=0 1593 ) { 1594 log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode)); 1595 } 1596 memset(dest, 0xff, sizeof(dest)); 1597 errorCode=U_ZERO_ERROR; 1598 length=-5; 1599 p=u_strToJavaModifiedUTF8(dest, LENGTHOF(expected)/2, &length, 1600 src, LENGTHOF(src), &errorCode); 1601 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || 1602 length!=LENGTHOF(expected) || dest[LENGTHOF(expected)/2]!=(char)0xff 1603 ) { 1604 log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode)); 1605 } 1606 memset(dest, 0xff, sizeof(dest)); 1607 errorCode=U_ZERO_ERROR; 1608 length=-5; 1609 p=u_strToJavaModifiedUTF8(NULL, 0, &length, 1610 src, LENGTHOF(src), &errorCode); 1611 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || 1612 length!=LENGTHOF(expected) || dest[0]!=(char)0xff 1613 ) { 1614 log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode)); 1615 } 1616 memset(dest, 0xff, sizeof(dest)); 1617 errorCode=U_ZERO_ERROR; 1618 length=-5; 1619 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, 1620 shortSrc, LENGTHOF(shortSrc), &errorCode); 1621 if( U_FAILURE(errorCode) || p!=dest || 1622 length!=LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) || 1623 dest[length]!=0 1624 ) { 1625 log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode)); 1626 } 1627 memset(dest, 0xff, sizeof(dest)); 1628 errorCode=U_ZERO_ERROR; 1629 length=-5; 1630 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, 1631 asciiNul, -1, &errorCode); 1632 if( U_FAILURE(errorCode) || p!=dest || 1633 length!=LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) || 1634 dest[length]!=0 1635 ) { 1636 log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode)); 1637 } 1638 memset(dest, 0xff, sizeof(dest)); 1639 errorCode=U_ZERO_ERROR; 1640 length=-5; 1641 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, 1642 NULL, 0, &errorCode); 1643 if( U_FAILURE(errorCode) || p!=dest || 1644 length!=0 || dest[0]!=0 1645 ) { 1646 log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode)); 1647 } 1648 1649 /* illegal arguments */ 1650 memset(dest, 0xff, sizeof(dest)); 1651 errorCode=U_ZERO_ERROR; 1652 length=-5; 1653 p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length, 1654 src, LENGTHOF(src), &errorCode); 1655 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { 1656 log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode)); 1657 } 1658 memset(dest, 0xff, sizeof(dest)); 1659 errorCode=U_ZERO_ERROR; 1660 length=-5; 1661 p=u_strToJavaModifiedUTF8(dest, -1, &length, 1662 src, LENGTHOF(src), &errorCode); 1663 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { 1664 log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode)); 1665 } 1666 memset(dest, 0xff, sizeof(dest)); 1667 errorCode=U_ZERO_ERROR; 1668 length=-5; 1669 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length, 1670 NULL, LENGTHOF(src), &errorCode); 1671 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { 1672 log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode)); 1673 } 1674 memset(dest, 0xff, sizeof(dest)); 1675 errorCode=U_ZERO_ERROR; 1676 length=-5; 1677 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length, 1678 NULL, -1, &errorCode); 1679 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { 1680 log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode)); 1681 } 1682 } 1683 1684 static void Test_strFromJavaModifiedUTF8() { 1685 static const uint8_t src[]={ 1686 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3, 1687 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83, 1688 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83, 1689 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0, 1690 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1691 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80, /* invalid sequences */ 1692 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 1693 0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad, /* non-shortest forms are allowed */ 1694 0xe0, 0xb8, 0x8e, 0x6f 1695 }; 1696 static const UChar expected[]={ 1697 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3, 1698 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003, 1699 0xd800, 0xdc00, 0xdc00, 0xd800, 0, 1700 0xdbff, 0xdfff, 1701 0xfffd, 0xfffd, 0xfffd, 0xfffd, 1702 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 1703 0x6c, 0xed, 1704 0xe0e, 0x6f 1705 }; 1706 static const uint8_t shortSrc[]={ 1707 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61 1708 }; 1709 static const UChar shortExpected[]={ 1710 0xe01, 0xe1, 0x61 1711 }; 1712 static const uint8_t asciiNul[]={ 1713 0x61, 0x62, 0x63, 0 1714 }; 1715 static const UChar asciiNulExpected[]={ 1716 0x61, 0x62, 0x63 1717 }; 1718 static const uint8_t invalid[]={ 1719 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80 1720 }; 1721 static const UChar invalidExpectedFFFD[]={ 1722 0xfffd, 0xfffd, 0xfffd, 0xfffd 1723 }; 1724 static const UChar invalidExpected50000[]={ 1725 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00 1726 }; 1727 UChar dest[200]; 1728 UChar *p; 1729 int32_t length, expectedTerminatedLength; 1730 int32_t numSubstitutions; 1731 UErrorCode errorCode; 1732 1733 expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected); 1734 1735 errorCode=U_ZERO_ERROR; 1736 length=numSubstitutions=-5; 1737 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1738 (const char *)src, LENGTHOF(src), 1739 0xfffd, &numSubstitutions, &errorCode); 1740 if( U_FAILURE(errorCode) || p!=dest || 1741 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1742 dest[length]!=0 || 1743 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) 1744 ) { 1745 log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode)); 1746 } 1747 memset(dest, 0xff, sizeof(dest)); 1748 errorCode=U_ZERO_ERROR; 1749 length=numSubstitutions=-5; 1750 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL, 1751 (const char *)src, LENGTHOF(src), 1752 0xfffd, &numSubstitutions, &errorCode); 1753 if( U_FAILURE(errorCode) || p!=dest || 1754 0!=memcmp(dest, expected, LENGTHOF(expected)) || 1755 dest[LENGTHOF(expected)]!=0 || 1756 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) 1757 ) { 1758 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode)); 1759 } 1760 memset(dest, 0xff, sizeof(dest)); 1761 errorCode=U_ZERO_ERROR; 1762 length=numSubstitutions=-5; 1763 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1764 (const char *)src, LENGTHOF(src), 1765 0xfffd, NULL, &errorCode); 1766 if( U_FAILURE(errorCode) || p!=dest || 1767 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1768 dest[length]!=0 1769 ) { 1770 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode)); 1771 } 1772 memset(dest, 0xff, sizeof(dest)); 1773 errorCode=U_ZERO_ERROR; 1774 length=numSubstitutions=-5; 1775 p=u_strFromJavaModifiedUTF8WithSub(dest, LENGTHOF(expected), &length, 1776 (const char *)src, LENGTHOF(src), 1777 0xfffd, &numSubstitutions, &errorCode); 1778 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest || 1779 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1780 dest[length]!=0xffff || 1781 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) 1782 ) { 1783 log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode)); 1784 } 1785 memset(dest, 0xff, sizeof(dest)); 1786 errorCode=U_ZERO_ERROR; 1787 length=numSubstitutions=-5; 1788 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1789 (const char *)src, -1, 1790 0xfffd, &numSubstitutions, &errorCode); 1791 if( U_FAILURE(errorCode) || p!=dest || 1792 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) || 1793 dest[length]!=0 || 1794 numSubstitutions!=0 1795 ) { 1796 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode)); 1797 } 1798 memset(dest, 0xff, sizeof(dest)); 1799 errorCode=U_ZERO_ERROR; 1800 length=numSubstitutions=-5; 1801 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL, 1802 (const char *)src, -1, 1803 0xfffd, &numSubstitutions, &errorCode); 1804 if( U_FAILURE(errorCode) || p!=dest || 1805 0!=memcmp(dest, expected, expectedTerminatedLength) || 1806 dest[expectedTerminatedLength]!=0 || 1807 numSubstitutions!=0 1808 ) { 1809 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode)); 1810 } 1811 memset(dest, 0xff, sizeof(dest)); 1812 errorCode=U_ZERO_ERROR; 1813 length=numSubstitutions=-5; 1814 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1815 (const char *)src, -1, 1816 0xfffd, NULL, &errorCode); 1817 if( U_FAILURE(errorCode) || p!=dest || 1818 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) || 1819 dest[length]!=0 1820 ) { 1821 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode)); 1822 } 1823 memset(dest, 0xff, sizeof(dest)); 1824 errorCode=U_ZERO_ERROR; 1825 length=numSubstitutions=-5; 1826 p=u_strFromJavaModifiedUTF8WithSub(dest, LENGTHOF(expected)/2, &length, 1827 (const char *)src, LENGTHOF(src), 1828 0xfffd, &numSubstitutions, &errorCode); 1829 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || 1830 length!=LENGTHOF(expected) || dest[LENGTHOF(expected)/2]!=0xffff 1831 ) { 1832 log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode)); 1833 } 1834 memset(dest, 0xff, sizeof(dest)); 1835 errorCode=U_ZERO_ERROR; 1836 length=numSubstitutions=-5; 1837 p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length, 1838 (const char *)src, LENGTHOF(src), 1839 0xfffd, &numSubstitutions, &errorCode); 1840 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || 1841 length!=LENGTHOF(expected) || dest[0]!=0xffff 1842 ) { 1843 log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode)); 1844 } 1845 memset(dest, 0xff, sizeof(dest)); 1846 errorCode=U_ZERO_ERROR; 1847 length=numSubstitutions=-5; 1848 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1849 (const char *)shortSrc, LENGTHOF(shortSrc), 1850 0xfffd, &numSubstitutions, &errorCode); 1851 if( U_FAILURE(errorCode) || p!=dest || 1852 length!=LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) || 1853 dest[length]!=0 || 1854 numSubstitutions!=0 1855 ) { 1856 log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode)); 1857 } 1858 memset(dest, 0xff, sizeof(dest)); 1859 errorCode=U_ZERO_ERROR; 1860 length=numSubstitutions=-5; 1861 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1862 (const char *)asciiNul, -1, 1863 0xfffd, &numSubstitutions, &errorCode); 1864 if( U_FAILURE(errorCode) || p!=dest || 1865 length!=LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) || 1866 dest[length]!=0 || 1867 numSubstitutions!=0 1868 ) { 1869 log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode)); 1870 } 1871 memset(dest, 0xff, sizeof(dest)); 1872 errorCode=U_ZERO_ERROR; 1873 length=numSubstitutions=-5; 1874 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1875 NULL, 0, 0xfffd, &numSubstitutions, &errorCode); 1876 if( U_FAILURE(errorCode) || p!=dest || 1877 length!=0 || dest[0]!=0 || 1878 numSubstitutions!=0 1879 ) { 1880 log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode)); 1881 } 1882 memset(dest, 0xff, sizeof(dest)); 1883 errorCode=U_ZERO_ERROR; 1884 length=numSubstitutions=-5; 1885 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1886 (const char *)invalid, LENGTHOF(invalid), 1887 0xfffd, &numSubstitutions, &errorCode); 1888 if( U_FAILURE(errorCode) || p!=dest || 1889 length!=LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) || 1890 dest[length]!=0 || 1891 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) 1892 ) { 1893 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode)); 1894 } 1895 memset(dest, 0xff, sizeof(dest)); 1896 errorCode=U_ZERO_ERROR; 1897 length=numSubstitutions=-5; 1898 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1899 (const char *)invalid, LENGTHOF(invalid), 1900 0x50000, &numSubstitutions, &errorCode); 1901 if( U_FAILURE(errorCode) || p!=dest || 1902 length!=LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) || 1903 dest[length]!=0 || 1904 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) /* not ...50000 */ 1905 ) { 1906 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode)); 1907 } 1908 memset(dest, 0xff, sizeof(dest)); 1909 errorCode=U_ZERO_ERROR; 1910 length=numSubstitutions=-5; 1911 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1912 (const char *)invalid, LENGTHOF(invalid), 1913 U_SENTINEL, &numSubstitutions, &errorCode); 1914 if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) { 1915 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode)); 1916 } 1917 memset(dest, 0xff, sizeof(dest)); 1918 errorCode=U_ZERO_ERROR; 1919 length=numSubstitutions=-5; 1920 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1921 (const char *)src, LENGTHOF(src), 1922 U_SENTINEL, &numSubstitutions, &errorCode); 1923 if( errorCode!=U_INVALID_CHAR_FOUND || 1924 length>=LENGTHOF(expected) || dest[LENGTHOF(expected)-1]!=0xffff || 1925 numSubstitutions!=0 1926 ) { 1927 log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode)); 1928 } 1929 1930 /* illegal arguments */ 1931 memset(dest, 0xff, sizeof(dest)); 1932 errorCode=U_ZERO_ERROR; 1933 length=numSubstitutions=-5; 1934 p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length, 1935 (const char *)src, LENGTHOF(src), 1936 0xfffd, &numSubstitutions, &errorCode); 1937 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1938 log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode)); 1939 } 1940 memset(dest, 0xff, sizeof(dest)); 1941 errorCode=U_ZERO_ERROR; 1942 length=numSubstitutions=-5; 1943 p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length, 1944 (const char *)src, LENGTHOF(src), 1945 0xfffd, &numSubstitutions, &errorCode); 1946 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1947 log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode)); 1948 } 1949 memset(dest, 0xff, sizeof(dest)); 1950 errorCode=U_ZERO_ERROR; 1951 length=numSubstitutions=-5; 1952 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, 1953 NULL, LENGTHOF(src), 1954 0xfffd, &numSubstitutions, &errorCode); 1955 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1956 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode)); 1957 } 1958 memset(dest, 0xff, sizeof(dest)); 1959 errorCode=U_ZERO_ERROR; 1960 length=numSubstitutions=-5; 1961 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, 1962 NULL, -1, 0xfffd, &numSubstitutions, &errorCode); 1963 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1964 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode)); 1965 } 1966 memset(dest, 0xff, sizeof(dest)); 1967 errorCode=U_ZERO_ERROR; 1968 length=numSubstitutions=-5; 1969 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, 1970 (const char *)src, LENGTHOF(src), 1971 0x110000, &numSubstitutions, &errorCode); 1972 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1973 log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode)); 1974 } 1975 memset(dest, 0xff, sizeof(dest)); 1976 errorCode=U_ZERO_ERROR; 1977 length=numSubstitutions=-5; 1978 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, 1979 (const char *)src, LENGTHOF(src), 1980 0xdfff, &numSubstitutions, &errorCode); 1981 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1982 log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode)); 1983 } 1984 } 1985 1986 /* test that string transformation functions permit NULL source pointer when source length==0 */ 1987 static void TestNullEmptySource() { 1988 char dest8[4]={ 3, 3, 3, 3 }; 1989 UChar dest16[4]={ 3, 3, 3, 3 }; 1990 UChar32 dest32[4]={ 3, 3, 3, 3 }; 1991 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 1992 wchar_t destW[4]={ 3, 3, 3, 3 }; 1993 #endif 1994 1995 int32_t length; 1996 UErrorCode errorCode; 1997 1998 /* u_strFromXyz() */ 1999 2000 dest16[0]=3; 2001 length=3; 2002 errorCode=U_ZERO_ERROR; 2003 u_strFromUTF8(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); 2004 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2005 log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n"); 2006 } 2007 2008 dest16[0]=3; 2009 length=3; 2010 errorCode=U_ZERO_ERROR; 2011 u_strFromUTF8WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); 2012 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2013 log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n"); 2014 } 2015 2016 dest16[0]=3; 2017 length=3; 2018 errorCode=U_ZERO_ERROR; 2019 u_strFromUTF8Lenient(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); 2020 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2021 log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n"); 2022 } 2023 2024 dest16[0]=3; 2025 length=3; 2026 errorCode=U_ZERO_ERROR; 2027 u_strFromUTF32(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); 2028 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2029 log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n"); 2030 } 2031 2032 dest16[0]=3; 2033 length=3; 2034 errorCode=U_ZERO_ERROR; 2035 u_strFromUTF32WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); 2036 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2037 log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n"); 2038 } 2039 2040 dest16[0]=3; 2041 length=3; 2042 errorCode=U_ZERO_ERROR; 2043 u_strFromJavaModifiedUTF8WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); 2044 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2045 log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n"); 2046 } 2047 2048 /* u_strToXyz() */ 2049 2050 dest8[0]=3; 2051 length=3; 2052 errorCode=U_ZERO_ERROR; 2053 u_strToUTF8(dest8, LENGTHOF(dest8), &length, NULL, 0, &errorCode); 2054 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { 2055 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n"); 2056 } 2057 2058 dest8[0]=3; 2059 length=3; 2060 errorCode=U_ZERO_ERROR; 2061 u_strToUTF8WithSub(dest8, LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode); 2062 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { 2063 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n"); 2064 } 2065 2066 dest32[0]=3; 2067 length=3; 2068 errorCode=U_ZERO_ERROR; 2069 u_strToUTF32(dest32, LENGTHOF(dest32), &length, NULL, 0, &errorCode); 2070 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) { 2071 log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n"); 2072 } 2073 2074 dest32[0]=3; 2075 length=3; 2076 errorCode=U_ZERO_ERROR; 2077 u_strToUTF32WithSub(dest32, LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode); 2078 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) { 2079 log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n"); 2080 } 2081 2082 dest8[0]=3; 2083 length=3; 2084 errorCode=U_ZERO_ERROR; 2085 u_strToJavaModifiedUTF8(dest8, LENGTHOF(dest8), &length, NULL, 0, &errorCode); 2086 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { 2087 log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n"); 2088 } 2089 2090 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 2091 2092 dest16[0]=3; 2093 length=3; 2094 errorCode=U_ZERO_ERROR; 2095 u_strFromWCS(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); 2096 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2097 log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n"); 2098 } 2099 2100 destW[0]=3; 2101 length=3; 2102 errorCode=U_ZERO_ERROR; 2103 u_strToWCS(destW, LENGTHOF(destW), &length, NULL, 0, &errorCode); 2104 if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) { 2105 log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n"); 2106 } 2107 2108 #endif 2109 } 2110