1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2001-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File custrtrn.C 9 * 10 * Modification History: 11 * Name Description 12 * Ram String transformations test 13 ********************************************************************************* 14 */ 15 /****************************************************************************/ 16 17 18 #include <stdlib.h> 19 #include <stdio.h> 20 #include <string.h> 21 #include "unicode/utypes.h" 22 #include "unicode/ustring.h" 23 #include "unicode/ures.h" 24 #include "ustr_imp.h" 25 #include "cintltst.h" 26 #include "cmemory.h" 27 #include "cstring.h" 28 #include "cwchar.h" 29 30 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 31 32 void addUCharTransformTest(TestNode** root); 33 34 static void Test_strToUTF32(void); 35 static void Test_strToUTF32_surrogates(void); 36 static void Test_strFromUTF32(void); 37 static void Test_strFromUTF32_surrogates(void); 38 static void Test_UChar_UTF8_API(void); 39 static void Test_FromUTF8(void); 40 static void Test_FromUTF8Lenient(void); 41 static void Test_UChar_WCHART_API(void); 42 static void Test_widestrs(void); 43 static void Test_WCHART_LongString(void); 44 static void Test_strToJavaModifiedUTF8(void); 45 static void Test_strFromJavaModifiedUTF8(void); 46 static void TestNullEmptySource(void); 47 48 void 49 addUCharTransformTest(TestNode** root) 50 { 51 addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32"); 52 addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates"); 53 addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32"); 54 addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates"); 55 addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API"); 56 addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8"); 57 addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient"); 58 addTest(root, &Test_UChar_WCHART_API, "custrtrn/Test_UChar_WCHART_API"); 59 addTest(root, &Test_widestrs, "custrtrn/Test_widestrs"); 60 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 61 addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString"); 62 #endif 63 addTest(root, &Test_strToJavaModifiedUTF8, "custrtrn/Test_strToJavaModifiedUTF8"); 64 addTest(root, &Test_strFromJavaModifiedUTF8, "custrtrn/Test_strFromJavaModifiedUTF8"); 65 addTest(root, &TestNullEmptySource, "custrtrn/TestNullEmptySource"); 66 } 67 68 static const UChar32 src32[]={ 69 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 70 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 71 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 72 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 73 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 74 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 75 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 76 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 77 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 78 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 79 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 80 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 81 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 82 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 83 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 84 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 85 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 86 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 87 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 88 /* test non-BMP code points */ 89 0x0002A699, 90 0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB, 91 0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7, 92 0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1, 93 0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0, 94 0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5, 95 96 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 97 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 98 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 99 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 100 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000 101 }; 102 103 static const UChar src16[] = { 104 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 105 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 106 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 107 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 108 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 109 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 110 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 111 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 112 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 113 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 114 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 115 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 116 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 117 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 118 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 119 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 120 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 121 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 122 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 123 124 /* test non-BMP code points */ 125 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 126 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 127 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 128 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 129 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 130 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 131 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 132 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 133 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 134 0xD869, 0xDED5, 135 136 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 137 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 138 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 139 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 140 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000 141 }; 142 143 144 static void Test_strToUTF32(void){ 145 UErrorCode err = U_ZERO_ERROR; 146 UChar32 u32Target[400]; 147 int32_t u32DestLen; 148 int i= 0; 149 150 /* first with length */ 151 u32DestLen = -2; 152 u_strToUTF32(u32Target, 0, &u32DestLen, src16, LENGTHOF(src16),&err); 153 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != LENGTHOF(src32)) { 154 log_err("u_strToUTF32(preflight with length): " 155 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", 156 (long)u32DestLen, (long)LENGTHOF(src32), u_errorName(err)); 157 return; 158 } 159 err = U_ZERO_ERROR; 160 u32DestLen = -2; 161 u_strToUTF32(u32Target, LENGTHOF(src32)+1, &u32DestLen, src16, LENGTHOF(src16),&err); 162 if(err != U_ZERO_ERROR || u32DestLen != LENGTHOF(src32)) { 163 log_err("u_strToUTF32(with length): " 164 "length %ld != %ld and %s != U_ZERO_ERROR\n", 165 (long)u32DestLen, (long)LENGTHOF(src32), u_errorName(err)); 166 return; 167 } 168 /*for(i=0; i< u32DestLen; i++){ 169 printf("0x%08X, ",uTarget[i]); 170 if(i%10==0){ 171 printf("\n"); 172 } 173 }*/ 174 for(i=0; i< LENGTHOF(src32); i++){ 175 if(u32Target[i] != src32[i]){ 176 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i); 177 } 178 } 179 if(u32Target[i] != 0){ 180 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i); 181 } 182 183 /* now NUL-terminated */ 184 u32DestLen = -2; 185 u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err); 186 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != LENGTHOF(src32)-1) { 187 log_err("u_strToUTF32(preflight with NUL-termination): " 188 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", 189 (long)u32DestLen, (long)LENGTHOF(src32)-1, u_errorName(err)); 190 return; 191 } 192 err = U_ZERO_ERROR; 193 u32DestLen = -2; 194 u_strToUTF32(u32Target, LENGTHOF(src32), &u32DestLen, src16, -1,&err); 195 if(err != U_ZERO_ERROR || u32DestLen != LENGTHOF(src32)-1) { 196 log_err("u_strToUTF32(with NUL-termination): " 197 "length %ld != %ld and %s != U_ZERO_ERROR\n", 198 (long)u32DestLen, (long)LENGTHOF(src32)-1, u_errorName(err)); 199 return; 200 } 201 202 for(i=0; i< LENGTHOF(src32); i++){ 203 if(u32Target[i] != src32[i]){ 204 log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]); 205 } 206 } 207 } 208 209 /* test unpaired surrogates */ 210 static void Test_strToUTF32_surrogates() { 211 UErrorCode err = U_ZERO_ERROR; 212 UChar32 u32Target[400]; 213 int32_t len16, u32DestLen; 214 int32_t numSubstitutions; 215 int i; 216 217 static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; 218 static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 }; 219 static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 }; 220 static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 }; 221 len16 = LENGTHOF(surr16); 222 for(i = 0; i < 4; ++i) { 223 err = U_ZERO_ERROR; 224 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err); 225 if(err != U_INVALID_CHAR_FOUND) { 226 log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n", 227 (long)i, u_errorName(err)); 228 return; 229 } 230 231 err = U_ZERO_ERROR; 232 u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err); 233 if(err != U_INVALID_CHAR_FOUND) { 234 log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n", 235 (long)i, u_errorName(err)); 236 return; 237 } 238 239 err = U_ZERO_ERROR; 240 u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err); 241 if(err != U_INVALID_CHAR_FOUND) { 242 log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", 243 (long)i, u_errorName(err)); 244 return; 245 } 246 247 err = U_ZERO_ERROR; 248 u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err); 249 if(err != U_INVALID_CHAR_FOUND) { 250 log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", 251 (long)i, u_errorName(err)); 252 return; 253 } 254 } 255 256 err = U_ZERO_ERROR; 257 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err); 258 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) { 259 log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 260 u_errorName(err)); 261 return; 262 } 263 264 err = U_ZERO_ERROR; 265 u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err); 266 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) { 267 log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 268 u_errorName(err)); 269 return; 270 } 271 272 err = U_ZERO_ERROR; 273 u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err); 274 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) { 275 log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 276 u_errorName(err)); 277 return; 278 } 279 280 err = U_ZERO_ERROR; 281 u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err); 282 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) { 283 log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 284 u_errorName(err)); 285 return; 286 } 287 288 /* with substitution character */ 289 numSubstitutions = -1; 290 err = U_ZERO_ERROR; 291 u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err); 292 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) { 293 log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 294 u_errorName(err)); 295 return; 296 } 297 298 err = U_ZERO_ERROR; 299 u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err); 300 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) { 301 log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 302 u_errorName(err)); 303 return; 304 } 305 306 err = U_ZERO_ERROR; 307 u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err); 308 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) { 309 log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 310 u_errorName(err)); 311 return; 312 } 313 314 err = U_ZERO_ERROR; 315 u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err); 316 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) { 317 log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 318 u_errorName(err)); 319 return; 320 } 321 } 322 323 static void Test_strFromUTF32(void){ 324 UErrorCode err = U_ZERO_ERROR; 325 UChar uTarget[400]; 326 int32_t uDestLen; 327 int i= 0; 328 329 /* first with length */ 330 uDestLen = -2; 331 u_strFromUTF32(uTarget,0,&uDestLen,src32,LENGTHOF(src32),&err); 332 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != LENGTHOF(src16)) { 333 log_err("u_strFromUTF32(preflight with length): " 334 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", 335 (long)uDestLen, (long)LENGTHOF(src16), u_errorName(err)); 336 return; 337 } 338 err = U_ZERO_ERROR; 339 uDestLen = -2; 340 u_strFromUTF32(uTarget, LENGTHOF(src16)+1,&uDestLen,src32,LENGTHOF(src32),&err); 341 if(err != U_ZERO_ERROR || uDestLen != LENGTHOF(src16)) { 342 log_err("u_strFromUTF32(with length): " 343 "length %ld != %ld and %s != U_ZERO_ERROR\n", 344 (long)uDestLen, (long)LENGTHOF(src16), u_errorName(err)); 345 return; 346 } 347 /*for(i=0; i< uDestLen; i++){ 348 printf("0x%04X, ",uTarget[i]); 349 if(i%10==0){ 350 printf("\n"); 351 } 352 }*/ 353 354 for(i=0; i< uDestLen; i++){ 355 if(uTarget[i] != src16[i]){ 356 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i); 357 } 358 } 359 if(uTarget[i] != 0){ 360 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i); 361 } 362 363 /* now NUL-terminated */ 364 uDestLen = -2; 365 u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err); 366 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != LENGTHOF(src16)-1) { 367 log_err("u_strFromUTF32(preflight with NUL-termination): " 368 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", 369 (long)uDestLen, (long)LENGTHOF(src16)-1, u_errorName(err)); 370 return; 371 } 372 err = U_ZERO_ERROR; 373 uDestLen = -2; 374 u_strFromUTF32(uTarget, LENGTHOF(src16),&uDestLen,src32,-1,&err); 375 if(err != U_ZERO_ERROR || uDestLen != LENGTHOF(src16)-1) { 376 log_err("u_strFromUTF32(with NUL-termination): " 377 "length %ld != %ld and %s != U_ZERO_ERROR\n", 378 (long)uDestLen, (long)LENGTHOF(src16)-1, u_errorName(err)); 379 return; 380 } 381 382 for(i=0; i< uDestLen; i++){ 383 if(uTarget[i] != src16[i]){ 384 log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]); 385 } 386 } 387 } 388 389 /* test surrogate code points */ 390 static void Test_strFromUTF32_surrogates() { 391 UErrorCode err = U_ZERO_ERROR; 392 UChar uTarget[400]; 393 int32_t len32, uDestLen; 394 int32_t numSubstitutions; 395 int i; 396 397 static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 }; 398 static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; 399 static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; 400 static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45, 401 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; 402 len32 = LENGTHOF(surr32); 403 for(i = 0; i < 6; ++i) { 404 err = U_ZERO_ERROR; 405 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err); 406 if(err != U_INVALID_CHAR_FOUND) { 407 log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n", 408 (long)i, u_errorName(err)); 409 return; 410 } 411 412 err = U_ZERO_ERROR; 413 u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err); 414 if(err != U_INVALID_CHAR_FOUND) { 415 log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n", 416 (long)i, u_errorName(err)); 417 return; 418 } 419 420 err = U_ZERO_ERROR; 421 u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err); 422 if(err != U_INVALID_CHAR_FOUND) { 423 log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", 424 (long)i, u_errorName(err)); 425 return; 426 } 427 428 err = U_ZERO_ERROR; 429 u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err); 430 if(err != U_INVALID_CHAR_FOUND) { 431 log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", 432 (long)i, u_errorName(err)); 433 return; 434 } 435 } 436 437 err = U_ZERO_ERROR; 438 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err); 439 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) { 440 log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 441 u_errorName(err)); 442 return; 443 } 444 445 err = U_ZERO_ERROR; 446 u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err); 447 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) { 448 log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 449 u_errorName(err)); 450 return; 451 } 452 453 err = U_ZERO_ERROR; 454 u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err); 455 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) { 456 log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 457 u_errorName(err)); 458 return; 459 } 460 461 err = U_ZERO_ERROR; 462 u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err); 463 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) { 464 log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 465 u_errorName(err)); 466 return; 467 } 468 469 /* with substitution character */ 470 numSubstitutions = -1; 471 err = U_ZERO_ERROR; 472 u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err); 473 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) { 474 log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 475 u_errorName(err)); 476 return; 477 } 478 479 err = U_ZERO_ERROR; 480 u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err); 481 if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) { 482 log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 483 u_errorName(err)); 484 return; 485 } 486 487 err = U_ZERO_ERROR; 488 u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err); 489 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) { 490 log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", 491 u_errorName(err)); 492 return; 493 } 494 495 err = U_ZERO_ERROR; 496 u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err); 497 if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) { 498 log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", 499 u_errorName(err)); 500 return; 501 } 502 } 503 504 static void Test_UChar_UTF8_API(void){ 505 506 UErrorCode err = U_ZERO_ERROR; 507 UChar uTemp[1]; 508 char u8Temp[1]; 509 UChar* uTarget=uTemp; 510 const char* u8Src; 511 int32_t u8SrcLen = 0; 512 int32_t uTargetLength = 0; 513 int32_t uDestLen=0; 514 const UChar* uSrc = src16; 515 int32_t uSrcLen = sizeof(src16)/2; 516 char* u8Target = u8Temp; 517 int32_t u8TargetLength =0; 518 int32_t u8DestLen =0; 519 UBool failed = FALSE; 520 int i= 0; 521 int32_t numSubstitutions; 522 523 { 524 /* preflight */ 525 u8Temp[0] = 0x12; 526 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); 527 if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){ 528 err = U_ZERO_ERROR; 529 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1)); 530 u8TargetLength = u8DestLen; 531 532 u8Target[u8TargetLength] = (char)0xfe; 533 u8DestLen = -1; 534 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); 535 if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){ 536 log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err)); 537 return; 538 } 539 540 } 541 else { 542 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); 543 } 544 failed = FALSE; 545 /*for(i=0; i< u8DestLen; i++){ 546 printf("0x%04X, ",u8Target[i]); 547 if(i%10==0){ 548 printf("\n"); 549 } 550 }*/ 551 /*for(i=0; i< u8DestLen; i++){ 552 if(u8Target[i] != src8[i]){ 553 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]); 554 failed =TRUE; 555 } 556 } 557 if(failed){ 558 log_err("u_strToUTF8() failed \n"); 559 }*/ 560 u8Src = u8Target; 561 u8SrcLen = u8DestLen; 562 563 /* preflight */ 564 uTemp[0] = 0x1234; 565 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); 566 if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){ 567 err = U_ZERO_ERROR; 568 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1)); 569 uTargetLength = uDestLen; 570 571 uTarget[uTargetLength] = 0xfff0; 572 uDestLen = -1; 573 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); 574 } 575 else { 576 log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n"); 577 } 578 /*for(i=0; i< uDestLen; i++){ 579 printf("0x%04X, ",uTarget[i]); 580 if(i%10==0){ 581 printf("\n"); 582 } 583 }*/ 584 585 if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) { 586 failed = TRUE; 587 } 588 for(i=0; i< uSrcLen; i++){ 589 if(uTarget[i] != src16[i]){ 590 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i); 591 failed =TRUE; 592 } 593 } 594 if(failed){ 595 log_err("error: u_strFromUTF8(after preflighting) failed\n"); 596 } 597 598 free(u8Target); 599 free(uTarget); 600 } 601 { 602 u8SrcLen = -1; 603 uTargetLength = 0; 604 uSrcLen =-1; 605 u8TargetLength=0; 606 failed = FALSE; 607 /* preflight */ 608 u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); 609 if(err == U_BUFFER_OVERFLOW_ERROR){ 610 err = U_ZERO_ERROR; 611 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1)); 612 u8TargetLength = u8DestLen; 613 614 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); 615 616 } 617 else { 618 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); 619 } 620 failed = FALSE; 621 /*for(i=0; i< u8DestLen; i++){ 622 printf("0x%04X, ",u8Target[i]); 623 if(i%10==0){ 624 printf("\n"); 625 } 626 }*/ 627 /*for(i=0; i< u8DestLen; i++){ 628 if(u8Target[i] != src8[i]){ 629 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]); 630 failed =TRUE; 631 } 632 } 633 if(failed){ 634 log_err("u_strToUTF8() failed \n"); 635 }*/ 636 u8Src = u8Target; 637 u8SrcLen = u8DestLen; 638 639 /* preflight */ 640 u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); 641 if(err == U_BUFFER_OVERFLOW_ERROR){ 642 err = U_ZERO_ERROR; 643 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1)); 644 uTargetLength = uDestLen; 645 646 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); 647 } 648 else { 649 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); 650 } 651 /*for(i=0; i< uDestLen; i++){ 652 printf("0x%04X, ",uTarget[i]); 653 if(i%10==0){ 654 printf("\n"); 655 } 656 }*/ 657 658 for(i=0; i< uSrcLen; i++){ 659 if(uTarget[i] != src16[i]){ 660 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i); 661 failed =TRUE; 662 } 663 } 664 if(failed){ 665 log_err("u_strToUTF8() failed \n"); 666 } 667 668 free(u8Target); 669 free(uTarget); 670 } 671 672 /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */ 673 { 674 static const UChar 675 withLead16[]={ 0x1800, 0xd89a, 0x0061 }, 676 withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 }, 677 withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */ 678 withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */ 679 static const uint8_t 680 withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 }, 681 withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 }, 682 withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */ 683 withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */ 684 UChar out16[10]; 685 char out8[10]; 686 687 if( 688 (err=U_ZERO_ERROR, u_strToUTF8(out8, LENGTHOF(out8), NULL, withLead16, LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) || 689 (err=U_ZERO_ERROR, u_strToUTF8(out8, LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) || 690 (err=U_ZERO_ERROR, u_strFromUTF8(out16, LENGTHOF(out16), NULL, (const char *)withLead8, LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) || 691 (err=U_ZERO_ERROR, u_strFromUTF8(out16, LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND) 692 ) { 693 log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n"); 694 } 695 696 /* test error handling with substitution characters */ 697 698 /* from UTF-8 with length */ 699 err=U_ZERO_ERROR; 700 numSubstitutions=-1; 701 out16[0]=0x55aa; 702 uDestLen=0; 703 u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, 704 (const char *)withTrail8, uprv_strlen((const char *)withTrail8), 705 0x50005, &numSubstitutions, 706 &err); 707 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) || 708 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) || 709 numSubstitutions!=1) { 710 log_err("error: u_strFromUTF8WithSub(length) failed\n"); 711 } 712 713 /* from UTF-8 with NUL termination */ 714 err=U_ZERO_ERROR; 715 numSubstitutions=-1; 716 out16[0]=0x55aa; 717 uDestLen=0; 718 u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, 719 (const char *)withTrail8, -1, 720 0xfffd, &numSubstitutions, 721 &err); 722 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) || 723 0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) || 724 numSubstitutions!=1) { 725 log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n"); 726 } 727 728 /* preflight from UTF-8 with NUL termination */ 729 err=U_ZERO_ERROR; 730 numSubstitutions=-1; 731 out16[0]=0x55aa; 732 uDestLen=0; 733 u_strFromUTF8WithSub(out16, 1, &uDestLen, 734 (const char *)withTrail8, -1, 735 0x50005, &numSubstitutions, 736 &err); 737 if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=1) { 738 log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n"); 739 } 740 741 /* to UTF-8 with length */ 742 err=U_ZERO_ERROR; 743 numSubstitutions=-1; 744 out8[0]=(char)0xf5; 745 u8DestLen=0; 746 u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, 747 withTrail16, u_strlen(withTrail16), 748 0xfffd, &numSubstitutions, 749 &err); 750 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) || 751 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) || 752 numSubstitutions!=1) { 753 log_err("error: u_strToUTF8WithSub(length) failed\n"); 754 } 755 756 /* to UTF-8 with NUL termination */ 757 err=U_ZERO_ERROR; 758 numSubstitutions=-1; 759 out8[0]=(char)0xf5; 760 u8DestLen=0; 761 u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, 762 withTrail16, -1, 763 0x1a, &numSubstitutions, 764 &err); 765 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8Sub1A) || 766 0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) || 767 numSubstitutions!=1) { 768 log_err("error: u_strToUTF8WithSub(NUL termination) failed\n"); 769 } 770 771 /* preflight to UTF-8 with NUL termination */ 772 err=U_ZERO_ERROR; 773 numSubstitutions=-1; 774 out8[0]=(char)0xf5; 775 u8DestLen=0; 776 u_strToUTF8WithSub(out8, 1, &u8DestLen, 777 withTrail16, -1, 778 0xfffd, &numSubstitutions, 779 &err); 780 if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) || 781 numSubstitutions!=1) { 782 log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n"); 783 } 784 785 /* test that numSubstitutions==0 if there are no substitutions */ 786 787 /* from UTF-8 with length (just first 3 bytes which are valid) */ 788 err=U_ZERO_ERROR; 789 numSubstitutions=-1; 790 out16[0]=0x55aa; 791 uDestLen=0; 792 u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, 793 (const char *)withTrail8, 3, 794 0x50005, &numSubstitutions, 795 &err); 796 if(U_FAILURE(err) || uDestLen!=1 || 797 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) || 798 numSubstitutions!=0) { 799 log_err("error: u_strFromUTF8WithSub(no subs) failed\n"); 800 } 801 802 /* to UTF-8 with length (just first UChar which is valid) */ 803 err=U_ZERO_ERROR; 804 numSubstitutions=-1; 805 out8[0]=(char)0xf5; 806 u8DestLen=0; 807 u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, 808 withTrail16, 1, 809 0xfffd, &numSubstitutions, 810 &err); 811 if(U_FAILURE(err) || u8DestLen!=3 || 812 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) || 813 numSubstitutions!=0) { 814 log_err("error: u_strToUTF8WithSub(no subs) failed\n"); 815 } 816 817 /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */ 818 819 /* from UTF-8 with length (just first 3 bytes which are valid) */ 820 err=U_ZERO_ERROR; 821 numSubstitutions=-1; 822 out16[0]=0x55aa; 823 uDestLen=0; 824 u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, 825 (const char *)withTrail8, 3, 826 U_SENTINEL, &numSubstitutions, 827 &err); 828 if(U_FAILURE(err) || uDestLen!=1 || 829 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) || 830 numSubstitutions!=0) { 831 log_err("error: u_strFromUTF8WithSub(no subchar) failed\n"); 832 } 833 834 /* to UTF-8 with length (just first UChar which is valid) */ 835 err=U_ZERO_ERROR; 836 numSubstitutions=-1; 837 out8[0]=(char)0xf5; 838 u8DestLen=0; 839 u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, 840 withTrail16, 1, 841 U_SENTINEL, &numSubstitutions, 842 &err); 843 if(U_FAILURE(err) || u8DestLen!=3 || 844 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) || 845 numSubstitutions!=0) { 846 log_err("error: u_strToUTF8WithSub(no subchar) failed\n"); 847 } 848 } 849 } 850 851 /* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */ 852 static UBool 853 equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) { 854 UChar c1, c2; 855 856 while(length>0) { 857 c1=*s++; 858 c2=*t++; 859 if(c1!=c2 && c2!=0xfffd) { 860 return FALSE; 861 } 862 --length; 863 } 864 return TRUE; 865 } 866 867 /* test u_strFromUTF8Lenient() */ 868 static void 869 Test_FromUTF8(void) { 870 /* 871 * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)" 872 */ 873 static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 }; 874 UChar dest[64]; 875 UChar *destPointer; 876 int32_t destLength; 877 UErrorCode errorCode; 878 879 /* 3 bytes input, one UChar output (U+095C) */ 880 errorCode=U_ZERO_ERROR; 881 destLength=-99; 882 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode); 883 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) { 884 log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n", 885 (long)destLength, u_errorName(errorCode)); 886 } 887 888 /* 4 bytes input, two UChars output (U+095C U+0000) */ 889 errorCode=U_ZERO_ERROR; 890 destLength=-99; 891 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode); 892 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) { 893 log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n", 894 (long)destLength, u_errorName(errorCode)); 895 } 896 897 /* NUL-terminated 3 bytes input, one UChar output (U+095C) */ 898 errorCode=U_ZERO_ERROR; 899 destLength=-99; 900 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode); 901 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) { 902 log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n", 903 (long)destLength, u_errorName(errorCode)); 904 } 905 906 /* 3 bytes input, one UChar output (U+095C), transform not just preflight */ 907 errorCode=U_ZERO_ERROR; 908 dest[0]=dest[1]=99; 909 destLength=-99; 910 destPointer=u_strFromUTF8(dest, LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode); 911 if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) { 912 log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n", 913 (long)destLength, u_errorName(errorCode)); 914 } 915 } 916 917 /* test u_strFromUTF8Lenient() */ 918 static void 919 Test_FromUTF8Lenient(void) { 920 /* 921 * Multiple input strings, each NUL-terminated. 922 * Terminate with a string starting with 0xff. 923 */ 924 static const uint8_t bytes[]={ 925 /* well-formed UTF-8 */ 926 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0xf0, 0xa0, 0x80, 0x80, 927 0x62, 0xc3, 0xa0, 0xe0, 0xa0, 0x81, 0xf0, 0xa0, 0x80, 0x81, 0, 928 929 /* various malformed sequences */ 930 0xc3, 0xc3, 0x9f, 0xc3, 0xa0, 0xe0, 0x80, 0x8a, 0xf0, 0x41, 0x42, 0x43, 0, 931 932 /* truncated input */ 933 0xc3, 0, 934 0xe0, 0, 935 0xe0, 0xa0, 0, 936 0xf0, 0, 937 0xf0, 0x90, 0, 938 0xf0, 0x90, 0x80, 0, 939 940 /* non-ASCII characters in the last few bytes */ 941 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0, 942 0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0, 943 944 /* empty string */ 945 0, 946 947 /* finish */ 948 0xff, 0 949 }; 950 951 /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */ 952 static const UChar uchars[]={ 953 0x61, 0xdf, 0x800, 0xd840, 0xdc00, 954 0x62, 0xe0, 0x801, 0xd840, 0xdc01, 0, 955 956 0xfffd, 0x9f, 0xe0, 0xa, 0xfffd, 0xfffd, 0, 957 958 0xfffd, 0, 959 0xfffd, 0, 960 0xfffd, 0, 961 0xfffd, 0, 962 0xfffd, 0, 963 0xfffd, 0, 964 965 0x61, 0xdf, 0x800, 0, 966 0x61, 0x800, 0xdf, 0, 967 968 0, 969 970 0 971 }; 972 973 UChar dest[64]; 974 const char *pb; 975 const UChar *pu, *pDest; 976 int32_t srcLength, destLength0, destLength; 977 int number; 978 UErrorCode errorCode; 979 980 /* verify checking for some illegal arguments */ 981 dest[0]=0x1234; 982 destLength=-1; 983 errorCode=U_ZERO_ERROR; 984 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode); 985 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) { 986 log_err("u_strFromUTF8Lenient(src=NULL) failed\n"); 987 } 988 989 dest[0]=0x1234; 990 destLength=-1; 991 errorCode=U_ZERO_ERROR; 992 pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode); 993 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 994 log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n"); 995 } 996 997 dest[0]=0x1234; 998 destLength=-1; 999 errorCode=U_MEMORY_ALLOCATION_ERROR; 1000 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode); 1001 if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) { 1002 log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n"); 1003 } 1004 1005 dest[0]=0x1234; 1006 destLength=-1; 1007 errorCode=U_MEMORY_ALLOCATION_ERROR; 1008 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, NULL); 1009 if(dest[0]!=0x1234) { 1010 log_err("u_strFromUTF8Lenient(pErrorCode=NULL) failed\n"); 1011 } 1012 1013 /* test normal behavior */ 1014 number=0; /* string number for log_err() */ 1015 1016 for(pb=(const char *)bytes, pu=uchars; 1017 *pb!=(char)0xff; 1018 pb+=srcLength+1, pu+=destLength0+1, ++number 1019 ) { 1020 srcLength=uprv_strlen(pb); 1021 destLength0=u_strlen(pu); 1022 1023 /* preflighting with NUL-termination */ 1024 dest[0]=0x1234; 1025 destLength=-1; 1026 errorCode=U_ZERO_ERROR; 1027 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode); 1028 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) || 1029 pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0 1030 ) { 1031 log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number); 1032 } 1033 1034 /* preflighting/some capacity with NUL-termination */ 1035 if(srcLength>0) { 1036 dest[destLength0-1]=0x1234; 1037 destLength=-1; 1038 errorCode=U_ZERO_ERROR; 1039 pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode); 1040 if (errorCode!=U_BUFFER_OVERFLOW_ERROR || 1041 dest[destLength0-1]!=0x1234 || destLength!=destLength0 1042 ) { 1043 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number); 1044 } 1045 } 1046 1047 /* conversion with NUL-termination, much capacity */ 1048 dest[0]=dest[destLength0]=0x1234; 1049 destLength=-1; 1050 errorCode=U_ZERO_ERROR; 1051 pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, -1, &errorCode); 1052 if (errorCode!=U_ZERO_ERROR || 1053 pDest!=dest || dest[destLength0]!=0 || 1054 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1055 ) { 1056 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number); 1057 } 1058 1059 /* conversion with NUL-termination, exact capacity */ 1060 dest[0]=dest[destLength0]=0x1234; 1061 destLength=-1; 1062 errorCode=U_ZERO_ERROR; 1063 pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode); 1064 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING || 1065 pDest!=dest || dest[destLength0]!=0x1234 || 1066 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1067 ) { 1068 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number); 1069 } 1070 1071 /* preflighting with length */ 1072 dest[0]=0x1234; 1073 destLength=-1; 1074 errorCode=U_ZERO_ERROR; 1075 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode); 1076 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) || 1077 pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength 1078 ) { 1079 log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number); 1080 } 1081 1082 /* preflighting/some capacity with length */ 1083 if(srcLength>0) { 1084 dest[srcLength-1]=0x1234; 1085 destLength=-1; 1086 errorCode=U_ZERO_ERROR; 1087 pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode); 1088 if (errorCode!=U_BUFFER_OVERFLOW_ERROR || 1089 dest[srcLength-1]!=0x1234 || destLength!=srcLength 1090 ) { 1091 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number); 1092 } 1093 } 1094 1095 /* conversion with length, much capacity */ 1096 dest[0]=dest[destLength0]=0x1234; 1097 destLength=-1; 1098 errorCode=U_ZERO_ERROR; 1099 pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, srcLength, &errorCode); 1100 if (errorCode!=U_ZERO_ERROR || 1101 pDest!=dest || dest[destLength0]!=0 || 1102 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1103 ) { 1104 log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number); 1105 } 1106 1107 /* conversion with length, srcLength capacity */ 1108 dest[0]=dest[srcLength]=dest[destLength0]=0x1234; 1109 destLength=-1; 1110 errorCode=U_ZERO_ERROR; 1111 pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode); 1112 if(srcLength==destLength0) { 1113 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING || 1114 pDest!=dest || dest[destLength0]!=0x1234 || 1115 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1116 ) { 1117 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number); 1118 } 1119 } else { 1120 if (errorCode!=U_ZERO_ERROR || 1121 pDest!=dest || dest[destLength0]!=0 || 1122 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) 1123 ) { 1124 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number); 1125 } 1126 } 1127 } 1128 } 1129 1130 static const uint16_t src16j[] = { 1131 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 1132 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 1133 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 1134 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 1135 0x0000, 1136 /* Test only ASCII */ 1137 1138 }; 1139 static const uint16_t src16WithNulls[] = { 1140 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0000, 1141 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 0x0000, 1142 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0000, 1143 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 0x0000, 1144 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000, 1145 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000, 1146 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000, 1147 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000, 1148 /* test only ASCII */ 1149 /* 1150 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 1151 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 1152 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1, 1153 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 1154 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 1155 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 1156 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 1157 0x0054, 0x0000 */ 1158 1159 }; 1160 static void Test_UChar_WCHART_API(void){ 1161 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 1162 UErrorCode err = U_ZERO_ERROR; 1163 const UChar* uSrc = src16j; 1164 int32_t uSrcLen = sizeof(src16j)/2; 1165 wchar_t* wDest = NULL; 1166 int32_t wDestLen = 0; 1167 int32_t reqLen= 0 ; 1168 UBool failed = FALSE; 1169 UChar* uDest = NULL; 1170 int32_t uDestLen = 0; 1171 int i =0; 1172 { 1173 /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */ 1174 if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) { 1175 log_err("u_strFromWCS() should return NULL with a bad argument\n"); 1176 } 1177 if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) { 1178 log_err("u_strToWCS() should return NULL with a bad argument\n"); 1179 } 1180 1181 /* NULL source & destination. */ 1182 err = U_ZERO_ERROR; 1183 u_strFromWCS(NULL,0,NULL,NULL,0,&err); 1184 if (err != U_STRING_NOT_TERMINATED_WARNING) { 1185 log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err)); 1186 } 1187 err = U_ZERO_ERROR; 1188 u_strToWCS(NULL,0,NULL,NULL,0,&err); 1189 if (err != U_STRING_NOT_TERMINATED_WARNING) { 1190 log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err)); 1191 } 1192 err = U_ZERO_ERROR; 1193 1194 /* pre-flight*/ 1195 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); 1196 1197 if(err == U_BUFFER_OVERFLOW_ERROR){ 1198 err=U_ZERO_ERROR; 1199 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); 1200 wDestLen = reqLen+1; 1201 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); 1202 } 1203 1204 /* pre-flight */ 1205 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); 1206 1207 1208 if(err == U_BUFFER_OVERFLOW_ERROR){ 1209 err =U_ZERO_ERROR; 1210 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); 1211 uDestLen = reqLen + 1; 1212 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); 1213 }else if(U_FAILURE(err)){ 1214 1215 log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err)); 1216 return; 1217 } 1218 1219 for(i=0; i< uSrcLen; i++){ 1220 if(uDest[i] != src16j[i]){ 1221 log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i); 1222 failed =TRUE; 1223 } 1224 } 1225 1226 if(U_FAILURE(err)){ 1227 failed = TRUE; 1228 } 1229 if(failed){ 1230 log_err("u_strToWCS() failed \n"); 1231 } 1232 free(wDest); 1233 free(uDest); 1234 1235 1236 /* test with embeded nulls */ 1237 uSrc = src16WithNulls; 1238 uSrcLen = sizeof(src16WithNulls)/2; 1239 wDestLen =0; 1240 uDestLen =0; 1241 wDest = NULL; 1242 uDest = NULL; 1243 /* pre-flight*/ 1244 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); 1245 1246 if(err == U_BUFFER_OVERFLOW_ERROR){ 1247 err=U_ZERO_ERROR; 1248 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); 1249 wDestLen = reqLen+1; 1250 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); 1251 } 1252 1253 /* pre-flight */ 1254 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); 1255 1256 if(err == U_BUFFER_OVERFLOW_ERROR){ 1257 err =U_ZERO_ERROR; 1258 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); 1259 uDestLen = reqLen + 1; 1260 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); 1261 } 1262 1263 if(!U_FAILURE(err)) { 1264 for(i=0; i< uSrcLen; i++){ 1265 if(uDest[i] != src16WithNulls[i]){ 1266 log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i); 1267 failed =TRUE; 1268 } 1269 } 1270 } 1271 1272 if(U_FAILURE(err)){ 1273 failed = TRUE; 1274 } 1275 if(failed){ 1276 log_err("u_strToWCS() failed \n"); 1277 } 1278 free(wDest); 1279 free(uDest); 1280 1281 } 1282 1283 { 1284 1285 uSrc = src16j; 1286 uSrcLen = sizeof(src16j)/2; 1287 wDestLen =0; 1288 uDestLen =0; 1289 wDest = NULL; 1290 uDest = NULL; 1291 wDestLen = 0; 1292 /* pre-flight*/ 1293 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err); 1294 1295 if(err == U_BUFFER_OVERFLOW_ERROR){ 1296 err=U_ZERO_ERROR; 1297 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); 1298 wDestLen = reqLen+1; 1299 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err); 1300 } 1301 uDestLen = 0; 1302 /* pre-flight */ 1303 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err); 1304 1305 if(err == U_BUFFER_OVERFLOW_ERROR){ 1306 err =U_ZERO_ERROR; 1307 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); 1308 uDestLen = reqLen + 1; 1309 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err); 1310 } 1311 1312 1313 if(!U_FAILURE(err)) { 1314 for(i=0; i< uSrcLen; i++){ 1315 if(uDest[i] != src16j[i]){ 1316 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i); 1317 failed =TRUE; 1318 } 1319 } 1320 } 1321 1322 if(U_FAILURE(err)){ 1323 failed = TRUE; 1324 } 1325 if(failed){ 1326 log_err("u_strToWCS() failed \n"); 1327 } 1328 free(wDest); 1329 free(uDest); 1330 } 1331 1332 /* 1333 * Test u_terminateWChars(). 1334 * All u_terminateXYZ() use the same implementation macro; 1335 * we test this function to improve API coverage. 1336 */ 1337 { 1338 wchar_t buffer[10]; 1339 1340 err=U_ZERO_ERROR; 1341 buffer[3]=0x20ac; 1342 wDestLen=u_terminateWChars(buffer, LENGTHOF(buffer), 3, &err); 1343 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) { 1344 log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n", 1345 u_errorName(err), wDestLen, buffer[3]); 1346 } 1347 1348 err=U_ZERO_ERROR; 1349 buffer[3]=0x20ac; 1350 wDestLen=u_terminateWChars(buffer, 3, 3, &err); 1351 if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) { 1352 log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n", 1353 u_errorName(err), wDestLen, buffer[3]); 1354 } 1355 1356 err=U_STRING_NOT_TERMINATED_WARNING; 1357 buffer[3]=0x20ac; 1358 wDestLen=u_terminateWChars(buffer, LENGTHOF(buffer), 3, &err); 1359 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) { 1360 log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n", 1361 u_errorName(err), wDestLen, buffer[3]); 1362 } 1363 1364 err=U_ZERO_ERROR; 1365 buffer[3]=0x20ac; 1366 wDestLen=u_terminateWChars(buffer, 2, 3, &err); 1367 if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) { 1368 log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n", 1369 u_errorName(err), wDestLen, buffer[3]); 1370 } 1371 } 1372 #else 1373 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); 1374 #endif 1375 } 1376 1377 static void Test_widestrs() 1378 { 1379 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 1380 wchar_t ws[100]; 1381 UChar rts[100]; 1382 int32_t wcap = sizeof(ws) / sizeof(*ws); 1383 int32_t wl; 1384 int32_t rtcap = sizeof(rts) / sizeof(*rts); 1385 int32_t rtl; 1386 wchar_t *wcs; 1387 UChar *cp; 1388 const char *errname; 1389 UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0}; 1390 int32_t ul = sizeof(ustr)/sizeof(*ustr) -1; 1391 char astr[100]; 1392 1393 UErrorCode err; 1394 1395 err = U_ZERO_ERROR; 1396 wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err); 1397 if (U_FAILURE(err)) { 1398 errname = u_errorName(err); 1399 log_err("test_widestrs: u_strToWCS error: %s!\n",errname); 1400 } 1401 if(ul!=wl){ 1402 log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl); 1403 } 1404 err = U_ZERO_ERROR; 1405 wl = (int32_t)uprv_wcslen(wcs); 1406 cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err); 1407 if (U_FAILURE(err)) { 1408 errname = u_errorName(err); 1409 fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname); 1410 } 1411 if(wl != rtl){ 1412 log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl); 1413 } 1414 #else 1415 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); 1416 #endif 1417 } 1418 1419 static void 1420 Test_WCHART_LongString(){ 1421 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 1422 UErrorCode status = U_ZERO_ERROR; 1423 const char* testdatapath=loadTestData(&status); 1424 UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status); 1425 int32_t strLen =0; 1426 const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status); 1427 const UChar* uSrc = str; 1428 int32_t uSrcLen = strLen; 1429 int32_t wDestLen =0, reqLen=0, i=0; 1430 int32_t uDestLen =0; 1431 wchar_t* wDest = NULL; 1432 UChar* uDest = NULL; 1433 UBool failed = FALSE; 1434 1435 if(U_FAILURE(status)){ 1436 log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status)); 1437 return; 1438 } 1439 1440 /* pre-flight*/ 1441 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status); 1442 1443 if(status == U_BUFFER_OVERFLOW_ERROR){ 1444 status=U_ZERO_ERROR; 1445 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); 1446 wDestLen = reqLen+1; 1447 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status); 1448 } 1449 uDestLen = 0; 1450 /* pre-flight */ 1451 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status); 1452 1453 if(status == U_BUFFER_OVERFLOW_ERROR){ 1454 status =U_ZERO_ERROR; 1455 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); 1456 uDestLen = reqLen + 1; 1457 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status); 1458 } 1459 1460 1461 for(i=0; i< uSrcLen; i++){ 1462 if(uDest[i] != str[i]){ 1463 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i); 1464 failed =TRUE; 1465 } 1466 } 1467 1468 if(U_FAILURE(status)){ 1469 failed = TRUE; 1470 } 1471 if(failed){ 1472 log_err("u_strToWCS() failed \n"); 1473 } 1474 free(wDest); 1475 free(uDest); 1476 /* close the bundle */ 1477 ures_close(theBundle); 1478 #else 1479 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); 1480 #endif 1481 } 1482 1483 static void Test_strToJavaModifiedUTF8() { 1484 static const UChar src[]={ 1485 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3, 1486 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003, 1487 0xd800, 0xdc00, 0xdc00, 0xd800, 0, 1488 0xdbff, 0xdfff, 1489 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f 1490 }; 1491 static const uint8_t expected[]={ 1492 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3, 1493 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83, 1494 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83, 1495 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80, 1496 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1497 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f 1498 }; 1499 static const UChar shortSrc[]={ 1500 0xe01, 0xe1, 0x61 1501 }; 1502 static const uint8_t shortExpected[]={ 1503 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61 1504 }; 1505 static const UChar asciiNul[]={ 1506 0x61, 0x62, 0x63, 0 1507 }; 1508 static const uint8_t asciiNulExpected[]={ 1509 0x61, 0x62, 0x63 1510 }; 1511 char dest[200]; 1512 char *p; 1513 int32_t length, expectedTerminatedLength; 1514 UErrorCode errorCode; 1515 1516 expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")- 1517 (const char *)expected); 1518 1519 errorCode=U_ZERO_ERROR; 1520 length=-5; 1521 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, 1522 src, LENGTHOF(src), &errorCode); 1523 if( U_FAILURE(errorCode) || p!=dest || 1524 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1525 dest[length]!=0 1526 ) { 1527 log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode)); 1528 } 1529 memset(dest, 0xff, sizeof(dest)); 1530 errorCode=U_ZERO_ERROR; 1531 length=-5; 1532 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, 1533 src, LENGTHOF(src), &errorCode); 1534 if( U_FAILURE(errorCode) || p!=dest || 1535 0!=memcmp(dest, expected, LENGTHOF(expected)) || 1536 dest[LENGTHOF(expected)]!=0 1537 ) { 1538 log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode)); 1539 } 1540 memset(dest, 0xff, sizeof(dest)); 1541 errorCode=U_ZERO_ERROR; 1542 length=-5; 1543 p=u_strToJavaModifiedUTF8(dest, LENGTHOF(expected), &length, 1544 src, LENGTHOF(src), &errorCode); 1545 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest || 1546 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1547 dest[length]!=(char)0xff 1548 ) { 1549 log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode)); 1550 } 1551 memset(dest, 0xff, sizeof(dest)); 1552 errorCode=U_ZERO_ERROR; 1553 length=-5; 1554 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode); 1555 if( U_FAILURE(errorCode) || p!=dest || 1556 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) || 1557 dest[length]!=0 1558 ) { 1559 log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode)); 1560 } 1561 memset(dest, 0xff, sizeof(dest)); 1562 errorCode=U_ZERO_ERROR; 1563 length=-5; 1564 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode); 1565 if( U_FAILURE(errorCode) || p!=dest || 1566 0!=memcmp(dest, expected, expectedTerminatedLength) || 1567 dest[expectedTerminatedLength]!=0 1568 ) { 1569 log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode)); 1570 } 1571 memset(dest, 0xff, sizeof(dest)); 1572 errorCode=U_ZERO_ERROR; 1573 length=-5; 1574 p=u_strToJavaModifiedUTF8(dest, LENGTHOF(expected)/2, &length, 1575 src, LENGTHOF(src), &errorCode); 1576 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || 1577 length!=LENGTHOF(expected) || dest[LENGTHOF(expected)/2]!=(char)0xff 1578 ) { 1579 log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode)); 1580 } 1581 memset(dest, 0xff, sizeof(dest)); 1582 errorCode=U_ZERO_ERROR; 1583 length=-5; 1584 p=u_strToJavaModifiedUTF8(NULL, 0, &length, 1585 src, LENGTHOF(src), &errorCode); 1586 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || 1587 length!=LENGTHOF(expected) || dest[0]!=(char)0xff 1588 ) { 1589 log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode)); 1590 } 1591 memset(dest, 0xff, sizeof(dest)); 1592 errorCode=U_ZERO_ERROR; 1593 length=-5; 1594 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, 1595 shortSrc, LENGTHOF(shortSrc), &errorCode); 1596 if( U_FAILURE(errorCode) || p!=dest || 1597 length!=LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) || 1598 dest[length]!=0 1599 ) { 1600 log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode)); 1601 } 1602 memset(dest, 0xff, sizeof(dest)); 1603 errorCode=U_ZERO_ERROR; 1604 length=-5; 1605 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, 1606 asciiNul, -1, &errorCode); 1607 if( U_FAILURE(errorCode) || p!=dest || 1608 length!=LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) || 1609 dest[length]!=0 1610 ) { 1611 log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode)); 1612 } 1613 memset(dest, 0xff, sizeof(dest)); 1614 errorCode=U_ZERO_ERROR; 1615 length=-5; 1616 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, 1617 NULL, 0, &errorCode); 1618 if( U_FAILURE(errorCode) || p!=dest || 1619 length!=0 || dest[0]!=0 1620 ) { 1621 log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode)); 1622 } 1623 1624 /* illegal arguments */ 1625 memset(dest, 0xff, sizeof(dest)); 1626 errorCode=U_ZERO_ERROR; 1627 length=-5; 1628 p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length, 1629 src, LENGTHOF(src), &errorCode); 1630 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { 1631 log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode)); 1632 } 1633 memset(dest, 0xff, sizeof(dest)); 1634 errorCode=U_ZERO_ERROR; 1635 length=-5; 1636 p=u_strToJavaModifiedUTF8(dest, -1, &length, 1637 src, LENGTHOF(src), &errorCode); 1638 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { 1639 log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode)); 1640 } 1641 memset(dest, 0xff, sizeof(dest)); 1642 errorCode=U_ZERO_ERROR; 1643 length=-5; 1644 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length, 1645 NULL, LENGTHOF(src), &errorCode); 1646 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { 1647 log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode)); 1648 } 1649 memset(dest, 0xff, sizeof(dest)); 1650 errorCode=U_ZERO_ERROR; 1651 length=-5; 1652 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length, 1653 NULL, -1, &errorCode); 1654 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { 1655 log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode)); 1656 } 1657 } 1658 1659 static void Test_strFromJavaModifiedUTF8() { 1660 static const uint8_t src[]={ 1661 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3, 1662 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83, 1663 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83, 1664 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0, 1665 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1666 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80, /* invalid sequences */ 1667 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 1668 0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad, /* non-shortest forms are allowed */ 1669 0xe0, 0xb8, 0x8e, 0x6f 1670 }; 1671 static const UChar expected[]={ 1672 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3, 1673 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003, 1674 0xd800, 0xdc00, 0xdc00, 0xd800, 0, 1675 0xdbff, 0xdfff, 1676 0xfffd, 0xfffd, 0xfffd, 0xfffd, 1677 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 1678 0x6c, 0xed, 1679 0xe0e, 0x6f 1680 }; 1681 static const uint8_t shortSrc[]={ 1682 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61 1683 }; 1684 static const UChar shortExpected[]={ 1685 0xe01, 0xe1, 0x61 1686 }; 1687 static const uint8_t asciiNul[]={ 1688 0x61, 0x62, 0x63, 0 1689 }; 1690 static const UChar asciiNulExpected[]={ 1691 0x61, 0x62, 0x63 1692 }; 1693 static const uint8_t invalid[]={ 1694 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80 1695 }; 1696 static const UChar invalidExpectedFFFD[]={ 1697 0xfffd, 0xfffd, 0xfffd, 0xfffd 1698 }; 1699 static const UChar invalidExpected50000[]={ 1700 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00 1701 }; 1702 UChar dest[200]; 1703 UChar *p; 1704 int32_t length, expectedTerminatedLength; 1705 int32_t numSubstitutions; 1706 UErrorCode errorCode; 1707 1708 expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected); 1709 1710 errorCode=U_ZERO_ERROR; 1711 length=numSubstitutions=-5; 1712 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1713 (const char *)src, LENGTHOF(src), 1714 0xfffd, &numSubstitutions, &errorCode); 1715 if( U_FAILURE(errorCode) || p!=dest || 1716 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1717 dest[length]!=0 || 1718 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) 1719 ) { 1720 log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode)); 1721 } 1722 memset(dest, 0xff, sizeof(dest)); 1723 errorCode=U_ZERO_ERROR; 1724 length=numSubstitutions=-5; 1725 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL, 1726 (const char *)src, LENGTHOF(src), 1727 0xfffd, &numSubstitutions, &errorCode); 1728 if( U_FAILURE(errorCode) || p!=dest || 1729 0!=memcmp(dest, expected, LENGTHOF(expected)) || 1730 dest[LENGTHOF(expected)]!=0 || 1731 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) 1732 ) { 1733 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode)); 1734 } 1735 memset(dest, 0xff, sizeof(dest)); 1736 errorCode=U_ZERO_ERROR; 1737 length=numSubstitutions=-5; 1738 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1739 (const char *)src, LENGTHOF(src), 1740 0xfffd, NULL, &errorCode); 1741 if( U_FAILURE(errorCode) || p!=dest || 1742 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1743 dest[length]!=0 1744 ) { 1745 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode)); 1746 } 1747 memset(dest, 0xff, sizeof(dest)); 1748 errorCode=U_ZERO_ERROR; 1749 length=numSubstitutions=-5; 1750 p=u_strFromJavaModifiedUTF8WithSub(dest, LENGTHOF(expected), &length, 1751 (const char *)src, LENGTHOF(src), 1752 0xfffd, &numSubstitutions, &errorCode); 1753 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest || 1754 length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || 1755 dest[length]!=0xffff || 1756 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) 1757 ) { 1758 log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode)); 1759 } 1760 memset(dest, 0xff, sizeof(dest)); 1761 errorCode=U_ZERO_ERROR; 1762 length=numSubstitutions=-5; 1763 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1764 (const char *)src, -1, 1765 0xfffd, &numSubstitutions, &errorCode); 1766 if( U_FAILURE(errorCode) || p!=dest || 1767 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) || 1768 dest[length]!=0 || 1769 numSubstitutions!=0 1770 ) { 1771 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode)); 1772 } 1773 memset(dest, 0xff, sizeof(dest)); 1774 errorCode=U_ZERO_ERROR; 1775 length=numSubstitutions=-5; 1776 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL, 1777 (const char *)src, -1, 1778 0xfffd, &numSubstitutions, &errorCode); 1779 if( U_FAILURE(errorCode) || p!=dest || 1780 0!=memcmp(dest, expected, expectedTerminatedLength) || 1781 dest[expectedTerminatedLength]!=0 || 1782 numSubstitutions!=0 1783 ) { 1784 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode)); 1785 } 1786 memset(dest, 0xff, sizeof(dest)); 1787 errorCode=U_ZERO_ERROR; 1788 length=numSubstitutions=-5; 1789 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1790 (const char *)src, -1, 1791 0xfffd, NULL, &errorCode); 1792 if( U_FAILURE(errorCode) || p!=dest || 1793 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) || 1794 dest[length]!=0 1795 ) { 1796 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode)); 1797 } 1798 memset(dest, 0xff, sizeof(dest)); 1799 errorCode=U_ZERO_ERROR; 1800 length=numSubstitutions=-5; 1801 p=u_strFromJavaModifiedUTF8WithSub(dest, LENGTHOF(expected)/2, &length, 1802 (const char *)src, LENGTHOF(src), 1803 0xfffd, &numSubstitutions, &errorCode); 1804 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || 1805 length!=LENGTHOF(expected) || dest[LENGTHOF(expected)/2]!=0xffff 1806 ) { 1807 log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode)); 1808 } 1809 memset(dest, 0xff, sizeof(dest)); 1810 errorCode=U_ZERO_ERROR; 1811 length=numSubstitutions=-5; 1812 p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length, 1813 (const char *)src, LENGTHOF(src), 1814 0xfffd, &numSubstitutions, &errorCode); 1815 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || 1816 length!=LENGTHOF(expected) || dest[0]!=0xffff 1817 ) { 1818 log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode)); 1819 } 1820 memset(dest, 0xff, sizeof(dest)); 1821 errorCode=U_ZERO_ERROR; 1822 length=numSubstitutions=-5; 1823 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1824 (const char *)shortSrc, LENGTHOF(shortSrc), 1825 0xfffd, &numSubstitutions, &errorCode); 1826 if( U_FAILURE(errorCode) || p!=dest || 1827 length!=LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) || 1828 dest[length]!=0 || 1829 numSubstitutions!=0 1830 ) { 1831 log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode)); 1832 } 1833 memset(dest, 0xff, sizeof(dest)); 1834 errorCode=U_ZERO_ERROR; 1835 length=numSubstitutions=-5; 1836 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1837 (const char *)asciiNul, -1, 1838 0xfffd, &numSubstitutions, &errorCode); 1839 if( U_FAILURE(errorCode) || p!=dest || 1840 length!=LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) || 1841 dest[length]!=0 || 1842 numSubstitutions!=0 1843 ) { 1844 log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode)); 1845 } 1846 memset(dest, 0xff, sizeof(dest)); 1847 errorCode=U_ZERO_ERROR; 1848 length=numSubstitutions=-5; 1849 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1850 NULL, 0, 0xfffd, &numSubstitutions, &errorCode); 1851 if( U_FAILURE(errorCode) || p!=dest || 1852 length!=0 || dest[0]!=0 || 1853 numSubstitutions!=0 1854 ) { 1855 log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode)); 1856 } 1857 memset(dest, 0xff, sizeof(dest)); 1858 errorCode=U_ZERO_ERROR; 1859 length=numSubstitutions=-5; 1860 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1861 (const char *)invalid, LENGTHOF(invalid), 1862 0xfffd, &numSubstitutions, &errorCode); 1863 if( U_FAILURE(errorCode) || p!=dest || 1864 length!=LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) || 1865 dest[length]!=0 || 1866 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) 1867 ) { 1868 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode)); 1869 } 1870 memset(dest, 0xff, sizeof(dest)); 1871 errorCode=U_ZERO_ERROR; 1872 length=numSubstitutions=-5; 1873 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1874 (const char *)invalid, LENGTHOF(invalid), 1875 0x50000, &numSubstitutions, &errorCode); 1876 if( U_FAILURE(errorCode) || p!=dest || 1877 length!=LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) || 1878 dest[length]!=0 || 1879 numSubstitutions!=LENGTHOF(invalidExpectedFFFD) /* not ...50000 */ 1880 ) { 1881 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode)); 1882 } 1883 memset(dest, 0xff, sizeof(dest)); 1884 errorCode=U_ZERO_ERROR; 1885 length=numSubstitutions=-5; 1886 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1887 (const char *)invalid, LENGTHOF(invalid), 1888 U_SENTINEL, &numSubstitutions, &errorCode); 1889 if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) { 1890 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode)); 1891 } 1892 memset(dest, 0xff, sizeof(dest)); 1893 errorCode=U_ZERO_ERROR; 1894 length=numSubstitutions=-5; 1895 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, 1896 (const char *)src, LENGTHOF(src), 1897 U_SENTINEL, &numSubstitutions, &errorCode); 1898 if( errorCode!=U_INVALID_CHAR_FOUND || 1899 length>=LENGTHOF(expected) || dest[LENGTHOF(expected)-1]!=0xffff || 1900 numSubstitutions!=0 1901 ) { 1902 log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode)); 1903 } 1904 1905 /* illegal arguments */ 1906 memset(dest, 0xff, sizeof(dest)); 1907 errorCode=U_ZERO_ERROR; 1908 length=numSubstitutions=-5; 1909 p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length, 1910 (const char *)src, LENGTHOF(src), 1911 0xfffd, &numSubstitutions, &errorCode); 1912 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1913 log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode)); 1914 } 1915 memset(dest, 0xff, sizeof(dest)); 1916 errorCode=U_ZERO_ERROR; 1917 length=numSubstitutions=-5; 1918 p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length, 1919 (const char *)src, LENGTHOF(src), 1920 0xfffd, &numSubstitutions, &errorCode); 1921 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1922 log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode)); 1923 } 1924 memset(dest, 0xff, sizeof(dest)); 1925 errorCode=U_ZERO_ERROR; 1926 length=numSubstitutions=-5; 1927 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, 1928 NULL, LENGTHOF(src), 1929 0xfffd, &numSubstitutions, &errorCode); 1930 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1931 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode)); 1932 } 1933 memset(dest, 0xff, sizeof(dest)); 1934 errorCode=U_ZERO_ERROR; 1935 length=numSubstitutions=-5; 1936 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, 1937 NULL, -1, 0xfffd, &numSubstitutions, &errorCode); 1938 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1939 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode)); 1940 } 1941 memset(dest, 0xff, sizeof(dest)); 1942 errorCode=U_ZERO_ERROR; 1943 length=numSubstitutions=-5; 1944 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, 1945 (const char *)src, LENGTHOF(src), 1946 0x110000, &numSubstitutions, &errorCode); 1947 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1948 log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode)); 1949 } 1950 memset(dest, 0xff, sizeof(dest)); 1951 errorCode=U_ZERO_ERROR; 1952 length=numSubstitutions=-5; 1953 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, 1954 (const char *)src, LENGTHOF(src), 1955 0xdfff, &numSubstitutions, &errorCode); 1956 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { 1957 log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode)); 1958 } 1959 } 1960 1961 /* test that string transformation functions permit NULL source pointer when source length==0 */ 1962 static void TestNullEmptySource() { 1963 char dest8[4]={ 3, 3, 3, 3 }; 1964 UChar dest16[4]={ 3, 3, 3, 3 }; 1965 UChar32 dest32[4]={ 3, 3, 3, 3 }; 1966 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 1967 wchar_t destW[4]={ 3, 3, 3, 3 }; 1968 #endif 1969 1970 int32_t length; 1971 UErrorCode errorCode; 1972 1973 /* u_strFromXyz() */ 1974 1975 dest16[0]=3; 1976 length=3; 1977 errorCode=U_ZERO_ERROR; 1978 u_strFromUTF8(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); 1979 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 1980 log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n"); 1981 } 1982 1983 dest16[0]=3; 1984 length=3; 1985 errorCode=U_ZERO_ERROR; 1986 u_strFromUTF8WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); 1987 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 1988 log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n"); 1989 } 1990 1991 dest16[0]=3; 1992 length=3; 1993 errorCode=U_ZERO_ERROR; 1994 u_strFromUTF8Lenient(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); 1995 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 1996 log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n"); 1997 } 1998 1999 dest16[0]=3; 2000 length=3; 2001 errorCode=U_ZERO_ERROR; 2002 u_strFromUTF32(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); 2003 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2004 log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n"); 2005 } 2006 2007 dest16[0]=3; 2008 length=3; 2009 errorCode=U_ZERO_ERROR; 2010 u_strFromUTF32WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); 2011 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2012 log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n"); 2013 } 2014 2015 dest16[0]=3; 2016 length=3; 2017 errorCode=U_ZERO_ERROR; 2018 u_strFromJavaModifiedUTF8WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); 2019 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2020 log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n"); 2021 } 2022 2023 /* u_strToXyz() */ 2024 2025 dest8[0]=3; 2026 length=3; 2027 errorCode=U_ZERO_ERROR; 2028 u_strToUTF8(dest8, LENGTHOF(dest8), &length, NULL, 0, &errorCode); 2029 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { 2030 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n"); 2031 } 2032 2033 dest8[0]=3; 2034 length=3; 2035 errorCode=U_ZERO_ERROR; 2036 u_strToUTF8WithSub(dest8, LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode); 2037 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { 2038 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n"); 2039 } 2040 2041 dest32[0]=3; 2042 length=3; 2043 errorCode=U_ZERO_ERROR; 2044 u_strToUTF32(dest32, LENGTHOF(dest32), &length, NULL, 0, &errorCode); 2045 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) { 2046 log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n"); 2047 } 2048 2049 dest32[0]=3; 2050 length=3; 2051 errorCode=U_ZERO_ERROR; 2052 u_strToUTF32WithSub(dest32, LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode); 2053 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) { 2054 log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n"); 2055 } 2056 2057 dest8[0]=3; 2058 length=3; 2059 errorCode=U_ZERO_ERROR; 2060 u_strToJavaModifiedUTF8(dest8, LENGTHOF(dest8), &length, NULL, 0, &errorCode); 2061 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { 2062 log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n"); 2063 } 2064 2065 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) 2066 2067 dest16[0]=3; 2068 length=3; 2069 errorCode=U_ZERO_ERROR; 2070 u_strFromWCS(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); 2071 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { 2072 log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n"); 2073 } 2074 2075 destW[0]=3; 2076 length=3; 2077 errorCode=U_ZERO_ERROR; 2078 u_strToWCS(destW, LENGTHOF(destW), &length, NULL, 0, &errorCode); 2079 if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) { 2080 log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n"); 2081 } 2082 2083 #endif 2084 } 2085