1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /* 7 ******************************************************************************** 8 * File NCCBTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda 7/21/1999 Testing error callback routines 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <ctype.h> 19 #include "cstring.h" 20 #include "unicode/uloc.h" 21 #include "unicode/ucnv.h" 22 #include "unicode/ucnv_err.h" 23 #include "cintltst.h" 24 #include "unicode/utypes.h" 25 #include "unicode/ustring.h" 26 #include "nccbtst.h" 27 #include "unicode/ucnv_cb.h" 28 #define NEW_MAX_BUFFER 999 29 30 #define nct_min(x,y) ((x<y) ? x : y) 31 #define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0])) 32 33 static int32_t gInBufferSize = 0; 34 static int32_t gOutBufferSize = 0; 35 static char gNuConvTestName[1024]; 36 37 static void printSeq(const uint8_t* a, int len) 38 { 39 int i=0; 40 log_verbose("\n{"); 41 while (i<len) 42 log_verbose("0x%02X, ", a[i++]); 43 log_verbose("}\n"); 44 } 45 46 static void printUSeq(const UChar* a, int len) 47 { 48 int i=0; 49 log_verbose("{"); 50 while (i<len) 51 log_verbose(" 0x%04x, ", a[i++]); 52 log_verbose("}\n"); 53 } 54 55 static void printSeqErr(const uint8_t* a, int len) 56 { 57 int i=0; 58 fprintf(stderr, "{"); 59 while (i<len) 60 fprintf(stderr, " 0x%02x, ", a[i++]); 61 fprintf(stderr, "}\n"); 62 } 63 64 static void printUSeqErr(const UChar* a, int len) 65 { 66 int i=0; 67 fprintf(stderr, "{"); 68 while (i<len) 69 fprintf(stderr, "0x%04x, ", a[i++]); 70 fprintf(stderr,"}\n"); 71 } 72 73 static void setNuConvTestName(const char *codepage, const char *direction) 74 { 75 
sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 76 codepage, 77 direction, 78 (int)gInBufferSize, 79 (int)gOutBufferSize); 80 } 81 82 83 static void TestCallBackFailure(void); 84 85 void addTestConvertErrorCallBack(TestNode** root); 86 87 void addTestConvertErrorCallBack(TestNode** root) 88 { 89 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); 90 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); 91 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); 92 /* BEGIN android-removed 93 To save space, Android does not build complete CJK conversion tables. 94 We skip the test here. 95 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack"); 96 END android-removed */ 97 98 #if !UCONFIG_NO_LEGACY_CONVERSION 99 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack"); 100 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack"); 101 #endif 102 103 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); 104 } 105 106 static void TestSkipCallBack() 107 { 108 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 109 TestSkip(1,NEW_MAX_BUFFER); 110 TestSkip(1,1); 111 TestSkip(NEW_MAX_BUFFER, 1); 112 } 113 114 static void TestStopCallBack() 115 { 116 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 117 TestStop(1,NEW_MAX_BUFFER); 118 TestStop(1,1); 119 TestStop(NEW_MAX_BUFFER, 1); 120 } 121 122 static void TestSubCallBack() 123 { 124 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 125 TestSub(1,NEW_MAX_BUFFER); 126 TestSub(1,1); 127 TestSub(NEW_MAX_BUFFER, 1); 128 129 #if !UCONFIG_NO_LEGACY_CONVERSION 130 TestEBCDIC_STATEFUL_Sub(1, 1); 131 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); 132 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); 133 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 134 #endif 135 } 136 137 static void TestSubWithValueCallBack() 138 { 139 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 140 
TestSubWithValue(1,NEW_MAX_BUFFER); 141 TestSubWithValue(1,1); 142 TestSubWithValue(NEW_MAX_BUFFER, 1); 143 } 144 145 #if !UCONFIG_NO_LEGACY_CONVERSION 146 static void TestLegalAndOtherCallBack() 147 { 148 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 149 TestLegalAndOthers(1,NEW_MAX_BUFFER); 150 TestLegalAndOthers(1,1); 151 TestLegalAndOthers(NEW_MAX_BUFFER, 1); 152 } 153 154 static void TestSingleByteCallBack() 155 { 156 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 157 TestSingleByte(1,NEW_MAX_BUFFER); 158 TestSingleByte(1,1); 159 TestSingleByte(NEW_MAX_BUFFER, 1); 160 } 161 #endif 162 163 static void TestSkip(int32_t inputsize, int32_t outputsize) 164 { 165 static const uint8_t expskipIBM_949[]= { 166 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 167 168 static const uint8_t expskipIBM_943[] = { 169 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; 170 171 static const uint8_t expskipIBM_930[] = { 172 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; 173 174 gInBufferSize = inputsize; 175 gOutBufferSize = outputsize; 176 177 /*From Unicode*/ 178 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); 179 180 #if !UCONFIG_NO_LEGACY_CONVERSION 181 { 182 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 183 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 184 185 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; 186 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; 187 188 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 189 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", 190 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) 191 log_err("u-> ibm-949 with skip did not match.\n"); 192 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 193 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", 194 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) 195 log_err("u-> ibm-943 with skip did not match.\n"); 
196 } 197 198 { 199 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 }; 200 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; 201 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 }; 202 203 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */ 204 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, 205 fromUBytes, sizeof(fromUBytes), 206 "ibm-930", 207 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, 208 NULL, 0) 209 ) { 210 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n"); 211 } 212 } 213 #endif 214 215 { 216 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 217 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; 218 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; 219 220 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 221 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; 222 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; 223 224 /* US-ASCII */ 225 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 226 usasciiFromUBytes, sizeof(usasciiFromUBytes), 227 "US-ASCII", 228 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 229 NULL, 0) 230 ) { 231 log_err("u->US-ASCII with skip did not match.\n"); 232 } 233 234 #if !UCONFIG_NO_LEGACY_CONVERSION 235 /* SBCS NLTC codepage 367 for US-ASCII */ 236 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 237 usasciiFromUBytes, sizeof(usasciiFromUBytes), 238 "ibm-367", 239 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 240 NULL, 0) 241 ) { 242 log_err("u->ibm-367 with skip did not match.\n"); 243 } 244 #endif 245 246 /* ISO-Latin-1 */ 247 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 248 
latin1FromUBytes, sizeof(latin1FromUBytes), 249 "LATIN_1", 250 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 251 NULL, 0) 252 ) { 253 log_err("u->LATIN_1 with skip did not match.\n"); 254 } 255 256 #if !UCONFIG_NO_LEGACY_CONVERSION 257 /* windows-1252 */ 258 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 259 latin1FromUBytes, sizeof(latin1FromUBytes), 260 "windows-1252", 261 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 262 NULL, 0) 263 ) { 264 log_err("u->windows-1252 with skip did not match.\n"); 265 } 266 } 267 268 { 269 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 270 static const uint8_t toIBM943[]= { 0x61, 0x61 }; 271 static const int32_t offset[]= {0, 4}; 272 273 /* EUC_JP*/ 274 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 275 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 276 0x61, 0x8e, 0xe0, 277 }; 278 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; 279 280 /*EUC_TW*/ 281 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 282 static const uint8_t to_euc_tw[]={ 283 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 284 0x61, 0xe6, 0xca, 0x8a, 285 }; 286 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,}; 287 288 /*ISO-2022-JP*/ 289 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, }; 290 static const uint8_t to_iso_2022_jp[]={ 291 0x41, 292 0x42, 293 294 }; 295 static const int32_t from_iso_2022_jpOffs [] ={0,2}; 296 297 /*ISO-2022-JP*/ 298 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 299 static const uint8_t to_iso_2022_jp2[]={ 300 0x41, 301 0x43, 302 303 }; 304 static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; 305 306 /*ISO-2022-cn*/ 307 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, 
}; 308 static const uint8_t to_iso_2022_cn[]={ 309 0x41, 0x42 310 }; 311 static const int32_t from_iso_2022_cnOffs [] ={ 312 0, 2 313 }; 314 315 /*ISO-2022-CN*/ 316 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 317 static const uint8_t to_iso_2022_cn1[]={ 318 0x41, 0x43 319 320 }; 321 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; 322 323 /*ISO-2022-kr*/ 324 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 325 static const uint8_t to_iso_2022_kr[]={ 326 0x1b, 0x24, 0x29, 0x43, 327 0x41, 328 0x0e, 0x25, 0x50, 329 0x25, 0x50, 330 0x0f, 0x42, 331 }; 332 static const int32_t from_iso_2022_krOffs [] ={ 333 -1,-1,-1,-1, 334 0, 335 1,1,1, 336 3,3, 337 4,4 338 }; 339 340 /*ISO-2022-kr*/ 341 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 342 static const uint8_t to_iso_2022_kr1[]={ 343 0x1b, 0x24, 0x29, 0x43, 344 0x41, 345 0x0e, 0x25, 0x50, 346 0x25, 0x50, 347 348 }; 349 static const int32_t from_iso_2022_krOffs1 [] ={ 350 -1,-1,-1,-1, 351 0, 352 1,1,1, 353 3,3, 354 355 }; 356 /* HZ encoding */ 357 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 358 359 static const uint8_t to_hz[]={ 360 0x7e, 0x7d, 0x41, 361 0x7e, 0x7b, 0x26, 0x30, 362 0x26, 0x30, 363 0x7e, 0x7d, 0x42, 364 365 }; 366 static const int32_t from_hzOffs [] ={ 367 0,0,0, 368 1,1,1,1, 369 3,3, 370 4,4,4,4 371 }; 372 373 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 374 375 static const uint8_t to_hz1[]={ 376 0x7e, 0x7d, 0x41, 377 0x7e, 0x7b, 0x26, 0x30, 378 0x26, 0x30, 379 380 381 }; 382 static const int32_t from_hzOffs1 [] ={ 383 0,0,0, 384 1,1,1,1, 385 3,3, 386 387 }; 388 389 #endif 390 391 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 392 393 static const uint8_t to_SCSU[]={ 394 
0x41, 395 0x42 396 397 398 }; 399 static const int32_t from_SCSUOffs [] ={ 400 0, 401 2, 402 403 }; 404 405 #if !UCONFIG_NO_LEGACY_CONVERSION 406 /* ISCII */ 407 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 408 static const uint8_t to_iscii[]={ 409 0x41, 410 0x42, 411 }; 412 static const int32_t from_isciiOffs [] ={ 413 0,2, 414 415 }; 416 /*ISCII*/ 417 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 418 static const uint8_t to_iscii1[]={ 419 0x44, 420 0x43, 421 422 }; 423 static const int32_t from_isciiOffs1 [] ={0,2}; 424 425 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 426 toIBM943, sizeof(toIBM943), "ibm-943", 427 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) 428 log_err("u-> ibm-943 with skip did not match.\n"); 429 430 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 431 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 432 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) 433 log_err("u-> euc-jp with skip did not match.\n"); 434 435 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 436 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 437 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) 438 log_err("u-> euc-tw with skip did not match.\n"); 439 440 /*iso_2022_jp*/ 441 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 442 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 443 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) 444 log_err("u-> iso-2022-jp with skip did not match.\n"); 445 446 /* with context */ 447 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 448 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 449 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 
0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 450 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 451 452 /*iso_2022_cn*/ 453 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 454 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 455 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) 456 log_err("u-> iso-2022-cn with skip did not match.\n"); 457 /*with context*/ 458 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), 459 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", 460 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 461 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 462 463 /*iso_2022_kr*/ 464 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 465 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 466 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) 467 log_err("u-> iso-2022-kr with skip did not match.\n"); 468 /*with context*/ 469 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), 470 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", 471 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 472 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 473 474 /*hz*/ 475 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 476 to_hz, sizeof(to_hz), "HZ", 477 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) 478 log_err("u-> HZ with skip did not match.\n"); 479 /*with context*/ 480 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]), 481 to_hz1, 
sizeof(to_hz1), "hz", 482 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 483 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 484 #endif 485 486 /*SCSU*/ 487 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 488 to_SCSU, sizeof(to_SCSU), "SCSU", 489 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) 490 log_err("u-> SCSU with skip did not match.\n"); 491 492 #if !UCONFIG_NO_LEGACY_CONVERSION 493 /*ISCII*/ 494 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 495 to_iscii, sizeof(to_iscii), "ISCII,version=0", 496 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) 497 log_err("u-> iscii with skip did not match.\n"); 498 /*with context*/ 499 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]), 500 to_iscii1, sizeof(to_iscii1), "ISCII,version=0", 501 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 502 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 503 #endif 504 } 505 506 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 507 { 508 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 509 0xFB, 0xEE, 0x28, /* from source offset 0 */ 510 0x24, 0x1E, 0x52, 511 0xB2, 512 0x20, 513 0xB3, 514 0xB1, 515 0x0D, 516 0x0A, 517 518 0x20, /* from 8 */ 519 0x00, 520 0xD0, 0x6C, 521 0xB6, 522 0xD8, 0xA5, 523 0x20, 524 0x68, 525 0x59, 526 527 0xF9, 0x28, /* from 16 */ 528 0x6D, 529 0x20, 530 0x73, 531 0xE0, 0x2D, 532 0xDE, 0x43, 533 0xD0, 0x33, 534 0x20, 535 536 0xFA, 0x83, /* from 24 */ 537 0x25, 0x01, 538 0xFB, 0x16, 0x87, 539 0x4B, 0x16, 540 0x20, 541 0xE6, 0xBD, 542 0xEB, 0x5B, 543 0x4B, 0xCC, 544 545 0xF9, 0xA2, /* from 32 */ 546 0xFC, 0x10, 0x3E, 547 0xFE, 0x16, 0x3A, 0x8C, 548 0x20, 549 0xFC, 0x03, 0xAC, 550 551 
0x01, /* from 41 */ 552 0xDE, 0x83, 553 0x20, 554 0x09 555 }; 556 static const UChar expected[]={ 557 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ 558 0x0063, 0x0061, 0x000D, 0x000A, 559 560 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ 561 0x0930, 0x0020, 0x0918, 0x0909, 562 563 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ 564 0x4000, 0x4E00, 0x7777, 0x0020, 565 566 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ 567 0x0020, 0xD7A3, 0xDC00, 0xD800, 568 569 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ 570 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 571 572 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ 573 0x0009 574 }; 575 static const int32_t offsets[]={ 576 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, 577 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, 578 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 579 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, 580 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, 581 41, 42, 42, 43, 44 582 }; 583 584 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */ 585 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 586 sampleText, sizeof(sampleText), 587 "BOCU-1", 588 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 589 ) { 590 log_err("u->BOCU-1 with skip did not match.\n"); 591 } 592 } 593 594 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 595 { 596 const uint8_t sampleText[]={ 597 0x61, /* 'a' */ 598 0xc4, 0xb5, /* U+0135 */ 599 0xed, 0x80, 0xa0, /* Hangul U+d020 */ 600 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ 601 0xee, 0x80, 0x80, /* PUA U+e000 */ 602 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */ 603 0x62, /* 'b' */ 604 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */ 605 0xd0, 0x80 /* U+0400 */ 606 }; 607 UChar expected[]={ 608 0x0061, 609 0x0135, 610 0xd020, 611 0xd801, 0xdc01, 612 0xe000, 613 0xdc01, 614 0x0062, 615 0xd801, 616 0x0400 617 }; 618 int32_t offsets[]={ 619 0, 620 1, 1, 621 2, 2, 2, 622 3, 3, 3, 4, 4, 4, 623 5, 5, 5, 624 6, 6, 
6, 625 7, 626 8, 8, 8, 627 9, 9 628 }; 629 630 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */ 631 632 /* without offsets */ 633 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 634 sampleText, sizeof(sampleText), 635 "CESU-8", 636 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) 637 ) { 638 log_err("u->CESU-8 with skip did not match.\n"); 639 } 640 641 /* with offsets */ 642 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 643 sampleText, sizeof(sampleText), 644 "CESU-8", 645 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 646 ) { 647 log_err("u->CESU-8 with skip did not match.\n"); 648 } 649 } 650 651 /*to Unicode*/ 652 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); 653 654 #if !UCONFIG_NO_LEGACY_CONVERSION 655 { 656 657 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; 658 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 659 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 660 661 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; 662 static const int32_t fromIBM943Offs [] = { 0, 2, 4}; 663 static const int32_t fromIBM930Offs [] = { 1, 3, 5}; 664 665 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), 666 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949", 667 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) 668 log_err("ibm-949->u with skip did not match.\n"); 669 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), 670 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943", 671 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) 672 log_err("ibm-943->u with skip did not match.\n"); 673 674 675 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), 676 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 677 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, 
NULL, 0 )) 678 log_err("ibm-930->u with skip did not match.\n"); 679 680 681 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930), 682 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 683 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 684 log_err("ibm-930->u with skip did not match.\n"); 685 } 686 #endif 687 688 { 689 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; 690 static const UChar usasciiToU[] = { 0x61, 0x31 }; 691 static const int32_t usasciiToUOffsets[] = { 0, 2 }; 692 693 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; 694 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; 695 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; 696 697 /* US-ASCII */ 698 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 699 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 700 "US-ASCII", 701 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 702 NULL, 0) 703 ) { 704 log_err("US-ASCII->u with skip did not match.\n"); 705 } 706 707 #if !UCONFIG_NO_LEGACY_CONVERSION 708 /* SBCS NLTC codepage 367 for US-ASCII */ 709 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 710 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 711 "ibm-367", 712 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 713 NULL, 0) 714 ) { 715 log_err("ibm-367->u with skip did not match.\n"); 716 } 717 #endif 718 719 /* ISO-Latin-1 */ 720 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 721 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 722 "LATIN_1", 723 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 724 NULL, 0) 725 ) { 726 log_err("LATIN_1->u with skip did not match.\n"); 727 } 728 729 #if !UCONFIG_NO_LEGACY_CONVERSION 730 /* windows-1252 */ 731 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 732 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 733 "windows-1252", 734 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 735 NULL, 0) 736 ) { 737 
log_err("windows-1252->u with skip did not match.\n"); 738 } 739 #endif 740 } 741 742 #if !UCONFIG_NO_LEGACY_CONVERSION 743 { 744 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 745 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 746 }; 747 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 748 }; 749 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; 750 751 752 /* euc-jp*/ 753 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 754 /* BEGIN android-changed */ 755 /* Android uses a different EUC-JP table. We change this byte sequence, 756 * choosing one that is unassigned in both tables. */ 757 0x8f, 0xa1, 0xa1, /*unassigned*/ 758 /* 0x8f, 0xda, 0xa1, */ /*unassigned*/ 759 /* END android-changed */ 760 0x8e, 0xe0, 761 }; 762 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; 763 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; 764 765 /*EUC_TW*/ 766 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 767 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 768 0xe6, 0xca, 0x8a, 769 }; 770 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, }; 771 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; 772 /*iso-2022-jp*/ 773 static const uint8_t sampleTxt_iso_2022_jp[]={ 774 0x41, 775 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ 776 0x1b, 0x28, 0x42, 0x42, 777 778 }; 779 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; 780 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; 781 782 /*iso-2022-cn*/ 783 static const uint8_t sampleTxt_iso_2022_cn[]={ 784 0x0f, 0x41, 0x44, 785 0x1B, 0x24, 0x29, 0x47, 786 0x0E, 0x40, 0x6f, /*unassigned*/ 787 0x0f, 0x42, 788 789 }; 790 791 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; 792 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; 793 794 /*iso-2022-kr*/ 795 static const uint8_t sampleTxt_iso_2022_kr[]={ 796 0x1b, 0x24, 0x29, 0x43, 797 0x41, 798 0x0E, 0x7f, 0x1E, 799 
0x0e, 0x25, 0x50, 800 0x0f, 0x51, 801 0x42, 0x43, 802 803 }; 804 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43}; 805 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 }; 806 807 /*hz*/ 808 static const uint8_t sampleTxt_hz[]={ 809 0x41, 810 0x7e, 0x7b, 0x26, 0x30, 811 0x7f, 0x1E, /*unassigned*/ 812 0x26, 0x30, 813 0x7e, 0x7d, 0x42, 814 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 815 0x7e, 0x7d, 0x42, 816 }; 817 static const UChar hztoUnicode[]={ 818 0x41, 819 0x03a0, 820 0x03A0, 821 0x42, 822 0x42,}; 823 824 static const int32_t from_hzOffs [] ={0,3,7,11,18, }; 825 826 /*ISCII*/ 827 static const uint8_t sampleTxt_iscii[]={ 828 0x41, 829 0xa1, 830 0xEB, /*unassigned*/ 831 0x26, 832 0x30, 833 0xa2, 834 0xEC, /*unassigned*/ 835 0x42, 836 }; 837 static const UChar isciitoUnicode[]={ 838 0x41, 839 0x0901, 840 0x26, 841 0x30, 842 0x0902, 843 0x42, 844 }; 845 846 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; 847 848 /*LMBCS*/ 849 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, 850 0x12, 0x92, 0xa0, /*unassigned*/ 851 0x12, 0x92, 0xA1, 852 }; 853 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; 854 static const int32_t fromLMBCS[] = {0, 6}; 855 856 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 857 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 858 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 859 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 860 861 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 862 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 863 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 864 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 865 866 if(!testConvertToUnicode(sampleTxt_euc_jp, 
sizeof(sampleTxt_euc_jp), 867 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 868 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) 869 log_err("euc-jp->u with skip did not match.\n"); 870 871 872 873 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 874 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 875 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) 876 log_err("euc-tw->u with skip did not match.\n"); 877 878 879 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 880 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 881 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) 882 log_err("iso-2022-jp->u with skip did not match.\n"); 883 884 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 885 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 886 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) 887 log_err("iso-2022-cn->u with skip did not match.\n"); 888 889 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 890 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 891 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) 892 log_err("iso-2022-kr->u with skip did not match.\n"); 893 894 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 895 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 896 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) 897 log_err("HZ->u with skip did not match.\n"); 898 899 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 900 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 901 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) 902 log_err("iscii->u with skip did not match.\n"); 903 904 if(!testConvertToUnicode(sampleTxtLMBCS, 
sizeof(sampleTxtLMBCS), 905 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1", 906 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) 907 log_err("LMBCS->u with skip did not match.\n"); 908 909 } 910 #endif 911 912 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); 913 { 914 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 915 0xe0, 0x80, 0x61,}; 916 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; 917 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; 918 919 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 920 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 921 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 922 log_err("utf8->u with skip did not match.\n");; 923 } 924 925 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); 926 { 927 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 928 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe}; 929 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 930 931 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 932 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 933 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 934 log_err("scsu->u with skip did not match.\n"); 935 } 936 937 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 938 { 939 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 940 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ 941 0x24, 0x1E, 0x52, /* 3 */ 942 0xB2, /* 6 */ 943 0x20, /* 7 */ 944 0x40, 0x07, /* 8 - wrong trail byte */ 945 0xB3, /* 10 */ 946 0xB1, /* 11 */ 947 0xD0, 0x20, /* 12 - wrong trail byte */ 948 0x0D, /* 14 */ 949 0x0A, /* 15 */ 950 0x20, /* 16 */ 951 0x00, /* 17 */ 952 0xD0, 0x6C, /* 18 */ 953 0xB6, /* 20 */ 954 0xD8, 0xA5, /* 21 */ 955 0x20, /* 23 */ 956 0x68, /* 24 */ 957 0x59, /* 25 */ 958 0xF9, 0x28, /* 26 */ 959 0x6D, /* 28 */ 960 0x20, /* 29 */ 961 0x73, /* 30 */ 
962 0xE0, 0x2D, /* 31 */ 963 0xDE, 0x43, /* 33 */ 964 0xD0, 0x33, /* 35 */ 965 0x20, /* 37 */ 966 0xFA, 0x83, /* 38 */ 967 0x25, 0x01, /* 40 */ 968 0xFB, 0x16, 0x87, /* 42 */ 969 0x4B, 0x16, /* 45 */ 970 0x20, /* 47 */ 971 0xE6, 0xBD, /* 48 */ 972 0xEB, 0x5B, /* 50 */ 973 0x4B, 0xCC, /* 52 */ 974 0xF9, 0xA2, /* 54 */ 975 0xFC, 0x10, 0x3E, /* 56 */ 976 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ 977 0x20, /* 63 */ 978 0xFC, 0x03, 0xAC, /* 64 */ 979 0xFF, /* 67 - FF just resets the state without encoding anything */ 980 0x01, /* 68 */ 981 0xDE, 0x83, /* 69 */ 982 0x20, /* 71 */ 983 0x09 /* 72 */ 984 }; 985 UChar expected[]={ 986 0xFEFF, 0x0061, 0x0062, 0x0020, 987 0x0063, 0x0061, 0x000D, 0x000A, 988 0x0020, 0x0000, 0x00DF, 0x00E6, 989 0x0930, 0x0020, 0x0918, 0x0909, 990 0x3086, 0x304D, 0x0020, 0x3053, 991 0x4000, 0x4E00, 0x7777, 0x0020, 992 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, 993 0x0020, 0xD7A3, 0xDC00, 0xD800, 994 0xD800, 0xDC00, 0xD845, 0xDDDD, 995 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 996 0xDFFF, 0x0001, 0x0E40, 0x0020, 997 0x0009 998 }; 999 int32_t offsets[]={ 1000 0, 3, 6, 7, /* skip 8, */ 1001 10, 11, /* skip 12, */ 1002 14, 15, 16, 17, 18, 1003 20, 21, 23, 24, 25, 26, 28, 29, 1004 30, 31, 33, 35, 37, 38, 1005 40, 42, 45, 47, 48, 1006 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, 1007 63, 64, /* trail */ 64, /* reset only 67, */ 1008 68, 69, 1009 71, 72 1010 }; 1011 1012 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1013 expected, ARRAY_LENGTH(expected), "BOCU-1", 1014 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1015 ) { 1016 log_err("BOCU-1->u with skip did not match.\n"); 1017 } 1018 } 1019 1020 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 1021 { 1022 const uint8_t sampleText[]={ 1023 0x61, /* 0 'a' */ 1024 0xc0, 0x80, /* 1 non-shortest form */ 1025 0xc4, 0xb5, /* 3 U+0135 */ 1026 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ 1027 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */ 1028 0xee, 0x80, 0x80, 
/* 14 PUA U+e000 */ 1029 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */ 1030 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */ 1031 0x62, /* 24 'b' */ 1032 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */ 1033 0xed, 0xa0, /* 28 incomplete sequence */ 1034 0xd0, 0x80 /* 30 U+0400 */ 1035 }; 1036 UChar expected[]={ 1037 0x0061, 1038 /* skip */ 1039 0x0135, 1040 0xd020, 1041 0xd801, 0xdc01, 1042 0xe000, 1043 0xdc01, 1044 /* skip */ 1045 0x0062, 1046 0xd801, 1047 0x0400 1048 }; 1049 int32_t offsets[]={ 1050 0, 1051 /* skip 1, */ 1052 3, 1053 5, 1054 8, 11, 1055 14, 1056 17, 1057 /* skip 20, 20, */ 1058 24, 1059 25, 1060 /* skip 28 */ 1061 30 1062 }; 1063 1064 /* without offsets */ 1065 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1066 expected, ARRAY_LENGTH(expected), "CESU-8", 1067 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) 1068 ) { 1069 log_err("CESU-8->u with skip did not match.\n"); 1070 } 1071 1072 /* with offsets */ 1073 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1074 expected, ARRAY_LENGTH(expected), "CESU-8", 1075 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1076 ) { 1077 log_err("CESU-8->u with skip did not match.\n"); 1078 } 1079 } 1080 } 1081 1082 static void TestStop(int32_t inputsize, int32_t outputsize) 1083 { 1084 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1085 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1086 1087 static const uint8_t expstopIBM_949[]= { 1088 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; 1089 1090 static const uint8_t expstopIBM_943[] = { 1091 0x9f, 0xaf, 0x9f, 0xb1}; 1092 1093 static const uint8_t expstopIBM_930[] = { 1094 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; 1095 1096 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; 1097 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; 1098 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; 1099 1100 1101 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 
2}; 1102 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; 1103 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; 1104 1105 static const int32_t fromIBM949Offs [] = { 0, 1, 3}; 1106 static const int32_t fromIBM943Offs [] = { 0, 2}; 1107 static const int32_t fromIBM930Offs [] = { 1, 3}; 1108 1109 gInBufferSize = inputsize; 1110 gOutBufferSize = outputsize; 1111 1112 /*From Unicode*/ 1113 1114 #if !UCONFIG_NO_LEGACY_CONVERSION 1115 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1116 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", 1117 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) 1118 log_err("u-> ibm-949 with stop did not match.\n"); 1119 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1120 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", 1121 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) 1122 log_err("u-> ibm-943 with stop did not match.\n"); 1123 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1124 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", 1125 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) 1126 log_err("u-> ibm-930 with stop did not match.\n"); 1127 1128 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); 1129 { 1130 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1131 static const uint8_t toIBM943[]= { 0x61,}; 1132 static const int32_t offset[]= {0,} ; 1133 1134 /*EUC_JP*/ 1135 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1136 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; 1137 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; 1138 1139 /*EUC_TW*/ 1140 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1141 static const uint8_t to_euc_tw[]={ 1142 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; 
1143 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; 1144 1145 /*ISO-2022-JP*/ 1146 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; 1147 static const uint8_t to_iso_2022_jp[]={ 1148 0x41, 1149 1150 }; 1151 static const int32_t from_iso_2022_jpOffs [] ={0,}; 1152 1153 /*ISO-2022-cn*/ 1154 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1155 static const uint8_t to_iso_2022_cn[]={ 1156 0x41, 1157 1158 }; 1159 static const int32_t from_iso_2022_cnOffs [] ={ 1160 0,0, 1161 2,2, 1162 }; 1163 1164 /*ISO-2022-kr*/ 1165 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 1166 static const uint8_t to_iso_2022_kr[]={ 1167 0x1b, 0x24, 0x29, 0x43, 1168 0x41, 1169 0x0e, 0x25, 0x50, 1170 }; 1171 static const int32_t from_iso_2022_krOffs [] ={ 1172 -1,-1,-1,-1, 1173 0, 1174 1,1,1, 1175 }; 1176 1177 /* HZ encoding */ 1178 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1179 1180 static const uint8_t to_hz[]={ 1181 0x7e, 0x7d, 0x41, 1182 0x7e, 0x7b, 0x26, 0x30, 1183 1184 }; 1185 static const int32_t from_hzOffs [] ={ 1186 0, 0,0, 1187 1,1,1,1, 1188 }; 1189 1190 /*ISCII*/ 1191 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1192 static const uint8_t to_iscii[]={ 1193 0x41, 1194 }; 1195 static const int32_t from_isciiOffs [] ={ 1196 0, 1197 }; 1198 1199 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 1200 toIBM943, sizeof(toIBM943), "ibm-943", 1201 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) 1202 log_err("u-> ibm-943 with stop did not match.\n"); 1203 1204 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1205 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 1206 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) 1207 log_err("u-> euc-jp with stop did not match.\n"); 1208 1209 if(!testConvertFromUnicode(euc_tw_inputText, 
sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1210 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1211 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1212 log_err("u-> euc-tw with stop did not match.\n"); 1213 1214 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1215 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1216 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1217 log_err("u-> iso-2022-jp with stop did not match.\n"); 1218 1219 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1220 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1221 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1222 log_err("u-> iso-2022-jp with stop did not match.\n"); 1223 1224 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 1225 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 1226 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) 1227 log_err("u-> iso-2022-cn with stop did not match.\n"); 1228 1229 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 1230 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 1231 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) 1232 log_err("u-> iso-2022-kr with stop did not match.\n"); 1233 1234 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 1235 to_hz, sizeof(to_hz), "HZ", 1236 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) 1237 log_err("u-> HZ with stop did not match.\n");\ 1238 1239 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 1240 to_iscii, sizeof(to_iscii), "ISCII,version=0", 1241 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) 1242 log_err("u-> iscii with stop did not match.\n"); 1243 1244 1245 } 1246 #endif 1247 1248 
log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); 1249 { 1250 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1251 1252 static const uint8_t to_SCSU[]={ 1253 0x41, 1254 1255 }; 1256 int32_t from_SCSUOffs [] ={ 1257 0, 1258 1259 }; 1260 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1261 to_SCSU, sizeof(to_SCSU), "SCSU", 1262 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) 1263 log_err("u-> SCSU with skip did not match.\n"); 1264 1265 } 1266 1267 /*to Unicode*/ 1268 1269 #if !UCONFIG_NO_LEGACY_CONVERSION 1270 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), 1271 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949", 1272 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) 1273 log_err("ibm-949->u with stop did not match.\n"); 1274 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), 1275 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943", 1276 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) 1277 log_err("ibm-943->u with stop did not match.\n"); 1278 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), 1279 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930", 1280 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) 1281 log_err("ibm-930->u with stop did not match.\n"); 1282 1283 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); 1284 { 1285 1286 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1287 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1288 }; 1289 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; 1290 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; 1291 1292 /*EUC-JP*/ 1293 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1294 /* BEGIN android-changed */ 1295 /* Android uses a different EUC-JP table. 
We change this byte sequence, 1296 * choosing one that is unassigned in both tables. */ 1297 0x8f, 0xa1, 0xa1, /*unassigned*/ 1298 /* 0x8f, 0xda, 0xa1, */ /*unassigned*/ 1299 /* END android-changed */ 1300 0x8e, 0xe0, 1301 }; 1302 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; 1303 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; 1304 1305 /*EUC_TW*/ 1306 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1307 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1308 0xe6, 0xca, 0x8a, 1309 }; 1310 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; 1311 int32_t from_euc_twOffs [] ={ 0, 1, 3}; 1312 1313 1314 1315 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 1316 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1317 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1318 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); 1319 1320 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1321 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 1322 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) 1323 log_err("euc-jp->u with stop did not match.\n"); 1324 1325 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1326 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1327 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1328 log_err("euc-tw->u with stop did not match.\n"); 1329 } 1330 #endif 1331 1332 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); 1333 { 1334 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1335 0xe0, 0x80, 0x61,}; 1336 static const UChar expected1[] = { 0x0031, 0x4e8c,}; 1337 static const int32_t offsets1[] = { 0x0000, 0x0001}; 1338 1339 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1340 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 
1341 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1342 log_err("utf8->u with stop did not match.\n");; 1343 } 1344 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); 1345 { 1346 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04}; 1347 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061}; 1348 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; 1349 1350 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1351 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1352 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1353 log_err("scsu->u with stop did not match.\n");; 1354 } 1355 1356 } 1357 1358 static void TestSub(int32_t inputsize, int32_t outputsize) 1359 { 1360 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1361 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1362 1363 static const uint8_t expsubIBM_949[] = 1364 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; 1365 1366 static const uint8_t expsubIBM_943[] = { 1367 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; 1368 1369 static const uint8_t expsubIBM_930[] = { 1370 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; 1371 1372 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 }; 1373 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1374 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1375 1376 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1377 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; 1378 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; 1379 1380 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; 1381 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; 1382 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; 1383 1384 gInBufferSize = inputsize; 1385 gOutBufferSize = 
outputsize; 1386 1387 /*from unicode*/ 1388 1389 #if !UCONFIG_NO_LEGACY_CONVERSION 1390 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1391 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", 1392 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) 1393 log_err("u-> ibm-949 with subst did not match.\n"); 1394 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1395 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", 1396 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) 1397 log_err("u-> ibm-943 with subst did not match.\n"); 1398 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1399 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", 1400 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) 1401 log_err("u-> ibm-930 with subst did not match.\n"); 1402 1403 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1404 { 1405 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1406 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; 1407 static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; 1408 1409 1410 /* EUC_JP*/ 1411 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1412 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1413 0xf4, 0xfe, 0xf4, 0xfe, 1414 0x61, 0x8e, 0xe0, 1415 }; 1416 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7}; 1417 1418 /*EUC_TW*/ 1419 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1420 static const uint8_t to_euc_tw[]={ 1421 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1422 0xfd, 0xfe, 0xfd, 0xfe, 1423 0x61, 0xe6, 0xca, 0x8a, 1424 }; 1425 1426 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; 1427 1428 if(!testConvertFromUnicode(inputTest, 
sizeof(inputTest)/sizeof(inputTest[0]), 1429 toIBM943, sizeof(toIBM943), "ibm-943", 1430 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) 1431 log_err("u-> ibm-943 with substitute did not match.\n"); 1432 1433 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1434 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 1435 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) 1436 log_err("u-> euc-jp with substitute did not match.\n"); 1437 1438 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1439 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1440 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1441 log_err("u-> euc-tw with substitute did not match.\n"); 1442 } 1443 #endif 1444 1445 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1446 { 1447 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1448 1449 const uint8_t to_SCSU[]={ 1450 0x41, 1451 0x0e, 0xff,0xfd, 1452 0x42 1453 1454 1455 }; 1456 int32_t from_SCSUOffs [] ={ 1457 0, 1458 1,1,1, 1459 2, 1460 1461 }; 1462 const uint8_t to_SCSU_1[]={ 1463 0x41, 1464 1465 }; 1466 int32_t from_SCSUOffs_1 [] ={ 1467 0, 1468 1469 }; 1470 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1471 to_SCSU, sizeof(to_SCSU), "SCSU", 1472 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) 1473 log_err("u-> SCSU with substitute did not match.\n"); 1474 1475 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1476 to_SCSU_1, sizeof(to_SCSU_1), "SCSU", 1477 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 1478 log_err("u-> SCSU with substitute did not match.\n"); 1479 } 1480 1481 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1482 { 1483 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 
0x0061,}; 1484 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, 1485 0xf0, 0x90, 0x90, 0x81, 1486 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 1487 0xef, 0xbf, 0xbf, 0x61, 1488 1489 }; 1490 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 }; 1491 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]), 1492 expectedUTF8, sizeof(expectedUTF8), "utf8", 1493 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { 1494 log_err("u-> utf8 with stop did not match.\n"); 1495 } 1496 } 1497 1498 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1499 { 1500 static const UChar in[]={ 0x0041, 0xfeff }; 1501 1502 static const uint8_t out[]={ 1503 #if U_IS_BIG_ENDIAN 1504 0xfe, 0xff, 1505 0x00, 0x41, 1506 0xfe, 0xff 1507 #else 1508 0xff, 0xfe, 1509 0x41, 0x00, 1510 0xff, 0xfe 1511 #endif 1512 }; 1513 static const int32_t offsets[]={ 1514 -1, -1, 0, 0, 1, 1 1515 }; 1516 1517 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1518 out, sizeof(out), "UTF-16", 1519 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1520 ) { 1521 log_err("u->UTF-16 with substitute did not match.\n"); 1522 } 1523 } 1524 1525 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1526 { 1527 static const UChar in[]={ 0x0041, 0xfeff }; 1528 1529 static const uint8_t out[]={ 1530 #if U_IS_BIG_ENDIAN 1531 0x00, 0x00, 0xfe, 0xff, 1532 0x00, 0x00, 0x00, 0x41, 1533 0x00, 0x00, 0xfe, 0xff 1534 #else 1535 0xff, 0xfe, 0x00, 0x00, 1536 0x41, 0x00, 0x00, 0x00, 1537 0xff, 0xfe, 0x00, 0x00 1538 #endif 1539 }; 1540 static const int32_t offsets[]={ 1541 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 1542 }; 1543 1544 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1545 out, sizeof(out), "UTF-32", 1546 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1547 ) { 1548 log_err("u->UTF-32 with substitute did not match.\n"); 1549 } 1550 } 1551 1552 /*to unicode*/ 1553 1554 #if !UCONFIG_NO_LEGACY_CONVERSION 1555 
if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), 1556 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949", 1557 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) 1558 log_err("ibm-949->u with substitute did not match.\n"); 1559 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), 1560 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943", 1561 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) 1562 log_err("ibm-943->u with substitute did not match.\n"); 1563 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), 1564 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930", 1565 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) 1566 log_err("ibm-930->u with substitute did not match.\n"); 1567 1568 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1569 { 1570 1571 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1572 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1573 }; 1574 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 1575 }; 1576 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; 1577 1578 1579 /* EUC_JP*/ 1580 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1581 /* BEGIN android-changed */ 1582 /* Android uses a different EUC-JP table. We change this byte sequence, 1583 * choosing one that is unassigned in both tables. 
*/ 1584 0x8f, 0xa1, 0xa1, /*unassigned*/ 1585 /* 0x8f, 0xda, 0xa1, */ /*unassigned*/ 1586 /* END android-changed */ 1587 0x8e, 0xe0, 0x8a 1588 }; 1589 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a }; 1590 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; 1591 1592 /*EUC_TW*/ 1593 const uint8_t sampleTxt_euc_tw[]={ 1594 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1595 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1596 0xe6, 0xca, 0x8a, 1597 }; 1598 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, }; 1599 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; 1600 1601 1602 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 1603 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1604 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1605 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); 1606 1607 1608 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1609 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 1610 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) 1611 log_err("euc-jp->u with substitute did not match.\n"); 1612 1613 1614 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1615 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1616 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1617 log_err("euc-tw->u with substitute did not match.\n"); 1618 1619 1620 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1621 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 1622 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) 1623 log_err("euc-jp->u with substitute did not match.\n"); 1624 } 1625 #endif 1626 1627 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1628 { 1629 
const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1630 0xe0, 0x80, 0x61,}; 1631 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 1632 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 1633 1634 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1635 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 1636 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1637 log_err("utf8->u with substitute did not match.\n");; 1638 } 1639 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1640 { 1641 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 1642 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd}; 1643 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 1644 1645 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1646 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1647 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1648 log_err("scsu->u with stop did not match.\n");; 1649 } 1650 1651 #if !UCONFIG_NO_LEGACY_CONVERSION 1652 log_verbose("Testing ibm-930 subchar/subchar1\n"); 1653 { 1654 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; 1655 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f }; 1656 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1657 1658 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a }; 1659 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 }; 1660 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 }; 1661 1662 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930", 1663 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1664 ) { 1665 log_err("u->ibm-930 subchar/subchar1 did not match.\n"); 1666 } 1667 1668 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930", 1669 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1670 ) { 1671 
log_err("ibm-930->u subchar/subchar1 did not match.\n"); 1672 } 1673 } 1674 1675 log_verbose("Testing GB 18030 with substitute callbacks\n"); 1676 { 1677 static const UChar u2[]={ 1678 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; 1679 static const uint8_t gb2[]={ 1680 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; 1681 static const int32_t offsets2[]={ 1682 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; 1683 1684 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030", 1685 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1686 ) { 1687 log_err("gb18030->u with substitute did not match.\n"); 1688 } 1689 } 1690 #endif 1691 1692 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); 1693 { 1694 static const uint8_t utf7[]={ 1695 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. 
*/ 1696 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 1697 }; 1698 static const UChar unicode[]={ 1699 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd 1700 }; 1701 static const int32_t offsets[]={ 1702 0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23 1703 }; 1704 1705 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7", 1706 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1707 ) { 1708 log_err("UTF-7->u with substitute did not match.\n"); 1709 } 1710 } 1711 1712 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); 1713 { 1714 static const uint8_t 1715 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, 1716 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, 1717 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; 1718 1719 static const UChar 1720 out1[]={ 0x4e00, 0xfeff }, 1721 out2[]={ 0x004e, 0xfffe }, 1722 out3[]={ 0xfefd, 0x4e00, 0xfeff }; 1723 1724 static const int32_t 1725 offsets1[]={ 2, 4 }, 1726 offsets2[]={ 2, 4 }, 1727 offsets3[]={ 0, 2, 4 }; 1728 1729 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16", 1730 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1731 ) { 1732 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); 1733 } 1734 1735 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16", 1736 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1737 ) { 1738 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); 1739 } 1740 1741 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16", 1742 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1743 ) { 1744 log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); 1745 } 1746 } 1747 1748 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); 1749 { 1750 static const uint8_t 1751 in1[]={ 0x00, 0x00, 0xfe, 0xff, 
0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }, 1752 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }, 1753 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, 1754 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 }; 1755 1756 static const UChar 1757 out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff }, 1758 out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe }, 1759 out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd }, 1760 out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 }; 1761 1762 static const int32_t 1763 offsets1[]={ 4, 4, 8 }, 1764 offsets2[]={ 4, 4, 8 }, 1765 offsets3[]={ 0, 4, 4, 8, 12 }, 1766 offsets4[]={ 0, 0, 4, 8 }; 1767 1768 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32", 1769 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1770 ) { 1771 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); 1772 } 1773 1774 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32", 1775 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1776 ) { 1777 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); 1778 } 1779 1780 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32", 1781 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1782 ) { 1783 log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); 1784 } 1785 1786 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32", 1787 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0) 1788 ) { 1789 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n"); 1790 } 1791 } 1792 } 1793 1794 static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 1795 { 1796 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1797 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 
}; 1798 1799 const uint8_t expsubwvalIBM_949[]= { 1800 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 1801 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; 1802 1803 const uint8_t expsubwvalIBM_943[]= { 1804 0x9f, 0xaf, 0x9f, 0xb1, 1805 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; 1806 1807 const uint8_t expsubwvalIBM_930[] = { 1808 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f }; 1809 1810 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; 1811 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; 1812 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ 1813 1814 gInBufferSize = inputsize; 1815 gOutBufferSize = outputsize; 1816 1817 /*from Unicode*/ 1818 1819 #if !UCONFIG_NO_LEGACY_CONVERSION 1820 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1821 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", 1822 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) 1823 log_err("u-> ibm-949 with subst with value did not match.\n"); 1824 1825 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1826 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", 1827 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) 1828 log_err("u-> ibm-943 with sub with value did not match.\n"); 1829 1830 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1831 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", 1832 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) 1833 log_err("u-> ibm-930 with subst with value did not match.\n"); 1834 1835 1836 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 1837 { 1838 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1839 static const uint8_t toIBM943[]= { 0x61, 1840 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1841 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1842 
0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1843 0x61 }; 1844 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 1845 1846 1847 /* EUC_JP*/ 1848 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, }; 1849 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1851 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1853 0x61, 0x8e, 0xe0, 1854 }; 1855 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 1856 3, 3, 3, 3, 3, 3, 1857 3, 3, 3, 3, 3, 3, 1858 5, 5, 5, 5, 5, 5, 1859 6, 7, 7, 1860 }; 1861 1862 /*EUC_TW*/ 1863 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1864 static const uint8_t to_euc_tw[]={ 1865 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1866 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1867 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1868 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1869 0x61, 0xe6, 0xca, 0x8a, 1870 }; 1871 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 1872 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 1873 6, 7, 7, 8, 1874 }; 1875 /*ISO-2022-JP*/ 1876 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; 1877 static const uint8_t to_iso_2022_jp1[]={ 1878 0x1b, 0x24, 0x42, 0x21, 0x21, 1879 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1880 0x1b, 0x24, 0x42, 0x21, 0x22, 1881 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1882 0x42, 1883 }; 1884 1885 static const int32_t from_iso_2022_jpOffs1 [] ={ 1886 0,0,0,0,0, 1887 1,1,1,1,1,1,1,1,1, 1888 2,2,2,2,2, 1889 3,3,3,3,3,3,3,3,3, 1890 4, 1891 }; 1892 /* surrogate pair*/ 1893 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ; 1894 static const uint8_t to_iso_2022_jp2[]={ 1895 0x1b, 0x24, 0x42, 0x21, 0x21, 1896 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 
0x44, 1897 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1898 0x1b, 0x24, 0x42, 0x21, 0x22, 1899 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1900 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1901 0x42, 1902 }; 1903 static const int32_t from_iso_2022_jpOffs2 [] ={ 1904 0,0,0,0,0, 1905 1,1,1,1,1,1,1,1,1, 1906 1,1,1,1,1,1, 1907 3,3,3,3,3, 1908 4,4,4,4,4,4,4,4,4, 1909 4,4,4,4,4,4, 1910 6, 1911 }; 1912 1913 /*ISO-2022-cn*/ 1914 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1915 static const uint8_t to_iso_2022_cn[]={ 1916 0x41, 1917 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, 1918 0x42, 1919 }; 1920 static const int32_t from_iso_2022_cnOffs [] ={ 1921 0, 1922 1,1,1,1,1,1, 1923 2, 1924 }; 1925 1926 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; 1927 1928 static const uint8_t to_iso_2022_cn4[]={ 1929 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 1930 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1931 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1932 0x0e, 0x21, 0x22, 1933 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1934 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1935 0x42, 1936 }; 1937 static const int32_t from_iso_2022_cnOffs4 [] ={ 1938 0,0,0,0,0,0,0, 1939 1,1,1,1,1,1,1, 1940 1,1,1,1,1,1, 1941 3,3,3, 1942 4,4,4,4,4,4,4, 1943 4,4,4,4,4,4, 1944 6 1945 1946 }; 1947 1948 /*ISO-2022-kr*/ 1949 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 1950 static const uint8_t to_iso_2022_kr2[]={ 1951 0x1b, 0x24, 0x29, 0x43, 1952 0x41, 1953 0x0e, 0x25, 0x50, 1954 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1955 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1956 0x0e, 0x25, 0x50, 1957 0x0f, 0x42, 1958 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1959 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1960 0x43 1961 }; 1962 static const int32_t from_iso_2022_krOffs2 [] ={ 1963 -1,-1,-1,-1, 1964 0, 1965 1,1,1, 1966 2,2,2,2,2,2,2, 1967 2,2,2,2,2,2, 1968 4,4,4, 1969 5,5, 1970 6,6,6,6,6,6, 1971 
6,6,6,6,6,6, 1972 8, 1973 }; 1974 1975 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; 1976 static const uint8_t to_iso_2022_kr[]={ 1977 0x1b, 0x24, 0x29, 0x43, 1978 0x41, 1979 0x0e, 0x25, 0x50, 1980 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1981 0x0e, 0x25, 0x50, 1982 0x0f, 0x42, 1983 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1984 0x43 1985 }; 1986 1987 1988 static const int32_t from_iso_2022_krOffs [] ={ 1989 -1,-1,-1,-1, 1990 0, 1991 1,1,1, 1992 2,2,2,2,2,2,2, 1993 3,3,3, 1994 4,4, 1995 5,5,5,5,5,5, 1996 6, 1997 }; 1998 /* HZ encoding */ 1999 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 2000 2001 static const uint8_t to_hz[]={ 2002 0x7e, 0x7d, 0x41, 2003 0x7e, 0x7b, 0x26, 0x30, 2004 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/ 2005 0x7e, 0x7b, 0x26, 0x30, 2006 0x7e, 0x7d, 0x42, 2007 2008 }; 2009 static const int32_t from_hzOffs [] ={ 2010 0,0,0, 2011 1,1,1,1, 2012 2,2,2,2,2,2,2,2, 2013 3,3,3,3, 2014 4,4,4 2015 }; 2016 2017 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 2018 static const uint8_t to_hz2[]={ 2019 0x7e, 0x7d, 0x41, 2020 0x7e, 0x7b, 0x26, 0x30, 2021 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2022 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2023 0x7e, 0x7b, 0x26, 0x30, 2024 0x7e, 0x7d, 0x42, 2025 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2026 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2027 0x43 2028 }; 2029 static const int32_t from_hzOffs2 [] ={ 2030 0,0,0, 2031 1,1,1,1, 2032 2,2,2,2,2,2,2,2, 2033 2,2,2,2,2,2, 2034 4,4,4,4, 2035 5,5,5, 2036 6,6,6,6,6,6, 2037 6,6,6,6,6,6, 2038 8, 2039 }; 2040 2041 /*ISCII*/ 2042 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; 2043 static const uint8_t to_iscii[]={ 2044 0x41, 2045 0xef, 0x42, 0xa1, 2046 0x25, 0x55, 
0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2047 0xa2, 2048 0x42, 2049 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2050 0x43 2051 }; 2052 2053 2054 static const int32_t from_isciiOffs [] ={ 2055 0, 2056 1,1,1, 2057 2,2,2,2,2,2, 2058 3, 2059 4, 2060 5,5,5,5,5,5, 2061 6, 2062 }; 2063 2064 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 2065 toIBM943, sizeof(toIBM943), "ibm-943", 2066 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) 2067 log_err("u-> ibm-943 with subst with value did not match.\n"); 2068 2069 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 2070 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 2071 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) 2072 log_err("u-> euc-jp with subst with value did not match.\n"); 2073 2074 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 2075 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 2076 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) 2077 log_err("u-> euc-tw with subst with value did not match.\n"); 2078 2079 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2080 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2081 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2082 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2083 2084 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2085 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2086 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2087 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2088 2089 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 2090 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 2091 UCNV_FROM_U_CALLBACK_ESCAPE, 
from_iso_2022_jpOffs2, NULL, 0 )) 2092 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2093 /*ESCAPE OPTIONS*/ 2094 { 2095 /* surrogate pair*/ 2096 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; 2097 static const uint8_t to_iso_2022_jp3_v2[]={ 2098 0x1b, 0x24, 0x42, 0x21, 0x21, 2099 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2100 2101 0x1b, 0x24, 0x42, 0x21, 0x22, 2102 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2103 2104 0x42, 2105 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b, 2106 }; 2107 2108 static const int32_t from_iso_2022_jpOffs3_v2 [] ={ 2109 0,0,0,0,0, 2110 1,1,1,1,1,1,1,1,1,1,1,1, 2111 2112 3,3,3,3,3, 2113 4,4,4,4,4,4,4,4,4,4,4,4, 2114 2115 6, 2116 7,7,7,7,7,7,7,7,7 2117 }; 2118 2119 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), 2120 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp", 2121 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2122 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); 2123 } 2124 { 2125 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2126 static const uint8_t to_iso_2022_cn5_v2[]={ 2127 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2128 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2129 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2130 0x0e, 0x21, 0x22, 2131 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2132 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2133 0x42, 2134 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, 2135 }; 2136 static const int32_t from_iso_2022_cnOffs5_v2 [] ={ 2137 0,0,0,0,0,0,0, 2138 1,1,1,1,1,1,1, 2139 1,1,1,1,1,1, 2140 3,3,3, 2141 4,4,4,4,4,4,4, 2142 4,4,4,4,4,4, 2143 6, 2144 7,7,7,7,7,7 2145 }; 2146 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, 
sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), 2147 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", 2148 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR )) 2149 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n"); 2150 2151 } 2152 { 2153 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2154 static const uint8_t to_iso_2022_cn6_v2[]={ 2155 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2156 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2157 0x0e, 0x21, 0x22, 2158 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2159 0x42, 2160 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d 2161 }; 2162 static const int32_t from_iso_2022_cnOffs6_v2 [] ={ 2163 0, 0, 0, 0, 0, 0, 0, 2164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2165 3, 3, 3, 2166 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2167 6, 2168 7, 7, 7, 7, 7, 7, 7, 7, 2169 }; 2170 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), 2171 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", 2172 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR )) 2173 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n"); 2174 2175 } 2176 { 2177 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2178 static const uint8_t to_iso_2022_cn7_v2[]={ 2179 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2180 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2181 0x0e, 0x21, 0x22, 2182 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2183 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, 2184 }; 2185 static const int32_t from_iso_2022_cnOffs7_v2 [] ={ 2186 0, 0, 0, 0, 0, 0, 0, 2187 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2188 3, 3, 3, 2189 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 2190 6, 2191 7, 7, 7, 7, 7, 7, 2192 }; 2193 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), 2194 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", 2195 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR )) 2196 log_err("u-> iso-2022-cn with sub & K did not match.\n"); 2197 2198 } 2199 { 2200 static const UChar iso_2022_cn_inputText8[]={ 2201 0x3000, 2202 0xD84D, 0xDC56, 2203 0x3001, 2204 0xD84D, 0xDC56, 2205 0xDBFF, 0xDFFF, 2206 0x0042, 2207 0x0902}; 2208 static const uint8_t to_iso_2022_cn8_v2[]={ 2209 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2210 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2211 0x0e, 0x21, 0x22, 2212 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2213 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20, 2214 0x42, 2215 0x5c, 0x39, 0x30, 0x32, 0x20 2216 }; 2217 static const int32_t from_iso_2022_cnOffs8_v2 [] ={ 2218 0, 0, 0, 0, 0, 0, 0, 2219 1, 1, 1, 1, 1, 1, 1, 1, 2220 3, 3, 3, 2221 4, 4, 4, 4, 4, 4, 4, 4, 2222 6, 6, 6, 6, 6, 6, 6, 6, 2223 8, 2224 9, 9, 9, 9, 9 2225 }; 2226 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]), 2227 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn", 2228 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR )) 2229 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n"); 2230 2231 } 2232 { 2233 static const uint8_t to_iso_2022_cn4_v3[]={ 2234 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2235 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2236 0x0e, 0x21, 0x22, 2237 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2238 0x42 2239 }; 2240 2241 2242 static const int32_t from_iso_2022_cnOffs4_v3 [] ={ 2243 0,0,0,0,0,0,0, 2244 1,1,1,1,1,1,1,1,1,1,1, 2245 2246 3,3,3, 2247 4,4,4,4,4,4,4,4,4,4,4, 2248 2249 6 
2250 2251 }; 2252 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2253 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", 2254 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2255 { 2256 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n"); 2257 } 2258 } 2259 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 2260 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 2261 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) 2262 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2263 2264 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2265 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", 2266 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) 2267 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2268 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 2269 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 2270 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) 2271 log_err("u-> iso_2022_kr with subst with value did not match.\n"); 2272 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]), 2273 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", 2274 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) 2275 log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); 2276 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 2277 to_hz, sizeof(to_hz), "HZ", 2278 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) 2279 log_err("u-> hz with subst with value did not match.\n"); 2280 if(!testConvertFromUnicode(hz_inputText2, 
sizeof(hz_inputText2)/sizeof(hz_inputText2[0]), 2281 to_hz2, sizeof(to_hz2), "HZ", 2282 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) 2283 log_err("u-> hz with subst with value did not match.\n"); 2284 2285 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 2286 to_iscii, sizeof(to_iscii), "ISCII,version=0", 2287 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) 2288 log_err("u-> iscii with subst with value did not match.\n"); 2289 } 2290 #endif 2291 2292 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 2293 /*to Unicode*/ 2294 { 2295 #if !UCONFIG_NO_LEGACY_CONVERSION 2296 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 2297 0x81, 0xad, /*unassigned*/ 2298 0x89, 0xd3 }; 2299 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 2300 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 2301 0x7B87}; 2302 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 2303 2304 /* EUC_JP*/ 2305 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 2306 /* BEGIN android-changed */ 2307 /* Android uses a different EUC-JP table. We change this byte sequence, 2308 * choosing one that is unassigned in both tables. */ 2309 0x8f, 0xa1, 0xa1, /*unassigned*/ 2310 /* 0x8f, 0xda, 0xa1, */ /*unassigned*/ 2311 /* END android-changed */ 2312 0x8e, 0xe0, 2313 }; 2314 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 2315 /* BEGIN android-changed */ 2316 /* Android uses a different EUC-JP table. We change the expected output, 2317 * matching the byte sequence modified above. 
*/ 2318 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x41, 0x31, 0x25, 0x58, 0x41, 0x31, 2319 /* 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, */ 2320 /* END android-changed */ 2321 0x00a2 }; 2322 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, 2323 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2324 9, 2325 }; 2326 2327 /*EUC_TW*/ 2328 static const uint8_t sampleTxt_euc_tw[]={ 2329 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 2330 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 2331 0xe6, 0xca, 0x8a, 2332 }; 2333 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 2334 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43, 2335 0x8706, 0x8a, }; 2336 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 2337 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2338 11, 13}; 2339 2340 /*iso-2022-jp*/ 2341 static const uint8_t sampleTxt_iso_2022_jp[]={ 2342 0x1b, 0x28, 0x42, 0x41, 2343 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ 2344 0x1b, 0x28, 0x42, 0x42, 2345 2346 }; 2347 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 }; 2348 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 }; 2349 2350 /*iso-2022-cn*/ 2351 static const uint8_t sampleTxt_iso_2022_cn[]={ 2352 0x0f, 0x41, 0x44, 2353 0x1B, 0x24, 0x29, 0x47, 2354 0x0E, 0x40, 0x6c, /*unassigned*/ 2355 0x0f, 0x42, 2356 2357 }; 2358 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 }; 2359 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 }; 2360 2361 /*iso-2022-kr*/ 2362 static const uint8_t sampleTxt_iso_2022_kr[]={ 2363 0x1b, 0x24, 0x29, 0x43, 2364 0x41, 2365 0x0E, 0x7f, 0x1E, 2366 0x0e, 0x25, 0x50, 2367 0x0f, 0x51, 2368 0x42, 0x43, 2369 2370 }; 2371 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; 2372 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 
6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 }; 2373 2374 /*hz*/ 2375 static const uint8_t sampleTxt_hz[]={ 2376 0x41, 2377 0x7e, 0x7b, 0x26, 0x30, 2378 0x7f, 0x1E, /*unassigned*/ 2379 0x26, 0x30, 2380 0x7e, 0x7d, 0x42, 2381 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 2382 0x7e, 0x7d, 0x42, 2383 }; 2384 static const UChar hztoUnicode[]={ 2385 0x41, 2386 0x03a0, 2387 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2388 0x03A0, 2389 0x42, 2390 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2391 0x42,}; 2392 2393 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, }; 2394 2395 2396 /*iscii*/ 2397 static const uint8_t sampleTxt_iscii[]={ 2398 0x41, 2399 0x30, 2400 0xEB, /*unassigned*/ 2401 0xa3, 2402 0x42, 2403 0xEC, /*unassigned*/ 2404 0x42, 2405 }; 2406 static const UChar isciitoUnicode[]={ 2407 0x41, 2408 0x30, 2409 0x25, 0x58, 0x45, 0x42, 2410 0x0903, 2411 0x42, 2412 0x25, 0x58, 0x45, 0x43, 2413 0x42,}; 2414 2415 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; 2416 #endif 2417 2418 /*UTF8*/ 2419 static const uint8_t sampleTxtUTF8[]={ 2420 0x20, 0x64, 0x50, 2421 0xC2, 0x7E, /* truncated char */ 2422 0x20, 2423 0xE0, 0xB5, 0x7E, /* truncated char */ 2424 0x40, 2425 }; 2426 static const UChar UTF8ToUnicode[]={ 2427 0x0020, 0x0064, 0x0050, 2428 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ 2429 0x0020, 2430 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E, 2431 0x0040 2432 }; 2433 static const int32_t fromUTF8[] = { 2434 0, 1, 2, 2435 3, 3, 3, 3, 4, 2436 5, 2437 6, 6, 6, 6, 6, 6, 6, 6, 8, 2438 9 2439 }; 2440 static const UChar UTF8ToUnicodeXML_DEC[]={ 2441 0x0020, 0x0064, 0x0050, 2442 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */ 2443 0x0020, 2444 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E, 2445 0x0040 2446 }; 2447 static const int32_t fromUTF8XML_DEC[] = { 2448 0, 1, 2, 2449 3, 3, 3, 3, 3, 3, 4, 2450 5, 2451 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 6, 8, 2452 9 2453 }; 2454 2455 2456 #if !UCONFIG_NO_LEGACY_CONVERSION 2457 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), 2458 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 2459 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) 2460 log_err("ibm-943->u with substitute with value did not match.\n"); 2461 2462 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), 2463 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp", 2464 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) 2465 log_err("euc-jp->u with substitute with value did not match.\n"); 2466 2467 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 2468 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 2469 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) 2470 log_err("euc-tw->u with substitute with value did not match.\n"); 2471 2472 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2473 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2474 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) 2475 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2476 2477 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2478 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2479 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR)) 2480 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2481 2482 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ 2483 { 2484 static const UChar iso_2022_jptoUnicodeDec[]={ 2485 0x0041, 2486 0x0026, 0x0023, 0x0034, 0x0032, 0x003b, 2487 0x0026, 0x0023, 0x0036, 0x0038, 0x003b, 2488 0x0042 }; 2489 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, }; 2490 
if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2491 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2492 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2493 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n"); 2494 } 2495 { 2496 static const UChar iso_2022_jptoUnicodeHex[]={ 2497 0x0041, 2498 0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b, 2499 0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b, 2500 0x0042 }; 2501 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 }; 2502 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2503 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2504 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) 2505 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n"); 2506 } 2507 { 2508 static const UChar iso_2022_jptoUnicodeC[]={ 2509 0x0041, 2510 0x005C, 0x0078, 0x0032, 0x0041, 2511 0x005C, 0x0078, 0x0034, 0x0034, 2512 0x0042 }; 2513 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; 2514 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2515 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2516 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2517 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n"); 2518 } 2519 } 2520 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 2521 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 2522 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) 2523 
log_err("iso-2022-cn->u with substitute with value did not match.\n"); 2524 2525 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 2526 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 2527 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) 2528 log_err("iso-2022-kr->u with substitute with value did not match.\n"); 2529 2530 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 2531 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 2532 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) 2533 log_err("hz->u with substitute with value did not match.\n"); 2534 2535 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 2536 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 2537 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) 2538 log_err("ISCII ->u with substitute with value did not match.\n"); 2539 #endif 2540 2541 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2542 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8", 2543 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) 2544 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2545 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2546 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8", 2547 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR)) 2548 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2549 } 2550 }

#if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Converts a Unicode string to ibm-949 with the SKIP callback, then converts
 * one ibm-943 byte sequence to Unicode three times, once each with the
 * SUBSTITUTE, SKIP and STOP callbacks, checking output and offsets.
 *
 * inputsize/outputsize are stored in gInBufferSize/gOutBufferSize and are the
 * chunk sizes used by the conversion helpers below.
 */
static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
{
    static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
    static const uint8_t templegal949[] = { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
    static const int32_t to949legal[] = { 0, 1, 1, 2, 2, 3, 3 };

    static const uint8_t text943[] = {
        0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
    static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
    static const UChar toUnicode943skip[] = { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
    static const UChar toUnicode943stop[] = { 0x304b };

    static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 };
    static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
    static const int32_t fromIBM943Offsstop[] = { 0 };

    gInBufferSize = inputsize;
    gOutBufferSize = outputsize;

    /* checking with a legal value */
    if (!testConvertFromUnicode(legalText, ARRAY_LENGTH(legalText),
                                templegal949, sizeof(templegal949), "ibm-949",
                                UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0))
    {
        log_err("u-> ibm-949 with skip did not match.\n");
    }

    /* checking illegal value for ibm-943 with substitute */
    if (!testConvertToUnicode(text943, sizeof(text943),
                              toUnicode943sub, ARRAY_LENGTH(toUnicode943sub), "ibm-943",
                              UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0))
    {
        log_err("ibm-943->u with subst did not match.\n");
    }

    /* checking illegal value for ibm-943 with skip */
    if (!testConvertToUnicode(text943, sizeof(text943),
                              toUnicode943skip, ARRAY_LENGTH(toUnicode943skip), "ibm-943",
                              UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0))
    {
        log_err("ibm-943->u with skip did not match.\n");
    }

    /* checking illegal value for ibm-943 with stop */
    if (!testConvertToUnicode(text943, sizeof(text943),
                              toUnicode943stop, ARRAY_LENGTH(toUnicode943stop), "ibm-943",
                              UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0))
    {
        log_err("ibm-943->u with stop did not match.\n");
    }
}

/*
 * Converts an ibm-943 byte sequence to Unicode with the SUBSTITUTE callback
 * and checks that the expected output (containing two 0x1a substitutions)
 * and offsets are produced.
 */
static void TestSingleByte(int32_t inputsize, int32_t outputsize)
{
    static const uint8_t sampleText[] = {
        0x82, 0xa9, 0x61, 0x62, 0x63, 0x82,
        0xff, 0x32, 0x33 };
    static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
    static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };

    gInBufferSize = inputsize;
    gOutBufferSize = outputsize;

    /* checking illegal value for ibm-943 with substitute */
    if (!testConvertToUnicode(sampleText, sizeof(sampleText),
                              toUnicode943sub, ARRAY_LENGTH(toUnicode943sub), "ibm-943",
                              UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0))
    {
        log_err("ibm-943->u with subst did not match.\n");
    }
}

/*
 * Converts a Unicode string to ibm-930 with the SUBSTITUTE callback, first
 * with the converter's default substitution bytes and then with the
 * substitution character explicitly set to the single byte 0x3f.
 */
static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
{
    /*EBCDIC_STATEFUL*/
    static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
    static const uint8_t toIBM930[] = { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
    static const int32_t offset_930[] = { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
    /*                                    s  SO doubl   SI sng s  SO fe fe SI s */

    /*EBCDIC_STATEFUL with subChar=3f*/
    static const uint8_t toIBM930_subvaried[] = { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
    static const int32_t offset_930_subvaried[] = { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
    static const char mySubChar[] = { 0x3f };

    gInBufferSize = inputsize;
    gOutBufferSize = outputsize;

    if (!testConvertFromUnicode(ebcdic_inputTest, ARRAY_LENGTH(ebcdic_inputTest),
                                toIBM930, sizeof(toIBM930), "ibm-930",
                                UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0))
    {
        log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
    }

    if (!testConvertFromUnicode(ebcdic_inputTest, ARRAY_LENGTH(ebcdic_inputTest),
                                toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
                                UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1))
    {
        log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
    }
}
#endif

/*
 * Shared implementation of testConvertFromUnicode() and
 * testConvertFromUnicodeWithContext().
 *
 * Opens `codepage`, installs `callback` (with `context`), optionally sets the
 * substitution bytes (`mySubChar`/`len`), converts `source` in chunks of at
 * most gInBufferSize UChars into output windows of at most gOutBufferSize
 * bytes, and compares the result with `expect` and, when the whole conversion
 * is done in a single call, the reported offsets with `expectOffsets`.
 *
 * expectedError selects which conversion failures are tolerated:
 *   - NULL: only U_INVALID_CHAR_FOUND / U_ILLEGAL_CHAR_FOUND raised under
 *     UCNV_FROM_U_CALLBACK_STOP are accepted, and for those two codes the
 *     result of ucnv_getInvalidUChars() is checked against the source.
 *   - non-NULL: exactly *expectedError is accepted.
 *
 * Returns TRUE on a match, and also when the converter cannot be opened
 * (that problem is reported through log_data_err); FALSE otherwise.
 * The converter is closed on every path once it has been opened.
 */
static UBool nct_runFromUnicode(const UChar *source, int sourceLen,
                                const uint8_t *expect, int expectLen,
                                const char *codepage, UConverterFromUCallback callback,
                                const int32_t *expectOffsets,
                                const char *mySubChar, int8_t len,
                                const void *context, const UErrorCode *expectedError)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = NULL;
    char junkout[NEW_MAX_BUFFER];
    int32_t junokout[NEW_MAX_BUFFER];
    char junk[9999];
    char offset_str[9999];
    const UChar *src = source;
    const UChar *realSourceEnd = source + sourceLen;
    const UChar *sourceLimit;
    char *targ = junkout;
    char *realBufferEnd = junkout + NEW_MAX_BUFFER;
    char *end;
    UBool checkOffsets;
    UBool doFlush;
    UConverterFromUCallback oldAction = NULL;
    const void *oldContext = NULL;
    int32_t outLen;
    int i;

    /* Pre-fill with sentinels so that unwritten slots are recognisable. */
    for (i = 0; i < NEW_MAX_BUFFER; i++)
    {
        junkout[i] = (char)0xF0;
        junokout[i] = 0xFF;
    }
    setNuConvTestName(codepage, "FROM");

    log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
                gOutBufferSize);

    conv = ucnv_open(codepage, &status);
    if (U_FAILURE(status))
    {
        log_data_err("Couldn't open converter %s\n", codepage);
        return TRUE; /* Because the err has already been logged. */
    }

    log_verbose("Converter opened..\n");

    /*----setting the callback routine----*/
    ucnv_setFromUCallBack(conv, callback, context, &oldAction, &oldContext, &status);
    if (U_FAILURE(status))
    {
        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
    }

    /*----setting the subChar----*/
    if (mySubChar != NULL)
    {
        ucnv_setSubstChars(conv, mySubChar, len, &status);
        if (U_FAILURE(status))
        {
            log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
        }
    }

    /* Offsets are only passed to the converter and compared when the whole
       conversion happens in one call with full-size buffers. */
    checkOffsets = (UBool)(gOutBufferSize == NEW_MAX_BUFFER && gInBufferSize == NEW_MAX_BUFFER);

    do
    {
        /* Clamp the lengths first so no pointer past the arrays is formed. */
        int32_t room = (int32_t)(realBufferEnd - targ);
        int32_t left = (int32_t)(realSourceEnd - src);

        end = targ + nct_min(gOutBufferSize, room);
        sourceLimit = src + nct_min(gInBufferSize, left);

        doFlush = (UBool)(sourceLimit == realSourceEnd);

        if (targ == realBufferEnd)
        {
            log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%p %s", (void *)targ, gNuConvTestName);
            ucnv_close(conv);
            return FALSE;
        }
        log_verbose("calling fromUnicode @ SOURCE:%p to %p TARGET: %p to %p, flush=%s\n",
                    (const void *)src, (const void *)sourceLimit, (void *)targ, (void *)end,
                    doFlush ? "TRUE" : "FALSE");

        status = U_ZERO_ERROR;

        ucnv_fromUnicode(conv,
                         &targ,
                         end,
                         &src,
                         sourceLimit,
                         checkOffsets ? junokout : NULL,
                         doFlush, /* flush if we're at the end of the input data */
                         &status);
    } while ((status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)));

    if (expectedError == NULL &&
        (status == U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND))
    {
        UChar errChars[50]; /* should be sufficient */
        int8_t errLen = 50;
        UErrorCode err = U_ZERO_ERROR;

        ucnv_getInvalidUChars(conv, errChars, &errLen, &err);
        if (U_FAILURE(err))
        {
            /* errLen is not trustworthy here, so do not index with it. */
            log_err("ucnv_getInvalidUChars failed with error : %s\n", u_errorName(err));
        }
        /* src points to the limit of the invalid chars, so they start errLen before it */
        else if (u_strncmp(errChars, src - errLen, errLen) != 0)
        {
            log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv, &err));
        }
    }

    if (U_FAILURE(status))
    {
        UBool tolerated;

        if (expectedError != NULL)
        {
            tolerated = (UBool)(status == *expectedError);
        }
        else
        {
            /* allow failure codes for the stop callback */
            tolerated = (UBool)(callback == UCNV_FROM_U_CALLBACK_STOP &&
                                (status == U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND));
        }
        if (!tolerated)
        {
            log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
            ucnv_close(conv);
            return FALSE;
        }
    }

    outLen = (int32_t)(targ - junkout);

    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
                sourceLen, (int)outLen);
    if (getTestOption(VERBOSITY_OPTION))
    {
        /* At most NEW_MAX_BUFFER entries of 6 characters each: fits in 9999. */
        char *jp = junk;
        char *op = offset_str;

        junk[0] = 0;
        offset_str[0] = 0;
        for (i = 0; i < outLen; i++)
        {
            jp += sprintf(jp, "0x%02x, ", (0xFF) & (unsigned int)junkout[i]);
            op += sprintf(op, "0x%02x, ", (0xFF) & (unsigned int)junokout[i]);
        }

        log_verbose("%s", junk);
        printSeq(expect, expectLen);
        if (checkOffsets)
        {
            log_verbose("\nOffsets:");
            log_verbose("%s", offset_str);
        }
        log_verbose("\n");
    }
    ucnv_close(conv);

    if (expectLen != outLen)
    {
        log_err("Expected %d chars out, got %d %s\n", expectLen, (int)outLen, gNuConvTestName);
        log_verbose("Expected %d chars out, got %d %s\n", expectLen, (int)outLen, gNuConvTestName);
        printSeqErr((const uint8_t *)junkout, outLen);
        printSeqErr(expect, expectLen);
        return FALSE;
    }

    if (checkOffsets && (expectOffsets != NULL))
    {
        log_verbose("comparing %d offsets..\n", (int)outLen);
        if (memcmp(junokout, expectOffsets, outLen * sizeof(int32_t)))
        {
            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
            log_err("Got Output : ");
            printSeqErr((const uint8_t *)junkout, outLen);
            log_err("Got Offsets: ");
            for (i = 0; i < outLen; i++)
            {
                log_err("%d,", junokout[i]);
            }
            log_err("\n");
            log_err("Expected Offsets: ");
            for (i = 0; i < outLen; i++)
            {
                log_err("%d,", expectOffsets[i]);
            }
            log_err("\n");
            return FALSE;
        }
    }

    if (!memcmp(junkout, expect, expectLen))
    {
        log_verbose("String matches! %s\n", gNuConvTestName);
        return TRUE;
    }

    log_err("String does not match. %s\n", gNuConvTestName);
    log_err("source: ");
    printUSeqErr(source, sourceLen);
    log_err("Got: ");
    printSeqErr((const uint8_t *)junkout, expectLen);
    log_err("Expected: ");
    printSeqErr(expect, expectLen);
    return FALSE;
}

/*
 * Shared implementation of testConvertToUnicode() and
 * testConvertToUnicodeWithContext().
 *
 * Opens `codepage`, installs `callback` (with `context`), optionally sets the
 * substitution bytes (`mySubChar`/`len`), converts `source` in chunks of at
 * most gInBufferSize bytes into output windows of at most gOutBufferSize
 * UChars, and compares the first `expectlen` output UChars with `expect`.
 *
 * expectedError selects which conversion failures are tolerated:
 *   - NULL: only U_INVALID_CHAR_FOUND / U_ILLEGAL_CHAR_FOUND /
 *     U_TRUNCATED_CHAR_FOUND raised under UCNV_TO_U_CALLBACK_STOP are
 *     accepted, and for the first two codes the result of
 *     ucnv_getInvalidChars() is checked against the source.
 *   - non-NULL: exactly *expectedError is accepted.
 *
 * An offset mismatch is reported with log_err but, unlike the from-Unicode
 * helper, does not by itself change the return value.
 * NOTE(review): the produced length is not compared with expectlen here, so
 * the offset comparison assumes expectOffsets has at least as many entries as
 * UChars were produced - confirm against callers before tightening.
 *
 * Returns TRUE when the output matches, and also when the converter cannot be
 * opened (reported through log_data_err); FALSE otherwise.
 * The converter is closed on every path once it has been opened.
 */
static UBool nct_runToUnicode(const uint8_t *source, int sourcelen,
                              const UChar *expect, int expectlen,
                              const char *codepage, UConverterToUCallback callback,
                              const int32_t *expectOffsets,
                              const char *mySubChar, int8_t len,
                              const void *context, const UErrorCode *expectedError)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = NULL;
    UChar junkout[NEW_MAX_BUFFER];
    int32_t junokout[NEW_MAX_BUFFER];
    char junk[9999];
    char offset_str[9999];
    const char *src = (const char *)source;
    const char *realSourceEnd = (const char *)source + sourcelen;
    const char *srcLimit;
    UChar *targ = junkout;
    UChar *realBufferEnd = junkout + NEW_MAX_BUFFER;
    UChar *end;
    UBool checkOffsets;
    UConverterToUCallback oldAction = NULL;
    const void *oldContext = NULL;
    int32_t outLen;
    int i;

    /* Pre-fill with sentinels so that unwritten slots are recognisable. */
    for (i = 0; i < NEW_MAX_BUFFER; i++)
    {
        junkout[i] = 0xFFFE;
        junokout[i] = -1;
    }

    setNuConvTestName(codepage, "TO");

    log_verbose("\n========= %s\n", gNuConvTestName);

    conv = ucnv_open(codepage, &status);
    if (U_FAILURE(status))
    {
        log_data_err("Couldn't open converter %s\n", gNuConvTestName);
        return TRUE; /* the problem has already been logged */
    }

    log_verbose("Converter opened..\n");

    /*----setting the callback routine----*/
    ucnv_setToUCallBack(conv, callback, context, &oldAction, &oldContext, &status);
    if (U_FAILURE(status))
    {
        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
    }

    /*----setting the subChar----*/
    if (mySubChar != NULL)
    {
        ucnv_setSubstChars(conv, mySubChar, len, &status);
        if (U_FAILURE(status))
        {
            log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
        }
    }

    /* Offsets are only passed to the converter and compared when the whole
       conversion happens in one call with full-size buffers. */
    checkOffsets = (UBool)(gOutBufferSize == NEW_MAX_BUFFER && gInBufferSize == NEW_MAX_BUFFER);

    do
    {
        /* Clamp the lengths first so no pointer past the arrays is formed. */
        int32_t room = (int32_t)(realBufferEnd - targ);
        int32_t left = (int32_t)(realSourceEnd - src);

        end = targ + nct_min(gOutBufferSize, room);
        srcLimit = src + nct_min(left, gInBufferSize);

        if (targ == realBufferEnd)
        {
            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%p %s", (void *)targ, gNuConvTestName);
            ucnv_close(conv);
            return FALSE;
        }
        log_verbose("calling toUnicode @ %p to %p\n", (void *)targ, (void *)end);

        status = U_ZERO_ERROR;

        ucnv_toUnicode(conv,
                       &targ,
                       end,
                       &src,
                       srcLimit,
                       checkOffsets ? junokout : NULL,
                       (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
                       &status);
    } while ((status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd))); /* while we just need another buffer */

    if (expectedError == NULL &&
        (status == U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND))
    {
        char errChars[50]; /* should be sufficient */
        int8_t errLen = 50;
        UErrorCode err = U_ZERO_ERROR;

        ucnv_getInvalidChars(conv, errChars, &errLen, &err);
        if (U_FAILURE(err))
        {
            /* errLen is not trustworthy here, so do not index with it. */
            log_err("ucnv_getInvalidChars failed with error : %s\n", u_errorName(err));
        }
        /* src points to the limit of the invalid chars, so they start errLen before it */
        else if (uprv_strncmp(errChars, src - errLen, errLen) != 0)
        {
            log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv, &err));
        }
    }

    if (U_FAILURE(status))
    {
        UBool tolerated;

        if (expectedError != NULL)
        {
            tolerated = (UBool)(status == *expectedError);
        }
        else
        {
            /* allow failure codes for the stop callback */
            tolerated = (UBool)(callback == UCNV_TO_U_CALLBACK_STOP &&
                                (status == U_INVALID_CHAR_FOUND ||
                                 status == U_ILLEGAL_CHAR_FOUND ||
                                 status == U_TRUNCATED_CHAR_FOUND));
        }
        if (!tolerated)
        {
            log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
            ucnv_close(conv);
            return FALSE;
        }
    }

    outLen = (int32_t)(targ - junkout);

    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
                sourcelen, (int)outLen);
    if (getTestOption(VERBOSITY_OPTION))
    {
        /* At most NEW_MAX_BUFFER entries of 8 characters each: fits in 9999. */
        char *jp = junk;
        char *op = offset_str;

        junk[0] = 0;
        offset_str[0] = 0;
        for (i = 0; i < outLen; i++)
        {
            jp += sprintf(jp, "0x%04x, ", (0xFFFF) & (unsigned int)junkout[i]);
            op += sprintf(op, "0x%04x, ", (0xFFFF) & (unsigned int)junokout[i]);
        }

        log_verbose("%s", junk);
        printUSeq(expect, expectlen);
        if (checkOffsets)
        {
            log_verbose("\nOffsets:");
            log_verbose("%s", offset_str);
        }
        log_verbose("\n");
    }
    ucnv_close(conv);

    log_verbose("comparing %d uchars (%d bytes)..\n", expectlen, expectlen * 2);

    if (checkOffsets && (expectOffsets != NULL))
    {
        if (memcmp(junokout, expectOffsets, outLen * sizeof(int32_t)))
        {
            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
            log_err("Got offsets: ");
            for (i = 0; i < outLen; i++)
            {
                log_err(" %2d,", junokout[i]);
            }
            log_err("\n");
            log_err("Expected offsets: ");
            for (i = 0; i < outLen; i++)
            {
                log_err(" %2d,", expectOffsets[i]);
            }
            log_err("\n");
            log_err("Got output: ");
            for (i = 0; i < outLen; i++)
            {
                log_err("0x%04x,", junkout[i]);
            }
            log_err("\n");
            log_err("From source: ");
            for (i = 0; i < (src - (const char *)source); i++)
            {
                log_err(" 0x%02x,", (unsigned char)source[i]);
            }
            log_err("\n");
        }
    }

    if (!memcmp(junkout, expect, expectlen * sizeof(UChar)))
    {
        log_verbose("Matches!\n");
        return TRUE;
    }

    log_err("String does not match. %s\n", gNuConvTestName);
    log_verbose("String does not match. %s\n", gNuConvTestName);
    log_err("Got: ");
    printUSeqErr(junkout, expectlen);
    log_err("Expected: ");
    printUSeqErr(expect, expectlen);
    log_err("\n");
    return FALSE;
}

/*
 * Converts `source` (sourceLen UChars) to `codepage` using `callback` with a
 * NULL callback context and compares the bytes (and, for one-shot
 * conversions, the offsets) with `expect`/`expectOffsets`.
 * If mySubChar is non-NULL, its first `len` bytes become the substitution
 * character. Conversion errors are tolerated only for
 * UCNV_FROM_U_CALLBACK_STOP raising U_INVALID_CHAR_FOUND or
 * U_ILLEGAL_CHAR_FOUND.
 * Returns TRUE on success or when the converter is unavailable.
 */
UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
                const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
                const char *mySubChar, int8_t len)
{
    return nct_runFromUnicode(source, sourceLen, expect, expectLen,
                              codepage, callback, expectOffsets,
                              mySubChar, len, NULL, NULL);
}

/*
 * Converts `source` (sourcelen bytes in `codepage`) to Unicode using
 * `callback` with a NULL callback context and compares the first `expectlen`
 * UChars with `expect`.
 * If mySubChar is non-NULL, its first `len` bytes become the substitution
 * character. Conversion errors are tolerated only for
 * UCNV_TO_U_CALLBACK_STOP raising U_INVALID_CHAR_FOUND,
 * U_ILLEGAL_CHAR_FOUND or U_TRUNCATED_CHAR_FOUND.
 * Returns TRUE on success or when the converter is unavailable.
 */
UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
               const char *mySubChar, int8_t len)
{
    return nct_runToUnicode(source, sourcelen, expect, expectlen,
                            codepage, callback, expectOffsets,
                            mySubChar, len, NULL, NULL);
}

/*
 * Same as testConvertFromUnicode(), but installs `callback` together with
 * `context`, and tolerates a failing conversion only when its status equals
 * `expectedError`.
 */
UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
                const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
                const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
{
    return nct_runFromUnicode(source, sourceLen, expect, expectLen,
                              codepage, callback, expectOffsets,
                              mySubChar, len, context, &expectedError);
}

/*
 * Same as testConvertToUnicode(), but installs `callback` together with
 * `context`, and tolerates a failing conversion only when its status equals
 * `expectedError`.
 */
UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
               const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
{
    return nct_runToUnicode(source, sourcelen, expect, expectlen,
                            codepage, callback, expectOffsets,
                            mySubChar, len, context, &expectedError);
}

/*
 * Calls each ucnv_cb* write helper with NULL/-1 arguments while `status`
 * already holds U_USELESS_COLLATOR_ERROR, and verifies after every call that
 * the status value was left unchanged.
 */
static void TestCallBackFailure(void) {
    UErrorCode status = U_USELESS_COLLATOR_ERROR;

    ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
    if (status != U_USELESS_COLLATOR_ERROR) {
        log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
    }

    ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
    if (status != U_USELESS_COLLATOR_ERROR) {
        log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
    }

    ucnv_cbFromUWriteSub(NULL, -1, &status);
    if (status != U_USELESS_COLLATOR_ERROR) {
        log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
    }

    ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
    if (status != U_USELESS_COLLATOR_ERROR) {
        log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
    }
}