1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2013, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /* 7 ******************************************************************************** 8 * File NCCBTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda 7/21/1999 Testing error callback routines 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <ctype.h> 19 #include "cstring.h" 20 #include "unicode/uloc.h" 21 #include "unicode/ucnv.h" 22 #include "unicode/ucnv_err.h" 23 #include "cintltst.h" 24 #include "unicode/utypes.h" 25 #include "unicode/ustring.h" 26 #include "nccbtst.h" 27 #include "unicode/ucnv_cb.h" 28 #include "unicode/utf16.h" 29 30 #define NEW_MAX_BUFFER 999 31 32 #define nct_min(x,y) ((x<y) ? x : y) 33 #define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0])) 34 35 static int32_t gInBufferSize = 0; 36 static int32_t gOutBufferSize = 0; 37 static char gNuConvTestName[1024]; 38 39 static void printSeq(const uint8_t* a, int len) 40 { 41 int i=0; 42 log_verbose("\n{"); 43 while (i<len) 44 log_verbose("0x%02X, ", a[i++]); 45 log_verbose("}\n"); 46 } 47 48 static void printUSeq(const UChar* a, int len) 49 { 50 int i=0; 51 log_verbose("{"); 52 while (i<len) 53 log_verbose(" 0x%04x, ", a[i++]); 54 log_verbose("}\n"); 55 } 56 57 static void printSeqErr(const uint8_t* a, int len) 58 { 59 int i=0; 60 fprintf(stderr, "{"); 61 while (i<len) 62 fprintf(stderr, " 0x%02x, ", a[i++]); 63 fprintf(stderr, "}\n"); 64 } 65 66 static void printUSeqErr(const UChar* a, int len) 67 { 68 int i=0; 69 fprintf(stderr, "{"); 70 while (i<len) 71 fprintf(stderr, "0x%04x, ", a[i++]); 72 fprintf(stderr,"}\n"); 73 } 74 75 static void setNuConvTestName(const char *codepage, const char *direction) 76 { 77 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 78 codepage, 79 direction, 80 (int)gInBufferSize, 81 (int)gOutBufferSize); 82 } 83 84 85 static void TestCallBackFailure(void); 86 87 void addTestConvertErrorCallBack(TestNode** root); 88 89 void addTestConvertErrorCallBack(TestNode** root) 90 { 91 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); 92 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); 93 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); 94 /* BEGIN android-removed 95 To save space, Android does not build complete CJK conversion tables. 96 We skip the test here. 97 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack"); 98 END android-removed */ 99 100 #if !UCONFIG_NO_LEGACY_CONVERSION 101 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack"); 102 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack"); 103 #endif 104 105 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); 106 } 107 108 static void TestSkipCallBack() 109 { 110 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 111 TestSkip(1,NEW_MAX_BUFFER); 112 TestSkip(1,1); 113 TestSkip(NEW_MAX_BUFFER, 1); 114 } 115 116 static void TestStopCallBack() 117 { 118 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 119 TestStop(1,NEW_MAX_BUFFER); 120 TestStop(1,1); 121 TestStop(NEW_MAX_BUFFER, 1); 122 } 123 124 static void TestSubCallBack() 125 { 126 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 127 TestSub(1,NEW_MAX_BUFFER); 128 TestSub(1,1); 129 TestSub(NEW_MAX_BUFFER, 1); 130 131 #if !UCONFIG_NO_LEGACY_CONVERSION 132 TestEBCDIC_STATEFUL_Sub(1, 1); 133 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); 134 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); 135 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 136 #endif 137 } 138 139 static void TestSubWithValueCallBack() 140 { 141 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 142 TestSubWithValue(1,NEW_MAX_BUFFER); 143 TestSubWithValue(1,1); 144 TestSubWithValue(NEW_MAX_BUFFER, 1); 145 } 146 147 #if !UCONFIG_NO_LEGACY_CONVERSION 148 static void TestLegalAndOtherCallBack() 149 { 150 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 151 TestLegalAndOthers(1,NEW_MAX_BUFFER); 152 TestLegalAndOthers(1,1); 153 TestLegalAndOthers(NEW_MAX_BUFFER, 1); 154 } 155 156 static void TestSingleByteCallBack() 157 { 158 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 159 TestSingleByte(1,NEW_MAX_BUFFER); 160 TestSingleByte(1,1); 161 TestSingleByte(NEW_MAX_BUFFER, 1); 162 } 163 #endif 164 165 static void TestSkip(int32_t inputsize, int32_t outputsize) 166 { 167 static const uint8_t expskipIBM_949[]= { 168 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 169 170 static const uint8_t expskipIBM_943[] = { 171 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; 172 173 static const uint8_t expskipIBM_930[] = { 174 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; 175 176 gInBufferSize = inputsize; 177 gOutBufferSize = outputsize; 178 179 /*From Unicode*/ 180 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); 181 182 #if !UCONFIG_NO_LEGACY_CONVERSION 183 { 184 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 185 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 186 187 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; 188 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; 189 190 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 191 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", 192 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) 193 log_err("u-> ibm-949 with skip did not match.\n"); 194 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 195 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", 196 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) 197 log_err("u-> ibm-943 with skip did not match.\n"); 198 } 199 200 { 201 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 }; 202 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; 203 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 }; 204 205 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */ 206 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, 207 fromUBytes, sizeof(fromUBytes), 208 "ibm-930", 209 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, 210 NULL, 0) 211 ) { 212 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n"); 213 } 214 } 215 #endif 216 217 { 218 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 219 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; 220 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; 221 222 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 223 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; 224 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; 225 226 /* US-ASCII */ 227 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 228 usasciiFromUBytes, sizeof(usasciiFromUBytes), 229 "US-ASCII", 230 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 231 NULL, 0) 232 ) { 233 log_err("u->US-ASCII with skip did not match.\n"); 234 } 235 236 #if !UCONFIG_NO_LEGACY_CONVERSION 237 /* SBCS NLTC codepage 367 for US-ASCII */ 238 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 239 usasciiFromUBytes, sizeof(usasciiFromUBytes), 240 "ibm-367", 241 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 242 NULL, 0) 243 ) { 244 log_err("u->ibm-367 with skip did not match.\n"); 245 } 246 #endif 247 248 /* ISO-Latin-1 */ 249 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 250 latin1FromUBytes, sizeof(latin1FromUBytes), 251 "LATIN_1", 252 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 253 NULL, 0) 254 ) { 255 log_err("u->LATIN_1 with skip did not match.\n"); 256 } 257 258 #if !UCONFIG_NO_LEGACY_CONVERSION 259 /* windows-1252 */ 260 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 261 latin1FromUBytes, sizeof(latin1FromUBytes), 262 "windows-1252", 263 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 264 NULL, 0) 265 ) { 266 log_err("u->windows-1252 with skip did not match.\n"); 267 } 268 } 269 270 { 271 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 272 static const uint8_t toIBM943[]= { 0x61, 0x61 }; 273 static const int32_t offset[]= {0, 4}; 274 275 /* EUC_JP*/ 276 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 277 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 278 0x61, 0x8e, 0xe0, 279 }; 280 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; 281 282 /*EUC_TW*/ 283 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 284 static const uint8_t to_euc_tw[]={ 285 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 286 0x61, 0xe6, 0xca, 0x8a, 287 }; 288 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,}; 289 290 /*ISO-2022-JP*/ 291 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, }; 292 static const uint8_t to_iso_2022_jp[]={ 293 0x41, 294 0x42, 295 296 }; 297 static const int32_t from_iso_2022_jpOffs [] ={0,2}; 298 299 /*ISO-2022-JP*/ 300 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 301 static const uint8_t to_iso_2022_jp2[]={ 302 0x41, 303 0x43, 304 305 }; 306 static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; 307 308 /*ISO-2022-cn*/ 309 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 310 static const uint8_t to_iso_2022_cn[]={ 311 0x41, 0x42 312 }; 313 static const int32_t from_iso_2022_cnOffs [] ={ 314 0, 2 315 }; 316 317 /*ISO-2022-CN*/ 318 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 319 static const uint8_t to_iso_2022_cn1[]={ 320 0x41, 0x43 321 322 }; 323 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; 324 325 /*ISO-2022-kr*/ 326 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 327 static const uint8_t to_iso_2022_kr[]={ 328 0x1b, 0x24, 0x29, 0x43, 329 0x41, 330 0x0e, 0x25, 0x50, 331 0x25, 0x50, 332 0x0f, 0x42, 333 }; 334 static const int32_t from_iso_2022_krOffs [] ={ 335 -1,-1,-1,-1, 336 0, 337 1,1,1, 338 3,3, 339 4,4 340 }; 341 342 /*ISO-2022-kr*/ 343 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 344 static const uint8_t to_iso_2022_kr1[]={ 345 0x1b, 0x24, 0x29, 0x43, 346 0x41, 347 0x0e, 0x25, 0x50, 348 0x25, 0x50, 349 350 }; 351 static const int32_t from_iso_2022_krOffs1 [] ={ 352 -1,-1,-1,-1, 353 0, 354 1,1,1, 355 3,3, 356 357 }; 358 /* HZ encoding */ 359 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 360 361 static const uint8_t to_hz[]={ 362 0x7e, 0x7d, 0x41, 363 0x7e, 0x7b, 0x26, 0x30, 364 0x26, 0x30, 365 0x7e, 0x7d, 0x42, 366 367 }; 368 static const int32_t from_hzOffs [] ={ 369 0,0,0, 370 1,1,1,1, 371 3,3, 372 4,4,4,4 373 }; 374 375 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 376 377 static const uint8_t to_hz1[]={ 378 0x7e, 0x7d, 0x41, 379 0x7e, 0x7b, 0x26, 0x30, 380 0x26, 0x30, 381 382 383 }; 384 static const int32_t from_hzOffs1 [] ={ 385 0,0,0, 386 1,1,1,1, 387 3,3, 388 389 }; 390 391 #endif 392 393 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 394 395 static const uint8_t to_SCSU[]={ 396 0x41, 397 0x42 398 399 400 }; 401 static const int32_t from_SCSUOffs [] ={ 402 0, 403 2, 404 405 }; 406 407 #if !UCONFIG_NO_LEGACY_CONVERSION 408 /* ISCII */ 409 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 410 static const uint8_t to_iscii[]={ 411 0x41, 412 0x42, 413 }; 414 static const int32_t from_isciiOffs [] ={ 415 0,2, 416 417 }; 418 /*ISCII*/ 419 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 420 static const uint8_t to_iscii1[]={ 421 0x44, 422 0x43, 423 424 }; 425 static const int32_t from_isciiOffs1 [] ={0,2}; 426 427 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 428 toIBM943, sizeof(toIBM943), "ibm-943", 429 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) 430 log_err("u-> ibm-943 with skip did not match.\n"); 431 432 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 433 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", 434 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) 435 log_err("u-> euc-jp with skip did not match.\n"); 436 437 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 438 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 439 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) 440 log_err("u-> euc-tw with skip did not match.\n"); 441 442 /*iso_2022_jp*/ 443 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 444 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 445 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) 446 log_err("u-> iso-2022-jp with skip did not match.\n"); 447 448 /* with context */ 449 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 450 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 451 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 452 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 453 454 /*iso_2022_cn*/ 455 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 456 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 457 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) 458 log_err("u-> iso-2022-cn with skip did not match.\n"); 459 /*with context*/ 460 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), 461 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", 462 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 463 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 464 465 /*iso_2022_kr*/ 466 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 467 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 468 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) 469 log_err("u-> iso-2022-kr with skip did not match.\n"); 470 /*with context*/ 471 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), 472 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", 473 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 474 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 475 476 /*hz*/ 477 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 478 to_hz, sizeof(to_hz), "HZ", 479 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) 480 log_err("u-> HZ with skip did not match.\n"); 481 /*with context*/ 482 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]), 483 to_hz1, sizeof(to_hz1), "hz", 484 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 485 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 486 #endif 487 488 /*SCSU*/ 489 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 490 to_SCSU, sizeof(to_SCSU), "SCSU", 491 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) 492 log_err("u-> SCSU with skip did not match.\n"); 493 494 #if !UCONFIG_NO_LEGACY_CONVERSION 495 /*ISCII*/ 496 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 497 to_iscii, sizeof(to_iscii), "ISCII,version=0", 498 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) 499 log_err("u-> iscii with skip did not match.\n"); 500 /*with context*/ 501 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]), 502 to_iscii1, sizeof(to_iscii1), "ISCII,version=0", 503 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 504 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 505 #endif 506 } 507 508 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 509 { 510 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 511 0xFB, 0xEE, 0x28, /* from source offset 0 */ 512 0x24, 0x1E, 0x52, 513 0xB2, 514 0x20, 515 0xB3, 516 0xB1, 517 0x0D, 518 0x0A, 519 520 0x20, /* from 8 */ 521 0x00, 522 0xD0, 0x6C, 523 0xB6, 524 0xD8, 0xA5, 525 0x20, 526 0x68, 527 0x59, 528 529 0xF9, 0x28, /* from 16 */ 530 0x6D, 531 0x20, 532 0x73, 533 0xE0, 0x2D, 534 0xDE, 0x43, 535 0xD0, 0x33, 536 0x20, 537 538 0xFA, 0x83, /* from 24 */ 539 0x25, 0x01, 540 0xFB, 0x16, 0x87, 541 0x4B, 0x16, 542 0x20, 543 0xE6, 0xBD, 544 0xEB, 0x5B, 545 0x4B, 0xCC, 546 547 0xF9, 0xA2, /* from 32 */ 548 0xFC, 0x10, 0x3E, 549 0xFE, 0x16, 0x3A, 0x8C, 550 0x20, 551 0xFC, 0x03, 0xAC, 552 553 0x01, /* from 41 */ 554 0xDE, 0x83, 555 0x20, 556 0x09 557 }; 558 static const UChar expected[]={ 559 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ 560 0x0063, 0x0061, 0x000D, 0x000A, 561 562 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ 563 0x0930, 0x0020, 0x0918, 0x0909, 564 565 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ 566 0x4000, 0x4E00, 0x7777, 0x0020, 567 568 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ 569 0x0020, 0xD7A3, 0xDC00, 0xD800, 570 571 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ 572 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 573 574 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ 575 0x0009 576 }; 577 static const int32_t offsets[]={ 578 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, 579 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, 580 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 581 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, 582 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, 583 41, 42, 42, 43, 44 584 }; 585 586 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */ 587 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 588 sampleText, sizeof(sampleText), 589 "BOCU-1", 590 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 591 ) { 592 log_err("u->BOCU-1 with skip did not match.\n"); 593 } 594 } 595 596 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 597 { 598 const uint8_t sampleText[]={ 599 0x61, /* 'a' */ 600 0xc4, 0xb5, /* U+0135 */ 601 0xed, 0x80, 0xa0, /* Hangul U+d020 */ 602 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ 603 0xee, 0x80, 0x80, /* PUA U+e000 */ 604 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */ 605 0x62, /* 'b' */ 606 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */ 607 0xd0, 0x80 /* U+0400 */ 608 }; 609 UChar expected[]={ 610 0x0061, 611 0x0135, 612 0xd020, 613 0xd801, 0xdc01, 614 0xe000, 615 0xdc01, 616 0x0062, 617 0xd801, 618 0x0400 619 }; 620 int32_t offsets[]={ 621 0, 622 1, 1, 623 2, 2, 2, 624 3, 3, 3, 4, 4, 4, 625 5, 5, 5, 626 6, 6, 6, 627 7, 628 8, 8, 8, 629 9, 9 630 }; 631 632 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */ 633 634 /* without offsets */ 635 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 636 sampleText, sizeof(sampleText), 637 "CESU-8", 638 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) 639 ) { 640 log_err("u->CESU-8 with skip did not match.\n"); 641 } 642 643 /* with offsets */ 644 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 645 sampleText, sizeof(sampleText), 646 "CESU-8", 647 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 648 ) { 649 log_err("u->CESU-8 with skip did not match.\n"); 650 } 651 } 652 653 /*to Unicode*/ 654 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); 655 656 #if !UCONFIG_NO_LEGACY_CONVERSION 657 { 658 659 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; 660 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 661 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 662 663 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; 664 static const int32_t fromIBM943Offs [] = { 0, 2, 4}; 665 static const int32_t fromIBM930Offs [] = { 1, 3, 5}; 666 667 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), 668 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949", 669 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) 670 log_err("ibm-949->u with skip did not match.\n"); 671 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), 672 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943", 673 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) 674 log_err("ibm-943->u with skip did not match.\n"); 675 676 677 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), 678 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 679 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) 680 log_err("ibm-930->u with skip did not match.\n"); 681 682 683 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930), 684 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 685 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 686 log_err("ibm-930->u with skip did not match.\n"); 687 } 688 #endif 689 690 { 691 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; 692 static const UChar usasciiToU[] = { 0x61, 0x31 }; 693 static const int32_t usasciiToUOffsets[] = { 0, 2 }; 694 695 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; 696 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; 697 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; 698 699 /* US-ASCII */ 700 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 701 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 702 "US-ASCII", 703 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 704 NULL, 0) 705 ) { 706 log_err("US-ASCII->u with skip did not match.\n"); 707 } 708 709 #if !UCONFIG_NO_LEGACY_CONVERSION 710 /* SBCS NLTC codepage 367 for US-ASCII */ 711 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 712 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 713 "ibm-367", 714 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 715 NULL, 0) 716 ) { 717 log_err("ibm-367->u with skip did not match.\n"); 718 } 719 #endif 720 721 /* ISO-Latin-1 */ 722 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 723 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 724 "LATIN_1", 725 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 726 NULL, 0) 727 ) { 728 log_err("LATIN_1->u with skip did not match.\n"); 729 } 730 731 #if !UCONFIG_NO_LEGACY_CONVERSION 732 /* windows-1252 */ 733 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 734 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 735 "windows-1252", 736 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 737 NULL, 0) 738 ) { 739 log_err("windows-1252->u with skip did not match.\n"); 740 } 741 #endif 742 } 743 744 #if !UCONFIG_NO_LEGACY_CONVERSION 745 { 746 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 747 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 748 }; 749 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 750 }; 751 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; 752 753 754 /* euc-jp*/ 755 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 756 0x8f, 0xda, 0xa1, /*unassigned*/ 757 0x8e, 0xe0, 758 }; 759 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; 760 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; 761 762 /*EUC_TW*/ 763 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 764 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 765 0xe6, 0xca, 0x8a, 766 }; 767 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, }; 768 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; 769 /*iso-2022-jp*/ 770 static const uint8_t sampleTxt_iso_2022_jp[]={ 771 0x41, 772 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ 773 0x1b, 0x28, 0x42, 0x42, 774 775 }; 776 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; 777 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; 778 779 /*iso-2022-cn*/ 780 static const uint8_t sampleTxt_iso_2022_cn[]={ 781 0x0f, 0x41, 0x44, 782 0x1B, 0x24, 0x29, 0x47, 783 0x0E, 0x40, 0x6f, /*unassigned*/ 784 0x0f, 0x42, 785 786 }; 787 788 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; 789 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; 790 791 /*iso-2022-kr*/ 792 static const uint8_t sampleTxt_iso_2022_kr[]={ 793 0x1b, 0x24, 0x29, 0x43, 794 0x41, 795 0x0E, 0x7f, 0x1E, 796 0x0e, 0x25, 0x50, 797 0x0f, 0x51, 798 0x42, 0x43, 799 800 }; 801 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43}; 802 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 }; 803 804 /*hz*/ 805 static const uint8_t sampleTxt_hz[]={ 806 0x41, 807 0x7e, 0x7b, 0x26, 0x30, 808 0x7f, 0x1E, /*unassigned*/ 809 0x26, 0x30, 810 0x7e, 0x7d, 0x42, 811 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 812 0x7e, 0x7d, 0x42, 813 }; 814 static const UChar hztoUnicode[]={ 815 0x41, 816 0x03a0, 817 0x03A0, 818 0x42, 819 0x42,}; 820 821 static const int32_t from_hzOffs [] ={0,3,7,11,18, }; 822 823 /*ISCII*/ 824 static const uint8_t sampleTxt_iscii[]={ 825 0x41, 826 0xa1, 827 0xEB, /*unassigned*/ 828 0x26, 829 0x30, 830 0xa2, 831 0xEC, /*unassigned*/ 832 0x42, 833 }; 834 static const UChar isciitoUnicode[]={ 835 0x41, 836 0x0901, 837 0x26, 838 0x30, 839 0x0902, 840 0x42, 841 }; 842 843 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; 844 845 /*LMBCS*/ 846 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, 847 0x12, 0x92, 0xa0, /*unassigned*/ 848 0x12, 0x92, 0xA1, 849 }; 850 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; 851 static const int32_t fromLMBCS[] = {0, 6}; 852 853 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 854 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 855 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 856 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 857 858 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 859 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 860 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 861 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 862 863 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 864 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP", 865 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) 866 log_err("euc-jp->u with skip did not match.\n"); 867 868 869 870 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 871 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 872 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) 873 log_err("euc-tw->u with skip did not match.\n"); 874 875 876 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 877 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 878 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) 879 log_err("iso-2022-jp->u with skip did not match.\n"); 880 881 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 882 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 883 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) 884 log_err("iso-2022-cn->u with skip did not match.\n"); 885 886 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 887 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 888 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) 889 log_err("iso-2022-kr->u with skip did not match.\n"); 890 891 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 892 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 893 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) 894 log_err("HZ->u with skip did not match.\n"); 895 896 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 897 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 898 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) 899 log_err("iscii->u with skip did not match.\n"); 900 901 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), 902 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1", 903 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) 904 log_err("LMBCS->u with skip did not match.\n"); 905 906 } 907 #endif 908 909 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); 910 { 911 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 912 0xe0, 0x80, 0x61,}; 913 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; 914 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; 915 916 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 917 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 918 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 919 log_err("utf8->u with skip did not match.\n");; 920 } 921 922 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); 923 { 924 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 925 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe}; 926 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 927 928 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 929 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 930 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 931 log_err("scsu->u with skip did not match.\n"); 932 } 933 934 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 935 { 936 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 937 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ 938 0x24, 0x1E, 0x52, /* 3 */ 939 0xB2, /* 6 */ 940 0x20, /* 7 */ 941 0x40, 0x07, /* 8 - wrong trail byte */ 942 0xB3, /* 10 */ 943 0xB1, /* 11 */ 944 0xD0, 0x20, /* 12 - wrong trail byte */ 945 0x0D, /* 14 */ 946 0x0A, /* 15 */ 947 0x20, /* 16 */ 948 0x00, /* 17 */ 949 0xD0, 0x6C, /* 18 */ 950 0xB6, /* 20 */ 951 0xD8, 0xA5, /* 21 */ 952 0x20, /* 23 */ 953 0x68, /* 24 */ 954 0x59, /* 25 */ 955 0xF9, 0x28, /* 26 */ 956 0x6D, /* 28 */ 957 0x20, /* 29 */ 958 0x73, /* 30 */ 959 0xE0, 0x2D, /* 31 */ 960 0xDE, 0x43, /* 33 */ 961 0xD0, 0x33, /* 35 */ 962 0x20, /* 37 */ 963 0xFA, 0x83, /* 38 */ 964 0x25, 0x01, /* 40 */ 965 0xFB, 0x16, 0x87, /* 42 */ 966 0x4B, 0x16, /* 45 */ 967 0x20, /* 47 */ 968 0xE6, 0xBD, /* 48 */ 969 0xEB, 0x5B, /* 50 */ 970 0x4B, 0xCC, /* 52 */ 971 0xF9, 0xA2, /* 54 */ 972 0xFC, 0x10, 0x3E, /* 56 */ 973 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ 974 0x20, /* 63 */ 975 0xFC, 0x03, 0xAC, /* 64 */ 976 0xFF, /* 67 - FF just resets the state without encoding anything */ 977 0x01, /* 68 */ 978 0xDE, 0x83, /* 69 */ 979 0x20, /* 71 */ 980 0x09 /* 72 */ 981 }; 982 UChar expected[]={ 983 0xFEFF, 0x0061, 0x0062, 0x0020, 984 0x0063, 0x0061, 0x000D, 0x000A, 985 0x0020, 0x0000, 0x00DF, 0x00E6, 986 0x0930, 0x0020, 0x0918, 0x0909, 987 0x3086, 0x304D, 0x0020, 0x3053, 988 0x4000, 0x4E00, 0x7777, 0x0020, 989 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, 990 0x0020, 0xD7A3, 0xDC00, 0xD800, 991 0xD800, 0xDC00, 0xD845, 0xDDDD, 992 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 993 0xDFFF, 0x0001, 0x0E40, 0x0020, 994 0x0009 995 }; 996 int32_t offsets[]={ 997 0, 3, 6, 7, /* skip 8, */ 998 10, 11, /* skip 12, */ 999 14, 15, 16, 17, 18, 1000 20, 21, 23, 24, 25, 26, 28, 29, 1001 30, 31, 33, 35, 37, 38, 1002 40, 42, 45, 47, 48, 1003 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, 1004 63, 64, /* trail */ 64, /* reset only 67, */ 1005 68, 69, 1006 71, 72 1007 }; 1008 1009 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1010 expected, ARRAY_LENGTH(expected), "BOCU-1", 1011 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1012 ) { 1013 log_err("BOCU-1->u with skip did not match.\n"); 1014 } 1015 } 1016 1017 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 1018 { 1019 const uint8_t sampleText[]={ 1020 0x61, /* 0 'a' */ 1021 0xc0, 0x80, /* 1 non-shortest form */ 1022 0xc4, 0xb5, /* 3 U+0135 */ 1023 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ 1024 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */ 1025 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ 1026 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */ 1027 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */ 1028 0x62, /* 24 'b' */ 1029 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */ 1030 0xed, 0xa0, /* 28 incomplete sequence */ 1031 0xd0, 0x80 /* 30 U+0400 */ 1032 }; 1033 UChar expected[]={ 1034 0x0061, 1035 /* skip */ 1036 0x0135, 1037 0xd020, 1038 0xd801, 0xdc01, 1039 0xe000, 1040 0xdc01, 1041 /* skip */ 1042 0x0062, 1043 0xd801, 1044 0x0400 1045 }; 1046 int32_t offsets[]={ 1047 0, 1048 /* skip 1, */ 1049 3, 1050 5, 1051 8, 11, 1052 14, 1053 17, 1054 /* skip 20, 20, */ 1055 24, 1056 25, 1057 /* skip 28 */ 1058 30 1059 }; 1060 1061 /* without offsets */ 1062 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1063 expected, ARRAY_LENGTH(expected), "CESU-8", 1064 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) 1065 ) { 1066 log_err("CESU-8->u with skip did not match.\n"); 1067 } 1068 1069 /* with offsets */ 1070 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1071 expected, ARRAY_LENGTH(expected), "CESU-8", 1072 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1073 ) { 1074 log_err("CESU-8->u with skip did not match.\n"); 1075 } 1076 } 1077 } 1078 1079 static void TestStop(int32_t inputsize, int32_t outputsize) 1080 { 1081 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1082 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1083 1084 static const uint8_t expstopIBM_949[]= { 1085 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; 1086 1087 static const uint8_t expstopIBM_943[] = { 1088 0x9f, 0xaf, 0x9f, 0xb1}; 1089 1090 static const uint8_t expstopIBM_930[] = { 1091 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; 1092 1093 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; 1094 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; 1095 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; 1096 1097 1098 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; 1099 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; 1100 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; 1101 1102 static const int32_t fromIBM949Offs [] = { 0, 1, 3}; 1103 static const int32_t fromIBM943Offs [] = { 0, 2}; 1104 static const int32_t fromIBM930Offs [] = { 1, 3}; 1105 1106 gInBufferSize = inputsize; 1107 gOutBufferSize = outputsize; 1108 1109 /*From Unicode*/ 1110 1111 #if !UCONFIG_NO_LEGACY_CONVERSION 1112 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1113 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", 1114 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) 1115 log_err("u-> ibm-949 with stop did not match.\n"); 1116 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1117 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", 1118 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) 1119 log_err("u-> ibm-943 with stop did not match.\n"); 1120 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1121 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", 1122 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) 1123 log_err("u-> ibm-930 with stop did not match.\n"); 1124 1125 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); 1126 { 1127 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1128 static const uint8_t toIBM943[]= { 0x61,}; 1129 static const int32_t offset[]= {0,} ; 1130 1131 /*EUC_JP*/ 1132 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1133 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; 1134 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; 1135 1136 /*EUC_TW*/ 1137 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1138 static const uint8_t to_euc_tw[]={ 1139 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; 1140 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; 1141 1142 /*ISO-2022-JP*/ 1143 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; 1144 static const uint8_t to_iso_2022_jp[]={ 1145 0x41, 1146 1147 }; 1148 static const int32_t from_iso_2022_jpOffs [] ={0,}; 1149 1150 /*ISO-2022-cn*/ 1151 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1152 static const uint8_t to_iso_2022_cn[]={ 1153 0x41, 1154 1155 }; 1156 static const int32_t from_iso_2022_cnOffs [] ={ 1157 0,0, 1158 2,2, 1159 }; 1160 1161 /*ISO-2022-kr*/ 1162 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 1163 static const uint8_t to_iso_2022_kr[]={ 1164 0x1b, 0x24, 0x29, 0x43, 1165 0x41, 1166 0x0e, 0x25, 0x50, 1167 }; 1168 static const int32_t from_iso_2022_krOffs [] ={ 1169 -1,-1,-1,-1, 1170 0, 1171 1,1,1, 1172 }; 1173 1174 /* HZ encoding */ 1175 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1176 1177 static const uint8_t to_hz[]={ 1178 0x7e, 0x7d, 0x41, 1179 0x7e, 0x7b, 0x26, 0x30, 1180 1181 }; 1182 static const int32_t from_hzOffs [] ={ 1183 0, 0,0, 1184 1,1,1,1, 1185 }; 1186 1187 /*ISCII*/ 1188 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1189 static const uint8_t to_iscii[]={ 1190 0x41, 1191 }; 1192 static const int32_t from_isciiOffs [] ={ 1193 0, 1194 }; 1195 1196 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 1197 toIBM943, sizeof(toIBM943), "ibm-943", 1198 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) 1199 log_err("u-> ibm-943 with stop did not match.\n"); 1200 1201 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1202 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", 1203 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) 1204 log_err("u-> euc-jp with stop did not match.\n"); 1205 1206 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1207 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1208 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1209 log_err("u-> euc-tw with stop did not match.\n"); 1210 1211 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1212 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1213 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1214 log_err("u-> iso-2022-jp with stop did not match.\n"); 1215 1216 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1217 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1218 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1219 log_err("u-> iso-2022-jp with stop did not match.\n"); 1220 1221 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 1222 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 1223 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) 1224 log_err("u-> iso-2022-cn with stop did not match.\n"); 1225 1226 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 1227 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 1228 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) 1229 log_err("u-> iso-2022-kr with stop did not match.\n"); 1230 1231 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 1232 to_hz, sizeof(to_hz), "HZ", 1233 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) 1234 log_err("u-> HZ with stop did not match.\n");\ 1235 1236 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 1237 to_iscii, sizeof(to_iscii), "ISCII,version=0", 1238 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) 1239 log_err("u-> iscii with stop did not match.\n"); 1240 1241 1242 } 1243 #endif 1244 1245 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); 1246 { 1247 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1248 1249 static const uint8_t to_SCSU[]={ 1250 0x41, 1251 1252 }; 1253 int32_t from_SCSUOffs [] ={ 1254 0, 1255 1256 }; 1257 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1258 to_SCSU, sizeof(to_SCSU), "SCSU", 1259 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) 1260 log_err("u-> SCSU with skip did not match.\n"); 1261 1262 } 1263 1264 /*to Unicode*/ 1265 1266 #if !UCONFIG_NO_LEGACY_CONVERSION 1267 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), 1268 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949", 1269 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) 1270 log_err("ibm-949->u with stop did not match.\n"); 1271 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), 1272 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943", 1273 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) 1274 log_err("ibm-943->u with stop did not match.\n"); 1275 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), 1276 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930", 1277 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) 1278 log_err("ibm-930->u with stop did not match.\n"); 1279 1280 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); 1281 { 1282 1283 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1284 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1285 }; 1286 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; 1287 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; 1288 1289 1290 /*EUC-JP*/ 1291 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1292 0x8f, 0xda, 0xa1, /*unassigned*/ 1293 0x8e, 0xe0, 1294 }; 1295 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; 1296 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; 1297 1298 /*EUC_TW*/ 1299 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1300 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1301 0xe6, 0xca, 0x8a, 1302 }; 1303 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; 1304 int32_t from_euc_twOffs [] ={ 0, 1, 3}; 1305 1306 1307 1308 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 1309 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1310 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1311 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); 1312 1313 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1314 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP", 1315 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) 1316 log_err("euc-jp->u with stop did not match.\n"); 1317 1318 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1319 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1320 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1321 log_err("euc-tw->u with stop did not match.\n"); 1322 } 1323 #endif 1324 1325 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); 1326 { 1327 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1328 0xe0, 0x80, 0x61,}; 1329 static const UChar expected1[] = { 0x0031, 0x4e8c,}; 1330 static const int32_t offsets1[] = { 0x0000, 0x0001}; 1331 1332 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1333 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 1334 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1335 log_err("utf8->u with stop did not match.\n");; 1336 } 1337 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); 1338 { 1339 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04}; 1340 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061}; 1341 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; 1342 1343 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1344 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1345 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1346 log_err("scsu->u with stop did not match.\n");; 1347 } 1348 1349 } 1350 1351 static void TestSub(int32_t inputsize, int32_t outputsize) 1352 { 1353 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1354 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1355 1356 static const uint8_t expsubIBM_949[] = 1357 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; 1358 1359 static const uint8_t expsubIBM_943[] = { 1360 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; 1361 1362 static const uint8_t expsubIBM_930[] = { 1363 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; 1364 1365 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 }; 1366 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1367 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1368 1369 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1370 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; 1371 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; 1372 1373 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; 1374 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; 1375 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; 1376 1377 gInBufferSize = inputsize; 1378 gOutBufferSize = outputsize; 1379 1380 /*from unicode*/ 1381 1382 #if !UCONFIG_NO_LEGACY_CONVERSION 1383 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1384 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", 1385 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) 1386 log_err("u-> ibm-949 with subst did not match.\n"); 1387 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1388 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", 1389 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) 1390 log_err("u-> ibm-943 with subst did not match.\n"); 1391 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1392 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", 1393 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) 1394 log_err("u-> ibm-930 with subst did not match.\n"); 1395 1396 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1397 { 1398 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1399 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; 1400 static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; 1401 1402 1403 /* EUC_JP*/ 1404 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1405 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1406 0xf4, 0xfe, 0xf4, 0xfe, 1407 0x61, 0x8e, 0xe0, 1408 }; 1409 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7}; 1410 1411 /*EUC_TW*/ 1412 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1413 static const uint8_t to_euc_tw[]={ 1414 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1415 0xfd, 0xfe, 0xfd, 0xfe, 1416 0x61, 0xe6, 0xca, 0x8a, 1417 }; 1418 1419 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; 1420 1421 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 1422 toIBM943, sizeof(toIBM943), "ibm-943", 1423 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) 1424 log_err("u-> ibm-943 with substitute did not match.\n"); 1425 1426 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1427 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", 1428 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) 1429 log_err("u-> euc-jp with substitute did not match.\n"); 1430 1431 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1432 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1433 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1434 log_err("u-> euc-tw with substitute did not match.\n"); 1435 } 1436 #endif 1437 1438 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1439 { 1440 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1441 1442 const uint8_t to_SCSU[]={ 1443 0x41, 1444 0x0e, 0xff,0xfd, 1445 0x42 1446 1447 1448 }; 1449 int32_t from_SCSUOffs [] ={ 1450 0, 1451 1,1,1, 1452 2, 1453 1454 }; 1455 const uint8_t to_SCSU_1[]={ 1456 0x41, 1457 1458 }; 1459 int32_t from_SCSUOffs_1 [] ={ 1460 0, 1461 1462 }; 1463 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1464 to_SCSU, sizeof(to_SCSU), "SCSU", 1465 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) 1466 log_err("u-> SCSU with substitute did not match.\n"); 1467 1468 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1469 to_SCSU_1, sizeof(to_SCSU_1), "SCSU", 1470 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 1471 log_err("u-> SCSU with substitute did not match.\n"); 1472 } 1473 1474 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1475 { 1476 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,}; 1477 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, 1478 0xf0, 0x90, 0x90, 0x81, 1479 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 1480 0xef, 0xbf, 0xbf, 0x61, 1481 1482 }; 1483 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 }; 1484 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]), 1485 expectedUTF8, sizeof(expectedUTF8), "utf8", 1486 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { 1487 log_err("u-> utf8 with stop did not match.\n"); 1488 } 1489 } 1490 1491 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1492 { 1493 static const UChar in[]={ 0x0041, 0xfeff }; 1494 1495 static const uint8_t out[]={ 1496 #if U_IS_BIG_ENDIAN 1497 0xfe, 0xff, 1498 0x00, 0x41, 1499 0xfe, 0xff 1500 #else 1501 0xff, 0xfe, 1502 0x41, 0x00, 1503 0xff, 0xfe 1504 #endif 1505 }; 1506 static const int32_t offsets[]={ 1507 -1, -1, 0, 0, 1, 1 1508 }; 1509 1510 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1511 out, sizeof(out), "UTF-16", 1512 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1513 ) { 1514 log_err("u->UTF-16 with substitute did not match.\n"); 1515 } 1516 } 1517 1518 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1519 { 1520 static const UChar in[]={ 0x0041, 0xfeff }; 1521 1522 static const uint8_t out[]={ 1523 #if U_IS_BIG_ENDIAN 1524 0x00, 0x00, 0xfe, 0xff, 1525 0x00, 0x00, 0x00, 0x41, 1526 0x00, 0x00, 0xfe, 0xff 1527 #else 1528 0xff, 0xfe, 0x00, 0x00, 1529 0x41, 0x00, 0x00, 0x00, 1530 0xff, 0xfe, 0x00, 0x00 1531 #endif 1532 }; 1533 static const int32_t offsets[]={ 1534 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 1535 }; 1536 1537 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1538 out, sizeof(out), "UTF-32", 1539 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1540 ) { 1541 log_err("u->UTF-32 with substitute did not match.\n"); 1542 } 1543 } 1544 1545 /*to unicode*/ 1546 1547 #if !UCONFIG_NO_LEGACY_CONVERSION 1548 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), 1549 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949", 1550 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) 1551 log_err("ibm-949->u with substitute did not match.\n"); 1552 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), 1553 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943", 1554 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) 1555 log_err("ibm-943->u with substitute did not match.\n"); 1556 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), 1557 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930", 1558 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) 1559 log_err("ibm-930->u with substitute did not match.\n"); 1560 1561 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1562 { 1563 1564 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1565 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1566 }; 1567 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 1568 }; 1569 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; 1570 1571 1572 /* EUC_JP*/ 1573 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1574 0x8f, 0xda, 0xa1, /*unassigned*/ 1575 0x8e, 0xe0, 0x8a 1576 }; 1577 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a }; 1578 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; 1579 1580 /*EUC_TW*/ 1581 const uint8_t sampleTxt_euc_tw[]={ 1582 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1583 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1584 0xe6, 0xca, 0x8a, 1585 }; 1586 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, }; 1587 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; 1588 1589 1590 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 1591 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1592 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1593 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); 1594 1595 1596 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1597 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP", 1598 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) 1599 log_err("euc-jp->u with substitute did not match.\n"); 1600 1601 1602 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1603 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1604 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1605 log_err("euc-tw->u with substitute did not match.\n"); 1606 1607 1608 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1609 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP", 1610 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) 1611 log_err("euc-jp->u with substitute did not match.\n"); 1612 } 1613 #endif 1614 1615 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1616 { 1617 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1618 0xe0, 0x80, 0x61,}; 1619 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 1620 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 1621 1622 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1623 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 1624 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1625 log_err("utf8->u with substitute did not match.\n");; 1626 } 1627 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1628 { 1629 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 1630 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd}; 1631 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 1632 1633 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1634 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1635 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1636 log_err("scsu->u with stop did not match.\n");; 1637 } 1638 1639 #if !UCONFIG_NO_LEGACY_CONVERSION 1640 log_verbose("Testing ibm-930 subchar/subchar1\n"); 1641 { 1642 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; 1643 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f }; 1644 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1645 1646 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a }; 1647 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 }; 1648 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 }; 1649 1650 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930", 1651 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1652 ) { 1653 log_err("u->ibm-930 subchar/subchar1 did not match.\n"); 1654 } 1655 1656 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930", 1657 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1658 ) { 1659 log_err("ibm-930->u subchar/subchar1 did not match.\n"); 1660 } 1661 } 1662 1663 log_verbose("Testing GB 18030 with substitute callbacks\n"); 1664 { 1665 static const UChar u2[]={ 1666 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; 1667 static const uint8_t gb2[]={ 1668 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; 1669 static const int32_t offsets2[]={ 1670 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; 1671 1672 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030", 1673 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1674 ) { 1675 log_err("gb18030->u with substitute did not match.\n"); 1676 } 1677 } 1678 #endif 1679 1680 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); 1681 { 1682 static const uint8_t utf7[]={ 1683 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */ 1684 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 1685 }; 1686 static const UChar unicode[]={ 1687 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 1688 }; 1689 static const int32_t offsets[]={ 1690 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24 1691 }; 1692 1693 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7", 1694 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1695 ) { 1696 log_err("UTF-7->u with substitute did not match.\n"); 1697 } 1698 } 1699 1700 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); 1701 { 1702 static const uint8_t 1703 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, 1704 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, 1705 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; 1706 1707 static const UChar 1708 out1[]={ 0x4e00, 0xfeff }, 1709 out2[]={ 0x004e, 0xfffe }, 1710 out3[]={ 0xfefd, 0x4e00, 0xfeff }; 1711 1712 static const int32_t 1713 offsets1[]={ 2, 4 }, 1714 offsets2[]={ 2, 4 }, 1715 offsets3[]={ 0, 2, 4 }; 1716 1717 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16", 1718 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1719 ) { 1720 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); 1721 } 1722 1723 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16", 1724 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1725 ) { 1726 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); 1727 } 1728 1729 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16", 1730 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1731 ) { 1732 log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); 1733 } 1734 } 1735 1736 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); 1737 { 1738 static const uint8_t 1739 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }, 1740 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }, 1741 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, 1742 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 }; 1743 1744 static const UChar 1745 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff }, 1746 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe }, 1747 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd }, 1748 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 }; 1749 1750 static const int32_t 1751 offsets1[]={ 4, 4, 8 }, 1752 offsets2[]={ 4, 4, 8 }, 1753 offsets3[]={ 0, 4, 4, 8, 12 }, 1754 offsets4[]={ 0, 0, 4, 8 }; 1755 1756 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32", 1757 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1758 ) { 1759 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); 1760 } 1761 1762 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32", 1763 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1764 ) { 1765 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); 1766 } 1767 1768 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32", 1769 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1770 ) { 1771 log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); 1772 } 1773 1774 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32", 1775 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0) 1776 ) { 1777 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n"); 1778 } 1779 } 1780 } 1781 1782 static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 1783 { 1784 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1785 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1786 1787 const uint8_t expsubwvalIBM_949[]= { 1788 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 1789 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; 1790 1791 const uint8_t expsubwvalIBM_943[]= { 1792 0x9f, 0xaf, 0x9f, 0xb1, 1793 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; 1794 1795 const uint8_t expsubwvalIBM_930[] = { 1796 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f }; 1797 1798 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; 1799 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; 1800 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ 1801 1802 gInBufferSize = inputsize; 1803 gOutBufferSize = outputsize; 1804 1805 /*from Unicode*/ 1806 1807 #if !UCONFIG_NO_LEGACY_CONVERSION 1808 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1809 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", 1810 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) 1811 log_err("u-> ibm-949 with subst with value did not match.\n"); 1812 1813 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1814 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", 1815 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) 1816 log_err("u-> ibm-943 with sub with value did not match.\n"); 1817 1818 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1819 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", 1820 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) 1821 log_err("u-> ibm-930 with subst with value did not match.\n"); 1822 1823 1824 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 1825 { 1826 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1827 static const uint8_t toIBM943[]= { 0x61, 1828 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1829 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1830 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1831 0x61 }; 1832 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 1833 1834 1835 /* EUC_JP*/ 1836 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, }; 1837 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1838 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1839 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1840 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1841 0x61, 0x8e, 0xe0, 1842 }; 1843 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 1844 3, 3, 3, 3, 3, 3, 1845 3, 3, 3, 3, 3, 3, 1846 5, 5, 5, 5, 5, 5, 1847 6, 7, 7, 1848 }; 1849 1850 /*EUC_TW*/ 1851 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1852 static const uint8_t to_euc_tw[]={ 1853 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1854 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1855 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1856 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1857 0x61, 0xe6, 0xca, 0x8a, 1858 }; 1859 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 1860 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 1861 6, 7, 7, 8, 1862 }; 1863 /*ISO-2022-JP*/ 1864 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; 1865 static const uint8_t to_iso_2022_jp1[]={ 1866 0x1b, 0x24, 0x42, 0x21, 0x21, 1867 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1868 0x1b, 0x24, 0x42, 0x21, 0x22, 1869 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1870 0x42, 1871 }; 1872 1873 static const int32_t from_iso_2022_jpOffs1 [] ={ 1874 0,0,0,0,0, 1875 1,1,1,1,1,1,1,1,1, 1876 2,2,2,2,2, 1877 3,3,3,3,3,3,3,3,3, 1878 4, 1879 }; 1880 /* surrogate pair*/ 1881 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ; 1882 static const uint8_t to_iso_2022_jp2[]={ 1883 0x1b, 0x24, 0x42, 0x21, 0x21, 1884 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1885 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1886 0x1b, 0x24, 0x42, 0x21, 0x22, 1887 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1888 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1889 0x42, 1890 }; 1891 static const int32_t from_iso_2022_jpOffs2 [] ={ 1892 0,0,0,0,0, 1893 1,1,1,1,1,1,1,1,1, 1894 1,1,1,1,1,1, 1895 3,3,3,3,3, 1896 4,4,4,4,4,4,4,4,4, 1897 4,4,4,4,4,4, 1898 6, 1899 }; 1900 1901 /*ISO-2022-cn*/ 1902 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1903 static const uint8_t to_iso_2022_cn[]={ 1904 0x41, 1905 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, 1906 0x42, 1907 }; 1908 static const int32_t from_iso_2022_cnOffs [] ={ 1909 0, 1910 1,1,1,1,1,1, 1911 2, 1912 }; 1913 1914 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; 1915 1916 static const uint8_t to_iso_2022_cn4[]={ 1917 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 1918 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1919 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1920 0x0e, 0x21, 0x22, 1921 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1922 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1923 0x42, 1924 }; 1925 static const int32_t from_iso_2022_cnOffs4 [] ={ 1926 0,0,0,0,0,0,0, 1927 1,1,1,1,1,1,1, 1928 1,1,1,1,1,1, 1929 3,3,3, 1930 4,4,4,4,4,4,4, 1931 4,4,4,4,4,4, 1932 6 1933 1934 }; 1935 1936 /*ISO-2022-kr*/ 1937 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 1938 static const uint8_t to_iso_2022_kr2[]={ 1939 0x1b, 0x24, 0x29, 0x43, 1940 0x41, 1941 0x0e, 0x25, 0x50, 1942 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1943 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1944 0x0e, 0x25, 0x50, 1945 0x0f, 0x42, 1946 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1947 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1948 0x43 1949 }; 1950 static const int32_t from_iso_2022_krOffs2 [] ={ 1951 -1,-1,-1,-1, 1952 0, 1953 1,1,1, 1954 2,2,2,2,2,2,2, 1955 2,2,2,2,2,2, 1956 4,4,4, 1957 5,5, 1958 6,6,6,6,6,6, 1959 6,6,6,6,6,6, 1960 8, 1961 }; 1962 1963 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; 1964 static const uint8_t to_iso_2022_kr[]={ 1965 0x1b, 0x24, 0x29, 0x43, 1966 0x41, 1967 0x0e, 0x25, 0x50, 1968 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1969 0x0e, 0x25, 0x50, 1970 0x0f, 0x42, 1971 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1972 0x43 1973 }; 1974 1975 1976 static const int32_t from_iso_2022_krOffs [] ={ 1977 -1,-1,-1,-1, 1978 0, 1979 1,1,1, 1980 2,2,2,2,2,2,2, 1981 3,3,3, 1982 4,4, 1983 5,5,5,5,5,5, 1984 6, 1985 }; 1986 /* HZ encoding */ 1987 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1988 1989 static const uint8_t to_hz[]={ 1990 0x7e, 0x7d, 0x41, 1991 0x7e, 0x7b, 0x26, 0x30, 1992 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/ 1993 0x7e, 0x7b, 0x26, 0x30, 1994 0x7e, 0x7d, 0x42, 1995 1996 }; 1997 static const int32_t from_hzOffs [] ={ 1998 0,0,0, 1999 1,1,1,1, 2000 2,2,2,2,2,2,2,2, 2001 3,3,3,3, 2002 4,4,4 2003 }; 2004 2005 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 2006 static const uint8_t to_hz2[]={ 2007 0x7e, 0x7d, 0x41, 2008 0x7e, 0x7b, 0x26, 0x30, 2009 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2010 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2011 0x7e, 0x7b, 0x26, 0x30, 2012 0x7e, 0x7d, 0x42, 2013 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2014 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2015 0x43 2016 }; 2017 static const int32_t from_hzOffs2 [] ={ 2018 0,0,0, 2019 1,1,1,1, 2020 2,2,2,2,2,2,2,2, 2021 2,2,2,2,2,2, 2022 4,4,4,4, 2023 5,5,5, 2024 6,6,6,6,6,6, 2025 6,6,6,6,6,6, 2026 8, 2027 }; 2028 2029 /*ISCII*/ 2030 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; 2031 static const uint8_t to_iscii[]={ 2032 0x41, 2033 0xef, 0x42, 0xa1, 2034 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2035 0xa2, 2036 0x42, 2037 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2038 0x43 2039 }; 2040 2041 2042 static const int32_t from_isciiOffs [] ={ 2043 0, 2044 1,1,1, 2045 2,2,2,2,2,2, 2046 3, 2047 4, 2048 5,5,5,5,5,5, 2049 6, 2050 }; 2051 2052 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 2053 toIBM943, sizeof(toIBM943), "ibm-943", 2054 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) 2055 log_err("u-> ibm-943 with subst with value did not match.\n"); 2056 2057 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 2058 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", 2059 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) 2060 log_err("u-> euc-jp with subst with value did not match.\n"); 2061 2062 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 2063 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 2064 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) 2065 log_err("u-> euc-tw with subst with value did not match.\n"); 2066 2067 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2068 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2069 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2070 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2071 2072 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2073 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2074 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2075 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2076 2077 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 2078 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 2079 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) 2080 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2081 /*ESCAPE OPTIONS*/ 2082 { 2083 /* surrogate pair*/ 2084 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; 2085 static const uint8_t to_iso_2022_jp3_v2[]={ 2086 0x1b, 0x24, 0x42, 0x21, 0x21, 2087 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2088 2089 0x1b, 0x24, 0x42, 0x21, 0x22, 2090 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2091 2092 0x42, 2093 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b, 2094 }; 2095 2096 static const int32_t from_iso_2022_jpOffs3_v2 [] ={ 2097 0,0,0,0,0, 2098 1,1,1,1,1,1,1,1,1,1,1,1, 2099 2100 3,3,3,3,3, 2101 4,4,4,4,4,4,4,4,4,4,4,4, 2102 2103 6, 2104 7,7,7,7,7,7,7,7,7 2105 }; 2106 2107 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), 2108 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp", 2109 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2110 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); 2111 } 2112 { 2113 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2114 static const uint8_t to_iso_2022_cn5_v2[]={ 2115 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2116 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2117 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2118 0x0e, 0x21, 0x22, 2119 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2120 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2121 0x42, 2122 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, 2123 }; 2124 static const int32_t from_iso_2022_cnOffs5_v2 [] ={ 2125 0,0,0,0,0,0,0, 2126 1,1,1,1,1,1,1, 2127 1,1,1,1,1,1, 2128 3,3,3, 2129 4,4,4,4,4,4,4, 2130 4,4,4,4,4,4, 2131 6, 2132 7,7,7,7,7,7 2133 }; 2134 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), 2135 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", 2136 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR )) 2137 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n"); 2138 2139 } 2140 { 2141 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2142 static const uint8_t to_iso_2022_cn6_v2[]={ 2143 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2144 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2145 0x0e, 0x21, 0x22, 2146 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2147 0x42, 2148 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d 2149 }; 2150 static const int32_t from_iso_2022_cnOffs6_v2 [] ={ 2151 0, 0, 0, 0, 0, 0, 0, 2152 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2153 3, 3, 3, 2154 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2155 6, 2156 7, 7, 7, 7, 7, 7, 7, 7, 2157 }; 2158 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), 2159 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", 2160 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR )) 2161 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n"); 2162 2163 } 2164 { 2165 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2166 static const uint8_t to_iso_2022_cn7_v2[]={ 2167 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2168 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2169 0x0e, 0x21, 0x22, 2170 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2171 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, 2172 }; 2173 static const int32_t from_iso_2022_cnOffs7_v2 [] ={ 2174 0, 0, 0, 0, 0, 0, 0, 2175 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2176 3, 3, 3, 2177 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2178 6, 2179 7, 7, 7, 7, 7, 7, 2180 }; 2181 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), 2182 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", 2183 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR )) 2184 log_err("u-> iso-2022-cn with sub & K did not match.\n"); 2185 2186 } 2187 { 2188 static const UChar iso_2022_cn_inputText8[]={ 2189 0x3000, 2190 0xD84D, 0xDC56, 2191 0x3001, 2192 0xD84D, 0xDC56, 2193 0xDBFF, 0xDFFF, 2194 0x0042, 2195 0x0902}; 2196 static const uint8_t to_iso_2022_cn8_v2[]={ 2197 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2198 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2199 0x0e, 0x21, 0x22, 2200 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2201 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20, 2202 0x42, 2203 0x5c, 0x39, 0x30, 0x32, 0x20 2204 }; 2205 static const int32_t from_iso_2022_cnOffs8_v2 [] ={ 2206 0, 0, 0, 0, 0, 0, 0, 2207 1, 1, 1, 1, 1, 1, 1, 1, 2208 3, 3, 3, 2209 4, 4, 4, 4, 4, 4, 4, 4, 2210 6, 6, 6, 6, 6, 6, 6, 6, 2211 8, 2212 9, 9, 9, 9, 9 2213 }; 2214 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]), 2215 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn", 2216 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR )) 2217 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n"); 2218 2219 } 2220 { 2221 static const uint8_t to_iso_2022_cn4_v3[]={ 2222 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2223 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2224 0x0e, 0x21, 0x22, 2225 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2226 0x42 2227 }; 2228 2229 2230 static const int32_t from_iso_2022_cnOffs4_v3 [] ={ 2231 0,0,0,0,0,0,0, 2232 1,1,1,1,1,1,1,1,1,1,1, 2233 2234 3,3,3, 2235 4,4,4,4,4,4,4,4,4,4,4, 2236 2237 6 2238 2239 }; 2240 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2241 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", 2242 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2243 { 2244 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n"); 2245 } 2246 } 2247 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 2248 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 2249 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) 2250 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2251 2252 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2253 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", 2254 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) 2255 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2256 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 2257 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 2258 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) 2259 log_err("u-> iso_2022_kr with subst with value did not match.\n"); 2260 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]), 2261 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", 2262 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) 2263 log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); 2264 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 2265 to_hz, sizeof(to_hz), "HZ", 2266 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) 2267 log_err("u-> hz with subst with value did not match.\n"); 2268 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]), 2269 to_hz2, sizeof(to_hz2), "HZ", 2270 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) 2271 log_err("u-> hz with subst with value did not match.\n"); 2272 2273 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 2274 to_iscii, sizeof(to_iscii), "ISCII,version=0", 2275 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) 2276 log_err("u-> iscii with subst with value did not match.\n"); 2277 } 2278 #endif 2279 2280 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 2281 /*to Unicode*/ 2282 { 2283 #if !UCONFIG_NO_LEGACY_CONVERSION 2284 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 2285 0x81, 0xad, /*unassigned*/ 2286 0x89, 0xd3 }; 2287 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 2288 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 2289 0x7B87}; 2290 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 2291 2292 /* EUC_JP*/ 2293 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 2294 0x8f, 0xda, 0xa1, /*unassigned*/ 2295 0x8e, 0xe0, 2296 }; 2297 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 2298 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, 2299 0x00a2 }; 2300 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, 2301 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2302 9, 2303 }; 2304 2305 /*EUC_TW*/ 2306 static const uint8_t sampleTxt_euc_tw[]={ 2307 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 2308 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 2309 0xe6, 0xca, 0x8a, 2310 }; 2311 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 2312 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43, 2313 0x8706, 0x8a, }; 2314 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 2315 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2316 11, 13}; 2317 2318 /*iso-2022-jp*/ 2319 static const uint8_t sampleTxt_iso_2022_jp[]={ 2320 0x1b, 0x28, 0x42, 0x41, 2321 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ 2322 0x1b, 0x28, 0x42, 0x42, 2323 2324 }; 2325 /* A % X 3 A % X 1 A B */ 2326 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 }; 2327 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 }; 2328 2329 /*iso-2022-cn*/ 2330 static const uint8_t sampleTxt_iso_2022_cn[]={ 2331 0x0f, 0x41, 0x44, 2332 0x1B, 0x24, 0x29, 0x47, 2333 0x0E, 0x40, 0x6c, /*unassigned*/ 2334 0x0f, 0x42, 2335 2336 }; 2337 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 }; 2338 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 }; 2339 2340 /*iso-2022-kr*/ 2341 static const uint8_t sampleTxt_iso_2022_kr[]={ 2342 0x1b, 0x24, 0x29, 0x43, 2343 0x41, 2344 0x0E, 0x7f, 0x1E, 2345 0x0e, 0x25, 0x50, 2346 0x0f, 0x51, 2347 0x42, 0x43, 2348 2349 }; 2350 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; 2351 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 }; 2352 2353 /*hz*/ 2354 static const uint8_t sampleTxt_hz[]={ 2355 0x41, 2356 0x7e, 0x7b, 0x26, 0x30, 2357 0x7f, 0x1E, /*unassigned*/ 2358 0x26, 0x30, 2359 0x7e, 0x7d, 0x42, 2360 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 2361 0x7e, 0x7d, 0x42, 2362 }; 2363 static const UChar hztoUnicode[]={ 2364 0x41, 2365 0x03a0, 2366 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2367 0x03A0, 2368 0x42, 2369 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2370 0x42,}; 2371 2372 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, }; 2373 2374 2375 /*iscii*/ 2376 static const uint8_t sampleTxt_iscii[]={ 2377 0x41, 2378 0x30, 2379 0xEB, /*unassigned*/ 2380 0xa3, 2381 0x42, 2382 0xEC, /*unassigned*/ 2383 0x42, 2384 }; 2385 static const UChar isciitoUnicode[]={ 2386 0x41, 2387 0x30, 2388 0x25, 0x58, 0x45, 0x42, 2389 0x0903, 2390 0x42, 2391 0x25, 0x58, 0x45, 0x43, 2392 0x42,}; 2393 2394 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; 2395 #endif 2396 2397 /*UTF8*/ 2398 static const uint8_t sampleTxtUTF8[]={ 2399 0x20, 0x64, 0x50, 2400 0xC2, 0x7E, /* truncated char */ 2401 0x20, 2402 0xE0, 0xB5, 0x7E, /* truncated char */ 2403 0x40, 2404 }; 2405 static const UChar UTF8ToUnicode[]={ 2406 0x0020, 0x0064, 0x0050, 2407 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ 2408 0x0020, 2409 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E, 2410 0x0040 2411 }; 2412 static const int32_t fromUTF8[] = { 2413 0, 1, 2, 2414 3, 3, 3, 3, 4, 2415 5, 2416 6, 6, 6, 6, 6, 6, 6, 6, 8, 2417 9 2418 }; 2419 static const UChar UTF8ToUnicodeXML_DEC[]={ 2420 0x0020, 0x0064, 0x0050, 2421 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */ 2422 0x0020, 2423 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E, 2424 0x0040 2425 }; 2426 static const int32_t fromUTF8XML_DEC[] = { 2427 0, 1, 2, 2428 3, 3, 3, 3, 3, 3, 4, 2429 5, 2430 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 2431 9 2432 }; 2433 2434 2435 #if !UCONFIG_NO_LEGACY_CONVERSION 2436 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), 2437 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 2438 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) 2439 log_err("ibm-943->u with substitute with value did not match.\n"); 2440 2441 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), 2442 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"IBM-eucJP", 2443 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) 2444 log_err("euc-jp->u with substitute with value did not match.\n"); 2445 2446 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 2447 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 2448 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) 2449 log_err("euc-tw->u with substitute with value did not match.\n"); 2450 2451 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2452 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2453 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) 2454 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2455 2456 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2457 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2458 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR)) 2459 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2460 2461 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ 2462 { 2463 static const UChar iso_2022_jptoUnicodeDec[]={ 2464 0x0041, 2465 /* & # 5 8 ; */ 2466 0x0026, 0x0023, 0x0035, 0x0038, 0x003b, 2467 0x0026, 0x0023, 0x0032, 0x0036, 0x003b, 2468 0x0042 }; 2469 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, }; 2470 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2471 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2472 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2473 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n"); 2474 } 2475 { 2476 static const UChar iso_2022_jptoUnicodeHex[]={ 2477 0x0041, 2478 /* & # x 3 A ; */ 2479 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b, 2480 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b, 2481 0x0042 }; 2482 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 }; 2483 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2484 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2485 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) 2486 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n"); 2487 } 2488 { 2489 static const UChar iso_2022_jptoUnicodeC[]={ 2490 0x0041, 2491 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */ 2492 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */ 2493 0x0042 }; 2494 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; 2495 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2496 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2497 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2498 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n"); 2499 } 2500 } 2501 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 2502 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 2503 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) 2504 log_err("iso-2022-cn->u with substitute with value did not match.\n"); 2505 2506 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 2507 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 2508 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) 2509 log_err("iso-2022-kr->u with substitute with value did not match.\n"); 2510 2511 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 2512 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 2513 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) 2514 log_err("hz->u with substitute with value did not match.\n"); 2515 2516 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 2517 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 2518 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) 2519 log_err("ISCII ->u with substitute with value did not match.\n"); 2520 #endif 2521 2522 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2523 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8", 2524 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) 2525 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2526 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2527 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8", 2528 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR)) 2529 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2530 } 2531 } 2532 2533 #if !UCONFIG_NO_LEGACY_CONVERSION 2534 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) 2535 { 2536 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; 2537 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 2538 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; 2539 2540 2541 static const uint8_t text943[] = { 2542 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; 2543 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; 2544 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; 2545 static const UChar toUnicode943stop[]= { 0x304b}; 2546 2547 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; 2548 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; 2549 static const int32_t fromIBM943Offsstop[] = { 0}; 2550 2551 gInBufferSize = inputsize; 2552 gOutBufferSize = outputsize; 2553 /*checking with a legal value*/ 2554 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]), 2555 templegal949, sizeof(templegal949), "ibm-949", 2556 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) 2557 log_err("u-> ibm-949 with skip did not match.\n"); 2558 2559 /*checking illegal value for ibm-943 with substitute*/ 2560 if(!testConvertToUnicode(text943, sizeof(text943), 2561 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", 2562 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) 2563 log_err("ibm-943->u with subst did not match.\n"); 2564 /*checking illegal value for ibm-943 with skip */ 2565 if(!testConvertToUnicode(text943, sizeof(text943), 2566 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943", 2567 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) 2568 log_err("ibm-943->u with skip did not match.\n"); 2569 2570 /*checking illegal value for ibm-943 with stop */ 2571 if(!testConvertToUnicode(text943, sizeof(text943), 2572 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943", 2573 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) 2574 log_err("ibm-943->u with stop did not match.\n"); 2575 2576 } 2577 2578 static void TestSingleByte(int32_t inputsize, int32_t outputsize) 2579 { 2580 static const uint8_t sampleText[] = { 2581 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, 2582 0xff, 0x32, 0x33}; 2583 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; 2584 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; 2585 /*checking illegal value for ibm-943 with substitute*/ 2586 gInBufferSize = inputsize; 2587 gOutBufferSize = outputsize; 2588 2589 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 2590 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", 2591 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) 2592 log_err("ibm-943->u with subst did not match.\n"); 2593 } 2594 2595 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) 2596 { 2597 /*EBCDIC_STATEFUL*/ 2598 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 }; 2599 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; 2600 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 }; 2601 /* s SO doubl SI sng s SO fe fe SI s */ 2602 2603 /*EBCDIC_STATEFUL with subChar=3f*/ 2604 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 }; 2605 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 }; 2606 static const char mySubChar[]={ 0x3f}; 2607 2608 gInBufferSize = inputsize; 2609 gOutBufferSize = outputsize; 2610 2611 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), 2612 toIBM930, sizeof(toIBM930), "ibm-930", 2613 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) 2614 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); 2615 2616 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), 2617 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930", 2618 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) 2619 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n"); 2620 } 2621 #endif 2622 2623 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 2624 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, 2625 const char *mySubChar, int8_t len) 2626 { 2627 2628 2629 UErrorCode status = U_ZERO_ERROR; 2630 UConverter *conv = 0; 2631 char junkout[NEW_MAX_BUFFER]; /* FIX */ 2632 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2633 const UChar *src; 2634 char *end; 2635 char *targ; 2636 int32_t *offs; 2637 int i; 2638 int32_t realBufferSize; 2639 char *realBufferEnd; 2640 const UChar *realSourceEnd; 2641 const UChar *sourceLimit; 2642 UBool checkOffsets = TRUE; 2643 UBool doFlush; 2644 char junk[9999]; 2645 char offset_str[9999]; 2646 char *p; 2647 UConverterFromUCallback oldAction = NULL; 2648 const void* oldContext = NULL; 2649 2650 2651 for(i=0;i<NEW_MAX_BUFFER;i++) 2652 junkout[i] = (char)0xF0; 2653 for(i=0;i<NEW_MAX_BUFFER;i++) 2654 junokout[i] = 0xFF; 2655 setNuConvTestName(codepage, "FROM"); 2656 2657 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 2658 gOutBufferSize); 2659 2660 conv = ucnv_open(codepage, &status); 2661 if(U_FAILURE(status)) 2662 { 2663 log_data_err("Couldn't open converter %s\n",codepage); 2664 return TRUE; 2665 } 2666 2667 log_verbose("Converter opened..\n"); 2668 2669 /*----setting the callback routine----*/ 2670 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 2671 if (U_FAILURE(status)) 2672 { 2673 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2674 } 2675 /*------------------------*/ 2676 /*setting the subChar*/ 2677 if(mySubChar != NULL){ 2678 ucnv_setSubstChars(conv, mySubChar, len, &status); 2679 if (U_FAILURE(status)) { 2680 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2681 } 2682 } 2683 /*------------*/ 2684 2685 src = source; 2686 targ = junkout; 2687 offs = junokout; 2688 2689 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 2690 realBufferEnd = junkout + realBufferSize; 2691 realSourceEnd = source + sourceLen; 2692 2693 if ( gOutBufferSize != realBufferSize ) 2694 checkOffsets = FALSE; 2695 2696 if( gInBufferSize != NEW_MAX_BUFFER ) 2697 checkOffsets = FALSE; 2698 2699 do 2700 { 2701 end = nct_min(targ + gOutBufferSize, realBufferEnd); 2702 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 2703 2704 doFlush = (UBool)(sourceLimit == realSourceEnd); 2705 2706 if(targ == realBufferEnd) 2707 { 2708 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 2709 return FALSE; 2710 } 2711 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 2712 2713 2714 status = U_ZERO_ERROR; 2715 2716 ucnv_fromUnicode (conv, 2717 (char **)&targ, 2718 (const char *)end, 2719 &src, 2720 sourceLimit, 2721 checkOffsets ? offs : NULL, 2722 doFlush, /* flush if we're at the end of the input data */ 2723 &status); 2724 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); 2725 2726 2727 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ 2728 UChar errChars[50]; /* should be sufficient */ 2729 int8_t errLen = 50; 2730 UErrorCode err = U_ZERO_ERROR; 2731 const UChar* start= NULL; 2732 ucnv_getInvalidUChars(conv,errChars, &errLen, &err); 2733 if(U_FAILURE(err)){ 2734 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err)); 2735 } 2736 /* length of in invalid chars should be equal to returned length*/ 2737 start = src - errLen; 2738 if(u_strncmp(errChars,start,errLen)!=0){ 2739 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); 2740 } 2741 } 2742 /* allow failure codes for the stop callback */ 2743 if(U_FAILURE(status) && 2744 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND))) 2745 { 2746 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 2747 return FALSE; 2748 } 2749 2750 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 2751 sourceLen, targ-junkout); 2752 if(getTestOption(VERBOSITY_OPTION)) 2753 { 2754 2755 junk[0] = 0; 2756 offset_str[0] = 0; 2757 for(p = junkout;p<targ;p++) 2758 { 2759 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 2760 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 2761 } 2762 2763 log_verbose(junk); 2764 printSeq(expect, expectLen); 2765 if ( checkOffsets ) 2766 { 2767 log_verbose("\nOffsets:"); 2768 log_verbose(offset_str); 2769 } 2770 log_verbose("\n"); 2771 } 2772 ucnv_close(conv); 2773 2774 2775 if(expectLen != targ-junkout) 2776 { 2777 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 2778 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 2779 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 2780 printSeqErr(expect, expectLen); 2781 return FALSE; 2782 } 2783 2784 if (checkOffsets && (expectOffsets != 0) ) 2785 { 2786 log_verbose("comparing %d offsets..\n", targ-junkout); 2787 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 2788 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 2789 log_err("Got Output : "); 2790 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 2791 log_err("Got Offsets: "); 2792 for(p=junkout;p<targ;p++) 2793 log_err("%d,", junokout[p-junkout]); 2794 log_err("\n"); 2795 log_err("Expected Offsets: "); 2796 for(i=0; i<(targ-junkout); i++) 2797 log_err("%d,", expectOffsets[i]); 2798 log_err("\n"); 2799 return FALSE; 2800 } 2801 } 2802 2803 if(!memcmp(junkout, expect, expectLen)) 2804 { 2805 log_verbose("String matches! %s\n", gNuConvTestName); 2806 return TRUE; 2807 } 2808 else 2809 { 2810 log_err("String does not match. %s\n", gNuConvTestName); 2811 log_err("source: "); 2812 printUSeqErr(source, sourceLen); 2813 log_err("Got: "); 2814 printSeqErr((const uint8_t *)junkout, expectLen); 2815 log_err("Expected: "); 2816 printSeqErr(expect, expectLen); 2817 return FALSE; 2818 } 2819 } 2820 2821 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 2822 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 2823 const char *mySubChar, int8_t len) 2824 { 2825 UErrorCode status = U_ZERO_ERROR; 2826 UConverter *conv = 0; 2827 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 2828 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2829 const char *src; 2830 const char *realSourceEnd; 2831 const char *srcLimit; 2832 UChar *targ; 2833 UChar *end; 2834 int32_t *offs; 2835 int i; 2836 UBool checkOffsets = TRUE; 2837 char junk[9999]; 2838 char offset_str[9999]; 2839 UChar *p; 2840 UConverterToUCallback oldAction = NULL; 2841 const void* oldContext = NULL; 2842 2843 int32_t realBufferSize; 2844 UChar *realBufferEnd; 2845 2846 2847 for(i=0;i<NEW_MAX_BUFFER;i++) 2848 junkout[i] = 0xFFFE; 2849 2850 for(i=0;i<NEW_MAX_BUFFER;i++) 2851 junokout[i] = -1; 2852 2853 setNuConvTestName(codepage, "TO"); 2854 2855 log_verbose("\n========= %s\n", gNuConvTestName); 2856 2857 conv = ucnv_open(codepage, &status); 2858 if(U_FAILURE(status)) 2859 { 2860 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 2861 return TRUE; 2862 } 2863 2864 log_verbose("Converter opened..\n"); 2865 2866 src = (const char *)source; 2867 targ = junkout; 2868 offs = junokout; 2869 2870 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 2871 realBufferEnd = junkout + realBufferSize; 2872 realSourceEnd = src + sourcelen; 2873 /*----setting the callback routine----*/ 2874 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 2875 if (U_FAILURE(status)) 2876 { 2877 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2878 } 2879 /*-------------------------------------*/ 2880 /*setting the subChar*/ 2881 if(mySubChar != NULL){ 2882 ucnv_setSubstChars(conv, mySubChar, len, &status); 2883 if (U_FAILURE(status)) { 2884 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2885 } 2886 } 2887 /*------------*/ 2888 2889 2890 if ( gOutBufferSize != realBufferSize ) 2891 checkOffsets = FALSE; 2892 2893 if( gInBufferSize != NEW_MAX_BUFFER ) 2894 checkOffsets = FALSE; 2895 2896 do 2897 { 2898 end = nct_min( targ + gOutBufferSize, realBufferEnd); 2899 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 2900 2901 if(targ == realBufferEnd) 2902 { 2903 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 2904 return FALSE; 2905 } 2906 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 2907 2908 2909 2910 status = U_ZERO_ERROR; 2911 2912 ucnv_toUnicode (conv, 2913 &targ, 2914 end, 2915 (const char **)&src, 2916 (const char *)srcLimit, 2917 checkOffsets ? offs : NULL, 2918 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 2919 &status); 2920 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 2921 2922 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ 2923 char errChars[50]; /* should be sufficient */ 2924 int8_t errLen = 50; 2925 UErrorCode err = U_ZERO_ERROR; 2926 const char* start= NULL; 2927 ucnv_getInvalidChars(conv,errChars, &errLen, &err); 2928 if(U_FAILURE(err)){ 2929 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err)); 2930 } 2931 /* length of in invalid chars should be equal to returned length*/ 2932 start = src - errLen; 2933 if(uprv_strncmp(errChars,start,errLen)!=0){ 2934 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); 2935 } 2936 } 2937 /* allow failure codes for the stop callback */ 2938 if(U_FAILURE(status) && 2939 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) 2940 { 2941 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 2942 return FALSE; 2943 } 2944 2945 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 2946 sourcelen, targ-junkout); 2947 if(getTestOption(VERBOSITY_OPTION)) 2948 { 2949 2950 junk[0] = 0; 2951 offset_str[0] = 0; 2952 2953 for(p = junkout;p<targ;p++) 2954 { 2955 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 2956 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 2957 } 2958 2959 log_verbose(junk); 2960 printUSeq(expect, expectlen); 2961 if ( checkOffsets ) 2962 { 2963 log_verbose("\nOffsets:"); 2964 log_verbose(offset_str); 2965 } 2966 log_verbose("\n"); 2967 } 2968 ucnv_close(conv); 2969 2970 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 2971 2972 if (checkOffsets && (expectOffsets != 0)) 2973 { 2974 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 2975 { 2976 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 2977 log_err("Got offsets: "); 2978 for(p=junkout;p<targ;p++) 2979 log_err(" %2d,", junokout[p-junkout]); 2980 log_err("\n"); 2981 log_err("Expected offsets: "); 2982 for(i=0; i<(targ-junkout); i++) 2983 log_err(" %2d,", expectOffsets[i]); 2984 log_err("\n"); 2985 log_err("Got output: "); 2986 for(i=0; i<(targ-junkout); i++) 2987 log_err("0x%04x,", junkout[i]); 2988 log_err("\n"); 2989 log_err("From source: "); 2990 for(i=0; i<(src-(const char *)source); i++) 2991 log_err(" 0x%02x,", (unsigned char)source[i]); 2992 log_err("\n"); 2993 } 2994 } 2995 2996 if(!memcmp(junkout, expect, expectlen*2)) 2997 { 2998 log_verbose("Matches!\n"); 2999 return TRUE; 3000 } 3001 else 3002 { 3003 log_err("String does not match. %s\n", gNuConvTestName); 3004 log_verbose("String does not match. %s\n", gNuConvTestName); 3005 log_err("Got: "); 3006 printUSeqErr(junkout, expectlen); 3007 log_err("Expected: "); 3008 printUSeqErr(expect, expectlen); 3009 log_err("\n"); 3010 return FALSE; 3011 } 3012 } 3013 3014 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 3015 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, 3016 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 3017 { 3018 3019 3020 UErrorCode status = U_ZERO_ERROR; 3021 UConverter *conv = 0; 3022 char junkout[NEW_MAX_BUFFER]; /* FIX */ 3023 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 3024 const UChar *src; 3025 char *end; 3026 char *targ; 3027 int32_t *offs; 3028 int i; 3029 int32_t realBufferSize; 3030 char *realBufferEnd; 3031 const UChar *realSourceEnd; 3032 const UChar *sourceLimit; 3033 UBool checkOffsets = TRUE; 3034 UBool doFlush; 3035 char junk[9999]; 3036 char offset_str[9999]; 3037 char *p; 3038 UConverterFromUCallback oldAction = NULL; 3039 const void* oldContext = NULL; 3040 3041 3042 for(i=0;i<NEW_MAX_BUFFER;i++) 3043 junkout[i] = (char)0xF0; 3044 for(i=0;i<NEW_MAX_BUFFER;i++) 3045 junokout[i] = 0xFF; 3046 setNuConvTestName(codepage, "FROM"); 3047 3048 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 3049 gOutBufferSize); 3050 3051 conv = ucnv_open(codepage, &status); 3052 if(U_FAILURE(status)) 3053 { 3054 log_data_err("Couldn't open converter %s\n",codepage); 3055 return TRUE; /* Because the err has already been logged. */ 3056 } 3057 3058 log_verbose("Converter opened..\n"); 3059 3060 /*----setting the callback routine----*/ 3061 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3062 if (U_FAILURE(status)) 3063 { 3064 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3065 } 3066 /*------------------------*/ 3067 /*setting the subChar*/ 3068 if(mySubChar != NULL){ 3069 ucnv_setSubstChars(conv, mySubChar, len, &status); 3070 if (U_FAILURE(status)) { 3071 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status)); 3072 } 3073 } 3074 /*------------*/ 3075 3076 src = source; 3077 targ = junkout; 3078 offs = junokout; 3079 3080 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 3081 realBufferEnd = junkout + realBufferSize; 3082 realSourceEnd = source + sourceLen; 3083 3084 if ( gOutBufferSize != realBufferSize ) 3085 checkOffsets = FALSE; 3086 3087 if( gInBufferSize != NEW_MAX_BUFFER ) 3088 checkOffsets = FALSE; 3089 3090 do 3091 { 3092 end = nct_min(targ + gOutBufferSize, realBufferEnd); 3093 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 3094 3095 doFlush = (UBool)(sourceLimit == realSourceEnd); 3096 3097 if(targ == realBufferEnd) 3098 { 3099 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 3100 return FALSE; 3101 } 3102 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 3103 3104 3105 status = U_ZERO_ERROR; 3106 3107 ucnv_fromUnicode (conv, 3108 (char **)&targ, 3109 (const char *)end, 3110 &src, 3111 sourceLimit, 3112 checkOffsets ? offs : NULL, 3113 doFlush, /* flush if we're at the end of the input data */ 3114 &status); 3115 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); 3116 3117 /* allow failure codes for the stop callback */ 3118 if(U_FAILURE(status) && status != expectedError) 3119 { 3120 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3121 return FALSE; 3122 } 3123 3124 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 3125 sourceLen, targ-junkout); 3126 if(getTestOption(VERBOSITY_OPTION)) 3127 { 3128 3129 junk[0] = 0; 3130 offset_str[0] = 0; 3131 for(p = junkout;p<targ;p++) 3132 { 3133 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 3134 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 3135 } 3136 3137 log_verbose(junk); 3138 printSeq(expect, expectLen); 3139 if ( checkOffsets ) 3140 { 3141 log_verbose("\nOffsets:"); 3142 log_verbose(offset_str); 3143 } 3144 log_verbose("\n"); 3145 } 3146 ucnv_close(conv); 3147 3148 3149 if(expectLen != targ-junkout) 3150 { 3151 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3152 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3153 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3154 printSeqErr(expect, expectLen); 3155 return FALSE; 3156 } 3157 3158 if (checkOffsets && (expectOffsets != 0) ) 3159 { 3160 log_verbose("comparing %d offsets..\n", targ-junkout); 3161 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 3162 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3163 log_err("Got Output : "); 3164 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3165 log_err("Got Offsets: "); 3166 for(p=junkout;p<targ;p++) 3167 log_err("%d,", junokout[p-junkout]); 3168 log_err("\n"); 3169 log_err("Expected Offsets: "); 3170 for(i=0; i<(targ-junkout); i++) 3171 log_err("%d,", expectOffsets[i]); 3172 log_err("\n"); 3173 return FALSE; 3174 } 3175 } 3176 3177 if(!memcmp(junkout, expect, expectLen)) 3178 { 3179 log_verbose("String matches! %s\n", gNuConvTestName); 3180 return TRUE; 3181 } 3182 else 3183 { 3184 log_err("String does not match. %s\n", gNuConvTestName); 3185 log_err("source: "); 3186 printUSeqErr(source, sourceLen); 3187 log_err("Got: "); 3188 printSeqErr((const uint8_t *)junkout, expectLen); 3189 log_err("Expected: "); 3190 printSeqErr(expect, expectLen); 3191 return FALSE; 3192 } 3193 } 3194 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 3195 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 3196 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 3197 { 3198 UErrorCode status = U_ZERO_ERROR; 3199 UConverter *conv = 0; 3200 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 3201 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 3202 const char *src; 3203 const char *realSourceEnd; 3204 const char *srcLimit; 3205 UChar *targ; 3206 UChar *end; 3207 int32_t *offs; 3208 int i; 3209 UBool checkOffsets = TRUE; 3210 char junk[9999]; 3211 char offset_str[9999]; 3212 UChar *p; 3213 UConverterToUCallback oldAction = NULL; 3214 const void* oldContext = NULL; 3215 3216 int32_t realBufferSize; 3217 UChar *realBufferEnd; 3218 3219 3220 for(i=0;i<NEW_MAX_BUFFER;i++) 3221 junkout[i] = 0xFFFE; 3222 3223 for(i=0;i<NEW_MAX_BUFFER;i++) 3224 junokout[i] = -1; 3225 3226 setNuConvTestName(codepage, "TO"); 3227 3228 log_verbose("\n========= %s\n", gNuConvTestName); 3229 3230 conv = ucnv_open(codepage, &status); 3231 if(U_FAILURE(status)) 3232 { 3233 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 3234 return TRUE; 3235 } 3236 3237 log_verbose("Converter opened..\n"); 3238 3239 src = (const char *)source; 3240 targ = junkout; 3241 offs = junokout; 3242 3243 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 3244 realBufferEnd = junkout + realBufferSize; 3245 realSourceEnd = src + sourcelen; 3246 /*----setting the callback routine----*/ 3247 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3248 if (U_FAILURE(status)) 3249 { 3250 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3251 } 3252 /*-------------------------------------*/ 3253 /*setting the subChar*/ 3254 if(mySubChar != NULL){ 3255 ucnv_setSubstChars(conv, mySubChar, len, &status); 3256 if (U_FAILURE(status)) { 3257 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3258 } 3259 } 3260 /*------------*/ 3261 3262 3263 if ( gOutBufferSize != realBufferSize ) 3264 checkOffsets = FALSE; 3265 3266 if( gInBufferSize != NEW_MAX_BUFFER ) 3267 checkOffsets = FALSE; 3268 3269 do 3270 { 3271 end = nct_min( targ + gOutBufferSize, realBufferEnd); 3272 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 3273 3274 if(targ == realBufferEnd) 3275 { 3276 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 3277 return FALSE; 3278 } 3279 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 3280 3281 3282 3283 status = U_ZERO_ERROR; 3284 3285 ucnv_toUnicode (conv, 3286 &targ, 3287 end, 3288 (const char **)&src, 3289 (const char *)srcLimit, 3290 checkOffsets ? offs : NULL, 3291 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 3292 &status); 3293 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 3294 3295 /* allow failure codes for the stop callback */ 3296 if(U_FAILURE(status) && status!=expectedError) 3297 { 3298 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3299 return FALSE; 3300 } 3301 3302 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 3303 sourcelen, targ-junkout); 3304 if(getTestOption(VERBOSITY_OPTION)) 3305 { 3306 3307 junk[0] = 0; 3308 offset_str[0] = 0; 3309 3310 for(p = junkout;p<targ;p++) 3311 { 3312 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 3313 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 3314 } 3315 3316 log_verbose(junk); 3317 printUSeq(expect, expectlen); 3318 if ( checkOffsets ) 3319 { 3320 log_verbose("\nOffsets:"); 3321 log_verbose(offset_str); 3322 } 3323 log_verbose("\n"); 3324 } 3325 ucnv_close(conv); 3326 3327 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 3328 3329 if (checkOffsets && (expectOffsets != 0)) 3330 { 3331 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 3332 { 3333 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3334 log_err("Got offsets: "); 3335 for(p=junkout;p<targ;p++) 3336 log_err(" %2d,", junokout[p-junkout]); 3337 log_err("\n"); 3338 log_err("Expected offsets: "); 3339 for(i=0; i<(targ-junkout); i++) 3340 log_err(" %2d,", expectOffsets[i]); 3341 log_err("\n"); 3342 log_err("Got output: "); 3343 for(i=0; i<(targ-junkout); i++) 3344 log_err("0x%04x,", junkout[i]); 3345 log_err("\n"); 3346 log_err("From source: "); 3347 for(i=0; i<(src-(const char *)source); i++) 3348 log_err(" 0x%02x,", (unsigned char)source[i]); 3349 log_err("\n"); 3350 } 3351 } 3352 3353 if(!memcmp(junkout, expect, expectlen*2)) 3354 { 3355 log_verbose("Matches!\n"); 3356 return TRUE; 3357 } 3358 else 3359 { 3360 log_err("String does not match. %s\n", gNuConvTestName); 3361 log_verbose("String does not match. %s\n", gNuConvTestName); 3362 log_err("Got: "); 3363 printUSeqErr(junkout, expectlen); 3364 log_err("Expected: "); 3365 printUSeqErr(expect, expectlen); 3366 log_err("\n"); 3367 return FALSE; 3368 } 3369 } 3370 3371 static void TestCallBackFailure(void) { 3372 UErrorCode status = U_USELESS_COLLATOR_ERROR; 3373 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); 3374 if (status != U_USELESS_COLLATOR_ERROR) { 3375 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n"); 3376 } 3377 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); 3378 if (status != U_USELESS_COLLATOR_ERROR) { 3379 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n"); 3380 } 3381 ucnv_cbFromUWriteSub(NULL, -1, &status); 3382 if (status != U_USELESS_COLLATOR_ERROR) { 3383 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n"); 3384 } 3385 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); 3386 if (status != U_USELESS_COLLATOR_ERROR) { 3387 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n"); 3388 } 3389 } 3390