1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /* 7 ******************************************************************************** 8 * File NCCBTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda 7/21/1999 Testing error callback routines 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <ctype.h> 19 #include "cstring.h" 20 #include "unicode/uloc.h" 21 #include "unicode/ucnv.h" 22 #include "unicode/ucnv_err.h" 23 #include "cintltst.h" 24 #include "unicode/utypes.h" 25 #include "unicode/ustring.h" 26 #include "nccbtst.h" 27 #include "unicode/ucnv_cb.h" 28 #include "unicode/utf16.h" 29 30 #define NEW_MAX_BUFFER 999 31 32 #define nct_min(x,y) ((x<y) ? 
x : y) 33 #define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0])) 34 35 static int32_t gInBufferSize = 0; 36 static int32_t gOutBufferSize = 0; 37 static char gNuConvTestName[1024]; 38 39 static void printSeq(const uint8_t* a, int len) 40 { 41 int i=0; 42 log_verbose("\n{"); 43 while (i<len) 44 log_verbose("0x%02X, ", a[i++]); 45 log_verbose("}\n"); 46 } 47 48 static void printUSeq(const UChar* a, int len) 49 { 50 int i=0; 51 log_verbose("{"); 52 while (i<len) 53 log_verbose(" 0x%04x, ", a[i++]); 54 log_verbose("}\n"); 55 } 56 57 static void printSeqErr(const uint8_t* a, int len) 58 { 59 int i=0; 60 fprintf(stderr, "{"); 61 while (i<len) 62 fprintf(stderr, " 0x%02x, ", a[i++]); 63 fprintf(stderr, "}\n"); 64 } 65 66 static void printUSeqErr(const UChar* a, int len) 67 { 68 int i=0; 69 fprintf(stderr, "{"); 70 while (i<len) 71 fprintf(stderr, "0x%04x, ", a[i++]); 72 fprintf(stderr,"}\n"); 73 } 74 75 static void setNuConvTestName(const char *codepage, const char *direction) 76 { 77 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 78 codepage, 79 direction, 80 (int)gInBufferSize, 81 (int)gOutBufferSize); 82 } 83 84 85 static void TestCallBackFailure(void); 86 87 void addTestConvertErrorCallBack(TestNode** root); 88 89 void addTestConvertErrorCallBack(TestNode** root) 90 { 91 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); 92 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); 93 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); 94 /* BEGIN android-removed 95 To save space, Android does not build complete CJK conversion tables. 96 We skip the test here. 
97 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack"); 98 END android-removed */ 99 100 #if !UCONFIG_NO_LEGACY_CONVERSION 101 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack"); 102 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack"); 103 #endif 104 105 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); 106 } 107 108 static void TestSkipCallBack() 109 { 110 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 111 TestSkip(1,NEW_MAX_BUFFER); 112 TestSkip(1,1); 113 TestSkip(NEW_MAX_BUFFER, 1); 114 } 115 116 static void TestStopCallBack() 117 { 118 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 119 TestStop(1,NEW_MAX_BUFFER); 120 TestStop(1,1); 121 TestStop(NEW_MAX_BUFFER, 1); 122 } 123 124 static void TestSubCallBack() 125 { 126 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 127 TestSub(1,NEW_MAX_BUFFER); 128 TestSub(1,1); 129 TestSub(NEW_MAX_BUFFER, 1); 130 131 #if !UCONFIG_NO_LEGACY_CONVERSION 132 TestEBCDIC_STATEFUL_Sub(1, 1); 133 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); 134 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); 135 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 136 #endif 137 } 138 139 static void TestSubWithValueCallBack() 140 { 141 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 142 TestSubWithValue(1,NEW_MAX_BUFFER); 143 TestSubWithValue(1,1); 144 TestSubWithValue(NEW_MAX_BUFFER, 1); 145 } 146 147 #if !UCONFIG_NO_LEGACY_CONVERSION 148 static void TestLegalAndOtherCallBack() 149 { 150 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 151 TestLegalAndOthers(1,NEW_MAX_BUFFER); 152 TestLegalAndOthers(1,1); 153 TestLegalAndOthers(NEW_MAX_BUFFER, 1); 154 } 155 156 static void TestSingleByteCallBack() 157 { 158 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 159 TestSingleByte(1,NEW_MAX_BUFFER); 160 TestSingleByte(1,1); 161 TestSingleByte(NEW_MAX_BUFFER, 1); 162 } 163 #endif 164 165 static void TestSkip(int32_t inputsize, int32_t outputsize) 166 { 
167 static const uint8_t expskipIBM_949[]= { 168 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 169 170 static const uint8_t expskipIBM_943[] = { 171 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; 172 173 static const uint8_t expskipIBM_930[] = { 174 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; 175 176 gInBufferSize = inputsize; 177 gOutBufferSize = outputsize; 178 179 /*From Unicode*/ 180 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); 181 182 #if !UCONFIG_NO_LEGACY_CONVERSION 183 { 184 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 185 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 186 187 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; 188 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; 189 190 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 191 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", 192 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) 193 log_err("u-> ibm-949 with skip did not match.\n"); 194 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 195 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", 196 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) 197 log_err("u-> ibm-943 with skip did not match.\n"); 198 } 199 200 { 201 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 }; 202 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; 203 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 }; 204 205 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */ 206 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, 207 fromUBytes, sizeof(fromUBytes), 208 "ibm-930", 209 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, 210 NULL, 0) 211 ) { 212 log_err("u->ibm-930 with skip with untaken 
fallbacks did not match.\n"); 213 } 214 } 215 #endif 216 217 { 218 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 219 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; 220 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; 221 222 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 223 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; 224 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; 225 226 /* US-ASCII */ 227 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 228 usasciiFromUBytes, sizeof(usasciiFromUBytes), 229 "US-ASCII", 230 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 231 NULL, 0) 232 ) { 233 log_err("u->US-ASCII with skip did not match.\n"); 234 } 235 236 #if !UCONFIG_NO_LEGACY_CONVERSION 237 /* SBCS NLTC codepage 367 for US-ASCII */ 238 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 239 usasciiFromUBytes, sizeof(usasciiFromUBytes), 240 "ibm-367", 241 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 242 NULL, 0) 243 ) { 244 log_err("u->ibm-367 with skip did not match.\n"); 245 } 246 #endif 247 248 /* ISO-Latin-1 */ 249 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 250 latin1FromUBytes, sizeof(latin1FromUBytes), 251 "LATIN_1", 252 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 253 NULL, 0) 254 ) { 255 log_err("u->LATIN_1 with skip did not match.\n"); 256 } 257 258 #if !UCONFIG_NO_LEGACY_CONVERSION 259 /* windows-1252 */ 260 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 261 latin1FromUBytes, sizeof(latin1FromUBytes), 262 "windows-1252", 263 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 264 NULL, 0) 265 ) { 266 log_err("u->windows-1252 with skip did not match.\n"); 267 } 268 } 269 270 { 271 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 272 static const uint8_t toIBM943[]= { 0x61, 
0x61 }; 273 static const int32_t offset[]= {0, 4}; 274 275 /* EUC_JP*/ 276 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 277 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 278 0x61, 0x8e, 0xe0, 279 }; 280 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; 281 282 /*EUC_TW*/ 283 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 284 static const uint8_t to_euc_tw[]={ 285 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 286 0x61, 0xe6, 0xca, 0x8a, 287 }; 288 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,}; 289 290 /*ISO-2022-JP*/ 291 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, }; 292 static const uint8_t to_iso_2022_jp[]={ 293 0x41, 294 0x42, 295 296 }; 297 static const int32_t from_iso_2022_jpOffs [] ={0,2}; 298 299 /*ISO-2022-JP*/ 300 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 301 static const uint8_t to_iso_2022_jp2[]={ 302 0x41, 303 0x43, 304 305 }; 306 static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; 307 308 /*ISO-2022-cn*/ 309 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 310 static const uint8_t to_iso_2022_cn[]={ 311 0x41, 0x42 312 }; 313 static const int32_t from_iso_2022_cnOffs [] ={ 314 0, 2 315 }; 316 317 /*ISO-2022-CN*/ 318 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 319 static const uint8_t to_iso_2022_cn1[]={ 320 0x41, 0x43 321 322 }; 323 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; 324 325 /*ISO-2022-kr*/ 326 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 327 static const uint8_t to_iso_2022_kr[]={ 328 0x1b, 0x24, 0x29, 0x43, 329 0x41, 330 0x0e, 0x25, 0x50, 331 0x25, 0x50, 332 0x0f, 0x42, 333 }; 334 
static const int32_t from_iso_2022_krOffs [] ={ 335 -1,-1,-1,-1, 336 0, 337 1,1,1, 338 3,3, 339 4,4 340 }; 341 342 /*ISO-2022-kr*/ 343 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 344 static const uint8_t to_iso_2022_kr1[]={ 345 0x1b, 0x24, 0x29, 0x43, 346 0x41, 347 0x0e, 0x25, 0x50, 348 0x25, 0x50, 349 350 }; 351 static const int32_t from_iso_2022_krOffs1 [] ={ 352 -1,-1,-1,-1, 353 0, 354 1,1,1, 355 3,3, 356 357 }; 358 /* HZ encoding */ 359 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 360 361 static const uint8_t to_hz[]={ 362 0x7e, 0x7d, 0x41, 363 0x7e, 0x7b, 0x26, 0x30, 364 0x26, 0x30, 365 0x7e, 0x7d, 0x42, 366 367 }; 368 static const int32_t from_hzOffs [] ={ 369 0,0,0, 370 1,1,1,1, 371 3,3, 372 4,4,4,4 373 }; 374 375 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 376 377 static const uint8_t to_hz1[]={ 378 0x7e, 0x7d, 0x41, 379 0x7e, 0x7b, 0x26, 0x30, 380 0x26, 0x30, 381 382 383 }; 384 static const int32_t from_hzOffs1 [] ={ 385 0,0,0, 386 1,1,1,1, 387 3,3, 388 389 }; 390 391 #endif 392 393 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 394 395 static const uint8_t to_SCSU[]={ 396 0x41, 397 0x42 398 399 400 }; 401 static const int32_t from_SCSUOffs [] ={ 402 0, 403 2, 404 405 }; 406 407 #if !UCONFIG_NO_LEGACY_CONVERSION 408 /* ISCII */ 409 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 410 static const uint8_t to_iscii[]={ 411 0x41, 412 0x42, 413 }; 414 static const int32_t from_isciiOffs [] ={ 415 0,2, 416 417 }; 418 /*ISCII*/ 419 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 420 static const uint8_t to_iscii1[]={ 421 0x44, 422 0x43, 423 424 }; 425 static const int32_t from_isciiOffs1 [] ={0,2}; 426 427 if(!testConvertFromUnicode(inputTest, 
sizeof(inputTest)/sizeof(inputTest[0]), 428 toIBM943, sizeof(toIBM943), "ibm-943", 429 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) 430 log_err("u-> ibm-943 with skip did not match.\n"); 431 432 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 433 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 434 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) 435 log_err("u-> euc-jp with skip did not match.\n"); 436 437 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 438 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 439 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) 440 log_err("u-> euc-tw with skip did not match.\n"); 441 442 /*iso_2022_jp*/ 443 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 444 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 445 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) 446 log_err("u-> iso-2022-jp with skip did not match.\n"); 447 448 /* with context */ 449 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 450 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 451 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 452 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 453 454 /*iso_2022_cn*/ 455 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 456 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 457 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) 458 log_err("u-> iso-2022-cn with skip did not match.\n"); 459 /*with context*/ 460 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), 461 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", 462 
UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 463 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 464 465 /*iso_2022_kr*/ 466 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 467 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 468 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) 469 log_err("u-> iso-2022-kr with skip did not match.\n"); 470 /*with context*/ 471 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), 472 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", 473 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 474 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 475 476 /*hz*/ 477 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 478 to_hz, sizeof(to_hz), "HZ", 479 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) 480 log_err("u-> HZ with skip did not match.\n"); 481 /*with context*/ 482 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]), 483 to_hz1, sizeof(to_hz1), "hz", 484 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 485 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 486 #endif 487 488 /*SCSU*/ 489 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 490 to_SCSU, sizeof(to_SCSU), "SCSU", 491 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) 492 log_err("u-> SCSU with skip did not match.\n"); 493 494 #if !UCONFIG_NO_LEGACY_CONVERSION 495 /*ISCII*/ 496 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 497 to_iscii, sizeof(to_iscii), "ISCII,version=0", 498 
UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) 499 log_err("u-> iscii with skip did not match.\n"); 500 /*with context*/ 501 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]), 502 to_iscii1, sizeof(to_iscii1), "ISCII,version=0", 503 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 504 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 505 #endif 506 } 507 508 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 509 { 510 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 511 0xFB, 0xEE, 0x28, /* from source offset 0 */ 512 0x24, 0x1E, 0x52, 513 0xB2, 514 0x20, 515 0xB3, 516 0xB1, 517 0x0D, 518 0x0A, 519 520 0x20, /* from 8 */ 521 0x00, 522 0xD0, 0x6C, 523 0xB6, 524 0xD8, 0xA5, 525 0x20, 526 0x68, 527 0x59, 528 529 0xF9, 0x28, /* from 16 */ 530 0x6D, 531 0x20, 532 0x73, 533 0xE0, 0x2D, 534 0xDE, 0x43, 535 0xD0, 0x33, 536 0x20, 537 538 0xFA, 0x83, /* from 24 */ 539 0x25, 0x01, 540 0xFB, 0x16, 0x87, 541 0x4B, 0x16, 542 0x20, 543 0xE6, 0xBD, 544 0xEB, 0x5B, 545 0x4B, 0xCC, 546 547 0xF9, 0xA2, /* from 32 */ 548 0xFC, 0x10, 0x3E, 549 0xFE, 0x16, 0x3A, 0x8C, 550 0x20, 551 0xFC, 0x03, 0xAC, 552 553 0x01, /* from 41 */ 554 0xDE, 0x83, 555 0x20, 556 0x09 557 }; 558 static const UChar expected[]={ 559 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ 560 0x0063, 0x0061, 0x000D, 0x000A, 561 562 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ 563 0x0930, 0x0020, 0x0918, 0x0909, 564 565 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ 566 0x4000, 0x4E00, 0x7777, 0x0020, 567 568 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ 569 0x0020, 0xD7A3, 0xDC00, 0xD800, 570 571 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ 572 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 573 574 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ 575 0x0009 576 }; 577 static const int32_t offsets[]={ 578 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, 579 8, 9, 10, 10, 11, 12, 
12, 13, 14, 15, 580 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 581 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, 582 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, 583 41, 42, 42, 43, 44 584 }; 585 586 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */ 587 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 588 sampleText, sizeof(sampleText), 589 "BOCU-1", 590 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 591 ) { 592 log_err("u->BOCU-1 with skip did not match.\n"); 593 } 594 } 595 596 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 597 { 598 const uint8_t sampleText[]={ 599 0x61, /* 'a' */ 600 0xc4, 0xb5, /* U+0135 */ 601 0xed, 0x80, 0xa0, /* Hangul U+d020 */ 602 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ 603 0xee, 0x80, 0x80, /* PUA U+e000 */ 604 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */ 605 0x62, /* 'b' */ 606 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */ 607 0xd0, 0x80 /* U+0400 */ 608 }; 609 UChar expected[]={ 610 0x0061, 611 0x0135, 612 0xd020, 613 0xd801, 0xdc01, 614 0xe000, 615 0xdc01, 616 0x0062, 617 0xd801, 618 0x0400 619 }; 620 int32_t offsets[]={ 621 0, 622 1, 1, 623 2, 2, 2, 624 3, 3, 3, 4, 4, 4, 625 5, 5, 5, 626 6, 6, 6, 627 7, 628 8, 8, 8, 629 9, 9 630 }; 631 632 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */ 633 634 /* without offsets */ 635 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 636 sampleText, sizeof(sampleText), 637 "CESU-8", 638 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) 639 ) { 640 log_err("u->CESU-8 with skip did not match.\n"); 641 } 642 643 /* with offsets */ 644 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 645 sampleText, sizeof(sampleText), 646 "CESU-8", 647 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 648 ) { 649 log_err("u->CESU-8 with skip did not match.\n"); 650 } 651 } 652 653 /*to 
Unicode*/ 654 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); 655 656 #if !UCONFIG_NO_LEGACY_CONVERSION 657 { 658 659 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; 660 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 661 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 662 663 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; 664 static const int32_t fromIBM943Offs [] = { 0, 2, 4}; 665 static const int32_t fromIBM930Offs [] = { 1, 3, 5}; 666 667 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), 668 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949", 669 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) 670 log_err("ibm-949->u with skip did not match.\n"); 671 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), 672 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943", 673 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) 674 log_err("ibm-943->u with skip did not match.\n"); 675 676 677 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), 678 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 679 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) 680 log_err("ibm-930->u with skip did not match.\n"); 681 682 683 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930), 684 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 685 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 686 log_err("ibm-930->u with skip did not match.\n"); 687 } 688 #endif 689 690 { 691 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; 692 static const UChar usasciiToU[] = { 0x61, 0x31 }; 693 static const int32_t usasciiToUOffsets[] = { 0, 2 }; 694 695 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; 696 static const 
UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; 697 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; 698 699 /* US-ASCII */ 700 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 701 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 702 "US-ASCII", 703 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 704 NULL, 0) 705 ) { 706 log_err("US-ASCII->u with skip did not match.\n"); 707 } 708 709 #if !UCONFIG_NO_LEGACY_CONVERSION 710 /* SBCS NLTC codepage 367 for US-ASCII */ 711 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 712 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 713 "ibm-367", 714 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 715 NULL, 0) 716 ) { 717 log_err("ibm-367->u with skip did not match.\n"); 718 } 719 #endif 720 721 /* ISO-Latin-1 */ 722 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 723 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 724 "LATIN_1", 725 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 726 NULL, 0) 727 ) { 728 log_err("LATIN_1->u with skip did not match.\n"); 729 } 730 731 #if !UCONFIG_NO_LEGACY_CONVERSION 732 /* windows-1252 */ 733 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 734 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 735 "windows-1252", 736 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 737 NULL, 0) 738 ) { 739 log_err("windows-1252->u with skip did not match.\n"); 740 } 741 #endif 742 } 743 744 #if !UCONFIG_NO_LEGACY_CONVERSION 745 { 746 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 747 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 748 }; 749 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 750 }; 751 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; 752 753 754 /* euc-jp*/ 755 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 756 /* BEGIN android-changed */ 757 /* Android uses a different EUC-JP table. We change this byte sequence, 758 * choosing one that is unassigned in both tables. 
*/ 759 0x8f, 0xa1, 0xa1, /*unassigned*/ 760 /* 0x8f, 0xda, 0xa1, */ /*unassigned*/ 761 /* END android-changed */ 762 0x8e, 0xe0, 763 }; 764 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; 765 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; 766 767 /*EUC_TW*/ 768 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 769 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 770 0xe6, 0xca, 0x8a, 771 }; 772 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, }; 773 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; 774 /*iso-2022-jp*/ 775 static const uint8_t sampleTxt_iso_2022_jp[]={ 776 0x41, 777 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ 778 0x1b, 0x28, 0x42, 0x42, 779 780 }; 781 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; 782 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; 783 784 /*iso-2022-cn*/ 785 static const uint8_t sampleTxt_iso_2022_cn[]={ 786 0x0f, 0x41, 0x44, 787 0x1B, 0x24, 0x29, 0x47, 788 0x0E, 0x40, 0x6f, /*unassigned*/ 789 0x0f, 0x42, 790 791 }; 792 793 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; 794 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; 795 796 /*iso-2022-kr*/ 797 static const uint8_t sampleTxt_iso_2022_kr[]={ 798 0x1b, 0x24, 0x29, 0x43, 799 0x41, 800 0x0E, 0x7f, 0x1E, 801 0x0e, 0x25, 0x50, 802 0x0f, 0x51, 803 0x42, 0x43, 804 805 }; 806 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43}; 807 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 }; 808 809 /*hz*/ 810 static const uint8_t sampleTxt_hz[]={ 811 0x41, 812 0x7e, 0x7b, 0x26, 0x30, 813 0x7f, 0x1E, /*unassigned*/ 814 0x26, 0x30, 815 0x7e, 0x7d, 0x42, 816 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 817 0x7e, 0x7d, 0x42, 818 }; 819 static const UChar hztoUnicode[]={ 820 0x41, 821 0x03a0, 822 0x03A0, 823 0x42, 824 0x42,}; 825 826 static const int32_t from_hzOffs [] ={0,3,7,11,18, }; 827 828 /*ISCII*/ 829 static const uint8_t 
sampleTxt_iscii[]={ 830 0x41, 831 0xa1, 832 0xEB, /*unassigned*/ 833 0x26, 834 0x30, 835 0xa2, 836 0xEC, /*unassigned*/ 837 0x42, 838 }; 839 static const UChar isciitoUnicode[]={ 840 0x41, 841 0x0901, 842 0x26, 843 0x30, 844 0x0902, 845 0x42, 846 }; 847 848 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; 849 850 /*LMBCS*/ 851 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, 852 0x12, 0x92, 0xa0, /*unassigned*/ 853 0x12, 0x92, 0xA1, 854 }; 855 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; 856 static const int32_t fromLMBCS[] = {0, 6}; 857 858 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 859 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 860 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 861 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 862 863 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 864 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 865 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 866 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 867 868 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 869 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 870 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) 871 log_err("euc-jp->u with skip did not match.\n"); 872 873 874 875 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 876 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 877 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) 878 log_err("euc-tw->u with skip did not match.\n"); 879 880 881 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 882 iso_2022_jptoUnicode, 
sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 883 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) 884 log_err("iso-2022-jp->u with skip did not match.\n"); 885 886 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 887 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 888 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) 889 log_err("iso-2022-cn->u with skip did not match.\n"); 890 891 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 892 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 893 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) 894 log_err("iso-2022-kr->u with skip did not match.\n"); 895 896 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 897 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 898 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) 899 log_err("HZ->u with skip did not match.\n"); 900 901 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 902 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 903 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) 904 log_err("iscii->u with skip did not match.\n"); 905 906 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), 907 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1", 908 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) 909 log_err("LMBCS->u with skip did not match.\n"); 910 911 } 912 #endif 913 914 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); 915 { 916 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 917 0xe0, 0x80, 0x61,}; 918 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; 919 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; 920 921 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 922 expected1, 
sizeof(expected1)/sizeof(expected1[0]),"utf8", 923 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 924 log_err("utf8->u with skip did not match.\n");; 925 } 926 927 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); 928 { 929 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 930 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe}; 931 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 932 933 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 934 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 935 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 936 log_err("scsu->u with skip did not match.\n"); 937 } 938 939 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 940 { 941 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 942 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ 943 0x24, 0x1E, 0x52, /* 3 */ 944 0xB2, /* 6 */ 945 0x20, /* 7 */ 946 0x40, 0x07, /* 8 - wrong trail byte */ 947 0xB3, /* 10 */ 948 0xB1, /* 11 */ 949 0xD0, 0x20, /* 12 - wrong trail byte */ 950 0x0D, /* 14 */ 951 0x0A, /* 15 */ 952 0x20, /* 16 */ 953 0x00, /* 17 */ 954 0xD0, 0x6C, /* 18 */ 955 0xB6, /* 20 */ 956 0xD8, 0xA5, /* 21 */ 957 0x20, /* 23 */ 958 0x68, /* 24 */ 959 0x59, /* 25 */ 960 0xF9, 0x28, /* 26 */ 961 0x6D, /* 28 */ 962 0x20, /* 29 */ 963 0x73, /* 30 */ 964 0xE0, 0x2D, /* 31 */ 965 0xDE, 0x43, /* 33 */ 966 0xD0, 0x33, /* 35 */ 967 0x20, /* 37 */ 968 0xFA, 0x83, /* 38 */ 969 0x25, 0x01, /* 40 */ 970 0xFB, 0x16, 0x87, /* 42 */ 971 0x4B, 0x16, /* 45 */ 972 0x20, /* 47 */ 973 0xE6, 0xBD, /* 48 */ 974 0xEB, 0x5B, /* 50 */ 975 0x4B, 0xCC, /* 52 */ 976 0xF9, 0xA2, /* 54 */ 977 0xFC, 0x10, 0x3E, /* 56 */ 978 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ 979 0x20, /* 63 */ 980 0xFC, 0x03, 0xAC, /* 64 */ 981 0xFF, /* 67 - FF just resets the state without encoding anything */ 982 0x01, /* 68 */ 983 0xDE, 0x83, /* 69 */ 984 0x20, /* 71 */ 985 0x09 /* 72 */ 
986 }; 987 UChar expected[]={ 988 0xFEFF, 0x0061, 0x0062, 0x0020, 989 0x0063, 0x0061, 0x000D, 0x000A, 990 0x0020, 0x0000, 0x00DF, 0x00E6, 991 0x0930, 0x0020, 0x0918, 0x0909, 992 0x3086, 0x304D, 0x0020, 0x3053, 993 0x4000, 0x4E00, 0x7777, 0x0020, 994 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, 995 0x0020, 0xD7A3, 0xDC00, 0xD800, 996 0xD800, 0xDC00, 0xD845, 0xDDDD, 997 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 998 0xDFFF, 0x0001, 0x0E40, 0x0020, 999 0x0009 1000 }; 1001 int32_t offsets[]={ 1002 0, 3, 6, 7, /* skip 8, */ 1003 10, 11, /* skip 12, */ 1004 14, 15, 16, 17, 18, 1005 20, 21, 23, 24, 25, 26, 28, 29, 1006 30, 31, 33, 35, 37, 38, 1007 40, 42, 45, 47, 48, 1008 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, 1009 63, 64, /* trail */ 64, /* reset only 67, */ 1010 68, 69, 1011 71, 72 1012 }; 1013 1014 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1015 expected, ARRAY_LENGTH(expected), "BOCU-1", 1016 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1017 ) { 1018 log_err("BOCU-1->u with skip did not match.\n"); 1019 } 1020 } 1021 1022 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 1023 { 1024 const uint8_t sampleText[]={ 1025 0x61, /* 0 'a' */ 1026 0xc0, 0x80, /* 1 non-shortest form */ 1027 0xc4, 0xb5, /* 3 U+0135 */ 1028 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ 1029 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */ 1030 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ 1031 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */ 1032 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */ 1033 0x62, /* 24 'b' */ 1034 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */ 1035 0xed, 0xa0, /* 28 incomplete sequence */ 1036 0xd0, 0x80 /* 30 U+0400 */ 1037 }; 1038 UChar expected[]={ 1039 0x0061, 1040 /* skip */ 1041 0x0135, 1042 0xd020, 1043 0xd801, 0xdc01, 1044 0xe000, 1045 0xdc01, 1046 /* skip */ 1047 0x0062, 1048 0xd801, 1049 0x0400 1050 }; 1051 int32_t offsets[]={ 1052 0, 1053 /* skip 1, */ 1054 3, 1055 5, 1056 
8, 11, 1057 14, 1058 17, 1059 /* skip 20, 20, */ 1060 24, 1061 25, 1062 /* skip 28 */ 1063 30 1064 }; 1065 1066 /* without offsets */ 1067 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1068 expected, ARRAY_LENGTH(expected), "CESU-8", 1069 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) 1070 ) { 1071 log_err("CESU-8->u with skip did not match.\n"); 1072 } 1073 1074 /* with offsets */ 1075 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1076 expected, ARRAY_LENGTH(expected), "CESU-8", 1077 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1078 ) { 1079 log_err("CESU-8->u with skip did not match.\n"); 1080 } 1081 } 1082 } 1083 1084 static void TestStop(int32_t inputsize, int32_t outputsize) 1085 { 1086 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1087 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1088 1089 static const uint8_t expstopIBM_949[]= { 1090 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; 1091 1092 static const uint8_t expstopIBM_943[] = { 1093 0x9f, 0xaf, 0x9f, 0xb1}; 1094 1095 static const uint8_t expstopIBM_930[] = { 1096 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; 1097 1098 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; 1099 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; 1100 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; 1101 1102 1103 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; 1104 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; 1105 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; 1106 1107 static const int32_t fromIBM949Offs [] = { 0, 1, 3}; 1108 static const int32_t fromIBM943Offs [] = { 0, 2}; 1109 static const int32_t fromIBM930Offs [] = { 1, 3}; 1110 1111 gInBufferSize = inputsize; 1112 gOutBufferSize = outputsize; 1113 1114 /*From Unicode*/ 1115 1116 #if !UCONFIG_NO_LEGACY_CONVERSION 1117 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1118 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", 
1119 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) 1120 log_err("u-> ibm-949 with stop did not match.\n"); 1121 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1122 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", 1123 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) 1124 log_err("u-> ibm-943 with stop did not match.\n"); 1125 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1126 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", 1127 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) 1128 log_err("u-> ibm-930 with stop did not match.\n"); 1129 1130 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); 1131 { 1132 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1133 static const uint8_t toIBM943[]= { 0x61,}; 1134 static const int32_t offset[]= {0,} ; 1135 1136 /*EUC_JP*/ 1137 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1138 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; 1139 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; 1140 1141 /*EUC_TW*/ 1142 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1143 static const uint8_t to_euc_tw[]={ 1144 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; 1145 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; 1146 1147 /*ISO-2022-JP*/ 1148 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; 1149 static const uint8_t to_iso_2022_jp[]={ 1150 0x41, 1151 1152 }; 1153 static const int32_t from_iso_2022_jpOffs [] ={0,}; 1154 1155 /*ISO-2022-cn*/ 1156 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1157 static const uint8_t to_iso_2022_cn[]={ 1158 0x41, 1159 1160 }; 1161 static const int32_t from_iso_2022_cnOffs [] ={ 1162 0,0, 1163 2,2, 1164 }; 1165 1166 /*ISO-2022-kr*/ 1167 static 
const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 1168 static const uint8_t to_iso_2022_kr[]={ 1169 0x1b, 0x24, 0x29, 0x43, 1170 0x41, 1171 0x0e, 0x25, 0x50, 1172 }; 1173 static const int32_t from_iso_2022_krOffs [] ={ 1174 -1,-1,-1,-1, 1175 0, 1176 1,1,1, 1177 }; 1178 1179 /* HZ encoding */ 1180 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1181 1182 static const uint8_t to_hz[]={ 1183 0x7e, 0x7d, 0x41, 1184 0x7e, 0x7b, 0x26, 0x30, 1185 1186 }; 1187 static const int32_t from_hzOffs [] ={ 1188 0, 0,0, 1189 1,1,1,1, 1190 }; 1191 1192 /*ISCII*/ 1193 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1194 static const uint8_t to_iscii[]={ 1195 0x41, 1196 }; 1197 static const int32_t from_isciiOffs [] ={ 1198 0, 1199 }; 1200 1201 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 1202 toIBM943, sizeof(toIBM943), "ibm-943", 1203 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) 1204 log_err("u-> ibm-943 with stop did not match.\n"); 1205 1206 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1207 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 1208 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) 1209 log_err("u-> euc-jp with stop did not match.\n"); 1210 1211 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1212 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1213 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1214 log_err("u-> euc-tw with stop did not match.\n"); 1215 1216 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1217 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1218 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1219 log_err("u-> iso-2022-jp with stop did not match.\n"); 1220 1221 if(!testConvertFromUnicode(iso_2022_jp_inputText, 
sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1222 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1223 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1224 log_err("u-> iso-2022-jp with stop did not match.\n"); 1225 1226 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 1227 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 1228 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) 1229 log_err("u-> iso-2022-cn with stop did not match.\n"); 1230 1231 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 1232 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 1233 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) 1234 log_err("u-> iso-2022-kr with stop did not match.\n"); 1235 1236 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 1237 to_hz, sizeof(to_hz), "HZ", 1238 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) 1239 log_err("u-> HZ with stop did not match.\n");\ 1240 1241 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 1242 to_iscii, sizeof(to_iscii), "ISCII,version=0", 1243 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) 1244 log_err("u-> iscii with stop did not match.\n"); 1245 1246 1247 } 1248 #endif 1249 1250 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); 1251 { 1252 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1253 1254 static const uint8_t to_SCSU[]={ 1255 0x41, 1256 1257 }; 1258 int32_t from_SCSUOffs [] ={ 1259 0, 1260 1261 }; 1262 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1263 to_SCSU, sizeof(to_SCSU), "SCSU", 1264 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) 1265 log_err("u-> SCSU with skip did not match.\n"); 1266 1267 } 1268 1269 /*to Unicode*/ 1270 1271 
#if !UCONFIG_NO_LEGACY_CONVERSION 1272 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), 1273 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949", 1274 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) 1275 log_err("ibm-949->u with stop did not match.\n"); 1276 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), 1277 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943", 1278 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) 1279 log_err("ibm-943->u with stop did not match.\n"); 1280 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), 1281 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930", 1282 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) 1283 log_err("ibm-930->u with stop did not match.\n"); 1284 1285 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); 1286 { 1287 1288 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1289 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1290 }; 1291 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; 1292 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; 1293 1294 1295 /*EUC-JP*/ 1296 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1297 /* BEGIN android-changed */ 1298 /* Android uses a different EUC-JP table. We change this byte sequence, 1299 * choosing one that is unassigned in both tables. 
*/ 1300 0x8f, 0xa1, 0xa1, /*unassigned*/ 1301 /* 0x8f, 0xda, 0xa1, */ /*unassigned*/ 1302 /* END android-changed */ 1303 0x8e, 0xe0, 1304 }; 1305 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; 1306 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; 1307 1308 /*EUC_TW*/ 1309 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1310 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1311 0xe6, 0xca, 0x8a, 1312 }; 1313 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; 1314 int32_t from_euc_twOffs [] ={ 0, 1, 3}; 1315 1316 1317 1318 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 1319 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1320 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1321 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); 1322 1323 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1324 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 1325 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) 1326 log_err("euc-jp->u with stop did not match.\n"); 1327 1328 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1329 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1330 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1331 log_err("euc-tw->u with stop did not match.\n"); 1332 } 1333 #endif 1334 1335 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); 1336 { 1337 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1338 0xe0, 0x80, 0x61,}; 1339 static const UChar expected1[] = { 0x0031, 0x4e8c,}; 1340 static const int32_t offsets1[] = { 0x0000, 0x0001}; 1341 1342 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1343 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 1344 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1345 log_err("utf8->u with stop did 
not match.\n");; 1346 } 1347 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); 1348 { 1349 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04}; 1350 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061}; 1351 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; 1352 1353 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1354 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1355 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1356 log_err("scsu->u with stop did not match.\n");; 1357 } 1358 1359 } 1360 1361 static void TestSub(int32_t inputsize, int32_t outputsize) 1362 { 1363 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1364 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1365 1366 static const uint8_t expsubIBM_949[] = 1367 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; 1368 1369 static const uint8_t expsubIBM_943[] = { 1370 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; 1371 1372 static const uint8_t expsubIBM_930[] = { 1373 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; 1374 1375 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 }; 1376 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1377 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1378 1379 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1380 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; 1381 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; 1382 1383 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; 1384 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; 1385 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; 1386 1387 gInBufferSize = inputsize; 1388 gOutBufferSize = outputsize; 1389 1390 /*from unicode*/ 1391 1392 #if !UCONFIG_NO_LEGACY_CONVERSION 
1393 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1394 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", 1395 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) 1396 log_err("u-> ibm-949 with subst did not match.\n"); 1397 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1398 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", 1399 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) 1400 log_err("u-> ibm-943 with subst did not match.\n"); 1401 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1402 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", 1403 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) 1404 log_err("u-> ibm-930 with subst did not match.\n"); 1405 1406 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1407 { 1408 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1409 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; 1410 static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; 1411 1412 1413 /* EUC_JP*/ 1414 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1415 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1416 0xf4, 0xfe, 0xf4, 0xfe, 1417 0x61, 0x8e, 0xe0, 1418 }; 1419 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7}; 1420 1421 /*EUC_TW*/ 1422 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1423 static const uint8_t to_euc_tw[]={ 1424 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1425 0xfd, 0xfe, 0xfd, 0xfe, 1426 0x61, 0xe6, 0xca, 0x8a, 1427 }; 1428 1429 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; 1430 1431 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 1432 toIBM943, sizeof(toIBM943), "ibm-943", 1433 
UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) 1434 log_err("u-> ibm-943 with substitute did not match.\n"); 1435 1436 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1437 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 1438 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) 1439 log_err("u-> euc-jp with substitute did not match.\n"); 1440 1441 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1442 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1443 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1444 log_err("u-> euc-tw with substitute did not match.\n"); 1445 } 1446 #endif 1447 1448 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1449 { 1450 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1451 1452 const uint8_t to_SCSU[]={ 1453 0x41, 1454 0x0e, 0xff,0xfd, 1455 0x42 1456 1457 1458 }; 1459 int32_t from_SCSUOffs [] ={ 1460 0, 1461 1,1,1, 1462 2, 1463 1464 }; 1465 const uint8_t to_SCSU_1[]={ 1466 0x41, 1467 1468 }; 1469 int32_t from_SCSUOffs_1 [] ={ 1470 0, 1471 1472 }; 1473 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1474 to_SCSU, sizeof(to_SCSU), "SCSU", 1475 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) 1476 log_err("u-> SCSU with substitute did not match.\n"); 1477 1478 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1479 to_SCSU_1, sizeof(to_SCSU_1), "SCSU", 1480 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 1481 log_err("u-> SCSU with substitute did not match.\n"); 1482 } 1483 1484 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1485 { 1486 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,}; 1487 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, 1488 0xf0, 0x90, 
0x90, 0x81, 1489 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 1490 0xef, 0xbf, 0xbf, 0x61, 1491 1492 }; 1493 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 }; 1494 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]), 1495 expectedUTF8, sizeof(expectedUTF8), "utf8", 1496 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { 1497 log_err("u-> utf8 with stop did not match.\n"); 1498 } 1499 } 1500 1501 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1502 { 1503 static const UChar in[]={ 0x0041, 0xfeff }; 1504 1505 static const uint8_t out[]={ 1506 #if U_IS_BIG_ENDIAN 1507 0xfe, 0xff, 1508 0x00, 0x41, 1509 0xfe, 0xff 1510 #else 1511 0xff, 0xfe, 1512 0x41, 0x00, 1513 0xff, 0xfe 1514 #endif 1515 }; 1516 static const int32_t offsets[]={ 1517 -1, -1, 0, 0, 1, 1 1518 }; 1519 1520 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1521 out, sizeof(out), "UTF-16", 1522 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1523 ) { 1524 log_err("u->UTF-16 with substitute did not match.\n"); 1525 } 1526 } 1527 1528 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1529 { 1530 static const UChar in[]={ 0x0041, 0xfeff }; 1531 1532 static const uint8_t out[]={ 1533 #if U_IS_BIG_ENDIAN 1534 0x00, 0x00, 0xfe, 0xff, 1535 0x00, 0x00, 0x00, 0x41, 1536 0x00, 0x00, 0xfe, 0xff 1537 #else 1538 0xff, 0xfe, 0x00, 0x00, 1539 0x41, 0x00, 0x00, 0x00, 1540 0xff, 0xfe, 0x00, 0x00 1541 #endif 1542 }; 1543 static const int32_t offsets[]={ 1544 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 1545 }; 1546 1547 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1548 out, sizeof(out), "UTF-32", 1549 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1550 ) { 1551 log_err("u->UTF-32 with substitute did not match.\n"); 1552 } 1553 } 1554 1555 /*to unicode*/ 1556 1557 #if !UCONFIG_NO_LEGACY_CONVERSION 1558 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), 1559 IBM_949subtoUnicode, 
sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949", 1560 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) 1561 log_err("ibm-949->u with substitute did not match.\n"); 1562 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), 1563 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943", 1564 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) 1565 log_err("ibm-943->u with substitute did not match.\n"); 1566 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), 1567 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930", 1568 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) 1569 log_err("ibm-930->u with substitute did not match.\n"); 1570 1571 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1572 { 1573 1574 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1575 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1576 }; 1577 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 1578 }; 1579 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; 1580 1581 1582 /* EUC_JP*/ 1583 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1584 /* BEGIN android-changed */ 1585 /* Android uses a different EUC-JP table. We change this byte sequence, 1586 * choosing one that is unassigned in both tables. 
*/ 1587 0x8f, 0xa1, 0xa1, /*unassigned*/ 1588 /* 0x8f, 0xda, 0xa1, */ /*unassigned*/ 1589 /* END android-changed */ 1590 0x8e, 0xe0, 0x8a 1591 }; 1592 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a }; 1593 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; 1594 1595 /*EUC_TW*/ 1596 const uint8_t sampleTxt_euc_tw[]={ 1597 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1598 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1599 0xe6, 0xca, 0x8a, 1600 }; 1601 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, }; 1602 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; 1603 1604 1605 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 1606 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1607 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1608 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); 1609 1610 1611 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1612 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 1613 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) 1614 log_err("euc-jp->u with substitute did not match.\n"); 1615 1616 1617 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1618 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1619 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1620 log_err("euc-tw->u with substitute did not match.\n"); 1621 1622 1623 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1624 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", 1625 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) 1626 log_err("euc-jp->u with substitute did not match.\n"); 1627 } 1628 #endif 1629 1630 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1631 { 1632 
const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1633 0xe0, 0x80, 0x61,}; 1634 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 1635 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 1636 1637 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1638 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 1639 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1640 log_err("utf8->u with substitute did not match.\n");; 1641 } 1642 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1643 { 1644 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 1645 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd}; 1646 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 1647 1648 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1649 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1650 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1651 log_err("scsu->u with stop did not match.\n");; 1652 } 1653 1654 #if !UCONFIG_NO_LEGACY_CONVERSION 1655 log_verbose("Testing ibm-930 subchar/subchar1\n"); 1656 { 1657 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; 1658 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f }; 1659 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1660 1661 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a }; 1662 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 }; 1663 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 }; 1664 1665 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930", 1666 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1667 ) { 1668 log_err("u->ibm-930 subchar/subchar1 did not match.\n"); 1669 } 1670 1671 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930", 1672 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1673 ) { 1674 
log_err("ibm-930->u subchar/subchar1 did not match.\n"); 1675 } 1676 } 1677 1678 log_verbose("Testing GB 18030 with substitute callbacks\n"); 1679 { 1680 static const UChar u2[]={ 1681 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; 1682 static const uint8_t gb2[]={ 1683 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; 1684 static const int32_t offsets2[]={ 1685 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; 1686 1687 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030", 1688 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1689 ) { 1690 log_err("gb18030->u with substitute did not match.\n"); 1691 } 1692 } 1693 #endif 1694 1695 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); 1696 { 1697 static const uint8_t utf7[]={ 1698 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. 
*/ 1699 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 1700 }; 1701 static const UChar unicode[]={ 1702 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 1703 }; 1704 static const int32_t offsets[]={ 1705 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24 1706 }; 1707 1708 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7", 1709 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1710 ) { 1711 log_err("UTF-7->u with substitute did not match.\n"); 1712 } 1713 } 1714 1715 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); 1716 { 1717 static const uint8_t 1718 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, 1719 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, 1720 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; 1721 1722 static const UChar 1723 out1[]={ 0x4e00, 0xfeff }, 1724 out2[]={ 0x004e, 0xfffe }, 1725 out3[]={ 0xfefd, 0x4e00, 0xfeff }; 1726 1727 static const int32_t 1728 offsets1[]={ 2, 4 }, 1729 offsets2[]={ 2, 4 }, 1730 offsets3[]={ 0, 2, 4 }; 1731 1732 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16", 1733 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1734 ) { 1735 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); 1736 } 1737 1738 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16", 1739 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1740 ) { 1741 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); 1742 } 1743 1744 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16", 1745 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1746 ) { 1747 log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); 1748 } 1749 } 1750 1751 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); 1752 { 1753 static const 
uint8_t 1754 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }, 1755 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }, 1756 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, 1757 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 }; 1758 1759 static const UChar 1760 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff }, 1761 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe }, 1762 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd }, 1763 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 }; 1764 1765 static const int32_t 1766 offsets1[]={ 4, 4, 8 }, 1767 offsets2[]={ 4, 4, 8 }, 1768 offsets3[]={ 0, 4, 4, 8, 12 }, 1769 offsets4[]={ 0, 0, 4, 8 }; 1770 1771 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32", 1772 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1773 ) { 1774 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); 1775 } 1776 1777 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32", 1778 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1779 ) { 1780 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); 1781 } 1782 1783 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32", 1784 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1785 ) { 1786 log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); 1787 } 1788 1789 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32", 1790 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0) 1791 ) { 1792 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n"); 1793 } 1794 } 1795 } 1796 1797 static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 1798 { 1799 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1800 UChar sampleText2[] = { 
0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1801 1802 const uint8_t expsubwvalIBM_949[]= { 1803 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 1804 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; 1805 1806 const uint8_t expsubwvalIBM_943[]= { 1807 0x9f, 0xaf, 0x9f, 0xb1, 1808 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; 1809 1810 const uint8_t expsubwvalIBM_930[] = { 1811 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f }; 1812 1813 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; 1814 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; 1815 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ 1816 1817 gInBufferSize = inputsize; 1818 gOutBufferSize = outputsize; 1819 1820 /*from Unicode*/ 1821 1822 #if !UCONFIG_NO_LEGACY_CONVERSION 1823 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1824 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", 1825 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) 1826 log_err("u-> ibm-949 with subst with value did not match.\n"); 1827 1828 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1829 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", 1830 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) 1831 log_err("u-> ibm-943 with sub with value did not match.\n"); 1832 1833 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1834 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", 1835 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) 1836 log_err("u-> ibm-930 with subst with value did not match.\n"); 1837 1838 1839 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 1840 { 1841 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1842 static const uint8_t toIBM943[]= { 0x61, 1843 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1844 0x25, 0x55, 
0x44, 0x43, 0x30, 0x31, 1845 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1846 0x61 }; 1847 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 1848 1849 1850 /* EUC_JP*/ 1851 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, }; 1852 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1853 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1854 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1855 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1856 0x61, 0x8e, 0xe0, 1857 }; 1858 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 1859 3, 3, 3, 3, 3, 3, 1860 3, 3, 3, 3, 3, 3, 1861 5, 5, 5, 5, 5, 5, 1862 6, 7, 7, 1863 }; 1864 1865 /*EUC_TW*/ 1866 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1867 static const uint8_t to_euc_tw[]={ 1868 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1869 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1870 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1871 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1872 0x61, 0xe6, 0xca, 0x8a, 1873 }; 1874 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 1875 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 1876 6, 7, 7, 8, 1877 }; 1878 /*ISO-2022-JP*/ 1879 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; 1880 static const uint8_t to_iso_2022_jp1[]={ 1881 0x1b, 0x24, 0x42, 0x21, 0x21, 1882 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1883 0x1b, 0x24, 0x42, 0x21, 0x22, 1884 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1885 0x42, 1886 }; 1887 1888 static const int32_t from_iso_2022_jpOffs1 [] ={ 1889 0,0,0,0,0, 1890 1,1,1,1,1,1,1,1,1, 1891 2,2,2,2,2, 1892 3,3,3,3,3,3,3,3,3, 1893 4, 1894 }; 1895 /* surrogate pair*/ 1896 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ; 1897 static const uint8_t to_iso_2022_jp2[]={ 1898 0x1b, 0x24, 0x42, 0x21, 0x21, 1899 0x1b, 0x28, 0x42, 
0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1900 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1901 0x1b, 0x24, 0x42, 0x21, 0x22, 1902 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1903 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1904 0x42, 1905 }; 1906 static const int32_t from_iso_2022_jpOffs2 [] ={ 1907 0,0,0,0,0, 1908 1,1,1,1,1,1,1,1,1, 1909 1,1,1,1,1,1, 1910 3,3,3,3,3, 1911 4,4,4,4,4,4,4,4,4, 1912 4,4,4,4,4,4, 1913 6, 1914 }; 1915 1916 /*ISO-2022-cn*/ 1917 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1918 static const uint8_t to_iso_2022_cn[]={ 1919 0x41, 1920 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, 1921 0x42, 1922 }; 1923 static const int32_t from_iso_2022_cnOffs [] ={ 1924 0, 1925 1,1,1,1,1,1, 1926 2, 1927 }; 1928 1929 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; 1930 1931 static const uint8_t to_iso_2022_cn4[]={ 1932 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 1933 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1934 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1935 0x0e, 0x21, 0x22, 1936 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1937 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1938 0x42, 1939 }; 1940 static const int32_t from_iso_2022_cnOffs4 [] ={ 1941 0,0,0,0,0,0,0, 1942 1,1,1,1,1,1,1, 1943 1,1,1,1,1,1, 1944 3,3,3, 1945 4,4,4,4,4,4,4, 1946 4,4,4,4,4,4, 1947 6 1948 1949 }; 1950 1951 /*ISO-2022-kr*/ 1952 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 1953 static const uint8_t to_iso_2022_kr2[]={ 1954 0x1b, 0x24, 0x29, 0x43, 1955 0x41, 1956 0x0e, 0x25, 0x50, 1957 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1958 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1959 0x0e, 0x25, 0x50, 1960 0x0f, 0x42, 1961 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1962 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1963 0x43 1964 }; 1965 static const int32_t from_iso_2022_krOffs2 [] ={ 1966 -1,-1,-1,-1, 1967 0, 1968 1,1,1, 1969 2,2,2,2,2,2,2, 1970 2,2,2,2,2,2, 1971 4,4,4, 1972 
5,5, 1973 6,6,6,6,6,6, 1974 6,6,6,6,6,6, 1975 8, 1976 }; 1977 1978 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; 1979 static const uint8_t to_iso_2022_kr[]={ 1980 0x1b, 0x24, 0x29, 0x43, 1981 0x41, 1982 0x0e, 0x25, 0x50, 1983 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1984 0x0e, 0x25, 0x50, 1985 0x0f, 0x42, 1986 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1987 0x43 1988 }; 1989 1990 1991 static const int32_t from_iso_2022_krOffs [] ={ 1992 -1,-1,-1,-1, 1993 0, 1994 1,1,1, 1995 2,2,2,2,2,2,2, 1996 3,3,3, 1997 4,4, 1998 5,5,5,5,5,5, 1999 6, 2000 }; 2001 /* HZ encoding */ 2002 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 2003 2004 static const uint8_t to_hz[]={ 2005 0x7e, 0x7d, 0x41, 2006 0x7e, 0x7b, 0x26, 0x30, 2007 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/ 2008 0x7e, 0x7b, 0x26, 0x30, 2009 0x7e, 0x7d, 0x42, 2010 2011 }; 2012 static const int32_t from_hzOffs [] ={ 2013 0,0,0, 2014 1,1,1,1, 2015 2,2,2,2,2,2,2,2, 2016 3,3,3,3, 2017 4,4,4 2018 }; 2019 2020 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 2021 static const uint8_t to_hz2[]={ 2022 0x7e, 0x7d, 0x41, 2023 0x7e, 0x7b, 0x26, 0x30, 2024 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2025 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2026 0x7e, 0x7b, 0x26, 0x30, 2027 0x7e, 0x7d, 0x42, 2028 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2029 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2030 0x43 2031 }; 2032 static const int32_t from_hzOffs2 [] ={ 2033 0,0,0, 2034 1,1,1,1, 2035 2,2,2,2,2,2,2,2, 2036 2,2,2,2,2,2, 2037 4,4,4,4, 2038 5,5,5, 2039 6,6,6,6,6,6, 2040 6,6,6,6,6,6, 2041 8, 2042 }; 2043 2044 /*ISCII*/ 2045 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; 2046 static const uint8_t to_iscii[]={ 2047 0x41, 2048 0xef, 
0x42, 0xa1, 2049 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2050 0xa2, 2051 0x42, 2052 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2053 0x43 2054 }; 2055 2056 2057 static const int32_t from_isciiOffs [] ={ 2058 0, 2059 1,1,1, 2060 2,2,2,2,2,2, 2061 3, 2062 4, 2063 5,5,5,5,5,5, 2064 6, 2065 }; 2066 2067 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 2068 toIBM943, sizeof(toIBM943), "ibm-943", 2069 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) 2070 log_err("u-> ibm-943 with subst with value did not match.\n"); 2071 2072 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 2073 to_euc_jp, sizeof(to_euc_jp), "euc-jp", 2074 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) 2075 log_err("u-> euc-jp with subst with value did not match.\n"); 2076 2077 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 2078 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 2079 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) 2080 log_err("u-> euc-tw with subst with value did not match.\n"); 2081 2082 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2083 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2084 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2085 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2086 2087 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2088 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2089 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2090 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2091 2092 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 2093 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 2094 
UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) 2095 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2096 /*ESCAPE OPTIONS*/ 2097 { 2098 /* surrogate pair*/ 2099 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; 2100 static const uint8_t to_iso_2022_jp3_v2[]={ 2101 0x1b, 0x24, 0x42, 0x21, 0x21, 2102 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2103 2104 0x1b, 0x24, 0x42, 0x21, 0x22, 2105 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2106 2107 0x42, 2108 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b, 2109 }; 2110 2111 static const int32_t from_iso_2022_jpOffs3_v2 [] ={ 2112 0,0,0,0,0, 2113 1,1,1,1,1,1,1,1,1,1,1,1, 2114 2115 3,3,3,3,3, 2116 4,4,4,4,4,4,4,4,4,4,4,4, 2117 2118 6, 2119 7,7,7,7,7,7,7,7,7 2120 }; 2121 2122 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), 2123 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp", 2124 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2125 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); 2126 } 2127 { 2128 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2129 static const uint8_t to_iso_2022_cn5_v2[]={ 2130 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2131 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2132 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2133 0x0e, 0x21, 0x22, 2134 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2135 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2136 0x42, 2137 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, 2138 }; 2139 static const int32_t from_iso_2022_cnOffs5_v2 [] ={ 2140 0,0,0,0,0,0,0, 2141 1,1,1,1,1,1,1, 2142 1,1,1,1,1,1, 2143 3,3,3, 2144 4,4,4,4,4,4,4, 2145 4,4,4,4,4,4, 2146 6, 2147 7,7,7,7,7,7 2148 }; 2149 
if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), 2150 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", 2151 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR )) 2152 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n"); 2153 2154 } 2155 { 2156 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2157 static const uint8_t to_iso_2022_cn6_v2[]={ 2158 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2159 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2160 0x0e, 0x21, 0x22, 2161 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2162 0x42, 2163 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d 2164 }; 2165 static const int32_t from_iso_2022_cnOffs6_v2 [] ={ 2166 0, 0, 0, 0, 0, 0, 0, 2167 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2168 3, 3, 3, 2169 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2170 6, 2171 7, 7, 7, 7, 7, 7, 7, 7, 2172 }; 2173 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), 2174 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", 2175 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR )) 2176 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n"); 2177 2178 } 2179 { 2180 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2181 static const uint8_t to_iso_2022_cn7_v2[]={ 2182 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2183 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2184 0x0e, 0x21, 0x22, 2185 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2186 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, 2187 }; 2188 static const int32_t from_iso_2022_cnOffs7_v2 [] ={ 2189 0, 0, 0, 0, 0, 0, 0, 2190 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 2191 3, 3, 3, 2192 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2193 6, 2194 7, 7, 7, 7, 7, 7, 2195 }; 2196 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), 2197 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", 2198 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR )) 2199 log_err("u-> iso-2022-cn with sub & K did not match.\n"); 2200 2201 } 2202 { 2203 static const UChar iso_2022_cn_inputText8[]={ 2204 0x3000, 2205 0xD84D, 0xDC56, 2206 0x3001, 2207 0xD84D, 0xDC56, 2208 0xDBFF, 0xDFFF, 2209 0x0042, 2210 0x0902}; 2211 static const uint8_t to_iso_2022_cn8_v2[]={ 2212 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2213 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2214 0x0e, 0x21, 0x22, 2215 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2216 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20, 2217 0x42, 2218 0x5c, 0x39, 0x30, 0x32, 0x20 2219 }; 2220 static const int32_t from_iso_2022_cnOffs8_v2 [] ={ 2221 0, 0, 0, 0, 0, 0, 0, 2222 1, 1, 1, 1, 1, 1, 1, 1, 2223 3, 3, 3, 2224 4, 4, 4, 4, 4, 4, 4, 4, 2225 6, 6, 6, 6, 6, 6, 6, 6, 2226 8, 2227 9, 9, 9, 9, 9 2228 }; 2229 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]), 2230 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn", 2231 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR )) 2232 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n"); 2233 2234 } 2235 { 2236 static const uint8_t to_iso_2022_cn4_v3[]={ 2237 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2238 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2239 0x0e, 0x21, 0x22, 2240 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2241 0x42 2242 }; 2243 2244 2245 static const int32_t from_iso_2022_cnOffs4_v3 [] ={ 2246 0,0,0,0,0,0,0, 2247 
1,1,1,1,1,1,1,1,1,1,1, 2248 2249 3,3,3, 2250 4,4,4,4,4,4,4,4,4,4,4, 2251 2252 6 2253 2254 }; 2255 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2256 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", 2257 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2258 { 2259 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n"); 2260 } 2261 } 2262 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 2263 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 2264 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) 2265 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2266 2267 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2268 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", 2269 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) 2270 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2271 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 2272 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 2273 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) 2274 log_err("u-> iso_2022_kr with subst with value did not match.\n"); 2275 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]), 2276 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", 2277 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) 2278 log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); 2279 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 2280 to_hz, sizeof(to_hz), "HZ", 2281 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) 2282 log_err("u-> hz with subst with 
value did not match.\n"); 2283 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]), 2284 to_hz2, sizeof(to_hz2), "HZ", 2285 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) 2286 log_err("u-> hz with subst with value did not match.\n"); 2287 2288 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 2289 to_iscii, sizeof(to_iscii), "ISCII,version=0", 2290 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) 2291 log_err("u-> iscii with subst with value did not match.\n"); 2292 } 2293 #endif 2294 2295 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 2296 /*to Unicode*/ 2297 { 2298 #if !UCONFIG_NO_LEGACY_CONVERSION 2299 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 2300 0x81, 0xad, /*unassigned*/ 2301 0x89, 0xd3 }; 2302 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 2303 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 2304 0x7B87}; 2305 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 2306 2307 /* EUC_JP*/ 2308 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 2309 /* BEGIN android-changed */ 2310 /* Android uses a different EUC-JP table. We change this byte sequence, 2311 * choosing one that is unassigned in both tables. */ 2312 0x8f, 0xa1, 0xa1, /*unassigned*/ 2313 /* 0x8f, 0xda, 0xa1, */ /*unassigned*/ 2314 /* END android-changed */ 2315 0x8e, 0xe0, 2316 }; 2317 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 2318 /* BEGIN android-changed */ 2319 /* Android uses a different EUC-JP table. We change the expected output, 2320 * matching the byte sequence modified above. 
*/ 2321 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x41, 0x31, 0x25, 0x58, 0x41, 0x31, 2322 /* 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, */ 2323 /* END android-changed */ 2324 0x00a2 }; 2325 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, 2326 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2327 9, 2328 }; 2329 2330 /*EUC_TW*/ 2331 static const uint8_t sampleTxt_euc_tw[]={ 2332 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 2333 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 2334 0xe6, 0xca, 0x8a, 2335 }; 2336 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 2337 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43, 2338 0x8706, 0x8a, }; 2339 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 2340 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2341 11, 13}; 2342 2343 /*iso-2022-jp*/ 2344 static const uint8_t sampleTxt_iso_2022_jp[]={ 2345 0x1b, 0x28, 0x42, 0x41, 2346 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ 2347 0x1b, 0x28, 0x42, 0x42, 2348 2349 }; 2350 /* A % X 3 A % X 1 A B */ 2351 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 }; 2352 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 }; 2353 2354 /*iso-2022-cn*/ 2355 static const uint8_t sampleTxt_iso_2022_cn[]={ 2356 0x0f, 0x41, 0x44, 2357 0x1B, 0x24, 0x29, 0x47, 2358 0x0E, 0x40, 0x6c, /*unassigned*/ 2359 0x0f, 0x42, 2360 2361 }; 2362 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 }; 2363 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 }; 2364 2365 /*iso-2022-kr*/ 2366 static const uint8_t sampleTxt_iso_2022_kr[]={ 2367 0x1b, 0x24, 0x29, 0x43, 2368 0x41, 2369 0x0E, 0x7f, 0x1E, 2370 0x0e, 0x25, 0x50, 2371 0x0f, 0x51, 2372 0x42, 0x43, 2373 2374 }; 2375 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; 2376 static const int32_t 
from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 }; 2377 2378 /*hz*/ 2379 static const uint8_t sampleTxt_hz[]={ 2380 0x41, 2381 0x7e, 0x7b, 0x26, 0x30, 2382 0x7f, 0x1E, /*unassigned*/ 2383 0x26, 0x30, 2384 0x7e, 0x7d, 0x42, 2385 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 2386 0x7e, 0x7d, 0x42, 2387 }; 2388 static const UChar hztoUnicode[]={ 2389 0x41, 2390 0x03a0, 2391 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2392 0x03A0, 2393 0x42, 2394 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2395 0x42,}; 2396 2397 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, }; 2398 2399 2400 /*iscii*/ 2401 static const uint8_t sampleTxt_iscii[]={ 2402 0x41, 2403 0x30, 2404 0xEB, /*unassigned*/ 2405 0xa3, 2406 0x42, 2407 0xEC, /*unassigned*/ 2408 0x42, 2409 }; 2410 static const UChar isciitoUnicode[]={ 2411 0x41, 2412 0x30, 2413 0x25, 0x58, 0x45, 0x42, 2414 0x0903, 2415 0x42, 2416 0x25, 0x58, 0x45, 0x43, 2417 0x42,}; 2418 2419 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; 2420 #endif 2421 2422 /*UTF8*/ 2423 static const uint8_t sampleTxtUTF8[]={ 2424 0x20, 0x64, 0x50, 2425 0xC2, 0x7E, /* truncated char */ 2426 0x20, 2427 0xE0, 0xB5, 0x7E, /* truncated char */ 2428 0x40, 2429 }; 2430 static const UChar UTF8ToUnicode[]={ 2431 0x0020, 0x0064, 0x0050, 2432 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ 2433 0x0020, 2434 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E, 2435 0x0040 2436 }; 2437 static const int32_t fromUTF8[] = { 2438 0, 1, 2, 2439 3, 3, 3, 3, 4, 2440 5, 2441 6, 6, 6, 6, 6, 6, 6, 6, 8, 2442 9 2443 }; 2444 static const UChar UTF8ToUnicodeXML_DEC[]={ 2445 0x0020, 0x0064, 0x0050, 2446 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */ 2447 0x0020, 2448 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E, 2449 0x0040 2450 }; 2451 static const int32_t fromUTF8XML_DEC[] = { 2452 0, 1, 2, 2453 3, 3, 3, 3, 3, 3, 4, 
2454 5, 2455 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 2456 9 2457 }; 2458 2459 2460 #if !UCONFIG_NO_LEGACY_CONVERSION 2461 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), 2462 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 2463 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) 2464 log_err("ibm-943->u with substitute with value did not match.\n"); 2465 2466 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), 2467 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp", 2468 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) 2469 log_err("euc-jp->u with substitute with value did not match.\n"); 2470 2471 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 2472 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 2473 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) 2474 log_err("euc-tw->u with substitute with value did not match.\n"); 2475 2476 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2477 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2478 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) 2479 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2480 2481 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2482 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2483 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR)) 2484 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2485 2486 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ 2487 { 2488 static const UChar iso_2022_jptoUnicodeDec[]={ 2489 0x0041, 2490 /* & # 5 8 ; */ 2491 0x0026, 0x0023, 0x0035, 0x0038, 0x003b, 2492 0x0026, 0x0023, 0x0032, 0x0036, 0x003b, 2493 0x0042 }; 2494 static const int32_t from_iso_2022_jpOffsDec [] ={ 
3,7,7,7,7,7,7,7,7,7,7,12, }; 2495 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2496 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2497 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2498 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n"); 2499 } 2500 { 2501 static const UChar iso_2022_jptoUnicodeHex[]={ 2502 0x0041, 2503 /* & # x 3 A ; */ 2504 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b, 2505 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b, 2506 0x0042 }; 2507 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 }; 2508 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2509 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2510 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) 2511 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n"); 2512 } 2513 { 2514 static const UChar iso_2022_jptoUnicodeC[]={ 2515 0x0041, 2516 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */ 2517 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */ 2518 0x0042 }; 2519 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; 2520 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2521 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2522 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2523 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n"); 2524 } 2525 } 2526 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 2527 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 2528 
UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) 2529 log_err("iso-2022-cn->u with substitute with value did not match.\n"); 2530 2531 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 2532 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 2533 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) 2534 log_err("iso-2022-kr->u with substitute with value did not match.\n"); 2535 2536 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 2537 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 2538 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) 2539 log_err("hz->u with substitute with value did not match.\n"); 2540 2541 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 2542 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 2543 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) 2544 log_err("ISCII ->u with substitute with value did not match.\n"); 2545 #endif 2546 2547 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2548 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8", 2549 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) 2550 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2551 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2552 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8", 2553 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR)) 2554 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2555 } 2556 } 2557 2558 #if !UCONFIG_NO_LEGACY_CONVERSION 2559
/*
 * TestLegalAndOthers
 *
 * Compiled only when !UCONFIG_NO_LEGACY_CONVERSION.
 * Stores inputsize / outputsize in gInBufferSize / gOutBufferSize, then runs
 * four table-driven checks and reports each mismatch with log_err:
 *   1. legalText -> ibm-949 with UCNV_FROM_U_CALLBACK_SKIP; the expected
 *      table templegal949 has bytes for every input unit (see to949legal).
 *   2. text943 -> Unicode via ibm-943 with UCNV_TO_U_CALLBACK_SUBSTITUTE;
 *      the expected output has 0x1a at input offset 2.
 *   3. The same input with UCNV_TO_U_CALLBACK_SKIP; the expected output has
 *      nothing for input offset 2.
 *   4. The same input with UCNV_TO_U_CALLBACK_STOP; the expected output ends
 *      after the first character.
 */
static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) 2560 { 2561 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; 2562 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 2563 static const
int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; 2564 2565 2566 static const uint8_t text943[] = { 2567 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; /* per the expected tables below, the 0x82 at offset 2 (followed by 0x20) is the illegal input */ 2568 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; 2569 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; 2570 static const UChar toUnicode943stop[]= { 0x304b}; 2571 2572 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; 2573 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; 2574 static const int32_t fromIBM943Offsstop[] = { 0}; 2575 2576 gInBufferSize = inputsize; 2577 gOutBufferSize = outputsize; 2578 /*checking with a legal value*/ 2579 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]), 2580 templegal949, sizeof(templegal949), "ibm-949", 2581 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) 2582 log_err("u-> ibm-949 with skip did not match.\n"); 2583 2584 /*checking illegal value for ibm-943 with substitute*/ 2585 if(!testConvertToUnicode(text943, sizeof(text943), 2586 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", 2587 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) 2588 log_err("ibm-943->u with subst did not match.\n"); 2589 /*checking illegal value for ibm-943 with skip */ 2590 if(!testConvertToUnicode(text943, sizeof(text943), 2591 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943", 2592 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) 2593 log_err("ibm-943->u with skip did not match.\n"); 2594 2595 /*checking illegal value for ibm-943 with stop */ 2596 if(!testConvertToUnicode(text943, sizeof(text943), 2597 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943", 2598 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) 2599 log_err("ibm-943->u with stop did not match.\n"); 2600 2601 } 2602 2603
/*
 * TestSingleByte
 *
 * Stores inputsize / outputsize in gInBufferSize / gOutBufferSize and converts
 * sampleText to Unicode via ibm-943 with UCNV_TO_U_CALLBACK_SUBSTITUTE.
 * The expected table has 0x1a at input offsets 5 and 6, i.e. one substitution
 * each for the bytes 0x82 and 0xff. A mismatch is reported with log_err.
 */
static void TestSingleByte(int32_t inputsize, int32_t outputsize) 2604 {
2605 static const uint8_t sampleText[] = { 2606 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, 2607 0xff, 0x32, 0x33}; 2608 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; 2609 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; 2610 /*checking illegal value for ibm-943 with substitute*/ 2611 gInBufferSize = inputsize; 2612 gOutBufferSize = outputsize; 2613 2614 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 2615 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", 2616 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) 2617 log_err("ibm-943->u with subst did not match.\n"); 2618 } 2619 2620
/*
 * TestEBCDIC_STATEFUL_Sub
 *
 * Stores inputsize / outputsize in gInBufferSize / gOutBufferSize and converts
 * ebcdic_inputTest to ibm-930 with UCNV_FROM_U_CALLBACK_SUBSTITUTE twice:
 *   1. With no substitution override (mySubChar argument NULL, length 0):
 *      the expected output for input index 4 (0x6d65) is 0x0e 0xfe 0xfe,
 *      followed by 0x0f before the next single-byte character.
 *   2. With the substitution character set to the single byte 0x3f
 *      (mySubChar, length 1): the expected output for input index 4 is just
 *      0x3f, with no 0x0e / 0x0f around it.
 * Each mismatch is reported with log_err.
 */
static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) 2621 { 2622 /*EBCDIC_STATEFUL*/ 2623 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 }; 2624 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; 2625 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 }; 2626 /* s SO doubl SI sng s SO fe fe SI s */ 2627 2628 /*EBCDIC_STATEFUL with subChar=3f*/ 2629 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 }; 2630 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 }; 2631 static const char mySubChar[]={ 0x3f}; 2632 2633 gInBufferSize = inputsize; 2634 gOutBufferSize = outputsize; 2635 2636 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), 2637 toIBM930, sizeof(toIBM930), "ibm-930", 2638 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) 2639 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); 2640 2641 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), 2642 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930", 2643 UCNV_FROM_U_CALLBACK_SUBSTITUTE,
offset_930_subvaried, mySubChar, 1 )) 2644 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n"); 2645 } 2646 #endif 2647 2648 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 2649 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, 2650 const char *mySubChar, int8_t len) 2651 { 2652 2653 2654 UErrorCode status = U_ZERO_ERROR; 2655 UConverter *conv = 0; 2656 char junkout[NEW_MAX_BUFFER]; /* FIX */ 2657 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2658 const UChar *src; 2659 char *end; 2660 char *targ; 2661 int32_t *offs; 2662 int i; 2663 int32_t realBufferSize; 2664 char *realBufferEnd; 2665 const UChar *realSourceEnd; 2666 const UChar *sourceLimit; 2667 UBool checkOffsets = TRUE; 2668 UBool doFlush; 2669 char junk[9999]; 2670 char offset_str[9999]; 2671 char *p; 2672 UConverterFromUCallback oldAction = NULL; 2673 const void* oldContext = NULL; 2674 2675 2676 for(i=0;i<NEW_MAX_BUFFER;i++) 2677 junkout[i] = (char)0xF0; 2678 for(i=0;i<NEW_MAX_BUFFER;i++) 2679 junokout[i] = 0xFF; 2680 setNuConvTestName(codepage, "FROM"); 2681 2682 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 2683 gOutBufferSize); 2684 2685 conv = ucnv_open(codepage, &status); 2686 if(U_FAILURE(status)) 2687 { 2688 log_data_err("Couldn't open converter %s\n",codepage); 2689 return TRUE; 2690 } 2691 2692 log_verbose("Converter opened..\n"); 2693 2694 /*----setting the callback routine----*/ 2695 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 2696 if (U_FAILURE(status)) 2697 { 2698 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2699 } 2700 /*------------------------*/ 2701 /*setting the subChar*/ 2702 if(mySubChar != NULL){ 2703 ucnv_setSubstChars(conv, mySubChar, len, &status); 2704 if (U_FAILURE(status)) { 2705 log_err("FAILURE in setting the callback Function!
%s\n", myErrorName(status)); 2706 } 2707 } 2708 /*------------*/ 2709 2710 src = source; 2711 targ = junkout; 2712 offs = junokout; 2713 2714 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 2715 realBufferEnd = junkout + realBufferSize; 2716 realSourceEnd = source + sourceLen; 2717 2718 if ( gOutBufferSize != realBufferSize ) 2719 checkOffsets = FALSE; 2720 2721 if( gInBufferSize != NEW_MAX_BUFFER ) 2722 checkOffsets = FALSE; 2723 2724 do 2725 { 2726 end = nct_min(targ + gOutBufferSize, realBufferEnd); 2727 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 2728 2729 doFlush = (UBool)(sourceLimit == realSourceEnd); 2730 2731 if(targ == realBufferEnd) 2732 { 2733 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 2734 return FALSE; 2735 } 2736 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 2737 2738 2739 status = U_ZERO_ERROR; 2740 2741 ucnv_fromUnicode (conv, 2742 (char **)&targ, 2743 (const char *)end, 2744 &src, 2745 sourceLimit, 2746 checkOffsets ? 
offs : NULL, 2747 doFlush, /* flush if we're at the end of the input data */ 2748 &status); 2749 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); 2750 2751 2752 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ 2753 UChar errChars[50]; /* should be sufficient */ 2754 int8_t errLen = 50; 2755 UErrorCode err = U_ZERO_ERROR; 2756 const UChar* limit= NULL; 2757 const UChar* start= NULL; 2758 ucnv_getInvalidUChars(conv,errChars, &errLen, &err); 2759 if(U_FAILURE(err)){ 2760 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err)); 2761 } 2762 /* src points to limit of invalid chars */ 2763 limit = src; 2764 /* length of in invalid chars should be equal to returned length*/ 2765 start = src - errLen; 2766 if(u_strncmp(errChars,start,errLen)!=0){ 2767 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); 2768 } 2769 } 2770 /* allow failure codes for the stop callback */ 2771 if(U_FAILURE(status) && 2772 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND))) 2773 { 2774 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 2775 return FALSE; 2776 } 2777 2778 log_verbose("\nConversion done [%d uchars in -> %d chars out]. 
\nResult :", 2779 sourceLen, targ-junkout); 2780 if(getTestOption(VERBOSITY_OPTION)) 2781 { 2782 2783 junk[0] = 0; 2784 offset_str[0] = 0; 2785 for(p = junkout;p<targ;p++) 2786 { 2787 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 2788 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 2789 } 2790 2791 log_verbose(junk); 2792 printSeq(expect, expectLen); 2793 if ( checkOffsets ) 2794 { 2795 log_verbose("\nOffsets:"); 2796 log_verbose(offset_str); 2797 } 2798 log_verbose("\n"); 2799 } 2800 ucnv_close(conv); 2801 2802 2803 if(expectLen != targ-junkout) 2804 { 2805 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 2806 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 2807 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 2808 printSeqErr(expect, expectLen); 2809 return FALSE; 2810 } 2811 2812 if (checkOffsets && (expectOffsets != 0) ) 2813 { 2814 log_verbose("comparing %d offsets..\n", targ-junkout); 2815 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 2816 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 2817 log_err("Got Output : "); 2818 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 2819 log_err("Got Offsets: "); 2820 for(p=junkout;p<targ;p++) 2821 log_err("%d,", junokout[p-junkout]); 2822 log_err("\n"); 2823 log_err("Expected Offsets: "); 2824 for(i=0; i<(targ-junkout); i++) 2825 log_err("%d,", expectOffsets[i]); 2826 log_err("\n"); 2827 return FALSE; 2828 } 2829 } 2830 2831 if(!memcmp(junkout, expect, expectLen)) 2832 { 2833 log_verbose("String matches! %s\n", gNuConvTestName); 2834 return TRUE; 2835 } 2836 else 2837 { 2838 log_err("String does not match. 
%s\n", gNuConvTestName); 2839 log_err("source: "); 2840 printUSeqErr(source, sourceLen); 2841 log_err("Got: "); 2842 printSeqErr((const uint8_t *)junkout, expectLen); 2843 log_err("Expected: "); 2844 printSeqErr(expect, expectLen); 2845 return FALSE; 2846 } 2847 } 2848 2849 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 2850 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 2851 const char *mySubChar, int8_t len) 2852 { 2853 UErrorCode status = U_ZERO_ERROR; 2854 UConverter *conv = 0; 2855 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 2856 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2857 const char *src; 2858 const char *realSourceEnd; 2859 const char *srcLimit; 2860 UChar *targ; 2861 UChar *end; 2862 int32_t *offs; 2863 int i; 2864 UBool checkOffsets = TRUE; 2865 char junk[9999]; 2866 char offset_str[9999]; 2867 UChar *p; 2868 UConverterToUCallback oldAction = NULL; 2869 const void* oldContext = NULL; 2870 2871 int32_t realBufferSize; 2872 UChar *realBufferEnd; 2873 2874 2875 for(i=0;i<NEW_MAX_BUFFER;i++) 2876 junkout[i] = 0xFFFE; 2877 2878 for(i=0;i<NEW_MAX_BUFFER;i++) 2879 junokout[i] = -1; 2880 2881 setNuConvTestName(codepage, "TO"); 2882 2883 log_verbose("\n========= %s\n", gNuConvTestName); 2884 2885 conv = ucnv_open(codepage, &status); 2886 if(U_FAILURE(status)) 2887 { 2888 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 2889 return TRUE; 2890 } 2891 2892 log_verbose("Converter opened..\n"); 2893 2894 src = (const char *)source; 2895 targ = junkout; 2896 offs = junokout; 2897 2898 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 2899 realBufferEnd = junkout + realBufferSize; 2900 realSourceEnd = src + sourcelen; 2901 /*----setting the callback routine----*/ 2902 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 2903 if (U_FAILURE(status)) 2904 { 2905 log_err("FAILURE in setting the callback Function! 
%s\n", myErrorName(status)); 2906 } 2907 /*-------------------------------------*/ 2908 /*setting the subChar*/ 2909 if(mySubChar != NULL){ 2910 ucnv_setSubstChars(conv, mySubChar, len, &status); 2911 if (U_FAILURE(status)) { 2912 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2913 } 2914 } 2915 /*------------*/ 2916 2917 2918 if ( gOutBufferSize != realBufferSize ) 2919 checkOffsets = FALSE; 2920 2921 if( gInBufferSize != NEW_MAX_BUFFER ) 2922 checkOffsets = FALSE; 2923 2924 do 2925 { 2926 end = nct_min( targ + gOutBufferSize, realBufferEnd); 2927 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 2928 2929 if(targ == realBufferEnd) 2930 { 2931 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 2932 return FALSE; 2933 } 2934 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 2935 2936 2937 2938 status = U_ZERO_ERROR; 2939 2940 ucnv_toUnicode (conv, 2941 &targ, 2942 end, 2943 (const char **)&src, 2944 (const char *)srcLimit, 2945 checkOffsets ? 
offs : NULL, 2946 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 2947 &status); 2948 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 2949 2950 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ 2951 char errChars[50]; /* should be sufficient */ 2952 int8_t errLen = 50; 2953 UErrorCode err = U_ZERO_ERROR; 2954 const char* limit= NULL; 2955 const char* start= NULL; 2956 ucnv_getInvalidChars(conv,errChars, &errLen, &err); 2957 if(U_FAILURE(err)){ 2958 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err)); 2959 } 2960 /* src points to limit of invalid chars */ 2961 limit = src; 2962 /* length of in invalid chars should be equal to returned length*/ 2963 start = src - errLen; 2964 if(uprv_strncmp(errChars,start,errLen)!=0){ 2965 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); 2966 } 2967 } 2968 /* allow failure codes for the stop callback */ 2969 if(U_FAILURE(status) && 2970 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) 2971 { 2972 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 2973 return FALSE; 2974 } 2975 2976 log_verbose("\nConversion done. 
%d bytes -> %d chars.\nResult :", 2977 sourcelen, targ-junkout); 2978 if(getTestOption(VERBOSITY_OPTION)) 2979 { 2980 2981 junk[0] = 0; 2982 offset_str[0] = 0; 2983 2984 for(p = junkout;p<targ;p++) 2985 { 2986 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 2987 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 2988 } 2989 2990 log_verbose(junk); 2991 printUSeq(expect, expectlen); 2992 if ( checkOffsets ) 2993 { 2994 log_verbose("\nOffsets:"); 2995 log_verbose(offset_str); 2996 } 2997 log_verbose("\n"); 2998 } 2999 ucnv_close(conv); 3000 3001 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 3002 3003 if (checkOffsets && (expectOffsets != 0)) 3004 { 3005 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 3006 { 3007 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3008 log_err("Got offsets: "); 3009 for(p=junkout;p<targ;p++) 3010 log_err(" %2d,", junokout[p-junkout]); 3011 log_err("\n"); 3012 log_err("Expected offsets: "); 3013 for(i=0; i<(targ-junkout); i++) 3014 log_err(" %2d,", expectOffsets[i]); 3015 log_err("\n"); 3016 log_err("Got output: "); 3017 for(i=0; i<(targ-junkout); i++) 3018 log_err("0x%04x,", junkout[i]); 3019 log_err("\n"); 3020 log_err("From source: "); 3021 for(i=0; i<(src-(const char *)source); i++) 3022 log_err(" 0x%02x,", (unsigned char)source[i]); 3023 log_err("\n"); 3024 } 3025 } 3026 3027 if(!memcmp(junkout, expect, expectlen*2)) 3028 { 3029 log_verbose("Matches!\n"); 3030 return TRUE; 3031 } 3032 else 3033 { 3034 log_err("String does not match. %s\n", gNuConvTestName); 3035 log_verbose("String does not match. 
%s\n", gNuConvTestName); 3036 log_err("Got: "); 3037 printUSeqErr(junkout, expectlen); 3038 log_err("Expected: "); 3039 printUSeqErr(expect, expectlen); 3040 log_err("\n"); 3041 return FALSE; 3042 } 3043 } 3044 3045 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 3046 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, 3047 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 3048 { 3049 3050 3051 UErrorCode status = U_ZERO_ERROR; 3052 UConverter *conv = 0; 3053 char junkout[NEW_MAX_BUFFER]; /* FIX */ 3054 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 3055 const UChar *src; 3056 char *end; 3057 char *targ; 3058 int32_t *offs; 3059 int i; 3060 int32_t realBufferSize; 3061 char *realBufferEnd; 3062 const UChar *realSourceEnd; 3063 const UChar *sourceLimit; 3064 UBool checkOffsets = TRUE; 3065 UBool doFlush; 3066 char junk[9999]; 3067 char offset_str[9999]; 3068 char *p; 3069 UConverterFromUCallback oldAction = NULL; 3070 const void* oldContext = NULL; 3071 3072 3073 for(i=0;i<NEW_MAX_BUFFER;i++) 3074 junkout[i] = (char)0xF0; 3075 for(i=0;i<NEW_MAX_BUFFER;i++) 3076 junokout[i] = 0xFF; 3077 setNuConvTestName(codepage, "FROM"); 3078 3079 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 3080 gOutBufferSize); 3081 3082 conv = ucnv_open(codepage, &status); 3083 if(U_FAILURE(status)) 3084 { 3085 log_data_err("Couldn't open converter %s\n",codepage); 3086 return TRUE; /* Because the err has already been logged. */ 3087 } 3088 3089 log_verbose("Converter opened..\n"); 3090 3091 /*----setting the callback routine----*/ 3092 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3093 if (U_FAILURE(status)) 3094 { 3095 log_err("FAILURE in setting the callback Function! 
%s\n", myErrorName(status)); 3096 } 3097 /*------------------------*/ 3098 /*setting the subChar*/ 3099 if(mySubChar != NULL){ 3100 ucnv_setSubstChars(conv, mySubChar, len, &status); 3101 if (U_FAILURE(status)) { 3102 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status)); 3103 } 3104 } 3105 /*------------*/ 3106 3107 src = source; 3108 targ = junkout; 3109 offs = junokout; 3110 3111 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 3112 realBufferEnd = junkout + realBufferSize; 3113 realSourceEnd = source + sourceLen; 3114 3115 if ( gOutBufferSize != realBufferSize ) 3116 checkOffsets = FALSE; 3117 3118 if( gInBufferSize != NEW_MAX_BUFFER ) 3119 checkOffsets = FALSE; 3120 3121 do 3122 { 3123 end = nct_min(targ + gOutBufferSize, realBufferEnd); 3124 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 3125 3126 doFlush = (UBool)(sourceLimit == realSourceEnd); 3127 3128 if(targ == realBufferEnd) 3129 { 3130 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 3131 return FALSE; 3132 } 3133 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 3134 3135 3136 status = U_ZERO_ERROR; 3137 3138 ucnv_fromUnicode (conv, 3139 (char **)&targ, 3140 (const char *)end, 3141 &src, 3142 sourceLimit, 3143 checkOffsets ? offs : NULL, 3144 doFlush, /* flush if we're at the end of the input data */ 3145 &status); 3146 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); 3147 3148 /* allow failure codes for the stop callback */ 3149 if(U_FAILURE(status) && status != expectedError) 3150 { 3151 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3152 return FALSE; 3153 } 3154 3155 log_verbose("\nConversion done [%d uchars in -> %d chars out]. 
\nResult :", 3156 sourceLen, targ-junkout); 3157 if(getTestOption(VERBOSITY_OPTION)) 3158 { 3159 3160 junk[0] = 0; 3161 offset_str[0] = 0; 3162 for(p = junkout;p<targ;p++) 3163 { 3164 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 3165 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 3166 } 3167 3168 log_verbose(junk); 3169 printSeq(expect, expectLen); 3170 if ( checkOffsets ) 3171 { 3172 log_verbose("\nOffsets:"); 3173 log_verbose(offset_str); 3174 } 3175 log_verbose("\n"); 3176 } 3177 ucnv_close(conv); 3178 3179 3180 if(expectLen != targ-junkout) 3181 { 3182 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3183 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3184 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3185 printSeqErr(expect, expectLen); 3186 return FALSE; 3187 } 3188 3189 if (checkOffsets && (expectOffsets != 0) ) 3190 { 3191 log_verbose("comparing %d offsets..\n", targ-junkout); 3192 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 3193 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3194 log_err("Got Output : "); 3195 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3196 log_err("Got Offsets: "); 3197 for(p=junkout;p<targ;p++) 3198 log_err("%d,", junokout[p-junkout]); 3199 log_err("\n"); 3200 log_err("Expected Offsets: "); 3201 for(i=0; i<(targ-junkout); i++) 3202 log_err("%d,", expectOffsets[i]); 3203 log_err("\n"); 3204 return FALSE; 3205 } 3206 } 3207 3208 if(!memcmp(junkout, expect, expectLen)) 3209 { 3210 log_verbose("String matches! %s\n", gNuConvTestName); 3211 return TRUE; 3212 } 3213 else 3214 { 3215 log_err("String does not match. 
%s\n", gNuConvTestName); 3216 log_err("source: "); 3217 printUSeqErr(source, sourceLen); 3218 log_err("Got: "); 3219 printSeqErr((const uint8_t *)junkout, expectLen); 3220 log_err("Expected: "); 3221 printSeqErr(expect, expectLen); 3222 return FALSE; 3223 } 3224 } 3225 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 3226 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 3227 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 3228 { 3229 UErrorCode status = U_ZERO_ERROR; 3230 UConverter *conv = 0; 3231 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 3232 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 3233 const char *src; 3234 const char *realSourceEnd; 3235 const char *srcLimit; 3236 UChar *targ; 3237 UChar *end; 3238 int32_t *offs; 3239 int i; 3240 UBool checkOffsets = TRUE; 3241 char junk[9999]; 3242 char offset_str[9999]; 3243 UChar *p; 3244 UConverterToUCallback oldAction = NULL; 3245 const void* oldContext = NULL; 3246 3247 int32_t realBufferSize; 3248 UChar *realBufferEnd; 3249 3250 3251 for(i=0;i<NEW_MAX_BUFFER;i++) 3252 junkout[i] = 0xFFFE; 3253 3254 for(i=0;i<NEW_MAX_BUFFER;i++) 3255 junokout[i] = -1; 3256 3257 setNuConvTestName(codepage, "TO"); 3258 3259 log_verbose("\n========= %s\n", gNuConvTestName); 3260 3261 conv = ucnv_open(codepage, &status); 3262 if(U_FAILURE(status)) 3263 { 3264 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 3265 return TRUE; 3266 } 3267 3268 log_verbose("Converter opened..\n"); 3269 3270 src = (const char *)source; 3271 targ = junkout; 3272 offs = junokout; 3273 3274 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 3275 realBufferEnd = junkout + realBufferSize; 3276 realSourceEnd = src + sourcelen; 3277 /*----setting the callback routine----*/ 3278 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3279 if (U_FAILURE(status)) 3280 { 3281 log_err("FAILURE in 
setting the callback Function! %s\n", myErrorName(status)); 3282 } 3283 /*-------------------------------------*/ 3284 /*setting the subChar*/ 3285 if(mySubChar != NULL){ 3286 ucnv_setSubstChars(conv, mySubChar, len, &status); 3287 if (U_FAILURE(status)) { 3288 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3289 } 3290 } 3291 /*------------*/ 3292 3293 3294 if ( gOutBufferSize != realBufferSize ) 3295 checkOffsets = FALSE; 3296 3297 if( gInBufferSize != NEW_MAX_BUFFER ) 3298 checkOffsets = FALSE; 3299 3300 do 3301 { 3302 end = nct_min( targ + gOutBufferSize, realBufferEnd); 3303 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 3304 3305 if(targ == realBufferEnd) 3306 { 3307 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 3308 return FALSE; 3309 } 3310 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 3311 3312 3313 3314 status = U_ZERO_ERROR; 3315 3316 ucnv_toUnicode (conv, 3317 &targ, 3318 end, 3319 (const char **)&src, 3320 (const char *)srcLimit, 3321 checkOffsets ? offs : NULL, 3322 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 3323 &status); 3324 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 3325 3326 /* allow failure codes for the stop callback */ 3327 if(U_FAILURE(status) && status!=expectedError) 3328 { 3329 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3330 return FALSE; 3331 } 3332 3333 log_verbose("\nConversion done. 
%d bytes -> %d chars.\nResult :", 3334 sourcelen, targ-junkout); 3335 if(getTestOption(VERBOSITY_OPTION)) 3336 { 3337 3338 junk[0] = 0; 3339 offset_str[0] = 0; 3340 3341 for(p = junkout;p<targ;p++) 3342 { 3343 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 3344 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 3345 } 3346 3347 log_verbose(junk); 3348 printUSeq(expect, expectlen); 3349 if ( checkOffsets ) 3350 { 3351 log_verbose("\nOffsets:"); 3352 log_verbose(offset_str); 3353 } 3354 log_verbose("\n"); 3355 } 3356 ucnv_close(conv); 3357 3358 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 3359 3360 if (checkOffsets && (expectOffsets != 0)) 3361 { 3362 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 3363 { 3364 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3365 log_err("Got offsets: "); 3366 for(p=junkout;p<targ;p++) 3367 log_err(" %2d,", junokout[p-junkout]); 3368 log_err("\n"); 3369 log_err("Expected offsets: "); 3370 for(i=0; i<(targ-junkout); i++) 3371 log_err(" %2d,", expectOffsets[i]); 3372 log_err("\n"); 3373 log_err("Got output: "); 3374 for(i=0; i<(targ-junkout); i++) 3375 log_err("0x%04x,", junkout[i]); 3376 log_err("\n"); 3377 log_err("From source: "); 3378 for(i=0; i<(src-(const char *)source); i++) 3379 log_err(" 0x%02x,", (unsigned char)source[i]); 3380 log_err("\n"); 3381 } 3382 } 3383 3384 if(!memcmp(junkout, expect, expectlen*2)) 3385 { 3386 log_verbose("Matches!\n"); 3387 return TRUE; 3388 } 3389 else 3390 { 3391 log_err("String does not match. %s\n", gNuConvTestName); 3392 log_verbose("String does not match. 
%s\n", gNuConvTestName); 3393 log_err("Got: "); 3394 printUSeqErr(junkout, expectlen); 3395 log_err("Expected: "); 3396 printUSeqErr(expect, expectlen); 3397 log_err("\n"); 3398 return FALSE; 3399 } 3400 } 3401 3402 static void TestCallBackFailure(void) { 3403 UErrorCode status = U_USELESS_COLLATOR_ERROR; 3404 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); 3405 if (status != U_USELESS_COLLATOR_ERROR) { 3406 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n"); 3407 } 3408 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); 3409 if (status != U_USELESS_COLLATOR_ERROR) { 3410 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n"); 3411 } 3412 ucnv_cbFromUWriteSub(NULL, -1, &status); 3413 if (status != U_USELESS_COLLATOR_ERROR) { 3414 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n"); 3415 } 3416 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); 3417 if (status != U_USELESS_COLLATOR_ERROR) { 3418 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n"); 3419 } 3420 } 3421