1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /* 9 ******************************************************************************** 10 * File NCCBTST.C 11 * 12 * Modification History: 13 * Name Description 14 * Madhu Katragadda 7/21/1999 Testing error callback routines 15 ******************************************************************************** 16 */ 17 #include <stdio.h> 18 #include <stdlib.h> 19 #include <string.h> 20 #include <ctype.h> 21 #include "cmemory.h" 22 #include "cstring.h" 23 #include "unicode/uloc.h" 24 #include "unicode/ucnv.h" 25 #include "unicode/ucnv_err.h" 26 #include "cintltst.h" 27 #include "unicode/utypes.h" 28 #include "unicode/ustring.h" 29 #include "nccbtst.h" 30 #include "unicode/ucnv_cb.h" 31 #include "unicode/utf16.h" 32 33 #define NEW_MAX_BUFFER 999 34 35 #define nct_min(x,y) ((x<y) ? x : y) 36 37 static int32_t gInBufferSize = 0; 38 static int32_t gOutBufferSize = 0; 39 static char gNuConvTestName[1024]; 40 41 static void printSeq(const uint8_t* a, int len) 42 { 43 int i=0; 44 log_verbose("\n{"); 45 while (i<len) 46 log_verbose("0x%02X, ", a[i++]); 47 log_verbose("}\n"); 48 } 49 50 static void printUSeq(const UChar* a, int len) 51 { 52 int i=0; 53 log_verbose("{"); 54 while (i<len) 55 log_verbose(" 0x%04x, ", a[i++]); 56 log_verbose("}\n"); 57 } 58 59 static void printSeqErr(const uint8_t* a, int len) 60 { 61 int i=0; 62 fprintf(stderr, "{"); 63 while (i<len) 64 fprintf(stderr, " 0x%02x, ", a[i++]); 65 fprintf(stderr, "}\n"); 66 } 67 68 static void printUSeqErr(const UChar* a, int len) 69 { 70 int i=0; 71 fprintf(stderr, "{"); 72 while (i<len) 73 fprintf(stderr, "0x%04x, ", a[i++]); 74 fprintf(stderr,"}\n"); 75 } 76 77 static void setNuConvTestName(const char *codepage, const char *direction) 78 { 79 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 80 codepage, 81 direction, 82 (int)gInBufferSize, 83 (int)gOutBufferSize); 84 } 85 86 87 static void TestCallBackFailure(void); 88 89 void addTestConvertErrorCallBack(TestNode** root); 90 91 void addTestConvertErrorCallBack(TestNode** root) 92 { 93 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); 94 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); 95 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); 96 /* BEGIN android-removed 97 To save space, Android does not build complete CJK conversion tables. 98 We skip the test here. 99 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack"); 100 END android-removed */ 101 102 #if !UCONFIG_NO_LEGACY_CONVERSION 103 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack"); 104 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack"); 105 #endif 106 107 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); 108 } 109 110 static void TestSkipCallBack() 111 { 112 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 113 TestSkip(1,NEW_MAX_BUFFER); 114 TestSkip(1,1); 115 TestSkip(NEW_MAX_BUFFER, 1); 116 } 117 118 static void TestStopCallBack() 119 { 120 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 121 TestStop(1,NEW_MAX_BUFFER); 122 TestStop(1,1); 123 TestStop(NEW_MAX_BUFFER, 1); 124 } 125 126 static void TestSubCallBack() 127 { 128 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 129 TestSub(1,NEW_MAX_BUFFER); 130 TestSub(1,1); 131 TestSub(NEW_MAX_BUFFER, 1); 132 133 #if !UCONFIG_NO_LEGACY_CONVERSION 134 TestEBCDIC_STATEFUL_Sub(1, 1); 135 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); 136 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); 137 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 138 #endif 139 } 140 141 static void TestSubWithValueCallBack() 142 { 143 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 144 TestSubWithValue(1,NEW_MAX_BUFFER); 145 TestSubWithValue(1,1); 146 TestSubWithValue(NEW_MAX_BUFFER, 1); 147 } 148 149 #if !UCONFIG_NO_LEGACY_CONVERSION 150 static void TestLegalAndOtherCallBack() 151 { 152 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 153 TestLegalAndOthers(1,NEW_MAX_BUFFER); 154 TestLegalAndOthers(1,1); 155 TestLegalAndOthers(NEW_MAX_BUFFER, 1); 156 } 157 158 static void TestSingleByteCallBack() 159 { 160 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 161 TestSingleByte(1,NEW_MAX_BUFFER); 162 TestSingleByte(1,1); 163 TestSingleByte(NEW_MAX_BUFFER, 1); 164 } 165 #endif 166 167 static void TestSkip(int32_t inputsize, int32_t outputsize) 168 { 169 static const uint8_t expskipIBM_949[]= { 170 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 171 172 static const uint8_t expskipIBM_943[] = { 173 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; 174 175 static const uint8_t expskipIBM_930[] = { 176 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; 177 178 gInBufferSize = inputsize; 179 gOutBufferSize = outputsize; 180 181 /*From Unicode*/ 182 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); 183 184 #if !UCONFIG_NO_LEGACY_CONVERSION 185 { 186 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 187 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 188 189 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; 190 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; 191 192 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText), 193 expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949", 194 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) 195 log_err("u-> ibm-949 with skip did not match.\n"); 196 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), 197 expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943", 198 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) 199 log_err("u-> ibm-943 with skip did not match.\n"); 200 } 201 202 { 203 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 }; 204 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; 205 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 }; 206 207 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */ 208 if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU), 209 fromUBytes, UPRV_LENGTHOF(fromUBytes), 210 "ibm-930", 211 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, 212 NULL, 0) 213 ) { 214 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n"); 215 } 216 } 217 #endif 218 219 { 220 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 221 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; 222 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; 223 224 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 225 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; 226 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; 227 228 /* US-ASCII */ 229 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU), 230 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes), 231 "US-ASCII", 232 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 233 NULL, 0) 234 ) { 235 log_err("u->US-ASCII with skip did not match.\n"); 236 } 237 238 #if !UCONFIG_NO_LEGACY_CONVERSION 239 /* SBCS NLTC codepage 367 for US-ASCII */ 240 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU), 241 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes), 242 "ibm-367", 243 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 244 NULL, 0) 245 ) { 246 log_err("u->ibm-367 with skip did not match.\n"); 247 } 248 #endif 249 250 /* ISO-Latin-1 */ 251 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU), 252 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes), 253 "LATIN_1", 254 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 255 NULL, 0) 256 ) { 257 log_err("u->LATIN_1 with skip did not match.\n"); 258 } 259 260 #if !UCONFIG_NO_LEGACY_CONVERSION 261 /* windows-1252 */ 262 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU), 263 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes), 264 "windows-1252", 265 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 266 NULL, 0) 267 ) { 268 log_err("u->windows-1252 with skip did not match.\n"); 269 } 270 } 271 272 { 273 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 274 static const uint8_t toIBM943[]= { 0x61, 0x61 }; 275 static const int32_t offset[]= {0, 4}; 276 277 /* EUC_JP*/ 278 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 279 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 280 0x61, 0x8e, 0xe0, 281 }; 282 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; 283 284 /*EUC_TW*/ 285 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 286 static const uint8_t to_euc_tw[]={ 287 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 288 0x61, 0xe6, 0xca, 0x8a, 289 }; 290 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,}; 291 292 /*ISO-2022-JP*/ 293 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, }; 294 static const uint8_t to_iso_2022_jp[]={ 295 0x41, 296 0x42, 297 298 }; 299 static const int32_t from_iso_2022_jpOffs [] ={0,2}; 300 301 /*ISO-2022-JP*/ 302 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 303 static const uint8_t to_iso_2022_jp2[]={ 304 0x41, 305 0x43, 306 307 }; 308 static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; 309 310 /*ISO-2022-cn*/ 311 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 312 static const uint8_t to_iso_2022_cn[]={ 313 0x41, 0x42 314 }; 315 static const int32_t from_iso_2022_cnOffs [] ={ 316 0, 2 317 }; 318 319 /*ISO-2022-CN*/ 320 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 321 static const uint8_t to_iso_2022_cn1[]={ 322 0x41, 0x43 323 324 }; 325 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; 326 327 /*ISO-2022-kr*/ 328 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 329 static const uint8_t to_iso_2022_kr[]={ 330 0x1b, 0x24, 0x29, 0x43, 331 0x41, 332 0x0e, 0x25, 0x50, 333 0x25, 0x50, 334 0x0f, 0x42, 335 }; 336 static const int32_t from_iso_2022_krOffs [] ={ 337 -1,-1,-1,-1, 338 0, 339 1,1,1, 340 3,3, 341 4,4 342 }; 343 344 /*ISO-2022-kr*/ 345 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 346 static const uint8_t to_iso_2022_kr1[]={ 347 0x1b, 0x24, 0x29, 0x43, 348 0x41, 349 0x0e, 0x25, 0x50, 350 0x25, 0x50, 351 352 }; 353 static const int32_t from_iso_2022_krOffs1 [] ={ 354 -1,-1,-1,-1, 355 0, 356 1,1,1, 357 3,3, 358 359 }; 360 /* HZ encoding */ 361 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 362 363 static const uint8_t to_hz[]={ 364 0x7e, 0x7d, 0x41, 365 0x7e, 0x7b, 0x26, 0x30, 366 0x26, 0x30, 367 0x7e, 0x7d, 0x42, 368 369 }; 370 static const int32_t from_hzOffs [] ={ 371 0,0,0, 372 1,1,1,1, 373 3,3, 374 4,4,4,4 375 }; 376 377 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 378 379 static const uint8_t to_hz1[]={ 380 0x7e, 0x7d, 0x41, 381 0x7e, 0x7b, 0x26, 0x30, 382 0x26, 0x30, 383 384 385 }; 386 static const int32_t from_hzOffs1 [] ={ 387 0,0,0, 388 1,1,1,1, 389 3,3, 390 391 }; 392 393 #endif 394 395 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 396 397 static const uint8_t to_SCSU[]={ 398 0x41, 399 0x42 400 401 402 }; 403 static const int32_t from_SCSUOffs [] ={ 404 0, 405 2, 406 407 }; 408 409 #if !UCONFIG_NO_LEGACY_CONVERSION 410 /* ISCII */ 411 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 412 static const uint8_t to_iscii[]={ 413 0x41, 414 0x42, 415 }; 416 static const int32_t from_isciiOffs [] ={ 417 0,2, 418 419 }; 420 /*ISCII*/ 421 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 422 static const uint8_t to_iscii1[]={ 423 0x44, 424 0x43, 425 426 }; 427 static const int32_t from_isciiOffs1 [] ={0,2}; 428 429 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest), 430 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943", 431 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) 432 log_err("u-> ibm-943 with skip did not match.\n"); 433 434 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText), 435 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP", 436 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) 437 log_err("u-> euc-jp with skip did not match.\n"); 438 439 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText), 440 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw", 441 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) 442 log_err("u-> euc-tw with skip did not match.\n"); 443 444 /*iso_2022_jp*/ 445 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText), 446 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp", 447 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) 448 log_err("u-> iso-2022-jp with skip did not match.\n"); 449 450 /* with context */ 451 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2), 452 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp", 453 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 454 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 455 456 /*iso_2022_cn*/ 457 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText), 458 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn", 459 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) 460 log_err("u-> iso-2022-cn with skip did not match.\n"); 461 /*with context*/ 462 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1), 463 to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn", 464 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 465 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 466 467 /*iso_2022_kr*/ 468 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText), 469 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr", 470 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) 471 log_err("u-> iso-2022-kr with skip did not match.\n"); 472 /*with context*/ 473 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1), 474 to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr", 475 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 476 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 477 478 /*hz*/ 479 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText), 480 to_hz, UPRV_LENGTHOF(to_hz), "HZ", 481 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) 482 log_err("u-> HZ with skip did not match.\n"); 483 /*with context*/ 484 if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1), 485 to_hz1, UPRV_LENGTHOF(to_hz1), "hz", 486 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 487 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 488 #endif 489 490 /*SCSU*/ 491 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText), 492 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU", 493 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) 494 log_err("u-> SCSU with skip did not match.\n"); 495 496 #if !UCONFIG_NO_LEGACY_CONVERSION 497 /*ISCII*/ 498 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText), 499 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0", 500 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) 501 log_err("u-> iscii with skip did not match.\n"); 502 /*with context*/ 503 if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1), 504 to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0", 505 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 506 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 507 #endif 508 } 509 510 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 511 { 512 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 513 0xFB, 0xEE, 0x28, /* from source offset 0 */ 514 0x24, 0x1E, 0x52, 515 0xB2, 516 0x20, 517 0xB3, 518 0xB1, 519 0x0D, 520 0x0A, 521 522 0x20, /* from 8 */ 523 0x00, 524 0xD0, 0x6C, 525 0xB6, 526 0xD8, 0xA5, 527 0x20, 528 0x68, 529 0x59, 530 531 0xF9, 0x28, /* from 16 */ 532 0x6D, 533 0x20, 534 0x73, 535 0xE0, 0x2D, 536 0xDE, 0x43, 537 0xD0, 0x33, 538 0x20, 539 540 0xFA, 0x83, /* from 24 */ 541 0x25, 0x01, 542 0xFB, 0x16, 0x87, 543 0x4B, 0x16, 544 0x20, 545 0xE6, 0xBD, 546 0xEB, 0x5B, 547 0x4B, 0xCC, 548 549 0xF9, 0xA2, /* from 32 */ 550 0xFC, 0x10, 0x3E, 551 0xFE, 0x16, 0x3A, 0x8C, 552 0x20, 553 0xFC, 0x03, 0xAC, 554 555 0x01, /* from 41 */ 556 0xDE, 0x83, 557 0x20, 558 0x09 559 }; 560 static const UChar expected[]={ 561 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ 562 0x0063, 0x0061, 0x000D, 0x000A, 563 564 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ 565 0x0930, 0x0020, 0x0918, 0x0909, 566 567 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ 568 0x4000, 0x4E00, 0x7777, 0x0020, 569 570 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ 571 0x0020, 0xD7A3, 0xDC00, 0xD800, 572 573 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ 574 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 575 576 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ 577 0x0009 578 }; 579 static const int32_t offsets[]={ 580 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, 581 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, 582 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 583 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, 584 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, 585 41, 42, 42, 43, 44 586 }; 587 588 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */ 589 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected), 590 sampleText, UPRV_LENGTHOF(sampleText), 591 "BOCU-1", 592 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 593 ) { 594 log_err("u->BOCU-1 with skip did not match.\n"); 595 } 596 } 597 598 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 599 { 600 const uint8_t sampleText[]={ 601 0x61, /* 'a' */ 602 0xc4, 0xb5, /* U+0135 */ 603 0xed, 0x80, 0xa0, /* Hangul U+d020 */ 604 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ 605 0xee, 0x80, 0x80, /* PUA U+e000 */ 606 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */ 607 0x62, /* 'b' */ 608 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */ 609 0xd0, 0x80 /* U+0400 */ 610 }; 611 UChar expected[]={ 612 0x0061, 613 0x0135, 614 0xd020, 615 0xd801, 0xdc01, 616 0xe000, 617 0xdc01, 618 0x0062, 619 0xd801, 620 0x0400 621 }; 622 int32_t offsets[]={ 623 0, 624 1, 1, 625 2, 2, 2, 626 3, 3, 3, 4, 4, 4, 627 5, 5, 5, 628 6, 6, 6, 629 7, 630 8, 8, 8, 631 9, 9 632 }; 633 634 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */ 635 636 /* without offsets */ 637 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected), 638 sampleText, UPRV_LENGTHOF(sampleText), 639 "CESU-8", 640 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) 641 ) { 642 log_err("u->CESU-8 with skip did not match.\n"); 643 } 644 645 /* with offsets */ 646 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected), 647 sampleText, UPRV_LENGTHOF(sampleText), 648 "CESU-8", 649 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 650 ) { 651 log_err("u->CESU-8 with skip did not match.\n"); 652 } 653 } 654 655 /*to Unicode*/ 656 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); 657 658 #if !UCONFIG_NO_LEGACY_CONVERSION 659 { 660 661 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; 662 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 663 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 664 665 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; 666 static const int32_t fromIBM943Offs [] = { 0, 2, 4}; 667 static const int32_t fromIBM930Offs [] = { 1, 3, 5}; 668 669 if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), 670 IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949", 671 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) 672 log_err("ibm-949->u with skip did not match.\n"); 673 if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), 674 IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943", 675 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) 676 log_err("ibm-943->u with skip did not match.\n"); 677 678 679 if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930), 680 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930", 681 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) 682 log_err("ibm-930->u with skip did not match.\n"); 683 684 685 if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930), 686 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930", 687 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 688 log_err("ibm-930->u with skip did not match.\n"); 689 } 690 #endif 691 692 { 693 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; 694 static const UChar usasciiToU[] = { 0x61, 0x31 }; 695 static const int32_t usasciiToUOffsets[] = { 0, 2 }; 696 697 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; 698 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; 699 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; 700 701 /* US-ASCII */ 702 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes), 703 usasciiToU, UPRV_LENGTHOF(usasciiToU), 704 "US-ASCII", 705 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 706 NULL, 0) 707 ) { 708 log_err("US-ASCII->u with skip did not match.\n"); 709 } 710 711 #if !UCONFIG_NO_LEGACY_CONVERSION 712 /* SBCS NLTC codepage 367 for US-ASCII */ 713 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes), 714 usasciiToU, UPRV_LENGTHOF(usasciiToU), 715 "ibm-367", 716 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 717 NULL, 0) 718 ) { 719 log_err("ibm-367->u with skip did not match.\n"); 720 } 721 #endif 722 723 /* ISO-Latin-1 */ 724 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes), 725 latin1ToU, UPRV_LENGTHOF(latin1ToU), 726 "LATIN_1", 727 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 728 NULL, 0) 729 ) { 730 log_err("LATIN_1->u with skip did not match.\n"); 731 } 732 733 #if !UCONFIG_NO_LEGACY_CONVERSION 734 /* windows-1252 */ 735 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes), 736 latin1ToU, UPRV_LENGTHOF(latin1ToU), 737 "windows-1252", 738 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 739 NULL, 0) 740 ) { 741 log_err("windows-1252->u with skip did not match.\n"); 742 } 743 #endif 744 } 745 746 #if !UCONFIG_NO_LEGACY_CONVERSION 747 { 748 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 749 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 750 }; 751 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 752 }; 753 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; 754 755 756 /* euc-jp*/ 757 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 758 0x8f, 0xda, 0xa1, /*unassigned*/ 759 0x8e, 0xe0, 760 }; 761 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; 762 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; 763 764 /*EUC_TW*/ 765 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 766 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 767 0xe6, 0xca, 0x8a, 768 }; 769 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, }; 770 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; 771 /*iso-2022-jp*/ 772 static const uint8_t sampleTxt_iso_2022_jp[]={ 773 0x41, 774 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ 775 0x1b, 0x28, 0x42, 0x42, 776 777 }; 778 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; 779 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; 780 781 /*iso-2022-cn*/ 782 static const uint8_t sampleTxt_iso_2022_cn[]={ 783 0x0f, 0x41, 0x44, 784 0x1B, 0x24, 0x29, 0x47, 785 0x0E, 0x40, 0x6f, /*unassigned*/ 786 0x0f, 0x42, 787 788 }; 789 790 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; 791 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; 792 793 /*iso-2022-kr*/ 794 static const uint8_t sampleTxt_iso_2022_kr[]={ 795 0x1b, 0x24, 0x29, 0x43, 796 0x41, 797 0x0E, 0x7f, 0x1E, 798 0x0e, 0x25, 0x50, 799 0x0f, 0x51, 800 0x42, 0x43, 801 802 }; 803 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43}; 804 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 }; 805 806 /*hz*/ 807 static const uint8_t sampleTxt_hz[]={ 808 0x41, 809 0x7e, 0x7b, 0x26, 0x30, 810 0x7f, 0x1E, /*unassigned*/ 811 0x26, 0x30, 812 0x7e, 0x7d, 0x42, 813 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 814 0x7e, 0x7d, 0x42, 815 }; 816 static const UChar hztoUnicode[]={ 817 0x41, 818 0x03a0, 819 0x03A0, 820 0x42, 821 0x42,}; 822 823 static const int32_t from_hzOffs [] ={0,3,7,11,18, }; 824 825 /*ISCII*/ 826 static const uint8_t sampleTxt_iscii[]={ 827 0x41, 828 0xa1, 829 0xEB, /*unassigned*/ 830 0x26, 831 0x30, 832 0xa2, 833 0xEC, /*unassigned*/ 834 0x42, 835 }; 836 static const UChar isciitoUnicode[]={ 837 0x41, 838 0x0901, 839 0x26, 840 0x30, 841 0x0902, 842 0x42, 843 }; 844 845 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; 846 847 /*LMBCS*/ 848 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, 849 0x12, 0x92, 0xa0, /*unassigned*/ 850 0x12, 0x92, 0xA1, 851 }; 852 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; 853 static const int32_t fromLMBCS[] = {0, 6}; 854 855 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL), 856 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930", 857 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 858 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 859 860 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL), 861 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930", 862 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 863 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 864 865 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp), 866 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP", 867 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) 868 log_err("euc-jp->u with skip did not match.\n"); 869 870 871 872 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw), 873 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw", 874 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) 875 log_err("euc-tw->u with skip did not match.\n"); 876 877 878 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), 879 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp", 880 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) 881 log_err("iso-2022-jp->u with skip did not match.\n"); 882 883 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn), 884 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn", 885 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) 886 log_err("iso-2022-cn->u with skip did not match.\n"); 887 888 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr), 889 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr", 890 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) 891 log_err("iso-2022-kr->u with skip did not match.\n"); 892 893 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz), 894 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ", 895 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) 896 log_err("HZ->u with skip did not match.\n"); 897 898 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii), 899 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0", 900 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) 901 log_err("iscii->u with skip did not match.\n"); 902 903 if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS), 904 LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1", 905 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) 906 log_err("LMBCS->u with skip did not match.\n"); 907 908 } 909 #endif 910 911 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); 912 { 913 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 914 0xe0, 0x80, 0x61,}; 915 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; 916 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; 917 918 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), 919 expected1, UPRV_LENGTHOF(expected1),"utf8", 920 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 921 log_err("utf8->u with skip did not match.\n");; 922 } 923 924 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); 925 { 926 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 927 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe}; 928 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 929 930 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), 931 expected1, UPRV_LENGTHOF(expected1),"SCSU", 932 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 933 log_err("scsu->u with skip did not match.\n"); 934 } 935 936 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 937 { 938 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 939 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ 940 0x24, 0x1E, 0x52, /* 3 */ 941 0xB2, /* 6 */ 942 0x20, /* 7 */ 943 0x40, 0x07, /* 8 - wrong trail byte */ 944 0xB3, /* 10 */ 945 0xB1, /* 11 */ 946 0xD0, 0x20, /* 12 - wrong trail byte */ 947 0x0D, /* 14 */ 948 0x0A, /* 15 */ 949 0x20, /* 16 */ 950 0x00, /* 17 */ 951 0xD0, 0x6C, /* 18 */ 952 0xB6, /* 20 */ 953 0xD8, 0xA5, /* 21 */ 954 0x20, /* 23 */ 955 0x68, /* 24 */ 956 0x59, /* 25 */ 957 0xF9, 0x28, /* 26 */ 958 0x6D, /* 28 */ 959 0x20, /* 29 */ 960 0x73, /* 30 */ 961 0xE0, 0x2D, /* 31 */ 962 0xDE, 0x43, /* 33 */ 963 0xD0, 0x33, /* 35 */ 964 0x20, /* 37 */ 965 0xFA, 0x83, /* 38 */ 966 0x25, 0x01, /* 40 */ 967 0xFB, 0x16, 0x87, /* 42 */ 968 0x4B, 0x16, /* 45 */ 969 0x20, /* 47 */ 970 0xE6, 0xBD, /* 48 */ 971 0xEB, 0x5B, /* 50 */ 972 0x4B, 0xCC, /* 52 */ 973 0xF9, 0xA2, /* 54 */ 974 0xFC, 0x10, 0x3E, /* 56 */ 975 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ 976 0x20, /* 63 */ 977 0xFC, 0x03, 0xAC, /* 64 */ 978 0xFF, /* 67 - FF just resets the state without encoding anything */ 979 0x01, /* 68 */ 980 0xDE, 0x83, /* 69 */ 981 0x20, /* 71 */ 982 0x09 /* 72 */ 983 }; 984 UChar expected[]={ 985 0xFEFF, 0x0061, 0x0062, 0x0020, 986 0x0063, 0x0061, 0x000D, 0x000A, 987 0x0020, 0x0000, 0x00DF, 0x00E6, 988 0x0930, 0x0020, 0x0918, 0x0909, 989 0x3086, 0x304D, 0x0020, 0x3053, 990 0x4000, 0x4E00, 0x7777, 0x0020, 991 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, 992 0x0020, 0xD7A3, 0xDC00, 0xD800, 993 0xD800, 0xDC00, 0xD845, 0xDDDD, 994 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 995 0xDFFF, 0x0001, 0x0E40, 0x0020, 996 0x0009 997 }; 998 int32_t offsets[]={ 999 0, 3, 6, 7, /* skip 8, */ 1000 10, 11, /* skip 12, */ 1001 14, 15, 16, 17, 18, 1002 20, 21, 23, 24, 25, 26, 28, 29, 1003 30, 31, 33, 35, 37, 38, 1004 40, 42, 45, 47, 48, 1005 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, 1006 63, 64, /* trail */ 64, /* reset only 67, */ 1007 68, 69, 1008 71, 72 1009 }; 1010 1011 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText), 1012 expected, UPRV_LENGTHOF(expected), "BOCU-1", 1013 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1014 ) { 1015 log_err("BOCU-1->u with skip did not match.\n"); 1016 } 1017 } 1018 1019 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 1020 { 1021 const uint8_t sampleText[]={ 1022 0x61, /* 0 'a' */ 1023 0xc0, 0x80, /* 1 non-shortest form */ 1024 0xc4, 0xb5, /* 3 U+0135 */ 1025 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ 1026 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */ 1027 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ 1028 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */ 1029 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */ 1030 0x62, /* 24 'b' */ 1031 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */ 1032 0xed, 0xa0, /* 28 incomplete sequence */ 1033 0xd0, 0x80 /* 30 U+0400 */ 1034 }; 1035 UChar expected[]={ 1036 0x0061, 1037 /* skip */ 1038 0x0135, 1039 0xd020, 1040 0xd801, 0xdc01, 1041 0xe000, 1042 0xdc01, 1043 /* skip */ 1044 0x0062, 1045 0xd801, 1046 0x0400 1047 }; 1048 int32_t offsets[]={ 1049 0, 1050 /* skip 1, */ 1051 3, 1052 5, 1053 8, 11, 1054 14, 1055 17, 1056 /* skip 20, 20, */ 1057 24, 1058 25, 1059 /* skip 28 */ 1060 30 1061 }; 1062 1063 /* without offsets */ 1064 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText), 1065 expected, UPRV_LENGTHOF(expected), "CESU-8", 1066 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) 1067 ) { 1068 log_err("CESU-8->u with skip did not match.\n"); 1069 } 1070 1071 /* with offsets */ 1072 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText), 1073 expected, UPRV_LENGTHOF(expected), "CESU-8", 1074 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1075 ) { 1076 log_err("CESU-8->u with skip did not match.\n"); 1077 } 1078 } 1079 } 1080 1081 static void TestStop(int32_t inputsize, int32_t outputsize) 1082 { 1083 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1084 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1085 1086 static const uint8_t expstopIBM_949[]= { 1087 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; 1088 1089 static const uint8_t expstopIBM_943[] = { 1090 0x9f, 0xaf, 0x9f, 0xb1}; 1091 1092 static const uint8_t expstopIBM_930[] = { 1093 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; 1094 1095 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; 1096 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; 1097 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; 1098 1099 1100 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; 1101 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; 1102 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; 1103 1104 static const int32_t fromIBM949Offs [] = { 0, 1, 3}; 1105 static const int32_t fromIBM943Offs [] = { 0, 2}; 1106 static const int32_t fromIBM930Offs [] = { 1, 3}; 1107 1108 gInBufferSize = inputsize; 1109 gOutBufferSize = outputsize; 1110 1111 /*From Unicode*/ 1112 1113 #if !UCONFIG_NO_LEGACY_CONVERSION 1114 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText), 1115 expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949", 1116 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) 1117 log_err("u-> ibm-949 with stop did not match.\n"); 1118 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), 1119 expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943", 1120 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) 1121 log_err("u-> ibm-943 with stop did not match.\n"); 1122 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), 1123 expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930", 1124 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) 1125 log_err("u-> ibm-930 with stop did not match.\n"); 1126 1127 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); 1128 { 1129 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1130 static const uint8_t toIBM943[]= { 0x61,}; 1131 static const int32_t offset[]= {0,} ; 1132 1133 /*EUC_JP*/ 1134 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1135 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; 1136 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; 1137 1138 /*EUC_TW*/ 1139 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1140 static const uint8_t to_euc_tw[]={ 1141 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; 1142 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; 1143 1144 /*ISO-2022-JP*/ 1145 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; 1146 static const uint8_t to_iso_2022_jp[]={ 1147 0x41, 1148 1149 }; 1150 static const int32_t from_iso_2022_jpOffs [] ={0,}; 1151 1152 /*ISO-2022-cn*/ 1153 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1154 static const uint8_t to_iso_2022_cn[]={ 1155 0x41, 1156 1157 }; 1158 static const int32_t from_iso_2022_cnOffs [] ={ 1159 0,0, 1160 2,2, 1161 }; 1162 1163 /*ISO-2022-kr*/ 1164 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 1165 static const uint8_t to_iso_2022_kr[]={ 1166 0x1b, 0x24, 0x29, 0x43, 1167 0x41, 1168 0x0e, 0x25, 0x50, 1169 }; 1170 static const int32_t from_iso_2022_krOffs [] ={ 1171 -1,-1,-1,-1, 1172 0, 1173 1,1,1, 1174 }; 1175 1176 /* HZ encoding */ 1177 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1178 1179 static const uint8_t to_hz[]={ 1180 0x7e, 0x7d, 0x41, 1181 0x7e, 0x7b, 0x26, 0x30, 1182 1183 }; 1184 static const int32_t from_hzOffs [] ={ 1185 0, 0,0, 1186 1,1,1,1, 1187 }; 1188 1189 /*ISCII*/ 1190 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1191 static const uint8_t to_iscii[]={ 1192 0x41, 1193 }; 1194 static const int32_t from_isciiOffs [] ={ 1195 0, 1196 }; 1197 1198 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest), 1199 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943", 1200 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) 1201 log_err("u-> ibm-943 with stop did not match.\n"); 1202 1203 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText), 1204 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP", 1205 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) 1206 log_err("u-> euc-jp with stop did not match.\n"); 1207 1208 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText), 1209 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw", 1210 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1211 log_err("u-> euc-tw with stop did not match.\n"); 1212 1213 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText), 1214 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp", 1215 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1216 log_err("u-> iso-2022-jp with stop did not match.\n"); 1217 1218 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText), 1219 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp", 1220 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1221 log_err("u-> iso-2022-jp with stop did not match.\n"); 1222 1223 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText), 1224 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn", 1225 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) 1226 log_err("u-> iso-2022-cn with stop did not match.\n"); 1227 1228 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText), 1229 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr", 1230 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) 1231 log_err("u-> iso-2022-kr with stop did not match.\n"); 1232 1233 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText), 1234 to_hz, UPRV_LENGTHOF(to_hz), "HZ", 1235 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) 1236 log_err("u-> HZ with stop did not match.\n");\ 1237 1238 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText), 1239 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0", 1240 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) 1241 log_err("u-> iscii with stop did not match.\n"); 1242 1243 1244 } 1245 #endif 1246 1247 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); 1248 { 1249 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1250 1251 static const uint8_t to_SCSU[]={ 1252 0x41, 1253 1254 }; 1255 int32_t from_SCSUOffs [] ={ 1256 0, 1257 1258 }; 1259 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText), 1260 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU", 1261 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) 1262 log_err("u-> SCSU with skip did not match.\n"); 1263 1264 } 1265 1266 /*to Unicode*/ 1267 1268 #if !UCONFIG_NO_LEGACY_CONVERSION 1269 if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), 1270 IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949", 1271 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) 1272 log_err("ibm-949->u with stop did not match.\n"); 1273 if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), 1274 IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943", 1275 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) 1276 log_err("ibm-943->u with stop did not match.\n"); 1277 if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), 1278 IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930", 1279 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) 1280 log_err("ibm-930->u with stop did not match.\n"); 1281 1282 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); 1283 { 1284 1285 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1286 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1287 }; 1288 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; 1289 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; 1290 1291 1292 /*EUC-JP*/ 1293 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1294 0x8f, 0xda, 0xa1, /*unassigned*/ 1295 0x8e, 0xe0, 1296 }; 1297 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; 1298 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; 1299 1300 /*EUC_TW*/ 1301 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1302 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1303 0xe6, 0xca, 0x8a, 1304 }; 1305 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; 1306 int32_t from_euc_twOffs [] ={ 0, 1, 3}; 1307 1308 1309 1310 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL), 1311 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930", 1312 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1313 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); 1314 1315 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp), 1316 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP", 1317 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) 1318 log_err("euc-jp->u with stop did not match.\n"); 1319 1320 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw), 1321 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw", 1322 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1323 log_err("euc-tw->u with stop did not match.\n"); 1324 } 1325 #endif 1326 1327 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); 1328 { 1329 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1330 0xe0, 0x80, 0x61,}; 1331 static const UChar expected1[] = { 0x0031, 0x4e8c,}; 1332 static const int32_t offsets1[] = { 0x0000, 0x0001}; 1333 1334 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), 1335 expected1, UPRV_LENGTHOF(expected1),"utf8", 1336 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1337 log_err("utf8->u with stop did not match.\n");; 1338 } 1339 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); 1340 { 1341 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04}; 1342 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061}; 1343 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; 1344 1345 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), 1346 expected1, UPRV_LENGTHOF(expected1),"SCSU", 1347 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1348 log_err("scsu->u with stop did not match.\n");; 1349 } 1350 1351 } 1352 1353 static void TestSub(int32_t inputsize, int32_t outputsize) 1354 { 1355 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1356 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1357 1358 static const uint8_t expsubIBM_949[] = 1359 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; 1360 1361 static const uint8_t expsubIBM_943[] = { 1362 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; 1363 1364 static const uint8_t expsubIBM_930[] = { 1365 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; 1366 1367 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 }; 1368 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1369 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1370 1371 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1372 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; 1373 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; 1374 1375 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; 1376 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; 1377 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; 1378 1379 gInBufferSize = inputsize; 1380 gOutBufferSize = outputsize; 1381 1382 /*from unicode*/ 1383 1384 #if !UCONFIG_NO_LEGACY_CONVERSION 1385 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText), 1386 expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949", 1387 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) 1388 log_err("u-> ibm-949 with subst did not match.\n"); 1389 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), 1390 expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943", 1391 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) 1392 log_err("u-> ibm-943 with subst did not match.\n"); 1393 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), 1394 expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930", 1395 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) 1396 log_err("u-> ibm-930 with subst did not match.\n"); 1397 1398 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1399 { 1400 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1401 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; 1402 static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; 1403 1404 1405 /* EUC_JP*/ 1406 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1407 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1408 0xf4, 0xfe, 0xf4, 0xfe, 1409 0x61, 0x8e, 0xe0, 1410 }; 1411 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7}; 1412 1413 /*EUC_TW*/ 1414 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1415 static const uint8_t to_euc_tw[]={ 1416 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1417 0xfd, 0xfe, 0xfd, 0xfe, 1418 0x61, 0xe6, 0xca, 0x8a, 1419 }; 1420 1421 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; 1422 1423 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest), 1424 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943", 1425 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) 1426 log_err("u-> ibm-943 with substitute did not match.\n"); 1427 1428 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText), 1429 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP", 1430 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) 1431 log_err("u-> euc-jp with substitute did not match.\n"); 1432 1433 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText), 1434 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw", 1435 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1436 log_err("u-> euc-tw with substitute did not match.\n"); 1437 } 1438 #endif 1439 1440 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1441 { 1442 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1443 1444 const uint8_t to_SCSU[]={ 1445 0x41, 1446 0x0e, 0xff,0xfd, 1447 0x42 1448 1449 1450 }; 1451 int32_t from_SCSUOffs [] ={ 1452 0, 1453 1,1,1, 1454 2, 1455 1456 }; 1457 const uint8_t to_SCSU_1[]={ 1458 0x41, 1459 1460 }; 1461 int32_t from_SCSUOffs_1 [] ={ 1462 0, 1463 1464 }; 1465 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText), 1466 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU", 1467 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) 1468 log_err("u-> SCSU with substitute did not match.\n"); 1469 1470 if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText), 1471 to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU", 1472 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 1473 log_err("u-> SCSU with substitute did not match.\n"); 1474 } 1475 1476 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1477 { 1478 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,}; 1479 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, 1480 0xf0, 0x90, 0x90, 0x81, 1481 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 1482 0xef, 0xbf, 0xbf, 0x61, 1483 1484 }; 1485 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 }; 1486 if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput), 1487 expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8", 1488 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { 1489 log_err("u-> utf8 with substitute did not match.\n"); 1490 } 1491 } 1492 1493 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1494 { 1495 static const UChar in[]={ 0x0041, 0xfeff }; 1496 1497 static const uint8_t out[]={ 1498 #if U_IS_BIG_ENDIAN 1499 0xfe, 0xff, 1500 0x00, 0x41, 1501 0xfe, 0xff 1502 #else 1503 0xff, 0xfe, 1504 0x41, 0x00, 1505 0xff, 0xfe 1506 #endif 1507 }; 1508 static const int32_t offsets[]={ 1509 -1, -1, 0, 0, 1, 1 1510 }; 1511 1512 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in), 1513 out, UPRV_LENGTHOF(out), "UTF-16", 1514 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1515 ) { 1516 log_err("u->UTF-16 with substitute did not match.\n"); 1517 } 1518 } 1519 1520 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1521 { 1522 static const UChar in[]={ 0x0041, 0xfeff }; 1523 1524 static const uint8_t out[]={ 1525 #if U_IS_BIG_ENDIAN 1526 0x00, 0x00, 0xfe, 0xff, 1527 0x00, 0x00, 0x00, 0x41, 1528 0x00, 0x00, 0xfe, 0xff 1529 #else 1530 0xff, 0xfe, 0x00, 0x00, 1531 0x41, 0x00, 0x00, 0x00, 1532 0xff, 0xfe, 0x00, 0x00 1533 #endif 1534 }; 1535 static const int32_t offsets[]={ 1536 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 1537 }; 1538 1539 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in), 1540 out, UPRV_LENGTHOF(out), "UTF-32", 1541 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1542 ) { 1543 log_err("u->UTF-32 with substitute did not match.\n"); 1544 } 1545 } 1546 1547 /*to unicode*/ 1548 1549 #if !UCONFIG_NO_LEGACY_CONVERSION 1550 if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), 1551 IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949", 1552 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) 1553 log_err("ibm-949->u with substitute did not match.\n"); 1554 if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), 1555 IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943", 1556 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) 1557 log_err("ibm-943->u with substitute did not match.\n"); 1558 if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), 1559 IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930", 1560 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) 1561 log_err("ibm-930->u with substitute did not match.\n"); 1562 1563 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1564 { 1565 1566 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1567 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1568 }; 1569 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 1570 }; 1571 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; 1572 1573 1574 /* EUC_JP*/ 1575 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1576 0x8f, 0xda, 0xa1, /*unassigned*/ 1577 0x8e, 0xe0, 0x8a 1578 }; 1579 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a }; 1580 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; 1581 1582 /*EUC_TW*/ 1583 const uint8_t sampleTxt_euc_tw[]={ 1584 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1585 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1586 0xe6, 0xca, 0x8a, 1587 }; 1588 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, }; 1589 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; 1590 1591 1592 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL), 1593 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930", 1594 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1595 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); 1596 1597 1598 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp), 1599 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP", 1600 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) 1601 log_err("euc-jp->u with substitute did not match.\n"); 1602 1603 1604 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw), 1605 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw", 1606 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1607 log_err("euc-tw->u with substitute did not match.\n"); 1608 1609 1610 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp), 1611 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP", 1612 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) 1613 log_err("euc-jp->u with substitute did not match.\n"); 1614 } 1615 #endif 1616 1617 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1618 { 1619 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1620 0xe0, 0x80, 0x61,}; 1621 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061}; 1622 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0006}; 1623 1624 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), 1625 expected1, UPRV_LENGTHOF(expected1),"utf8", 1626 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1627 log_err("utf8->u with substitute did not match.\n");; 1628 } 1629 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1630 { 1631 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 1632 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd}; 1633 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 1634 1635 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), 1636 expected1, UPRV_LENGTHOF(expected1),"SCSU", 1637 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1638 log_err("scsu->u with stop did not match.\n");; 1639 } 1640 1641 #if !UCONFIG_NO_LEGACY_CONVERSION 1642 log_verbose("Testing ibm-930 subchar/subchar1\n"); 1643 { 1644 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; 1645 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f }; 1646 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1647 1648 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a }; 1649 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 }; 1650 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 }; 1651 1652 if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930", 1653 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1654 ) { 1655 log_err("u->ibm-930 subchar/subchar1 did not match.\n"); 1656 } 1657 1658 if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930", 1659 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1660 ) { 1661 log_err("ibm-930->u subchar/subchar1 did not match.\n"); 1662 } 1663 } 1664 1665 log_verbose("Testing GB 18030 with substitute callbacks\n"); 1666 { 1667 static const UChar u2[]={ 1668 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; 1669 static const uint8_t gb2[]={ 1670 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; 1671 static const int32_t offsets2[]={ 1672 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; 1673 1674 if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030", 1675 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1676 ) { 1677 log_err("gb18030->u with substitute did not match.\n"); 1678 } 1679 } 1680 #endif 1681 1682 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); 1683 { 1684 static const uint8_t utf7[]={ 1685 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */ 1686 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 1687 }; 1688 static const UChar unicode[]={ 1689 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 1690 }; 1691 static const int32_t offsets[]={ 1692 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24 1693 }; 1694 1695 if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", 1696 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1697 ) { 1698 log_err("UTF-7->u with substitute did not match.\n"); 1699 } 1700 } 1701 1702 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); 1703 { 1704 static const uint8_t 1705 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, 1706 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, 1707 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; 1708 1709 static const UChar 1710 out1[]={ 0x4e00, 0xfeff }, 1711 out2[]={ 0x004e, 0xfffe }, 1712 out3[]={ 0xfefd, 0x4e00, 0xfeff }; 1713 1714 static const int32_t 1715 offsets1[]={ 2, 4 }, 1716 offsets2[]={ 2, 4 }, 1717 offsets3[]={ 0, 2, 4 }; 1718 1719 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16", 1720 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1721 ) { 1722 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); 1723 } 1724 1725 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16", 1726 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1727 ) { 1728 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); 1729 } 1730 1731 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16", 1732 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1733 ) { 1734 log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); 1735 } 1736 } 1737 1738 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); 1739 { 1740 static const uint8_t 1741 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }, 1742 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }, 1743 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, 1744 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 }; 1745 1746 static const UChar 1747 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff }, 1748 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe }, 1749 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd }, 1750 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 }; 1751 1752 static const int32_t 1753 offsets1[]={ 4, 4, 8 }, 1754 offsets2[]={ 4, 4, 8 }, 1755 offsets3[]={ 0, 4, 4, 8, 12 }, 1756 offsets4[]={ 0, 0, 4, 8 }; 1757 1758 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32", 1759 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1760 ) { 1761 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); 1762 } 1763 1764 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32", 1765 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1766 ) { 1767 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); 1768 } 1769 1770 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32", 1771 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1772 ) { 1773 log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); 1774 } 1775 1776 if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32", 1777 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0) 1778 ) { 1779 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n"); 1780 } 1781 } 1782 } 1783 1784 static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 1785 { 1786 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1787 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1788 1789 const uint8_t expsubwvalIBM_949[]= { 1790 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 1791 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; 1792 1793 const uint8_t expsubwvalIBM_943[]= { 1794 0x9f, 0xaf, 0x9f, 0xb1, 1795 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; 1796 1797 const uint8_t expsubwvalIBM_930[] = { 1798 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f }; 1799 1800 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; 1801 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; 1802 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ 1803 1804 gInBufferSize = inputsize; 1805 gOutBufferSize = outputsize; 1806 1807 /*from Unicode*/ 1808 1809 #if !UCONFIG_NO_LEGACY_CONVERSION 1810 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText), 1811 expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949", 1812 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) 1813 log_err("u-> ibm-949 with subst with value did not match.\n"); 1814 1815 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), 1816 expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943", 1817 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) 1818 log_err("u-> ibm-943 with sub with value did not match.\n"); 1819 1820 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), 1821 expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930", 1822 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) 1823 log_err("u-> ibm-930 with subst with value did not match.\n"); 1824 1825 1826 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 1827 { 1828 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1829 static const uint8_t toIBM943[]= { 0x61, 1830 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1831 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1832 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1833 0x61 }; 1834 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 1835 1836 1837 /* EUC_JP*/ 1838 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, }; 1839 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1840 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1841 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1842 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1843 0x61, 0x8e, 0xe0, 1844 }; 1845 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 1846 3, 3, 3, 3, 3, 3, 1847 3, 3, 3, 3, 3, 3, 1848 5, 5, 5, 5, 5, 5, 1849 6, 7, 7, 1850 }; 1851 1852 /*EUC_TW*/ 1853 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1854 static const uint8_t to_euc_tw[]={ 1855 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1856 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1857 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1858 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1859 0x61, 0xe6, 0xca, 0x8a, 1860 }; 1861 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 1862 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 1863 6, 7, 7, 8, 1864 }; 1865 /*ISO-2022-JP*/ 1866 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; 1867 static const uint8_t to_iso_2022_jp1[]={ 1868 0x1b, 0x24, 0x42, 0x21, 0x21, 1869 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1870 0x1b, 0x24, 0x42, 0x21, 0x22, 1871 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1872 0x42, 1873 }; 1874 1875 static const int32_t from_iso_2022_jpOffs1 [] ={ 1876 0,0,0,0,0, 1877 1,1,1,1,1,1,1,1,1, 1878 2,2,2,2,2, 1879 3,3,3,3,3,3,3,3,3, 1880 4, 1881 }; 1882 /* surrogate pair*/ 1883 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ; 1884 static const uint8_t to_iso_2022_jp2[]={ 1885 0x1b, 0x24, 0x42, 0x21, 0x21, 1886 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1887 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1888 0x1b, 0x24, 0x42, 0x21, 0x22, 1889 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1890 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1891 0x42, 1892 }; 1893 static const int32_t from_iso_2022_jpOffs2 [] ={ 1894 0,0,0,0,0, 1895 1,1,1,1,1,1,1,1,1, 1896 1,1,1,1,1,1, 1897 3,3,3,3,3, 1898 4,4,4,4,4,4,4,4,4, 1899 4,4,4,4,4,4, 1900 6, 1901 }; 1902 1903 /*ISO-2022-cn*/ 1904 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1905 static const uint8_t to_iso_2022_cn[]={ 1906 0x41, 1907 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, 1908 0x42, 1909 }; 1910 static const int32_t from_iso_2022_cnOffs [] ={ 1911 0, 1912 1,1,1,1,1,1, 1913 2, 1914 }; 1915 1916 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; 1917 1918 static const uint8_t to_iso_2022_cn4[]={ 1919 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 1920 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1921 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1922 0x0e, 0x21, 0x22, 1923 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1924 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1925 0x42, 1926 }; 1927 static const int32_t from_iso_2022_cnOffs4 [] ={ 1928 0,0,0,0,0,0,0, 1929 1,1,1,1,1,1,1, 1930 1,1,1,1,1,1, 1931 3,3,3, 1932 4,4,4,4,4,4,4, 1933 4,4,4,4,4,4, 1934 6 1935 1936 }; 1937 1938 /*ISO-2022-kr*/ 1939 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 1940 static const uint8_t to_iso_2022_kr2[]={ 1941 0x1b, 0x24, 0x29, 0x43, 1942 0x41, 1943 0x0e, 0x25, 0x50, 1944 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1945 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1946 0x0e, 0x25, 0x50, 1947 0x0f, 0x42, 1948 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1949 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1950 0x43 1951 }; 1952 static const int32_t from_iso_2022_krOffs2 [] ={ 1953 -1,-1,-1,-1, 1954 0, 1955 1,1,1, 1956 2,2,2,2,2,2,2, 1957 2,2,2,2,2,2, 1958 4,4,4, 1959 5,5, 1960 6,6,6,6,6,6, 1961 6,6,6,6,6,6, 1962 8, 1963 }; 1964 1965 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; 1966 static const uint8_t to_iso_2022_kr[]={ 1967 0x1b, 0x24, 0x29, 0x43, 1968 0x41, 1969 0x0e, 0x25, 0x50, 1970 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1971 0x0e, 0x25, 0x50, 1972 0x0f, 0x42, 1973 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1974 0x43 1975 }; 1976 1977 1978 static const int32_t from_iso_2022_krOffs [] ={ 1979 -1,-1,-1,-1, 1980 0, 1981 1,1,1, 1982 2,2,2,2,2,2,2, 1983 3,3,3, 1984 4,4, 1985 5,5,5,5,5,5, 1986 6, 1987 }; 1988 /* HZ encoding */ 1989 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1990 1991 static const uint8_t to_hz[]={ 1992 0x7e, 0x7d, 0x41, 1993 0x7e, 0x7b, 0x26, 0x30, 1994 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/ 1995 0x7e, 0x7b, 0x26, 0x30, 1996 0x7e, 0x7d, 0x42, 1997 1998 }; 1999 static const int32_t from_hzOffs [] ={ 2000 0,0,0, 2001 1,1,1,1, 2002 2,2,2,2,2,2,2,2, 2003 3,3,3,3, 2004 4,4,4 2005 }; 2006 2007 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 2008 static const uint8_t to_hz2[]={ 2009 0x7e, 0x7d, 0x41, 2010 0x7e, 0x7b, 0x26, 0x30, 2011 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2012 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2013 0x7e, 0x7b, 0x26, 0x30, 2014 0x7e, 0x7d, 0x42, 2015 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2016 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2017 0x43 2018 }; 2019 static const int32_t from_hzOffs2 [] ={ 2020 0,0,0, 2021 1,1,1,1, 2022 2,2,2,2,2,2,2,2, 2023 2,2,2,2,2,2, 2024 4,4,4,4, 2025 5,5,5, 2026 6,6,6,6,6,6, 2027 6,6,6,6,6,6, 2028 8, 2029 }; 2030 2031 /*ISCII*/ 2032 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; 2033 static const uint8_t to_iscii[]={ 2034 0x41, 2035 0xef, 0x42, 0xa1, 2036 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2037 0xa2, 2038 0x42, 2039 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2040 0x43 2041 }; 2042 2043 2044 static const int32_t from_isciiOffs [] ={ 2045 0, 2046 1,1,1, 2047 2,2,2,2,2,2, 2048 3, 2049 4, 2050 5,5,5,5,5,5, 2051 6, 2052 }; 2053 2054 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest), 2055 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943", 2056 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) 2057 log_err("u-> ibm-943 with subst with value did not match.\n"); 2058 2059 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText), 2060 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP", 2061 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) 2062 log_err("u-> euc-jp with subst with value did not match.\n"); 2063 2064 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText), 2065 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw", 2066 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) 2067 log_err("u-> euc-tw with subst with value did not match.\n"); 2068 2069 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1), 2070 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp", 2071 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2072 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2073 2074 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1), 2075 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp", 2076 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2077 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2078 2079 if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2), 2080 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp", 2081 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) 2082 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2083 /*ESCAPE OPTIONS*/ 2084 { 2085 /* surrogate pair*/ 2086 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; 2087 static const uint8_t to_iso_2022_jp3_v2[]={ 2088 0x1b, 0x24, 0x42, 0x21, 0x21, 2089 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2090 2091 0x1b, 0x24, 0x42, 0x21, 0x22, 2092 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2093 2094 0x42, 2095 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b, 2096 }; 2097 2098 static const int32_t from_iso_2022_jpOffs3_v2 [] ={ 2099 0,0,0,0,0, 2100 1,1,1,1,1,1,1,1,1,1,1,1, 2101 2102 3,3,3,3,3, 2103 4,4,4,4,4,4,4,4,4,4,4,4, 2104 2105 6, 2106 7,7,7,7,7,7,7,7,7 2107 }; 2108 2109 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3), 2110 to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp", 2111 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2112 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); 2113 } 2114 { 2115 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2116 static const uint8_t to_iso_2022_cn5_v2[]={ 2117 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2118 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2119 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2120 0x0e, 0x21, 0x22, 2121 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2122 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2123 0x42, 2124 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, 2125 }; 2126 static const int32_t from_iso_2022_cnOffs5_v2 [] ={ 2127 0,0,0,0,0,0,0, 2128 1,1,1,1,1,1,1, 2129 1,1,1,1,1,1, 2130 3,3,3, 2131 4,4,4,4,4,4,4, 2132 4,4,4,4,4,4, 2133 6, 2134 7,7,7,7,7,7 2135 }; 2136 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5), 2137 to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn", 2138 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR )) 2139 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n"); 2140 2141 } 2142 { 2143 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2144 static const uint8_t to_iso_2022_cn6_v2[]={ 2145 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2146 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2147 0x0e, 0x21, 0x22, 2148 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2149 0x42, 2150 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d 2151 }; 2152 static const int32_t from_iso_2022_cnOffs6_v2 [] ={ 2153 0, 0, 0, 0, 0, 0, 0, 2154 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2155 3, 3, 3, 2156 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2157 6, 2158 7, 7, 7, 7, 7, 7, 7, 7, 2159 }; 2160 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6), 2161 to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn", 2162 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR )) 2163 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n"); 2164 2165 } 2166 { 2167 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2168 static const uint8_t to_iso_2022_cn7_v2[]={ 2169 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2170 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2171 0x0e, 0x21, 0x22, 2172 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2173 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, 2174 }; 2175 static const int32_t from_iso_2022_cnOffs7_v2 [] ={ 2176 0, 0, 0, 0, 0, 0, 0, 2177 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2178 3, 3, 3, 2179 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2180 6, 2181 7, 7, 7, 7, 7, 7, 2182 }; 2183 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7), 2184 to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn", 2185 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR )) 2186 log_err("u-> iso-2022-cn with sub & K did not match.\n"); 2187 2188 } 2189 { 2190 static const UChar iso_2022_cn_inputText8[]={ 2191 0x3000, 2192 0xD84D, 0xDC56, 2193 0x3001, 2194 0xD84D, 0xDC56, 2195 0xDBFF, 0xDFFF, 2196 0x0042, 2197 0x0902}; 2198 static const uint8_t to_iso_2022_cn8_v2[]={ 2199 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2200 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2201 0x0e, 0x21, 0x22, 2202 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2203 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20, 2204 0x42, 2205 0x5c, 0x39, 0x30, 0x32, 0x20 2206 }; 2207 static const int32_t from_iso_2022_cnOffs8_v2 [] ={ 2208 0, 0, 0, 0, 0, 0, 0, 2209 1, 1, 1, 1, 1, 1, 1, 1, 2210 3, 3, 3, 2211 4, 4, 4, 4, 4, 4, 4, 4, 2212 6, 6, 6, 6, 6, 6, 6, 6, 2213 8, 2214 9, 9, 9, 9, 9 2215 }; 2216 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8), 2217 to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn", 2218 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR )) 2219 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n"); 2220 2221 } 2222 { 2223 static const uint8_t to_iso_2022_cn4_v3[]={ 2224 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2225 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2226 0x0e, 0x21, 0x22, 2227 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2228 0x42 2229 }; 2230 2231 2232 static const int32_t from_iso_2022_cnOffs4_v3 [] ={ 2233 0,0,0,0,0,0,0, 2234 1,1,1,1,1,1,1,1,1,1,1, 2235 2236 3,3,3, 2237 4,4,4,4,4,4,4,4,4,4,4, 2238 2239 6 2240 2241 }; 2242 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4), 2243 to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn", 2244 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2245 { 2246 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n"); 2247 } 2248 } 2249 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText), 2250 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn", 2251 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) 2252 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2253 2254 if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4), 2255 to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn", 2256 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) 2257 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2258 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText), 2259 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr", 2260 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) 2261 log_err("u-> iso_2022_kr with subst with value did not match.\n"); 2262 if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2), 2263 to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr", 2264 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) 2265 log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); 2266 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText), 2267 to_hz, UPRV_LENGTHOF(to_hz), "HZ", 2268 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) 2269 log_err("u-> hz with subst with value did not match.\n"); 2270 if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2), 2271 to_hz2, UPRV_LENGTHOF(to_hz2), "HZ", 2272 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) 2273 log_err("u-> hz with subst with value did not match.\n"); 2274 2275 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText), 2276 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0", 2277 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) 2278 log_err("u-> iscii with subst with value did not match.\n"); 2279 } 2280 #endif 2281 2282 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 2283 /*to Unicode*/ 2284 { 2285 #if !UCONFIG_NO_LEGACY_CONVERSION 2286 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 2287 0x81, 0xad, /*unassigned*/ 2288 0x89, 0xd3 }; 2289 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 2290 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 2291 0x7B87}; 2292 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 2293 2294 /* EUC_JP*/ 2295 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 2296 0x8f, 0xda, 0xa1, /*unassigned*/ 2297 0x8e, 0xe0, 2298 }; 2299 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 2300 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, 2301 0x00a2 }; 2302 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, 2303 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2304 9, 2305 }; 2306 2307 /*EUC_TW*/ 2308 static const uint8_t sampleTxt_euc_tw[]={ 2309 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 2310 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 2311 0xe6, 0xca, 0x8a, 2312 }; 2313 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 2314 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43, 2315 0x8706, 0x8a, }; 2316 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 2317 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2318 11, 13}; 2319 2320 /*iso-2022-jp*/ 2321 static const uint8_t sampleTxt_iso_2022_jp[]={ 2322 0x1b, 0x28, 0x42, 0x41, 2323 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ 2324 0x1b, 0x28, 0x42, 0x42, 2325 2326 }; 2327 /* A % X 3 A % X 1 A B */ 2328 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 }; 2329 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 }; 2330 2331 /*iso-2022-cn*/ 2332 static const uint8_t sampleTxt_iso_2022_cn[]={ 2333 0x0f, 0x41, 0x44, 2334 0x1B, 0x24, 0x29, 0x47, 2335 0x0E, 0x40, 0x6c, /*unassigned*/ 2336 0x0f, 0x42, 2337 2338 }; 2339 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 }; 2340 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 }; 2341 2342 /*iso-2022-kr*/ 2343 static const uint8_t sampleTxt_iso_2022_kr[]={ 2344 0x1b, 0x24, 0x29, 0x43, 2345 0x41, 2346 0x0E, 0x7f, 0x1E, 2347 0x0e, 0x25, 0x50, 2348 0x0f, 0x51, 2349 0x42, 0x43, 2350 2351 }; 2352 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; 2353 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 }; 2354 2355 /*hz*/ 2356 static const uint8_t sampleTxt_hz[]={ 2357 0x41, 2358 0x7e, 0x7b, 0x26, 0x30, 2359 0x7f, 0x1E, /*unassigned*/ 2360 0x26, 0x30, 2361 0x7e, 0x7d, 0x42, 2362 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 2363 0x7e, 0x7d, 0x42, 2364 }; 2365 static const UChar hztoUnicode[]={ 2366 0x41, 2367 0x03a0, 2368 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2369 0x03A0, 2370 0x42, 2371 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2372 0x42,}; 2373 2374 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, }; 2375 2376 2377 /*iscii*/ 2378 static const uint8_t sampleTxt_iscii[]={ 2379 0x41, 2380 0x30, 2381 0xEB, /*unassigned*/ 2382 0xa3, 2383 0x42, 2384 0xEC, /*unassigned*/ 2385 0x42, 2386 }; 2387 static const UChar isciitoUnicode[]={ 2388 0x41, 2389 0x30, 2390 0x25, 0x58, 0x45, 0x42, 2391 0x0903, 2392 0x42, 2393 0x25, 0x58, 0x45, 0x43, 2394 0x42,}; 2395 2396 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; 2397 #endif 2398 2399 /*UTF8*/ 2400 static const uint8_t sampleTxtUTF8[]={ 2401 0x20, 0x64, 0x50, 2402 0xC2, 0x7E, /* truncated char */ 2403 0x20, 2404 0xE0, 0xB5, 0x7E, /* truncated char */ 2405 0x40, 2406 }; 2407 static const UChar UTF8ToUnicode[]={ 2408 0x0020, 0x0064, 0x0050, 2409 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ 2410 0x0020, 2411 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E, 2412 0x0040 2413 }; 2414 static const int32_t fromUTF8[] = { 2415 0, 1, 2, 2416 3, 3, 3, 3, 4, 2417 5, 2418 6, 6, 6, 6, 6, 6, 6, 6, 8, 2419 9 2420 }; 2421 static const UChar UTF8ToUnicodeXML_DEC[]={ 2422 0x0020, 0x0064, 0x0050, 2423 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */ 2424 0x0020, 2425 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E, 2426 0x0040 2427 }; 2428 static const int32_t fromUTF8XML_DEC[] = { 2429 0, 1, 2, 2430 3, 3, 3, 3, 3, 3, 4, 2431 5, 2432 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 2433 9 2434 }; 2435 2436 2437 #if !UCONFIG_NO_LEGACY_CONVERSION 2438 if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU), 2439 IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943", 2440 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) 2441 log_err("ibm-943->u with substitute with value did not match.\n"); 2442 2443 if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP), 2444 EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP", 2445 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) 2446 log_err("euc-jp->u with substitute with value did not match.\n"); 2447 2448 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw), 2449 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw", 2450 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) 2451 log_err("euc-tw->u with substitute with value did not match.\n"); 2452 2453 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), 2454 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp", 2455 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) 2456 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2457 2458 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), 2459 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp", 2460 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR)) 2461 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2462 2463 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ 2464 { 2465 static const UChar iso_2022_jptoUnicodeDec[]={ 2466 0x0041, 2467 /* & # 5 8 ; */ 2468 0x0026, 0x0023, 0x0035, 0x0038, 0x003b, 2469 0x0026, 0x0023, 0x0032, 0x0036, 0x003b, 2470 0x0042 }; 2471 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, }; 2472 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), 2473 iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp", 2474 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2475 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n"); 2476 } 2477 { 2478 static const UChar iso_2022_jptoUnicodeHex[]={ 2479 0x0041, 2480 /* & # x 3 A ; */ 2481 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b, 2482 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b, 2483 0x0042 }; 2484 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 }; 2485 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), 2486 iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp", 2487 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) 2488 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n"); 2489 } 2490 { 2491 static const UChar iso_2022_jptoUnicodeC[]={ 2492 0x0041, 2493 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */ 2494 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */ 2495 0x0042 }; 2496 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; 2497 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), 2498 iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp", 2499 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2500 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n"); 2501 } 2502 } 2503 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn), 2504 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn", 2505 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) 2506 log_err("iso-2022-cn->u with substitute with value did not match.\n"); 2507 2508 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr), 2509 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr", 2510 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) 2511 log_err("iso-2022-kr->u with substitute with value did not match.\n"); 2512 2513 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz), 2514 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ", 2515 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) 2516 log_err("hz->u with substitute with value did not match.\n"); 2517 2518 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii), 2519 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0", 2520 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) 2521 log_err("ISCII ->u with substitute with value did not match.\n"); 2522 #endif 2523 2524 if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8), 2525 UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8", 2526 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) 2527 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2528 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8), 2529 UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8", 2530 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR)) 2531 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2532 } 2533 } 2534 2535 #if !UCONFIG_NO_LEGACY_CONVERSION 2536 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) 2537 { 2538 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; 2539 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 2540 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; 2541 2542 2543 static const uint8_t text943[] = { 2544 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; 2545 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; 2546 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; 2547 static const UChar toUnicode943stop[]= { 0x304b}; 2548 2549 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; 2550 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; 2551 static const int32_t fromIBM943Offsstop[] = { 0}; 2552 2553 gInBufferSize = inputsize; 2554 gOutBufferSize = outputsize; 2555 /*checking with a legal value*/ 2556 if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText), 2557 templegal949, UPRV_LENGTHOF(templegal949), "ibm-949", 2558 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) 2559 log_err("u-> ibm-949 with skip did not match.\n"); 2560 2561 /*checking illegal value for ibm-943 with substitute*/ 2562 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943), 2563 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943", 2564 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) 2565 log_err("ibm-943->u with subst did not match.\n"); 2566 /*checking illegal value for ibm-943 with skip */ 2567 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943), 2568 toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943", 2569 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) 2570 log_err("ibm-943->u with skip did not match.\n"); 2571 2572 /*checking illegal value for ibm-943 with stop */ 2573 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943), 2574 toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943", 2575 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) 2576 log_err("ibm-943->u with stop did not match.\n"); 2577 2578 } 2579 2580 static void TestSingleByte(int32_t inputsize, int32_t outputsize) 2581 { 2582 static const uint8_t sampleText[] = { 2583 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, 2584 0xff, 0x32, 0x33}; 2585 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; 2586 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; 2587 /*checking illegal value for ibm-943 with substitute*/ 2588 gInBufferSize = inputsize; 2589 gOutBufferSize = outputsize; 2590 2591 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText), 2592 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943", 2593 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) 2594 log_err("ibm-943->u with subst did not match.\n"); 2595 } 2596 2597 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) 2598 { 2599 /*EBCDIC_STATEFUL*/ 2600 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 }; 2601 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; 2602 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 }; 2603 /* s SO doubl SI sng s SO fe fe SI s */ 2604 2605 /*EBCDIC_STATEFUL with subChar=3f*/ 2606 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 }; 2607 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 }; 2608 static const char mySubChar[]={ 0x3f}; 2609 2610 gInBufferSize = inputsize; 2611 gOutBufferSize = outputsize; 2612 2613 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest), 2614 toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930", 2615 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) 2616 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); 2617 2618 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest), 2619 toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930", 2620 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) 2621 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n"); 2622 } 2623 #endif 2624 2625 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 2626 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, 2627 const char *mySubChar, int8_t len) 2628 { 2629 2630 2631 UErrorCode status = U_ZERO_ERROR; 2632 UConverter *conv = 0; 2633 char junkout[NEW_MAX_BUFFER]; /* FIX */ 2634 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2635 const UChar *src; 2636 char *end; 2637 char *targ; 2638 int32_t *offs; 2639 int i; 2640 int32_t realBufferSize; 2641 char *realBufferEnd; 2642 const UChar *realSourceEnd; 2643 const UChar *sourceLimit; 2644 UBool checkOffsets = TRUE; 2645 UBool doFlush; 2646 char junk[9999]; 2647 char offset_str[9999]; 2648 char *p; 2649 UConverterFromUCallback oldAction = NULL; 2650 const void* oldContext = NULL; 2651 2652 2653 for(i=0;i<NEW_MAX_BUFFER;i++) 2654 junkout[i] = (char)0xF0; 2655 for(i=0;i<NEW_MAX_BUFFER;i++) 2656 junokout[i] = 0xFF; 2657 setNuConvTestName(codepage, "FROM"); 2658 2659 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 2660 gOutBufferSize); 2661 2662 conv = ucnv_open(codepage, &status); 2663 if(U_FAILURE(status)) 2664 { 2665 log_data_err("Couldn't open converter %s\n",codepage); 2666 return TRUE; 2667 } 2668 2669 log_verbose("Converter opened..\n"); 2670 2671 /*----setting the callback routine----*/ 2672 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 2673 if (U_FAILURE(status)) 2674 { 2675 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2676 } 2677 /*------------------------*/ 2678 /*setting the subChar*/ 2679 if(mySubChar != NULL){ 2680 ucnv_setSubstChars(conv, mySubChar, len, &status); 2681 if (U_FAILURE(status)) { 2682 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2683 } 2684 } 2685 /*------------*/ 2686 2687 src = source; 2688 targ = junkout; 2689 offs = junokout; 2690 2691 realBufferSize = UPRV_LENGTHOF(junkout); 2692 realBufferEnd = junkout + realBufferSize; 2693 realSourceEnd = source + sourceLen; 2694 2695 if ( gOutBufferSize != realBufferSize ) 2696 checkOffsets = FALSE; 2697 2698 if( gInBufferSize != NEW_MAX_BUFFER ) 2699 checkOffsets = FALSE; 2700 2701 do 2702 { 2703 end = nct_min(targ + gOutBufferSize, realBufferEnd); 2704 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 2705 2706 doFlush = (UBool)(sourceLimit == realSourceEnd); 2707 2708 if(targ == realBufferEnd) 2709 { 2710 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 2711 return FALSE; 2712 } 2713 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 2714 2715 2716 status = U_ZERO_ERROR; 2717 2718 ucnv_fromUnicode (conv, 2719 (char **)&targ, 2720 (const char *)end, 2721 &src, 2722 sourceLimit, 2723 checkOffsets ? offs : NULL, 2724 doFlush, /* flush if we're at the end of the input data */ 2725 &status); 2726 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); 2727 2728 2729 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ 2730 UChar errChars[50]; /* should be sufficient */ 2731 int8_t errLen = 50; 2732 UErrorCode err = U_ZERO_ERROR; 2733 const UChar* start= NULL; 2734 ucnv_getInvalidUChars(conv,errChars, &errLen, &err); 2735 if(U_FAILURE(err)){ 2736 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err)); 2737 } 2738 /* length of in invalid chars should be equal to returned length*/ 2739 start = src - errLen; 2740 if(u_strncmp(errChars,start,errLen)!=0){ 2741 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); 2742 } 2743 } 2744 /* allow failure codes for the stop callback */ 2745 if(U_FAILURE(status) && 2746 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND))) 2747 { 2748 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 2749 return FALSE; 2750 } 2751 2752 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 2753 sourceLen, targ-junkout); 2754 if(getTestOption(VERBOSITY_OPTION)) 2755 { 2756 2757 junk[0] = 0; 2758 offset_str[0] = 0; 2759 for(p = junkout;p<targ;p++) 2760 { 2761 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 2762 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 2763 } 2764 2765 log_verbose(junk); 2766 printSeq(expect, expectLen); 2767 if ( checkOffsets ) 2768 { 2769 log_verbose("\nOffsets:"); 2770 log_verbose(offset_str); 2771 } 2772 log_verbose("\n"); 2773 } 2774 ucnv_close(conv); 2775 2776 2777 if(expectLen != targ-junkout) 2778 { 2779 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 2780 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 2781 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 2782 printSeqErr(expect, expectLen); 2783 return FALSE; 2784 } 2785 2786 if (checkOffsets && (expectOffsets != 0) ) 2787 { 2788 log_verbose("comparing %d offsets..\n", targ-junkout); 2789 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 2790 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 2791 log_err("Got Output : "); 2792 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 2793 log_err("Got Offsets: "); 2794 for(p=junkout;p<targ;p++) 2795 log_err("%d,", junokout[p-junkout]); 2796 log_err("\n"); 2797 log_err("Expected Offsets: "); 2798 for(i=0; i<(targ-junkout); i++) 2799 log_err("%d,", expectOffsets[i]); 2800 log_err("\n"); 2801 return FALSE; 2802 } 2803 } 2804 2805 if(!memcmp(junkout, expect, expectLen)) 2806 { 2807 log_verbose("String matches! %s\n", gNuConvTestName); 2808 return TRUE; 2809 } 2810 else 2811 { 2812 log_err("String does not match. %s\n", gNuConvTestName); 2813 log_err("source: "); 2814 printUSeqErr(source, sourceLen); 2815 log_err("Got: "); 2816 printSeqErr((const uint8_t *)junkout, expectLen); 2817 log_err("Expected: "); 2818 printSeqErr(expect, expectLen); 2819 return FALSE; 2820 } 2821 } 2822 2823 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 2824 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 2825 const char *mySubChar, int8_t len) 2826 { 2827 UErrorCode status = U_ZERO_ERROR; 2828 UConverter *conv = 0; 2829 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 2830 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2831 const char *src; 2832 const char *realSourceEnd; 2833 const char *srcLimit; 2834 UChar *targ; 2835 UChar *end; 2836 int32_t *offs; 2837 int i; 2838 UBool checkOffsets = TRUE; 2839 char junk[9999]; 2840 char offset_str[9999]; 2841 UChar *p; 2842 UConverterToUCallback oldAction = NULL; 2843 const void* oldContext = NULL; 2844 2845 int32_t realBufferSize; 2846 UChar *realBufferEnd; 2847 2848 2849 for(i=0;i<NEW_MAX_BUFFER;i++) 2850 junkout[i] = 0xFFFE; 2851 2852 for(i=0;i<NEW_MAX_BUFFER;i++) 2853 junokout[i] = -1; 2854 2855 setNuConvTestName(codepage, "TO"); 2856 2857 log_verbose("\n========= %s\n", gNuConvTestName); 2858 2859 conv = ucnv_open(codepage, &status); 2860 if(U_FAILURE(status)) 2861 { 2862 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 2863 return TRUE; 2864 } 2865 2866 log_verbose("Converter opened..\n"); 2867 2868 src = (const char *)source; 2869 targ = junkout; 2870 offs = junokout; 2871 2872 realBufferSize = UPRV_LENGTHOF(junkout); 2873 realBufferEnd = junkout + realBufferSize; 2874 realSourceEnd = src + sourcelen; 2875 /*----setting the callback routine----*/ 2876 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 2877 if (U_FAILURE(status)) 2878 { 2879 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2880 } 2881 /*-------------------------------------*/ 2882 /*setting the subChar*/ 2883 if(mySubChar != NULL){ 2884 ucnv_setSubstChars(conv, mySubChar, len, &status); 2885 if (U_FAILURE(status)) { 2886 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2887 } 2888 } 2889 /*------------*/ 2890 2891 2892 if ( gOutBufferSize != realBufferSize ) 2893 checkOffsets = FALSE; 2894 2895 if( gInBufferSize != NEW_MAX_BUFFER ) 2896 checkOffsets = FALSE; 2897 2898 do 2899 { 2900 end = nct_min( targ + gOutBufferSize, realBufferEnd); 2901 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 2902 2903 if(targ == realBufferEnd) 2904 { 2905 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 2906 return FALSE; 2907 } 2908 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 2909 2910 2911 2912 status = U_ZERO_ERROR; 2913 2914 ucnv_toUnicode (conv, 2915 &targ, 2916 end, 2917 (const char **)&src, 2918 (const char *)srcLimit, 2919 checkOffsets ? offs : NULL, 2920 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 2921 &status); 2922 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 2923 2924 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ 2925 char errChars[50]; /* should be sufficient */ 2926 int8_t errLen = 50; 2927 UErrorCode err = U_ZERO_ERROR; 2928 const char* start= NULL; 2929 ucnv_getInvalidChars(conv,errChars, &errLen, &err); 2930 if(U_FAILURE(err)){ 2931 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err)); 2932 } 2933 /* length of in invalid chars should be equal to returned length*/ 2934 start = src - errLen; 2935 if(uprv_strncmp(errChars,start,errLen)!=0){ 2936 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); 2937 } 2938 } 2939 /* allow failure codes for the stop callback */ 2940 if(U_FAILURE(status) && 2941 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) 2942 { 2943 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 2944 return FALSE; 2945 } 2946 2947 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 2948 sourcelen, targ-junkout); 2949 if(getTestOption(VERBOSITY_OPTION)) 2950 { 2951 2952 junk[0] = 0; 2953 offset_str[0] = 0; 2954 2955 for(p = junkout;p<targ;p++) 2956 { 2957 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 2958 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 2959 } 2960 2961 log_verbose(junk); 2962 printUSeq(expect, expectlen); 2963 if ( checkOffsets ) 2964 { 2965 log_verbose("\nOffsets:"); 2966 log_verbose(offset_str); 2967 } 2968 log_verbose("\n"); 2969 } 2970 ucnv_close(conv); 2971 2972 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 2973 2974 if (checkOffsets && (expectOffsets != 0)) 2975 { 2976 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 2977 { 2978 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 2979 log_err("Got offsets: "); 2980 for(p=junkout;p<targ;p++) 2981 log_err(" %2d,", junokout[p-junkout]); 2982 log_err("\n"); 2983 log_err("Expected offsets: "); 2984 for(i=0; i<(targ-junkout); i++) 2985 log_err(" %2d,", expectOffsets[i]); 2986 log_err("\n"); 2987 log_err("Got output: "); 2988 for(i=0; i<(targ-junkout); i++) 2989 log_err("0x%04x,", junkout[i]); 2990 log_err("\n"); 2991 log_err("From source: "); 2992 for(i=0; i<(src-(const char *)source); i++) 2993 log_err(" 0x%02x,", (unsigned char)source[i]); 2994 log_err("\n"); 2995 } 2996 } 2997 2998 if(!memcmp(junkout, expect, expectlen*2)) 2999 { 3000 log_verbose("Matches!\n"); 3001 return TRUE; 3002 } 3003 else 3004 { 3005 log_err("String does not match. %s\n", gNuConvTestName); 3006 log_verbose("String does not match. %s\n", gNuConvTestName); 3007 log_err("Got: "); 3008 printUSeqErr(junkout, expectlen); 3009 log_err("Expected: "); 3010 printUSeqErr(expect, expectlen); 3011 log_err("\n"); 3012 return FALSE; 3013 } 3014 } 3015 3016 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 3017 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, 3018 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 3019 { 3020 3021 3022 UErrorCode status = U_ZERO_ERROR; 3023 UConverter *conv = 0; 3024 char junkout[NEW_MAX_BUFFER]; /* FIX */ 3025 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 3026 const UChar *src; 3027 char *end; 3028 char *targ; 3029 int32_t *offs; 3030 int i; 3031 int32_t realBufferSize; 3032 char *realBufferEnd; 3033 const UChar *realSourceEnd; 3034 const UChar *sourceLimit; 3035 UBool checkOffsets = TRUE; 3036 UBool doFlush; 3037 char junk[9999]; 3038 char offset_str[9999]; 3039 char *p; 3040 UConverterFromUCallback oldAction = NULL; 3041 const void* oldContext = NULL; 3042 3043 3044 for(i=0;i<NEW_MAX_BUFFER;i++) 3045 junkout[i] = (char)0xF0; 3046 for(i=0;i<NEW_MAX_BUFFER;i++) 3047 junokout[i] = 0xFF; 3048 setNuConvTestName(codepage, "FROM"); 3049 3050 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 3051 gOutBufferSize); 3052 3053 conv = ucnv_open(codepage, &status); 3054 if(U_FAILURE(status)) 3055 { 3056 log_data_err("Couldn't open converter %s\n",codepage); 3057 return TRUE; /* Because the err has already been logged. */ 3058 } 3059 3060 log_verbose("Converter opened..\n"); 3061 3062 /*----setting the callback routine----*/ 3063 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3064 if (U_FAILURE(status)) 3065 { 3066 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3067 } 3068 /*------------------------*/ 3069 /*setting the subChar*/ 3070 if(mySubChar != NULL){ 3071 ucnv_setSubstChars(conv, mySubChar, len, &status); 3072 if (U_FAILURE(status)) { 3073 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status)); 3074 } 3075 } 3076 /*------------*/ 3077 3078 src = source; 3079 targ = junkout; 3080 offs = junokout; 3081 3082 realBufferSize = UPRV_LENGTHOF(junkout); 3083 realBufferEnd = junkout + realBufferSize; 3084 realSourceEnd = source + sourceLen; 3085 3086 if ( gOutBufferSize != realBufferSize ) 3087 checkOffsets = FALSE; 3088 3089 if( gInBufferSize != NEW_MAX_BUFFER ) 3090 checkOffsets = FALSE; 3091 3092 do 3093 { 3094 end = nct_min(targ + gOutBufferSize, realBufferEnd); 3095 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 3096 3097 doFlush = (UBool)(sourceLimit == realSourceEnd); 3098 3099 if(targ == realBufferEnd) 3100 { 3101 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 3102 return FALSE; 3103 } 3104 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 3105 3106 3107 status = U_ZERO_ERROR; 3108 3109 ucnv_fromUnicode (conv, 3110 (char **)&targ, 3111 (const char *)end, 3112 &src, 3113 sourceLimit, 3114 checkOffsets ? offs : NULL, 3115 doFlush, /* flush if we're at the end of the input data */ 3116 &status); 3117 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); 3118 3119 /* allow failure codes for the stop callback */ 3120 if(U_FAILURE(status) && status != expectedError) 3121 { 3122 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3123 return FALSE; 3124 } 3125 3126 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 3127 sourceLen, targ-junkout); 3128 if(getTestOption(VERBOSITY_OPTION)) 3129 { 3130 3131 junk[0] = 0; 3132 offset_str[0] = 0; 3133 for(p = junkout;p<targ;p++) 3134 { 3135 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 3136 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 3137 } 3138 3139 log_verbose(junk); 3140 printSeq(expect, expectLen); 3141 if ( checkOffsets ) 3142 { 3143 log_verbose("\nOffsets:"); 3144 log_verbose(offset_str); 3145 } 3146 log_verbose("\n"); 3147 } 3148 ucnv_close(conv); 3149 3150 3151 if(expectLen != targ-junkout) 3152 { 3153 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3154 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3155 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3156 printSeqErr(expect, expectLen); 3157 return FALSE; 3158 } 3159 3160 if (checkOffsets && (expectOffsets != 0) ) 3161 { 3162 log_verbose("comparing %d offsets..\n", targ-junkout); 3163 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 3164 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3165 log_err("Got Output : "); 3166 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3167 log_err("Got Offsets: "); 3168 for(p=junkout;p<targ;p++) 3169 log_err("%d,", junokout[p-junkout]); 3170 log_err("\n"); 3171 log_err("Expected Offsets: "); 3172 for(i=0; i<(targ-junkout); i++) 3173 log_err("%d,", expectOffsets[i]); 3174 log_err("\n"); 3175 return FALSE; 3176 } 3177 } 3178 3179 if(!memcmp(junkout, expect, expectLen)) 3180 { 3181 log_verbose("String matches! %s\n", gNuConvTestName); 3182 return TRUE; 3183 } 3184 else 3185 { 3186 log_err("String does not match. %s\n", gNuConvTestName); 3187 log_err("source: "); 3188 printUSeqErr(source, sourceLen); 3189 log_err("Got: "); 3190 printSeqErr((const uint8_t *)junkout, expectLen); 3191 log_err("Expected: "); 3192 printSeqErr(expect, expectLen); 3193 return FALSE; 3194 } 3195 } 3196 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 3197 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 3198 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 3199 { 3200 UErrorCode status = U_ZERO_ERROR; 3201 UConverter *conv = 0; 3202 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 3203 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 3204 const char *src; 3205 const char *realSourceEnd; 3206 const char *srcLimit; 3207 UChar *targ; 3208 UChar *end; 3209 int32_t *offs; 3210 int i; 3211 UBool checkOffsets = TRUE; 3212 char junk[9999]; 3213 char offset_str[9999]; 3214 UChar *p; 3215 UConverterToUCallback oldAction = NULL; 3216 const void* oldContext = NULL; 3217 3218 int32_t realBufferSize; 3219 UChar *realBufferEnd; 3220 3221 3222 for(i=0;i<NEW_MAX_BUFFER;i++) 3223 junkout[i] = 0xFFFE; 3224 3225 for(i=0;i<NEW_MAX_BUFFER;i++) 3226 junokout[i] = -1; 3227 3228 setNuConvTestName(codepage, "TO"); 3229 3230 log_verbose("\n========= %s\n", gNuConvTestName); 3231 3232 conv = ucnv_open(codepage, &status); 3233 if(U_FAILURE(status)) 3234 { 3235 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 3236 return TRUE; 3237 } 3238 3239 log_verbose("Converter opened..\n"); 3240 3241 src = (const char *)source; 3242 targ = junkout; 3243 offs = junokout; 3244 3245 realBufferSize = UPRV_LENGTHOF(junkout); 3246 realBufferEnd = junkout + realBufferSize; 3247 realSourceEnd = src + sourcelen; 3248 /*----setting the callback routine----*/ 3249 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3250 if (U_FAILURE(status)) 3251 { 3252 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3253 } 3254 /*-------------------------------------*/ 3255 /*setting the subChar*/ 3256 if(mySubChar != NULL){ 3257 ucnv_setSubstChars(conv, mySubChar, len, &status); 3258 if (U_FAILURE(status)) { 3259 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3260 } 3261 } 3262 /*------------*/ 3263 3264 3265 if ( gOutBufferSize != realBufferSize ) 3266 checkOffsets = FALSE; 3267 3268 if( gInBufferSize != NEW_MAX_BUFFER ) 3269 checkOffsets = FALSE; 3270 3271 do 3272 { 3273 end = nct_min( targ + gOutBufferSize, realBufferEnd); 3274 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 3275 3276 if(targ == realBufferEnd) 3277 { 3278 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 3279 return FALSE; 3280 } 3281 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 3282 3283 3284 3285 status = U_ZERO_ERROR; 3286 3287 ucnv_toUnicode (conv, 3288 &targ, 3289 end, 3290 (const char **)&src, 3291 (const char *)srcLimit, 3292 checkOffsets ? offs : NULL, 3293 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 3294 &status); 3295 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 3296 3297 /* allow failure codes for the stop callback */ 3298 if(U_FAILURE(status) && status!=expectedError) 3299 { 3300 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3301 return FALSE; 3302 } 3303 3304 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 3305 sourcelen, targ-junkout); 3306 if(getTestOption(VERBOSITY_OPTION)) 3307 { 3308 3309 junk[0] = 0; 3310 offset_str[0] = 0; 3311 3312 for(p = junkout;p<targ;p++) 3313 { 3314 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 3315 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 3316 } 3317 3318 log_verbose(junk); 3319 printUSeq(expect, expectlen); 3320 if ( checkOffsets ) 3321 { 3322 log_verbose("\nOffsets:"); 3323 log_verbose(offset_str); 3324 } 3325 log_verbose("\n"); 3326 } 3327 ucnv_close(conv); 3328 3329 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 3330 3331 if (checkOffsets && (expectOffsets != 0)) 3332 { 3333 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 3334 { 3335 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3336 log_err("Got offsets: "); 3337 for(p=junkout;p<targ;p++) 3338 log_err(" %2d,", junokout[p-junkout]); 3339 log_err("\n"); 3340 log_err("Expected offsets: "); 3341 for(i=0; i<(targ-junkout); i++) 3342 log_err(" %2d,", expectOffsets[i]); 3343 log_err("\n"); 3344 log_err("Got output: "); 3345 for(i=0; i<(targ-junkout); i++) 3346 log_err("0x%04x,", junkout[i]); 3347 log_err("\n"); 3348 log_err("From source: "); 3349 for(i=0; i<(src-(const char *)source); i++) 3350 log_err(" 0x%02x,", (unsigned char)source[i]); 3351 log_err("\n"); 3352 } 3353 } 3354 3355 if(!memcmp(junkout, expect, expectlen*2)) 3356 { 3357 log_verbose("Matches!\n"); 3358 return TRUE; 3359 } 3360 else 3361 { 3362 log_err("String does not match. %s\n", gNuConvTestName); 3363 log_verbose("String does not match. %s\n", gNuConvTestName); 3364 log_err("Got: "); 3365 printUSeqErr(junkout, expectlen); 3366 log_err("Expected: "); 3367 printUSeqErr(expect, expectlen); 3368 log_err("\n"); 3369 return FALSE; 3370 } 3371 } 3372 3373 static void TestCallBackFailure(void) { 3374 UErrorCode status = U_USELESS_COLLATOR_ERROR; 3375 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); 3376 if (status != U_USELESS_COLLATOR_ERROR) { 3377 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n"); 3378 } 3379 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); 3380 if (status != U_USELESS_COLLATOR_ERROR) { 3381 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n"); 3382 } 3383 ucnv_cbFromUWriteSub(NULL, -1, &status); 3384 if (status != U_USELESS_COLLATOR_ERROR) { 3385 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n"); 3386 } 3387 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); 3388 if (status != U_USELESS_COLLATOR_ERROR) { 3389 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n"); 3390 } 3391 } 3392