1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2006-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.dev.test.charset; 11 12 import java.nio.BufferOverflowException; 13 import java.nio.ByteBuffer; 14 import java.nio.CharBuffer; 15 import java.nio.charset.CharacterCodingException; 16 import java.nio.charset.Charset; 17 import java.nio.charset.CharsetDecoder; 18 import java.nio.charset.CharsetEncoder; 19 import java.nio.charset.CoderResult; 20 import java.nio.charset.CodingErrorAction; 21 import java.nio.charset.UnsupportedCharsetException; 22 import java.nio.charset.spi.CharsetProvider; 23 import java.util.ArrayList; 24 import java.util.Iterator; 25 import java.util.MissingResourceException; 26 import java.util.Set; 27 import java.util.SortedMap; 28 29 import org.junit.Test; 30 31 import com.ibm.icu.charset.CharsetCallback; 32 import com.ibm.icu.charset.CharsetDecoderICU; 33 import com.ibm.icu.charset.CharsetEncoderICU; 34 import com.ibm.icu.charset.CharsetICU; 35 import com.ibm.icu.charset.CharsetProviderICU; 36 import com.ibm.icu.dev.test.TestFmwk; 37 import com.ibm.icu.text.UTF16; 38 import com.ibm.icu.text.UnicodeSet; 39 40 public class TestCharset extends TestFmwk { 41 @Test 42 public void TestUTF16Converter(){ 43 CharsetProvider icu = new CharsetProviderICU(); 44 Charset cs1 = icu.charsetForName("UTF-16BE"); 45 CharsetEncoder e1 = cs1.newEncoder(); 46 CharsetDecoder d1 = cs1.newDecoder(); 47 48 Charset cs2 = icu.charsetForName("UTF-16LE"); 49 CharsetEncoder e2 = cs2.newEncoder(); 50 CharsetDecoder d2 = cs2.newDecoder(); 51 52 for(int i=0x0000; i<0x10FFFF; i+=0xFF){ 53 CharBuffer us = CharBuffer.allocate(0xFF*2); 54 ByteBuffer bs1 = 
ByteBuffer.allocate(0xFF*8); 55 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 56 for(int j=0;j<0xFF; j++){ 57 int c = i+j; 58 59 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 60 continue; 61 } 62 63 if(c>0xFFFF){ 64 char lead = UTF16.getLeadSurrogate(c); 65 char trail = UTF16.getTrailSurrogate(c); 66 if(!UTF16.isLeadSurrogate(lead)){ 67 errln("lead is not lead!"+lead+" for cp: \\U"+Integer.toHexString(c)); 68 continue; 69 } 70 if(!UTF16.isTrailSurrogate(trail)){ 71 errln("trail is not trail!"+trail); 72 continue; 73 } 74 us.put(lead); 75 us.put(trail); 76 bs1.put((byte)(lead>>8)); 77 bs1.put((byte)(lead&0xFF)); 78 bs1.put((byte)(trail>>8)); 79 bs1.put((byte)(trail&0xFF)); 80 81 bs2.put((byte)(lead&0xFF)); 82 bs2.put((byte)(lead>>8)); 83 bs2.put((byte)(trail&0xFF)); 84 bs2.put((byte)(trail>>8)); 85 }else{ 86 87 if(c<0xFF){ 88 bs1.put((byte)0x00); 89 bs1.put((byte)(c)); 90 bs2.put((byte)(c)); 91 bs2.put((byte)0x00); 92 }else{ 93 bs1.put((byte)(c>>8)); 94 bs1.put((byte)(c&0xFF)); 95 96 bs2.put((byte)(c&0xFF)); 97 bs2.put((byte)(c>>8)); 98 } 99 us.put((char)c); 100 } 101 } 102 103 104 us.limit(us.position()); 105 us.position(0); 106 if(us.length()==0){ 107 continue; 108 } 109 110 111 bs1.limit(bs1.position()); 112 bs1.position(0); 113 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 114 //newBS.put((byte)0xFE); 115 //newBS.put((byte)0xFF); 116 newBS.put(bs1); 117 bs1.position(0); 118 smBufDecode(d1, "UTF-16", bs1, us); 119 smBufEncode(e1, "UTF-16", us, newBS); 120 121 bs2.limit(bs2.position()); 122 bs2.position(0); 123 newBS.clear(); 124 //newBS.put((byte)0xFF); 125 //newBS.put((byte)0xFE); 126 newBS.put(bs2); 127 bs2.position(0); 128 smBufDecode(d2, "UTF16-LE", bs2, us); 129 smBufEncode(e2, "UTF-16LE", us, newBS); 130 131 } 132 } 133 134 @Test 135 public void TestUTF32Converter(){ 136 CharsetProvider icu = new CharsetProviderICU(); 137 Charset cs1 = icu.charsetForName("UTF-32BE"); 138 CharsetEncoder e1 = cs1.newEncoder(); 139 CharsetDecoder d1 = cs1.newDecoder(); 140 
141 Charset cs2 = icu.charsetForName("UTF-32LE"); 142 CharsetEncoder e2 = cs2.newEncoder(); 143 CharsetDecoder d2 = cs2.newDecoder(); 144 145 for(int i=0x000; i<0x10FFFF; i+=0xFF){ 146 CharBuffer us = CharBuffer.allocate(0xFF*2); 147 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 148 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 149 for(int j=0;j<0xFF; j++){ 150 int c = i+j; 151 152 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 153 continue; 154 } 155 156 if(c>0xFFFF){ 157 char lead = UTF16.getLeadSurrogate(c); 158 char trail = UTF16.getTrailSurrogate(c); 159 160 us.put(lead); 161 us.put(trail); 162 }else{ 163 us.put((char)c); 164 } 165 bs1.put((byte) (c >>> 24)); 166 bs1.put((byte) (c >>> 16)); 167 bs1.put((byte) (c >>> 8)); 168 bs1.put((byte) (c & 0xFF)); 169 170 bs2.put((byte) (c & 0xFF)); 171 bs2.put((byte) (c >>> 8)); 172 bs2.put((byte) (c >>> 16)); 173 bs2.put((byte) (c >>> 24)); 174 } 175 bs1.limit(bs1.position()); 176 bs1.position(0); 177 bs2.limit(bs2.position()); 178 bs2.position(0); 179 us.limit(us.position()); 180 us.position(0); 181 if(us.length()==0){ 182 continue; 183 } 184 185 186 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 187 188 newBS.put((byte)0x00); 189 newBS.put((byte)0x00); 190 newBS.put((byte)0xFE); 191 newBS.put((byte)0xFF); 192 193 newBS.put(bs1); 194 bs1.position(0); 195 smBufDecode(d1, "UTF-32", bs1, us); 196 smBufEncode(e1, "UTF-32", us, newBS); 197 198 199 newBS.clear(); 200 201 newBS.put((byte)0xFF); 202 newBS.put((byte)0xFE); 203 newBS.put((byte)0x00); 204 newBS.put((byte)0x00); 205 206 newBS.put(bs2); 207 bs2.position(0); 208 smBufDecode(d2, "UTF-32LE", bs2, us); 209 smBufEncode(e2, "UTF-32LE", us, newBS); 210 211 } 212 } 213 214 @Test 215 public void TestASCIIConverter() { 216 runTestASCIIBasedConverter("ASCII", 0x80); 217 } 218 219 @Test 220 public void Test88591Converter() { 221 runTestASCIIBasedConverter("iso-8859-1", 0x100); 222 } 223 224 public void runTestASCIIBasedConverter(String converter, int limit){ 225 
CharsetProvider icu = new CharsetProviderICU(); 226 Charset icuChar = icu.charsetForName(converter); 227 CharsetEncoder encoder = icuChar.newEncoder(); 228 CharsetDecoder decoder = icuChar.newDecoder(); 229 CoderResult cr; 230 231 /* test with and without array-backed buffers */ 232 233 byte[] bytes = new byte[0x10000]; 234 char[] chars = new char[0x10000]; 235 for (int j = 0; j <= 0xffff; j++) { 236 bytes[j] = (byte) j; 237 chars[j] = (char) j; 238 } 239 240 boolean fail = false; 241 boolean arrays = false; 242 boolean decoding = false; 243 int i; 244 245 // 0 thru limit - 1 246 ByteBuffer bs = ByteBuffer.wrap(bytes, 0, limit); 247 CharBuffer us = CharBuffer.wrap(chars, 0, limit); 248 smBufDecode(decoder, converter, bs, us, true); 249 smBufDecode(decoder, converter, bs, us, false); 250 smBufEncode(encoder, converter, us, bs, true); 251 smBufEncode(encoder, converter, us, bs, false); 252 for (i = 0; i < limit; i++) { 253 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 254 us = CharBuffer.wrap(chars, i, 1).slice(); 255 try { 256 decoding = true; 257 arrays = true; 258 smBufDecode(decoder, converter, bs, us, true, false, true); 259 260 decoding = true; 261 arrays = false; 262 smBufDecode(decoder, converter, bs, us, true, false, false); 263 264 decoding = false; 265 arrays = true; 266 smBufEncode(encoder, converter, us, bs, true, false, true); 267 268 decoding = false; 269 arrays = false; 270 smBufEncode(encoder, converter, us, bs, true, false, false); 271 272 } catch (Exception ex) { 273 errln("Failed to fail to " + (decoding ? "decode" : "encode") + " 0x" 274 + Integer.toHexString(i) + (arrays ? 
" with arrays" : " without arrays") + " in " + converter); 275 return; 276 } 277 } 278 279 // decode limit thru 255 280 for (i = limit; i <= 0xff; i++) { 281 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 282 us = CharBuffer.wrap(chars, i, 1).slice(); 283 try { 284 smBufDecode(decoder, converter, bs, us, true, false, true); 285 fail = true; 286 arrays = true; 287 break; 288 } catch (Exception ex) { 289 } 290 try { 291 smBufDecode(decoder, converter, bs, us, true, false, false); 292 fail = true; 293 arrays = false; 294 break; 295 } catch (Exception ex) { 296 } 297 } 298 if (fail) { 299 errln("Failed to fail to decode 0x" + Integer.toHexString(i) 300 + (arrays ? " with arrays" : " without arrays") + " in " + converter); 301 return; 302 } 303 304 // encode limit thru 0xffff, skipping through much of the 1ff to feff range to save 305 // time (it would take too much time to test every possible case) 306 for (i = limit; i <= 0xffff; i = ((i>=0x1ff && i<0xfeff) ? i+0xfd : i+1)) { 307 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 308 us = CharBuffer.wrap(chars, i, 1).slice(); 309 try { 310 smBufEncode(encoder, converter, us, bs, true, false, true); 311 fail = true; 312 arrays = true; 313 break; 314 } catch (Exception ex) { 315 } 316 try { 317 smBufEncode(encoder, converter, us, bs, true, false, false); 318 fail = true; 319 arrays = false; 320 break; 321 } catch (Exception ex) { 322 } 323 } 324 if (fail) { 325 errln("Failed to fail to encode 0x" + Integer.toHexString(i) 326 + (arrays ? 
" with arrays" : " without arrays") + " in " + converter); 327 return; 328 } 329 330 // test overflow / underflow edge cases 331 outer: for (int n = 1; n <= 3; n++) { 332 for (int m = 0; m < n; m++) { 333 // expecting underflow 334 try { 335 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 336 us = CharBuffer.wrap(chars, 'a', m).slice(); 337 smBufDecode(decoder, converter, bs, us, true, false, true); 338 smBufDecode(decoder, converter, bs, us, true, false, false); 339 smBufEncode(encoder, converter, us, bs, true, false, true); 340 smBufEncode(encoder, converter, us, bs, true, false, false); 341 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 342 us = CharBuffer.wrap(chars, 'a', n).slice(); 343 smBufDecode(decoder, converter, bs, us, true, false, true, m); 344 smBufDecode(decoder, converter, bs, us, true, false, false, m); 345 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 346 us = CharBuffer.wrap(chars, 'a', m).slice(); 347 smBufEncode(encoder, converter, us, bs, true, false, true, m); 348 smBufEncode(encoder, converter, us, bs, true, false, false, m); 349 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 350 us = CharBuffer.wrap(chars, 'a', n).slice(); 351 smBufDecode(decoder, converter, bs, us, true, false, true); 352 smBufDecode(decoder, converter, bs, us, true, false, false); 353 smBufEncode(encoder, converter, us, bs, true, false, true); 354 smBufEncode(encoder, converter, us, bs, true, false, false); 355 } catch (Exception ex) { 356 fail = true; 357 break outer; 358 } 359 360 // expecting overflow 361 try { 362 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 363 us = CharBuffer.wrap(chars, 'a', m).slice(); 364 smBufDecode(decoder, converter, bs, us, true, false, true); 365 fail = true; 366 break; 367 } catch (Exception ex) { 368 if (!(ex instanceof BufferOverflowException)) { 369 fail = true; 370 break outer; 371 } 372 } 373 try { 374 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 375 us = CharBuffer.wrap(chars, 'a', m).slice(); 376 smBufDecode(decoder, converter, bs, us, true, 
false, false); 377 fail = true; 378 } catch (Exception ex) { 379 if (!(ex instanceof BufferOverflowException)) { 380 fail = true; 381 break outer; 382 } 383 } 384 try { 385 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 386 us = CharBuffer.wrap(chars, 'a', n).slice(); 387 smBufEncode(encoder, converter, us, bs, true, false, true); 388 fail = true; 389 } catch (Exception ex) { 390 if (!(ex instanceof BufferOverflowException)) { 391 fail = true; 392 break outer; 393 } 394 } 395 try { 396 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 397 us = CharBuffer.wrap(chars, 'a', n).slice(); 398 smBufEncode(encoder, converter, us, bs, true, false, false); 399 fail = true; 400 } catch (Exception ex) { 401 if (!(ex instanceof BufferOverflowException)) { 402 fail = true; 403 break outer; 404 } 405 } 406 } 407 } 408 if (fail) { 409 errln("Incorrect result in " + converter + " for underflow / overflow edge cases"); 410 return; 411 } 412 413 // test surrogate combinations in encoding 414 String lead = "\ud888"; 415 String trail = "\udc88"; 416 String norm = "a"; 417 String ext = "\u0275"; // theta 418 String end = ""; 419 bs = ByteBuffer.wrap(new byte[] { 0 }); 420 String[] input = new String[] { // 421 lead + lead, // malf(1) 422 lead + trail, // unmap(2) 423 lead + norm, // malf(1) 424 lead + ext, // malf(1) 425 lead + end, // malf(1) 426 trail + norm, // malf(1) 427 trail + end, // malf(1) 428 ext + norm, // unmap(1) 429 ext + end, // unmap(1) 430 }; 431 CoderResult[] result = new CoderResult[] { 432 CoderResult.malformedForLength(1), 433 CoderResult.unmappableForLength(2), 434 CoderResult.malformedForLength(1), 435 CoderResult.malformedForLength(1), 436 CoderResult.malformedForLength(1), 437 CoderResult.malformedForLength(1), 438 CoderResult.malformedForLength(1), 439 CoderResult.unmappableForLength(1), 440 CoderResult.unmappableForLength(1), 441 }; 442 443 for (int index = 0; index < input.length; index++) { 444 CharBuffer source = CharBuffer.wrap(input[index]); 445 cr = 
encoder.encode(source, bs, true); 446 bs.rewind(); 447 encoder.reset(); 448 449 // if cr != results[x] 450 if (!((cr.isUnderflow() && result[index].isUnderflow()) 451 || (cr.isOverflow() && result[index].isOverflow()) 452 || (cr.isMalformed() && result[index].isMalformed()) 453 || (cr.isUnmappable() && result[index].isUnmappable())) 454 || (cr.isError() && cr.length() != result[index].length())) { 455 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 456 break; 457 } 458 459 source = CharBuffer.wrap(input[index].toCharArray()); 460 cr = encoder.encode(source, bs, true); 461 bs.rewind(); 462 encoder.reset(); 463 464 // if cr != results[x] 465 if (!((cr.isUnderflow() && result[index].isUnderflow()) 466 || (cr.isOverflow() && result[index].isOverflow()) 467 || (cr.isMalformed() && result[index].isMalformed()) 468 || (cr.isUnmappable() && result[index].isUnmappable())) 469 || (cr.isError() && cr.length() != result[index].length())) { 470 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 471 break; 472 } 473 } 474 } 475 476 @Test 477 public void TestUTF8Converter() { 478 String converter = "UTF-8"; 479 CharsetProvider icu = new CharsetProviderICU(); 480 Charset icuChar = icu.charsetForName(converter); 481 CharsetEncoder encoder = icuChar.newEncoder(); 482 CharsetDecoder decoder = icuChar.newDecoder(); 483 ByteBuffer bs; 484 CharBuffer us; 485 CoderResult cr; 486 487 488 int[] size = new int[] { 1<<7, 1<<11, 1<<16 }; // # of 1,2,3 byte combinations 489 byte[] bytes = new byte[size[0] + size[1]*2 + size[2]*3]; 490 char[] chars = new char[size[0] + size[1] + size[2]]; 491 int i = 0; 492 int x, y; 493 494 // 0 to 1 << 7 (1 byters) 495 for (; i < size[0]; i++) { 496 bytes[i] = (byte) i; 497 chars[i] = (char) i; 498 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 499 us = CharBuffer.wrap(chars, i, 1).slice(); 500 try { 
501 smBufDecode(decoder, converter, bs, us, true, false, true); 502 smBufDecode(decoder, converter, bs, us, true, false, false); 503 smBufEncode(encoder, converter, us, bs, true, false, true); 504 smBufEncode(encoder, converter, us, bs, true, false, false); 505 } catch (Exception ex) { 506 errln("Incorrect result in " + converter + " for 0x" 507 + Integer.toHexString(i)); 508 break; 509 } 510 } 511 512 // 1 << 7 to 1 << 11 (2 byters) 513 for (; i < size[1]; i++) { 514 x = size[0] + i*2; 515 y = size[0] + i; 516 bytes[x + 0] = (byte) (0xc0 | ((i >> 6) & 0x1f)); 517 bytes[x + 1] = (byte) (0x80 | ((i >> 0) & 0x3f)); 518 chars[y] = (char) i; 519 bs = ByteBuffer.wrap(bytes, x, 2).slice(); 520 us = CharBuffer.wrap(chars, y, 1).slice(); 521 try { 522 smBufDecode(decoder, converter, bs, us, true, false, true); 523 smBufDecode(decoder, converter, bs, us, true, false, false); 524 smBufEncode(encoder, converter, us, bs, true, false, true); 525 smBufEncode(encoder, converter, us, bs, true, false, false); 526 } catch (Exception ex) { 527 errln("Incorrect result in " + converter + " for 0x" 528 + Integer.toHexString(i)); 529 break; 530 } 531 } 532 533 // 1 << 11 to 1 << 16 (3 byters and surrogates) 534 for (; i < size[2]; i++) { 535 x = size[0] + size[1] * 2 + i * 3; 536 y = size[0] + size[1] + i; 537 bytes[x + 0] = (byte) (0xe0 | ((i >> 12) & 0x0f)); 538 bytes[x + 1] = (byte) (0x80 | ((i >> 6) & 0x3f)); 539 bytes[x + 2] = (byte) (0x80 | ((i >> 0) & 0x3f)); 540 chars[y] = (char) i; 541 if (!UTF16.isSurrogate((char)i)) { 542 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 543 us = CharBuffer.wrap(chars, y, 1).slice(); 544 try { 545 smBufDecode(decoder, converter, bs, us, true, false, true); 546 smBufDecode(decoder, converter, bs, us, true, false, false); 547 smBufEncode(encoder, converter, us, bs, true, false, true); 548 smBufEncode(encoder, converter, us, bs, true, false, false); 549 } catch (Exception ex) { 550 errln("Incorrect result in " + converter + " for 0x" 551 + 
Integer.toHexString(i)); 552 break; 553 } 554 } else { 555 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 556 us = CharBuffer.wrap(chars, y, 1).slice(); 557 558 decoder.reset(); 559 cr = decoder.decode(bs, us, true); 560 bs.rewind(); 561 us.rewind(); 562 if (!cr.isMalformed() || cr.length() != 3) { 563 errln("Incorrect result in " + converter + " decoder for 0x" 564 + Integer.toHexString(i) + " received " + cr); 565 break; 566 } 567 encoder.reset(); 568 cr = encoder.encode(us, bs, true); 569 bs.rewind(); 570 us.rewind(); 571 if (!cr.isMalformed() || cr.length() != 1) { 572 errln("Incorrect result in " + converter + " encoder for 0x" 573 + Integer.toHexString(i) + " received " + cr); 574 break; 575 } 576 577 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 578 us = CharBuffer.wrap(new String(chars, y, 1)); 579 580 decoder.reset(); 581 cr = decoder.decode(bs, us, true); 582 bs.rewind(); 583 us.rewind(); 584 if (!cr.isMalformed() || cr.length() != 3) { 585 errln("Incorrect result in " + converter + " decoder for 0x" 586 + Integer.toHexString(i) + " received " + cr); 587 break; 588 } 589 encoder.reset(); 590 cr = encoder.encode(us, bs, true); 591 bs.rewind(); 592 us.rewind(); 593 if (!cr.isMalformed() || cr.length() != 1) { 594 errln("Incorrect result in " + converter + " encoder for 0x" 595 + Integer.toHexString(i) + " received " + cr); 596 break; 597 } 598 599 600 } 601 } 602 if (true) 603 return; 604 } 605 606 @Test 607 public void TestHZ() { 608 /* test input */ 609 char[] in = new char[] { 610 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 611 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 612 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 613 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 614 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 615 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 
0x2495, 0x2496, 616 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 617 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 618 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 619 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 620 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 621 0x005A, 0x005B, 0x005C, 0x000A 622 }; 623 624 String converter = "HZ"; 625 CharsetProvider icu = new CharsetProviderICU(); 626 Charset icuChar = icu.charsetForName(converter); 627 CharsetEncoder encoder = icuChar.newEncoder(); 628 CharsetDecoder decoder = icuChar.newDecoder(); 629 try { 630 CharBuffer start = CharBuffer.wrap(in); 631 ByteBuffer bytes = encoder.encode(start); 632 CharBuffer finish = decoder.decode(bytes); 633 634 if (!equals(start, finish)) { 635 errln(converter + " roundtrip test failed: start does not match finish"); 636 637 char[] finishArray = new char[finish.limit()]; 638 for (int i=0; i<finishArray.length; i++) 639 finishArray[i] = finish.get(i); 640 641 logln("start: " + hex(in)); 642 logln("finish: " + hex(finishArray)); 643 } 644 } catch (CharacterCodingException ex) { 645 // Android patch: Skip tests that fail with customized data. 646 logln(converter + " roundtrip test failed: " + ex.getMessage()); 647 // Android patch end. 
648 ex.printStackTrace(System.err); 649 } 650 651 /* For better code coverage */ 652 CoderResult result = CoderResult.UNDERFLOW; 653 byte byteout[] = { 654 (byte)0x7e, (byte)0x7d, (byte)0x41, 655 (byte)0x7e, (byte)0x7b, (byte)0x21, 656 }; 657 char charin[] = { 658 (char)0x0041, (char)0x0042, (char)0x3000 659 }; 660 ByteBuffer bb = ByteBuffer.wrap(byteout); 661 CharBuffer cb = CharBuffer.wrap(charin); 662 int testLoopSize = 5; 663 int bbLimits[] = { 0, 1, 3, 4, 6}; 664 int bbPositions[] = { 0, 0, 0, 3, 3 }; 665 int ccPositions[] = { 0, 0, 0, 2, 2 }; 666 for (int i = 0; i < testLoopSize; i++) { 667 encoder.reset(); 668 bb.limit(bbLimits[i]); 669 bb.position(bbPositions[i]); 670 cb.position(ccPositions[i]); 671 result = encoder.encode(cb, bb, true); 672 673 if (i < 3) { 674 if (!result.isOverflow()) { 675 errln("Overflow buffer error should have occurred while encoding HZ (" + i + ")"); 676 } 677 } else { 678 if (result.isError()) { 679 // Android patch: Skip tests that fail with customized data. 680 logln("Error should not have occurred while encoding HZ.(" + i + ")"); 681 // Android patch end. 
682 } 683 } 684 } 685 } 686 687 @Test 688 public void TestUTF8Surrogates() { 689 byte[][] in = new byte[][] { 690 { (byte)0x61, }, 691 { (byte)0xc2, (byte)0x80, }, 692 { (byte)0xe0, (byte)0xa0, (byte)0x80, }, 693 { (byte)0xf0, (byte)0x90, (byte)0x80, (byte)0x80, }, 694 { (byte)0xf4, (byte)0x84, (byte)0x8c, (byte)0xa1, }, 695 { (byte)0xf0, (byte)0x90, (byte)0x90, (byte)0x81, }, 696 }; 697 698 /* expected test results */ 699 char[][] results = new char[][] { 700 /* number of bytes read, code point */ 701 { '\u0061', }, 702 { '\u0080', }, 703 { '\u0800', }, 704 { '\ud800', '\udc00', }, // 10000 705 { '\udbd0', '\udf21', }, // 104321 706 { '\ud801', '\udc01', }, // 10401 707 }; 708 709 /* error test input */ 710 byte[][] in2 = new byte[][] { 711 { (byte)0x61, }, 712 { (byte)0xc0, (byte)0x80, /* illegal non-shortest form */ 713 (byte)0xe0, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 714 (byte)0xf0, (byte)0x80, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 715 (byte)0xc0, (byte)0xc0, /* illegal trail byte */ 716 (byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80, /* 0x110000 out of range */ 717 (byte)0xf8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, /* too long */ 718 (byte)0xfe, /* illegal byte altogether */ 719 (byte)0x62, }, 720 }; 721 722 /* expected error test results */ 723 char[][] results2 = new char[][] { 724 /* number of bytes read, code point */ 725 { '\u0062', }, 726 { '\u0062', }, 727 }; 728 729 String converter = "UTF-8"; 730 CharsetProvider icu = new CharsetProviderICU(); 731 Charset icuChar = icu.charsetForName(converter); 732 CharsetDecoder decoder = icuChar.newDecoder(); 733 734 int i; 735 try { 736 for (i = 0; i < in.length; i++) { 737 ByteBuffer source = ByteBuffer.wrap(in[i]); 738 CharBuffer expected = CharBuffer.wrap(results[i]); 739 smBufDecode(decoder, converter, source, expected, true, false, 740 true); 741 smBufDecode(decoder, converter, source, expected, true, false, 742 false); 743 } 744 } catch (Exception ex) { 745 
errln("Incorrect result in " + converter); 746 } 747 try { 748 for (i = 0; i < in2.length; i++) { 749 ByteBuffer source = ByteBuffer.wrap(in2[i]); 750 CharBuffer expected = CharBuffer.wrap(results2[i]); 751 decoder.onMalformedInput(CodingErrorAction.IGNORE); 752 smBufDecode(decoder, converter, source, expected, true, false, 753 true); 754 smBufDecode(decoder, converter, source, expected, true, false, 755 false); 756 } 757 } catch (Exception ex) { 758 errln("Incorrect result in " + converter); 759 } 760 } 761 762 @Test 763 public void TestSurrogateBehavior() { 764 CharsetProviderICU icu = new CharsetProviderICU(); 765 766 // get all the converters into an array 767 Object[] converters = CharsetProviderICU.getAvailableNames(); 768 769 String norm = "a"; 770 String ext = "\u0275"; // theta 771 String lead = "\ud835"; 772 String trail = "\udd04"; 773 // lead + trail = \U1d504 (fraktur capital A) 774 775 String input = 776 // error position 777 ext // unmap(1) 1 778 + lead // under 1 779 + lead // malf(1) 2 780 + trail // unmap(2) 4 781 + trail // malf(1) 5 782 + ext // unmap(1) 6 783 + norm // unmap(1) 7 784 ; 785 CoderResult[] results = new CoderResult[] { 786 CoderResult.unmappableForLength(1), // or underflow 787 CoderResult.UNDERFLOW, 788 CoderResult.malformedForLength(1), 789 CoderResult.unmappableForLength(2), // or underflow 790 CoderResult.malformedForLength(1), 791 CoderResult.unmappableForLength(1), // or underflow 792 CoderResult.unmappableForLength(1), // or underflow 793 }; 794 int[] positions = new int[] { 1,1,2,4,5,6,7 }; 795 int n = positions.length; 796 797 int badcount = 0; 798 int goodcount = 0; 799 int[] uhohindices = new int[n]; 800 int[] badposindices = new int[n]; 801 int[] malfindices = new int[n]; 802 int[] unmapindices = new int[n]; 803 ArrayList pass = new ArrayList(); 804 ArrayList exempt = new ArrayList(); 805 806 outer: for (int conv=0; conv<converters.length; conv++) { 807 String converter = (String)converters[conv]; 808 if 
(converter.equals("x-IMAP-mailbox-name") || converter.equals("UTF-7") || converter.equals("CESU-8") || converter.equals("BOCU-1") || 809 converter.equals("x-LMBCS-1")) { 810 exempt.add(converter); 811 continue; 812 } 813 814 boolean currentlybad = false; 815 Charset icuChar = icu.charsetForName(converter); 816 CharsetEncoder encoder = icuChar.newEncoder(); 817 CoderResult cr; 818 819 CharBuffer source = CharBuffer.wrap(input); 820 ByteBuffer target = ByteBuffer.allocate(30); 821 ByteBuffer expected = null; 822 try { 823 encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); 824 encoder.onMalformedInput(CodingErrorAction.IGNORE); 825 expected = encoder.encode(CharBuffer.wrap(ext + lead + trail + ext + norm)); 826 encoder.reset(); 827 } catch (CharacterCodingException ex) { 828 errln("Unexpected CharacterCodingException: " + ex.getMessage()); 829 return; 830 } catch (RuntimeException ex) { 831 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 832 // Android patch: Skip tests that fail with customized data. 833 logln(converter + " " + ex.getClass().getName() + ": " + ex.getMessage()); 834 // Android patch end. 
835 continue outer; 836 } 837 838 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 839 encoder.onMalformedInput(CodingErrorAction.REPORT); 840 for (int i=0; i<n; i++) { 841 source.limit(i+1); 842 cr = encoder.encode(source, target, i == n - 1); 843 if (!(equals(cr, results[i]) 844 || (results[i].isUnmappable() && cr.isUnderflow()) // mappability depends on the converter 845 )) { 846 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 847 if (results[i].isMalformed() && cr.isMalformed()) { 848 malfindices[i]++; 849 } else if (results[i].isUnmappable() && cr.isUnmappable()) { 850 unmapindices[i]++; 851 } else { 852 uhohindices[i]++; 853 } 854 errln("(index=" + i + ") " + converter + " Received: " + cr + " Expected: " + results[i]); 855 } 856 if (source.position() != positions[i]) { 857 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 858 badposindices[i]++; 859 errln("(index=" + i + ") " + converter + " Received: " + source.position() + " Expected: " + positions[i]); 860 } 861 862 } 863 encoder.reset(); 864 865 //System.out.println("\n" + hex(target.array())); 866 //System.out.println(hex(expected.array()) + "\n" + expected.limit()); 867 if (!(equals(target, expected, expected.limit()) && target.position() == expected.limit())) { 868 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 869 errln(converter + " Received: \"" + hex(target.array()) + "\" Expected: \"" + hex(expected.array()) + "\""); 870 } 871 872 if (!currentlybad) { 873 goodcount++; 874 pass.add(converter); 875 } 876 } 877 878 logln("\n" + badcount + " / " + (converters.length - exempt.size()) + " (" + goodcount + " good, " + badcount + " bad)"); 879 log("index\t"); for (int i=0; i<n; i++) log(i + "\t"); logln(""); 880 log("unmap\t"); for (int i=0; i<n; i++) log(unmapindices[i] + "\t"); logln(""); 881 log("malf \t"); for (int i=0; i<n; i++) log(malfindices[i] + "\t"); logln(""); 882 log("pos \t"); for (int i=0; i<n; i++) log(badposindices[i] + "\t"); 
logln(""); 883 log("uhoh \t"); for (int i=0; i<n; i++) log(uhohindices[i] + "\t"); logln(""); 884 logln(""); 885 log("The few that passed: "); for (int i=0; i<pass.size(); i++) log(pass.get(i) + ", "); logln(""); 886 log("The few that are exempt: "); for (int i=0; i<exempt.size(); i++) log(exempt.get(i) + ", "); logln(""); 887 } 888 889 // public void TestCharsetCallback() { 890 // String currentTest = "initialization"; 891 // try { 892 // Class[] params; 893 // 894 // // get the classes 895 // Class CharsetCallback = Class.forName("com.ibm.icu.charset.CharsetCallback"); 896 // Class Decoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Decoder"); 897 // Class Encoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Encoder"); 898 // 899 // // set up encoderCall 900 // params = new Class[] {CharsetEncoderICU.class, Object.class, 901 // CharBuffer.class, ByteBuffer.class, IntBuffer.class, 902 // char[].class, int.class, int.class, CoderResult.class }; 903 // Method encoderCall = Encoder.getDeclaredMethod("call", params); 904 // 905 // // set up decoderCall 906 // params = new Class[] {CharsetDecoderICU.class, Object.class, 907 // ByteBuffer.class, CharBuffer.class, IntBuffer.class, 908 // char[].class, int.class, CoderResult.class}; 909 // Method decoderCall = Decoder.getDeclaredMethod("call", params); 910 // 911 // // get relevant fields 912 // Object SUB_STOP_ON_ILLEGAL = getFieldValue(CharsetCallback, "SUB_STOP_ON_ILLEGAL", null); 913 // 914 // // set up a few arguments 915 // CharsetProvider provider = new CharsetProviderICU(); 916 // Charset charset = provider.charsetForName("UTF-8"); 917 // CharsetEncoderICU encoder = (CharsetEncoderICU)charset.newEncoder(); 918 // CharsetDecoderICU decoder = (CharsetDecoderICU)charset.newDecoder(); 919 // CharBuffer chars = CharBuffer.allocate(10); 920 // chars.put('o'); 921 // chars.put('k'); 922 // ByteBuffer bytes = ByteBuffer.allocate(10); 923 // bytes.put((byte)'o'); 924 // bytes.put((byte)'k'); 925 // 
IntBuffer offsets = IntBuffer.allocate(10); 926 // offsets.put(0); 927 // offsets.put(1); 928 // char[] buffer = null; 929 // Integer length = new Integer(2); 930 // Integer cp = new Integer(0); 931 // CoderResult unmap = CoderResult.unmappableForLength(2); 932 // CoderResult malf = CoderResult.malformedForLength(2); 933 // CoderResult under = CoderResult.UNDERFLOW; 934 // 935 // // set up error arrays 936 // Integer invalidCharLength = new Integer(1); 937 // Byte subChar1 = new Byte((byte)0); 938 // Byte subChar1_alternate = new Byte((byte)1); // for TO_U_CALLBACK_SUBSTITUTE 939 // 940 // // set up chars and bytes backups and expected values for certain cases 941 // CharBuffer charsBackup = bufferCopy(chars); 942 // ByteBuffer bytesBackup = bufferCopy(bytes); 943 // IntBuffer offsetsBackup = bufferCopy(offsets); 944 // CharBuffer encoderCharsExpected = bufferCopy(chars); 945 // ByteBuffer encoderBytesExpected = bufferCopy(bytes); 946 // IntBuffer encoderOffsetsExpected = bufferCopy(offsets); 947 // CharBuffer decoderCharsExpected1 = bufferCopy(chars); 948 // CharBuffer decoderCharsExpected2 = bufferCopy(chars); 949 // IntBuffer decoderOffsetsExpected1 = bufferCopy(offsets); 950 // IntBuffer decoderOffsetsExpected2 = bufferCopy(offsets); 951 // 952 // // initialize fields to obtain expected data 953 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 954 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1); 955 // 956 // // run cbFromUWriteSub 957 // Method cbFromUWriteSub = CharsetEncoderICU.class.getDeclaredMethod("cbFromUWriteSub", new Class[] { CharsetEncoderICU.class, CharBuffer.class, ByteBuffer.class, IntBuffer.class}); 958 // cbFromUWriteSub.setAccessible(true); 959 // CoderResult encoderResultExpected = (CoderResult)cbFromUWriteSub.invoke(encoder, new Object[] {encoder, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected}); 960 // 961 // // run toUWriteUChars with 
normal data 962 // Method toUWriteUChars = CharsetDecoderICU.class.getDeclaredMethod("toUWriteUChars", new Class[] { CharsetDecoderICU.class, char[].class, int.class, int.class, CharBuffer.class, IntBuffer.class, int.class}); 963 // toUWriteUChars.setAccessible(true); 964 // CoderResult decoderResultExpected1 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0xFFFD}, new Integer(0), new Integer(1), decoderCharsExpected1, decoderOffsetsExpected1, new Integer(bytes.position())}); 965 // 966 // // reset certain fields 967 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 968 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1_alternate); 969 // 970 // // run toUWriteUChars again 971 // CoderResult decoderResultExpected2 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0x1A}, new Integer(0), new Integer(1), decoderCharsExpected2, decoderOffsetsExpected2, new Integer(bytes.position())}); 972 // 973 // // begin creating the tests array 974 // ArrayList tests = new ArrayList(); 975 // 976 // // create tests for FROM_U_CALLBACK_SKIP 0 977 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 978 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 979 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 980 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, 
bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 981 // 982 // // create tests for TO_U_CALLBACK_SKIP 4 983 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 984 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 985 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 986 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 987 // 988 // // create tests for FROM_U_CALLBACK_STOP 8 989 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 990 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 991 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 992 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { 
encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 993 // 994 // // create tests for TO_U_CALLBACK_STOP 12 995 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 996 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 997 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 998 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 999 // 1000 // // create tests for FROM_U_CALLBACK_SUBSTITUTE 16 1001 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, encoderResultExpected, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1002 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1003 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, encoderResultExpected, 
encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1004 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1005 // 1006 // // create tests for TO_U_CALLBACK_SUBSTITUTE 20 1007 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected1, decoderCharsExpected1, bytesBackup, decoderOffsetsExpected1, new Object[] { invalidCharLength, subChar1 }}); 1008 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected2, decoderCharsExpected2, bytesBackup, decoderOffsetsExpected2, new Object[] { invalidCharLength, subChar1_alternate }}); 1009 // 1010 // Iterator iter = tests.iterator(); 1011 // for (int i=0; iter.hasNext(); i++) { 1012 // // get the data out of the map 1013 // Object[] next = (Object[])iter.next(); 1014 // 1015 // Method method = (Method)next[0]; 1016 // String fieldName = (String)next[1]; 1017 // Object field = getFieldValue(CharsetCallback, fieldName, null); 1018 // Object[] args = (Object[])next[2]; 1019 // CoderResult expected = (CoderResult)next[3]; 1020 // CharBuffer charsExpected = (CharBuffer)next[4]; 1021 // ByteBuffer bytesExpected = (ByteBuffer)next[5]; 1022 // IntBuffer offsetsExpected = (IntBuffer)next[6]; 1023 // 1024 // // set up error arrays and certain fields 1025 // Object[] values = (Object[])next[7]; 1026 // if (method == decoderCall) { 1027 // decoder.reset(); 1028 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, values[0]); 1029 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), values[1]); 1030 // } else if (method == encoderCall) { 1031 // 
encoder.reset(); 1032 // } 1033 // 1034 // try { 1035 // // invoke the method 1036 // CoderResult actual = (CoderResult)method.invoke(field, args); 1037 // 1038 // // if expected != actual 1039 // if (!coderResultsEqual(expected, actual)) { 1040 // // case #i refers to the index in the arraylist tests 1041 // errln(fieldName + " failed to return the correct result for case #" + i + "."); 1042 // } 1043 // // if the expected buffers != actual buffers 1044 // else if (!(buffersEqual(chars, charsExpected) && 1045 // buffersEqual(bytes, bytesExpected) && 1046 // buffersEqual(offsets, offsetsExpected))) { 1047 // // case #i refers to the index in the arraylist tests 1048 // errln(fieldName + " did not perform the correct operation on the buffers for case #" + i + "."); 1049 // } 1050 // } catch (InvocationTargetException ex) { 1051 // // case #i refers to the index in the arraylist tests 1052 // errln(fieldName + " threw an exception for case #" + i + ": " + ex.getCause()); 1053 // //ex.getCause().printStackTrace(); 1054 // } 1055 // 1056 // // reset the buffers 1057 // System.arraycopy(bytesBackup.array(), 0, bytes.array(), 0, 10); 1058 // System.arraycopy(charsBackup.array(), 0, chars.array(), 0, 10); 1059 // System.arraycopy(offsetsBackup.array(), 0, offsets.array(), 0, 10); 1060 // bytes.position(bytesBackup.position()); 1061 // chars.position(charsBackup.position()); 1062 // offsets.position(offsetsBackup.position()); 1063 // } 1064 // 1065 // } catch (Exception ex) { 1066 // errln("TestCharsetCallback skipped due to " + ex.toString()); 1067 // ex.printStackTrace(); 1068 // } 1069 // } 1070 // 1071 // private Object getFieldValue(Class c, String name, Object instance) throws Exception { 1072 // Field field = c.getDeclaredField(name); 1073 // field.setAccessible(true); 1074 // return field.get(instance); 1075 // } 1076 // private void setFieldValue(Class c, String name, Object instance, Object value) throws Exception { 1077 // Field field = c.getDeclaredField(name); 
1078 // field.setAccessible(true); 1079 // if (value instanceof Boolean) 1080 // field.setBoolean(instance, ((Boolean)value).booleanValue()); 1081 // else if (value instanceof Byte) 1082 // field.setByte(instance, ((Byte)value).byteValue()); 1083 // else if (value instanceof Character) 1084 // field.setChar(instance, ((Character)value).charValue()); 1085 // else if (value instanceof Double) 1086 // field.setDouble(instance, ((Double)value).doubleValue()); 1087 // else if (value instanceof Float) 1088 // field.setFloat(instance, ((Float)value).floatValue()); 1089 // else if (value instanceof Integer) 1090 // field.setInt(instance, ((Integer)value).intValue()); 1091 // else if (value instanceof Long) 1092 // field.setLong(instance, ((Long)value).longValue()); 1093 // else if (value instanceof Short) 1094 // field.setShort(instance, ((Short)value).shortValue()); 1095 // else 1096 // field.set(instance, value); 1097 // } 1098 // private boolean coderResultsEqual(CoderResult a, CoderResult b) { 1099 // if (a == null && b == null) 1100 // return true; 1101 // if (a == null || b == null) 1102 // return false; 1103 // if ((a.isUnderflow() && b.isUnderflow()) || (a.isOverflow() && b.isOverflow())) 1104 // return true; 1105 // if (a.length() != b.length()) 1106 // return false; 1107 // if ((a.isMalformed() && b.isMalformed()) || (a.isUnmappable() && b.isUnmappable())) 1108 // return true; 1109 // return false; 1110 // } 1111 // private boolean buffersEqual(ByteBuffer a, ByteBuffer b) { 1112 // if (a.position() != b.position()) 1113 // return false; 1114 // int limit = a.position(); 1115 // for (int i=0; i<limit; i++) 1116 // if (a.get(i) != b.get(i)) 1117 // return false; 1118 // return true; 1119 // } 1120 // private boolean buffersEqual(CharBuffer a, CharBuffer b) { 1121 // if (a.position() != b.position()) 1122 // return false; 1123 // int limit = a.position(); 1124 // for (int i=0; i<limit; i++) 1125 // if (a.get(i) != b.get(i)) 1126 // return false; 1127 // return true; 
1128 // } 1129 // private boolean buffersEqual(IntBuffer a, IntBuffer b) { 1130 // if (a.position() != b.position()) 1131 // return false; 1132 // int limit = a.position(); 1133 // for (int i=0; i<limit; i++) 1134 // if (a.get(i) != b.get(i)) 1135 // return false; 1136 // return true; 1137 // } 1138 // private ByteBuffer bufferCopy(ByteBuffer src) { 1139 // ByteBuffer dest = ByteBuffer.allocate(src.limit()); 1140 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1141 // dest.position(src.position()); 1142 // return dest; 1143 // } 1144 // private CharBuffer bufferCopy(CharBuffer src) { 1145 // CharBuffer dest = CharBuffer.allocate(src.limit()); 1146 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1147 // dest.position(src.position()); 1148 // return dest; 1149 // } 1150 // private IntBuffer bufferCopy(IntBuffer src) { 1151 // IntBuffer dest = IntBuffer.allocate(src.limit()); 1152 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1153 // dest.position(src.position()); 1154 // return dest; 1155 // } 1156 1157 1158 @Test 1159 public void TestAPISemantics(/*String encoding*/) { 1160 String encoding = "UTF-16"; 1161 CharsetDecoder decoder = null; 1162 CharsetEncoder encoder = null; 1163 try { 1164 CharsetProviderICU provider = new CharsetProviderICU(); 1165 Charset charset = provider.charsetForName(encoding); 1166 decoder = charset.newDecoder(); 1167 encoder = charset.newEncoder(); 1168 } catch(MissingResourceException ex) { 1169 warnln("Could not load charset data: " + encoding); 1170 return; 1171 } 1172 1173 final String unistr = "abcd\ud800\udc00\u1234\u00a5\u3000\r\n"; 1174 final byte[] byteStr = { 1175 (byte) 0x00,(byte) 'a', 1176 (byte) 0x00,(byte) 'b', 1177 (byte) 0x00,(byte) 'c', 1178 (byte) 0x00,(byte) 'd', 1179 (byte) 0xd8,(byte) 0x00, 1180 (byte) 0xdc,(byte) 0x00, 1181 (byte) 0x12,(byte) 0x34, 1182 (byte) 0x00,(byte) 0xa5, 1183 (byte) 0x30,(byte) 0x00, 1184 (byte) 0x00,(byte) 0x0d, 1185 (byte) 0x00,(byte) 
0x0a 1186 }; 1187 final byte[] expectedByteStr = { 1188 (byte) 0xfe,(byte) 0xff, 1189 (byte) 0x00,(byte) 'a', 1190 (byte) 0x00,(byte) 'b', 1191 (byte) 0x00,(byte) 'c', 1192 (byte) 0x00,(byte) 'd', 1193 (byte) 0xd8,(byte) 0x00, 1194 (byte) 0xdc,(byte) 0x00, 1195 (byte) 0x12,(byte) 0x34, 1196 (byte) 0x00,(byte) 0xa5, 1197 (byte) 0x30,(byte) 0x00, 1198 (byte) 0x00,(byte) 0x0d, 1199 (byte) 0x00,(byte) 0x0a 1200 }; 1201 1202 ByteBuffer byes = ByteBuffer.wrap(byteStr); 1203 CharBuffer uniVal = CharBuffer.wrap(unistr); 1204 ByteBuffer expected = ByteBuffer.wrap(expectedByteStr); 1205 1206 int rc = 0; 1207 if(decoder==null){ 1208 warnln("Could not load decoder."); 1209 return; 1210 } 1211 decoder.reset(); 1212 /* Convert the whole buffer to Unicode */ 1213 try { 1214 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1215 CoderResult result = decoder.decode(byes, chars, false); 1216 1217 if (result.isError()) { 1218 errln("ToChars encountered Error"); 1219 rc = 1; 1220 } 1221 if (result.isOverflow()) { 1222 errln("ToChars encountered overflow exception"); 1223 rc = 1; 1224 } 1225 if (!equals(chars, unistr)) { 1226 errln("ToChars does not match"); 1227 printchars(chars); 1228 errln("Expected : "); 1229 printchars(unistr); 1230 rc = 2; 1231 } 1232 1233 } catch (Exception e) { 1234 errln("ToChars - exception in buffer"); 1235 rc = 5; 1236 } 1237 1238 /* Convert single bytes to Unicode */ 1239 try { 1240 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1241 ByteBuffer b = ByteBuffer.wrap(byteStr); 1242 decoder.reset(); 1243 CoderResult result=null; 1244 for (int i = 1; i <= byteStr.length; i++) { 1245 b.limit(i); 1246 result = decoder.decode(b, chars, false); 1247 if(result.isOverflow()){ 1248 errln("ToChars single threw an overflow exception"); 1249 } 1250 if (result.isError()) { 1251 errln("ToChars single the result is an error "+result.toString()); 1252 } 1253 } 1254 if (unistr.length() != (chars.limit())) { 1255 errln("ToChars single len does not match"); 
1256 rc = 3; 1257 } 1258 if (!equals(chars, unistr)) { 1259 errln("ToChars single does not match"); 1260 printchars(chars); 1261 rc = 4; 1262 } 1263 } catch (Exception e) { 1264 errln("ToChars - exception in single"); 1265 //e.printStackTrace(); 1266 rc = 6; 1267 } 1268 1269 /* Convert the buffer one at a time to Unicode */ 1270 try { 1271 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1272 decoder.reset(); 1273 byes.rewind(); 1274 for (int i = 1; i <= byteStr.length; i++) { 1275 byes.limit(i); 1276 CoderResult result = decoder.decode(byes, chars, false); 1277 if (result.isError()) { 1278 errln("Error while decoding: "+result.toString()); 1279 } 1280 if(result.isOverflow()){ 1281 errln("ToChars Simple threw an overflow exception"); 1282 } 1283 } 1284 if (chars.limit() != unistr.length()) { 1285 errln("ToChars Simple buffer len does not match"); 1286 rc = 7; 1287 } 1288 if (!equals(chars, unistr)) { 1289 errln("ToChars Simple buffer does not match"); 1290 printchars(chars); 1291 err(" Expected : "); 1292 printchars(unistr); 1293 rc = 8; 1294 } 1295 } catch (Exception e) { 1296 errln("ToChars - exception in single buffer"); 1297 //e.printStackTrace(System.err); 1298 rc = 9; 1299 } 1300 if (rc != 0) { 1301 errln("Test Simple ToChars for encoding : FAILED"); 1302 } 1303 1304 rc = 0; 1305 /* Convert the whole buffer from unicode */ 1306 try { 1307 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1308 encoder.reset(); 1309 CoderResult result = encoder.encode(uniVal, bytes, false); 1310 if (result.isError()) { 1311 errln("FromChars reported error: " + result.toString()); 1312 rc = 1; 1313 } 1314 if(result.isOverflow()){ 1315 errln("FromChars threw an overflow exception"); 1316 } 1317 bytes.position(0); 1318 if (!bytes.equals(expected)) { 1319 errln("FromChars does not match"); 1320 printbytes(bytes); 1321 printbytes(expected); 1322 rc = 2; 1323 } 1324 } catch (Exception e) { 1325 errln("FromChars - exception in buffer"); 1326 
//e.printStackTrace(System.err); 1327 rc = 5; 1328 } 1329 1330 /* Convert the buffer one char at a time to unicode */ 1331 try { 1332 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1333 CharBuffer c = CharBuffer.wrap(unistr); 1334 encoder.reset(); 1335 CoderResult result= null; 1336 for (int i = 1; i <= unistr.length(); i++) { 1337 c.limit(i); 1338 result = encoder.encode(c, bytes, false); 1339 if(result.isOverflow()){ 1340 errln("FromChars single threw an overflow exception"); 1341 } 1342 if(result.isError()){ 1343 errln("FromChars single threw an error: "+ result.toString()); 1344 } 1345 } 1346 if (expectedByteStr.length != bytes.limit()) { 1347 errln("FromChars single len does not match"); 1348 rc = 3; 1349 } 1350 1351 bytes.position(0); 1352 if (!bytes.equals(expected)) { 1353 errln("FromChars single does not match"); 1354 printbytes(bytes); 1355 printbytes(expected); 1356 rc = 4; 1357 } 1358 1359 } catch (Exception e) { 1360 errln("FromChars - exception in single"); 1361 //e.printStackTrace(System.err); 1362 rc = 6; 1363 } 1364 1365 /* Convert one char at a time to unicode */ 1366 try { 1367 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1368 encoder.reset(); 1369 char[] temp = unistr.toCharArray(); 1370 CoderResult result=null; 1371 for (int i = 0; i <= temp.length; i++) { 1372 uniVal.limit(i); 1373 result = encoder.encode(uniVal, bytes, false); 1374 if(result.isOverflow()){ 1375 errln("FromChars simple threw an overflow exception"); 1376 } 1377 if(result.isError()){ 1378 errln("FromChars simple threw an error: "+ result.toString()); 1379 } 1380 } 1381 if (bytes.limit() != expectedByteStr.length) { 1382 errln("FromChars Simple len does not match"); 1383 rc = 7; 1384 } 1385 if (!bytes.equals(byes)) { 1386 errln("FromChars Simple does not match"); 1387 printbytes(bytes); 1388 printbytes(byes); 1389 rc = 8; 1390 } 1391 } catch (Exception e) { 1392 errln("FromChars - exception in single buffer"); 1393 
//e.printStackTrace(System.err); 1394 rc = 9; 1395 } 1396 if (rc != 0) { 1397 errln("Test Simple FromChars " + encoding + " --FAILED"); 1398 } 1399 } 1400 1401 void printchars(CharBuffer buf) { 1402 int i; 1403 char[] chars = new char[buf.limit()]; 1404 //save the current position 1405 int pos = buf.position(); 1406 buf.position(0); 1407 buf.get(chars); 1408 //reset to old position 1409 buf.position(pos); 1410 for (i = 0; i < chars.length; i++) { 1411 err(hex(chars[i]) + " "); 1412 } 1413 errln(""); 1414 } 1415 void printchars(String str) { 1416 char[] chars = str.toCharArray(); 1417 for (int i = 0; i < chars.length; i++) { 1418 err(hex(chars[i]) + " "); 1419 } 1420 errln(""); 1421 } 1422 void printbytes(ByteBuffer buf) { 1423 int i; 1424 byte[] bytes = new byte[buf.limit()]; 1425 //save the current position 1426 int pos = buf.position(); 1427 buf.position(0); 1428 buf.get(bytes); 1429 //reset to old position 1430 buf.position(pos); 1431 for (i = 0; i < bytes.length; i++) { 1432 System.out.print(hex(bytes[i]) + " "); 1433 } 1434 errln(""); 1435 } 1436 1437 public boolean equals(CoderResult a, CoderResult b) { 1438 return (a.isUnderflow() && b.isUnderflow()) 1439 || (a.isOverflow() && b.isOverflow()) 1440 || (a.isMalformed() && b.isMalformed() && a.length() == b.length()) 1441 || (a.isUnmappable() && b.isUnmappable() && a.length() == b.length()); 1442 } 1443 public boolean equals(CharBuffer buf, String str) { 1444 return equals(buf, str.toCharArray()); 1445 } 1446 public boolean equals(CharBuffer buf, CharBuffer str) { 1447 if (buf.limit() != str.limit()) 1448 return false; 1449 int limit = buf.limit(); 1450 for (int i = 0; i < limit; i++) 1451 if (buf.get(i) != str.get(i)) 1452 return false; 1453 return true; 1454 } 1455 public boolean equals(CharBuffer buf, CharBuffer str, int limit) { 1456 if (limit > buf.limit() || limit > str.limit()) 1457 return false; 1458 for (int i = 0; i < limit; i++) 1459 if (buf.get(i) != str.get(i)) 1460 return false; 1461 return true; 
1462 } 1463 public boolean equals(CharBuffer buf, char[] compareTo) { 1464 char[] chars = new char[buf.limit()]; 1465 //save the current position 1466 int pos = buf.position(); 1467 buf.position(0); 1468 buf.get(chars); 1469 //reset to old position 1470 buf.position(pos); 1471 return equals(chars, compareTo); 1472 } 1473 1474 public boolean equals(char[] chars, char[] compareTo) { 1475 if (chars.length != compareTo.length) { 1476 errln( 1477 "Length does not match chars: " 1478 + chars.length 1479 + " compareTo: " 1480 + compareTo.length); 1481 return false; 1482 } else { 1483 boolean result = true; 1484 for (int i = 0; i < chars.length; i++) { 1485 if (chars[i] != compareTo[i]) { 1486 logln( 1487 "Got: " 1488 + hex(chars[i]) 1489 + " Expected: " 1490 + hex(compareTo[i]) 1491 + " At: " 1492 + i); 1493 result = false; 1494 } 1495 } 1496 return result; 1497 } 1498 } 1499 1500 public boolean equals(ByteBuffer buf, byte[] compareTo) { 1501 byte[] chars = new byte[buf.limit()]; 1502 //save the current position 1503 int pos = buf.position(); 1504 buf.position(0); 1505 buf.get(chars); 1506 //reset to old position 1507 buf.position(pos); 1508 return equals(chars, compareTo); 1509 } 1510 public boolean equals(ByteBuffer buf, ByteBuffer compareTo) { 1511 if (buf.limit() != compareTo.limit()) 1512 return false; 1513 int limit = buf.limit(); 1514 for (int i = 0; i < limit; i++) 1515 if (buf.get(i) != compareTo.get(i)) 1516 return false; 1517 return true; 1518 } 1519 public boolean equals(ByteBuffer buf, ByteBuffer compareTo, int limit) { 1520 if (limit > buf.limit() || limit > compareTo.limit()) 1521 return false; 1522 for (int i = 0; i < limit; i++) 1523 if (buf.get(i) != compareTo.get(i)) 1524 return false; 1525 return true; 1526 } 1527 public boolean equals(byte[] chars, byte[] compareTo) { 1528 if (false/*chars.length != compareTo.length*/) { 1529 errln( 1530 "Length does not match chars: " 1531 + chars.length 1532 + " compareTo: " 1533 + compareTo.length); 1534 return 
false; 1535 } else { 1536 boolean result = true; 1537 for (int i = 0; i < chars.length; i++) { 1538 if (chars[i] != compareTo[i]) { 1539 logln( 1540 "Got: " 1541 + hex(chars[i]) 1542 + " Expected: " 1543 + hex(compareTo[i]) 1544 + " At: " 1545 + i); 1546 result = false; 1547 } 1548 } 1549 return result; 1550 } 1551 } 1552 1553 // TODO 1554 /* 1555 @Test 1556 public void TestCallback(String encoding) throws Exception { 1557 1558 byte[] gbSource = 1559 { 1560 (byte) 0x81, 1561 (byte) 0x36, 1562 (byte) 0xDE, 1563 (byte) 0x36, 1564 (byte) 0x81, 1565 (byte) 0x36, 1566 (byte) 0xDE, 1567 (byte) 0x37, 1568 (byte) 0x81, 1569 (byte) 0x36, 1570 (byte) 0xDE, 1571 (byte) 0x38, 1572 (byte) 0xe3, 1573 (byte) 0x32, 1574 (byte) 0x9a, 1575 (byte) 0x36 }; 1576 1577 char[] subChars = { 'P', 'I' }; 1578 1579 decoder.reset(); 1580 1581 decoder.replaceWith(new String(subChars)); 1582 ByteBuffer mySource = ByteBuffer.wrap(gbSource); 1583 CharBuffer myTarget = CharBuffer.allocate(5); 1584 1585 decoder.decode(mySource, myTarget, true); 1586 char[] expectedResult = 1587 { '\u22A6', '\u22A7', '\u22A8', '\u0050', '\u0049', }; 1588 1589 if (!equals(myTarget, new String(expectedResult))) { 1590 errln("Test callback GB18030 to Unicode : FAILED"); 1591 } 1592 1593 } 1594 */ 1595 1596 @Test 1597 public void TestCanConvert(/*String encoding*/)throws Exception { 1598 char[] mySource = { 1599 '\ud800', '\udc00',/*surrogate pair */ 1600 '\u22A6','\u22A7','\u22A8','\u22A9','\u22AA', 1601 '\u22AB','\u22AC','\u22AD','\u22AE','\u22AF', 1602 '\u22B0','\u22B1','\u22B2','\u22B3','\u22B4', 1603 '\ud800','\udc00',/*surrogate pair */ 1604 '\u22B5','\u22B6','\u22B7','\u22B8','\u22B9', 1605 '\u22BA','\u22BB','\u22BC','\u22BD','\u22BE' 1606 }; 1607 String encoding = "UTF-16"; 1608 CharsetEncoder encoder = null; 1609 try { 1610 CharsetProviderICU provider = new CharsetProviderICU(); 1611 Charset charset = provider.charsetForName(encoding); 1612 encoder = charset.newEncoder(); 1613 } catch(MissingResourceException 
ex) { 1614 warnln("Could not load charset data: " + encoding); 1615 return; 1616 } 1617 if (!encoder.canEncode(new String(mySource))) { 1618 errln("Test canConvert() " + encoding + " failed. "+encoder); 1619 } 1620 1621 } 1622 1623 @Test 1624 public void TestAvailableCharsets() { 1625 SortedMap map = Charset.availableCharsets(); 1626 Set keySet = map.keySet(); 1627 Iterator iter = keySet.iterator(); 1628 while(iter.hasNext()){ 1629 logln("Charset name: "+iter.next().toString()); 1630 } 1631 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1632 int mapSize = map.size(); 1633 if(mapSize < charsets.length){ 1634 errln("Charset.availableCharsets() returned a number less than the number returned by icu. ICU: " + charsets.length 1635 + " JDK: " + mapSize); 1636 } 1637 logln("Total Number of chasets = " + map.size()); 1638 } 1639 1640 @Test 1641 public void TestWindows936(){ 1642 CharsetProviderICU icu = new CharsetProviderICU(); 1643 Charset cs = icu.charsetForName("windows-936-2000"); 1644 String canonicalName = cs.name(); 1645 if(!canonicalName.equals("GBK")){ 1646 errln("Did not get the expected canonical name. Got: "+canonicalName); //get the canonical name 1647 } 1648 } 1649 1650 @Test 1651 public void TestICUAvailableCharsets() { 1652 CharsetProviderICU icu = new CharsetProviderICU(); 1653 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1654 for(int i=0;i<charsets.length;i++){ 1655 Charset cs = icu.charsetForName((String)charsets[i]); 1656 try{ 1657 CharsetEncoder encoder = cs.newEncoder(); 1658 if(encoder!=null){ 1659 logln("Creation of encoder succeeded. "+cs.toString()); 1660 } 1661 }catch(Exception ex){ 1662 errln("Could not instantiate encoder for "+charsets[i]+". Error: "+ex.toString()); 1663 } 1664 try{ 1665 CharsetDecoder decoder = cs.newDecoder(); 1666 if(decoder!=null){ 1667 logln("Creation of decoder succeeded. "+cs.toString()); 1668 } 1669 }catch(Exception ex){ 1670 errln("Could not instantiate decoder for "+charsets[i]+". 
Error: "+ex.toString()); 1671 } 1672 } 1673 } 1674 1675 /* jitterbug 4312 */ 1676 @Test 1677 public void TestUnsupportedCharset(){ 1678 CharsetProvider icu = new CharsetProviderICU(); 1679 Charset icuChar = icu.charsetForName("impossible"); 1680 if(icuChar != null){ 1681 errln("ICU does not conform to the spec"); 1682 } 1683 } 1684 1685 @Test 1686 public void TestEncoderCreation(){ 1687 // Use CharsetICU.forNameICU() so that we get the ICU version 1688 // even if the system or another provider also supports this charset. 1689 String encoding = "GB_2312-80"; 1690 try{ 1691 Charset cs = CharsetICU.forNameICU(encoding); 1692 CharsetEncoder enc = cs.newEncoder(); 1693 if(enc!=null){ 1694 logln("Successfully created an encoder for " + encoding + ": " + enc); 1695 if(!(enc instanceof CharsetEncoderICU)) { 1696 errln("Expected " + encoding + 1697 " to be implemented by ICU but got an instance of " + enc.getClass()); 1698 } 1699 }else{ 1700 errln("Error creating charset encoder for " + encoding); 1701 } 1702 }catch(Exception e){ 1703 warnln("Error creating charset encoder for " + encoding + ": " + e); 1704 } 1705 // Use Charset.forName() which may return an ICU Charset or some other implementation. 1706 encoding = "x-ibm-971_P100-1995"; 1707 try{ 1708 Charset cs = Charset.forName(encoding); 1709 CharsetEncoder enc = cs.newEncoder(); 1710 if(enc!=null){ 1711 logln("Successfully created an encoder for " + encoding + ": " + enc + 1712 " which is implemented by ICU? 
" + (enc instanceof CharsetEncoderICU)); 1713 }else{ 1714 errln("Error creating charset encoder for " + encoding); 1715 } 1716 }catch(Exception e){ 1717 warnln("Error creating charset encoder for " + encoding + ": " + e); 1718 } 1719 } 1720 1721 @Test 1722 public void TestSubBytes(){ 1723 try{ 1724 //create utf-8 decoder 1725 CharsetDecoder decoder = new CharsetProviderICU().charsetForName("utf-8").newDecoder(); 1726 1727 //create a valid byte array, which can be decoded to " buffer" 1728 byte[] unibytes = new byte[] { 0x0020, 0x0062, 0x0075, 0x0066, 0x0066, 0x0065, 0x0072 }; 1729 1730 ByteBuffer buffer = ByteBuffer.allocate(20); 1731 1732 //add a evil byte to make the byte buffer be malformed input 1733 buffer.put((byte)0xd8); 1734 1735 //put the valid byte array 1736 buffer.put(unibytes); 1737 1738 //reset postion 1739 buffer.flip(); 1740 1741 decoder.onMalformedInput(CodingErrorAction.REPLACE); 1742 CharBuffer out = decoder.decode(buffer); 1743 String expected = "\ufffd buffer"; 1744 if(!expected.equals(new String(out.array()))){ 1745 errln("Did not get the expected result for substitution chars. 
Got: "+ 1746 new String(out.array()) + "("+ hex(out.array())+")"); 1747 } 1748 logln("Output: "+ new String(out.array()) + "("+ hex(out.array())+")"); 1749 }catch (CharacterCodingException ex){ 1750 errln("Unexpected exception: "+ex.toString()); 1751 } 1752 } 1753 /* 1754 1755 @Test 1756 public void TestImplFlushFailure(){ 1757 1758 try{ 1759 CharBuffer in = CharBuffer.wrap("\u3005\u3006\u3007\u30FC\u2015\u2010\uFF0F"); 1760 CharsetEncoder encoder = new CharsetProviderICU().charsetForName("iso-2022-jp").newEncoder(); 1761 ByteBuffer out = ByteBuffer.allocate(30); 1762 encoder.encode(in, out, true); 1763 encoder.flush(out); 1764 if(out.position()!= 20){ 1765 errln("Did not get the expected position from flush"); 1766 } 1767 1768 }catch (Exception ex){ 1769 errln("Could not create encoder for iso-2022-jp exception: "+ex.toString()); 1770 } 1771 } 1772 */ 1773 1774 @Test 1775 public void TestISO88591() { 1776 1777 Charset cs = new CharsetProviderICU().charsetForName("iso-8859-1"); 1778 if(cs!=null){ 1779 CharsetEncoder encoder = cs.newEncoder(); 1780 if(encoder!=null){ 1781 encoder.canEncode("\uc2a3"); 1782 }else{ 1783 errln("Could not create encoder for iso-8859-1"); 1784 } 1785 }else{ 1786 errln("Could not create Charset for iso-8859-1"); 1787 } 1788 1789 } 1790 1791 @Test 1792 public void TestUTF8Encode() { 1793 // Test with a lead surrogate in the middle of the input text. 1794 // Java API behavior is unclear for surrogates at the end, see ticket #11546. 
1795 CharBuffer in = CharBuffer.wrap("\ud800a"); 1796 ByteBuffer out = ByteBuffer.allocate(30); 1797 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1798 CoderResult result = encoderICU.encode(in, out, true); 1799 1800 if (result.isMalformed()) { 1801 logln("\\ud800 is malformed for ICU4JNI utf-8 encoder"); 1802 } else if (result.isUnderflow()) { 1803 errln("FAIL: \\ud800 is OK for ICU4JNI utf-8 encoder"); 1804 } 1805 1806 in.position(0); 1807 out.clear(); 1808 1809 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1810 result = encoderJDK.encode(in, out, true); 1811 if (result.isMalformed()) { 1812 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1813 } else if (result.isUnderflow()) { 1814 errln("BAD: \\ud800 is OK for JDK utf-8 encoder"); 1815 } 1816 } 1817 1818 /* private void printCB(CharBuffer buf){ 1819 buf.rewind(); 1820 while(buf.hasRemaining()){ 1821 System.out.println(hex(buf.get())); 1822 } 1823 buf.rewind(); 1824 } 1825 */ 1826 1827 @Test 1828 public void TestUTF8() throws CharacterCodingException{ 1829 try{ 1830 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1831 encoderICU.encode(CharBuffer.wrap("\ud800")); 1832 errln("\\ud800 is OK for ICU4JNI utf-8 encoder"); 1833 }catch (Exception e) { 1834 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1835 //e.printStackTrace(); 1836 } 1837 1838 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1839 try { 1840 encoderJDK.encode(CharBuffer.wrap("\ud800")); 1841 errln("\\ud800 is OK for JDK utf-8 encoder"); 1842 } catch (Exception e) { 1843 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1844 //e.printStackTrace(); 1845 } 1846 } 1847 1848 @Test 1849 public void TestUTF16Bom(){ 1850 1851 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-16"); 1852 char[] in = new char[] { 0x1122, 0x2211, 0x3344, 0x4433, 1853 0x5566, 0x6655, 0x7788, 0x8877, 0x9900 }; 1854 CharBuffer inBuf 
= CharBuffer.allocate(in.length); 1855 inBuf.put(in); 1856 CharsetEncoder encoder = cs.newEncoder(); 1857 ByteBuffer outBuf = ByteBuffer.allocate(in.length*2+2); 1858 inBuf.rewind(); 1859 encoder.encode(inBuf, outBuf, true); 1860 outBuf.rewind(); 1861 if(outBuf.get(0)!= (byte)0xFE && outBuf.get(1)!= (byte)0xFF){ 1862 errln("The UTF16 encoder did not appended bom. Length returned: " + outBuf.remaining()); 1863 } 1864 while(outBuf.hasRemaining()){ 1865 logln("0x"+hex(outBuf.get())); 1866 } 1867 CharsetDecoder decoder = cs.newDecoder(); 1868 outBuf.rewind(); 1869 CharBuffer rt = CharBuffer.allocate(in.length); 1870 CoderResult cr = decoder.decode(outBuf, rt, true); 1871 if(cr.isError()){ 1872 errln("Decoding with BOM failed. Error: "+ cr.toString()); 1873 } 1874 equals(rt, in); 1875 { 1876 rt.clear(); 1877 outBuf.rewind(); 1878 Charset utf16 = Charset.forName("UTF-16"); 1879 CharsetDecoder dc = utf16.newDecoder(); 1880 cr = dc.decode(outBuf, rt, true); 1881 equals(rt, in); 1882 } 1883 } 1884 1885 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1886 boolean throwException, boolean flush) throws BufferOverflowException, Exception { 1887 smBufDecode(decoder, encoding, source, target, throwException, flush, true); 1888 } 1889 1890 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1891 boolean throwException, boolean flush, boolean backedByArray) throws BufferOverflowException, Exception { 1892 smBufDecode(decoder, encoding, source, target, throwException, flush, backedByArray, -1); 1893 } 1894 1895 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1896 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) 1897 throws BufferOverflowException, Exception { 1898 ByteBuffer mySource; 1899 CharBuffer myTarget; 1900 if (backedByArray) { 1901 mySource = ByteBuffer.allocate(source.capacity()); 
1902 myTarget = CharBuffer.allocate(target.capacity()); 1903 } else { 1904 // this does not guarantee by any means that mySource and myTarget 1905 // are not backed by arrays 1906 mySource = ByteBuffer.allocateDirect(source.capacity()); 1907 myTarget = ByteBuffer.allocateDirect(target.capacity() * 2).asCharBuffer(); 1908 } 1909 mySource.position(source.position()); 1910 for (int i = source.position(); i < source.limit(); i++) 1911 mySource.put(i, source.get(i)); 1912 1913 { 1914 decoder.reset(); 1915 myTarget.limit(target.limit()); 1916 mySource.limit(source.limit()); 1917 mySource.position(source.position()); 1918 CoderResult result = CoderResult.UNDERFLOW; 1919 result = decoder.decode(mySource, myTarget, true); 1920 if (flush) { 1921 result = decoder.flush(myTarget); 1922 } 1923 if (result.isError()) { 1924 if (throwException) { 1925 throw new Exception(); 1926 } 1927 errln("Test complete buffers while decoding failed. " + result.toString()); 1928 return; 1929 } 1930 if (result.isOverflow()) { 1931 if (throwException) { 1932 throw new BufferOverflowException(); 1933 } 1934 errln("Test complete buffers while decoding threw overflow exception"); 1935 return; 1936 } 1937 myTarget.limit(myTarget.position()); 1938 myTarget.position(0); 1939 target.position(0); 1940 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1941 errln(" Test complete buffers while decoding " + encoding + " TO Unicode--failed"); 1942 } 1943 } 1944 if (isQuick()) { 1945 return; 1946 } 1947 { 1948 decoder.reset(); 1949 myTarget.limit(target.position()); 1950 mySource.limit(source.position()); 1951 mySource.position(source.position()); 1952 myTarget.clear(); 1953 myTarget.position(0); 1954 1955 int inputLen = mySource.remaining(); 1956 1957 CoderResult result = CoderResult.UNDERFLOW; 1958 for (int i = 1; i <= inputLen; i++) { 1959 mySource.limit(i); 1960 if (i == inputLen) { 1961 result = decoder.decode(mySource, myTarget, true); 1962 } else { 1963 result = 
decoder.decode(mySource, myTarget, false); 1964 } 1965 if (result.isError()) { 1966 errln("Test small input buffers while decoding failed. " + result.toString()); 1967 break; 1968 } 1969 if (result.isOverflow()) { 1970 if (throwException) { 1971 throw new BufferOverflowException(); 1972 } 1973 errln("Test small input buffers while decoding threw overflow exception"); 1974 break; 1975 } 1976 1977 } 1978 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1979 errln("Test small input buffers while decoding " + encoding + " TO Unicode--failed"); 1980 } 1981 } 1982 { 1983 decoder.reset(); 1984 myTarget.limit(0); 1985 mySource.limit(0); 1986 mySource.position(source.position()); 1987 myTarget.clear(); 1988 while (true) { 1989 CoderResult result = decoder.decode(mySource, myTarget, false); 1990 if (result.isUnderflow()) { 1991 if (mySource.limit() < source.limit()) 1992 mySource.limit(mySource.limit() + 1); 1993 } else if (result.isOverflow()) { 1994 if (myTarget.limit() < target.limit()) 1995 myTarget.limit(myTarget.limit() + 1); 1996 else 1997 break; 1998 } else /*if (result.isError())*/ { 1999 errln("Test small output buffers while decoding " + result.toString()); 2000 } 2001 if (mySource.position() == mySource.limit()) { 2002 result = decoder.decode(mySource, myTarget, true); 2003 if (result.isError()) { 2004 errln("Test small output buffers while decoding " + result.toString()); 2005 } 2006 result = decoder.flush(myTarget); 2007 if (result.isError()) { 2008 errln("Test small output buffers while decoding " + result.toString()); 2009 } 2010 break; 2011 } 2012 } 2013 2014 if (!equals(myTarget, target, targetLimit)) { 2015 errln("Test small output buffers " + encoding + " TO Unicode failed"); 2016 } 2017 } 2018 } 2019 2020 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2021 boolean throwException, boolean flush) throws Exception, BufferOverflowException { 2022 smBufEncode(encoder, encoding, 
source, target, throwException, flush, true); 2023 } 2024 2025 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2026 boolean throwException, boolean flush, boolean backedByArray) throws Exception, BufferOverflowException { 2027 smBufEncode(encoder, encoding, source, target, throwException, flush, true, -1); 2028 } 2029 2030 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2031 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) throws Exception, 2032 BufferOverflowException { 2033 logln("Running smBufEncode for " + encoding + " with class " + encoder); 2034 2035 CharBuffer mySource; 2036 ByteBuffer myTarget; 2037 if (backedByArray) { 2038 mySource = CharBuffer.allocate(source.capacity()); 2039 myTarget = ByteBuffer.allocate(target.capacity()); 2040 } else { 2041 mySource = ByteBuffer.allocateDirect(source.capacity() * 2).asCharBuffer(); 2042 myTarget = ByteBuffer.allocateDirect(target.capacity()); 2043 } 2044 mySource.position(source.position()); 2045 for (int i = source.position(); i < source.limit(); i++) 2046 mySource.put(i, source.get(i)); 2047 2048 myTarget.clear(); 2049 { 2050 logln("Running tests on small input buffers for " + encoding); 2051 encoder.reset(); 2052 myTarget.limit(target.limit()); 2053 mySource.limit(source.limit()); 2054 mySource.position(source.position()); 2055 CoderResult result = null; 2056 2057 result = encoder.encode(mySource, myTarget, true); 2058 if (flush) { 2059 result = encoder.flush(myTarget); 2060 } 2061 2062 if (result.isError()) { 2063 if (throwException) { 2064 throw new Exception(); 2065 } 2066 errln("Test complete while encoding failed. 
" + result.toString()); 2067 } 2068 if (result.isOverflow()) { 2069 if (throwException) { 2070 throw new BufferOverflowException(); 2071 } 2072 errln("Test complete while encoding threw overflow exception"); 2073 } 2074 if (!equals(myTarget, target, targetLimit)) { 2075 errln("Test complete buffers while encoding for " + encoding + " failed"); 2076 2077 } else { 2078 logln("Tests complete buffers for " + encoding + " passed"); 2079 } 2080 } 2081 if (isQuick()) { 2082 return; 2083 } 2084 { 2085 logln("Running tests on small input buffers for " + encoding); 2086 encoder.reset(); 2087 myTarget.clear(); 2088 myTarget.limit(target.limit()); 2089 mySource.limit(source.limit()); 2090 mySource.position(source.position()); 2091 int inputLen = mySource.limit(); 2092 CoderResult result = null; 2093 for (int i = 1; i <= inputLen; i++) { 2094 mySource.limit(i); 2095 result = encoder.encode(mySource, myTarget, false); 2096 if (result.isError()) { 2097 errln("Test small input buffers while encoding failed. 
" + result.toString()); 2098 } 2099 if (result.isOverflow()) { 2100 if (throwException) { 2101 throw new BufferOverflowException(); 2102 } 2103 errln("Test small input buffers while encoding threw overflow exception"); 2104 } 2105 } 2106 if (!equals(myTarget, target, targetLimit)) { 2107 errln("Test small input buffers " + encoding + " From Unicode failed"); 2108 } else { 2109 logln("Tests on small input buffers for " + encoding + " passed"); 2110 } 2111 } 2112 { 2113 logln("Running tests on small output buffers for " + encoding); 2114 encoder.reset(); 2115 myTarget.clear(); 2116 myTarget.limit(target.limit()); 2117 mySource.limit(source.limit()); 2118 mySource.position(source.position()); 2119 mySource.position(0); 2120 myTarget.position(0); 2121 2122 logln("myTarget.limit: " + myTarget.limit() + " myTarget.capcity: " + myTarget.capacity()); 2123 2124 while (true) { 2125 int pos = myTarget.position(); 2126 2127 CoderResult result = encoder.encode(mySource, myTarget, false); 2128 logln("myTarget.Position: " + pos + " myTarget.limit: " + myTarget.limit()); 2129 logln("mySource.position: " + mySource.position() + " mySource.limit: " + mySource.limit()); 2130 2131 if (result.isError()) { 2132 errln("Test small output buffers while encoding " + result.toString()); 2133 } 2134 if (mySource.position() == mySource.limit()) { 2135 result = encoder.encode(mySource, myTarget, true); 2136 if (result.isError()) { 2137 errln("Test small output buffers while encoding " + result.toString()); 2138 } 2139 2140 myTarget.limit(myTarget.capacity()); 2141 result = encoder.flush(myTarget); 2142 if (result.isError()) { 2143 errln("Test small output buffers while encoding " + result.toString()); 2144 } 2145 break; 2146 } 2147 } 2148 if (!equals(myTarget, target, targetLimit)) { 2149 errln("Test small output buffers " + encoding + " From Unicode failed."); 2150 } 2151 logln("Tests on small output buffers for " + encoding + " passed"); 2152 } 2153 } 2154 2155 // TODO(junit): orphan method 
2156 public void convertAllTest(ByteBuffer bSource, CharBuffer uSource) throws Exception { 2157 String encoding = "UTF-16"; 2158 CharsetDecoder decoder = null; 2159 CharsetEncoder encoder = null; 2160 try { 2161 CharsetProviderICU provider = new CharsetProviderICU(); 2162 Charset charset = provider.charsetForName(encoding); 2163 decoder = charset.newDecoder(); 2164 encoder = charset.newEncoder(); 2165 } catch(MissingResourceException ex) { 2166 warnln("Could not load charset data: " + encoding); 2167 return; 2168 } 2169 { 2170 try { 2171 decoder.reset(); 2172 ByteBuffer mySource = bSource.duplicate(); 2173 CharBuffer myTarget = decoder.decode(mySource); 2174 if (!equals(myTarget, uSource)) { 2175 errln( 2176 "--Test convertAll() " 2177 + encoding 2178 + " to Unicode --FAILED"); 2179 } 2180 } catch (Exception e) { 2181 //e.printStackTrace(); 2182 errln(e.getMessage()); 2183 } 2184 } 2185 { 2186 try { 2187 encoder.reset(); 2188 CharBuffer mySource = CharBuffer.wrap(uSource); 2189 ByteBuffer myTarget = encoder.encode(mySource); 2190 if (!equals(myTarget, bSource)) { 2191 errln( 2192 "--Test convertAll() " 2193 + encoding 2194 + " to Unicode --FAILED"); 2195 } 2196 } catch (Exception e) { 2197 //e.printStackTrace(); 2198 errln("encoder.encode() failed "+ e.getMessage()+" "+e.toString()); 2199 } 2200 } 2201 2202 } 2203 2204 //TODO 2205 /* 2206 @Test 2207 public void TestString(ByteBuffer bSource, CharBuffer uSource) throws Exception { 2208 try { 2209 { 2210 String source = uSource.toString(); 2211 byte[] target = source.getBytes(m_encoding); 2212 if (!equals(target, bSource.array())) { 2213 errln("encode using string API failed"); 2214 } 2215 } 2216 { 2217 2218 String target = new String(bSource.array(), m_encoding); 2219 if (!equals(uSource, target.toCharArray())) { 2220 errln("decode using string API failed"); 2221 } 2222 } 2223 } catch (Exception e) { 2224 //e.printStackTrace(); 2225 errln(e.getMessage()); 2226 } 2227 } 2228 2229 /*private void fromUnicodeTest() 
throws Exception { 2230 2231 logln("Loaded Charset: " + charset.getClass().toString()); 2232 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2233 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2234 2235 ByteBuffer myTarget = ByteBuffer.allocate(gbSource.length); 2236 logln("Created ByteBuffer of length: " + uSource.length); 2237 CharBuffer mySource = CharBuffer.wrap(uSource); 2238 logln("Wrapped ByteBuffer with CharBuffer "); 2239 encoder.reset(); 2240 logln("Test Unicode to " + encoding ); 2241 encoder.encode(mySource, myTarget, true); 2242 if (!equals(myTarget, gbSource)) { 2243 errln("--Test Unicode to " + encoding + ": FAILED"); 2244 } 2245 logln("Test Unicode to " + encoding +" passed"); 2246 } 2247 2248 @Test 2249 public void TestToUnicode( ) throws Exception { 2250 2251 logln("Loaded Charset: " + charset.getClass().toString()); 2252 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2253 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2254 2255 CharBuffer myTarget = CharBuffer.allocate(uSource.length); 2256 ByteBuffer mySource = ByteBuffer.wrap(getByteArray(gbSource)); 2257 decoder.reset(); 2258 CoderResult result = decoder.decode(mySource, myTarget, true); 2259 if (result.isError()) { 2260 errln("Test ToUnicode -- FAILED"); 2261 } 2262 if (!equals(myTarget, uSource)) { 2263 errln("--Test " + encoding + " to Unicode :FAILED"); 2264 } 2265 } 2266 2267 public static byte[] getByteArray(char[] source) { 2268 byte[] target = new byte[source.length]; 2269 int i = source.length; 2270 for (; --i >= 0;) { 2271 target[i] = (byte) source[i]; 2272 } 2273 return target; 2274 } 2275 /* 2276 private void smBufCharset(Charset charset) { 2277 try { 2278 ByteBuffer bTarget = charset.encode(CharBuffer.wrap(uSource)); 2279 CharBuffer uTarget = 2280 charset.decode(ByteBuffer.wrap(getByteArray(gbSource))); 2281 2282 if (!equals(uTarget, uSource)) { 2283 errln("Test " + charset.toString() + " to Unicode 
:FAILED"); 2284 } 2285 if (!equals(bTarget, gbSource)) { 2286 errln("Test " + charset.toString() + " from Unicode :FAILED"); 2287 } 2288 } catch (Exception ex) { 2289 errln("Encountered exception in smBufCharset"); 2290 } 2291 } 2292 2293 @Test 2294 public void TestMultithreaded() throws Exception { 2295 final Charset cs = Charset.forName(encoding); 2296 if (cs == charset) { 2297 errln("The objects are equal"); 2298 } 2299 smBufCharset(cs); 2300 try { 2301 final Thread t1 = new Thread() { 2302 public void run() { 2303 // commented out since the mehtods on 2304 // Charset API are supposed to be thread 2305 // safe ... to test it we dont sync 2306 2307 // synchronized(charset){ 2308 while (!interrupted()) { 2309 try { 2310 smBufCharset(cs); 2311 } catch (UnsupportedCharsetException ueEx) { 2312 errln(ueEx.toString()); 2313 } 2314 } 2315 2316 // } 2317 } 2318 }; 2319 final Thread t2 = new Thread() { 2320 public void run() { 2321 // synchronized(charset){ 2322 while (!interrupted()) { 2323 try { 2324 smBufCharset(cs); 2325 } catch (UnsupportedCharsetException ueEx) { 2326 errln(ueEx.toString()); 2327 } 2328 } 2329 2330 //} 2331 } 2332 }; 2333 t1.start(); 2334 t2.start(); 2335 int i = 0; 2336 for (;;) { 2337 if (i > 1000000000) { 2338 try { 2339 t1.interrupt(); 2340 } catch (Exception e) { 2341 } 2342 try { 2343 t2.interrupt(); 2344 } catch (Exception e) { 2345 } 2346 break; 2347 } 2348 i++; 2349 } 2350 } catch (Exception e) { 2351 throw e; 2352 } 2353 } 2354 2355 @Test 2356 public void TestSynchronizedMultithreaded() throws Exception { 2357 // Methods on CharsetDecoder and CharsetEncoder classes 2358 // are inherently unsafe if accessed by multiple concurrent 2359 // thread so we synchronize them 2360 final Charset charset = Charset.forName(encoding); 2361 final CharsetDecoder decoder = charset.newDecoder(); 2362 final CharsetEncoder encoder = charset.newEncoder(); 2363 try { 2364 final Thread t1 = new Thread() { 2365 public void run() { 2366 while (!interrupted()) { 
2367 try { 2368 synchronized (encoder) { 2369 smBufEncode(encoder, encoding); 2370 } 2371 synchronized (decoder) { 2372 smBufDecode(decoder, encoding); 2373 } 2374 } catch (UnsupportedCharsetException ueEx) { 2375 errln(ueEx.toString()); 2376 } 2377 } 2378 2379 } 2380 }; 2381 final Thread t2 = new Thread() { 2382 public void run() { 2383 while (!interrupted()) { 2384 try { 2385 synchronized (encoder) { 2386 smBufEncode(encoder, encoding); 2387 } 2388 synchronized (decoder) { 2389 smBufDecode(decoder, encoding); 2390 } 2391 } catch (UnsupportedCharsetException ueEx) { 2392 errln(ueEx.toString()); 2393 } 2394 } 2395 } 2396 }; 2397 t1.start(); 2398 t2.start(); 2399 int i = 0; 2400 for (;;) { 2401 if (i > 1000000000) { 2402 try { 2403 t1.interrupt(); 2404 } catch (Exception e) { 2405 } 2406 try { 2407 t2.interrupt(); 2408 } catch (Exception e) { 2409 } 2410 break; 2411 } 2412 i++; 2413 } 2414 } catch (Exception e) { 2415 throw e; 2416 } 2417 } 2418 */ 2419 2420 @Test 2421 public void TestMBCS(){ 2422 { 2423 // Encoder: from Unicode conversion 2424 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("ibm-971").newEncoder(); 2425 ByteBuffer out = ByteBuffer.allocate(6); 2426 encoderICU.onUnmappableCharacter(CodingErrorAction.REPLACE); 2427 CoderResult result = encoderICU.encode(CharBuffer.wrap("\u0131\u0061\u00a1"), out, true); 2428 if(!result.isError()){ 2429 byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE}; 2430 if(!equals(expected, out.array())){ 2431 // Android patch: Skip tests that fail with customized data. 2432 logln("Did not get the expected result for substitution bytes. Got: "+ 2433 hex(out.array())); 2434 // Android patch end. 
2435 } 2436 logln("Output: "+ hex(out.array())); 2437 }else{ 2438 errln("Encode operation failed for encoder: "+encoderICU.toString()); 2439 } 2440 } 2441 { 2442 // Decoder: to Unicode conversion 2443 CharsetDecoder decoderICU = new CharsetProviderICU().charsetForName("ibm-971").newDecoder(); 2444 CharBuffer out = CharBuffer.allocate(3); 2445 decoderICU.onMalformedInput(CodingErrorAction.REPLACE); 2446 CoderResult result = decoderICU.decode(ByteBuffer.wrap(new byte[] { (byte)0xA2, (byte)0xAE, (byte)0x12, (byte)0x34, (byte)0xEF, (byte)0xDC }), out, true); 2447 if(!result.isError()){ 2448 char[] expected = {'\u00a1', '\ufffd', '\u6676'}; 2449 if(!equals(expected, out.array())){ 2450 errln("Did not get the expected result for substitution chars. Got: "+ 2451 hex(out.array())); 2452 } 2453 logln("Output: "+ hex(out.array())); 2454 }else{ 2455 errln("Decode operation failed for encoder: "+decoderICU.toString()); 2456 } 2457 } 2458 } 2459 2460 @Test 2461 public void TestJB4897(){ 2462 CharsetProviderICU provider = new CharsetProviderICU(); 2463 Charset charset = provider.charsetForName("x-abracadabra"); 2464 if(charset!=null && charset.canEncode()== true){ 2465 errln("provider.charsetForName() does not validate the charset names" ); 2466 } 2467 } 2468 2469 @Test 2470 public void TestJB5027() { 2471 CharsetProviderICU provider= new CharsetProviderICU(); 2472 2473 Charset fake = provider.charsetForName("doesNotExist"); 2474 if(fake != null){ 2475 errln("\"doesNotExist\" returned " + fake); 2476 } 2477 Charset xfake = provider.charsetForName("x-doesNotExist"); 2478 if(xfake!=null){ 2479 errln("\"x-doesNotExist\" returned " + xfake); 2480 } 2481 } 2482 2483 //test to make sure that number of aliases and canonical names are in the charsets that are in 2484 @Test 2485 public void TestAllNames() { 2486 2487 CharsetProviderICU provider= new CharsetProviderICU(); 2488 Object[] available = CharsetProviderICU.getAvailableNames(); 2489 for(int i=0; i<available.length;i++){ 2490 try{ 
2491 String canon = CharsetProviderICU.getICUCanonicalName((String)available[i]); 2492 2493 // ',' is not allowed by Java's charset name checker 2494 if(canon.indexOf(',')>=0){ 2495 continue; 2496 } 2497 Charset cs = provider.charsetForName((String)available[i]); 2498 2499 Object[] javaAliases = cs.aliases().toArray(); 2500 //seach for ICU canonical name in javaAliases 2501 boolean inAliasList = false; 2502 for(int j=0; j<javaAliases.length; j++){ 2503 String java = (String) javaAliases[j]; 2504 if(java.equals(canon)){ 2505 logln("javaAlias: " + java + " canon: " + canon); 2506 inAliasList = true; 2507 } 2508 } 2509 if(inAliasList == false){ 2510 errln("Could not find ICU canonical name: "+canon+ " for java canonical name: "+ available[i]+ " "+ i); 2511 } 2512 }catch(UnsupportedCharsetException ex){ 2513 errln("could no load charset "+ available[i]+" "+ex.getMessage()); 2514 continue; 2515 } 2516 } 2517 } 2518 2519 @Test 2520 public void TestDecoderImplFlush() { 2521 CharsetProviderICU provider = new CharsetProviderICU(); 2522 Charset ics = provider.charsetForName("UTF-16"); 2523 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2524 execDecoder(jcs); 2525 execDecoder(ics); 2526 } 2527 2528 @Test 2529 public void TestEncoderImplFlush() { 2530 CharsetProviderICU provider = new CharsetProviderICU(); 2531 Charset ics = provider.charsetForName("UTF-16"); 2532 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2533 execEncoder(jcs); 2534 execEncoder(ics); 2535 } 2536 private void execDecoder(Charset cs){ 2537 CharsetDecoder decoder = cs.newDecoder(); 2538 decoder.onMalformedInput(CodingErrorAction.REPORT); 2539 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2540 CharBuffer out = CharBuffer.allocate(10); 2541 CoderResult result = decoder.decode(ByteBuffer.wrap(new byte[] { -1, 2542 -2, 32, 0, 98 }), out, false); 2543 result = decoder.decode(ByteBuffer.wrap(new byte[] { 98 }), out, true); 2544 2545 logln(cs.getClass().toString()+ 
":" +result.toString()); 2546 try { 2547 result = decoder.flush(out); 2548 logln(cs.getClass().toString()+ ":" +result.toString()); 2549 } catch (Exception e) { 2550 errln(e.getMessage()+" "+cs.getClass().toString()); 2551 } 2552 } 2553 private void execEncoder(Charset cs){ 2554 CharsetEncoder encoder = cs.newEncoder(); 2555 encoder.onMalformedInput(CodingErrorAction.REPORT); 2556 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2557 ByteBuffer out = ByteBuffer.allocate(10); 2558 CoderResult result = encoder.encode(CharBuffer.wrap(new char[] { '\uFFFF', 2559 '\u2345', 32, 98 }), out, false); 2560 logln(cs.getClass().toString()+ ":" +result.toString()); 2561 result = encoder.encode(CharBuffer.wrap(new char[] { 98 }), out, true); 2562 2563 logln(cs.getClass().toString()+ ":" +result.toString()); 2564 try { 2565 result = encoder.flush(out); 2566 logln(cs.getClass().toString()+ ":" +result.toString()); 2567 } catch (Exception e) { 2568 errln(e.getMessage()+" "+cs.getClass().toString()); 2569 } 2570 } 2571 2572 @Test 2573 public void TestDecodeMalformed() { 2574 CharsetProviderICU provider = new CharsetProviderICU(); 2575 Charset ics = provider.charsetForName("UTF-16BE"); 2576 //Use SUN's charset 2577 Charset jcs = Charset.forName("UTF-16"); 2578 CoderResult ir = execMalformed(ics); 2579 CoderResult jr = execMalformed(jcs); 2580 if(ir!=jr){ 2581 errln("ICU's decoder did not return the same result as Sun. 
ICU: "+ir.toString()+" Sun: "+jr.toString()); 2582 } 2583 } 2584 2585 private CoderResult execMalformed(Charset cs){ 2586 CharsetDecoder decoder = cs.newDecoder(); 2587 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2588 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2589 ByteBuffer in = ByteBuffer.wrap(new byte[] { 0x00, 0x41, 0x00, 0x42, 0x01 }); 2590 CharBuffer out = CharBuffer.allocate(3); 2591 return decoder.decode(in, out, true); 2592 } 2593 2594 @Test 2595 public void TestJavaUTF16Decoder(){ 2596 CharsetProviderICU provider = new CharsetProviderICU(); 2597 Charset ics = provider.charsetForName("UTF-16BE"); 2598 //Use SUN's charset 2599 Charset jcs = Charset.forName("UTF-16"); 2600 Exception ie = execConvertAll(ics); 2601 Exception je = execConvertAll(jcs); 2602 if(ie!=je){ 2603 errln("ICU's decoder did not return the same result as Sun. ICU: "+ie.toString()+" Sun: "+je.toString()); 2604 } 2605 } 2606 private Exception execConvertAll(Charset cs){ 2607 ByteBuffer in = ByteBuffer.allocate(400); 2608 int i=0; 2609 while(in.position()!=in.capacity()){ 2610 in.put((byte)0xD8); 2611 in.put((byte)i); 2612 in.put((byte)0xDC); 2613 in.put((byte)i); 2614 i++; 2615 } 2616 in.limit(in.position()); 2617 in.position(0); 2618 CharsetDecoder decoder = cs.newDecoder(); 2619 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2620 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2621 try{ 2622 CharBuffer out = decoder.decode(in); 2623 if(out!=null){ 2624 logln(cs.toString()+" encoing succeeded as expected!"); 2625 } 2626 }catch ( Exception ex){ 2627 errln("Did not get expected exception for encoding: "+cs.toString()); 2628 return ex; 2629 } 2630 return null; 2631 } 2632 2633 @Test 2634 public void TestUTF32BOM(){ 2635 2636 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-32"); 2637 char[] in = new char[] { 0xd800, 0xdc00, 2638 0xd801, 0xdc01, 2639 0xdbff, 0xdfff, 2640 0xd900, 0xdd00, 2641 0x0000, 0x0041, 2642 0x0000, 0x0042, 2643 0x0000, 
0x0043}; 2644 2645 CharBuffer inBuf = CharBuffer.allocate(in.length); 2646 inBuf.put(in); 2647 CharsetEncoder encoder = cs.newEncoder(); 2648 ByteBuffer outBuf = ByteBuffer.allocate(in.length*4+4); 2649 inBuf.rewind(); 2650 encoder.encode(inBuf, outBuf, true); 2651 outBuf.rewind(); 2652 if(outBuf.get(0)!= (byte)0x00 && outBuf.get(1)!= (byte)0x00 && 2653 outBuf.get(2)!= (byte)0xFF && outBuf.get(3)!= (byte)0xFE){ 2654 errln("The UTF32 encoder did not appended bom. Length returned: " + outBuf.remaining()); 2655 } 2656 while(outBuf.hasRemaining()){ 2657 logln("0x"+hex(outBuf.get())); 2658 } 2659 CharsetDecoder decoder = cs.newDecoder(); 2660 outBuf.limit(outBuf.position()); 2661 outBuf.rewind(); 2662 CharBuffer rt = CharBuffer.allocate(in.length); 2663 CoderResult cr = decoder.decode(outBuf, rt, true); 2664 if(cr.isError()){ 2665 errln("Decoding with BOM failed. Error: "+ cr.toString()); 2666 } 2667 equals(rt, in); 2668 try{ 2669 rt.clear(); 2670 outBuf.rewind(); 2671 Charset utf16 = Charset.forName("UTF-32"); 2672 CharsetDecoder dc = utf16.newDecoder(); 2673 cr = dc.decode(outBuf, rt, true); 2674 equals(rt, in); 2675 }catch(UnsupportedCharsetException ex){ 2676 // swallow the expection. 2677 } 2678 } 2679 2680 /* 2681 * Michael Ow 2682 * Modified 070424 2683 */ 2684 /*The following two methods provides the option of exceptions when Decoding 2685 * and Encoding if needed for testing purposes. 
2686 */ 2687 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target) { 2688 smBufDecode(decoder, encoding, source, target, true); 2689 } 2690 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray) { 2691 try { 2692 smBufDecode(decoder, encoding, source, target, false, false, backedByArray); 2693 } 2694 catch (Exception ex) { 2695 System.out.println("!exception!"); 2696 } 2697 } 2698 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target) { 2699 smBufEncode(encoder, encoding, source, target, true); 2700 } 2701 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean backedByArray) { 2702 try { 2703 smBufEncode(encoder, encoding, source, target, false, false); 2704 } 2705 catch (Exception ex) { 2706 System.out.println("!exception!"); 2707 } 2708 } 2709 2710 //Test CharsetICUProvider 2711 @Test 2712 public void TestNullCanonicalName() { 2713 String enc = null; 2714 String canonicalName = CharsetProviderICU.getICUCanonicalName(enc); 2715 2716 if (canonicalName != null) { 2717 errln("getICUCanonicalName return a non-null string for given null string"); 2718 } 2719 } 2720 2721 @Test 2722 public void TestGetAllNames() { 2723 String[] names = null; 2724 2725 names = CharsetProviderICU.getAllNames(); 2726 2727 if (names == null) { 2728 errln("getAllNames returned a null string."); 2729 } 2730 } 2731 2732 //Test CharsetICU 2733 @Test 2734 public void TestCharsetContains() { 2735 boolean test; 2736 2737 CharsetProvider provider = new CharsetProviderICU(); 2738 Charset cs1 = provider.charsetForName("UTF-32"); 2739 Charset cs2 = null; 2740 2741 test = cs1.contains(cs2); 2742 2743 if (test != false) { 2744 errln("Charset.contains returned true for a null charset."); 2745 } 2746 2747 cs2 = CharsetICU.forNameICU("UTF-32"); 2748 2749 test = cs1.contains(cs2); 
2750 2751 if (test != true) { 2752 errln("Charset.contains returned false for an identical charset."); 2753 } 2754 2755 cs2 = provider.charsetForName("UTF-8"); 2756 2757 test = cs1.contains(cs2); 2758 2759 if (test != false) { 2760 errln("Charset.contains returned true for a different charset."); 2761 } 2762 } 2763 2764 @Test 2765 public void TestCharsetICUNullCharsetName() { 2766 String charsetName = null; 2767 2768 try { 2769 CharsetICU.forNameICU(charsetName); 2770 errln("CharsetICU.forName should have thown an exception after getting a null charsetName."); 2771 } 2772 catch(Exception ex) { 2773 } 2774 } 2775 2776 //Test CharsetASCII 2777 @Test 2778 public void TestCharsetASCIIOverFlow() { 2779 int byteBufferLimit; 2780 int charBufferLimit; 2781 2782 CharsetProvider provider = new CharsetProviderICU(); 2783 Charset cs = provider.charsetForName("ASCII"); 2784 CharsetEncoder encoder = cs.newEncoder(); 2785 CharsetDecoder decoder = cs.newDecoder(); 2786 2787 CharBuffer charBuffer = CharBuffer.allocate(0x90); 2788 ByteBuffer byteBuffer = ByteBuffer.allocate(0x90); 2789 2790 CharBuffer charBufferTest = CharBuffer.allocate(0xb0); 2791 ByteBuffer byteBufferTest = ByteBuffer.allocate(0xb0); 2792 2793 for(int j=0;j<=0x7f; j++){ 2794 charBuffer.put((char)j); 2795 byteBuffer.put((byte)j); 2796 } 2797 2798 byteBuffer.limit(byteBufferLimit = byteBuffer.position()); 2799 byteBuffer.position(0); 2800 charBuffer.limit(charBufferLimit = charBuffer.position()); 2801 charBuffer.position(0); 2802 2803 //test for overflow 2804 byteBufferTest.limit(byteBufferLimit - 5); 2805 byteBufferTest.position(0); 2806 charBufferTest.limit(charBufferLimit - 5); 2807 charBufferTest.position(0); 2808 try { 2809 smBufDecode(decoder, "ASCII", byteBuffer, charBufferTest, true, false); 2810 errln("Overflow exception while decoding ASCII should have been thrown."); 2811 } 2812 catch(Exception ex) { 2813 } 2814 try { 2815 smBufEncode(encoder, "ASCII", charBuffer, byteBufferTest, true, false); 2816 
errln("Overflow exception while encoding ASCII should have been thrown."); 2817 } 2818 catch (Exception ex) { 2819 } 2820 2821 // For better code coverage 2822 /* For better code coverage */ 2823 byte byteout[] = { 2824 (byte)0x01 2825 }; 2826 char charin[] = { 2827 (char)0x0001, (char)0x0002 2828 }; 2829 ByteBuffer bb = ByteBuffer.wrap(byteout); 2830 CharBuffer cb = CharBuffer.wrap(charin); 2831 // Cast up to CharSequence to insulate against the CharBuffer.subSequence() return type change 2832 // which makes code compiled for a newer JDK not run on an older one. 2833 CharBuffer cb2 = CharBuffer.wrap(((CharSequence)cb).subSequence(0, 2)); 2834 encoder.reset(); 2835 if (!(encoder.encode(cb2, bb, true)).isOverflow()) { 2836 errln("Overflow error while encoding ASCII should have occurred."); 2837 } 2838 } 2839 2840 //Test CharsetUTF7 2841 @Test 2842 public void TestCharsetUTF7() { 2843 CoderResult result = CoderResult.UNDERFLOW; 2844 CharsetProvider provider = new CharsetProviderICU(); 2845 Charset cs = provider.charsetForName("UTF-7"); 2846 CharsetEncoder encoder = cs.newEncoder(); 2847 CharsetDecoder decoder = cs.newDecoder(); 2848 2849 CharBuffer us = CharBuffer.allocate(0x100); 2850 ByteBuffer bs = ByteBuffer.allocate(0x100); 2851 2852 /* Unicode : A<not equal to Alpha Lamda>. */ 2853 /* UTF7: AImIDkQ. 
*/ 2854 us.put((char)0x41); us.put((char)0x2262); us.put((char)0x391); us.put((char)0x39B); us.put((char)0x2e); 2855 bs.put((byte)0x41); bs.put((byte)0x2b); bs.put((byte)0x49); bs.put((byte)0x6d); 2856 bs.put((byte)0x49); bs.put((byte)0x44); bs.put((byte)0x6b); bs.put((byte)0x51); 2857 bs.put((byte)0x4f); bs.put((byte)0x62); bs.put((byte)0x2e); 2858 2859 bs.limit(bs.position()); 2860 bs.position(0); 2861 us.limit(us.position()); 2862 us.position(0); 2863 2864 smBufDecode(decoder, "UTF-7", bs, us); 2865 smBufEncode(encoder, "UTF-7", us, bs); 2866 2867 /* Testing UTF-7 toUnicode with substitute callbacks */ 2868 { 2869 byte [] bytesTestErrorConsumption = { 2870 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */ 2871 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 2872 2873 }; 2874 char [] unicodeTestErrorConsumption = { 2875 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 2876 }; 2877 bs = ByteBuffer.wrap(bytesTestErrorConsumption); 2878 us = CharBuffer.wrap(unicodeTestErrorConsumption); 2879 2880 CodingErrorAction savedMal = decoder.malformedInputAction(); 2881 CodingErrorAction savedUMap = decoder.unmappableCharacterAction(); 2882 decoder.onMalformedInput(CodingErrorAction.REPLACE); 2883 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 2884 smBufDecode(decoder, "UTF-7 DE Error Consumption", bs, us); 2885 decoder.onMalformedInput(savedMal); 2886 decoder.onUnmappableCharacter(savedUMap); 2887 } 2888 /* ticket 6151 */ 2889 CharBuffer smallus = CharBuffer.allocate(1); 2890 ByteBuffer bigbs = ByteBuffer.allocate(3); 2891 bigbs.put((byte)0x41); bigbs.put((byte)0x41); bigbs.put((byte)0x41); 2892 bigbs.position(0); 2893 try { 2894 smBufDecode(decoder, "UTF-7-DE-Overflow", bigbs, smallus, true, false); 2895 errln("Buffer Overflow exception should have been thrown while decoding UTF-7."); 2896 } catch (Exception ex) 
{ 2897 } 2898 2899 //The rest of the code in this method is to provide better code coverage 2900 CharBuffer ccus = CharBuffer.allocate(0x10); 2901 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 2902 2903 //start of charset decoder code coverage code 2904 //test for accurate illegal and control character checking 2905 ccbs.put((byte)0x0D); ccbs.put((byte)0x05); 2906 ccus.put((char)0x0000); 2907 2908 ccbs.limit(ccbs.position()); 2909 ccbs.position(0); 2910 ccus.limit(ccus.position()); 2911 ccus.position(0); 2912 2913 try { 2914 smBufDecode(decoder, "UTF-7-CC-DE-1", ccbs, ccus, true, false); 2915 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2916 } 2917 catch (Exception ex) { 2918 } 2919 2920 ccbs.clear(); 2921 ccus.clear(); 2922 2923 //test for illegal base64 character 2924 ccbs.put((byte)0x2b); ccbs.put((byte)0xff); 2925 ccus.put((char)0x0000); 2926 2927 ccbs.limit(ccbs.position()); 2928 ccbs.position(0); 2929 ccus.limit(ccus.position()); 2930 ccus.position(0); 2931 2932 try { 2933 smBufDecode(decoder, "UTF-7-CC-DE-2", ccbs, ccus, true, false); 2934 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2935 } 2936 catch (Exception ex) { 2937 } 2938 2939 ccbs.clear(); 2940 ccus.clear(); 2941 2942 //test for illegal order of the base64 character sequence 2943 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 2944 ccus.put((char)0x0000); ccus.put((char)0x0000); 2945 2946 ccbs.limit(ccbs.position()); 2947 ccbs.position(0); 2948 ccus.limit(ccus.position()); 2949 ccus.position(0); 2950 2951 try { 2952 smBufDecode(decoder, "UTF-7-CC-DE-3", ccbs, ccus, true, false); 2953 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2954 } 2955 catch (Exception ex) { 2956 } 2957 2958 ccbs.clear(); 2959 ccus.clear(); 2960 2961 //test for illegal order of the base64 character sequence 2962 ccbs.put((byte)0x2b); 
ccbs.put((byte)0x0a); ccbs.put((byte)0x09); 2963 ccus.put((char)0x0000); 2964 2965 ccbs.limit(ccbs.position()); 2966 ccbs.position(0); 2967 ccus.limit(ccus.position()); 2968 ccus.position(0); 2969 2970 try { 2971 smBufDecode(decoder, "UTF-7-CC-DE-4", ccbs, ccus, true, false); 2972 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2973 } 2974 catch (Exception ex) { 2975 } 2976 2977 ccbs.clear(); 2978 ccus.clear(); 2979 2980 //test for illegal order of the base64 character sequence 2981 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x0a); 2982 ccus.put((char)0x0000); 2983 2984 ccbs.limit(ccbs.position()); 2985 ccbs.position(0); 2986 ccus.limit(ccus.position()); 2987 ccus.position(0); 2988 2989 try { 2990 smBufDecode(decoder, "UTF-7-CC-DE-5", ccbs, ccus, true, false); 2991 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2992 } 2993 catch (Exception ex) { 2994 } 2995 2996 ccbs.clear(); 2997 ccus.clear(); 2998 2999 //test for illegal order of the base64 character sequence 3000 ccbs.put((byte)0x2b); ccbs.put((byte)0x00); 3001 ccus.put((char)0x0000); 3002 3003 ccbs.limit(ccbs.position()); 3004 ccbs.position(0); 3005 ccus.limit(ccus.position()); 3006 ccus.position(0); 3007 3008 try { 3009 smBufDecode(decoder, "UTF-7-CC-DE-6", ccbs, ccus, true, false); 3010 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 3011 } 3012 catch (Exception ex) { 3013 } 3014 3015 ccbs.clear(); 3016 ccus.clear(); 3017 3018 //test for overflow buffer error 3019 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); 3020 3021 ccbs.limit(ccbs.position()); 3022 ccbs.position(0); 3023 ccus.limit(0); 3024 ccus.position(0); 3025 3026 try { 3027 smBufDecode(decoder, "UTF-7-CC-DE-7", ccbs, ccus, true, false); 3028 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 3029 } 3030 catch (Exception ex) { 3031 } 3032 3033 ccbs.clear(); 3034 ccus.clear(); 3035 3036 //test 
for overflow buffer error 3037 ccbs.put((byte)0x0c); ccbs.put((byte)0x0c); 3038 3039 ccbs.limit(ccbs.position()); 3040 ccbs.position(0); 3041 ccus.limit(0); 3042 ccus.position(0); 3043 3044 try { 3045 smBufDecode(decoder, "UTF-7-CC-DE-8", ccbs, ccus, true, false); 3046 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 3047 } 3048 catch (Exception ex) { 3049 } 3050 //end of charset decoder code coverage code 3051 3052 //start of charset encoder code coverage code 3053 ccbs.clear(); 3054 ccus.clear(); 3055 //test for overflow buffer error 3056 ccus.put((char)0x002b); 3057 ccbs.put((byte)0x2b); 3058 3059 ccbs.limit(ccbs.position()); 3060 ccbs.position(0); 3061 ccus.limit(ccus.position()); 3062 ccus.position(0); 3063 3064 try { 3065 smBufEncode(encoder, "UTF-7-CC-EN-1", ccus, ccbs, true, false); 3066 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3067 } 3068 catch (Exception ex) { 3069 } 3070 3071 ccbs.clear(); 3072 ccus.clear(); 3073 3074 //test for overflow buffer error 3075 ccus.put((char)0x002b); ccus.put((char)0x2262); 3076 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3077 3078 ccbs.limit(ccbs.position()); 3079 ccbs.position(0); 3080 ccus.limit(ccus.position()); 3081 ccus.position(0); 3082 3083 try { 3084 smBufEncode(encoder, "UTF-7-CC-EN-2", ccus, ccbs, true, false); 3085 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3086 } 3087 catch (Exception ex) { 3088 } 3089 3090 ccbs.clear(); 3091 ccus.clear(); 3092 3093 //test for overflow buffer error 3094 ccus.put((char)0x2262); ccus.put((char)0x0049); 3095 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3096 ccbs.limit(ccbs.position()); 3097 ccbs.position(0); 3098 ccus.limit(ccus.position()); 3099 ccus.position(0); 3100 3101 try { 3102 smBufEncode(encoder, "UTF-7-CC-EN-3", ccus, ccbs, true, false); 3103 
errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3104 } 3105 catch (Exception ex) { 3106 } 3107 3108 ccbs.clear(); 3109 ccus.clear(); 3110 3111 //test for overflow buffer error 3112 ccus.put((char)0x2262); ccus.put((char)0x0395); 3113 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3114 ccbs.limit(ccbs.position()); 3115 ccbs.position(0); 3116 ccus.limit(ccus.position()); 3117 ccus.position(0); 3118 3119 try { 3120 smBufEncode(encoder, "UTF-7-CC-EN-4", ccus, ccbs, true, false); 3121 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3122 } 3123 catch (Exception ex) { 3124 } 3125 3126 ccbs.clear(); 3127 ccus.clear(); 3128 3129 //test for overflow buffer error 3130 ccus.put((char)0x2262); ccus.put((char)0x0395); 3131 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3132 ccbs.limit(ccbs.position()); 3133 ccbs.position(0); 3134 ccus.limit(ccus.position()); 3135 ccus.position(0); 3136 3137 try { 3138 smBufEncode(encoder, "UTF-7-CC-EN-5", ccus, ccbs, true, false); 3139 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3140 } 3141 catch (Exception ex) { 3142 } 3143 3144 ccbs.clear(); 3145 ccus.clear(); 3146 3147 //test for overflow buffer error 3148 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3149 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3150 ccbs.limit(ccbs.position()); 3151 ccbs.position(0); 3152 ccus.limit(ccus.position()); 3153 ccus.position(0); 3154 3155 try { 3156 smBufEncode(encoder, "UTF-7-CC-EN-6", ccus, ccbs, true, false); 3157 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3158 } 3159 catch (Exception ex) { 3160 } 3161 3162 ccbs.clear(); 3163 ccus.clear(); 3164 3165 //test for 
overflow buffer error 3166 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3167 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3168 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3169 ccbs.limit(ccbs.position()); 3170 ccbs.position(0); 3171 ccus.limit(ccus.position()); 3172 ccus.position(0); 3173 3174 try { 3175 smBufEncode(encoder, "UTF-7-CC-EN-7", ccus, ccbs, true, false); 3176 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3177 } 3178 catch (Exception ex) { 3179 } 3180 3181 ccbs.clear(); 3182 ccus.clear(); 3183 3184 //test for overflow buffer error 3185 ccus.put((char)0x0049); ccus.put((char)0x0048); 3186 ccbs.put((byte)0x00); 3187 ccbs.limit(ccbs.position()); 3188 ccbs.position(0); 3189 ccus.limit(ccus.position()); 3190 ccus.position(0); 3191 3192 try { 3193 smBufEncode(encoder, "UTF-7-CC-EN-8", ccus, ccbs, true, false); 3194 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3195 } 3196 catch (Exception ex) { 3197 } 3198 3199 ccbs.clear(); 3200 ccus.clear(); 3201 3202 //test for overflow buffer error 3203 ccus.put((char)0x2262); 3204 ccbs.put((byte)0x00); 3205 ccbs.limit(ccbs.position()); 3206 ccbs.position(0); 3207 ccus.limit(ccus.position()); 3208 ccus.position(0); 3209 3210 try { 3211 smBufEncode(encoder, "UTF-7-CC-EN-9", ccus, ccbs, true, false); 3212 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3213 } 3214 catch (Exception ex) { 3215 } 3216 3217 ccbs.clear(); 3218 ccus.clear(); 3219 3220 //test for overflow buffer error 3221 ccus.put((char)0x2262); ccus.put((char)0x0049); 3222 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3223 ccbs.limit(ccbs.position()); 3224 ccbs.position(0); 3225 ccus.limit(ccus.position()); 3226 ccus.position(0); 3227 3228 try { 3229 smBufEncode(encoder, "UTF-7-CC-EN-10", ccus, 
ccbs, true, false); 3230 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3231 } 3232 catch (Exception ex) { 3233 } 3234 3235 ccbs.clear(); 3236 ccus.clear(); 3237 3238 //test for overflow buffer error 3239 ccus.put((char)0x2262); 3240 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x6d); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 3241 3242 ccbs.limit(ccbs.position()); 3243 ccbs.position(0); 3244 ccus.limit(ccus.position()); 3245 ccus.position(0); 3246 try { 3247 smBufEncode(encoder, "UTF-7-CC-EN-11", ccus, ccbs, false, true); 3248 } catch (Exception ex) { 3249 errln("Exception while encoding UTF-7 code coverage test should not have been thrown."); 3250 } 3251 3252 ccbs.clear(); 3253 ccus.clear(); 3254 3255 //test for overflow buffer error 3256 encoder.reset(); 3257 ccus.put((char)0x3980); ccus.put((char)0x2715); 3258 ccbs.put((byte)0x2b); ccbs.put((byte)0x4f); ccbs.put((byte)0x59); ccbs.put((byte)0x2d); 3259 3260 ccbs.limit(ccbs.position()); 3261 ccbs.position(0); 3262 ccus.limit(ccus.position()); 3263 ccus.position(0); 3264 3265 result = encoder.encode(ccus, ccbs, true); 3266 result = encoder.flush(ccbs); 3267 if (!result.isOverflow()) { 3268 errln("Overflow buffer while encoding UTF-7 should have occurred."); 3269 } 3270 //end of charset encoder code coverage code 3271 } 3272 3273 //Test Charset ISCII 3274 @Test 3275 public void TestCharsetISCII() { 3276 CharsetProvider provider = new CharsetProviderICU(); 3277 Charset cs = provider.charsetForName("ISCII,version=0"); 3278 CharsetEncoder encoder = cs.newEncoder(); 3279 CharsetDecoder decoder = cs.newDecoder(); 3280 3281 CharBuffer us = CharBuffer.allocate(0x100); 3282 ByteBuffer bs = ByteBuffer.allocate(0x100); 3283 ByteBuffer bsr = ByteBuffer.allocate(0x100); 3284 3285 //test full range of Devanagari 3286 us.put((char)0x0901); us.put((char)0x0902); us.put((char)0x0903); us.put((char)0x0905); us.put((char)0x0906); us.put((char)0x0907); 3287 us.put((char)0x0908); 
us.put((char)0x0909); us.put((char)0x090A); us.put((char)0x090B); us.put((char)0x090E); us.put((char)0x090F); 3288 us.put((char)0x0910); us.put((char)0x090D); us.put((char)0x0912); us.put((char)0x0913); us.put((char)0x0914); us.put((char)0x0911); 3289 us.put((char)0x0915); us.put((char)0x0916); us.put((char)0x0917); us.put((char)0x0918); us.put((char)0x0919); us.put((char)0x091A); 3290 us.put((char)0x091B); us.put((char)0x091C); us.put((char)0x091D); us.put((char)0x091E); us.put((char)0x091F); us.put((char)0x0920); 3291 us.put((char)0x0921); us.put((char)0x0922); us.put((char)0x0923); us.put((char)0x0924); us.put((char)0x0925); us.put((char)0x0926); 3292 us.put((char)0x0927); us.put((char)0x0928); us.put((char)0x0929); us.put((char)0x092A); us.put((char)0x092B); us.put((char)0x092C); 3293 us.put((char)0x092D); us.put((char)0x092E); us.put((char)0x092F); us.put((char)0x095F); us.put((char)0x0930); us.put((char)0x0931); 3294 us.put((char)0x0932); us.put((char)0x0933); us.put((char)0x0934); us.put((char)0x0935); us.put((char)0x0936); us.put((char)0x0937); 3295 us.put((char)0x0938); us.put((char)0x0939); us.put((char)0x200D); us.put((char)0x093E); us.put((char)0x093F); us.put((char)0x0940); 3296 us.put((char)0x0941); us.put((char)0x0942); us.put((char)0x0943); us.put((char)0x0946); us.put((char)0x0947); us.put((char)0x0948); 3297 us.put((char)0x0945); us.put((char)0x094A); us.put((char)0x094B); us.put((char)0x094C); us.put((char)0x0949); us.put((char)0x094D); 3298 us.put((char)0x093D); us.put((char)0x0966); us.put((char)0x0967); us.put((char)0x0968); us.put((char)0x0969); us.put((char)0x096A); 3299 us.put((char)0x096B); us.put((char)0x096C); us.put((char)0x096D); us.put((char)0x096E); us.put((char)0x096F); 3300 3301 bs.put((byte)0xEF); bs.put((byte)0x42); 3302 bs.put((byte)0xA1); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xA4); bs.put((byte)0xA5); bs.put((byte)0xA6); 3303 bs.put((byte)0xA7); bs.put((byte)0xA8); bs.put((byte)0xA9); bs.put((byte)0xAA); 
bs.put((byte)0xAB); bs.put((byte)0xAC); 3304 bs.put((byte)0xAD); bs.put((byte)0xAE); bs.put((byte)0xAF); bs.put((byte)0xB0); bs.put((byte)0xB1); bs.put((byte)0xB2); 3305 bs.put((byte)0xB3); bs.put((byte)0xB4); bs.put((byte)0xB5); bs.put((byte)0xB6); bs.put((byte)0xB7); bs.put((byte)0xB8); 3306 bs.put((byte)0xB9); bs.put((byte)0xBA); bs.put((byte)0xBB); bs.put((byte)0xBC); bs.put((byte)0xBD); bs.put((byte)0xBE); 3307 bs.put((byte)0xBF); bs.put((byte)0xC0); bs.put((byte)0xC1); bs.put((byte)0xC2); bs.put((byte)0xC3); bs.put((byte)0xC4); 3308 bs.put((byte)0xC5); bs.put((byte)0xC6); bs.put((byte)0xC7); bs.put((byte)0xC8); bs.put((byte)0xC9); bs.put((byte)0xCA); 3309 bs.put((byte)0xCB); bs.put((byte)0xCC); bs.put((byte)0xCD); bs.put((byte)0xCE); bs.put((byte)0xCF); bs.put((byte)0xD0); 3310 bs.put((byte)0xD1); bs.put((byte)0xD2); bs.put((byte)0xD3); bs.put((byte)0xD4); bs.put((byte)0xD5); bs.put((byte)0xD6); 3311 bs.put((byte)0xD7); bs.put((byte)0xD8); bs.put((byte)0xD9); bs.put((byte)0xDA); bs.put((byte)0xDB); bs.put((byte)0xDC); 3312 bs.put((byte)0xDD); bs.put((byte)0xDE); bs.put((byte)0xDF); bs.put((byte)0xE0); bs.put((byte)0xE1); bs.put((byte)0xE2); 3313 bs.put((byte)0xE3); bs.put((byte)0xE4); bs.put((byte)0xE5); bs.put((byte)0xE6); bs.put((byte)0xE7); bs.put((byte)0xE8); 3314 bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xF1); bs.put((byte)0xF2); bs.put((byte)0xF3); bs.put((byte)0xF4); 3315 bs.put((byte)0xF5); bs.put((byte)0xF6); bs.put((byte)0xF7); bs.put((byte)0xF8); bs.put((byte)0xF9); bs.put((byte)0xFA); 3316 3317 bsr.put((byte)0xA1); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xA4); bsr.put((byte)0xA5); bsr.put((byte)0xA6); 3318 bsr.put((byte)0xA7); bsr.put((byte)0xA8); bsr.put((byte)0xA9); bsr.put((byte)0xAA); bsr.put((byte)0xAB); bsr.put((byte)0xAC); 3319 bsr.put((byte)0xAD); bsr.put((byte)0xAE); bsr.put((byte)0xAF); bsr.put((byte)0xB0); bsr.put((byte)0xB1); bsr.put((byte)0xB2); 3320 bsr.put((byte)0xB3); bsr.put((byte)0xB4); 
bsr.put((byte)0xB5); bsr.put((byte)0xB6); bsr.put((byte)0xB7); bsr.put((byte)0xB8); 3321 bsr.put((byte)0xB9); bsr.put((byte)0xBA); bsr.put((byte)0xBB); bsr.put((byte)0xBC); bsr.put((byte)0xBD); bsr.put((byte)0xBE); 3322 bsr.put((byte)0xBF); bsr.put((byte)0xC0); bsr.put((byte)0xC1); bsr.put((byte)0xC2); bsr.put((byte)0xC3); bsr.put((byte)0xC4); 3323 bsr.put((byte)0xC5); bsr.put((byte)0xC6); bsr.put((byte)0xC7); bsr.put((byte)0xC8); bsr.put((byte)0xC9); bsr.put((byte)0xCA); 3324 bsr.put((byte)0xCB); bsr.put((byte)0xCC); bsr.put((byte)0xCD); bsr.put((byte)0xCE); bsr.put((byte)0xCF); bsr.put((byte)0xD0); 3325 bsr.put((byte)0xD1); bsr.put((byte)0xD2); bsr.put((byte)0xD3); bsr.put((byte)0xD4); bsr.put((byte)0xD5); bsr.put((byte)0xD6); 3326 bsr.put((byte)0xD7); bsr.put((byte)0xD8); bsr.put((byte)0xD9); bsr.put((byte)0xDA); bsr.put((byte)0xDB); bsr.put((byte)0xDC); 3327 bsr.put((byte)0xDD); bsr.put((byte)0xDE); bsr.put((byte)0xDF); bsr.put((byte)0xE0); bsr.put((byte)0xE1); bsr.put((byte)0xE2); 3328 bsr.put((byte)0xE3); bsr.put((byte)0xE4); bsr.put((byte)0xE5); bsr.put((byte)0xE6); bsr.put((byte)0xE7); bsr.put((byte)0xE8); 3329 bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xF1); bsr.put((byte)0xF2); bsr.put((byte)0xF3); bsr.put((byte)0xF4); 3330 bsr.put((byte)0xF5); bsr.put((byte)0xF6); bsr.put((byte)0xF7); bsr.put((byte)0xF8); bsr.put((byte)0xF9); bsr.put((byte)0xFA); 3331 3332 //test Soft Halant 3333 us.put((char)0x0915); us.put((char)0x094d); us.put((char)0x200D); 3334 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE9); 3335 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE9); 3336 3337 //test explicit halant 3338 us.put((char)0x0915); us.put((char)0x094D); us.put((char)0x200C); 3339 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE8); 3340 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE8); 3341 3342 //test double danda 3343 us.put((char)0x0965); 3344 bs.put((byte)0xEA); bs.put((byte)0xEA); 3345 bsr.put((byte)0xEA); 
bsr.put((byte)0xEA); 3346 3347 //test ASCII 3348 us.put((char)0x1B); us.put((char)0x24); us.put((char)0x29); us.put((char)0x47); us.put((char)0x0E); us.put((char)0x23); 3349 us.put((char)0x21); us.put((char)0x23); us.put((char)0x22); us.put((char)0x23); us.put((char)0x23); us.put((char)0x23); 3350 us.put((char)0x24); us.put((char)0x23); us.put((char)0x25); us.put((char)0x23); us.put((char)0x26); us.put((char)0x23); 3351 us.put((char)0x27); us.put((char)0x23); us.put((char)0x28); us.put((char)0x23); us.put((char)0x29); us.put((char)0x23); 3352 us.put((char)0x2A); us.put((char)0x23); us.put((char)0x2B); us.put((char)0x0F); us.put((char)0x2F); us.put((char)0x2A); 3353 3354 bs.put((byte)0x1B); bs.put((byte)0x24); bs.put((byte)0x29); bs.put((byte)0x47); bs.put((byte)0x0E); bs.put((byte)0x23); 3355 bs.put((byte)0x21); bs.put((byte)0x23); bs.put((byte)0x22); bs.put((byte)0x23); bs.put((byte)0x23); bs.put((byte)0x23); 3356 bs.put((byte)0x24); bs.put((byte)0x23); bs.put((byte)0x25); bs.put((byte)0x23); bs.put((byte)0x26); bs.put((byte)0x23); 3357 bs.put((byte)0x27); bs.put((byte)0x23); bs.put((byte)0x28); bs.put((byte)0x23); bs.put((byte)0x29); bs.put((byte)0x23); 3358 bs.put((byte)0x2A); bs.put((byte)0x23); bs.put((byte)0x2B); bs.put((byte)0x0F); bs.put((byte)0x2F); bs.put((byte)0x2A); 3359 3360 bsr.put((byte)0x1B); bsr.put((byte)0x24); bsr.put((byte)0x29); bsr.put((byte)0x47); bsr.put((byte)0x0E); bsr.put((byte)0x23); 3361 bsr.put((byte)0x21); bsr.put((byte)0x23); bsr.put((byte)0x22); bsr.put((byte)0x23); bsr.put((byte)0x23); bsr.put((byte)0x23); 3362 bsr.put((byte)0x24); bsr.put((byte)0x23); bsr.put((byte)0x25); bsr.put((byte)0x23); bsr.put((byte)0x26); bsr.put((byte)0x23); 3363 bsr.put((byte)0x27); bsr.put((byte)0x23); bsr.put((byte)0x28); bsr.put((byte)0x23); bsr.put((byte)0x29); bsr.put((byte)0x23); 3364 bsr.put((byte)0x2A); bsr.put((byte)0x23); bsr.put((byte)0x2B); bsr.put((byte)0x0F); bsr.put((byte)0x2F); bsr.put((byte)0x2A); 3365 3366 //test from Lotus 3367 //Some 
of the Lotus ISCII code points have been changed or commented out. 3368 us.put((char)0x0061); us.put((char)0x0915); us.put((char)0x000D); us.put((char)0x000A); us.put((char)0x0996); us.put((char)0x0043); 3369 us.put((char)0x0930); us.put((char)0x094D); us.put((char)0x200D); us.put((char)0x0901); us.put((char)0x000D); us.put((char)0x000A); 3370 us.put((char)0x0905); us.put((char)0x0985); us.put((char)0x0043); us.put((char)0x0915); us.put((char)0x0921); us.put((char)0x002B); 3371 us.put((char)0x095F); 3372 bs.put((byte)0x61); bs.put((byte)0xB3); 3373 bs.put((byte)0x0D); bs.put((byte)0x0A); 3374 bs.put((byte)0xEF); bs.put((byte)0x42); 3375 bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xB4); bs.put((byte)0x43); 3376 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xCF); bs.put((byte)0xE8); bs.put((byte)0xE9); bs.put((byte)0xA1); bs.put((byte)0x0D); bs.put((byte)0x0A); bs.put((byte)0xEF); bs.put((byte)0x42); 3377 bs.put((byte)0xA4); bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xA4); bs.put((byte)0x43); bs.put((byte)0xEF); 3378 bs.put((byte)0x42); bs.put((byte)0xB3); bs.put((byte)0xBF); bs.put((byte)0x2B); 3379 bs.put((byte)0xCE); 3380 bsr.put((byte)0x61); bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xEF); bsr.put((byte)0x30); bsr.put((byte)0xB3); 3381 bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xB4); bsr.put((byte)0x43); 3382 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xCF); bsr.put((byte)0xE8); bsr.put((byte)0xD9); bsr.put((byte)0xEF); 3383 bsr.put((byte)0x42); bsr.put((byte)0xA1); bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3384 bsr.put((byte)0xA4); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xA4); bsr.put((byte)0x43); bsr.put((byte)0xEF); 3385 bsr.put((byte)0x42); bsr.put((byte)0xB3); bsr.put((byte)0xBF); bsr.put((byte)0x2B); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3386 bsr.put((byte)0xCE); 3387 //end of test 
from Lotus 3388 3389 //tamil range 3390 us.put((char)0x0B86); us.put((char)0x0B87); us.put((char)0x0B88); 3391 bs.put((byte)0xEF); bs.put((byte)0x44); bs.put((byte)0xA5); bs.put((byte)0xA6); bs.put((byte)0xA7); 3392 bsr.put((byte)0xEF); bsr.put((byte)0x44); bsr.put((byte)0xA5); bsr.put((byte)0xA6); bsr.put((byte)0xA7); 3393 3394 //telugu range 3395 us.put((char)0x0C05); us.put((char)0x0C02); us.put((char)0x0C03); us.put((char)0x0C31); 3396 bs.put((byte)0xEF); bs.put((byte)0x45); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xD0); 3397 bsr.put((byte)0xEF); bsr.put((byte)0x45); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xD0); 3398 3399 //kannada range 3400 us.put((char)0x0C85); us.put((char)0x0C82); us.put((char)0x0C83); 3401 bs.put((byte)0xEF); bs.put((byte)0x48); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); 3402 bsr.put((byte)0xEF); bsr.put((byte)0x48); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); 3403 3404 //test Abbr sign and Anudatta 3405 us.put((char)0x0970); us.put((char)0x0952); us.put((char)0x0960); us.put((char)0x0944); us.put((char)0x090C); us.put((char)0x0962); 3406 us.put((char)0x0961); us.put((char)0x0963); us.put((char)0x0950); us.put((char)0x093D); us.put((char)0x0958); us.put((char)0x0959); 3407 us.put((char)0x095A); us.put((char)0x095B); us.put((char)0x095C); us.put((char)0x095D); us.put((char)0x095E); us.put((char)0x0020); 3408 us.put((char)0x094D); us.put((char)0x0930); us.put((char)0x0000); us.put((char)0x00A0); 3409 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xF0); bs.put((byte)0xBF); bs.put((byte)0xF0); bs.put((byte)0xB8); 3410 bs.put((byte)0xAA); bs.put((byte)0xE9); bs.put((byte)0xDF); bs.put((byte)0xE9); bs.put((byte)0xA6); bs.put((byte)0xE9); 3411 bs.put((byte)0xDB); bs.put((byte)0xE9); bs.put((byte)0xA7); bs.put((byte)0xE9); bs.put((byte)0xDC); bs.put((byte)0xE9); 3412 bs.put((byte)0xA1); bs.put((byte)0xE9); bs.put((byte)0xEA); 
bs.put((byte)0xE9); bs.put((byte)0xB3); bs.put((byte)0xE9); 3413 bs.put((byte)0xB4); bs.put((byte)0xE9); bs.put((byte)0xB5); bs.put((byte)0xE9); bs.put((byte)0xBA); bs.put((byte)0xE9); 3414 bs.put((byte)0xBF); bs.put((byte)0xE9); bs.put((byte)0xC0); bs.put((byte)0xE9); bs.put((byte)0xC9); bs.put((byte)0xE9); 3415 bs.put((byte)0x20); bs.put((byte)0xE8); bs.put((byte)0xCF); bs.put((byte)0x00); bs.put((byte)0xA0); 3416 //bs.put((byte)0xEF); bs.put((byte)0x30); 3417 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xF0); bsr.put((byte)0xBF); bsr.put((byte)0xF0); bsr.put((byte)0xB8); 3418 bsr.put((byte)0xAA); bsr.put((byte)0xE9); bsr.put((byte)0xDF); bsr.put((byte)0xE9); bsr.put((byte)0xA6); bsr.put((byte)0xE9); 3419 bsr.put((byte)0xDB); bsr.put((byte)0xE9); bsr.put((byte)0xA7); bsr.put((byte)0xE9); bsr.put((byte)0xDC); bsr.put((byte)0xE9); 3420 bsr.put((byte)0xA1); bsr.put((byte)0xE9); bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xB3); bsr.put((byte)0xE9); 3421 bsr.put((byte)0xB4); bsr.put((byte)0xE9); bsr.put((byte)0xB5); bsr.put((byte)0xE9); bsr.put((byte)0xBA); bsr.put((byte)0xE9); 3422 bsr.put((byte)0xBF); bsr.put((byte)0xE9); bsr.put((byte)0xC0); bsr.put((byte)0xE9); bsr.put((byte)0xC9); bsr.put((byte)0xE9); 3423 bsr.put((byte)0xD9); bsr.put((byte)0xE8); bsr.put((byte)0xCF); bsr.put((byte)0x00); bsr.put((byte)0xA0); 3424 3425 bs.limit(bs.position()); 3426 bs.position(0); 3427 us.limit(us.position()); 3428 us.position(0); 3429 bsr.limit(bsr.position()); 3430 bsr.position(0); 3431 3432 //round trip test 3433 try { 3434 smBufDecode(decoder, "ISCII-part1", bsr, us, false, true); 3435 smBufEncode(encoder, "ISCII-part2", us, bs); 3436 smBufDecode(decoder, "ISCII-part3", bs, us, false, true); 3437 } catch (Exception ex) { 3438 errln("ISCII round trip test failed."); 3439 } 3440 3441 //Test new characters in the ISCII charset 3442 encoder = provider.charsetForName("ISCII,version=0").newEncoder(); 3443 decoder = 
provider.charsetForName("ISCII,version=0").newDecoder(); 3444 char u_pts[] = { 3445 /* DEV */ (char)0x0904, 3446 /* PNJ */ (char)0x0A01, (char)0x0A03, (char)0x0A33, (char)0x0A70 3447 }; 3448 byte b_pts[] = { 3449 (byte)0xef, (byte)0x42, 3450 /* DEV */ (byte)0xa4, (byte)0xe0, 3451 /* PNJ */ (byte)0xef, (byte)0x4b, (byte)0xa1, (byte)0xa3, (byte)0xd2, (byte)0xf0, (byte)0xbf 3452 }; 3453 us = CharBuffer.allocate(u_pts.length); 3454 bs = ByteBuffer.allocate(b_pts.length); 3455 us.put(u_pts); 3456 bs.put(b_pts); 3457 3458 bs.limit(bs.position()); 3459 bs.position(0); 3460 us.limit(us.position()); 3461 us.position(0); 3462 3463 try { 3464 smBufDecode(decoder, "ISCII-update", bs, us, true, true); 3465 bs.position(0); 3466 us.position(0); 3467 smBufEncode(encoder, "ISCII-update", us, bs, true, true); 3468 } catch (Exception ex) { 3469 errln("Error occurred while encoding/decoding ISCII with the new characters."); 3470 } 3471 3472 //The rest of the code in this method is to provide better code coverage 3473 CharBuffer ccus = CharBuffer.allocate(0x10); 3474 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 3475 3476 //start of charset decoder code coverage code 3477 //test overflow buffer 3478 ccbs.put((byte)0x49); 3479 3480 ccbs.limit(ccbs.position()); 3481 ccbs.position(0); 3482 ccus.limit(0); 3483 ccus.position(0); 3484 3485 try { 3486 smBufDecode(decoder, "ISCII-CC-DE-1", ccbs, ccus, true, false); 3487 errln("Exception while decoding ISCII should have been thrown."); 3488 } 3489 catch (Exception ex) { 3490 } 3491 3492 ccbs.clear(); 3493 ccus.clear(); 3494 3495 //test atr overflow buffer 3496 ccbs.put((byte)0xEF); ccbs.put((byte)0x40); ccbs.put((byte)0xEF); ccbs.put((byte)0x20); 3497 ccus.put((char)0x00); 3498 3499 ccbs.limit(ccbs.position()); 3500 ccbs.position(0); 3501 ccus.limit(ccus.position()); 3502 ccus.position(0); 3503 3504 try { 3505 smBufDecode(decoder, "ISCII-CC-DE-2", ccbs, ccus, true, false); 3506 errln("Exception while decoding ISCII should have been thrown."); 
3507 } 3508 catch (Exception ex) { 3509 } 3510 3511 //end of charset decoder code coverage code 3512 3513 ccbs.clear(); 3514 ccus.clear(); 3515 3516 //start of charset encoder code coverage code 3517 //test ascii overflow buffer 3518 ccus.put((char)0x41); 3519 3520 ccus.limit(ccus.position()); 3521 ccus.position(0); 3522 ccbs.limit(0); 3523 ccbs.position(0); 3524 3525 try { 3526 smBufEncode(encoder, "ISCII-CC-EN-1", ccus, ccbs, true, false); 3527 errln("Exception while encoding ISCII should have been thrown."); 3528 } 3529 catch (Exception ex) { 3530 } 3531 3532 ccbs.clear(); 3533 ccus.clear(); 3534 3535 //test ascii overflow buffer 3536 ccus.put((char)0x0A); ccus.put((char)0x0043); 3537 ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3538 3539 ccus.limit(ccus.position()); 3540 ccus.position(0); 3541 ccbs.limit(ccbs.position()); 3542 ccbs.position(0); 3543 3544 try { 3545 smBufEncode(encoder, "ISCII-CC-EN-2", ccus, ccbs, true, false); 3546 errln("Exception while encoding ISCII should have been thrown."); 3547 } 3548 catch (Exception ex) { 3549 } 3550 3551 ccbs.clear(); 3552 ccus.clear(); 3553 3554 //test surrogate malform 3555 ccus.put((char)0x06E3); 3556 ccbs.put((byte)0x00); 3557 3558 ccus.limit(ccus.position()); 3559 ccus.position(0); 3560 ccbs.limit(ccbs.position()); 3561 ccbs.position(0); 3562 3563 try { 3564 smBufEncode(encoder, "ISCII-CC-EN-3", ccus, ccbs, true, false); 3565 errln("Exception while encoding ISCII should have been thrown."); 3566 } 3567 catch (Exception ex) { 3568 } 3569 3570 ccbs.clear(); 3571 ccus.clear(); 3572 3573 //test surrogate malform 3574 ccus.put((char)0xD801); ccus.put((char)0xDD01); 3575 ccbs.put((byte)0x00); 3576 3577 ccus.limit(ccus.position()); 3578 ccus.position(0); 3579 ccbs.limit(ccbs.position()); 3580 ccbs.position(0); 3581 3582 try { 3583 smBufEncode(encoder, "ISCII-CC-EN-4", ccus, ccbs, true, false); 3584 errln("Exception while encoding ISCII should have been thrown."); 3585 } 3586 catch (Exception ex) { 3587 } 3588 3589 
ccbs.clear(); 3590 ccus.clear(); 3591 3592 //test trail surrogate malform 3593 ccus.put((char)0xDD01); 3594 ccbs.put((byte)0x00); 3595 3596 ccus.limit(ccus.position()); 3597 ccus.position(0); 3598 ccbs.limit(ccbs.position()); 3599 ccbs.position(0); 3600 3601 try { 3602 smBufEncode(encoder, "ISCII-CC-EN-5", ccus, ccbs, true, false); 3603 errln("Exception while encoding ISCII should have been thrown."); 3604 } 3605 catch (Exception ex) { 3606 } 3607 3608 ccbs.clear(); 3609 ccus.clear(); 3610 3611 //test lead surrogates malform 3612 ccus.put((char)0xD801); ccus.put((char)0xD802); 3613 ccbs.put((byte)0x00); 3614 3615 ccus.limit(ccus.position()); 3616 ccus.position(0); 3617 ccbs.limit(ccbs.position()); 3618 ccbs.position(0); 3619 3620 try { 3621 smBufEncode(encoder, "ISCII-CC-EN-6", ccus, ccbs, true, false); 3622 errln("Exception while encoding ISCII should have been thrown."); 3623 } 3624 catch (Exception ex) { 3625 } 3626 3627 ccus.clear(); 3628 ccbs.clear(); 3629 3630 //test overflow buffer 3631 ccus.put((char)0x0901); 3632 ccbs.put((byte)0x00); 3633 3634 ccus.limit(ccus.position()); 3635 ccus.position(0); 3636 ccbs.limit(ccbs.position()); 3637 ccbs.position(0); 3638 3639 cs = provider.charsetForName("ISCII,version=0"); 3640 encoder = cs.newEncoder(); 3641 3642 try { 3643 smBufEncode(encoder, "ISCII-CC-EN-7", ccus, ccbs, true, false); 3644 errln("Exception while encoding ISCII should have been thrown."); 3645 } 3646 catch (Exception ex) { 3647 } 3648 //end of charset encoder code coverage code 3649 } 3650 3651 //Test for the IMAP Charset 3652 @Test 3653 public void TestCharsetIMAP() { 3654 CharsetProvider provider = new CharsetProviderICU(); 3655 Charset cs = provider.charsetForName("IMAP-mailbox-name"); 3656 CharsetEncoder encoder = cs.newEncoder(); 3657 CharsetDecoder decoder = cs.newDecoder(); 3658 3659 CharBuffer us = CharBuffer.allocate(0x20); 3660 ByteBuffer bs = ByteBuffer.allocate(0x20); 3661 3662 us.put((char)0x00A3); us.put((char)0x2020); 
us.put((char)0x41); 3663 3664 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x4B); bs.put((byte)0x4D); bs.put((byte)0x67); bs.put((byte)0x49); 3665 bs.put((byte)0x41); bs.put((byte)0x2D); bs.put((byte)0x41); 3666 3667 3668 bs.limit(bs.position()); 3669 bs.position(0); 3670 us.limit(us.position()); 3671 us.position(0); 3672 3673 smBufDecode(decoder, "IMAP", bs, us); 3674 smBufEncode(encoder, "IMAP", us, bs); 3675 3676 //the rest of the code in this method is for better code coverage 3677 us.clear(); 3678 bs.clear(); 3679 3680 //start of charset encoder code coverage 3681 //test buffer overflow 3682 us.put((char)0x0026); us.put((char)0x17A9); 3683 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3684 3685 bs.limit(bs.position()); 3686 bs.position(0); 3687 us.limit(us.position()); 3688 us.position(0); 3689 3690 try { 3691 smBufEncode(encoder, "IMAP-EN-1", us, bs, true, false); 3692 errln("Exception while encoding IMAP (1) should have been thrown."); 3693 } catch(Exception ex) { 3694 } 3695 3696 us.clear(); 3697 bs.clear(); 3698 3699 //test buffer overflow 3700 us.put((char)0x17A9); us.put((char)0x0941); 3701 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3702 3703 bs.limit(bs.position()); 3704 bs.position(0); 3705 us.limit(us.position()); 3706 us.position(0); 3707 3708 try { 3709 smBufEncode(encoder, "IMAP-EN-2", us, bs, true, false); 3710 errln("Exception while encoding IMAP (2) should have been thrown."); 3711 } catch(Exception ex) { 3712 } 3713 3714 us.clear(); 3715 bs.clear(); 3716 3717 //test buffer overflow 3718 us.put((char)0x17A9); us.put((char)0x0941); 3719 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3720 3721 bs.limit(bs.position()); 3722 bs.position(0); 3723 us.limit(us.position()); 3724 us.position(0); 3725 3726 try { 3727 smBufEncode(encoder, "IMAP-EN-3", us, bs, true, false); 3728 errln("Exception while encoding IMAP (3) should 
have been thrown."); 3729 } catch(Exception ex) { 3730 } 3731 3732 us.clear(); 3733 bs.clear(); 3734 3735 //test buffer overflow 3736 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3737 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3738 bs.put((byte)0x00); 3739 3740 bs.limit(bs.position()); 3741 bs.position(0); 3742 us.limit(us.position()); 3743 us.position(0); 3744 3745 try { 3746 smBufEncode(encoder, "IMAP-EN-4", us, bs, true, false); 3747 errln("Exception while encoding IMAP (4) should have been thrown."); 3748 } catch(Exception ex) { 3749 } 3750 3751 us.clear(); 3752 bs.clear(); 3753 3754 //test buffer overflow 3755 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3756 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3757 bs.put((byte)0x00); bs.put((byte)0x00); 3758 3759 bs.limit(bs.position()); 3760 bs.position(0); 3761 us.limit(us.position()); 3762 us.position(0); 3763 3764 try { 3765 smBufEncode(encoder, "IMAP-EN-5", us, bs, true, false); 3766 errln("Exception while encoding IMAP (5) should have been thrown."); 3767 } catch(Exception ex) { 3768 } 3769 3770 us.clear(); 3771 bs.clear(); 3772 3773 //test buffer overflow 3774 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); us.put((char)0x0970); 3775 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3776 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3777 3778 bs.limit(bs.position()); 3779 bs.position(0); 3780 us.limit(us.position()); 3781 us.position(0); 3782 3783 try { 3784 smBufEncode(encoder, "IMAP-EN-6", us, bs, true, false); 3785 errln("Exception while encoding IMAP (6) should have been thrown."); 3786 } catch(Exception ex) { 3787 } 3788 3789 us.clear(); 3790 bs.clear(); 3791 3792 //test buffer overflow 3793 us.put((char)0x17A9); 
us.put((char)0x0941); 3794 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3795 bs.put((byte)0x00); 3796 3797 bs.limit(bs.position()); 3798 bs.position(0); 3799 us.limit(us.position()); 3800 us.position(0); 3801 3802 try { 3803 smBufEncode(encoder, "IMAP-EN-7", us, bs, true, true); 3804 errln("Exception while encoding IMAP (7) should have been thrown."); 3805 } catch(Exception ex) { 3806 } 3807 3808 us.clear(); 3809 bs.clear(); 3810 3811 //test flushing 3812 us.put((char)0x17A9); us.put((char)0x0941); 3813 bs.put((byte)0x26); bs.put((byte)0x46); bs.put((byte)0x36); bs.put((byte)0x6b); bs.put((byte)0x4a); bs.put((byte)0x51); 3814 bs.put((byte)0x51); bs.put((byte)0x2d); 3815 3816 bs.limit(bs.position()); 3817 bs.position(0); 3818 us.limit(us.position()); 3819 us.position(0); 3820 3821 try { 3822 smBufEncode(encoder, "IMAP-EN-8", us, bs, true, true); 3823 } catch(Exception ex) { 3824 errln("Exception while encoding IMAP (8) should not have been thrown."); 3825 } 3826 3827 us = CharBuffer.allocate(0x08); 3828 bs = ByteBuffer.allocate(0x08); 3829 3830 //test flushing buffer overflow 3831 us.put((char)0x0061); 3832 bs.put((byte)0x61); bs.put((byte)0x00); 3833 3834 bs.limit(bs.position()); 3835 bs.position(0); 3836 us.limit(us.position()); 3837 us.position(0); 3838 3839 try { 3840 smBufEncode(encoder, "IMAP-EN-9", us, bs, true, true); 3841 } catch(Exception ex) { 3842 errln("Exception while encoding IMAP (9) should not have been thrown."); 3843 } 3844 //end of charset encoder code coverage 3845 3846 us = CharBuffer.allocate(0x10); 3847 bs = ByteBuffer.allocate(0x10); 3848 3849 //start of charset decoder code coverage 3850 //test malform case 2 3851 us.put((char)0x0000); us.put((char)0x0000); 3852 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x43); bs.put((byte)0x41); 3853 3854 bs.limit(bs.position()); 3855 bs.position(0); 3856 us.limit(us.position()); 3857 us.position(0); 3858 3859 try { 3860 
smBufDecode(decoder, "IMAP-DE-1", bs, us, true, false); 3861 errln("Exception while decoding IMAP (1) should have been thrown."); 3862 } catch(Exception ex) { 3863 } 3864 3865 us.clear(); 3866 bs.clear(); 3867 3868 //test malform case 5 3869 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3870 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3871 bs.put((byte)0x41); bs.put((byte)0x49); bs.put((byte)0x41); 3872 3873 bs.limit(bs.position()); 3874 bs.position(0); 3875 us.limit(us.position()); 3876 us.position(0); 3877 3878 try { 3879 smBufDecode(decoder, "IMAP-DE-2", bs, us, true, false); 3880 errln("Exception while decoding IMAP (2) should have been thrown."); 3881 } catch(Exception ex) { 3882 } 3883 3884 us.clear(); 3885 bs.clear(); 3886 3887 //test malform case 7 3888 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3889 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3890 bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x42); 3891 bs.put((byte)0x41); 3892 3893 bs.limit(bs.position()); 3894 bs.position(0); 3895 us.limit(us.position()); 3896 us.position(0); 3897 3898 try { 3899 smBufDecode(decoder, "IMAP-DE-3", bs, us, true, false); 3900 errln("Exception while decoding IMAP (3) should have been thrown."); 3901 } catch(Exception ex) { 3902 } 3903 //end of charset decoder coder coverage 3904 } 3905 3906 //Test for charset UTF32LE to provide better code coverage 3907 @Test 3908 public void TestCharsetUTF32LE() { 3909 CoderResult result = CoderResult.UNDERFLOW; 3910 CharsetProvider provider = new CharsetProviderICU(); 3911 Charset cs = provider.charsetForName("UTF-32LE"); 3912 CharsetEncoder encoder = cs.newEncoder(); 3913 //CharsetDecoder decoder = cs.newDecoder(); 3914 3915 CharBuffer us = CharBuffer.allocate(0x10); 3916 ByteBuffer bs = ByteBuffer.allocate(0x10); 3917 3918 3919 //test malform surrogate 3920 
us.put((char)0xD901); 3921 bs.put((byte)0x00); 3922 3923 bs.limit(bs.position()); 3924 bs.position(0); 3925 us.limit(us.position()); 3926 us.position(0); 3927 3928 try { 3929 smBufEncode(encoder, "UTF32LE-EN-1", us, bs, true, false); 3930 errln("Exception while encoding UTF32LE (1) should have been thrown."); 3931 } catch (Exception ex) { 3932 } 3933 3934 bs.clear(); 3935 us.clear(); 3936 3937 //test malform surrogate 3938 us.put((char)0xD901); us.put((char)0xD902); 3939 bs.put((byte)0x00); 3940 3941 bs.limit(bs.position()); 3942 bs.position(0); 3943 us.limit(us.position()); 3944 us.position(0); 3945 3946 result = encoder.encode(us, bs, true); 3947 3948 if (!result.isError() && !result.isOverflow()) { 3949 errln("Error while encoding UTF32LE (2) should have occurred."); 3950 } 3951 3952 bs.clear(); 3953 us.clear(); 3954 3955 //test overflow trail surrogate 3956 us.put((char)0xDD01); us.put((char)0xDD0E); us.put((char)0xDD0E); 3957 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3958 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3959 3960 bs.limit(bs.position()); 3961 bs.position(0); 3962 us.limit(us.position()); 3963 us.position(0); 3964 3965 result = encoder.encode(us, bs, true); 3966 3967 if (!result.isError() && !result.isOverflow()) { 3968 errln("Error while encoding UTF32LE (3) should have occurred."); 3969 } 3970 3971 bs.clear(); 3972 us.clear(); 3973 3974 //test malform lead surrogate 3975 us.put((char)0xD90D); us.put((char)0xD90E); 3976 bs.put((byte)0x00); 3977 3978 bs.limit(bs.position()); 3979 bs.position(0); 3980 us.limit(us.position()); 3981 us.position(0); 3982 3983 try { 3984 smBufEncode(encoder, "UTF32LE-EN-4", us, bs, true, false); 3985 errln("Exception while encoding UTF32LE (4) should have been thrown."); 3986 } catch (Exception ex) { 3987 } 3988 3989 bs.clear(); 3990 us.clear(); 3991 3992 //test overflow buffer 3993 us.put((char)0x0061); 3994 bs.put((byte)0x00); 3995 3996 bs.limit(bs.position()); 3997 
bs.position(0); 3998 us.limit(us.position()); 3999 us.position(0); 4000 4001 try { 4002 smBufEncode(encoder, "UTF32LE-EN-5", us, bs, true, false); 4003 errln("Exception while encoding UTF32LE (5) should have been thrown."); 4004 } catch (Exception ex) { 4005 } 4006 4007 bs.clear(); 4008 us.clear(); 4009 4010 //test malform trail surrogate 4011 us.put((char)0xDD01); 4012 bs.put((byte)0x00); 4013 4014 bs.limit(bs.position()); 4015 bs.position(0); 4016 us.limit(us.position()); 4017 us.position(0); 4018 4019 try { 4020 smBufEncode(encoder, "UTF32LE-EN-6", us, bs, true, false); 4021 errln("Exception while encoding UTF32LE (6) should have been thrown."); 4022 } catch (Exception ex) { 4023 } 4024 } 4025 4026 //Test for charset UTF16LE to provide better code coverage 4027 @Test 4028 public void TestCharsetUTF16LE() { 4029 CoderResult result = CoderResult.UNDERFLOW; 4030 CharsetProvider provider = new CharsetProviderICU(); 4031 Charset cs = provider.charsetForName("UTF-16LE"); 4032 CharsetEncoder encoder = cs.newEncoder(); 4033 //CharsetDecoder decoder = cs.newDecoder(); 4034 4035 // Test for malform and change fromUChar32 for next call 4036 char u_pts1[] = { 4037 (char)0xD805, 4038 (char)0xDC01, (char)0xDC02, (char)0xDC03, 4039 (char)0xD901, (char)0xD902 4040 }; 4041 byte b_pts1[] = { 4042 (byte)0x00, 4043 (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00 4044 }; 4045 4046 CharBuffer us = CharBuffer.allocate(u_pts1.length); 4047 ByteBuffer bs = ByteBuffer.allocate(b_pts1.length); 4048 4049 us.put(u_pts1); 4050 bs.put(b_pts1); 4051 4052 us.limit(1); 4053 us.position(0); 4054 bs.limit(1); 4055 bs.position(0); 4056 4057 result = encoder.encode(us, bs, true); 4058 4059 if (!result.isMalformed()) { 4060 // LE should not output BOM, so this should be malformed 4061 errln("Malformed while encoding UTF-16LE (1) should have occured."); 4062 } 4063 4064 // Test for malform surrogate from previous buffer 4065 us.limit(4); 4066 us.position(1); 4067 bs.limit(7); 
4068 bs.position(1); 4069 4070 result = encoder.encode(us, bs, true); 4071 4072 if (!result.isMalformed()) { 4073 errln("Error while encoding UTF-16LE (2) should have occured."); 4074 } 4075 4076 // Test for malform trail surrogate 4077 encoder.reset(); 4078 4079 us.limit(1); 4080 us.position(0); 4081 bs.limit(1); 4082 bs.position(0); 4083 4084 result = encoder.encode(us, bs, true); 4085 4086 us.limit(6); 4087 us.position(4); 4088 bs.limit(4); 4089 bs.position(1); 4090 4091 result = encoder.encode(us, bs, true); 4092 4093 if (!result.isMalformed()) { 4094 errln("Error while encoding UTF-16LE (3) should have occured."); 4095 } 4096 } 4097 4098 //provide better code coverage for the generic charset UTF32 4099 @Test 4100 public void TestCharsetUTF32() { 4101 CoderResult result = CoderResult.UNDERFLOW; 4102 CharsetProvider provider = new CharsetProviderICU(); 4103 Charset cs = provider.charsetForName("UTF-32"); 4104 CharsetDecoder decoder = cs.newDecoder(); 4105 CharsetEncoder encoder = cs.newEncoder(); 4106 4107 //start of decoding code coverage 4108 char us_array[] = { 4109 0x0000, 0x0000, 0x0000, 0x0000, 4110 }; 4111 4112 byte bs_array1[] = { 4113 (byte)0x00, (byte)0x00, (byte)0xFE, (byte)0xFF, 4114 (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x43, 4115 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4116 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4117 }; 4118 4119 byte bs_array2[] = { 4120 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4121 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4122 }; 4123 4124 CharBuffer us = CharBuffer.allocate(us_array.length); 4125 ByteBuffer bs = ByteBuffer.allocate(bs_array1.length); 4126 4127 us.put(us_array); 4128 bs.put(bs_array1); 4129 4130 us.limit(us.position()); 4131 us.position(0); 4132 bs.limit(bs.position()); 4133 bs.position(0); 4134 4135 try { 4136 smBufDecode(decoder, "UTF32-DE-1", bs, us, true, false); 4137 errln("Malform exception while decoding UTF32 charset (1) should have been thrown."); 4138 } catch 
(Exception ex) { 4139 } 4140 4141 decoder = cs.newDecoder(); 4142 4143 bs = ByteBuffer.allocate(bs_array2.length); 4144 bs.put(bs_array2); 4145 4146 us.limit(4); 4147 us.position(0); 4148 bs.limit(bs.position()); 4149 bs.position(0); 4150 4151 try { 4152 smBufDecode(decoder, "UTF32-DE-2", bs, us, true, false); 4153 } catch (Exception ex) { 4154 // should recognize little endian BOM 4155 errln("Exception while decoding UTF32 charset (2) should not have been thrown."); 4156 } 4157 4158 //Test malform exception 4159 bs.clear(); 4160 us.clear(); 4161 4162 bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); bs.put((byte)0x00); bs.put((byte)0x00); 4163 us.put((char)0x0000); 4164 4165 us.limit(us.position()); 4166 us.position(0); 4167 bs.limit(bs.position()); 4168 bs.position(0); 4169 4170 try { 4171 smBufDecode(decoder, "UTF32-DE-3", bs, us, true, false); 4172 errln("Malform exception while decoding UTF32 charset (3) should have been thrown."); 4173 } catch (Exception ex) { 4174 } 4175 4176 //Test BOM testing 4177 bs.clear(); 4178 us.clear(); 4179 4180 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFF); bs.put((byte)0xFE); 4181 us.put((char)0x0000); 4182 4183 us.limit(us.position()); 4184 us.position(0); 4185 bs.limit(bs.position()); 4186 bs.position(0); 4187 4188 try { 4189 smBufDecode(decoder, "UTF32-DE-4", bs, us, true, false); 4190 } catch (Exception ex) { 4191 // should recognize big endian BOM 4192 errln("Exception while decoding UTF32 charset (4) should not have been thrown."); 4193 } 4194 //end of decoding code coverage 4195 4196 //start of encoding code coverage 4197 us = CharBuffer.allocate(0x10); 4198 bs = ByteBuffer.allocate(0x10); 4199 4200 //test wite BOM overflow error 4201 us.put((char)0xDC01); 4202 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4203 4204 us.limit(us.position()); 4205 us.position(0); 4206 bs.limit(bs.position()); 4207 bs.position(0); 4208 4209 result = encoder.encode(us, bs, true); 4210 // must try to output 
BOM first for UTF-32 (not UTF-32BE or UTF-32LE) 4211 if (!result.isOverflow()) { 4212 errln("Buffer overflow error while encoding UTF32 charset (1) should have occurred."); 4213 } 4214 4215 us.clear(); 4216 bs.clear(); 4217 4218 //test malform surrogate and store value in fromChar32 4219 us.put((char)0xD801); us.put((char)0xD802); 4220 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4221 4222 us.limit(us.position()); 4223 us.position(0); 4224 bs.limit(bs.position()); 4225 bs.position(0); 4226 4227 result = encoder.encode(us, bs, true); 4228 if (!result.isMalformed()) { 4229 errln("Malformed error while encoding UTF32 charset (2) should have occurred."); 4230 } 4231 4232 us.clear(); 4233 bs.clear(); 4234 4235 //test malform surrogate 4236 us.put((char)0x0000); us.put((char)0xD902); 4237 4238 us.limit(us.position()); 4239 us.position(0); 4240 bs.limit(bs.position()); 4241 bs.position(0); 4242 4243 result = encoder.encode(us, bs, true); 4244 if (!result.isOverflow()) { 4245 errln("Overflow error while encoding UTF32 charset (3) should have occurred."); 4246 } 4247 4248 us.clear(); 4249 bs.clear(); 4250 4251 //test malform surrogate 4252 encoder.reset(); 4253 us.put((char)0xD801); 4254 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4255 4256 us.limit(us.position()); 4257 us.position(0); 4258 bs.limit(bs.position()); 4259 bs.position(0); 4260 4261 result = encoder.encode(us, bs, true); 4262 if (!result.isMalformed()) { 4263 errln("Malform error while encoding UTF32 charset (4) should have occurred."); 4264 } 4265 4266 us.clear(); 4267 bs.clear(); 4268 4269 //test overflow surrogate 4270 us.put((char)0x0000); us.put((char)0xDDE1); us.put((char)0xD915); us.put((char)0xDDF2); 4271 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4272 4273 us.limit(us.position()); 4274 
us.position(0); 4275 bs.limit(bs.position()); 4276 bs.position(0); 4277 4278 result = encoder.encode(us, bs, true); 4279 if (!result.isOverflow()) { 4280 errln("Overflow error while encoding UTF32 charset (5) should have occurred."); 4281 } 4282 4283 us.clear(); 4284 bs.clear(); 4285 4286 //test malform surrogate 4287 encoder.reset(); 4288 us.put((char)0xDDE1); 4289 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4290 4291 us.limit(us.position()); 4292 us.position(0); 4293 bs.limit(bs.position()); 4294 bs.position(0); 4295 4296 result = encoder.encode(us, bs, true); 4297 if (!result.isMalformed()) { 4298 errln("Malform error while encoding UTF32 charset (6) should have occurred."); 4299 } 4300 //end of encoding code coverage 4301 } 4302 4303 //this method provides better code coverage decoding UTF32 LE/BE 4304 @Test 4305 public void TestDecodeUTF32LEBE() { 4306 CoderResult result = CoderResult.UNDERFLOW; 4307 CharsetProvider provider = new CharsetProviderICU(); 4308 CharsetDecoder decoder; 4309 CharBuffer us = CharBuffer.allocate(0x10); 4310 ByteBuffer bs = ByteBuffer.allocate(0x10); 4311 4312 //decode UTF32LE 4313 decoder = provider.charsetForName("UTF-32LE").newDecoder(); 4314 //test overflow buffer 4315 bs.put((byte)0x41); bs.put((byte)0xFF); bs.put((byte)0x01); bs.put((byte)0x00); 4316 us.put((char)0x0000); 4317 4318 us.limit(us.position()); 4319 us.position(0); 4320 bs.limit(bs.position()); 4321 bs.position(0); 4322 4323 try { 4324 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4325 errln("Overflow exception while decoding UTF32LE (1) should have been thrown."); 4326 } catch (Exception ex) { 4327 } 4328 // test overflow buffer handling in CharsetDecoderICU 4329 bs.position(0); 4330 us.position(0); 4331 decoder.reset(); 4332 result = decoder.decode(bs, us, true); 4333 if (result.isOverflow()) { 4334 result = decoder.decode(bs, us, true); 4335 if (!result.isOverflow()) { 4336 errln("Overflow buffer 
error while decoding UTF32LE should have occurred."); 4337 } 4338 } else { 4339 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4340 } 4341 4342 us.clear(); 4343 bs.clear(); 4344 //test malform buffer 4345 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4346 us.put((char)0x0000); 4347 4348 us.limit(us.position()); 4349 us.position(0); 4350 bs.limit(bs.position()); 4351 bs.position(0); 4352 4353 try { 4354 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4355 errln("Malform exception while decoding UTF32LE (2) should have been thrown."); 4356 } catch (Exception ex) { 4357 } 4358 4359 us.clear(); 4360 bs.clear(); 4361 //test malform buffer 4362 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4363 bs.put((byte)0xFF); bs.put((byte)0xDF); bs.put((byte)0x10); 4364 us.put((char)0x0000); 4365 4366 us.limit(us.position()); 4367 us.position(0); 4368 bs.limit(bs.position()); 4369 bs.position(0); 4370 4371 try { 4372 // must flush in order to exhibit malformed behavior 4373 smBufDecode(decoder, "UTF-32LE", bs, us, true, true); 4374 errln("Malform exception while decoding UTF32LE (3) should have been thrown."); 4375 } catch (Exception ex) { 4376 } 4377 4378 us.clear(); 4379 bs.clear(); 4380 //test malform buffer 4381 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4382 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4383 us.put((char)0x0000); 4384 4385 us.limit(us.position()); 4386 us.position(0); 4387 bs.limit(bs.position()); 4388 bs.position(0); 4389 4390 try { 4391 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4392 errln("Malform exception while decoding UTF32LE (4) should have been thrown."); 4393 } catch (Exception ex) { 4394 } 4395 4396 us.clear(); 4397 bs.clear(); 4398 //test overflow buffer 4399 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4400 bs.put((byte)0xDD); 
bs.put((byte)0xFF); bs.put((byte)0x10); bs.put((byte)0x00);
        us.put((char)0x0000);

        us.limit(us.position());
        us.position(0);
        bs.limit(bs.position());
        bs.position(0);

        try {
            smBufDecode(decoder, "UTF-32LE", bs, us, true, false);
            errln("Overflow exception while decoding UTF32LE (5) should have been thrown.");
        } catch (Exception ex) {
        }
        //end of decode UTF32LE

        bs.clear();
        us.clear();

        //decode UTF32BE
        decoder = provider.charsetForName("UTF-32BE").newDecoder();
        //test overflow buffer
        bs.put((byte)0x00); bs.put((byte)0x01); bs.put((byte)0xFF); bs.put((byte)0x41);
        us.put((char)0x0000);

        us.limit(us.position());
        us.position(0);
        bs.limit(bs.position());
        bs.position(0);

        try {
            smBufDecode(decoder, "UTF-32BE", bs, us, true, false);
            errln("Overflow exception while decoding UTF32BE (1) should have been thrown.");
        } catch (Exception ex) {
        }

        bs.clear();
        us.clear();
        //test malform buffer
        bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xD9); bs.put((byte)0x02);
        us.put((char)0x0000);

        us.limit(us.position());
        us.position(0);
        bs.limit(bs.position());
        bs.position(0);

        try {
            smBufDecode(decoder, "UTF-32BE", bs, us, true, false);
            errln("Malform exception while decoding UTF32BE (2) should have been thrown.");
        } catch (Exception ex) {
        }

        bs.clear();
        us.clear();
        //test malform buffer (second unit is only 3 bytes long)
        bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF);
        bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDF);
        us.put((char)0x0000);

        us.limit(us.position());
        us.position(0);
        bs.limit(bs.position());
        bs.position(0);

        try {
            // must flush to exhibit malformed behavior
            smBufDecode(decoder, "UTF-32BE", bs, us, true, true);
            errln("Malform exception while decoding UTF32BE (3) should have been thrown.");
        } catch (Exception ex) {
        }

        bs.clear();
        us.clear();
        //test overflow buffer
        bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF);
        bs.put((byte)0x00); bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDD);
        us.put((char)0x0000);

        us.limit(us.position());
        us.position(0);
        bs.limit(bs.position());
        bs.position(0);

        try {
            smBufDecode(decoder, "UTF-32BE", bs, us, true, false);
            errln("Overflow exception while decoding UTF32BE (4) should have been thrown.");
        } catch (Exception ex) {
        }

        bs.clear();
        us.clear();
        //test malform buffer (only 3 bytes of input)
        bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE);
        us.put((char)0x0000);

        us.limit(us.position());
        us.position(0);
        bs.limit(bs.position());
        bs.position(0);

        try {
            // must flush to exhibit malformed behavior
            smBufDecode(decoder, "UTF-32BE", bs, us, true, true);
            errln("Malform exception while decoding UTF32BE (5) should have been thrown.");
        } catch (Exception ex) {
        }
        //end of decode UTF32BE
    }

    // Provide better code coverage for UTF8.
    // Encoding cases run against both read-only views (us2) and the writable
    // buffer (us) with output limits of 1 to 4 bytes, each expecting overflow.
    // Decoding cases use read-only (bs2) and writable (bs) input and expect
    // malformed-input or overflow results as labelled.
    @Test
    public void TestCharsetUTF8() {
        CoderResult result = CoderResult.UNDERFLOW;
        CharsetProvider provider = new CharsetProviderICU();
        CharsetDecoder decoder = provider.charsetForName("UTF-8").newDecoder();
        CharsetEncoder encoder = provider.charsetForName("UTF-8").newEncoder();

        CharBuffer us = CharBuffer.allocate(0x10);
        ByteBuffer bs = ByteBuffer.allocate(0x10);
        ByteBuffer bs2;
        CharBuffer us2;
        int limit_us;
        int limit_bs;

        //encode and decode using read only buffer
        encoder.reset();
        decoder.reset();
        us.put((char)0x0041); us.put((char)0x0081); us.put((char)0xEF65); us.put((char)0xD902);
        bs.put((byte)0x41); bs.put((byte)0xc2); bs.put((byte)0x81); bs.put((byte)0xee); bs.put((byte)0xbd); bs.put((byte)0xa5);
        bs.put((byte)0x00);
        limit_us = us.position();
        limit_bs = bs.position();

        us.limit(limit_us);
        us.position(0);
        bs.limit(limit_bs);
        bs.position(0);
        bs2 = bs.asReadOnlyBuffer();
        us2 = us.asReadOnlyBuffer();

        result = decoder.decode(bs2, us, true);
        if (!result.isUnderflow() || !equals(us, us2)) {
            errln("Error while decoding UTF-8 (1) should not have occured.");
        }

        us2.limit(limit_us);
        us2.position(0);
        bs.limit(limit_bs);
        bs.position(0);

        result = encoder.encode(us2, bs, true);
        if (!result.isUnderflow() || !equals(bs, bs2)) {
            errln("Error while encoding UTF-8 (1) should not have occured.");
        }

        us.clear();
        bs.clear();

        //test overflow buffer while encoding
        //readonly buffer
        encoder.reset();
        us.put((char)0x0081); us.put((char)0xEF65);
        bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00);
        limit_us = us.position();
        us2 = us.asReadOnlyBuffer();
        us2.limit(limit_us);
        us2.position(0);
        bs.limit(1);
        bs.position(0);
        result = encoder.encode(us2, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (2).");
        }

        encoder.reset();

        us2.limit(limit_us);
        us2.position(1);
        bs.limit(1);
        bs.position(0);
        result = encoder.encode(us2, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (3).");
        }

        encoder.reset();

        us2.limit(limit_us);
        us2.position(1);
        bs.limit(2);
        bs.position(0);
        result = encoder.encode(us2, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (4).");
        }

        encoder.reset();

        us2.limit(limit_us);
        us2.position(0);
        bs.limit(2);
        bs.position(0);
        result = encoder.encode(us2, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (5).");
        }

        //not readonly buffer
        encoder.reset();

        us.limit(limit_us);
        us.position(0);
        bs.limit(1);
        bs.position(0);
        result = encoder.encode(us, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (6).");
        }

        encoder.reset();

        us.limit(limit_us);
        us.position(0);
        bs.limit(3);
        bs.position(0);
        result = encoder.encode(us, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (7).");
        }

        encoder.reset();

        us.limit(limit_us);
        us.position(1);
        bs.limit(2);
        bs.position(0);
        result = encoder.encode(us, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (8).");
        }

        encoder.reset();

        // limit_us + 1 exposes one more char slot than was written above
        us.limit(limit_us + 1);
        us.position(1);
        bs.limit(3);
        bs.position(0);
        result = encoder.encode(us, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (9).");
        }

        us.clear();
        bs.clear();

        //test encoding 4 byte characters
        encoder.reset();
        us.put((char)0xD902); us.put((char)0xDD02); us.put((char)0x0041);
        bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00);
        limit_us = us.position();
        us2 = us.asReadOnlyBuffer();
        us2.limit(limit_us);
        us2.position(0);
        bs.limit(1);
        bs.position(0);
        result = encoder.encode(us2, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (10).");
        }

        encoder.reset();

        us2.limit(limit_us);
        us2.position(0);
        bs.limit(2);
        bs.position(0);
        result = encoder.encode(us2, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (11).");
        }

        encoder.reset();

        us2.limit(limit_us);
        us2.position(0);
        bs.limit(3);
        bs.position(0);
        result = encoder.encode(us2, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (12).");
        }

        encoder.reset();

        us2.limit(limit_us);
        us2.position(0);
        bs.limit(4);
        bs.position(0);
        result = encoder.encode(us2, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow Error should have occured while encoding UTF-8 (13).");
        }

        us.clear();
        bs.clear();

        //decoding code coverage
        //test malform error
        decoder.reset();
        bs.put((byte)0xC0); bs.put((byte)0xC0);
        us.put((char)0x0000);
        bs2 = bs.asReadOnlyBuffer();

        us.limit(1);
        us.position(0);
        bs2.limit(1);
        bs2.position(0);

        // the decode result is not checked; only the flush result is
        result = decoder.decode(bs2, us, true);
        result = decoder.flush(us);
        if (!result.isMalformed()) {
            errln("Malform error should have occurred while decoding UTF-8 (1).");
        }

        us.limit(1);
        us.position(0);
        bs2.limit(1);
        bs2.position(0);

        decoder.reset();

        // first call sees one byte; the checked call below sees both bytes
        result = decoder.decode(bs2, us, true);
        us.limit(1);
        us.position(0);
        bs2.limit(2);
        bs2.position(0);
        result = decoder.decode(bs2, us, true);
        if (!result.isMalformed()) {
            errln("Malform error should have occurred while decoding UTF-8 (2).");
        }

        us.clear();
        bs.clear();

        //test overflow buffer (two input bytes, room for one char)
        bs.put((byte)0x01); bs.put((byte)0x41);
        us.put((char)0x0000);
        bs2 = bs.asReadOnlyBuffer();
        us.limit(1);
        us.position(0);
        bs2.limit(2);
        bs2.position(0);

        result = decoder.decode(bs2, us, true);
        if (!result.isOverflow()) {
4755 errln("Overflow error should have occurred while decoding UTF-8 (3)."); 4756 } 4757 4758 us.clear(); 4759 bs.clear(); 4760 4761 //test malform string 4762 decoder.reset(); 4763 bs.put((byte)0xF5); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4764 us.put((char)0x0000); 4765 bs2 = bs.asReadOnlyBuffer(); 4766 us.limit(1); 4767 us.position(0); 4768 bs2.limit(4); 4769 bs2.position(0); 4770 4771 result = decoder.decode(bs2, us, true); 4772 if (!result.isMalformed()) { 4773 errln("Malform error should have occurred while decoding UTF-8 (4)."); 4774 } 4775 4776 bs.clear(); 4777 4778 //test overflow 4779 decoder.reset(); 4780 bs.put((byte)0xF3); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4781 bs2 = bs.asReadOnlyBuffer(); 4782 us.limit(1); 4783 us.position(0); 4784 bs2.limit(4); 4785 bs2.position(0); 4786 4787 result = decoder.decode(bs2, us, true); 4788 if (!result.isOverflow()) { 4789 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4790 } 4791 4792 //test overflow 4793 decoder.reset(); 4794 us.limit(2); 4795 us.position(0); 4796 bs2.limit(5); 4797 bs2.position(0); 4798 4799 result = decoder.decode(bs2, us, true); 4800 if (!result.isOverflow()) { 4801 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4802 } 4803 4804 //test overflow 4805 decoder.reset(); 4806 us.limit(1); 4807 us.position(0); 4808 bs.limit(5); 4809 bs.position(0); 4810 4811 result = decoder.decode(bs, us, true); 4812 if (!result.isOverflow()) { 4813 errln("Overflow error should have occurred while decoding UTF-8 (6)."); 4814 } 4815 4816 bs.clear(); 4817 4818 //test overflow 4819 decoder.reset(); 4820 bs.put((byte)0x41); bs.put((byte)0x42); 4821 us.limit(1); 4822 us.position(0); 4823 bs.limit(2); 4824 bs.position(0); 4825 4826 result = decoder.decode(bs, us, true); 4827 if (!result.isOverflow()) { 4828 errln("Overflow error should have occurred while decoding UTF-8 (7)."); 4829 } 4830 4831 } 4832 4833 //provide better code 
coverage for Charset UTF16 4834 @Test 4835 public void TestCharsetUTF16() { 4836 CoderResult result = CoderResult.UNDERFLOW; 4837 CharsetProvider provider = new CharsetProviderICU(); 4838 CharsetDecoder decoder = provider.charsetForName("UTF-16").newDecoder(); 4839 CharsetEncoder encoder = provider.charsetForName("UTF-16").newEncoder(); 4840 4841 CharBuffer us = CharBuffer.allocate(0x10); 4842 ByteBuffer bs = ByteBuffer.allocate(0x10); 4843 4844 //test flush buffer and malform string 4845 bs.put((byte)0xFF); 4846 us.put((char)0x0000); 4847 4848 us.limit(us.position()); 4849 us.position(0); 4850 bs.limit(bs.position()); 4851 bs.position(0); 4852 4853 result = decoder.decode(bs, us, true); 4854 result = decoder.flush(us); 4855 if (!result.isMalformed()) { 4856 errln("Malform error while decoding UTF-16 should have occurred."); 4857 } 4858 4859 us.clear(); 4860 bs.clear(); 4861 4862 us.put((char)0xD902); us.put((char)0xDD01); us.put((char)0x0041); 4863 4864 us.limit(1); 4865 us.position(0); 4866 bs.limit(4); 4867 bs.position(0); 4868 4869 result = encoder.encode(us, bs, true); 4870 us.limit(3); 4871 us.position(0); 4872 bs.limit(3); 4873 bs.position(0); 4874 result = encoder.encode(us, bs, true); 4875 if (!result.isOverflow()) { 4876 errln("Overflow buffer while encoding UTF-16 should have occurred."); 4877 } 4878 4879 us.clear(); 4880 bs.clear(); 4881 4882 //test overflow buffer 4883 decoder.reset(); 4884 decoder = provider.charsetForName("UTF-16BE").newDecoder(); 4885 4886 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x41); 4887 4888 us.limit(0); 4889 us.position(0); 4890 bs.limit(3); 4891 bs.position(0); 4892 4893 result = decoder.decode(bs, us, true); 4894 if (!result.isOverflow()) { 4895 errln("Overflow buffer while decoding UTF-16 should have occurred."); 4896 } 4897 } 4898 4899 //provide better code coverage for Charset ISO-2022-KR 4900 @Test 4901 public void TestCharsetISO2022KR() { 4902 CoderResult result = CoderResult.UNDERFLOW; 4903 CharsetProvider 
provider = new CharsetProviderICU(); 4904 CharsetDecoder decoder = provider.charsetForName("ISO-2022-KR").newDecoder(); 4905 4906 byte bytearray[] = { 4907 (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x43, (byte)0x41, (byte)0x42, 4908 }; 4909 char chararray[] = { 4910 (char)0x0041 4911 }; 4912 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4913 CharBuffer cb = CharBuffer.wrap(chararray); 4914 4915 result = decoder.decode(bb, cb, true); 4916 4917 if (!result.isOverflow()) { 4918 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4919 } 4920 } 4921 4922 //provide better code coverage for Charset ISO-2022-JP 4923 @Test 4924 public void TestCharsetISO2022JP() { 4925 CoderResult result = CoderResult.UNDERFLOW; 4926 CharsetProvider provider = new CharsetProviderICU(); 4927 CharsetDecoder decoder = provider.charsetForName("ISO-2022-JP-2").newDecoder(); 4928 4929 byte bytearray[] = { 4930 (byte)0x1b, (byte)0x24, (byte)0x28, (byte)0x44, (byte)0x0A, (byte)0x41, 4931 }; 4932 char chararray[] = { 4933 (char)0x000A 4934 }; 4935 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4936 CharBuffer cb = CharBuffer.wrap(chararray); 4937 4938 result = decoder.decode(bb, cb, true); 4939 4940 if (!result.isOverflow()) { 4941 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4942 } 4943 } 4944 4945 //provide better code coverage for Charset ASCII 4946 @Test 4947 public void TestCharsetASCII() { 4948 CoderResult result = CoderResult.UNDERFLOW; 4949 CharsetProvider provider = new CharsetProviderICU(); 4950 CharsetDecoder decoder = provider.charsetForName("US-ASCII").newDecoder(); 4951 4952 byte bytearray[] = { 4953 (byte)0x41 4954 }; 4955 char chararray[] = { 4956 (char)0x0041 4957 }; 4958 4959 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4960 CharBuffer cb = CharBuffer.wrap(chararray); 4961 4962 result = decoder.decode(bb, cb, true); 4963 result = decoder.flush(cb); 4964 4965 if (result.isError()) { 4966 errln("Error occurred while decoding US-ASCII."); 
4967 } 4968 } 4969 4970 // provide better code coverage for Charset Callbacks 4971 /* Different aspects of callbacks are being tested including using different context available */ 4972 @Test 4973 public void TestCharsetCallbacks() { 4974 CoderResult result = CoderResult.UNDERFLOW; 4975 CharsetProvider provider = new CharsetProviderICU(); 4976 CharsetEncoder encoder = provider.charsetForName("iso-2022-jp").newEncoder(); 4977 CharsetDecoder decoder = provider.charsetForName("iso-2022-jp").newDecoder(); 4978 4979 String context3[] = { 4980 "i", 4981 "J" 4982 }; 4983 4984 // Testing encoder escape callback 4985 String context1[] = { 4986 "J", 4987 "C", 4988 "D", 4989 null 4990 }; 4991 char chararray[] = { 4992 (char)0xd122 4993 }; 4994 ByteBuffer bb = ByteBuffer.allocate(20); 4995 CharBuffer cb = CharBuffer.wrap(chararray); 4996 4997 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.OVERFLOW, CharsetCallback.FROM_U_CALLBACK_ESCAPE, null); // This callback is not valid. 4998 for (int i = 0; i < context1.length; i++) { 4999 encoder.reset(); 5000 cb.position(0); 5001 bb.position(0); 5002 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_ESCAPE, context1[i]); // This callback is valid. 
5003 5004 result = encoder.encode(cb, bb, true); 5005 if (result.isError()) { 5006 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 5007 } 5008 } 5009 5010 // Testing encoder skip callback 5011 for (int i = 0; i < context3.length; i++) { 5012 encoder.reset(); 5013 cb.position(0); 5014 bb.position(0); 5015 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SKIP, context3[i]); 5016 5017 result = encoder.encode(cb, bb, true); 5018 if (result.isError() && i == 0) { 5019 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 5020 } 5021 } 5022 5023 // Testing encoder sub callback 5024 for (int i = 0; i < context3.length; i++) { 5025 encoder.reset(); 5026 cb.position(0); 5027 bb.position(0); 5028 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE, context3[i]); 5029 5030 result = encoder.encode(cb, bb, true); 5031 if (result.isError() && i == 0) { 5032 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 5033 } 5034 } 5035 5036 // Testing decoder escape callback 5037 String context2[] = { 5038 "X", 5039 "C", 5040 "D", 5041 null 5042 }; 5043 byte bytearray[] = { 5044 (byte)0x1b, (byte)0x2e, (byte)0x43 5045 }; 5046 bb = ByteBuffer.wrap(bytearray); 5047 cb = CharBuffer.allocate(20); 5048 5049 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_ESCAPE, null); // This callback is not valid. 5050 for (int i = 0; i < context2.length; i++) { 5051 decoder.reset(); 5052 cb.position(0); 5053 bb.position(0); 5054 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_ESCAPE, context2[i]); // This callback is valid. 
5055 5056 result = decoder.decode(bb, cb, true); 5057 if (result.isError()) { 5058 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder."); 5059 } 5060 } 5061 5062 // Testing decoder skip callback 5063 for (int i = 0; i < context3.length; i++) { 5064 decoder.reset(); 5065 cb.position(0); 5066 bb.position(0); 5067 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_SKIP, context3[i]); 5068 result = decoder.decode(bb, cb, true); 5069 if (!result.isError()) { 5070 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder should have occurred."); 5071 } 5072 } 5073 } 5074 5075 // Testing invalid input exceptions 5076 @Test 5077 public void TestInvalidInput() { 5078 CharsetProvider provider = new CharsetProviderICU(); 5079 Charset charset = provider.charsetForName("iso-2022-jp"); 5080 CharsetEncoder encoder = charset.newEncoder(); 5081 CharsetDecoder decoder = charset.newDecoder(); 5082 5083 try { 5084 encoder.encode(CharBuffer.allocate(10), null, true); 5085 errln("Illegal argument exception should have been thrown due to null target."); 5086 } catch (Exception ex) { 5087 } 5088 5089 try { 5090 decoder.decode(ByteBuffer.allocate(10), null, true); 5091 errln("Illegal argument exception should have been thrown due to null target."); 5092 } catch (Exception ex) { 5093 } 5094 } 5095 5096 // Test java canonical names 5097 @Test 5098 public void TestGetICUJavaCanonicalNames() { 5099 // Ambiguous charset name. 5100 String javaCName = CharsetProviderICU.getJavaCanonicalName("windows-1250"); 5101 String icuCName = CharsetProviderICU.getICUCanonicalName("Windows-1250"); 5102 if (javaCName == null || icuCName == null) { 5103 errln("Unable to get Java or ICU canonical name from ambiguous alias"); 5104 } 5105 5106 } 5107 5108 // Port over from ICU4C for test conversion tables (mbcs version 5.x) 5109 // Provide better code coverage in CharsetMBCS, CharsetDecoderICU, and CharsetEncoderICU. 
5110 @Test 5111 public void TestCharsetTestData() { 5112 CoderResult result = CoderResult.UNDERFLOW; 5113 String charsetName = "test4"; 5114 CharsetProvider provider = new CharsetProviderICU(); 5115 Charset charset = ((CharsetProviderICU)provider).charsetForName(charsetName, "com/ibm/icu/dev/data/testdata", 5116 this.getClass().getClassLoader()); 5117 CharsetEncoder encoder = charset.newEncoder(); 5118 CharsetDecoder decoder = charset.newDecoder(); 5119 5120 byte bytearray[] = { 5121 0x01, 0x02, 0x03, 0x0a, 5122 0x01, 0x02, 0x03, 0x0b, 5123 0x01, 0x02, 0x03, 0x0d, 5124 }; 5125 5126 // set the callback for overflow errors 5127 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_STOP, null); 5128 5129 ByteBuffer bb = ByteBuffer.wrap(bytearray); 5130 CharBuffer cb = CharBuffer.allocate(10); 5131 5132 bb.limit(4); 5133 cb.limit(1); // Overflow should occur and is expected 5134 result = decoder.decode(bb, cb, false); 5135 if (result.isError()) { 5136 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5137 } 5138 5139 bb.limit(8); 5140 result = decoder.decode(bb, cb, false); 5141 if (result.isError()) { 5142 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5143 } 5144 5145 bb.limit(12); 5146 result = decoder.decode(bb, cb, true); 5147 if (result.isError()) { 5148 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5149 } 5150 5151 char chararray[] = { 5152 0xDBC4,0xDE34,0xD900,0xDC05,/* \U00101234\U00050005 */ 5153 0xD940, /* first half of \U00060006 or \U00060007 */ 5154 0xDC07/* second half of \U00060007 */ 5155 }; 5156 5157 cb = CharBuffer.wrap(chararray); 5158 bb = ByteBuffer.allocate(10); 5159 5160 bb.limit(2); 5161 cb.limit(4); 5162 result = encoder.encode(cb, bb, false); 5163 if (result.isError()) { 5164 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5165 } 5166 cb.limit(5); 5167 result = 
encoder.encode(cb, bb, false); 5168 if (result.isError()) { 5169 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5170 } 5171 cb.limit(6); 5172 result = encoder.encode(cb, bb, true); 5173 if (!result.isError()) { 5174 errln("Error should have occurred while encoding: " + charsetName); 5175 } 5176 } 5177 5178 /* Round trip test of SCSU converter*/ 5179 @Test 5180 public void TestSCSUConverter(){ 5181 byte allFeaturesSCSU[]={ 5182 0x41,(byte) 0xdf, 0x12,(byte) 0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x1b, 0x03, 5183 (byte)0xdf, 0x1c,(byte) 0x88,(byte) 0x80, 0x0b, (byte)0xbf,(byte) 0xff,(byte) 0xff, 0x0d, 0x0a, 5184 0x41, 0x10, (byte)0xdf, 0x12, (byte)0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x13, 5185 (byte)0xdf, 0x14,(byte) 0x80, 0x15, (byte)0xff 5186 }; 5187 5188 char allFeaturesUTF16[]={ 5189 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 5190 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 5191 0x01df, 0xf000, 0xdbff, 0xdfff 5192 }; 5193 5194 5195 char germanUTF16[]={ 5196 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 5197 }; 5198 5199 byte germanSCSU[]={ 5200 (byte)0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65,(byte) 0xdf, 0x74 5201 }; 5202 5203 char russianUTF16[]={ 5204 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 5205 }; 5206 5207 byte russianSCSU[]={ 5208 0x12, (byte)0x9c,(byte)0xbe,(byte) 0xc1, (byte)0xba, (byte)0xb2, (byte)0xb0 5209 }; 5210 5211 char japaneseUTF16[]={ 5212 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 5213 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 5214 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 5215 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 5216 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 5217 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 5218 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 5219 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 
0x306e, 0x30d1, 0x30bd, 5220 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 5221 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 5222 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 5223 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 5224 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 5225 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 5226 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 5227 }; 5228 5229 // SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 5230 //it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient 5231 byte japaneseSCSU[]={ 5232 0x08, 0x00, 0x1b, 0x4c,(byte) 0xea, 0x16, (byte)0xca, (byte)0xd3,(byte) 0x94, 0x0f, 0x53, (byte)0xef, 0x61, 0x1b, (byte)0xe5,(byte) 0x84, 5233 (byte)0xc4, 0x0f, (byte)0x53,(byte) 0xef, 0x61, 0x1b, (byte)0xe5, (byte)0x84, (byte)0xc4, 0x16, (byte)0xca, (byte)0xd3, (byte)0x94, 0x08, 0x02, 0x0f, 5234 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, (byte)0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41,(byte) 0x88, 0x4c, 5235 (byte) 0xe5,(byte) 0x97, (byte)0x9f, 0x08, 0x0c, 0x16,(byte) 0xca,(byte) 0xd3, (byte)0x94, 0x15, (byte)0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 5236 (byte) 0x8c, (byte)0xb4, (byte)0xa3,(byte) 0x9f,(byte) 0xca, (byte)0x99, (byte)0xcb,(byte) 0x8b, (byte)0xc2,(byte) 0x97,(byte) 0xcc,(byte) 0xaa,(byte) 0x84, 0x08, 0x02, 0x0e, 5237 0x7c, 0x73, (byte)0xe2, 0x16, (byte)0xa3,(byte) 0xb7, (byte)0xcb, (byte)0x93, (byte)0xd3,(byte) 0xb4,(byte) 0xc5, (byte)0xdc, (byte)0x9f, 0x0e, 0x79, 0x3e, 5238 0x06, (byte)0xae, (byte)0xb1, (byte)0x9d,(byte) 0x93, (byte)0xd3, 0x08, 0x0c, (byte)0xbe,(byte) 0xa3, (byte)0x8f, 0x08,(byte) 0x88,(byte) 0xbe,(byte) 0xa3,(byte) 0x8d, 5239 (byte)0xd3,(byte) 0xa8, (byte)0xa3, (byte)0x97,(byte) 0xc5, 0x17,(byte) 0x89, 0x08, 0x0d, 0x15,(byte) 0xd2, 0x08, 0x01, (byte)0x93, (byte)0xc8,(byte) 0xaa, 5240 (byte)0x8f, 0x0e, 0x61, 0x1b, 
(byte)0x99,(byte) 0xcb, 0x0e, 0x4e, (byte)0xba, (byte)0x9f, (byte)0xa1,(byte) 0xae,(byte) 0x93, (byte)0xa8,(byte) 0xa0, 0x08, 5241 0x02, 0x08, 0x0c, (byte)0xe2, 0x16, (byte)0xa3, (byte)0xb7, (byte)0xcb, 0x0f, 0x4f,(byte) 0xe1,(byte) 0x80, 0x05,(byte) 0xec, 0x60, (byte)0x8d, 5242 (byte)0xea, 0x06,(byte) 0xd3,(byte) 0xe6, 0x0f,(byte) 0x8a, 0x00, 0x30, 0x44, 0x65,(byte) 0xb9, (byte)0xe4, (byte)0xfe,(byte) 0xe7,(byte) 0xc2, 0x06, 5243 (byte)0xcb, (byte)0x82 5244 }; 5245 5246 CharsetProviderICU cs = new CharsetProviderICU(); 5247 CharsetICU charset = (CharsetICU)cs.charsetForName("scsu"); 5248 CharsetDecoder decode = charset.newDecoder(); 5249 CharsetEncoder encode = charset.newEncoder(); 5250 5251 //String[] codePoints = {"allFeatures", "german","russian","japanese"}; 5252 byte[][] fromUnicode={allFeaturesSCSU,germanSCSU,russianSCSU,japaneseSCSU}; 5253 char[][] toUnicode = {allFeaturesUTF16, germanUTF16,russianUTF16,japaneseUTF16}; 5254 5255 for(int i=0;i<4;i++){ 5256 ByteBuffer decoderBuffer = ByteBuffer.wrap(fromUnicode[i]); 5257 CharBuffer encoderBuffer = CharBuffer.wrap(toUnicode[i]); 5258 5259 try{ 5260 // Decoding 5261 CharBuffer decoderResult = decode.decode(decoderBuffer); 5262 encoderBuffer.position(0); 5263 if(!decoderResult.equals(encoderBuffer)){ 5264 errln("Error occured while decoding "+ charset.name()); 5265 } 5266 // Encoding 5267 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5268 // RoundTrip Test 5269 ByteBuffer roundTrip = encoderResult; 5270 CharBuffer roundTripResult = decode.decode(roundTrip); 5271 encoderBuffer.position(0); 5272 if(!roundTripResult.equals(encoderBuffer)){ 5273 errln("Error occured while encoding "+ charset.name()); 5274 } 5275 // Test overflow for code coverage reasons 5276 if (i == 0) { 5277 ByteBuffer test = encoderResult; 5278 test.position(0); 5279 CharBuffer smallBuffer = CharBuffer.allocate(11); 5280 decode.reset(); 5281 CoderResult status = decode.decode(test, smallBuffer, true); 5282 if (status != 
CoderResult.OVERFLOW) { 5283 errln("Overflow buffer error should have been thrown."); 5284 } 5285 } 5286 }catch(Exception e){ 5287 errln("Exception while converting SCSU thrown: " + e); 5288 } 5289 } 5290 5291 /* Provide better code coverage */ 5292 /* testing illegal codepoints */ 5293 CoderResult illegalResult = CoderResult.UNDERFLOW; 5294 CharBuffer illegalDecoderTrgt = CharBuffer.allocate(10); 5295 5296 byte[] illegalDecoderSrc1 = { (byte)0x41, (byte)0xdf, (byte)0x0c }; 5297 decode.reset(); 5298 illegalResult = decode.decode(ByteBuffer.wrap(illegalDecoderSrc1), illegalDecoderTrgt, true); 5299 if (illegalResult == CoderResult.OVERFLOW || illegalResult == CoderResult.UNDERFLOW) { 5300 errln("Malformed error should have been returned for decoder " + charset.name()); 5301 } 5302 /* code coverage test from nucnvtst.c in ICU4C */ 5303 CoderResult ccResult = CoderResult.UNDERFLOW; 5304 int CCBufSize = 120 * 10; 5305 ByteBuffer trgt = ByteBuffer.allocate(CCBufSize); 5306 CharBuffer test = CharBuffer.allocate(CCBufSize); 5307 String [] ccSrc = { 5308 "\ud800\udc00", /* smallest surrogate*/ 5309 "\ud8ff\udcff", 5310 "\udBff\udFff", /* largest surrogate pair*/ 5311 "\ud834\udc00", 5312 //"\U0010FFFF", 5313 "Hello \u9292 \u9192 World!", 5314 "Hell\u0429o \u9292 \u9192 W\u00e4rld!", 5315 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5316 5317 "\u0648\u06c8", /* catch missing reset*/ 5318 "\u0648\u06c8", 5319 5320 "\u4444\uE001", /* lowest quotable*/ 5321 "\u4444\uf2FF", /* highest quotable*/ 5322 "\u4444\uf188\u4444", 5323 "\u4444\uf188\uf288", 5324 "\u4444\uf188abc\u0429\uf288", 5325 "\u9292\u2222", 5326 "Hell\u0429\u04230o \u9292 \u9292W\u00e4\u0192rld!", 5327 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5328 "Hello World!123456", 5329 "Hello W\u0081\u011f\u0082!", /* Latin 1 run*/ 5330 5331 "abc\u0301\u0302", /* uses SQn for u301 u302*/ 5332 "abc\u4411d", /* uses SQU*/ 5333 "abc\u4411\u4412d",/* uses SCU*/ 5334 "abc\u0401\u0402\u047f\u00a5\u0405", /* uses SQn for ua5*/ 5335 
"\u9191\u9191\u3041\u9191\u3041\u3041\u3000", /* SJIS like data*/ 5336 "\u9292\u2222", 5337 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", 5338 "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c", 5339 "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002", 5340 5341 "", /* empty input*/ 5342 "\u0000", /* smallest BMP character*/ 5343 "\uFFFF", /* largest BMP character*/ 5344 5345 /* regression tests*/ 5346 "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa", 5347 /*"\u00df\u01df\uf000\udbff\udfff\u000d\n\u0041\u00df\u0401\u015f\u00df\u01df\uf000\udbff\udfff",*/ 5348 "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c", 5349 "\u0041\u00df\u0401\u015f", 5350 "\u9066\u2123abc", 5351 //"\ud266\u43d7\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5", 5352 "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489", 5353 }; 5354 for (int i = 0; i < ccSrc.length; i++) { 5355 CharBuffer ubuf = CharBuffer.wrap(ccSrc[i]); 5356 encode.reset(); 5357 decode.reset(); 5358 trgt.clear(); 5359 test.clear(); 5360 ccResult = encode.encode(ubuf, trgt, true); 5361 if (ccResult.isError()) { 5362 errln("Error while encoding " + charset.name() + " in test for code coverage[" + i + "]."); 5363 } else { 5364 trgt.limit(trgt.position()); 5365 trgt.position(0); 5366 ccResult = decode.decode(trgt, test, true); 5367 if (ccResult.isError()) { 5368 errln("Error while decoding " + charset.name() + " in test for code coverage[" + i + "]."); 5369 } else { 5370 ubuf.position(0); 5371 test.limit(test.position()); 5372 
test.position(0); 5373 if (!equals(test, ubuf)) { 5374 errln("Roundtrip failed for " + charset.name() + " in test for code coverage[" + i + "]."); 5375 } 5376 } 5377 } 5378 } 5379 5380 /* Monkey test */ 5381 { 5382 char[] monkeyIn = { 5383 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 5384 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 5385 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 5386 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 5387 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 5388 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 5389 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 5390 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 5391 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 5392 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 5393 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 5394 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 5395 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 5396 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 5397 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 5398 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 5399 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 5400 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 5401 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 5402 /* test non-BMP code points */ 5403 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 5404 0xD869, 0xDEA0, 0xD869, 
0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 5405 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 5406 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 5407 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 5408 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 5409 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 5410 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 5411 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 5412 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 5413 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 5414 5415 5416 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 5417 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 5418 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 5419 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 5420 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 5421 }; 5422 encode.reset(); 5423 decode.reset(); 5424 CharBuffer monkeyCB = CharBuffer.wrap(monkeyIn); 5425 try { 5426 ByteBuffer monkeyBB = encode.encode(monkeyCB); 5427 /* CharBuffer monkeyEndResult =*/ decode.decode(monkeyBB); 5428 5429 } catch (Exception ex) { 5430 errln("Exception thrown while encoding/decoding monkey test in SCSU: " + ex); 5431 } 5432 } 5433 // Test malformed 5434 { 5435 char[] malformedSequence = { 5436 0xD899, 0xDC7F, 0xDC88, 0xDC88, 0xD888, 0xDDF9 5437 }; 5438 encode.reset(); 5439 CharBuffer malformedSrc = CharBuffer.wrap(malformedSequence); 5440 5441 try { 5442 encode.encode(malformedSrc); 5443 errln("Malformed error should have thrown an exception."); 5444 } catch (Exception ex) { 5445 } 5446 } 5447 // Test overflow buffer 5448 { 5449 ByteBuffer 
overflowTest = ByteBuffer.wrap(allFeaturesSCSU); 5450 int sizes[] = { 8, 2, 11 }; 5451 for (int i = 0; i < sizes.length; i++) { 5452 try { 5453 decode.reset(); 5454 overflowTest.position(0); 5455 smBufDecode(decode, "SCSU overflow test", overflowTest, CharBuffer.allocate(sizes[i]), true, false); 5456 errln("Buffer overflow exception should have been thrown."); 5457 } catch (BufferOverflowException ex) { 5458 } catch (Exception ex) { 5459 errln("Buffer overflow exception should have been thrown."); 5460 } 5461 } 5462 5463 } 5464 } 5465 5466 /* Test for BOCU1 converter*/ 5467 @Test 5468 public void TestBOCU1Converter(){ 5469 char expected[]={ 5470 0xFEFF, 0x0061, 0x0062, 0x0020, // 0 5471 0x0063, 0x0061, 0x000D, 0x000A, 5472 5473 0x0020, 0x0000, 0x00DF, 0x00E6, // 8 5474 0x0930, 0x0020, 0x0918, 0x0909, 5475 5476 0x3086, 0x304D, 0x0020, 0x3053, // 16 5477 0x4000, 0x4E00, 0x7777, 0x0020, 5478 5479 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, // 24 5480 0x0020, 0xD7A3, 0xDC00, 0xD800, 5481 5482 0xD800, 0xDC00, 0xD845, 0xDDDD, // 32 5483 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 5484 5485 0xDFFF, 0x0001, 0x0E40, 0x0020, // 40 5486 0x0009 5487 }; 5488 5489 byte sampleText[]={ // from cintltst/bocu1tst.c/TestBOCU1 text 1 5490 (byte) 0xFB, 5491 (byte) 0xEE, 5492 0x28, // from source offset 0 5493 0x24, 0x1E, 0x52, (byte) 0xB2, 0x20, 5494 (byte) 0xB3, 5495 (byte) 0xB1, 5496 0x0D, 5497 0x0A, 5498 5499 0x20, // from 8 5500 0x00, (byte) 0xD0, 0x6C, (byte) 0xB6, (byte) 0xD8, (byte) 0xA5, 5501 0x20, 0x68, 5502 0x59, 5503 5504 (byte) 0xF9, 5505 0x28, // from 16 5506 0x6D, 0x20, 0x73, (byte) 0xE0, 0x2D, (byte) 0xDE, 0x43, 5507 (byte) 0xD0, 0x33, 0x20, 5508 5509 (byte) 0xFA, 5510 (byte) 0x83, // from 24 5511 0x25, 0x01, (byte) 0xFB, 0x16, (byte) 0x87, 0x4B, 0x16, 0x20, 5512 (byte) 0xE6, (byte) 0xBD, (byte) 0xEB, 0x5B, 0x4B, (byte) 0xCC, 5513 5514 (byte) 0xF9, 5515 (byte) 0xA2, // from 32 5516 (byte) 0xFC, 0x10, 0x3E, (byte) 0xFE, 0x16, 0x3A, (byte) 0x8C, 5517 0x20, (byte) 0xFC, 0x03, (byte) 0xAC, 5518 
5519 0x01, /// from 41 5520 (byte) 0xDE, (byte) 0x83, 0x20, 0x09 5521 }; 5522 5523 CharsetProviderICU cs = new CharsetProviderICU(); 5524 CharsetICU charset = (CharsetICU)cs.charsetForName("BOCU-1"); 5525 CharsetDecoder decode = charset.newDecoder(); 5526 CharsetEncoder encode = charset.newEncoder(); 5527 5528 ByteBuffer decoderBuffer = ByteBuffer.wrap(sampleText); 5529 CharBuffer encoderBuffer = CharBuffer.wrap(expected); 5530 try{ 5531 // Decoding 5532 CharBuffer decoderResult = decode.decode(decoderBuffer); 5533 5534 encoderBuffer.position(0); 5535 if(!decoderResult.equals(encoderBuffer)){ 5536 errln("Error occured while decoding "+ charset.name()); 5537 } 5538 // Encoding 5539 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5540 // RoundTrip Test 5541 ByteBuffer roundTrip = encoderResult; 5542 CharBuffer roundTripResult = decode.decode(roundTrip); 5543 5544 encoderBuffer.position(0); 5545 if(!roundTripResult.equals(encoderBuffer)){ 5546 errln("Error occured while encoding "+ charset.name()); 5547 } 5548 }catch(Exception e){ 5549 errln("Exception while converting BOCU-1 thrown: " + e); 5550 } 5551 } 5552 5553 /* Test that ICU4C and ICU4J get the same ICU canonical name when given the same alias. */ 5554 @Test 5555 public void TestICUCanonicalNameConsistency() { 5556 String[] alias = { 5557 "KSC_5601" 5558 }; 5559 String[] expected = { 5560 "windows-949-2000" 5561 }; 5562 5563 for (int i = 0; i < alias.length; i++) { 5564 String name = CharsetProviderICU.getICUCanonicalName(alias[i]); 5565 if (!name.equals(expected[i])) { 5566 errln("The ICU canonical name in ICU4J does not match that in ICU4C. 
Result: " + name + "Expected: " + expected[i]); 5567 } 5568 } 5569 } 5570 5571 /* Increase code coverage for CharsetICU and CharsetProviderICU*/ 5572 @Test 5573 public void TestCharsetICUCodeCoverage() { 5574 CharsetProviderICU provider = new CharsetProviderICU(); 5575 5576 if (provider.charsetForName("UTF16", null) != null) { 5577 errln("charsetForName should have returned a null"); 5578 } 5579 5580 if (CharsetProviderICU.getJavaCanonicalName(null) != null) { 5581 errln("getJavaCanonicalName should have returned a null when null is given to it."); 5582 } 5583 5584 try { 5585 Charset testCharset = CharsetICU.forNameICU("bogus"); 5586 errln("UnsupportedCharsetException should be thrown for charset \"bogus\" - but got charset " + testCharset.name()); 5587 } catch (UnsupportedCharsetException ex) { 5588 logln("UnsupportedCharsetException was thrown for CharsetICU.forNameICU(\"bogus\")"); 5589 } 5590 5591 Charset charset = provider.charsetForName("UTF16"); 5592 5593 try { 5594 ((CharsetICU)charset).getUnicodeSet(null, 0); 5595 } catch (IllegalArgumentException ex) { 5596 return; 5597 } 5598 errln("IllegalArgumentException should have been thrown."); 5599 } 5600 5601 @Test 5602 public void TestCharsetLMBCS() { 5603 String []lmbcsNames = { 5604 "LMBCS-1", 5605 "LMBCS-2", 5606 "LMBCS-3", 5607 "LMBCS-4", 5608 "LMBCS-5", 5609 "LMBCS-6", 5610 "LMBCS-8", 5611 "LMBCS-11", 5612 "LMBCS-16", 5613 "LMBCS-17", 5614 "LMBCS-18", 5615 "LMBCS-19" 5616 }; 5617 5618 char[] src = { 5619 0x0192, 0x0041, 0x0061, 0x00D0, 0x00F6, 0x0100, 0x0174, 0x02E4, 0x03F5, 0x03FB, 5620 0x05D3, 0x05D4, 0x05EA, 0x0684, 0x0685, 0x1801, 0x11B3, 0x11E8, 0x1F9A, 0x2EB4, 5621 0x3157, 0x3336, 0x3304, 0xD881, 0xDC88 5622 }; 5623 CharBuffer cbInput = CharBuffer.wrap(src); 5624 5625 CharsetProviderICU provider = new CharsetProviderICU(); 5626 5627 for (int i = 0; i < lmbcsNames.length; i++) { 5628 Charset charset = provider.charsetForName(lmbcsNames[i]); 5629 if (charset == null) { 5630 errln("Unable to create 
LMBCS charset: " + lmbcsNames[i]); 5631 return; 5632 } 5633 CharsetEncoder encoder = charset.newEncoder(); 5634 CharsetDecoder decoder = charset.newDecoder(); 5635 5636 try { 5637 cbInput.position(0); 5638 ByteBuffer bbTmp = encoder.encode(cbInput); 5639 CharBuffer cbOutput = decoder.decode(bbTmp); 5640 5641 if (!equals(cbInput, cbOutput)) { 5642 errln("Roundtrip test failed for charset: " + lmbcsNames[i]); 5643 } 5644 } catch (Exception ex) { 5645 if (i >= 8) { 5646 /* Expected exceptions */ 5647 continue; 5648 } 5649 errln("Exception thrown: " + ex + " while using charset: " + lmbcsNames[i]); 5650 } 5651 5652 } 5653 5654 // Test malformed 5655 CoderResult malformedResult = CoderResult.UNDERFLOW; 5656 byte[] malformedBytes = { 5657 (byte)0x61, (byte)0x01, (byte)0x29, (byte)0x81, (byte)0xa0, (byte)0x0f 5658 }; 5659 ByteBuffer malformedSrc = ByteBuffer.wrap(malformedBytes); 5660 CharBuffer malformedTrgt = CharBuffer.allocate(10); 5661 int[] malformedLimits = { 5662 2, 6 5663 }; 5664 CharsetDecoder malformedDecoderTest = provider.charsetForName("LMBCS-1").newDecoder(); 5665 for (int n = 0; n < malformedLimits.length; n++) { 5666 malformedDecoderTest.reset(); 5667 5668 malformedSrc.position(0); 5669 malformedSrc.limit(malformedLimits[n]); 5670 5671 malformedTrgt.clear(); 5672 5673 malformedResult = malformedDecoderTest.decode(malformedSrc,malformedTrgt, true); 5674 if (!malformedResult.isMalformed()) { 5675 errln("Malformed error should have resulted."); 5676 } 5677 } 5678 } 5679 5680 /* 5681 * This is a port of ICU4C TestAmbiguousConverter in cintltst. 5682 * Since there is no concept of ambiguous converters in ICU4J 5683 * this test is merely for code coverage reasons. 
5684 */ 5685 @Test 5686 public void TestAmbiguousConverter() { 5687 byte [] inBytes = { 5688 0x61, 0x5b, 0x5c 5689 }; 5690 ByteBuffer src = ByteBuffer.wrap(inBytes); 5691 CharBuffer trgt = CharBuffer.allocate(20); 5692 5693 CoderResult result = CoderResult.UNDERFLOW; 5694 CharsetProviderICU provider = new CharsetProviderICU(); 5695 String[] names = CharsetProviderICU.getAllNames(); 5696 5697 for (int i = 0; i < names.length; i++) { 5698 Charset charset = provider.charsetForName(names[i]); 5699 if (charset == null) { 5700 /* We don't care about any failures because not all converters are available. */ 5701 continue; 5702 } 5703 CharsetDecoder decoder = charset.newDecoder(); 5704 5705 src.position(0); 5706 trgt.clear(); 5707 5708 result = decoder.decode(src, trgt, true); 5709 if (result.isError()) { 5710 /* We don't care about any failures. */ 5711 continue; 5712 } 5713 } 5714 } 5715 5716 @Test 5717 public void TestIsFixedWidth(){ 5718 String[] fixedWidth = { 5719 "US-ASCII", 5720 "UTF32", 5721 "ibm-5478_P100-1995" 5722 }; 5723 5724 String[] notFixedWidth = { 5725 "GB18030", 5726 "UTF8", 5727 "windows-949-2000", 5728 "UTF16" 5729 }; 5730 CharsetProvider provider = new CharsetProviderICU(); 5731 Charset charset; 5732 5733 for (int i = 0; i < fixedWidth.length; i++) { 5734 charset = provider.charsetForName(fixedWidth[i]); 5735 5736 if (!((CharsetICU)charset).isFixedWidth()) { 5737 errln(fixedWidth[i] + " is a fixedWidth charset but returned false."); 5738 } 5739 } 5740 5741 for (int i = 0; i < notFixedWidth.length; i++) { 5742 charset = provider.charsetForName(notFixedWidth[i]); 5743 5744 if (((CharsetICU)charset).isFixedWidth()) { 5745 errln(notFixedWidth[i] + " is NOT a fixedWidth charset but returned true."); 5746 } 5747 } 5748 } 5749 5750 @Test 5751 public void TestBytesLengthForString() { 5752 CharsetProviderICU provider = new CharsetProviderICU(); 5753 String[] charsets = { 5754 "windows-949-2000", 5755 "ibm-1047_P100-1995,swaplfnl", 5756 "ibm-930_P120-1999", 
5757 "ISCII,version=0", 5758 "ISO_2022,locale=ko,version=0" 5759 }; 5760 5761 int[] expected = { 5762 40, 5763 20, 5764 80, /* changed from 60 to 80 to reflect the updates by #9205 */ 5765 80, 5766 160 5767 }; 5768 5769 int stringLength = 10; 5770 int length; 5771 int maxCharSize; 5772 5773 for (int i = 0; i < charsets.length; i++) { 5774 maxCharSize = (int)provider.charsetForName(charsets[i]).newEncoder().maxBytesPerChar(); 5775 length = CharsetEncoderICU.getMaxBytesForString(stringLength, maxCharSize); 5776 5777 if (length != expected[i]) { 5778 errln("For charset " + charsets[i] + " with string length " + stringLength + ", expected max byte length is " + expected[i] + " but got " + length); 5779 } 5780 } 5781 } 5782 5783 /* 5784 * When converting slices of a larger CharBuffer, Charset88591 and CharsetASCII does not handle the buffer correctly when 5785 * an unmappable character occurs. 5786 * Ticket #8729 5787 */ 5788 @Test 5789 public void TestCharsetASCII8859BufferHandling() { 5790 String firstLine = "C077693790=|MEMO=|00=|022=|Blanche st and the driveway grate was fault and rotated under my car=|\r\n"; 5791 String secondLine = "C077693790=|MEMO=|00=|023=|puncturing the fuel tank. 
I spoke to the store operator (Ram Reddi =|\r\n"; 5792 5793 String charsetNames[] = { 5794 "ASCII", 5795 "ISO-8859-1" 5796 }; 5797 5798 CoderResult result = CoderResult.UNDERFLOW; 5799 5800 CharsetEncoder encoder; 5801 5802 ByteBuffer outBuffer = ByteBuffer.allocate(500); 5803 CharBuffer charBuffer = CharBuffer.allocate(firstLine.length() + secondLine.length()); 5804 charBuffer.put(firstLine); 5805 charBuffer.put(secondLine); 5806 charBuffer.flip(); 5807 5808 for (int i = 0; i < charsetNames.length; i++) { 5809 encoder = CharsetICU.forNameICU(charsetNames[i]).newEncoder(); 5810 5811 charBuffer.position(firstLine.length()); 5812 CharBuffer charBufferSlice = charBuffer.slice(); 5813 charBufferSlice.limit(secondLine.length() - 2); 5814 5815 5816 try { 5817 result = encoder.encode(charBufferSlice, outBuffer, false); 5818 if (!result.isUnmappable()) { 5819 errln("Result of encoding " + charsetNames[i] + " should be: \"Unmappable\". Instead got: " + result); 5820 } 5821 } catch (IllegalArgumentException ex) { 5822 errln("IllegalArgumentException should not have been thrown when encoding: " + charsetNames[i]); 5823 } 5824 } 5825 } 5826 5827 /* 5828 * When converting with the String method getBytes(), buffer overflow exception is thrown because 5829 * of the way ICU4J is calculating the max bytes per char. This should be changed only on the ICU4J 5830 * side to match what the Java method is expecting. The ICU4C size will be left unchanged. 
5831 * Ticket #9205 5832 */ 5833 @Test 5834 public void TestBufferOverflowErrorUsingJavagetBytes() { 5835 String charsetName = "ibm-5035"; 5836 String testCase = "\u7d42"; 5837 5838 try { 5839 testCase.getBytes(charsetName); 5840 } catch (Exception ex) { 5841 errln("Error calling getBytes(): " + ex); 5842 } 5843 5844 } 5845 5846 @Test 5847 public void TestDefaultIgnorableCallback() { 5848 String cnv_name = "euc-jp-2007"; 5849 String pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; 5850 String pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]"; 5851 UnicodeSet set_ignorable = new UnicodeSet(pattern_ignorable); 5852 UnicodeSet set_not_ignorable = new UnicodeSet(pattern_not_ignorable); 5853 CharsetEncoder encoder = CharsetICU.forNameICU(cnv_name).newEncoder(); 5854 5855 // set callback for the converter 5856 encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 5857 encoder.onMalformedInput(CodingErrorAction.REPLACE); 5858 5859 // test ignorable code points are ignored 5860 int size = set_ignorable.size(); 5861 for (int i = 0; i < size; i++) { 5862 encoder.reset(); 5863 try { 5864 if(encoder.encode(CharBuffer.wrap(Character.toChars(set_ignorable.charAt(i)))).limit() > 0) { 5865 errln("Callback should have ignore default ignorable: U+" + Integer.toHexString(set_ignorable.charAt(i))); 5866 } 5867 } catch (Exception ex) { 5868 errln("Error received converting +" + Integer.toHexString(set_ignorable.charAt(i))); 5869 } 5870 } 5871 5872 // test non-ignorable code points are not ignored 5873 size = set_not_ignorable.size(); 5874 for (int i = 0; i < size; i++) { 5875 encoder.reset(); 5876 try { 5877 if(encoder.encode(CharBuffer.wrap(Character.toChars(set_not_ignorable.charAt(i)))).limit() == 0) { 5878 errln("Callback should not have ignored: U+" + Integer.toHexString(set_not_ignorable.charAt(i))); 5879 } 5880 } catch (Exception ex) { 5881 errln("Error received converting U+" + Integer.toHexString(set_not_ignorable.charAt(i))); 5882 } 5883 } 5884 } 5885 } 
5886