1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package android.icu.dev.test.lang; 12 13 import java.io.BufferedReader; 14 import java.io.IOException; 15 import java.util.Arrays; 16 import java.util.Locale; 17 18 import org.junit.Test; 19 import org.junit.runner.RunWith; 20 import org.junit.runners.JUnit4; 21 22 import android.icu.dev.test.TestFmwk; 23 import android.icu.dev.test.TestUtil; 24 import android.icu.impl.Norm2AllModes; 25 import android.icu.impl.Normalizer2Impl; 26 import android.icu.impl.PatternProps; 27 import android.icu.impl.UCharacterName; 28 import android.icu.impl.Utility; 29 import android.icu.lang.UCharacter; 30 import android.icu.lang.UCharacterCategory; 31 import android.icu.lang.UCharacterDirection; 32 import android.icu.lang.UCharacterEnums; 33 import android.icu.lang.UProperty; 34 import android.icu.lang.UScript; 35 import android.icu.text.Normalizer2; 36 import android.icu.text.UTF16; 37 import android.icu.text.UnicodeSet; 38 import android.icu.text.UnicodeSetIterator; 39 import android.icu.util.RangeValueIterator; 40 import android.icu.util.ULocale; 41 import android.icu.util.ValueIterator; 42 import android.icu.util.VersionInfo; 43 import android.icu.testsharding.MainTestShard; 44 45 /** 46 * Testing class for UCharacter 47 * Mostly following the test cases for ICU 48 * @author Syn Wee Quek 49 * @since nov 04 2000 50 */ 51 @MainTestShard 52 @RunWith(JUnit4.class) 53 public final class UCharacterTest extends TestFmwk 54 { 55 // private variables ============================================= 56 57 /** 58 * Expected Unicode version. 59 */ 60 private final VersionInfo VERSION_ = VersionInfo.getInstance(10); 61 62 // constructor =================================================== 63 64 /** 65 * Constructor 66 */ 67 public UCharacterTest() 68 { 69 } 70 71 // public methods ================================================ 72 73 /** 74 * Testing the letter and number determination in UCharacter 75 */ 76 @Test 77 public void TestLetterNumber() 78 { 79 for (int i = 0x0041; i < 0x005B; i ++) 80 if (!UCharacter.isLetter(i)) 81 errln("FAIL \\u" + hex(i) + " expected to be a letter"); 82 83 for (int i = 0x0660; i < 0x066A; i ++) 84 if (UCharacter.isLetter(i)) 85 errln("FAIL \\u" + hex(i) + " expected not to be a letter"); 86 87 for (int i = 0x0660; i < 0x066A; i ++) 88 if (!UCharacter.isDigit(i)) 89 errln("FAIL \\u" + hex(i) + " expected to be a digit"); 90 91 for (int i = 0x0041; i < 0x005B; i ++) 92 if (!UCharacter.isLetterOrDigit(i)) 93 errln("FAIL \\u" + hex(i) + " expected not to be a digit"); 94 95 for (int i = 0x0660; i < 0x066A; i ++) 96 if (!UCharacter.isLetterOrDigit(i)) 97 errln("FAIL \\u" + hex(i) + 98 "expected to be either a letter or a digit"); 99 100 /* 101 * The following checks work only starting from Unicode 4.0. 102 * Check the version number here. 103 */ 104 VersionInfo version = UCharacter.getUnicodeVersion(); 105 if(version.getMajor()<4 || version.equals(VersionInfo.getInstance(4, 0, 1))) { 106 return; 107 } 108 109 110 111 /* 112 * Sanity check: 113 * Verify that exactly the digit characters have decimal digit values. 114 * This assumption is used in the implementation of u_digit() 115 * (which checks nt=de) 116 * compared with the parallel java.lang.Character.digit() 117 * (which checks Nd). 118 * 119 * This was not true in Unicode 3.2 and earlier. 120 * Unicode 4.0 fixed discrepancies. 121 * Unicode 4.0.1 re-introduced problems in this area due to an 122 * unintentionally incomplete last-minute change. 123 */ 124 String digitsPattern = "[:Nd:]"; 125 String decimalValuesPattern = "[:Numeric_Type=Decimal:]"; 126 127 UnicodeSet digits, decimalValues; 128 129 digits= new UnicodeSet(digitsPattern); 130 decimalValues=new UnicodeSet(decimalValuesPattern); 131 132 133 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", true); 134 135 136 } 137 138 /** 139 * Tests for space determination in UCharacter 140 */ 141 @Test 142 public void TestSpaces() 143 { 144 int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005}; 145 int nonspaces[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0074}; 146 int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c /* ,0x200b */}; // 0x200b was "Zs" in Unicode 4.0, but it is "Cf" in Unicode 4.1 147 int nonwhitespaces[] = {0x0061, 0x0062, 0x003c, 0x0028, 0x003f, 0x00a0, 0x2007, 0x202f, 0xfefe, 0x200b}; 148 149 int size = spaces.length; 150 for (int i = 0; i < size; i ++) 151 { 152 if (!UCharacter.isSpaceChar(spaces[i])) 153 { 154 errln("FAIL \\u" + hex(spaces[i]) + 155 " expected to be a space character"); 156 break; 157 } 158 159 if (UCharacter.isSpaceChar(nonspaces[i])) 160 { 161 errln("FAIL \\u" + hex(nonspaces[i]) + 162 " expected not to be space character"); 163 break; 164 } 165 166 if (!UCharacter.isWhitespace(whitespaces[i])) 167 { 168 errln("FAIL \\u" + hex(whitespaces[i]) + 169 " expected to be a white space character"); 170 break; 171 } 172 if (UCharacter.isWhitespace(nonwhitespaces[i])) 173 { 174 errln("FAIL \\u" + hex(nonwhitespaces[i]) + 175 " expected not to be a space character"); 176 break; 177 } 178 logln("Ok \\u" + hex(spaces[i]) + " and \\u" + 179 hex(nonspaces[i]) + " and \\u" + hex(whitespaces[i]) + 180 " and \\u" + hex(nonwhitespaces[i])); 181 } 182 183 int patternWhiteSpace[] = {0x9, 0xd, 0x20, 0x85, 184 0x200e, 0x200f, 0x2028, 0x2029}; 185 int nonPatternWhiteSpace[] = {0x8, 0xe, 0x21, 0x86, 0xa0, 0xa1, 186 0x1680, 0x1681, 0x180e, 0x180f, 187 0x1FFF, 0x2000, 0x200a, 0x200b, 188 0x2010, 0x202f, 0x2030, 0x205f, 189 0x2060, 0x3000, 0x3001}; 190 for (int i = 0; i < patternWhiteSpace.length; i ++) { 191 if (!PatternProps.isWhiteSpace(patternWhiteSpace[i])) { 192 errln("\\u" + Utility.hex(patternWhiteSpace[i], 4) 193 + " expected to be a Pattern_White_Space"); 194 } 195 } 196 for (int i = 0; i < nonPatternWhiteSpace.length; i ++) { 197 if (PatternProps.isWhiteSpace(nonPatternWhiteSpace[i])) { 198 errln("\\u" + Utility.hex(nonPatternWhiteSpace[i], 4) 199 + " expected to be a non-Pattern_White_Space"); 200 } 201 } 202 203 // TODO: propose public API for constants like uchar.h's U_GC_*_MASK 204 // (http://bugs.icu-project.org/trac/ticket/7461) 205 int GC_Z_MASK = 206 (1 << UCharacter.SPACE_SEPARATOR) | 207 (1 << UCharacter.LINE_SEPARATOR) | 208 (1 << UCharacter.PARAGRAPH_SEPARATOR); 209 210 // UCharacter.isWhitespace(c) should be the same as Character.isWhitespace(). 211 // This uses logln() because Character.isWhitespace() differs between Java versions, thus 212 // it is not necessarily an error if there is a difference between 213 // particular Java and ICU versions. 214 // However, you need to run tests with -v to see the output. 215 // Also note that, at least as of Unicode 5.2, 216 // there are no supplementary white space characters. 217 for (int c = 0; c <= 0xffff; ++c) { 218 boolean j = Character.isWhitespace(c); 219 boolean i = UCharacter.isWhitespace(c); 220 boolean u = UCharacter.isUWhiteSpace(c); 221 boolean z = (UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK) & 222 GC_Z_MASK) != 0; 223 if (j != i) { 224 logln(String.format( 225 "isWhitespace(U+%04x) difference: JDK %5b ICU %5b Unicode WS %5b Z Separator %5b", 226 c, j, i, u, z)); 227 } else if (j || i || u || z) { 228 logln(String.format( 229 "isWhitespace(U+%04x) FYI: JDK %5b ICU %5b Unicode WS %5b Z Separator %5b", 230 c, j, i, u, z)); 231 } 232 } 233 for (char c = 0; c <= 0xff; ++c) { 234 boolean j = Character.isSpace(c); 235 boolean i = UCharacter.isSpace(c); 236 boolean z = (UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK) & 237 GC_Z_MASK) != 0; 238 if (j != i) { 239 logln(String.format( 240 "isSpace(U+%04x) difference: JDK %5b ICU %5b Z Separator %5b", 241 (int)c, j, i, z)); 242 } else if (j || i || z) { 243 logln(String.format( 244 "isSpace(U+%04x) FYI: JDK %5b ICU %5b Z Separator %5b", 245 (int)c, j, i, z)); 246 } 247 } 248 } 249 250 /** 251 * Test various implementations of Pattern_Syntax & Pattern_White_Space. 252 */ 253 @Test 254 public void TestPatternProperties() { 255 UnicodeSet syn_pp = new UnicodeSet(); 256 UnicodeSet syn_prop = new UnicodeSet("[:Pattern_Syntax:]"); 257 UnicodeSet syn_list = new UnicodeSet( 258 "[!-/\\:-@\\[-\\^`\\{-~"+ 259 "\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE\u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7"+ 260 "\u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E\u2190-\u245F\u2500-\u2775"+ 261 "\u2794-\u2BFF\u2E00-\u2E7F\u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]"); 262 UnicodeSet ws_pp = new UnicodeSet(); 263 UnicodeSet ws_prop = new UnicodeSet("[:Pattern_White_Space:]"); 264 UnicodeSet ws_list = new UnicodeSet("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]"); 265 UnicodeSet syn_ws_pp = new UnicodeSet(); 266 UnicodeSet syn_ws_prop = new UnicodeSet(syn_prop).addAll(ws_prop); 267 for(int c=0; c<=0xffff; ++c) { 268 if(PatternProps.isSyntax(c)) { 269 syn_pp.add(c); 270 } 271 if(PatternProps.isWhiteSpace(c)) { 272 ws_pp.add(c); 273 } 274 if(PatternProps.isSyntaxOrWhiteSpace(c)) { 275 syn_ws_pp.add(c); 276 } 277 } 278 compareUSets(syn_pp, syn_prop, 279 "PatternProps.isSyntax()", "[:Pattern_Syntax:]", true); 280 compareUSets(syn_pp, syn_list, 281 "PatternProps.isSyntax()", "[Pattern_Syntax ranges]", true); 282 compareUSets(ws_pp, ws_prop, 283 "PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", true); 284 compareUSets(ws_pp, ws_list, 285 "PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", true); 286 compareUSets(syn_ws_pp, syn_ws_prop, 287 "PatternProps.isSyntaxOrWhiteSpace()", 288 "[[:Pattern_Syntax:][:Pattern_White_Space:]]", true); 289 } 290 291 /** 292 * Tests for defined and undefined characters 293 */ 294 @Test 295 public void TestDefined() 296 { 297 int undefined[] = {0xfff1, 0xfff7, 0xfa6e}; 298 int defined[] = {0x523E, 0x004f88, 0x00fffd}; 299 300 int size = undefined.length; 301 for (int i = 0; i < size; i ++) 302 { 303 if (UCharacter.isDefined(undefined[i])) 304 { 305 errln("FAIL \\u" + hex(undefined[i]) + 306 " expected not to be defined"); 307 break; 308 } 309 if (!UCharacter.isDefined(defined[i])) 310 { 311 errln("FAIL \\u" + hex(defined[i]) + " expected defined"); 312 break; 313 } 314 } 315 } 316 317 /** 318 * Tests for base characters and their cellwidth 319 */ 320 @Test 321 public void TestBase() 322 { 323 int base[] = {0x0061, 0x000031, 0x0003d2}; 324 int nonbase[] = {0x002B, 0x000020, 0x00203B}; 325 int size = base.length; 326 for (int i = 0; i < size; i ++) 327 { 328 if (UCharacter.isBaseForm(nonbase[i])) 329 { 330 errln("FAIL \\u" + hex(nonbase[i]) + 331 " expected not to be a base character"); 332 break; 333 } 334 if (!UCharacter.isBaseForm(base[i])) 335 { 336 errln("FAIL \\u" + hex(base[i]) + 337 " expected to be a base character"); 338 break; 339 } 340 } 341 } 342 343 /** 344 * Tests for digit characters 345 */ 346 @Test 347 public void TestDigits() 348 { 349 int digits[] = {0x0030, 0x000662, 0x000F23, 0x000ED5, 0x002160}; 350 351 //special characters not in the properties table 352 int digits2[] = {0x3007, 0x004e00, 0x004e8c, 0x004e09, 0x0056d8, 353 0x004e94, 0x00516d, 0x4e03, 0x00516b, 0x004e5d}; 354 int nondigits[] = {0x0010, 0x000041, 0x000122, 0x0068FE}; 355 356 int digitvalues[] = {0, 2, 3, 5, 1}; 357 int digitvalues2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; 358 359 int size = digits.length; 360 for (int i = 0; i < size; i ++) { 361 if (UCharacter.isDigit(digits[i]) && 362 UCharacter.digit(digits[i]) != digitvalues[i]) 363 { 364 errln("FAIL \\u" + hex(digits[i]) + 365 " expected digit with value " + digitvalues[i]); 366 break; 367 } 368 } 369 size = nondigits.length; 370 for (int i = 0; i < size; i ++) 371 if (UCharacter.isDigit(nondigits[i])) 372 { 373 errln("FAIL \\u" + hex(nondigits[i]) + " expected nondigit"); 374 break; 375 } 376 377 size = digits2.length; 378 for (int i = 0; i < 10; i ++) { 379 if (UCharacter.isDigit(digits2[i]) && 380 UCharacter.digit(digits2[i]) != digitvalues2[i]) 381 { 382 errln("FAIL \\u" + hex(digits2[i]) + 383 " expected digit with value " + digitvalues2[i]); 384 break; 385 } 386 } 387 } 388 389 /** 390 * Tests for numeric characters 391 */ 392 @Test 393 public void TestNumeric() 394 { 395 if (UCharacter.getNumericValue(0x00BC) != -2) { 396 errln("Numeric value of 0x00BC expected to be -2"); 397 } 398 399 for (int i = '0'; i < '9'; i ++) { 400 int n1 = UCharacter.getNumericValue(i); 401 double n2 = UCharacter.getUnicodeNumericValue(i); 402 if (n1 != n2 || n1 != (i - '0')) { 403 errln("Numeric value of " + (char)i + " expected to be " + 404 (i - '0')); 405 } 406 } 407 for (int i = 'A'; i < 'F'; i ++) { 408 int n1 = UCharacter.getNumericValue(i); 409 double n2 = UCharacter.getUnicodeNumericValue(i); 410 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 'A' + 10)) { 411 errln("Numeric value of " + (char)i + " expected to be " + 412 (i - 'A' + 10)); 413 } 414 } 415 for (int i = 0xFF21; i < 0xFF26; i ++) { 416 // testing full wideth latin characters A-F 417 int n1 = UCharacter.getNumericValue(i); 418 double n2 = UCharacter.getUnicodeNumericValue(i); 419 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 0xFF21 + 10)) { 420 errln("Numeric value of " + (char)i + " expected to be " + 421 (i - 0xFF21 + 10)); 422 } 423 } 424 // testing han numbers 425 int han[] = {0x96f6, 0, 0x58f9, 1, 0x8cb3, 2, 0x53c3, 3, 426 0x8086, 4, 0x4f0d, 5, 0x9678, 6, 0x67d2, 7, 427 0x634c, 8, 0x7396, 9, 0x5341, 10, 0x62fe, 10, 428 0x767e, 100, 0x4f70, 100, 0x5343, 1000, 0x4edf, 1000, 429 0x824c, 10000, 0x5104, 100000000}; 430 for (int i = 0; i < han.length; i += 2) { 431 if (UCharacter.getHanNumericValue(han[i]) != han[i + 1]) { 432 errln("Numeric value of \\u" + 433 Integer.toHexString(han[i]) + " expected to be " + 434 han[i + 1]); 435 } 436 } 437 } 438 439 /** 440 * Tests for version 441 */ 442 @Test 443 public void TestVersion() 444 { 445 if (!UCharacter.getUnicodeVersion().equals(VERSION_)) 446 errln("FAIL expected: " + VERSION_ + " got: " + UCharacter.getUnicodeVersion()); 447 } 448 449 /** 450 * Tests for control characters 451 */ 452 @Test 453 public void TestISOControl() 454 { 455 int control[] = {0x001b, 0x000097, 0x000082}; 456 int noncontrol[] = {0x61, 0x000031, 0x0000e2}; 457 458 int size = control.length; 459 for (int i = 0; i < size; i ++) 460 { 461 if (!UCharacter.isISOControl(control[i])) 462 { 463 errln("FAIL 0x" + Integer.toHexString(control[i]) + 464 " expected to be a control character"); 465 break; 466 } 467 if (UCharacter.isISOControl(noncontrol[i])) 468 { 469 errln("FAIL 0x" + Integer.toHexString(noncontrol[i]) + 470 " expected to be not a control character"); 471 break; 472 } 473 474 logln("Ok 0x" + Integer.toHexString(control[i]) + " and 0x" + 475 Integer.toHexString(noncontrol[i])); 476 } 477 } 478 479 /** 480 * Test Supplementary 481 */ 482 @Test 483 public void TestSupplementary() 484 { 485 for (int i = 0; i < 0x10000; i ++) { 486 if (UCharacter.isSupplementary(i)) { 487 errln("Codepoint \\u" + Integer.toHexString(i) + 488 " is not supplementary"); 489 } 490 } 491 for (int i = 0x10000; i < 0x10FFFF; i ++) { 492 if (!UCharacter.isSupplementary(i)) { 493 errln("Codepoint \\u" + Integer.toHexString(i) + 494 " is supplementary"); 495 } 496 } 497 } 498 499 /** 500 * Test mirroring 501 */ 502 @Test 503 public void TestMirror() 504 { 505 if (!(UCharacter.isMirrored(0x28) && UCharacter.isMirrored(0xbb) && 506 UCharacter.isMirrored(0x2045) && UCharacter.isMirrored(0x232a) 507 && !UCharacter.isMirrored(0x27) && 508 !UCharacter.isMirrored(0x61) && !UCharacter.isMirrored(0x284) 509 && !UCharacter.isMirrored(0x3400))) { 510 errln("isMirrored() does not work correctly"); 511 } 512 513 if (!(UCharacter.getMirror(0x3c) == 0x3e && 514 UCharacter.getMirror(0x5d) == 0x5b && 515 UCharacter.getMirror(0x208d) == 0x208e && 516 UCharacter.getMirror(0x3017) == 0x3016 && 517 518 UCharacter.getMirror(0xbb) == 0xab && 519 UCharacter.getMirror(0x2215) == 0x29F5 && 520 UCharacter.getMirror(0x29F5) == 0x2215 && /* large delta between the code points */ 521 522 UCharacter.getMirror(0x2e) == 0x2e && 523 UCharacter.getMirror(0x6f3) == 0x6f3 && 524 UCharacter.getMirror(0x301c) == 0x301c && 525 UCharacter.getMirror(0xa4ab) == 0xa4ab && 526 527 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 528 UCharacter.getMirror(0x2018) == 0x2018 && 529 UCharacter.getMirror(0x201b) == 0x201b && 530 UCharacter.getMirror(0x301d) == 0x301d)) { 531 errln("getMirror() does not work correctly"); 532 } 533 534 /* verify that Bidi_Mirroring_Glyph roundtrips */ 535 UnicodeSet set=new UnicodeSet("[:Bidi_Mirrored:]"); 536 UnicodeSetIterator iter=new UnicodeSetIterator(set); 537 int start, end, c2, c3; 538 while(iter.nextRange() && (start=iter.codepoint)>=0) { 539 end=iter.codepointEnd; 540 do { 541 c2=UCharacter.getMirror(start); 542 c3=UCharacter.getMirror(c2); 543 if(c3!=start) { 544 errln("getMirror() does not roundtrip: U+"+hex(start)+"->U+"+hex(c2)+"->U+"+hex(c3)); 545 } 546 c3=UCharacter.getBidiPairedBracket(start); 547 if(UCharacter.getIntPropertyValue(start, UProperty.BIDI_PAIRED_BRACKET_TYPE)==UCharacter.BidiPairedBracketType.NONE) { 548 if(c3!=start) { 549 errln("u_getBidiPairedBracket(U+"+hex(start)+") != self for bpt(c)==None"); 550 } 551 } else { 552 if(c3!=c2) { 553 errln("u_getBidiPairedBracket(U+"+hex(start)+") != U+"+hex(c2)+" = bmg(c)'"); 554 } 555 } 556 } while(++start<=end); 557 } 558 559 // verify that Unicode Corrigendum #6 reverts mirrored status of the following 560 if (UCharacter.isMirrored(0x2018) || 561 UCharacter.isMirrored(0x201d) || 562 UCharacter.isMirrored(0x201f) || 563 UCharacter.isMirrored(0x301e)) { 564 errln("Unicode Corrigendum #6 conflict, one or more of 2018/201d/201f/301e has mirrored property"); 565 } 566 } 567 568 /** 569 * Tests for printable characters 570 */ 571 @Test 572 public void TestPrint() 573 { 574 int printable[] = {0x0042, 0x00005f, 0x002014}; 575 int nonprintable[] = {0x200c, 0x00009f, 0x00001b}; 576 577 int size = printable.length; 578 for (int i = 0; i < size; i ++) 579 { 580 if (!UCharacter.isPrintable(printable[i])) 581 { 582 errln("FAIL \\u" + hex(printable[i]) + 583 " expected to be a printable character"); 584 break; 585 } 586 if (UCharacter.isPrintable(nonprintable[i])) 587 { 588 errln("FAIL \\u" + hex(nonprintable[i]) + 589 " expected not to be a printable character"); 590 break; 591 } 592 logln("Ok \\u" + hex(printable[i]) + " and \\u" + 593 hex(nonprintable[i])); 594 } 595 596 // test all ISO 8 controls 597 for (int ch = 0; ch <= 0x9f; ++ ch) { 598 if (ch == 0x20) { 599 // skip ASCII graphic characters and continue with DEL 600 ch = 0x7f; 601 } 602 if (UCharacter.isPrintable(ch)) { 603 errln("Fail \\u" + hex(ch) + 604 " is a ISO 8 control character hence not printable\n"); 605 } 606 } 607 608 /* test all Latin-1 graphic characters */ 609 for (int ch = 0x20; ch <= 0xff; ++ ch) { 610 if (ch == 0x7f) { 611 ch = 0xa0; 612 } 613 if (!UCharacter.isPrintable(ch) 614 && ch != 0x00AD/* Unicode 4.0 changed the defintion of soft hyphen to be a Cf*/) { 615 errln("Fail \\u" + hex(ch) + 616 " is a Latin-1 graphic character\n"); 617 } 618 } 619 } 620 621 /** 622 * Testing for identifier characters 623 */ 624 @Test 625 public void TestIdentifier() 626 { 627 int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061}; 628 int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019}; 629 int unicodeidpart[] = {0x005f, 0x000032, 0x000045}; 630 int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020}; 631 int idignore[] = {0x0006, 0x0010, 0x206b}; 632 int nonidignore[] = {0x0075, 0x0000a3, 0x000061}; 633 634 int size = unicodeidstart.length; 635 for (int i = 0; i < size; i ++) 636 { 637 if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i])) 638 { 639 errln("FAIL \\u" + hex(unicodeidstart[i]) + 640 " expected to be a unicode identifier start character"); 641 break; 642 } 643 if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i])) 644 { 645 errln("FAIL \\u" + hex(nonunicodeidstart[i]) + 646 " expected not to be a unicode identifier start " + 647 "character"); 648 break; 649 } 650 if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i])) 651 { 652 errln("FAIL \\u" + hex(unicodeidpart[i]) + 653 " expected to be a unicode identifier part character"); 654 break; 655 } 656 if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i])) 657 { 658 errln("FAIL \\u" + hex(nonunicodeidpart[i]) + 659 " expected not to be a unicode identifier part " + 660 "character"); 661 break; 662 } 663 if (!UCharacter.isIdentifierIgnorable(idignore[i])) 664 { 665 errln("FAIL \\u" + hex(idignore[i]) + 666 " expected to be a ignorable unicode character"); 667 break; 668 } 669 if (UCharacter.isIdentifierIgnorable(nonidignore[i])) 670 { 671 errln("FAIL \\u" + hex(nonidignore[i]) + 672 " expected not to be a ignorable unicode character"); 673 break; 674 } 675 logln("Ok \\u" + hex(unicodeidstart[i]) + " and \\u" + 676 hex(nonunicodeidstart[i]) + " and \\u" + 677 hex(unicodeidpart[i]) + " and \\u" + 678 hex(nonunicodeidpart[i]) + " and \\u" + 679 hex(idignore[i]) + " and \\u" + hex(nonidignore[i])); 680 } 681 } 682 683 /** 684 * Tests for the character types, direction.<br> 685 * This method reads in UnicodeData.txt file for testing purposes. A 686 * default path is provided relative to the src path, however the user 687 * could set a system property to change the directory path.<br> 688 * e.g. java -DUnicodeData="data_directory_path" 689 * android.icu.dev.test.lang.UCharacterTest 690 */ 691 @Test 692 public void TestUnicodeData() 693 { 694 // this is the 2 char category types used in the UnicodeData file 695 final String TYPE = 696 "LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf"; 697 698 // directorionality types used in the UnicodeData file 699 // padded by spaces to make each type size 4 700 final String DIR = 701 "L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI "; 702 703 Normalizer2 nfc = Normalizer2.getNFCInstance(); 704 Normalizer2 nfkc = Normalizer2.getNFKCInstance(); 705 706 BufferedReader input = null; 707 try { 708 input = TestUtil.getDataReader("unicode/UnicodeData.txt"); 709 int numErrors = 0; 710 711 for (;;) { 712 String s = input.readLine(); 713 if(s == null) { 714 break; 715 } 716 if(s.length()<4 || s.startsWith("#")) { 717 continue; 718 } 719 String[] fields = s.split(";", -1); 720 assert (fields.length == 15 ) : "Number of fields is " + fields.length + ": " + s; 721 722 int ch = Integer.parseInt(fields[0], 16); 723 724 // testing the general category 725 int type = TYPE.indexOf(fields[2]); 726 if (type < 0) 727 type = 0; 728 else 729 type = (type >> 1) + 1; 730 if (UCharacter.getType(ch) != type) 731 { 732 errln("FAIL \\u" + hex(ch) + " expected type " + type); 733 break; 734 } 735 736 if (UCharacter.getIntPropertyValue(ch, 737 UProperty.GENERAL_CATEGORY_MASK) != (1 << type)) { 738 errln("error: getIntPropertyValue(\\u" + 739 Integer.toHexString(ch) + 740 ", UProperty.GENERAL_CATEGORY_MASK) != " + 741 "getMask(getType(ch))"); 742 } 743 744 // testing combining class 745 int cc = Integer.parseInt(fields[3]); 746 if (UCharacter.getCombiningClass(ch) != cc) 747 { 748 errln("FAIL \\u" + hex(ch) + " expected combining " + 749 "class " + cc); 750 break; 751 } 752 if (nfkc.getCombiningClass(ch) != cc) 753 { 754 errln("FAIL \\u" + hex(ch) + " expected NFKC combining " + 755 "class " + cc); 756 break; 757 } 758 759 // testing the direction 760 String d = fields[4]; 761 if (d.length() == 1) 762 d = d + " "; 763 764 int dir = DIR.indexOf(d) >> 2; 765 if (UCharacter.getDirection(ch) != dir) 766 { 767 errln("FAIL \\u" + hex(ch) + 768 " expected direction " + dir + " but got " + UCharacter.getDirection(ch)); 769 break; 770 } 771 772 byte bdir = (byte)dir; 773 if (UCharacter.getDirectionality(ch) != bdir) 774 { 775 errln("FAIL \\u" + hex(ch) + 776 " expected directionality " + bdir + " but got " + 777 UCharacter.getDirectionality(ch)); 778 break; 779 } 780 781 /* get Decomposition_Type & Decomposition_Mapping, field 5 */ 782 int dt; 783 if(fields[5].length()==0) { 784 /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */ 785 if(ch==0xac00 || ch==0xd7a3) { 786 dt=UCharacter.DecompositionType.CANONICAL; 787 } else { 788 dt=UCharacter.DecompositionType.NONE; 789 } 790 } else { 791 d=fields[5]; 792 dt=-1; 793 if(d.charAt(0)=='<') { 794 int end=d.indexOf('>', 1); 795 if(end>=0) { 796 dt=UCharacter.getPropertyValueEnum(UProperty.DECOMPOSITION_TYPE, d.substring(1, end)); 797 while(d.charAt(++end)==' ') {} // skip spaces 798 d=d.substring(end); 799 } 800 } else { 801 dt=UCharacter.DecompositionType.CANONICAL; 802 } 803 } 804 String dm; 805 if(dt>UCharacter.DecompositionType.NONE) { 806 if(ch==0xac00) { 807 dm="\u1100\u1161"; 808 } else if(ch==0xd7a3) { 809 dm="\ud788\u11c2"; 810 } else { 811 String[] dmChars=d.split(" +"); 812 StringBuilder dmb=new StringBuilder(dmChars.length); 813 for(String dmc : dmChars) { 814 dmb.appendCodePoint(Integer.parseInt(dmc, 16)); 815 } 816 dm=dmb.toString(); 817 } 818 } else { 819 dm=null; 820 } 821 if(dt<0) { 822 errln(String.format("error in UnicodeData.txt: syntax error in U+%04x decomposition field", ch)); 823 return; 824 } 825 int i=UCharacter.getIntPropertyValue(ch, UProperty.DECOMPOSITION_TYPE); 826 assertEquals( 827 String.format("error: UCharacter.getIntPropertyValue(U+%04x, UProperty.DECOMPOSITION_TYPE) is wrong", ch), 828 dt, i); 829 /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */ 830 String mapping=nfkc.getRawDecomposition(ch); 831 assertEquals( 832 String.format("error: nfkc.getRawDecomposition(U+%04x) is wrong", ch), 833 dm, mapping); 834 /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */ 835 if(dt!=UCharacter.DecompositionType.CANONICAL) { 836 dm=null; 837 } 838 mapping=nfc.getRawDecomposition(ch); 839 assertEquals( 840 String.format("error: nfc.getRawDecomposition(U+%04x) is wrong", ch), 841 dm, mapping); 842 /* recompose */ 843 if(dt==UCharacter.DecompositionType.CANONICAL 844 && !UCharacter.hasBinaryProperty(ch, UProperty.FULL_COMPOSITION_EXCLUSION)) { 845 int a=dm.codePointAt(0); 846 int b=dm.codePointBefore(dm.length()); 847 int composite=nfc.composePair(a, b); 848 assertEquals( 849 String.format( 850 "error: nfc U+%04X decomposes to U+%04X+U+%04X "+ 851 "but does not compose back (instead U+%04X)", 852 ch, a, b, composite), 853 ch, composite); 854 /* 855 * Note: NFKC has fewer round-trip mappings than NFC, 856 * so we can't just test nfkc.composePair(a, b) here without further data. 857 */ 858 } 859 860 // testing iso comment 861 try{ 862 String isocomment = fields[11]; 863 String comment = UCharacter.getISOComment(ch); 864 if (comment == null) { 865 comment = ""; 866 } 867 if (!comment.equals(isocomment)) { 868 errln("FAIL \\u" + hex(ch) + 869 " expected iso comment " + isocomment); 870 break; 871 } 872 }catch(Exception e){ 873 if(e.getMessage().indexOf("unames.icu") >= 0){ 874 numErrors++; 875 }else{ 876 throw e; 877 } 878 } 879 880 String upper = fields[12]; 881 int tempchar = ch; 882 if (upper.length() > 0) { 883 tempchar = Integer.parseInt(upper, 16); 884 } 885 int resultCp = UCharacter.toUpperCase(ch); 886 if (resultCp != tempchar) { 887 errln("FAIL \\u" + Utility.hex(ch, 4) 888 + " expected uppercase \\u" 889 + Utility.hex(tempchar, 4) 890 + " but got \\u" 891 + Utility.hex(resultCp, 4)); 892 break; 893 } 894 895 String lower = fields[13]; 896 tempchar = ch; 897 if (lower.length() > 0) { 898 tempchar = Integer.parseInt(lower, 16); 899 } 900 if (UCharacter.toLowerCase(ch) != tempchar) { 901 errln("FAIL \\u" + Utility.hex(ch, 4) 902 + " expected lowercase \\u" 903 + Utility.hex(tempchar, 4)); 904 break; 905 } 906 907 908 909 String title = fields[14]; 910 tempchar = ch; 911 if (title.length() > 0) { 912 tempchar = Integer.parseInt(title, 16); 913 } 914 if (UCharacter.toTitleCase(ch) != tempchar) { 915 errln("FAIL \\u" + Utility.hex(ch, 4) 916 + " expected titlecase \\u" 917 + Utility.hex(tempchar, 4)); 918 break; 919 } 920 } 921 if(numErrors > 0){ 922 warnln("Could not find unames.icu"); 923 } 924 } catch (Exception e) { 925 e.printStackTrace(); 926 } finally { 927 if (input != null) { 928 try { 929 input.close(); 930 } catch (IOException ignored) { 931 } 932 } 933 } 934 935 if (UCharacter.UnicodeBlock.of(0x0041) 936 != UCharacter.UnicodeBlock.BASIC_LATIN 937 || UCharacter.getIntPropertyValue(0x41, UProperty.BLOCK) 938 != UCharacter.UnicodeBlock.BASIC_LATIN.getID()) { 939 errln("UCharacter.UnicodeBlock.of(\\u0041) property failed! " 940 + "Expected : " 941 + UCharacter.UnicodeBlock.BASIC_LATIN.getID() + " got " 942 + UCharacter.UnicodeBlock.of(0x0041)); 943 } 944 945 // sanity check on repeated properties 946 for (int ch = 0xfffe; ch <= 0x10ffff;) { 947 int type = UCharacter.getType(ch); 948 if (UCharacter.getIntPropertyValue(ch, 949 UProperty.GENERAL_CATEGORY_MASK) 950 != (1 << type)) { 951 errln("error: UCharacter.getIntPropertyValue(\\u" 952 + Integer.toHexString(ch) 953 + ", UProperty.GENERAL_CATEGORY_MASK) != " 954 + "getMask(getType())"); 955 } 956 if (type != UCharacterCategory.UNASSIGNED) { 957 errln("error: UCharacter.getType(\\u" + Utility.hex(ch, 4) 958 + " != UCharacterCategory.UNASSIGNED (returns " 959 + UCharacterCategory.toString(UCharacter.getType(ch)) 960 + ")"); 961 } 962 if ((ch & 0xffff) == 0xfffe) { 963 ++ ch; 964 } 965 else { 966 ch += 0xffff; 967 } 968 } 969 970 // test that PUA is not "unassigned" 971 for(int ch = 0xe000; ch <= 0x10fffd;) { 972 int type = UCharacter.getType(ch); 973 if (UCharacter.getIntPropertyValue(ch, 974 UProperty.GENERAL_CATEGORY_MASK) 975 != (1 << type)) { 976 errln("error: UCharacter.getIntPropertyValue(\\u" 977 + Integer.toHexString(ch) 978 + ", UProperty.GENERAL_CATEGORY_MASK) != " 979 + "getMask(getType())"); 980 } 981 982 if (type == UCharacterCategory.UNASSIGNED) { 983 errln("error: UCharacter.getType(\\u" 984 + Utility.hex(ch, 4) 985 + ") == UCharacterCategory.UNASSIGNED"); 986 } 987 else if (type != UCharacterCategory.PRIVATE_USE) { 988 logln("PUA override: UCharacter.getType(\\u" 989 + Utility.hex(ch, 4) + ")=" + type); 990 } 991 if (ch == 0xf8ff) { 992 ch = 0xf0000; 993 } 994 else if (ch == 0xffffd) { 995 ch = 0x100000; 996 } 997 else { 998 ++ ch; 999 } 1000 } 1001 } 1002 1003 1004 /** 1005 * Test for the character names 1006 */ 1007 @Test 1008 public void TestNames() 1009 { 1010 try{ 1011 int length = UCharacterName.INSTANCE.getMaxCharNameLength(); 1012 if (length < 83) { // Unicode 3.2 max char name length 1013 errln("getMaxCharNameLength()=" + length + " is too short"); 1014 } 1015 1016 int c[] = {0x0061, //LATIN SMALL LETTER A 1017 0x000284, //LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK 1018 0x003401, //CJK UNIFIED IDEOGRAPH-3401 1019 0x007fed, //CJK UNIFIED IDEOGRAPH-7FED 1020 0x00ac00, //HANGUL SYLLABLE GA 1021 0x00d7a3, //HANGUL SYLLABLE HIH 1022 0x00d800, 0x00dc00, //LINEAR B SYLLABLE B008 A 1023 0xff08, //FULLWIDTH LEFT PARENTHESIS 1024 0x00ffe5, //FULLWIDTH YEN SIGN 1025 0x00ffff, //null 1026 0x0023456 //CJK UNIFIED IDEOGRAPH-23456 1027 }; 1028 String name[] = { 1029 "LATIN SMALL LETTER A", 1030 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 1031 "CJK UNIFIED IDEOGRAPH-3401", 1032 "CJK UNIFIED IDEOGRAPH-7FED", 1033 "HANGUL SYLLABLE GA", 1034 "HANGUL SYLLABLE HIH", 1035 "", 1036 "", 1037 "FULLWIDTH LEFT PARENTHESIS", 1038 "FULLWIDTH YEN SIGN", 1039 "", 1040 "CJK UNIFIED IDEOGRAPH-23456" 1041 }; 1042 String oldname[] = {"", "", "", 1043 "", 1044 "", "", "", "", "", "", 1045 "", ""}; 1046 String extendedname[] = {"LATIN SMALL LETTER A", 1047 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 1048 "CJK UNIFIED IDEOGRAPH-3401", 1049 "CJK UNIFIED IDEOGRAPH-7FED", 1050 "HANGUL SYLLABLE GA", 1051 "HANGUL SYLLABLE HIH", 1052 "<lead surrogate-D800>", 1053 "<trail surrogate-DC00>", 1054 "FULLWIDTH LEFT PARENTHESIS", 1055 "FULLWIDTH YEN SIGN", 1056 "<noncharacter-FFFF>", 1057 "CJK UNIFIED IDEOGRAPH-23456"}; 1058 1059 int size = c.length; 1060 String str; 1061 int uc; 1062 1063 for (int i = 0; i < size; i ++) 1064 { 1065 // modern Unicode character name 1066 str = UCharacter.getName(c[i]); 1067 if ((str == null && name[i].length() > 0) || 1068 (str != null && !str.equals(name[i]))) 1069 { 1070 errln("FAIL \\u" + hex(c[i]) + " expected name " + 1071 name[i]); 1072 break; 1073 } 1074 1075 // 1.0 Unicode character name 1076 str = UCharacter.getName1_0(c[i]); 1077 if ((str == null && oldname[i].length() > 0) || 1078 (str != null && !str.equals(oldname[i]))) 1079 { 1080 errln("FAIL \\u" + hex(c[i]) + " expected 1.0 name " + 1081 oldname[i]); 1082 break; 1083 } 1084 1085 // extended character name 1086 str = UCharacter.getExtendedName(c[i]); 1087 if (str == null || !str.equals(extendedname[i])) 1088 { 1089 errln("FAIL \\u" + hex(c[i]) + " expected extended name " + 1090 extendedname[i]); 1091 break; 1092 } 1093 1094 // retrieving unicode character from modern name 1095 uc = UCharacter.getCharFromName(name[i]); 1096 if (uc != c[i] && name[i].length() != 0) 1097 { 1098 errln("FAIL " + name[i] + " expected character \\u" + 1099 hex(c[i])); 1100 break; 1101 } 1102 1103 //retrieving unicode character from 1.0 name 1104 uc = UCharacter.getCharFromName1_0(oldname[i]); 1105 if (uc != c[i] && oldname[i].length() != 0) 1106 { 1107 errln("FAIL " + oldname[i] + " expected 1.0 character \\u" + 1108 hex(c[i])); 1109 break; 1110 } 1111 1112 //retrieving unicode character from 1.0 name 1113 uc = UCharacter.getCharFromExtendedName(extendedname[i]); 1114 if (uc != c[i] && i != 0 && (i == 1 || i == 6)) 1115 { 1116 errln("FAIL " + extendedname[i] + 1117 " expected extended character \\u" + hex(c[i])); 1118 break; 1119 } 1120 } 1121 1122 // test getName works with mixed-case names (new in 2.0) 1123 if (0x61 != UCharacter.getCharFromName("LATin smALl letTER A")) { 1124 errln("FAIL: 'LATin smALl letTER A' should result in character " 1125 + "U+0061"); 1126 } 1127 1128 if (TestFmwk.getExhaustiveness() >= 5) { 1129 // extra testing different from icu 1130 for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++) 1131 { 1132 str = UCharacter.getName(i); 1133 if (str != null && UCharacter.getCharFromName(str) != i) 1134 { 1135 errln("FAIL \\u" + hex(i) + " " + str + 1136 " retrieval of name and vice versa" ); 1137 break; 1138 } 1139 } 1140 } 1141 1142 // Test getCharNameCharacters 1143 if (TestFmwk.getExhaustiveness() >= 10) { 1144 boolean map[] = new boolean[256]; 1145 1146 UnicodeSet set = new UnicodeSet(1, 0); // empty set 1147 UnicodeSet dumb = new UnicodeSet(1, 0); // empty set 1148 1149 // uprv_getCharNameCharacters() will likely return more lowercase 1150 // letters than actual character names contain because 1151 // it includes all the characters in lowercased names of 1152 // general categories, for the full possible set of extended names. 1153 UCharacterName.INSTANCE.getCharNameCharacters(set); 1154 1155 // build set the dumb (but sure-fire) way 1156 Arrays.fill(map, false); 1157 1158 int maxLength = 0; 1159 for (int cp = 0; cp < 0x110000; ++ cp) { 1160 String n = UCharacter.getExtendedName(cp); 1161 int len = n.length(); 1162 if (len > maxLength) { 1163 maxLength = len; 1164 } 1165 1166 for (int i = 0; i < len; ++ i) { 1167 char ch = n.charAt(i); 1168 if (!map[ch & 0xff]) { 1169 dumb.add(ch); 1170 map[ch & 0xff] = true; 1171 } 1172 } 1173 } 1174 1175 length = UCharacterName.INSTANCE.getMaxCharNameLength(); 1176 if (length != maxLength) { 1177 errln("getMaxCharNameLength()=" + length 1178 + " differs from the maximum length " + maxLength 1179 + " of all extended names"); 1180 } 1181 1182 // compare the sets. Where is my uset_equals?!! 1183 boolean ok = true; 1184 for (int i = 0; i < 256; ++ i) { 1185 if (set.contains(i) != dumb.contains(i)) { 1186 if (0x61 <= i && i <= 0x7a // a-z 1187 && set.contains(i) && !dumb.contains(i)) { 1188 // ignore lowercase a-z that are in set but not in dumb 1189 ok = true; 1190 } 1191 else { 1192 ok = false; 1193 break; 1194 } 1195 } 1196 } 1197 1198 String pattern1 = set.toPattern(true); 1199 String pattern2 = dumb.toPattern(true); 1200 1201 if (!ok) { 1202 errln("FAIL: getCharNameCharacters() returned " + pattern1 1203 + " expected " + pattern2 1204 + " (too many lowercase a-z are ok)"); 1205 } else { 1206 logln("Ok: getCharNameCharacters() returned " + pattern1); 1207 } 1208 } 1209 // improve code coverage 1210 String expected = "LATIN SMALL LETTER A|LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK|"+ 1211 "CJK UNIFIED IDEOGRAPH-3401|CJK UNIFIED IDEOGRAPH-7FED|HANGUL SYLLABLE GA|"+ 1212 "HANGUL SYLLABLE HIH|LINEAR B SYLLABLE B008 A|FULLWIDTH LEFT PARENTHESIS|"+ 1213 "FULLWIDTH YEN SIGN|"+ 1214 "null|"+ // getName returns null because 0xFFFF does not have a name, but has an extended name! 1215 "CJK UNIFIED IDEOGRAPH-23456"; 1216 String separator= "|"; 1217 String source = Utility.valueOf(c); 1218 String result = UCharacter.getName(source, separator); 1219 if(!result.equals(expected)){ 1220 errln("UCharacter.getName did not return the expected result.\n\t Expected: "+ expected+"\n\t Got: "+ result); 1221 } 1222 1223 }catch(IllegalArgumentException e){ 1224 if(e.getMessage().indexOf("unames.icu") >= 0){ 1225 warnln("Could not find unames.icu"); 1226 }else{ 1227 throw e; 1228 } 1229 } 1230 1231 } 1232 1233 @Test 1234 public void TestUCharFromNameUnderflow() { 1235 // Ticket #10889: Underflow crash when there is no dash. 1236 int c = UCharacter.getCharFromExtendedName("<NO BREAK SPACE>"); 1237 if(c >= 0) { 1238 errln("UCharacter.getCharFromExtendedName(<NO BREAK SPACE>) = U+" + hex(c) + 1239 " but should fail (-1)"); 1240 } 1241 1242 // Test related edge cases. 1243 c = UCharacter.getCharFromExtendedName("<-00a0>"); 1244 if(c >= 0) { 1245 errln("UCharacter.getCharFromExtendedName(<-00a0>) = U+" + hex(c) + 1246 " but should fail (-1)"); 1247 } 1248 1249 c = UCharacter.getCharFromExtendedName("<control->"); 1250 if(c >= 0) { 1251 errln("UCharacter.getCharFromExtendedName(<control->) = U+" + hex(c) + 1252 " but should fail (-1)"); 1253 } 1254 1255 c = UCharacter.getCharFromExtendedName("<control-111111>"); 1256 if(c >= 0) { 1257 errln("UCharacter.getCharFromExtendedName(<control-111111>) = U+" + hex(c) + 1258 " but should fail (-1)"); 1259 } 1260 } 1261 1262 /** 1263 * Testing name iteration 1264 */ 1265 @Test 1266 public void TestNameIteration()throws Exception 1267 { 1268 try { 1269 ValueIterator iterator = UCharacter.getExtendedNameIterator(); 1270 ValueIterator.Element element = new ValueIterator.Element(); 1271 ValueIterator.Element old = new ValueIterator.Element(); 1272 // testing subrange 1273 iterator.setRange(-10, -5); 1274 if (iterator.next(element)) { 1275 errln("Fail, expected iterator to return false when range is set outside the meaningful range"); 1276 } 1277 iterator.setRange(0x110000, 0x111111); 1278 if (iterator.next(element)) { 1279 errln("Fail, expected iterator to return false when range is set outside the meaningful range"); 1280 } 1281 try { 1282 iterator.setRange(50, 10); 1283 errln("Fail, expected exception when encountered invalid range"); 1284 } catch (Exception e) { 1285 } 1286 1287 iterator.setRange(-10, 10); 1288 if (!iterator.next(element) || element.integer != 0) { 1289 errln("Fail, expected iterator to return 0 when range start limit is set outside the meaningful range"); 1290 } 1291 1292 iterator.setRange(0x10FFFE, 0x200000); 1293 int last = 0; 1294 while (iterator.next(element)) { 1295 last = element.integer; 1296 } 1297 if (last != 0x10FFFF) { 1298 errln("Fail, expected iterator to return 0x10FFFF when range end limit is set outside the meaningful range"); 1299 } 1300 1301 iterator = UCharacter.getNameIterator(); 1302 iterator.setRange(0xF, 0x45); 1303 while (iterator.next(element)) { 1304 if (element.integer <= old.integer) { 1305 errln("FAIL next returned a less codepoint \\u" + 1306 Integer.toHexString(element.integer) + " than \\u" + 1307 Integer.toHexString(old.integer)); 1308 break; 1309 } 1310 if (!UCharacter.getName(element.integer).equals(element.value)) 1311 { 1312 errln("FAIL next codepoint \\u" + 1313 Integer.toHexString(element.integer) + 1314 " does not have the expected name " + 1315 UCharacter.getName(element.integer) + 1316 " instead have the name " + (String)element.value); 1317 break; 1318 } 1319 old.integer = element.integer; 1320 } 1321 1322 iterator.reset(); 1323 iterator.next(element); 1324 if (element.integer != 0x20) { 1325 errln("FAIL reset in iterator"); 1326 } 1327 1328 iterator.setRange(0, 0x110000); 1329 old.integer = 0; 1330 while (iterator.next(element)) { 1331 if (element.integer != 0 && element.integer <= old.integer) { 1332 errln("FAIL next returned a less codepoint \\u" + 1333 Integer.toHexString(element.integer) + " than \\u" + 1334 Integer.toHexString(old.integer)); 1335 break; 1336 } 1337 if (!UCharacter.getName(element.integer).equals(element.value)) 1338 { 1339 errln("FAIL next codepoint \\u" + 1340 Integer.toHexString(element.integer) + 1341 " does not have the expected name " + 1342 UCharacter.getName(element.integer) + 1343 " instead have the name " + (String)element.value); 1344 break; 1345 } 1346 for (int i = old.integer + 1; i < element.integer; i ++) { 1347 if (UCharacter.getName(i) != null) { 1348 errln("FAIL between codepoints are not null \\u" + 1349 Integer.toHexString(old.integer) + " and " + 1350 Integer.toHexString(element.integer) + " has " + 1351 Integer.toHexString(i) + " with a name " + 1352 UCharacter.getName(i)); 1353 break; 1354 } 1355 } 1356 old.integer = element.integer; 1357 } 1358 1359 iterator = UCharacter.getExtendedNameIterator(); 1360 old.integer = 0; 1361 while (iterator.next(element)) { 1362 if (element.integer != 0 && element.integer != old.integer) { 1363 errln("FAIL next returned a codepoint \\u" + 1364 Integer.toHexString(element.integer) + 1365 " different from \\u" + 1366 Integer.toHexString(old.integer)); 1367 break; 1368 } 1369 if (!UCharacter.getExtendedName(element.integer).equals( 1370 element.value)) { 1371 errln("FAIL next codepoint \\u" + 1372 Integer.toHexString(element.integer) + 1373 " name should be " 1374 + UCharacter.getExtendedName(element.integer) + 1375 " instead of " + (String)element.value); 1376 break; 1377 } 1378 old.integer++; 1379 } 1380 iterator = UCharacter.getName1_0Iterator(); 1381 old.integer = 0; 1382 while (iterator.next(element)) { 1383 logln(Integer.toHexString(element.integer) + " " + 1384 (String)element.value); 1385 if (element.integer != 0 && element.integer <= old.integer) { 1386 errln("FAIL next returned a less codepoint \\u" + 1387 Integer.toHexString(element.integer) + " than \\u" + 1388 Integer.toHexString(old.integer)); 1389 break; 1390 } 1391 if (!element.value.equals(UCharacter.getName1_0( 1392 element.integer))) { 1393 errln("FAIL next codepoint \\u" + 1394 Integer.toHexString(element.integer) + 1395 " name cannot be null"); 1396 break; 1397 } 1398 for (int i = old.integer + 1; i < element.integer; i ++) { 1399 if (UCharacter.getName1_0(i) != null) { 1400 errln("FAIL between codepoints are not null \\u" + 1401 Integer.toHexString(old.integer) + " and " + 1402 Integer.toHexString(element.integer) + " has " + 1403 Integer.toHexString(i) + " with a name " + 1404 UCharacter.getName1_0(i)); 1405 break; 1406 } 1407 } 1408 old.integer = element.integer; 1409 } 1410 } catch(Exception e){ 1411 // !!! wouldn't preflighting be simpler? This looks like 1412 // it is effectively be doing that. It seems that for every 1413 // true error the code will call errln, which will throw the error, which 1414 // this will catch, which this will then rethrow the error. Just seems 1415 // cumbersome. 1416 if(e.getMessage().indexOf("unames.icu") >= 0){ 1417 warnln("Could not find unames.icu"); 1418 } else { 1419 errln(e.getMessage()); 1420 } 1421 } 1422 } 1423 1424 /** 1425 * Testing the for illegal characters 1426 */ 1427 @Test 1428 public void TestIsLegal() 1429 { 1430 int illegal[] = {0xFFFE, 0x00FFFF, 0x005FFFE, 0x005FFFF, 0x0010FFFE, 1431 0x0010FFFF, 0x110000, 0x00FDD0, 0x00FDDF, 0x00FDE0, 1432 0x00FDEF, 0xD800, 0xDC00, -1}; 1433 int legal[] = {0x61, 0x00FFFD, 0x0010000, 0x005FFFD, 0x0060000, 1434 0x0010FFFD, 0xFDCF, 0x00FDF0}; 1435 for (int count = 0; count < illegal.length; count ++) { 1436 if (UCharacter.isLegal(illegal[count])) { 1437 errln("FAIL \\u" + hex(illegal[count]) + 1438 " is not a legal character"); 1439 } 1440 } 1441 1442 for (int count = 0; count < legal.length; count ++) { 1443 if (!UCharacter.isLegal(legal[count])) { 1444 errln("FAIL \\u" + hex(legal[count]) + 1445 " is a legal character"); 1446 } 1447 } 1448 1449 String illegalStr = "This is an illegal string "; 1450 String legalStr = "This is a legal string "; 1451 1452 for (int count = 0; count < illegal.length; count ++) { 1453 StringBuffer str = new StringBuffer(illegalStr); 1454 if (illegal[count] < 0x10000) { 1455 str.append((char)illegal[count]); 1456 } 1457 else { 1458 char lead = UTF16.getLeadSurrogate(illegal[count]); 1459 char trail = UTF16.getTrailSurrogate(illegal[count]); 1460 str.append(lead); 1461 str.append(trail); 1462 } 1463 if (UCharacter.isLegal(str.toString())) { 1464 errln("FAIL " + hex(str.toString()) + 1465 " is not a legal string"); 1466 } 1467 } 1468 1469 for (int count = 0; count < legal.length; count ++) { 1470 StringBuffer str = new StringBuffer(legalStr); 1471 if (legal[count] < 0x10000) { 1472 str.append((char)legal[count]); 1473 } 1474 else { 1475 char lead = UTF16.getLeadSurrogate(legal[count]); 1476 char trail = UTF16.getTrailSurrogate(legal[count]); 1477 str.append(lead); 1478 str.append(trail); 1479 } 1480 if (!UCharacter.isLegal(str.toString())) { 1481 errln("FAIL " + hex(str.toString()) + " is a legal string"); 1482 } 1483 } 1484 } 1485 1486 /** 1487 * Test getCodePoint 1488 */ 1489 @Test 1490 public void TestCodePoint() 1491 { 1492 int ch = 0x10000; 1493 for (char i = 0xD800; i < 0xDC00; i ++) { 1494 for (char j = 0xDC00; j <= 0xDFFF; j ++) { 1495 if (UCharacter.getCodePoint(i, j) != ch) { 1496 errln("Error getting codepoint for surrogate " + 1497 "characters \\u" 1498 + Integer.toHexString(i) + " \\u" + 1499 Integer.toHexString(j)); 1500 } 1501 ch ++; 1502 } 1503 } 1504 try 1505 { 1506 UCharacter.getCodePoint((char)0xD7ff, (char)0xDC00); 1507 errln("Invalid surrogate characters should not form a " + 1508 "supplementary"); 1509 } catch(Exception e) { 1510 } 1511 for (char i = 0; i < 0xFFFF; i++) { 1512 if (i == 0xFFFE || 1513 (i >= 0xD800 && i <= 0xDFFF) || 1514 (i >= 0xFDD0 && i <= 0xFDEF)) { 1515 // not a character 1516 try { 1517 UCharacter.getCodePoint(i); 1518 errln("Not a character is not a valid codepoint"); 1519 } catch (Exception e) { 1520 } 1521 } 1522 else { 1523 if (UCharacter.getCodePoint(i) != i) { 1524 errln("A valid codepoint should return itself"); 1525 } 1526 } 1527 } 1528 } 1529 1530 /** 1531 * This method is a little different from the type test in icu4c. 1532 * But combined with testUnicodeData, they basically do the same thing. 1533 */ 1534 @Test 1535 public void TestIteration() 1536 { 1537 int limit = 0; 1538 int prevtype = -1; 1539 int shouldBeDir; 1540 int test[][]={{0x41, UCharacterCategory.UPPERCASE_LETTER}, 1541 {0x308, UCharacterCategory.NON_SPACING_MARK}, 1542 {0xfffe, UCharacterCategory.GENERAL_OTHER_TYPES}, 1543 {0xe0041, UCharacterCategory.FORMAT}, 1544 {0xeffff, UCharacterCategory.UNASSIGNED}}; 1545 1546 // default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header 1547 int defaultBidi[][]={ 1548 { 0x0590, UCharacterDirection.LEFT_TO_RIGHT }, 1549 { 0x0600, UCharacterDirection.RIGHT_TO_LEFT }, 1550 { 0x07C0, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1551 { 0x0860, UCharacterDirection.RIGHT_TO_LEFT }, 1552 { 0x0870, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, // Unicode 10 changes U+0860..U+086F from R to AL. 1553 { 0x08A0, UCharacterDirection.RIGHT_TO_LEFT }, 1554 { 0x0900, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */ 1555 { 0x20A0, UCharacterDirection.LEFT_TO_RIGHT }, 1556 { 0x20D0, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */ 1557 { 0xFB1D, UCharacterDirection.LEFT_TO_RIGHT }, 1558 { 0xFB50, UCharacterDirection.RIGHT_TO_LEFT }, 1559 { 0xFE00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1560 { 0xFE70, UCharacterDirection.LEFT_TO_RIGHT }, 1561 { 0xFF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1562 { 0x10800, UCharacterDirection.LEFT_TO_RIGHT }, 1563 { 0x11000, UCharacterDirection.RIGHT_TO_LEFT }, 1564 { 0x1E800, UCharacterDirection.LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */ 1565 { 0x1EE00, UCharacterDirection.RIGHT_TO_LEFT }, 1566 { 0x1EF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */ 1567 { 0x1F000, UCharacterDirection.RIGHT_TO_LEFT }, 1568 { 0x110000, UCharacterDirection.LEFT_TO_RIGHT } 1569 }; 1570 1571 RangeValueIterator iterator = UCharacter.getTypeIterator(); 1572 RangeValueIterator.Element result = new RangeValueIterator.Element(); 1573 while (iterator.next(result)) { 1574 if (result.start != limit) { 1575 errln("UCharacterIteration failed: Ranges not continuous " + 1576 "0x" + Integer.toHexString(result.start)); 1577 } 1578 1579 limit = result.limit; 1580 if (result.value == prevtype) { 1581 errln("Type of the next set of enumeration should be different"); 1582 } 1583 prevtype = result.value; 1584 1585 for (int i = result.start; i < limit; i ++) { 1586 int temptype = UCharacter.getType(i); 1587 if (temptype != result.value) { 1588 errln("UCharacterIteration failed: Codepoint \\u" + 1589 Integer.toHexString(i) + " should be of type " + 1590 temptype + " not " + result.value); 1591 } 1592 } 1593 1594 for (int i = 0; i < test.length; ++ i) { 1595 if (result.start <= test[i][0] && test[i][0] < result.limit) { 1596 if (result.value != test[i][1]) { 1597 errln("error: getTypes() has range [" 1598 + Integer.toHexString(result.start) + ", " 1599 + Integer.toHexString(result.limit) 1600 + "] with type " + result.value 1601 + " instead of [" 1602 + Integer.toHexString(test[i][0]) + ", " 1603 + Integer.toHexString(test[i][1])); 1604 } 1605 } 1606 } 1607 1608 // LineBreak.txt specifies: 1609 // # - Assigned characters that are not listed explicitly are given the value 1610 // # "AL". 1611 // # - Unassigned characters are given the value "XX". 1612 // 1613 // PUA characters are listed explicitly with "XX". 1614 // Verify that no assigned character has "XX". 1615 if (result.value != UCharacterCategory.UNASSIGNED 1616 && result.value != UCharacterCategory.PRIVATE_USE) { 1617 int c = result.start; 1618 while (c < result.limit) { 1619 if (0 == UCharacter.getIntPropertyValue(c, 1620 UProperty.LINE_BREAK)) { 1621 logln("error UProperty.LINE_BREAK(assigned \\u" 1622 + Utility.hex(c, 4) + ")=XX"); 1623 } 1624 ++ c; 1625 } 1626 } 1627 1628 /* 1629 * Verify default Bidi classes. 1630 * See DerivedBidiClass.txt, especially for unassigned code points. 1631 */ 1632 if (result.value == UCharacterCategory.UNASSIGNED 1633 || result.value == UCharacterCategory.PRIVATE_USE) { 1634 int c = result.start; 1635 for (int i = 0; i < defaultBidi.length && c < result.limit; 1636 ++ i) { 1637 if (c < defaultBidi[i][0]) { 1638 while (c < result.limit && c < defaultBidi[i][0]) { 1639 // TODO change to public UCharacter.isNonCharacter(c) once it's available 1640 if(android.icu.impl.UCharacterUtility.isNonCharacter(c) || UCharacter.hasBinaryProperty(c, UProperty.DEFAULT_IGNORABLE_CODE_POINT)) { 1641 shouldBeDir=UCharacter.BOUNDARY_NEUTRAL; 1642 } else { 1643 shouldBeDir=defaultBidi[i][1]; 1644 } 1645 1646 if (UCharacter.getDirection(c) != shouldBeDir 1647 || UCharacter.getIntPropertyValue(c, 1648 UProperty.BIDI_CLASS) 1649 != shouldBeDir) { 1650 errln("error: getDirection(unassigned/PUA " 1651 + Integer.toHexString(c) 1652 + ") should be " 1653 + shouldBeDir); 1654 } 1655 ++ c; 1656 } 1657 } 1658 } 1659 } 1660 } 1661 1662 iterator.reset(); 1663 if (iterator.next(result) == false || result.start != 0) { 1664 System.out.println("result " + result.start); 1665 errln("UCharacterIteration reset() failed"); 1666 } 1667 } 1668 1669 /** 1670 * Testing getAge 1671 */ 1672 @Test 1673 public void TestGetAge() 1674 { 1675 int ages[] = {0x41, 1, 1, 0, 0, 1676 0xffff, 1, 1, 0, 0, 1677 0x20ab, 2, 0, 0, 0, 1678 0x2fffe, 2, 0, 0, 0, 1679 0x20ac, 2, 1, 0, 0, 1680 0xfb1d, 3, 0, 0, 0, 1681 0x3f4, 3, 1, 0, 0, 1682 0x10300, 3, 1, 0, 0, 1683 0x220, 3, 2, 0, 0, 1684 0xff60, 3, 2, 0, 0}; 1685 for (int i = 0; i < ages.length; i += 5) { 1686 VersionInfo age = UCharacter.getAge(ages[i]); 1687 if (age != VersionInfo.getInstance(ages[i + 1], ages[i + 2], 1688 ages[i + 3], ages[i + 4])) { 1689 errln("error: getAge(\\u" + Integer.toHexString(ages[i]) + 1690 ") == " + age.toString() + " instead of " + 1691 ages[i + 1] + "." + ages[i + 2] + "." + ages[i + 3] + 1692 "." + ages[i + 4]); 1693 } 1694 } 1695 1696 int[] valid_tests = { 1697 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 1698 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE}; 1699 int[] invalid_tests = { 1700 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 1701 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 1702 1703 for(int i=0; i< valid_tests.length; i++){ 1704 try{ 1705 UCharacter.getAge(valid_tests[i]); 1706 } catch(Exception e){ 1707 errln("UCharacter.getAge(int) was not suppose to have " + 1708 "an exception. Value passed: " + valid_tests[i]); 1709 } 1710 } 1711 1712 for(int i=0; i< invalid_tests.length; i++){ 1713 try{ 1714 UCharacter.getAge(invalid_tests[i]); 1715 errln("UCharacter.getAge(int) was suppose to have " + 1716 "an exception. Value passed: " + invalid_tests[i]); 1717 } catch(Exception e){ 1718 } 1719 } 1720 } 1721 1722 /** 1723 * Test binary non core properties 1724 */ 1725 @Test 1726 public void TestAdditionalProperties() 1727 { 1728 final int FALSE = 0; 1729 final int TRUE = 1; 1730 // test data for hasBinaryProperty() 1731 int props[][] = { // code point, property 1732 { 0x0627, UProperty.ALPHABETIC, 1 }, 1733 { 0x1034a, UProperty.ALPHABETIC, 1 }, 1734 { 0x2028, UProperty.ALPHABETIC, 0 }, 1735 1736 { 0x0066, UProperty.ASCII_HEX_DIGIT, 1 }, 1737 { 0x0067, UProperty.ASCII_HEX_DIGIT, 0 }, 1738 1739 { 0x202c, UProperty.BIDI_CONTROL, 1 }, 1740 { 0x202f, UProperty.BIDI_CONTROL, 0 }, 1741 1742 { 0x003c, UProperty.BIDI_MIRRORED, 1 }, 1743 { 0x003d, UProperty.BIDI_MIRRORED, 0 }, 1744 1745 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 1746 { 0x2018, UProperty.BIDI_MIRRORED, 0 }, 1747 { 0x201d, UProperty.BIDI_MIRRORED, 0 }, 1748 { 0x201f, UProperty.BIDI_MIRRORED, 0 }, 1749 { 0x301e, UProperty.BIDI_MIRRORED, 0 }, 1750 1751 { 0x058a, UProperty.DASH, 1 }, 1752 { 0x007e, UProperty.DASH, 0 }, 1753 1754 { 0x0c4d, UProperty.DIACRITIC, 1 }, 1755 { 0x3000, UProperty.DIACRITIC, 0 }, 1756 1757 { 0x0e46, UProperty.EXTENDER, 1 }, 1758 { 0x0020, UProperty.EXTENDER, 0 }, 1759 1760 { 0xfb1d, UProperty.FULL_COMPOSITION_EXCLUSION, 1 }, 1761 { 0x1d15f, UProperty.FULL_COMPOSITION_EXCLUSION, 1 }, 1762 { 0xfb1e, UProperty.FULL_COMPOSITION_EXCLUSION, 0 }, 1763 1764 { 0x110a, UProperty.NFD_INERT, 1 }, /* Jamo L */ 1765 { 0x0308, UProperty.NFD_INERT, 0 }, 1766 1767 { 0x1164, UProperty.NFKD_INERT, 1 }, /* Jamo V */ 1768 { 0x1d79d, UProperty.NFKD_INERT, 0 }, /* math compat version of xi */ 1769 1770 { 0x0021, UProperty.NFC_INERT, 1 }, /* ! */ 1771 { 0x0061, UProperty.NFC_INERT, 0 }, /* a */ 1772 { 0x00e4, UProperty.NFC_INERT, 0 }, /* a-umlaut */ 1773 { 0x0102, UProperty.NFC_INERT, 0 }, /* a-breve */ 1774 { 0xac1c, UProperty.NFC_INERT, 0 }, /* Hangul LV */ 1775 { 0xac1d, UProperty.NFC_INERT, 1 }, /* Hangul LVT */ 1776 1777 { 0x1d79d, UProperty.NFKC_INERT, 0 }, /* math compat version of xi */ 1778 { 0x2a6d6, UProperty.NFKC_INERT, 1 }, /* Han, last of CJK ext. B */ 1779 1780 { 0x00e4, UProperty.SEGMENT_STARTER, 1 }, 1781 { 0x0308, UProperty.SEGMENT_STARTER, 0 }, 1782 { 0x110a, UProperty.SEGMENT_STARTER, 1 }, /* Jamo L */ 1783 { 0x1164, UProperty.SEGMENT_STARTER, 0 },/* Jamo V */ 1784 { 0xac1c, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LV */ 1785 { 0xac1d, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LVT */ 1786 1787 { 0x0044, UProperty.HEX_DIGIT, 1 }, 1788 { 0xff46, UProperty.HEX_DIGIT, 1 }, 1789 { 0x0047, UProperty.HEX_DIGIT, 0 }, 1790 1791 { 0x30fb, UProperty.HYPHEN, 1 }, 1792 { 0xfe58, UProperty.HYPHEN, 0 }, 1793 1794 { 0x2172, UProperty.ID_CONTINUE, 1 }, 1795 { 0x0307, UProperty.ID_CONTINUE, 1 }, 1796 { 0x005c, UProperty.ID_CONTINUE, 0 }, 1797 1798 { 0x2172, UProperty.ID_START, 1 }, 1799 { 0x007a, UProperty.ID_START, 1 }, 1800 { 0x0039, UProperty.ID_START, 0 }, 1801 1802 { 0x4db5, UProperty.IDEOGRAPHIC, 1 }, 1803 { 0x2f999, UProperty.IDEOGRAPHIC, 1 }, 1804 { 0x2f99, UProperty.IDEOGRAPHIC, 0 }, 1805 1806 { 0x200c, UProperty.JOIN_CONTROL, 1 }, 1807 { 0x2029, UProperty.JOIN_CONTROL, 0 }, 1808 1809 { 0x1d7bc, UProperty.LOWERCASE, 1 }, 1810 { 0x0345, UProperty.LOWERCASE, 1 }, 1811 { 0x0030, UProperty.LOWERCASE, 0 }, 1812 1813 { 0x1d7a9, UProperty.MATH, 1 }, 1814 { 0x2135, UProperty.MATH, 1 }, 1815 { 0x0062, UProperty.MATH, 0 }, 1816 1817 { 0xfde1, UProperty.NONCHARACTER_CODE_POINT, 1 }, 1818 { 0x10ffff, UProperty.NONCHARACTER_CODE_POINT, 1 }, 1819 { 0x10fffd, UProperty.NONCHARACTER_CODE_POINT, 0 }, 1820 1821 { 0x0022, UProperty.QUOTATION_MARK, 1 }, 1822 { 0xff62, UProperty.QUOTATION_MARK, 1 }, 1823 { 0xd840, UProperty.QUOTATION_MARK, 0 }, 1824 1825 { 0x061f, UProperty.TERMINAL_PUNCTUATION, 1 }, 1826 { 0xe003f, UProperty.TERMINAL_PUNCTUATION, 0 }, 1827 1828 { 0x1d44a, UProperty.UPPERCASE, 1 }, 1829 { 0x2162, UProperty.UPPERCASE, 1 }, 1830 { 0x0345, UProperty.UPPERCASE, 0 }, 1831 1832 { 0x0020, UProperty.WHITE_SPACE, 1 }, 1833 { 0x202f, UProperty.WHITE_SPACE, 1 }, 1834 { 0x3001, UProperty.WHITE_SPACE, 0 }, 1835 1836 { 0x0711, UProperty.XID_CONTINUE, 1 }, 1837 { 0x1d1aa, UProperty.XID_CONTINUE, 1 }, 1838 { 0x007c, UProperty.XID_CONTINUE, 0 }, 1839 1840 { 0x16ee, UProperty.XID_START, 1 }, 1841 { 0x23456, UProperty.XID_START, 1 }, 1842 { 0x1d1aa, UProperty.XID_START, 0 }, 1843 1844 /* 1845 * Version break: 1846 * The following properties are only supported starting with the 1847 * Unicode version indicated in the second field. 1848 */ 1849 { -1, 0x320, 0 }, 1850 1851 { 0x180c, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 }, 1852 { 0xfe02, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 }, 1853 { 0x1801, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 0 }, 1854 1855 { 0x0149, UProperty.DEPRECATED, 1 }, /* changed in Unicode 5.2 */ 1856 { 0x0341, UProperty.DEPRECATED, 0 }, /* changed in Unicode 5.2 */ 1857 { 0xe0001, UProperty.DEPRECATED, 1 }, /* Changed from Unicode 5 to 5.1 */ 1858 { 0xe0100, UProperty.DEPRECATED, 0 }, 1859 1860 { 0x00a0, UProperty.GRAPHEME_BASE, 1 }, 1861 { 0x0a4d, UProperty.GRAPHEME_BASE, 0 }, 1862 { 0xff9d, UProperty.GRAPHEME_BASE, 1 }, 1863 { 0xff9f, UProperty.GRAPHEME_BASE, 0 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */ 1864 1865 { 0x0300, UProperty.GRAPHEME_EXTEND, 1 }, 1866 { 0xff9d, UProperty.GRAPHEME_EXTEND, 0 }, 1867 { 0xff9f, UProperty.GRAPHEME_EXTEND, 1 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */ 1868 { 0x0603, UProperty.GRAPHEME_EXTEND, 0 }, 1869 1870 { 0x0a4d, UProperty.GRAPHEME_LINK, 1 }, 1871 { 0xff9f, UProperty.GRAPHEME_LINK, 0 }, 1872 1873 { 0x2ff7, UProperty.IDS_BINARY_OPERATOR, 1 }, 1874 { 0x2ff3, UProperty.IDS_BINARY_OPERATOR, 0 }, 1875 1876 { 0x2ff3, UProperty.IDS_TRINARY_OPERATOR, 1 }, 1877 { 0x2f03, UProperty.IDS_TRINARY_OPERATOR, 0 }, 1878 1879 { 0x0ec1, UProperty.LOGICAL_ORDER_EXCEPTION, 1 }, 1880 { 0xdcba, UProperty.LOGICAL_ORDER_EXCEPTION, 0 }, 1881 1882 { 0x2e9b, UProperty.RADICAL, 1 }, 1883 { 0x4e00, UProperty.RADICAL, 0 }, 1884 1885 { 0x012f, UProperty.SOFT_DOTTED, 1 }, 1886 { 0x0049, UProperty.SOFT_DOTTED, 0 }, 1887 1888 { 0xfa11, UProperty.UNIFIED_IDEOGRAPH, 1 }, 1889 { 0xfa12, UProperty.UNIFIED_IDEOGRAPH, 0 }, 1890 1891 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */ 1892 1893 { 0x002e, UProperty.S_TERM, 1 }, 1894 { 0x0061, UProperty.S_TERM, 0 }, 1895 1896 { 0x180c, UProperty.VARIATION_SELECTOR, 1 }, 1897 { 0xfe03, UProperty.VARIATION_SELECTOR, 1 }, 1898 { 0xe01ef, UProperty.VARIATION_SELECTOR, 1 }, 1899 { 0xe0200, UProperty.VARIATION_SELECTOR, 0 }, 1900 1901 /* enum/integer type properties */ 1902 /* test default Bidi classes for unassigned code points */ 1903 { 0x0590, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1904 { 0x05cf, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1905 { 0x05ed, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1906 { 0x07f2, UProperty.BIDI_CLASS, UCharacterDirection.DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */ 1907 { 0x07fe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, /* unassigned R */ 1908 { 0x089f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1909 { 0xfb37, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1910 { 0xfb42, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1911 { 0x10806, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1912 { 0x10909, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1913 { 0x10fe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1914 1915 { 0x061d, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1916 { 0x063f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1917 { 0x070e, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1918 { 0x0775, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1919 { 0xfbc2, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1920 { 0xfd90, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1921 { 0xfefe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1922 1923 { 0x02AF, UProperty.BLOCK, UCharacter.UnicodeBlock.IPA_EXTENSIONS.getID() }, 1924 { 0x0C4E, UProperty.BLOCK, UCharacter.UnicodeBlock.TELUGU.getID()}, 1925 { 0x155A, UProperty.BLOCK, UCharacter.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS.getID() }, 1926 { 0x1717, UProperty.BLOCK, UCharacter.UnicodeBlock.TAGALOG.getID() }, 1927 { 0x1900, UProperty.BLOCK, UCharacter.UnicodeBlock.LIMBU.getID() }, 1928 { 0x1CBF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID()}, 1929 { 0x3040, UProperty.BLOCK, UCharacter.UnicodeBlock.HIRAGANA.getID()}, 1930 { 0x1D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS.getID()}, 1931 { 0x50000, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() }, 1932 { 0xEFFFF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() }, 1933 { 0x10D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B.getID() }, 1934 1935 /* UProperty.CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */ 1936 { 0xd7d7, UProperty.CANONICAL_COMBINING_CLASS, 0 }, 1937 1938 { 0x00A0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NOBREAK }, 1939 { 0x00A8, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.COMPAT }, 1940 { 0x00bf, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE }, 1941 { 0x00c0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL }, 1942 { 0x1E9B, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL }, 1943 { 0xBCDE, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL }, 1944 { 0xFB5D, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.MEDIAL }, 1945 { 0x1D736, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.FONT }, 1946 { 0xe0033, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE }, 1947 1948 { 0x0009, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL }, 1949 { 0x0020, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NARROW }, 1950 { 0x00B1, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1951 { 0x20A9, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.HALFWIDTH }, 1952 { 0x2FFB, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1953 { 0x3000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.FULLWIDTH }, 1954 { 0x35bb, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1955 { 0x58bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1956 { 0xD7A3, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1957 { 0xEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1958 { 0x1D198, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL }, 1959 { 0x20000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1960 { 0x2F8C7, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1961 { 0x3a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1962 { 0x5a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL }, 1963 { 0xFEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1964 { 0x10EEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1965 1966 /* UProperty.GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */ 1967 { 0xd7c7, UProperty.GENERAL_CATEGORY, 0 }, 1968 { 0xd7d7, UProperty.GENERAL_CATEGORY, UCharacterEnums.ECharacterCategory.OTHER_LETTER }, /* changed in Unicode 5.2 */ 1969 1970 { 0x0444, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP }, 1971 { 0x0639, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.AIN }, 1972 { 0x072A, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.DALATH_RISH }, 1973 { 0x0647, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH }, 1974 { 0x06C1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH_GOAL }, 1975 1976 { 0x200C, UProperty.JOINING_TYPE, UCharacter.JoiningType.NON_JOINING }, 1977 { 0x200D, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING }, 1978 { 0x0639, UProperty.JOINING_TYPE, UCharacter.JoiningType.DUAL_JOINING }, 1979 { 0x0640, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING }, 1980 { 0x06C3, UProperty.JOINING_TYPE, UCharacter.JoiningType.RIGHT_JOINING }, 1981 { 0x0300, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT }, 1982 { 0x070F, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT }, 1983 { 0xe0033, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT }, 1984 1985 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */ 1986 { 0xe7e7, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN }, 1987 { 0x10fffd, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN }, 1988 { 0x0028, UProperty.LINE_BREAK, UCharacter.LineBreak.OPEN_PUNCTUATION }, 1989 { 0x232A, UProperty.LINE_BREAK, UCharacter.LineBreak.CLOSE_PUNCTUATION }, 1990 { 0x3401, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 1991 { 0x4e02, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 1992 { 0x20004, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 1993 { 0xf905, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 1994 { 0xdb7e, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE }, 1995 { 0xdbfd, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE }, 1996 { 0xdffc, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE }, 1997 { 0x2762, UProperty.LINE_BREAK, UCharacter.LineBreak.EXCLAMATION }, 1998 { 0x002F, UProperty.LINE_BREAK, UCharacter.LineBreak.BREAK_SYMBOLS }, 1999 { 0x1D49C, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC }, 2000 { 0x1731, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC }, 2001 2002 /* UProperty.NUMERIC_TYPE tested in TestNumericProperties() */ 2003 2004 /* UProperty.SCRIPT tested in TestUScriptCodeAPI() */ 2005 2006 { 0x10ff, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2007 { 0x1100, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2008 { 0x1111, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2009 { 0x1159, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2010 { 0x115a, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2011 { 0x115e, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2012 { 0x115f, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2013 2014 { 0xa95f, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2015 { 0xa960, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2016 { 0xa97c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2017 { 0xa97d, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2018 2019 { 0x1160, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2020 { 0x1161, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2021 { 0x1172, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2022 { 0x11a2, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2023 { 0x11a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2024 { 0x11a7, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2025 2026 { 0xd7af, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2027 { 0xd7b0, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2028 { 0xd7c6, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2029 { 0xd7c7, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2030 2031 { 0x11a8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2032 { 0x11b8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2033 { 0x11c8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2034 { 0x11f9, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2035 { 0x11fa, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2036 { 0x11ff, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2037 { 0x1200, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2038 2039 { 0xd7ca, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2040 { 0xd7cb, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2041 { 0xd7fb, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2042 { 0xd7fc, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2043 2044 { 0xac00, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2045 { 0xac1c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2046 { 0xc5ec, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2047 { 0xd788, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2048 2049 { 0xac01, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2050 { 0xac1b, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2051 { 0xac1d, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2052 { 0xc5ee, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2053 { 0xd7a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2054 2055 { 0xd7a4, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2056 2057 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */ 2058 2059 { 0x00d7, UProperty.PATTERN_SYNTAX, 1 }, 2060 { 0xfe45, UProperty.PATTERN_SYNTAX, 1 }, 2061 { 0x0061, UProperty.PATTERN_SYNTAX, 0 }, 2062 2063 { 0x0020, UProperty.PATTERN_WHITE_SPACE, 1 }, 2064 { 0x0085, UProperty.PATTERN_WHITE_SPACE, 1 }, 2065 { 0x200f, UProperty.PATTERN_WHITE_SPACE, 1 }, 2066 { 0x00a0, UProperty.PATTERN_WHITE_SPACE, 0 }, 2067 { 0x3000, UProperty.PATTERN_WHITE_SPACE, 0 }, 2068 2069 { 0x1d200, UProperty.BLOCK, UCharacter.UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION_ID }, 2070 { 0x2c8e, UProperty.BLOCK, UCharacter.UnicodeBlock.COPTIC_ID }, 2071 { 0xfe17, UProperty.BLOCK, UCharacter.UnicodeBlock.VERTICAL_FORMS_ID }, 2072 2073 { 0x1a00, UProperty.SCRIPT, UScript.BUGINESE }, 2074 { 0x2cea, UProperty.SCRIPT, UScript.COPTIC }, 2075 { 0xa82b, UProperty.SCRIPT, UScript.SYLOTI_NAGRI }, 2076 { 0x103d0, UProperty.SCRIPT, UScript.OLD_PERSIAN }, 2077 2078 { 0xcc28, UProperty.LINE_BREAK, UCharacter.LineBreak.H2 }, 2079 { 0xcc29, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 }, 2080 { 0xac03, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 }, 2081 { 0x115f, UProperty.LINE_BREAK, UCharacter.LineBreak.JL }, 2082 { 0x11aa, UProperty.LINE_BREAK, UCharacter.LineBreak.JT }, 2083 { 0x11a1, UProperty.LINE_BREAK, UCharacter.LineBreak.JV }, 2084 2085 { 0xb2c9, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.LVT }, 2086 { 0x036f, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.EXTEND }, 2087 { 0x0000, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.CONTROL }, 2088 { 0x1160, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.V }, 2089 2090 { 0x05f4, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDLETTER }, 2091 { 0x4ef0, UProperty.WORD_BREAK, UCharacter.WordBreak.OTHER }, 2092 { 0x19d9, UProperty.WORD_BREAK, UCharacter.WordBreak.NUMERIC }, 2093 { 0x2044, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDNUM }, 2094 2095 { 0xfffd, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.OTHER }, 2096 { 0x1ffc, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.UPPER }, 2097 { 0xff63, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.CLOSE }, 2098 { 0x2028, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.SEP }, 2099 2100 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */ 2101 2102 /* unassigned code points in new default Bidi R blocks */ 2103 { 0x1ede4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 2104 { 0x1efe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 2105 2106 /* test some script codes >127 */ 2107 { 0xa6e6, UProperty.SCRIPT, UScript.BAMUM }, 2108 { 0xa4d0, UProperty.SCRIPT, UScript.LISU }, 2109 { 0x10a7f, UProperty.SCRIPT, UScript.OLD_SOUTH_ARABIAN }, 2110 2111 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */ 2112 2113 /* value changed in Unicode 6.0 */ 2114 { 0x06C3, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.TEH_MARBUTA_GOAL }, 2115 2116 { -1, 0x610, 0 }, /* version break for Unicode 6.1 */ 2117 2118 /* unassigned code points in new/changed default Bidi AL blocks */ 2119 { 0x08ba, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 2120 { 0x1eee4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 2121 2122 { -1, 0x630, 0 }, /* version break for Unicode 6.3 */ 2123 2124 /* unassigned code points in the currency symbols block now default to ET */ 2125 { 0x20C0, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, 2126 { 0x20CF, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, 2127 2128 /* new property in Unicode 6.3 */ 2129 { 0x0027, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE }, 2130 { 0x0028, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN }, 2131 { 0x0029, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE }, 2132 { 0xFF5C, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE }, 2133 { 0xFF5B, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN }, 2134 { 0xFF5D, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE }, 2135 2136 { -1, 0x700, 0 }, /* version break for Unicode 7.0 */ 2137 2138 /* new character range with Joining_Group values */ 2139 { 0x10ABF, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP }, 2140 { 0x10AC0, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.MANICHAEAN_ALEPH }, 2141 { 0x10AC1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.MANICHAEAN_BETH }, 2142 { 0x10AEF, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.MANICHAEAN_HUNDRED }, 2143 { 0x10AF0, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP }, 2144 2145 { -1, 0xa00, 0 }, // version break for Unicode 10 2146 2147 { 0x1F1E5, UProperty.REGIONAL_INDICATOR, FALSE }, 2148 { 0x1F1E7, UProperty.REGIONAL_INDICATOR, TRUE }, 2149 { 0x1F1FF, UProperty.REGIONAL_INDICATOR, TRUE }, 2150 { 0x1F200, UProperty.REGIONAL_INDICATOR, FALSE }, 2151 2152 { 0x0600, UProperty.PREPENDED_CONCATENATION_MARK, TRUE }, 2153 { 0x0606, UProperty.PREPENDED_CONCATENATION_MARK, FALSE }, 2154 { 0x110BD, UProperty.PREPENDED_CONCATENATION_MARK, TRUE }, 2155 2156 /* undefined UProperty values */ 2157 { 0x61, 0x4a7, 0 }, 2158 { 0x234bc, 0x15ed, 0 } 2159 }; 2160 2161 2162 if (UCharacter.getIntPropertyMinValue(UProperty.DASH) != 0 2163 || UCharacter.getIntPropertyMinValue(UProperty.BIDI_CLASS) != 0 2164 || UCharacter.getIntPropertyMinValue(UProperty.BLOCK)!= 0 /* j2478 */ 2165 || UCharacter.getIntPropertyMinValue(UProperty.SCRIPT)!= 0 /* JB#2410 */ 2166 || UCharacter.getIntPropertyMinValue(0x2345) != 0) { 2167 errln("error: UCharacter.getIntPropertyMinValue() wrong"); 2168 } 2169 2170 if( UCharacter.getIntPropertyMaxValue(UProperty.DASH)!=1) { 2171 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DASH) wrong\n"); 2172 } 2173 if( UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE)!=1) { 2174 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE) wrong\n"); 2175 } 2176 if( UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1)!=1) { 2177 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1) wrong\n"); 2178 } 2179 2180 if( UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)!=UCharacterDirection.CHAR_DIRECTION_COUNT-1 ) { 2181 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS) wrong\n"); 2182 } 2183 if( UCharacter.getIntPropertyMaxValue(UProperty.BLOCK)!=UCharacter.UnicodeBlock.COUNT-1 ) { 2184 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BLOCK) wrong\n"); 2185 } 2186 if(UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK)!=UCharacter.LineBreak.COUNT-1) { 2187 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK) wrong\n"); 2188 } 2189 if(UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)!=UScript.CODE_LIMIT-1) { 2190 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT) wrong\n"); 2191 } 2192 if(UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE)!=UCharacter.NumericType.COUNT-1) { 2193 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE) wrong\n"); 2194 } 2195 if(UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY)!=UCharacterCategory.CHAR_CATEGORY_COUNT-1) { 2196 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY) wrong\n"); 2197 } 2198 if(UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE)!=UCharacter.HangulSyllableType.COUNT-1) { 2199 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE) wrong\n"); 2200 } 2201 if(UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK)!=UCharacter.GraphemeClusterBreak.COUNT-1) { 2202 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK) wrong\n"); 2203 } 2204 if(UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK)!=UCharacter.SentenceBreak.COUNT-1) { 2205 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK) wrong\n"); 2206 } 2207 if(UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK)!=UCharacter.WordBreak.COUNT-1) { 2208 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK) wrong\n"); 2209 } 2210 if(UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE)!=UCharacter.BidiPairedBracketType.COUNT-1) { 2211 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE) wrong\n"); 2212 } 2213 /*JB#2410*/ 2214 if( UCharacter.getIntPropertyMaxValue(0x2345)!=-1) { 2215 errln("error: UCharacter.getIntPropertyMaxValue(0x2345) wrong\n"); 2216 } 2217 if( UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) != (UCharacter.DecompositionType.COUNT - 1)) { 2218 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) wrong\n"); 2219 } 2220 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) != (UCharacter.JoiningGroup.COUNT -1)) { 2221 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) wrong\n"); 2222 } 2223 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) != (UCharacter.JoiningType.COUNT -1)) { 2224 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) wrong\n"); 2225 } 2226 if( UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) != (UCharacter.EastAsianWidth.COUNT -1)) { 2227 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) wrong\n"); 2228 } 2229 2230 VersionInfo version = UCharacter.getUnicodeVersion(); 2231 2232 // test hasBinaryProperty() 2233 for (int i = 0; i < props.length; ++ i) { 2234 int which = props[i][1]; 2235 if (props[i][0] < 0) { 2236 if (version.compareTo(VersionInfo.getInstance(which >> 8, 2237 (which >> 4) & 0xF, 2238 which & 0xF, 2239 0)) < 0) { 2240 break; 2241 } 2242 continue; 2243 } 2244 String whichName; 2245 try { 2246 whichName = UCharacter.getPropertyName(which, UProperty.NameChoice.LONG); 2247 } catch(IllegalArgumentException e) { 2248 // There are intentionally invalid property integer values ("which"). 2249 // Catch and ignore the exception from getPropertyName(). 2250 whichName = "undefined UProperty value"; 2251 } 2252 boolean expect = true; 2253 if (props[i][2] == 0) { 2254 expect = false; 2255 } 2256 if (which < UProperty.INT_START) { 2257 if (UCharacter.hasBinaryProperty(props[i][0], which) 2258 != expect) { 2259 errln("error: UCharacter.hasBinaryProperty(U+" + 2260 Utility.hex(props[i][0], 4) + ", " + 2261 whichName + ") has an error, expected=" + expect); 2262 } 2263 } 2264 2265 int retVal = UCharacter.getIntPropertyValue(props[i][0], which); 2266 if (retVal != props[i][2]) { 2267 errln("error: UCharacter.getIntPropertyValue(U+" + 2268 Utility.hex(props[i][0], 4) + 2269 ", " + whichName + ") is wrong, expected=" 2270 + props[i][2] + " actual=" + retVal); 2271 } 2272 2273 // test separate functions, too 2274 switch (which) { 2275 case UProperty.ALPHABETIC: 2276 if (UCharacter.isUAlphabetic(props[i][0]) != expect) { 2277 errln("error: UCharacter.isUAlphabetic(\\u" + 2278 Integer.toHexString(props[i][0]) + 2279 ") is wrong expected " + props[i][2]); 2280 } 2281 break; 2282 case UProperty.LOWERCASE: 2283 if (UCharacter.isULowercase(props[i][0]) != expect) { 2284 errln("error: UCharacter.isULowercase(\\u" + 2285 Integer.toHexString(props[i][0]) + 2286 ") is wrong expected " +props[i][2]); 2287 } 2288 break; 2289 case UProperty.UPPERCASE: 2290 if (UCharacter.isUUppercase(props[i][0]) != expect) { 2291 errln("error: UCharacter.isUUppercase(\\u" + 2292 Integer.toHexString(props[i][0]) + 2293 ") is wrong expected " + props[i][2]); 2294 } 2295 break; 2296 case UProperty.WHITE_SPACE: 2297 if (UCharacter.isUWhiteSpace(props[i][0]) != expect) { 2298 errln("error: UCharacter.isUWhiteSpace(\\u" + 2299 Integer.toHexString(props[i][0]) + 2300 ") is wrong expected " + props[i][2]); 2301 } 2302 break; 2303 default: 2304 break; 2305 } 2306 } 2307 } 2308 2309 @Test 2310 public void TestNumericProperties() 2311 { 2312 // see UnicodeData.txt, DerivedNumericValues.txt 2313 double values[][] = { 2314 // Code point, numeric type, numeric value. 2315 // If a fourth value is specified, it is the getNumericValue(). 2316 // Otherwise it is expected to be the same as the getUnicodeNumericValue(), 2317 // where UCharacter.NO_NUMERIC_VALUE is turned into -1. 2318 // getNumericValue() returns -2 if the code point has a value 2319 // which is not a non-negative integer. (This is mostly auto-converted to -2.) 2320 { 0x0F33, UCharacter.NumericType.NUMERIC, -1./2. }, 2321 { 0x0C66, UCharacter.NumericType.DECIMAL, 0 }, 2322 { 0x96f6, UCharacter.NumericType.NUMERIC, 0 }, 2323 { 0xa833, UCharacter.NumericType.NUMERIC, 1./16. }, 2324 { 0x2152, UCharacter.NumericType.NUMERIC, 1./10. }, 2325 { 0x2151, UCharacter.NumericType.NUMERIC, 1./9. }, 2326 { 0x1245f, UCharacter.NumericType.NUMERIC, 1./8. }, 2327 { 0x2150, UCharacter.NumericType.NUMERIC, 1./7. }, 2328 { 0x2159, UCharacter.NumericType.NUMERIC, 1./6. }, 2329 { 0x09f6, UCharacter.NumericType.NUMERIC, 3./16. }, 2330 { 0x2155, UCharacter.NumericType.NUMERIC, 1./5. }, 2331 { 0x00BD, UCharacter.NumericType.NUMERIC, 1./2. }, 2332 { 0x0031, UCharacter.NumericType.DECIMAL, 1. }, 2333 { 0x4e00, UCharacter.NumericType.NUMERIC, 1. }, 2334 { 0x58f1, UCharacter.NumericType.NUMERIC, 1. }, 2335 { 0x10320, UCharacter.NumericType.NUMERIC, 1. }, 2336 { 0x0F2B, UCharacter.NumericType.NUMERIC, 3./2. }, 2337 { 0x00B2, UCharacter.NumericType.DIGIT, 2. }, /* Unicode 4.0 change */ 2338 { 0x5f10, UCharacter.NumericType.NUMERIC, 2. }, 2339 { 0x1813, UCharacter.NumericType.DECIMAL, 3. }, 2340 { 0x5f0e, UCharacter.NumericType.NUMERIC, 3. }, 2341 { 0x2173, UCharacter.NumericType.NUMERIC, 4. }, 2342 { 0x8086, UCharacter.NumericType.NUMERIC, 4. }, 2343 { 0x278E, UCharacter.NumericType.DIGIT, 5. }, 2344 { 0x1D7F2, UCharacter.NumericType.DECIMAL, 6. }, 2345 { 0x247A, UCharacter.NumericType.DIGIT, 7. }, 2346 { 0x7396, UCharacter.NumericType.NUMERIC, 9. }, 2347 { 0x1372, UCharacter.NumericType.NUMERIC, 10. }, 2348 { 0x216B, UCharacter.NumericType.NUMERIC, 12. }, 2349 { 0x16EE, UCharacter.NumericType.NUMERIC, 17. }, 2350 { 0x249A, UCharacter.NumericType.NUMERIC, 19. }, 2351 { 0x303A, UCharacter.NumericType.NUMERIC, 30. }, 2352 { 0x5345, UCharacter.NumericType.NUMERIC, 30. }, 2353 { 0x32B2, UCharacter.NumericType.NUMERIC, 37. }, 2354 { 0x1375, UCharacter.NumericType.NUMERIC, 40. }, 2355 { 0x10323, UCharacter.NumericType.NUMERIC, 50. }, 2356 { 0x0BF1, UCharacter.NumericType.NUMERIC, 100. }, 2357 { 0x964c, UCharacter.NumericType.NUMERIC, 100. }, 2358 { 0x217E, UCharacter.NumericType.NUMERIC, 500. }, 2359 { 0x2180, UCharacter.NumericType.NUMERIC, 1000. }, 2360 { 0x4edf, UCharacter.NumericType.NUMERIC, 1000. }, 2361 { 0x2181, UCharacter.NumericType.NUMERIC, 5000. }, 2362 { 0x137C, UCharacter.NumericType.NUMERIC, 10000. }, 2363 { 0x4e07, UCharacter.NumericType.NUMERIC, 10000. }, 2364 { 0x12432, UCharacter.NumericType.NUMERIC, 216000. }, 2365 { 0x12433, UCharacter.NumericType.NUMERIC, 432000. }, 2366 { 0x4ebf, UCharacter.NumericType.NUMERIC, 100000000. }, 2367 { 0x5146, UCharacter.NumericType.NUMERIC, 1000000000000. }, 2368 { -1, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2369 { 0x61, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE, 10. }, 2370 { 0x3000, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2371 { 0xfffe, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2372 { 0x10301, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2373 { 0xe0033, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2374 { 0x10ffff, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2375 { 0x110000, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE } 2376 }; 2377 2378 for (int i = 0; i < values.length; ++ i) { 2379 int c = (int)values[i][0]; 2380 int type = UCharacter.getIntPropertyValue(c, 2381 UProperty.NUMERIC_TYPE); 2382 double nv = UCharacter.getUnicodeNumericValue(c); 2383 2384 if (type != values[i][1]) { 2385 errln("UProperty.NUMERIC_TYPE(\\u" + Utility.hex(c, 4) 2386 + ") = " + type + " should be " + (int)values[i][1]); 2387 } 2388 if (0.000001 <= Math.abs(nv - values[i][2])) { 2389 errln("UCharacter.getUnicodeNumericValue(\\u" + Utility.hex(c, 4) 2390 + ") = " + nv + " should be " + values[i][2]); 2391 } 2392 2393 // Test getNumericValue() as well. 2394 // It can only return the subset of numeric values that are 2395 // non-negative and fit into an int. 2396 int expectedInt; 2397 if (values[i].length == 3) { 2398 if (values[i][2] == UCharacter.NO_NUMERIC_VALUE) { 2399 expectedInt = -1; 2400 } else { 2401 expectedInt = (int)values[i][2]; 2402 if (expectedInt < 0 || expectedInt != values[i][2]) { 2403 // The numeric value is not a non-negative integer. 2404 expectedInt = -2; 2405 } 2406 } 2407 } else { 2408 expectedInt = (int)values[i][3]; 2409 } 2410 int nvInt = UCharacter.getNumericValue(c); 2411 if (nvInt != expectedInt) { 2412 errln("UCharacter.getNumericValue(\\u" + Utility.hex(c, 4) 2413 + ") = " + nvInt + " should be " + expectedInt); 2414 } 2415 } 2416 } 2417 2418 /** 2419 * Test the property values API. See JB#2410. 2420 */ 2421 @Test 2422 public void TestPropertyValues() { 2423 int i, p, min, max; 2424 2425 /* Min should be 0 for everything. */ 2426 /* Until JB#2478 is fixed, the one exception is UProperty.BLOCK. */ 2427 for (p=UProperty.INT_START; p<UProperty.INT_LIMIT; ++p) { 2428 min = UCharacter.getIntPropertyMinValue(p); 2429 if (min != 0) { 2430 if (p == UProperty.BLOCK) { 2431 /* This is okay...for now. See JB#2487. 2432 TODO Update this for JB#2487. */ 2433 } else { 2434 String name; 2435 name = UCharacter.getPropertyName(p, UProperty.NameChoice.LONG); 2436 errln("FAIL: UCharacter.getIntPropertyMinValue(" + name + ") = " + 2437 min + ", exp. 0"); 2438 } 2439 } 2440 } 2441 2442 if (UCharacter.getIntPropertyMinValue(UProperty.GENERAL_CATEGORY_MASK) 2443 != 0 2444 || UCharacter.getIntPropertyMaxValue( 2445 UProperty.GENERAL_CATEGORY_MASK) 2446 != -1) { 2447 errln("error: UCharacter.getIntPropertyMin/MaxValue(" 2448 + "UProperty.GENERAL_CATEGORY_MASK) is wrong"); 2449 } 2450 2451 /* Max should be -1 for invalid properties. */ 2452 max = UCharacter.getIntPropertyMaxValue(-1); 2453 if (max != -1) { 2454 errln("FAIL: UCharacter.getIntPropertyMaxValue(-1) = " + 2455 max + ", exp. -1"); 2456 } 2457 2458 /* Script should return 0 for an invalid code point. If the API 2459 throws an exception then that's fine too. */ 2460 for (i=0; i<2; ++i) { 2461 try { 2462 int script = 0; 2463 String desc = null; 2464 switch (i) { 2465 case 0: 2466 script = UScript.getScript(-1); 2467 desc = "UScript.getScript(-1)"; 2468 break; 2469 case 1: 2470 script = UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT); 2471 desc = "UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT)"; 2472 break; 2473 } 2474 if (script != 0) { 2475 errln("FAIL: " + desc + " = " + script + ", exp. 0"); 2476 } 2477 } catch (IllegalArgumentException e) {} 2478 } 2479 } 2480 2481 @Test 2482 public void TestBidiPairedBracketType() { 2483 // BidiBrackets-6.3.0.txt says: 2484 // 2485 // The set of code points listed in this file was originally derived 2486 // using the character properties General_Category (gc), Bidi_Class (bc), 2487 // Bidi_Mirrored (Bidi_M), and Bidi_Mirroring_Glyph (bmg), as follows: 2488 // two characters, A and B, form a pair if A has gc=Ps and B has gc=Pe, 2489 // both have bc=ON and Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket 2490 // maps A to B and vice versa, and their Bidi_Paired_Bracket_Type 2491 // property values are Open and Close, respectively. 2492 UnicodeSet bpt = new UnicodeSet("[:^bpt=n:]"); 2493 assertTrue("bpt!=None is not empty", !bpt.isEmpty()); 2494 // The following should always be true. 2495 UnicodeSet mirrored = new UnicodeSet("[:Bidi_M:]"); 2496 UnicodeSet other_neutral = new UnicodeSet("[:bc=ON:]"); 2497 assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt)); 2498 assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt)); 2499 // The following are true at least initially in Unicode 6.3. 2500 UnicodeSet bpt_open = new UnicodeSet("[:bpt=o:]"); 2501 UnicodeSet bpt_close = new UnicodeSet("[:bpt=c:]"); 2502 UnicodeSet ps = new UnicodeSet("[:Ps:]"); 2503 UnicodeSet pe = new UnicodeSet("[:Pe:]"); 2504 assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open)); 2505 assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close)); 2506 } 2507 2508 @Test 2509 public void TestEmojiProperties() { 2510 assertFalse("space is not Emoji", UCharacter.hasBinaryProperty(0x20, UProperty.EMOJI)); 2511 assertTrue("shooting star is Emoji", UCharacter.hasBinaryProperty(0x1F320, UProperty.EMOJI)); 2512 UnicodeSet emoji = new UnicodeSet("[:Emoji:]"); 2513 assertTrue("lots of Emoji", emoji.size() > 700); 2514 2515 assertTrue("shooting star is Emoji_Presentation", 2516 UCharacter.hasBinaryProperty(0x1F320, UProperty.EMOJI_PRESENTATION)); 2517 assertTrue("Fitzpatrick 6 is Emoji_Modifier", 2518 UCharacter.hasBinaryProperty(0x1F3FF, UProperty.EMOJI_MODIFIER)); 2519 assertTrue("happy person is Emoji_Modifier_Base", 2520 UCharacter.hasBinaryProperty(0x1F64B, UProperty.EMOJI_MODIFIER_BASE)); 2521 assertTrue("asterisk is Emoji_Component", 2522 UCharacter.hasBinaryProperty(0x2A, UProperty.EMOJI_COMPONENT)); 2523 } 2524 2525 @Test 2526 public void TestIsBMP() 2527 { 2528 int ch[] = {0x0, -1, 0xffff, 0x10ffff, 0xff, 0x1ffff}; 2529 boolean flag[] = {true, false, true, false, true, false}; 2530 for (int i = 0; i < ch.length; i ++) { 2531 if (UCharacter.isBMP(ch[i]) != flag[i]) { 2532 errln("Fail: \\u" + Utility.hex(ch[i], 8) 2533 + " failed at UCharacter.isBMP"); 2534 } 2535 } 2536 } 2537 2538 private boolean showADiffB(UnicodeSet a, UnicodeSet b, 2539 String a_name, String b_name, 2540 boolean expect, 2541 boolean diffIsError){ 2542 int i, start, end; 2543 boolean equal=true; 2544 for(i=0; i < a.getRangeCount(); ++i) { 2545 start = a.getRangeStart(i); 2546 end = a.getRangeEnd(i); 2547 if(expect!=b.contains(start, end)) { 2548 equal=false; 2549 while(start<=end) { 2550 if(expect!=b.contains(start)) { 2551 if(diffIsError) { 2552 if(expect) { 2553 errln("error: "+ a_name +" contains "+ hex(start)+" but "+ b_name +" does not"); 2554 } else { 2555 errln("error: "+a_name +" and "+ b_name+" both contain "+hex(start) +" but should not intersect"); 2556 } 2557 } else { 2558 if(expect) { 2559 logln("info: "+a_name +" contains "+hex(start)+ "but " + b_name +" does not"); 2560 } else { 2561 logln("info: "+a_name +" and "+b_name+" both contain "+hex(start)+" but should not intersect"); 2562 } 2563 } 2564 } 2565 ++start; 2566 } 2567 } 2568 } 2569 return equal; 2570 } 2571 private boolean showAMinusB(UnicodeSet a, UnicodeSet b, 2572 String a_name, String b_name, 2573 boolean diffIsError) { 2574 2575 return showADiffB(a, b, a_name, b_name, true, diffIsError); 2576 } 2577 2578 private boolean showAIntersectB(UnicodeSet a, UnicodeSet b, 2579 String a_name, String b_name, 2580 boolean diffIsError) { 2581 return showADiffB(a, b, a_name, b_name, false, diffIsError); 2582 } 2583 2584 private boolean compareUSets(UnicodeSet a, UnicodeSet b, 2585 String a_name, String b_name, 2586 boolean diffIsError) { 2587 return 2588 showAMinusB(a, b, a_name, b_name, diffIsError) && 2589 showAMinusB(b, a, b_name, a_name, diffIsError); 2590 } 2591 2592 /* various tests for consistency of UCD data and API behavior */ 2593 @Test 2594 public void TestConsistency() { 2595 UnicodeSet set1, set2, set3, set4; 2596 2597 int start, end; 2598 int i, length; 2599 2600 String hyphenPattern = "[:Hyphen:]"; 2601 String dashPattern = "[:Dash:]"; 2602 String lowerPattern = "[:Lowercase:]"; 2603 String formatPattern = "[:Cf:]"; 2604 String alphaPattern = "[:Alphabetic:]"; 2605 2606 /* 2607 * It used to be that UCD.html and its precursors said 2608 * "Those dashes used to mark connections between pieces of words, 2609 * plus the Katakana middle dot." 2610 * 2611 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash 2612 * but not from Hyphen. 2613 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html. 2614 * Therefore, do not show errors when testing the Hyphen property. 2615 */ 2616 logln("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n" 2617 + "known to the UTC and not considered errors.\n"); 2618 2619 set1=new UnicodeSet(hyphenPattern); 2620 set2=new UnicodeSet(dashPattern); 2621 2622 /* remove the Katakana middle dot(s) from set1 */ 2623 set1.remove(0x30fb); 2624 set2.remove (0xff65); /* halfwidth variant */ 2625 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", false); 2626 2627 2628 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */ 2629 set3=new UnicodeSet(formatPattern); 2630 set4=new UnicodeSet(alphaPattern); 2631 2632 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", false); 2633 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", true); 2634 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", true); 2635 /* 2636 * Check that each lowercase character has "small" in its name 2637 * and not "capital". 2638 * There are some such characters, some of which seem odd. 2639 * Use the verbose flag to see these notices. 2640 */ 2641 set1=new UnicodeSet(lowerPattern); 2642 2643 for(i=0;; ++i) { 2644 // try{ 2645 // length=set1.getItem(set1, i, &start, &end, NULL, 0, &errorCode); 2646 // }catch(Exception e){ 2647 // break; 2648 // } 2649 start = set1.getRangeStart(i); 2650 end = set1.getRangeEnd(i); 2651 length = i<set1.getRangeCount() ? set1.getRangeCount() : 0; 2652 if(length!=0) { 2653 break; /* done with code points, got a string or -1 */ 2654 } 2655 2656 while(start<=end) { 2657 String name=UCharacter.getName(start); 2658 2659 if( (name.indexOf("SMALL")< 0 || name.indexOf("CAPITAL")<-1) && 2660 name.indexOf("SMALL CAPITAL")==-1 2661 ) { 2662 logln("info: [:Lowercase:] contains U+"+hex(start) + " whose name does not suggest lowercase: " + name); 2663 } 2664 ++start; 2665 } 2666 } 2667 2668 2669 /* 2670 * Test for an example that unorm_getCanonStartSet() delivers 2671 * all characters that compose from the input one, 2672 * even in multiple steps. 2673 * For example, the set for "I" (0049) should contain both 2674 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E). 2675 * In general, the set for the middle such character should be a subset 2676 * of the set for the first. 2677 */ 2678 Normalizer2 norm2=Normalizer2.getNFDInstance(); 2679 set1=new UnicodeSet(); 2680 Norm2AllModes.getNFCInstance().impl. 2681 ensureCanonIterData().getCanonStartSet(0x49, set1); 2682 set2=new UnicodeSet(); 2683 2684 /* enumerate all characters that are plausible to be latin letters */ 2685 for(start=0xa0; start<0x2000; ++start) { 2686 String decomp=norm2.normalize(UTF16.valueOf(start)); 2687 if(decomp.length() > 1 && decomp.charAt(0)==0x49) { 2688 set2.add(start); 2689 } 2690 } 2691 2692 compareUSets(set1, set2, 2693 "[canon start set of 0049]", "[all c with canon decomp with 0049]", 2694 false); 2695 2696 } 2697 2698 @Test 2699 public void TestCoverage() { 2700 //cover forDigit 2701 char ch1 = UCharacter.forDigit(7, 11); 2702 assertEquals("UCharacter.forDigit ", "7", String.valueOf(ch1)); 2703 char ch2 = UCharacter.forDigit(17, 20); 2704 assertEquals("UCharacter.forDigit ", "h", String.valueOf(ch2)); 2705 2706 //Jitterbug 4451, for coverage 2707 for (int i = 0x0041; i < 0x005B; i++) { 2708 if (!UCharacter.isJavaLetter(i)) 2709 errln("FAIL \\u" + hex(i) + " expected to be a letter"); 2710 if (!UCharacter.isJavaIdentifierStart(i)) 2711 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier start character"); 2712 if (!UCharacter.isJavaLetterOrDigit(i)) 2713 errln("FAIL \\u" + hex(i) + " expected not to be a Java letter"); 2714 if (!UCharacter.isJavaIdentifierPart(i)) 2715 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier part character"); 2716 } 2717 char[] spaces = {'\t','\n','\f','\r',' '}; 2718 for (int i = 0; i < spaces.length; i++){ 2719 if (!UCharacter.isSpace(spaces[i])) 2720 errln("FAIL \\u" + hex(spaces[i]) + " expected to be a Java space"); 2721 } 2722 } 2723 2724 @Test 2725 public void TestBlockData() 2726 { 2727 Class ubc = UCharacter.UnicodeBlock.class; 2728 2729 for (int b = 1; b < UCharacter.UnicodeBlock.COUNT; b += 1) { 2730 UCharacter.UnicodeBlock blk = UCharacter.UnicodeBlock.getInstance(b); 2731 int id = blk.getID(); 2732 String name = blk.toString(); 2733 2734 if (id != b) { 2735 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with id = " + id); 2736 } 2737 2738 try { 2739 if (ubc.getField(name + "_ID").getInt(blk) != b) { 2740 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with a name of " + name + 2741 " which does not match the block id."); 2742 } 2743 } catch (Exception e) { 2744 errln("Couldn't get the id name for id " + b); 2745 } 2746 } 2747 } 2748 2749 /* 2750 * The following method tests 2751 * public static UnicodeBlock getInstance(int id) 2752 */ 2753 @Test 2754 public void TestGetInstance(){ 2755 // Testing values for invalid and valid ID 2756 int[] invalid_test = {-1,-10,-100}; 2757 for(int i=0; i< invalid_test.length; i++){ 2758 if(UCharacter.UnicodeBlock.INVALID_CODE != UCharacter.UnicodeBlock.getInstance(invalid_test[i])){ 2759 errln("UCharacter.UnicodeBlock.getInstance(invalid_test[i]) was " + 2760 "suppose to return UCharacter.UnicodeBlock.INVALID_CODE. Got " + 2761 UCharacter.UnicodeBlock.getInstance(invalid_test[i]) + ". Expected " + 2762 UCharacter.UnicodeBlock.INVALID_CODE); 2763 } 2764 } 2765 } 2766 2767 /* 2768 * The following method tests 2769 * public static UnicodeBlock of(int ch) 2770 */ 2771 @Test 2772 public void TestOf(){ 2773 if(UCharacter.UnicodeBlock.INVALID_CODE != UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1)){ 2774 errln("UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1) was " + 2775 "suppose to return UCharacter.UnicodeBlock.INVALID_CODE. Got " + 2776 UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1) + ". Expected " + 2777 UCharacter.UnicodeBlock.INVALID_CODE); 2778 } 2779 } 2780 2781 /* 2782 * The following method tests 2783 * public static final UnicodeBlock forName(String blockName) 2784 */ 2785 @Test 2786 public void TestForName(){ 2787 //UCharacter.UnicodeBlock.forName(""); 2788 //Tests when "if (b == null)" is true 2789 } 2790 2791 /* 2792 * The following method tests 2793 * public static int getNumericValue(int ch) 2794 */ 2795 @Test 2796 public void TestGetNumericValue(){ 2797 // The following tests the else statement when 2798 // if(numericType<NumericType.COUNT) is false 2799 // The following values were obtained by testing all values from 2800 // UTF16.CODEPOINT_MIN_VALUE to UTF16.CODEPOINT_MAX_VALUE inclusively 2801 // to obtain the value to go through the else statement. 2802 int[] valid_values = 2803 {3058,3442,4988,8558,8559,8574,8575,8576,8577,8578,8583,8584,19975, 2804 20159,20191,20740,20806,21315,33836,38433,65819,65820,65821,65822, 2805 65823,65824,65825,65826,65827,65828,65829,65830,65831,65832,65833, 2806 65834,65835,65836,65837,65838,65839,65840,65841,65842,65843,65861, 2807 65862,65863,65868,65869,65870,65875,65876,65877,65878,65899,65900, 2808 65901,65902,65903,65904,65905,65906,66378,68167}; 2809 2810 int[] results = 2811 {1000,1000,10000,500,1000,500,1000,1000,5000,10000,50000,100000, 2812 10000,100000000,1000,100000000,-2,1000,10000,1000,300,400,500, 2813 600,700,800,900,1000,2000,3000,4000,5000,6000,7000,8000,9000, 2814 10000,20000,30000,40000,50000,60000,70000,80000,90000,500,5000, 2815 50000,500,1000,5000,500,1000,10000,50000,300,500,500,500,500,500, 2816 1000,5000,900,1000}; 2817 2818 if(valid_values.length != results.length){ 2819 errln("The valid_values array and the results array need to be "+ 2820 "the same length."); 2821 } else { 2822 for(int i = 0; i < valid_values.length; i++){ 2823 try{ 2824 if(UCharacter.getNumericValue(valid_values[i]) != results[i]){ 2825 errln("UCharacter.getNumericValue(i) returned a " + 2826 "different value from the expected result. " + 2827 "Got " + UCharacter.getNumericValue(valid_values[i]) + 2828 "Expected" + results[i]); 2829 } 2830 } catch(Exception e){ 2831 errln("UCharacter.getNumericValue(int) returned an exception " + 2832 "with the parameter value"); 2833 } 2834 } 2835 } 2836 } 2837 2838 /* 2839 * The following method tests 2840 * public static double getUnicodeNumericValue(int ch) 2841 */ 2842 // The following tests covers if(mant==0), else if(mant > 9), and default 2843 @Test 2844 public void TestGetUnicodeNumericValue(){ 2845 /* The code coverage for if(mant==0), else if(mant > 9), and default 2846 * could not be covered even with input values from UTF16.CODEPOINT_MIN_VALUE 2847 * to UTF16.CODEPOINT_MAX_VALUE. I also tested from UTF16.CODEPOINT_MAX_VALUE to 2848 * Integer.MAX_VALUE and didn't recieve any code coverage there too. 2849 * Therefore, the code could either be dead code or meaningless. 2850 */ 2851 } 2852 2853 /* 2854 * The following method tests 2855 * public static String toString(int ch) 2856 */ 2857 @Test 2858 public void TestToString(){ 2859 int[] valid_tests = { 2860 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 2861 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE}; 2862 int[] invalid_tests = { 2863 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 2864 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 2865 2866 for(int i=0; i< valid_tests.length; i++){ 2867 if(UCharacter.toString(valid_tests[i]) == null){ 2868 errln("UCharacter.toString(int) was not suppose to return " + 2869 "null because it was given a valid parameter. Value passed: " + 2870 valid_tests[i] + ". Got null."); 2871 } 2872 } 2873 2874 for(int i=0; i< invalid_tests.length; i++){ 2875 if(UCharacter.toString(invalid_tests[i]) != null){ 2876 errln("UCharacter.toString(int) was suppose to return " + 2877 "null because it was given an invalid parameter. Value passed: " + 2878 invalid_tests[i] + ". Got: " + UCharacter.toString(invalid_tests[i])); 2879 } 2880 } 2881 } 2882 2883 /* 2884 * The following method tests 2885 * public static int getCombiningClass(int ch) 2886 */ 2887 @Test 2888 public void TestGetCombiningClass(){ 2889 int[] valid_tests = { 2890 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 2891 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE}; 2892 int[] invalid_tests = { 2893 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 2894 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 2895 2896 for(int i=0; i< valid_tests.length; i++){ 2897 try{ 2898 UCharacter.getCombiningClass(valid_tests[i]); 2899 } catch(Exception e){ 2900 errln("UCharacter.getCombiningClass(int) was not supposed to have " + 2901 "an exception. Value passed: " + valid_tests[i]); 2902 } 2903 } 2904 2905 for(int i=0; i< invalid_tests.length; i++){ 2906 try{ 2907 assertEquals("getCombiningClass(out of range)", 2908 0, UCharacter.getCombiningClass(invalid_tests[i])); 2909 } catch(Exception e){ 2910 errln("UCharacter.getCombiningClass(int) was not supposed to have " + 2911 "an exception. Value passed: " + invalid_tests[i]); 2912 } 2913 } 2914 } 2915 2916 /* 2917 * The following method tests 2918 * public static String getName(int ch) 2919 */ 2920 @Test 2921 public void TestGetName(){ 2922 // Need to test on other "one characters" for the getName() method 2923 String[] data = {"a","z"}; 2924 String[] results = {"LATIN SMALL LETTER A","LATIN SMALL LETTER Z"}; 2925 if(data.length != results.length){ 2926 errln("The data array and the results array need to be "+ 2927 "the same length."); 2928 } else { 2929 for(int i=0; i < data.length; i++){ 2930 if(UCharacter.getName(data[i], "").compareTo(results[i]) != 0){ 2931 errln("UCharacter.getName(String, String) was suppose " + 2932 "to have the same result for the data in the parameter. " + 2933 "Value passed: " + data[i] + ". Got: " + 2934 UCharacter.getName(data[i], "") + ". Expected: " + 2935 results[i]); 2936 } 2937 } 2938 } 2939 } 2940 2941 /* 2942 * The following method tests 2943 * public static String getISOComment(int ch) 2944 */ 2945 @Test 2946 public void TestGetISOComment(){ 2947 int[] invalid_tests = { 2948 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 2949 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 2950 2951 for(int i=0; i< invalid_tests.length; i++){ 2952 if(UCharacter.getISOComment(invalid_tests[i]) != null){ 2953 errln("UCharacter.getISOComment(int) was suppose to return " + 2954 "null because it was given an invalid parameter. Value passed: " + 2955 invalid_tests[i] + ". Got: " + UCharacter.getISOComment(invalid_tests[i])); 2956 } 2957 } 2958 } 2959 2960 /* 2961 * The following method tests 2962 * public void setLimit(int lim) 2963 */ 2964 @Test 2965 public void TestSetLimit(){ 2966 // TODO: Tests when "if(0<=lim && lim<=s.length())" is false 2967 } 2968 2969 /* 2970 * The following method tests 2971 * public int nextCaseMapCP() 2972 */ 2973 @Test 2974 public void TestNextCaseMapCP(){ 2975 // TODO: Tests when "if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE)" is false 2976 /* TODO: Tests when "if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit && 2977 * UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) && c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE)" is false 2978 */ 2979 } 2980 2981 /* 2982 * The following method tests 2983 * public void reset(int direction) 2984 */ 2985 @Test 2986 public void TestReset(){ 2987 // The method reset() is never called by another function 2988 // TODO: Tests when "else if(direction<0)" is false 2989 } 2990 2991 /* 2992 * The following methods test 2993 * public static String toTitleCase(Locale locale, String str, BreakIterator breakiter) 2994 */ 2995 @Test 2996 public void TestToTitleCaseCoverage(){ 2997 //Calls the function "toTitleCase(Locale locale, String str, BreakIterator breakiter)" 2998 String[] locale={"en","fr","zh","ko","ja","it","de",""}; 2999 for(int i=0; i<locale.length; i++){ 3000 UCharacter.toTitleCase(new Locale(locale[i]), "", null); 3001 } 3002 3003 // Calls the function "String toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)" 3004 // Tests when "if (locale == null)" is true 3005 UCharacter.toTitleCase((ULocale)null, "", null, 0); 3006 3007 // TODO: Tests when "if(index==BreakIterator.DONE || index>srcLength)" is true 3008 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0 && UCaseProps.NONE==gCsp.getType(c))" is false 3009 // TODO: Tests when "if(prev<titleStart)" is false 3010 // TODO: Tests when "if(c<=0xffff)" is false 3011 // TODO: Tests when "if(c<=0xffff)" is false 3012 // TODO: Tests when "if(titleLimit<index)" is false 3013 // TODO: Tests when "else if((nc=iter.nextCaseMapCP())>=0)" is false 3014 } 3015 3016 @Test 3017 public void testToTitleCase_Locale_String_BreakIterator_I() { 3018 String titleCase = UCharacter.toTitleCase(new Locale("nl"), "ijsland", null, 3019 UCharacter.FOLD_CASE_DEFAULT); 3020 assertEquals("Wrong title casing", "IJsland", titleCase); 3021 } 3022 3023 @Test 3024 public void testToTitleCase_String_BreakIterator_en() { 3025 String titleCase = UCharacter.toTitleCase(new Locale("en"), "ijsland", null); 3026 assertEquals("Wrong title casing", "Ijsland", titleCase); 3027 } 3028 /* 3029 * The following method tests 3030 * public static String toUpperCase(ULocale locale, String str) 3031 */ 3032 @Test 3033 public void TestToUpperCase(){ 3034 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0)" is false 3035 } 3036 3037 /* 3038 * The following method tests 3039 * public static String toLowerCase(ULocale locale, String str) 3040 */ 3041 @Test 3042 public void TestToLowerCase(){ 3043 // Test when locale is null 3044 String[] cases = {"","a","A","z","Z","Dummy","DUMMY","dummy","a z","A Z", 3045 "'","\"","0","9","0a","a0","*","~!@#$%^&*()_+"}; 3046 for(int i=0; i<cases.length; i++){ 3047 try{ 3048 UCharacter.toLowerCase((ULocale) null, cases[i]); 3049 } catch(Exception e){ 3050 errln("UCharacter.toLowerCase was not suppose to return an " + 3051 "exception for input of null and string: " + cases[i]); 3052 } 3053 } 3054 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0)" is false 3055 } 3056 3057 /* 3058 * The following method tests 3059 * public static int getHanNumericValue(int ch) 3060 */ 3061 @Test 3062 public void TestGetHanNumericValue(){ 3063 int[] valid = { 3064 0x3007, //IDEOGRAPHIC_NUMBER_ZERO_ 3065 0x96f6, //CJK_IDEOGRAPH_COMPLEX_ZERO_ 3066 0x4e00, //CJK_IDEOGRAPH_FIRST_ 3067 0x58f9, //CJK_IDEOGRAPH_COMPLEX_ONE_ 3068 0x4e8c, //CJK_IDEOGRAPH_SECOND_ 3069 0x8cb3, //CJK_IDEOGRAPH_COMPLEX_TWO_ 3070 0x4e09, //CJK_IDEOGRAPH_THIRD_ 3071 0x53c3, //CJK_IDEOGRAPH_COMPLEX_THREE_ 3072 0x56db, //CJK_IDEOGRAPH_FOURTH_ 3073 0x8086, //CJK_IDEOGRAPH_COMPLEX_FOUR_ 3074 0x4e94, //CJK_IDEOGRAPH_FIFTH_ 3075 0x4f0d, //CJK_IDEOGRAPH_COMPLEX_FIVE_ 3076 0x516d, //CJK_IDEOGRAPH_SIXTH_ 3077 0x9678, //CJK_IDEOGRAPH_COMPLEX_SIX_ 3078 0x4e03, //CJK_IDEOGRAPH_SEVENTH_ 3079 0x67d2, //CJK_IDEOGRAPH_COMPLEX_SEVEN_ 3080 0x516b, //CJK_IDEOGRAPH_EIGHTH_ 3081 0x634c, //CJK_IDEOGRAPH_COMPLEX_EIGHT_ 3082 0x4e5d, //CJK_IDEOGRAPH_NINETH_ 3083 0x7396, //CJK_IDEOGRAPH_COMPLEX_NINE_ 3084 0x5341, //CJK_IDEOGRAPH_TEN_ 3085 0x62fe, //CJK_IDEOGRAPH_COMPLEX_TEN_ 3086 0x767e, //CJK_IDEOGRAPH_HUNDRED_ 3087 0x4f70, //CJK_IDEOGRAPH_COMPLEX_HUNDRED_ 3088 0x5343, //CJK_IDEOGRAPH_THOUSAND_ 3089 0x4edf, //CJK_IDEOGRAPH_COMPLEX_THOUSAND_ 3090 0x824c, //CJK_IDEOGRAPH_TEN_THOUSAND_ 3091 0x5104, //CJK_IDEOGRAPH_HUNDRED_MILLION_ 3092 }; 3093 3094 int[] invalid = {-5,-2,-1,0}; 3095 3096 int[] results = {0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,100,100, 3097 1000,1000,10000,100000000}; 3098 3099 if(valid.length != results.length){ 3100 errln("The arrays valid and results are suppose to be the same length " + 3101 "to test getHanNumericValue(int ch)."); 3102 } else{ 3103 for(int i=0; i<valid.length; i++){ 3104 if(UCharacter.getHanNumericValue(valid[i]) != results[i]){ 3105 errln("UCharacter.getHanNumericValue does not return the " + 3106 "same result as expected. Passed value: " + valid[i] + 3107 ". Got: " + UCharacter.getHanNumericValue(valid[i]) + 3108 ". Expected: " + results[i]); 3109 } 3110 } 3111 } 3112 3113 for(int i=0; i<invalid.length; i++){ 3114 if(UCharacter.getHanNumericValue(invalid[i]) != -1){ 3115 errln("UCharacter.getHanNumericValue does not return the " + 3116 "same result as expected. Passed value: " + invalid[i] + 3117 ". Got: " + UCharacter.getHanNumericValue(invalid[i]) + 3118 ". Expected: -1"); 3119 } 3120 } 3121 } 3122 3123 /* 3124 * The following method tests 3125 * public static boolean hasBinaryProperty(int ch, int property) 3126 */ 3127 @Test 3128 public void TestHasBinaryProperty(){ 3129 // Testing when "if (ch < MIN_VALUE || ch > MAX_VALUE)" is true 3130 int[] invalid = { 3131 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 3132 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 3133 int[] valid = { 3134 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 3135 UCharacter.MAX_VALUE, UCharacter.MAX_VALUE-1}; 3136 3137 for(int i=0; i<invalid.length; i++){ 3138 try{ 3139 if (UCharacter.hasBinaryProperty(invalid[i], 1)) { 3140 errln("UCharacter.hasBinaryProperty(ch, property) should return " + 3141 "false for out-of-range code points but " + 3142 "returns true for " + invalid[i]); 3143 } 3144 } catch(Exception e) { 3145 errln("UCharacter.hasBinaryProperty(ch, property) should not " + 3146 "throw an exception for any input. Value passed: " + 3147 invalid[i]); 3148 } 3149 } 3150 3151 for(int i=0; i<valid.length; i++){ 3152 try{ 3153 UCharacter.hasBinaryProperty(valid[i], 1); 3154 } catch(Exception e) { 3155 errln("UCharacter.hasBinaryProperty(ch, property) should not " + 3156 "throw an exception for any input. Value passed: " + 3157 valid[i]); 3158 } 3159 } 3160 } 3161 3162 /* 3163 * The following method tests 3164 * public static int getIntPropertyValue(int ch, int type) 3165 */ 3166 @Test 3167 public void TestGetIntPropertyValue(){ 3168 /* Testing UCharacter.getIntPropertyValue(ch, type) */ 3169 // Testing when "if (type < UProperty.BINARY_START)" is true 3170 int[] negative_cases = {-100,-50,-10,-5,-2,-1}; 3171 for(int i=0; i<negative_cases.length; i++){ 3172 if(UCharacter.getIntPropertyValue(0, negative_cases[i]) != 0){ 3173 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " + 3174 "when passing a negative value of " + negative_cases[i]); 3175 3176 } 3177 } 3178 3179 // Testing when "if(ch<NormalizerImpl.JAMO_L_BASE)" is true 3180 for(int i=Normalizer2Impl.Hangul.JAMO_L_BASE-5; i<Normalizer2Impl.Hangul.JAMO_L_BASE; i++){ 3181 if(UCharacter.getIntPropertyValue(i, UProperty.HANGUL_SYLLABLE_TYPE) != 0){ 3182 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " + 3183 "when passing ch: " + i + "and type of Property.HANGUL_SYLLABLE_TYPE"); 3184 3185 } 3186 } 3187 3188 // Testing when "else if((ch-=NormalizerImpl.HANGUL_BASE)<0)" is true 3189 for(int i=Normalizer2Impl.Hangul.HANGUL_BASE-5; i<Normalizer2Impl.Hangul.HANGUL_BASE; i++){ 3190 if(UCharacter.getIntPropertyValue(i, UProperty.HANGUL_SYLLABLE_TYPE) != 0){ 3191 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " + 3192 "when passing ch: " + i + "and type of Property.HANGUL_SYLLABLE_TYPE"); 3193 3194 } 3195 } 3196 } 3197 3198 /* 3199 * The following method tests 3200 * public static int getIntPropertyMaxValue(int type) 3201 */ 3202 @Test 3203 public void TestGetIntPropertyMaxValue(){ 3204 /* Testing UCharacter.getIntPropertyMaxValue(type) */ 3205 // Testing when "else if (type < UProperty.INT_START)" is true 3206 int[] cases = {UProperty.BINARY_LIMIT, UProperty.BINARY_LIMIT+1, 3207 UProperty.INT_START-2, UProperty.INT_START-1}; 3208 for(int i=0; i<cases.length; i++){ 3209 if(UCharacter.getIntPropertyMaxValue(cases[i]) != -1){ 3210 errln("UCharacter.getIntPropertyMaxValue was suppose to return -1 " + 3211 "but got " + UCharacter.getIntPropertyMaxValue(cases[i])); 3212 } 3213 } 3214 3215 // TODO: Testing when the case statment reaches "default" 3216 // After testing between values of UProperty.INT_START and 3217 // UProperty.INT_LIMIT are covered, none of the values reaches default. 3218 } 3219 3220 /* 3221 * The following method tests 3222 * public static final int codePointAt(CharSequence seq, int index) 3223 * public static final int codePointAt(char[] text, int index, int limit) 3224 */ 3225 @Test 3226 public void TestCodePointAt(){ 3227 3228 // {LEAD_SURROGATE_MIN_VALUE, 3229 // LEAD_SURROGATE_MAX_VALUE, LEAD_SURROGATE_MAX_VALUE-1 3230 String[] cases = {"\uD800","\uDBFF","\uDBFE"}; 3231 int[] result = {55296,56319,56318}; 3232 for(int i=0; i < cases.length; i++){ 3233 /* Testing UCharacter.codePointAt(seq, index) */ 3234 // Testing when "if (index < seq.length())" is false 3235 if(UCharacter.codePointAt(cases[i], 0) != result[i]) 3236 errln("UCharacter.codePointAt(CharSequence ...) did not return as expected. " + 3237 "Passed value: " + cases[i] + ". Expected: " + 3238 result[i] + ". Got: " + 3239 UCharacter.codePointAt(cases[i], 0)); 3240 3241 /* Testing UCharacter.codePointAt(text, index) */ 3242 // Testing when "if (index < text.length)" is false 3243 if(UCharacter.codePointAt(cases[i].toCharArray(), 0) != result[i]) 3244 errln("UCharacter.codePointAt(char[] ...) did not return as expected. " + 3245 "Passed value: " + cases[i] + ". Expected: " + 3246 result[i] + ". Got: " + 3247 UCharacter.codePointAt(cases[i].toCharArray(), 0)); 3248 3249 /* Testing UCharacter.codePointAt(text, index, limit) */ 3250 // Testing when "if (index < limit)" is false 3251 if(UCharacter.codePointAt(cases[i].toCharArray(), 0, 1) != result[i]) 3252 errln("UCharacter.codePointAt(char[], int, int) did not return as expected. " + 3253 "Passed value: " + cases[i] + ". Expected: " + 3254 result[i] + ". Got: " + 3255 UCharacter.codePointAt(cases[i].toCharArray(), 0, 1)); 3256 } 3257 3258 /* Testing UCharacter.codePointAt(text, index, limit) */ 3259 // Testing when "if (index >= limit || limit > text.length)" is true 3260 char[] empty_text = {}; 3261 char[] one_char_text = {'a'}; 3262 char[] reg_text = {'d','u','m','m','y'}; 3263 int[] limitCases = {2,3,5,10,25}; 3264 3265 // When index >= limit 3266 for(int i=0; i < limitCases.length; i++){ 3267 try{ 3268 UCharacter.codePointAt(reg_text, 100, limitCases[i]); 3269 errln("UCharacter.codePointAt was suppose to return an exception " + 3270 "but got " + UCharacter.codePointAt(reg_text, 100, limitCases[i]) + 3271 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " + 3272 100 + ", Limit: " + limitCases[i] + "."); 3273 } catch(Exception e){ 3274 } 3275 } 3276 3277 // When limit > text.length 3278 for(int i=0; i < limitCases.length; i++){ 3279 try{ 3280 UCharacter.codePointAt(empty_text, 0, limitCases[i]); 3281 errln("UCharacter.codePointAt was suppose to return an exception " + 3282 "but got " + UCharacter.codePointAt(empty_text, 0, limitCases[i]) + 3283 ". The following passed parameters were Text: " + String.valueOf(empty_text) + ", Start: " + 3284 0 + ", Limit: " + limitCases[i] + "."); 3285 } catch(Exception e){ 3286 } 3287 3288 try{ 3289 UCharacter.codePointCount(one_char_text, 0, limitCases[i]); 3290 errln("UCharacter.codePointCount was suppose to return an exception " + 3291 "but got " + UCharacter.codePointCount(one_char_text, 0, limitCases[i]) + 3292 ". The following passed parameters were Text: " + String.valueOf(one_char_text) + ", Start: " + 3293 0 + ", Limit: " + limitCases[i] + "."); 3294 } catch(Exception e){ 3295 } 3296 } 3297 } 3298 3299 /* 3300 * The following method tests 3301 * public static final int codePointBefore(CharSequence seq, int index) 3302 * public static final int codePointBefore(char[] text, int index) 3303 * public static final int codePointBefore(char[] text, int index, int limit) 3304 */ 3305 @Test 3306 public void TestCodePointBefore(){ 3307 // {TRAIL_SURROGATE_MIN_VALUE, 3308 // TRAIL_SURROGATE_MAX_VALUE, TRAIL_SURROGATE_MAX_VALUE -1 3309 String[] cases = {"\uDC00","\uDFFF","\uDDFE"}; 3310 int[] result = {56320,57343,56830}; 3311 for(int i=0; i < cases.length; i++){ 3312 /* Testing UCharacter.codePointBefore(seq, index) */ 3313 // Testing when "if (index > 0)" is false 3314 if(UCharacter.codePointBefore(cases[i], 1) != result[i]) 3315 errln("UCharacter.codePointBefore(CharSequence ...) did not return as expected. " + 3316 "Passed value: " + cases[i] + ". Expected: " + 3317 result[i] + ". Got: " + 3318 UCharacter.codePointBefore(cases[i], 1)); 3319 3320 /* Testing UCharacter.codePointBefore(text, index) */ 3321 // Testing when "if (index > 0)" is false 3322 if(UCharacter.codePointBefore(cases[i].toCharArray(), 1) != result[i]) 3323 errln("UCharacter.codePointBefore(char[] ...) did not return as expected. " + 3324 "Passed value: " + cases[i] + ". Expected: " + 3325 result[i] + ". Got: " + 3326 UCharacter.codePointBefore(cases[i].toCharArray(), 1)); 3327 3328 /* Testing UCharacter.codePointBefore(text, index, limit) */ 3329 // Testing when "if (index > limit)" is false 3330 if(UCharacter.codePointBefore(cases[i].toCharArray(), 1, 0) != result[i]) 3331 errln("UCharacter.codePointBefore(char[], int, int) did not return as expected. " + 3332 "Passed value: " + cases[i] + ". Expected: " + 3333 result[i] + ". Got: " + 3334 UCharacter.codePointBefore(cases[i].toCharArray(), 1, 0)); 3335 } 3336 3337 /* Testing UCharacter.codePointBefore(text, index, limit) */ 3338 char[] dummy = {'d','u','m','m','y'}; 3339 // Testing when "if (index <= limit || limit < 0)" is true 3340 int[] negative_cases = {-100,-10,-5,-2,-1}; 3341 int[] index_cases = {0,1,2,5,10,100}; 3342 3343 for(int i=0; i < negative_cases.length; i++){ 3344 try{ 3345 UCharacter.codePointBefore(dummy, 10000, negative_cases[i]); 3346 errln("UCharacter.codePointBefore(text, index, limit) was suppose to return an exception " + 3347 "when the parameter limit of " + negative_cases[i] + " is a negative number."); 3348 } catch(Exception e) {} 3349 } 3350 3351 for(int i=0; i < index_cases.length; i++){ 3352 try{ 3353 UCharacter.codePointBefore(dummy, index_cases[i], 101); 3354 errln("UCharacter.codePointBefore(text, index, limit) was suppose to return an exception " + 3355 "when the parameter index of " + index_cases[i] + " is a negative number."); 3356 } catch(Exception e) {} 3357 } 3358 } 3359 3360 /* 3361 * The following method tests 3362 * public static final int toChars(int cp, char[] dst, int dstIndex) 3363 * public static final char[] toChars(int cp) 3364 */ 3365 @Test 3366 public void TestToChars(){ 3367 int[] positive_cases = {1,2,5,10,100}; 3368 char[] dst = {'a'}; 3369 3370 /* Testing UCharacter.toChars(cp, dst, dstIndex) */ 3371 for(int i=0; i < positive_cases.length; i++){ 3372 // Testing negative values when cp < 0 for if (cp >= 0) 3373 try{ 3374 UCharacter.toChars(-1*positive_cases[i],dst,0); 3375 errln("UCharacter.toChars(int,char[],int) was suppose to return an exception " + 3376 "when the parameter " + (-1*positive_cases[i]) + " is a negative number."); 3377 } catch(Exception e){ 3378 } 3379 3380 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is true 3381 if(UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i], dst, 0) != 1){ 3382 errln("UCharacter.toChars(int,char[],int) was suppose to return a value of 1. Got: " + 3383 UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i], dst, 0)); 3384 } 3385 3386 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is false and 3387 // when "if (cp <= MAX_CODE_POINT)" is false 3388 try{ 3389 UCharacter.toChars(UCharacter.MAX_CODE_POINT+positive_cases[i],dst,0); 3390 errln("UCharacter.toChars(int,char[],int) was suppose to return an exception " + 3391 "when the parameter " + (UCharacter.MAX_CODE_POINT+positive_cases[i]) + 3392 " is a large number."); 3393 } catch(Exception e){ 3394 } 3395 } 3396 3397 3398 /* Testing UCharacter.toChars(cp)*/ 3399 for(int i=0; i<positive_cases.length; i++){ 3400 // Testing negative values when cp < 0 for if (cp >= 0) 3401 try{ 3402 UCharacter.toChars(-1*positive_cases[i]); 3403 errln("UCharacter.toChars(cint) was suppose to return an exception " + 3404 "when the parameter " + positive_cases[i] + " is a negative number."); 3405 } catch(Exception e){ 3406 } 3407 3408 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is true 3409 if(UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i]).length <= 0){ 3410 errln("UCharacter.toChars(int) was suppose to return some result result when the parameter " + 3411 (UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i]) + "is passed."); 3412 } 3413 3414 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is false and 3415 // when "if (cp <= MAX_CODE_POINT)" is false 3416 try{ 3417 UCharacter.toChars(UCharacter.MAX_CODE_POINT+positive_cases[i]); 3418 errln("UCharacter.toChars(int) was suppose to return an exception " + 3419 "when the parameter " + positive_cases[i] + " is a large number."); 3420 } catch(Exception e){ 3421 } 3422 } 3423 } 3424 3425 /* 3426 * The following method tests 3427 * public static int codePointCount(CharSequence text, int start, int limit) 3428 * public static int codePointCount(char[] text, int start, int limit) 3429 */ 3430 @Test 3431 public void TestCodePointCount(){ 3432 // The following tests the first if statement to make it true: 3433 // if (start < 0 || limit < start || limit > text.length) 3434 // which will throw an exception. 3435 char[] empty_text = {}; 3436 char[] one_char_text = {'a'}; 3437 char[] reg_text = {'d','u','m','m','y'}; 3438 int[] invalid_startCases = {-1,-2,-5,-10,-100}; 3439 int[] limitCases = {2,3,5,10,25}; 3440 3441 // When start < 0 3442 for(int i=0; i < invalid_startCases.length; i++){ 3443 try{ 3444 UCharacter.codePointCount(reg_text, invalid_startCases[i], 1); 3445 errln("UCharacter.codePointCount was suppose to return an exception " + 3446 "but got " + UCharacter.codePointCount(reg_text, invalid_startCases[i], 1) + 3447 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " + 3448 invalid_startCases[i] + ", Limit: " + 1 + "."); 3449 } catch(Exception e){ 3450 } 3451 } 3452 3453 // When limit < start 3454 for(int i=0; i < limitCases.length; i++){ 3455 try{ 3456 UCharacter.codePointCount(reg_text, 100, limitCases[i]); 3457 errln("UCharacter.codePointCount was suppose to return an exception " + 3458 "but got " + UCharacter.codePointCount(reg_text, 100, limitCases[i]) + 3459 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " + 3460 100 + ", Limit: " + limitCases[i] + "."); 3461 } catch(Exception e){ 3462 } 3463 } 3464 3465 // When limit > text.length 3466 for(int i=0; i < limitCases.length; i++){ 3467 try{ 3468 UCharacter.codePointCount(empty_text, 0, limitCases[i]); 3469 errln("UCharacter.codePointCount was suppose to return an exception " + 3470 "but got " + UCharacter.codePointCount(empty_text, 0, limitCases[i]) + 3471 ". The following passed parameters were Text: " + String.valueOf(empty_text) + ", Start: " + 3472 0 + ", Limit: " + limitCases[i] + "."); 3473 } catch(Exception e){ 3474 } 3475 3476 try{ 3477 UCharacter.codePointCount(one_char_text, 0, limitCases[i]); 3478 errln("UCharacter.codePointCount was suppose to return an exception " + 3479 "but got " + UCharacter.codePointCount(one_char_text, 0, limitCases[i]) + 3480 ". The following passed parameters were Text: " + String.valueOf(one_char_text) + ", Start: " + 3481 0 + ", Limit: " + limitCases[i] + "."); 3482 } catch(Exception e){ 3483 } 3484 } 3485 } 3486 3487 /* 3488 * The following method tests 3489 * private static int getEuropeanDigit(int ch) 3490 * The method needs to use the method "digit" in order to access the 3491 * getEuropeanDigit method. 3492 */ 3493 @Test 3494 public void TestGetEuropeanDigit(){ 3495 //The number retrieved from 0xFF41 to 0xFF5A is due to 3496 // exhaustive testing from UTF16.CODEPOINT_MIN_VALUE to 3497 // UTF16.CODEPOINT_MAX_VALUE return a value of -1. 3498 3499 int[] radixResult = { 3500 10,11,12,13,14,15,16,17,18,19,20,21,22, 3501 23,24,25,26,27,28,29,30,31,32,33,34,35}; 3502 // Invalid and too-small-for-these-digits radix values. 3503 int[] radixCase1 = {0,1,5,10,100}; 3504 // Radix values that work for at least some of the "digits". 3505 int[] radixCase2 = {12,16,20,36}; 3506 3507 for(int i=0xFF41; i<=0xFF5A; i++){ 3508 for(int j=0; j < radixCase1.length; j++){ 3509 if(UCharacter.digit(i, radixCase1[j]) != -1){ 3510 errln("UCharacter.digit(int,int) was supposed to return -1 for radix " + radixCase1[j] 3511 + ". Value passed: U+" + Integer.toHexString(i) + ". Got: " + UCharacter.digit(i, radixCase1[j])); 3512 } 3513 } 3514 for(int j=0; j < radixCase2.length; j++){ 3515 int radix = radixCase2[j]; 3516 int expected = (radixResult[i-0xFF41] < radix) ? radixResult[i-0xFF41] : -1; 3517 int actual = UCharacter.digit(i, radix); 3518 if(actual != expected){ 3519 errln("UCharacter.digit(int,int) was supposed to return " + 3520 expected + " for radix " + radix + 3521 ". Value passed: U+" + Integer.toHexString(i) + ". Got: " + actual); 3522 break; 3523 } 3524 } 3525 } 3526 } 3527 3528 /* Tests the method 3529 * private static final int getProperty(int ch) 3530 * from public static int getType(int ch) 3531 */ 3532 @Test 3533 public void TestGetProperty(){ 3534 int[] cases = {UTF16.CODEPOINT_MAX_VALUE+1, UTF16.CODEPOINT_MAX_VALUE+2}; 3535 for(int i=0; i < cases.length; i++) 3536 if(UCharacter.getType(cases[i]) != 0) 3537 errln("UCharacter.getType for testing UCharacter.getProperty " 3538 + "did not return 0 for passed value of " + cases[i] + 3539 " but got " + UCharacter.getType(cases[i])); 3540 } 3541 3542 /* Tests the class 3543 * abstract public static class XSymbolTable implements SymbolTable 3544 */ 3545 @Test 3546 public void TestXSymbolTable(){ 3547 class MyXSymbolTable extends UnicodeSet.XSymbolTable {} 3548 MyXSymbolTable st = new MyXSymbolTable(); 3549 3550 // Tests "public UnicodeMatcher lookupMatcher(int i)" 3551 if(st.lookupMatcher(0) != null) 3552 errln("XSymbolTable.lookupMatcher(int i) was suppose to return null."); 3553 3554 // Tests "public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result)" 3555 if(st.applyPropertyAlias("", "", new UnicodeSet()) != false) 3556 errln("XSymbolTable.applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) was suppose to return false."); 3557 3558 // Tests "public char[] lookup(String s)" 3559 if(st.lookup("") != null) 3560 errln("XSymbolTable.lookup(String s) was suppose to return null."); 3561 3562 // Tests "public String parseReference(String text, ParsePosition pos, int limit)" 3563 if(st.parseReference("", null, 0) != null) 3564 errln("XSymbolTable.parseReference(String text, ParsePosition pos, int limit) was suppose to return null."); 3565 } 3566 3567 /* Tests the method 3568 * public boolean isFrozen() 3569 */ 3570 @Test 3571 public void TestIsFrozen(){ 3572 UnicodeSet us = new UnicodeSet(); 3573 if(us.isFrozen() != false) 3574 errln("Unicode.isFrozen() was suppose to return false."); 3575 3576 us.freeze(); 3577 if(us.isFrozen() != true) 3578 errln("Unicode.isFrozen() was suppose to return true."); 3579 } 3580 3581 /* Tests the methods 3582 * public static String getNameAlias() and 3583 * public static String getCharFromNameAlias() 3584 */ 3585 @Test 3586 public void testNameAliasing() { 3587 int input = '\u01a2'; 3588 String alias = UCharacter.getNameAlias(input); 3589 assertEquals("Wrong name alias", "LATIN CAPITAL LETTER GHA", alias); 3590 int output = UCharacter.getCharFromNameAlias(alias); 3591 assertEquals("alias for '" + input + "'", input, output); 3592 } 3593 } 3594