1 /* 2 ******************************************************************************* 3 * Copyright (C) 2002-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.dev.util; 8 9 import java.util.ArrayList; 10 import java.util.Arrays; 11 import java.util.BitSet; 12 import java.util.Collection; 13 import java.util.HashMap; 14 import java.util.HashSet; 15 import java.util.Iterator; 16 import java.util.List; 17 import java.util.Locale; 18 import java.util.Map; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.TreeSet; 22 23 import com.ibm.icu.lang.UCharacter; 24 import com.ibm.icu.lang.UProperty; 25 import com.ibm.icu.lang.UScript; 26 import com.ibm.icu.text.Normalizer; 27 import com.ibm.icu.text.UTF16; 28 import com.ibm.icu.util.VersionInfo; 29 30 31 /** 32 * Provides a general interface for Unicode Properties, and 33 * extracting sets based on those values. 34 * @author Davis 35 */ 36 37 public class ICUPropertyFactory extends UnicodeProperty.Factory { 38 39 static class ICUProperty extends UnicodeProperty { 40 protected int propEnum = Integer.MIN_VALUE; 41 42 protected ICUProperty(String propName, int propEnum) { 43 setName(propName); 44 this.propEnum = propEnum; 45 setType(internalGetPropertyType(propEnum)); 46 if (propEnum == UProperty.DEFAULT_IGNORABLE_CODE_POINT || propEnum == UProperty.BIDI_CLASS || propEnum == UProperty.GENERAL_CATEGORY) { 47 setUniformUnassigned(false); 48 } else { 49 setUniformUnassigned(true); 50 } 51 } 52 53 boolean shownException = false; 54 55 public String _getValue(int codePoint) { 56 switch (propEnum) { 57 case UProperty.AGE: 58 return getAge(codePoint); 59 case UProperty.BIDI_MIRRORING_GLYPH: 60 return UTF16.valueOf(UCharacter.getMirror(codePoint)); 61 case UProperty.CASE_FOLDING: 62 return UCharacter.foldCase(UTF16.valueOf(codePoint), true); 63 case UProperty.ISO_COMMENT: 64 return UCharacter.getISOComment(codePoint); 65 case UProperty.LOWERCASE_MAPPING: 66 return UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)); 67 case UProperty.NAME: 68 return UCharacter.getName(codePoint); 69 case UProperty.SIMPLE_CASE_FOLDING: 70 return UTF16.valueOf(UCharacter.foldCase(codePoint, true)); 71 case UProperty.SIMPLE_LOWERCASE_MAPPING: 72 return UTF16.valueOf(UCharacter.toLowerCase(codePoint)); 73 case UProperty.SIMPLE_TITLECASE_MAPPING: 74 return UTF16.valueOf(UCharacter.toTitleCase(codePoint)); 75 case UProperty.SIMPLE_UPPERCASE_MAPPING: 76 return UTF16.valueOf(UCharacter.toUpperCase(codePoint)); 77 case UProperty.TITLECASE_MAPPING: 78 return UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null); 79 case UProperty.UNICODE_1_NAME: 80 return UCharacter.getName1_0(codePoint); 81 case UProperty.UPPERCASE_MAPPING: 82 return UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)); 83 // case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC); 84 // case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD); 85 // case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC); 86 // case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD); 87 case isNFC: 88 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFC).equals(UTF16.valueOf(codePoint))); 89 case isNFD: 90 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFD).equals(UTF16.valueOf(codePoint))); 91 case isNFKC: 92 return String 93 .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKC).equals(UTF16.valueOf(codePoint))); 94 case isNFKD: 95 return String 96 .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKD).equals(UTF16.valueOf(codePoint))); 97 case isLowercase: 98 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals( 99 UTF16.valueOf(codePoint))); 100 case isUppercase: 101 return String.valueOf(UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals( 102 UTF16.valueOf(codePoint))); 103 case isTitlecase: 104 return String.valueOf(UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null).equals( 105 UTF16.valueOf(codePoint))); 106 case isCasefolded: 107 return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint), true).equals( 108 UTF16.valueOf(codePoint))); 109 case isCased: 110 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals( 111 UTF16.valueOf(codePoint))); 112 case UProperty.SCRIPT_EXTENSIONS: 113 return getStringScriptExtensions(codePoint); 114 } 115 if (propEnum < UProperty.INT_LIMIT) { 116 int enumValue = -1; 117 String value = null; 118 try { 119 enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum); 120 if (enumValue >= 0) 121 value = fixedGetPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG); 122 } catch (IllegalArgumentException e) { 123 if (!shownException) { 124 System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint)); 125 shownException = true; 126 } 127 } 128 return value != null ? value : String.valueOf(enumValue); 129 } else if (propEnum < UProperty.DOUBLE_LIMIT) { 130 double num = UCharacter.getUnicodeNumericValue(codePoint); 131 if (num == UCharacter.NO_NUMERIC_VALUE) 132 return null; 133 return Double.toString(num); 134 // TODO: Fix HACK -- API deficient 135 } 136 return null; 137 } 138 139 private String getAge(int codePoint) { 140 String temp = UCharacter.getAge(codePoint).toString(); 141 if (temp.equals("0.0.0.0")) 142 return "unassigned"; 143 if (temp.endsWith(".0.0")) 144 return temp.substring(0, temp.length() - 4); 145 return temp; 146 } 147 148 /** 149 * @param valueAlias null if unused. 150 * @param valueEnum -1 if unused 151 * @param nameChoice 152 * @return 153 */ 154 private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) { 155 if (propEnum >= UProperty.STRING_START) { 156 if (nameChoice > UProperty.NameChoice.LONG) 157 throw new IllegalArgumentException(); 158 if (nameChoice != UProperty.NameChoice.LONG) 159 return null; 160 return "<string>"; 161 } else if (propEnum >= UProperty.DOUBLE_START) { 162 if (nameChoice > UProperty.NameChoice.LONG) 163 throw new IllegalArgumentException(); 164 if (nameChoice != UProperty.NameChoice.LONG) 165 return null; 166 return "<number>"; 167 } 168 if (valueAlias != null && !valueAlias.equals("<integer>")) { 169 valueEnum = fixedGetPropertyValueEnum(propEnum, valueAlias); 170 } 171 // because these are defined badly, there may be no normal (long) name. 172 // if there is 173 String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice); 174 if (result != null) 175 return result; 176 // HACK try other namechoice 177 if (nameChoice == UProperty.NameChoice.LONG) { 178 result = fixedGetPropertyValueName(propEnum, valueEnum, UProperty.NameChoice.SHORT); 179 if (result != null) 180 return result; 181 if (isCombiningClassProperty()) 182 return null; 183 return "<integer>"; 184 } 185 return null; 186 } 187 188 public boolean isCombiningClassProperty() { 189 return (propEnum == UProperty.CANONICAL_COMBINING_CLASS 190 || propEnum == UProperty.LEAD_CANONICAL_COMBINING_CLASS 191 || propEnum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS); 192 } 193 194 private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) { 195 try { 196 if (propEnum < BINARY_LIMIT) { 197 propEnum = UProperty.ALPHABETIC; 198 } 199 return UCharacter.getPropertyValueEnum(propEnum, valueAlias); 200 } catch (Exception e) { 201 return Integer.parseInt(valueAlias); 202 } 203 } 204 205 static Map fixSkeleton = new HashMap(); 206 207 private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) { 208 String value = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice); 209 String newValue = (String) fixSkeleton.get(value); 210 if (newValue == null) { 211 newValue = value; 212 if (propEnum == UProperty.JOINING_GROUP) { 213 newValue = newValue == null ? null : newValue.toLowerCase(Locale.ENGLISH); 214 } 215 newValue = regularize(newValue, true); 216 fixSkeleton.put(value, newValue); 217 } 218 return newValue; 219 } 220 221 public List _getNameAliases(List result) { 222 if (result == null) 223 result = new ArrayList(); 224 // String alias = String_Extras.get(propEnum); 225 // if (alias == null) 226 String alias = Binary_Extras.get(propEnum); 227 if (alias != null) { 228 addUnique(alias, result); 229 } else { 230 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result); 231 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result); 232 } 233 return result; 234 } 235 236 public String getFixedPropertyName(int propName, int nameChoice) { 237 try { 238 return UCharacter.getPropertyName(propEnum, nameChoice); 239 } catch (IllegalArgumentException e) { 240 return null; 241 } 242 } 243 244 private static Map cccHack = new HashMap(); 245 private static Set cccExtras = new HashSet(); 246 static { 247 for (int i = 0; i <= 255; ++i) { 248 String alias = UCharacter.getPropertyValueName(UProperty.CANONICAL_COMBINING_CLASS, i, 249 UProperty.NameChoice.LONG); 250 String numStr = String.valueOf(i); 251 if (alias != null) { 252 cccHack.put(alias, numStr); 253 } else { 254 cccHack.put(numStr, numStr); 255 cccExtras.add(numStr); 256 } 257 } 258 } 259 260 public List _getAvailableValues(List result) { 261 if (result == null) 262 result = new ArrayList(); 263 if (propEnum == UProperty.AGE) { 264 addAllUnique(getAges(), result); 265 return result; 266 267 } 268 if (propEnum < UProperty.INT_LIMIT) { 269 if (Binary_Extras.isInRange(propEnum)) { 270 propEnum = UProperty.BINARY_START; // HACK 271 } 272 int start = UCharacter.getIntPropertyMinValue(propEnum); 273 int end = UCharacter.getIntPropertyMaxValue(propEnum); 274 for (int i = start; i <= end; ++i) { 275 String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG); 276 String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT); 277 if (alias == null) { 278 alias = alias2; 279 if (alias == null && isCombiningClassProperty()) { 280 alias = String.valueOf(i); 281 } 282 } 283 // System.out.println(propertyAlias + "\t" + i + ":\t" + alias); 284 addUnique(alias, result); 285 } 286 } else if (propEnum >= UProperty.DOUBLE_START && propEnum < UProperty.DOUBLE_LIMIT) { 287 UnicodeMap map = getUnicodeMap(); 288 Collection values = map.values(); 289 addAllUnique(values, result); 290 } else { 291 String alias = getFixedValueAlias(null, -1, UProperty.NameChoice.LONG); 292 addUnique(alias, result); 293 } 294 return result; 295 } 296 297 static String[] AGES = null; 298 299 private String[] getAges() { 300 if (AGES == null) { 301 Set ages = new TreeSet(); 302 for (int i = 0; i < 0x10FFFF; ++i) { 303 ages.add(getAge(i)); 304 } 305 AGES = (String[]) ages.toArray(new String[ages.size()]); 306 } 307 return AGES; 308 } 309 310 public List _getValueAliases(String valueAlias, List result) { 311 if (result == null) 312 result = new ArrayList(); 313 if (propEnum == UProperty.AGE) { 314 addUnique(valueAlias, result); 315 return result; 316 } 317 if (isCombiningClassProperty()) { 318 addUnique(cccHack.get(valueAlias), result); // add number 319 } 320 int type = getType(); 321 if (type == UnicodeProperty.NUMERIC || type == EXTENDED_NUMERIC) { 322 addUnique(valueAlias, result); 323 if (valueAlias.endsWith(".0")) { 324 addUnique(valueAlias.substring(0, valueAlias.length() - 2), result); 325 } 326 } else { 327 for (int nameChoice = UProperty.NameChoice.SHORT;; ++nameChoice) { 328 try { 329 addUnique(getFixedValueAlias(valueAlias, -1, nameChoice), result); 330 } catch (Exception e) { 331 break; 332 } 333 } 334 } 335 return result; 336 } 337 338 /* (non-Javadoc) 339 * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType() 340 */ 341 private int internalGetPropertyType(int prop) { 342 switch (prop) { 343 case UProperty.AGE: 344 case UProperty.BLOCK: 345 case UProperty.SCRIPT: 346 return UnicodeProperty.CATALOG; 347 case UProperty.ISO_COMMENT: 348 case UProperty.NAME: 349 case UProperty.UNICODE_1_NAME: 350 case UProperty.SCRIPT_EXTENSIONS: 351 return UnicodeProperty.MISC; 352 case UProperty.BIDI_MIRRORING_GLYPH: 353 case UProperty.CASE_FOLDING: 354 case UProperty.LOWERCASE_MAPPING: 355 case UProperty.SIMPLE_CASE_FOLDING: 356 case UProperty.SIMPLE_LOWERCASE_MAPPING: 357 case UProperty.SIMPLE_TITLECASE_MAPPING: 358 case UProperty.SIMPLE_UPPERCASE_MAPPING: 359 case UProperty.TITLECASE_MAPPING: 360 case UProperty.UPPERCASE_MAPPING: 361 return UnicodeProperty.EXTENDED_STRING; 362 } 363 if (prop < UProperty.BINARY_START) 364 return UnicodeProperty.UNKNOWN; 365 if (prop < UProperty.BINARY_LIMIT) 366 return UnicodeProperty.BINARY; 367 if (prop < UProperty.INT_START) 368 return UnicodeProperty.EXTENDED_BINARY; 369 if (prop < UProperty.INT_LIMIT) 370 return UnicodeProperty.ENUMERATED; 371 if (prop < UProperty.DOUBLE_START) 372 return UnicodeProperty.EXTENDED_ENUMERATED; 373 if (prop < UProperty.DOUBLE_LIMIT) 374 return UnicodeProperty.NUMERIC; 375 if (prop < UProperty.STRING_START) 376 return UnicodeProperty.EXTENDED_NUMERIC; 377 if (prop < UProperty.STRING_LIMIT) 378 return UnicodeProperty.STRING; 379 return UnicodeProperty.EXTENDED_STRING; 380 } 381 382 /* 383 * (non-Javadoc) 384 * 385 * @see com.ibm.icu.dev.test.util.UnicodeProperty#getVersion() 386 */ 387 public String _getVersion() { 388 return VersionInfo.ICU_VERSION.toString(); 389 } 390 } 391 392 /*{ 393 matchIterator = new UnicodeSetIterator( 394 new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]")); 395 }*/ 396 397 398 399 /* 400 * Other Missing Functions: 401 Expands_On_NFC 402 Expands_On_NFD 403 Expands_On_NFKC 404 Expands_On_NFKD 405 Composition_Exclusion 406 Decomposition_Mapping 407 FC_NFKC_Closure 408 ISO_Comment 409 NFC_Quick_Check 410 NFD_Quick_Check 411 NFKC_Quick_Check 412 NFKD_Quick_Check 413 Special_Case_Condition 414 Unicode_Radical_Stroke 415 */ 416 417 static final Names Binary_Extras = new Names(UProperty.BINARY_LIMIT, 418 new String[] { 419 "isNFC", "isNFD", "isNFKC", "isNFKD", 420 "isLowercase", "isUppercase", "isTitlecase", "isCasefolded", "isCased", 421 }); 422 423 // static final Names String_Extras = new Names(UProperty.STRING_LIMIT, 424 // new String[] { 425 // "toNFC", "toNFD", "toNFKC", "toNKFD", 426 // }); 427 428 static final int 429 isNFC = UProperty.BINARY_LIMIT, 430 isNFD = UProperty.BINARY_LIMIT+1, 431 isNFKC = UProperty.BINARY_LIMIT+2, 432 isNFKD = UProperty.BINARY_LIMIT+3, 433 isLowercase = UProperty.BINARY_LIMIT+4, 434 isUppercase = UProperty.BINARY_LIMIT+5, 435 isTitlecase = UProperty.BINARY_LIMIT+6, 436 isCasefolded = UProperty.BINARY_LIMIT+7, 437 isCased = UProperty.BINARY_LIMIT+8, 438 BINARY_LIMIT = UProperty.BINARY_LIMIT+9 439 440 // NFC = UProperty.STRING_LIMIT, 441 // NFD = UProperty.STRING_LIMIT+1, 442 // NFKC = UProperty.STRING_LIMIT+2, 443 // NFKD = UProperty.STRING_LIMIT+3 444 ; 445 446 protected ICUPropertyFactory() { 447 Collection c = getInternalAvailablePropertyAliases(new ArrayList()); 448 Iterator it = c.iterator(); 449 while (it.hasNext()) { 450 add(getInternalProperty((String) it.next())); 451 } 452 } 453 454 static BitSet BITSET = new BitSet(); 455 public static synchronized String getStringScriptExtensions(int codePoint) { 456 int result = UScript.getScriptExtensions(codePoint, BITSET); 457 if (result >= 0) { 458 return UScript.getName(result); 459 } 460 TreeMap<String,String> sorted = new TreeMap<String,String>(); 461 for (int scriptCode = BITSET.nextSetBit(0); scriptCode >= 0; scriptCode = BITSET.nextSetBit(scriptCode+1)) { 462 // sort by short form 463 sorted.put(UScript.getShortName(scriptCode), UScript.getName(scriptCode)); 464 } 465 return CollectionUtilities.join(sorted.values(), " "); 466 } 467 468 private static ICUPropertyFactory singleton = null; 469 470 public static synchronized ICUPropertyFactory make() { 471 if (singleton != null) 472 return singleton; 473 singleton = new ICUPropertyFactory(); 474 return singleton; 475 } 476 477 public List getInternalAvailablePropertyAliases(List result) { 478 int[][] ranges = { 479 {UProperty.BINARY_START, UProperty.BINARY_LIMIT}, 480 {UProperty.INT_START, UProperty.INT_LIMIT}, 481 {UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT}, 482 {UProperty.STRING_START, UProperty.STRING_LIMIT}, 483 {UProperty.OTHER_PROPERTY_START, UProperty.OTHER_PROPERTY_LIMIT}, 484 485 }; 486 for (int i = 0; i < ranges.length; ++i) { 487 for (int j = ranges[i][0]; j < ranges[i][1]; ++j) { 488 String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG); 489 UnicodeProperty.addUnique(alias, result); 490 if (!result.contains(alias)) 491 result.add(alias); 492 } 493 } 494 // result.addAll(String_Extras.getNames()); 495 result.addAll(Binary_Extras.getNames()); 496 return result; 497 } 498 499 public UnicodeProperty getInternalProperty(String propertyAlias) { 500 int propEnum; 501 main: { 502 int possibleItem = Binary_Extras.get(propertyAlias); 503 if (possibleItem >= 0) { 504 propEnum = possibleItem; 505 break main; 506 } 507 // possibleItem = String_Extras.get(propertyAlias); 508 // if (possibleItem >= 0) { 509 // propEnum = possibleItem; 510 // break main; 511 // } 512 propEnum = UCharacter.getPropertyEnum(propertyAlias); 513 } 514 return new ICUProperty(propertyAlias, propEnum); 515 } 516 517 /* 518 * (non-Javadoc) 519 * 520 * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String) 521 */ 522 // TODO file bug on getPropertyValueName for Canonical_Combining_Class 523 public static class Names { 524 private String[] names; 525 private int base; 526 527 public Names(int base, String[] names) { 528 this.base = base; 529 this.names = names; 530 } 531 532 public int get(String name) { 533 for (int i = 0; i < names.length; ++i) { 534 if (name.equalsIgnoreCase(names[i])) 535 return base + i; 536 } 537 return -1; 538 } 539 540 public String get(int number) { 541 number -= base; 542 if (number < 0 || names.length <= number) 543 return null; 544 return names[number]; 545 } 546 547 public boolean isInRange(int number) { 548 number -= base; 549 return (0 <= number && number < names.length); 550 } 551 552 public List getNames() { 553 return Arrays.asList(names); 554 } 555 } 556 } 557