Home | History | Annotate | Download | only in util
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 2002-2012, International Business Machines Corporation and    *
      4  * others. All Rights Reserved.                                                *
      5  *******************************************************************************
      6  */
      7 package com.ibm.icu.dev.util;
      8 
      9 import java.util.ArrayList;
     10 import java.util.Arrays;
     11 import java.util.BitSet;
     12 import java.util.Collection;
     13 import java.util.HashMap;
     14 import java.util.HashSet;
     15 import java.util.Iterator;
     16 import java.util.List;
     17 import java.util.Locale;
     18 import java.util.Map;
     19 import java.util.Set;
     20 import java.util.TreeMap;
     21 import java.util.TreeSet;
     22 
     23 import com.ibm.icu.lang.UCharacter;
     24 import com.ibm.icu.lang.UProperty;
     25 import com.ibm.icu.lang.UScript;
     26 import com.ibm.icu.text.Normalizer;
     27 import com.ibm.icu.text.UTF16;
     28 import com.ibm.icu.util.VersionInfo;
     29 
     30 
     31 /**
     32  * Provides a general interface for Unicode Properties, and
     33  * extracting sets based on those values.
     34  * @author Davis
     35  */
     36 
     37 public class ICUPropertyFactory extends UnicodeProperty.Factory {
     38 
     39     static class ICUProperty extends UnicodeProperty {
     40         protected int propEnum = Integer.MIN_VALUE;
     41 
     42         protected ICUProperty(String propName, int propEnum) {
     43             setName(propName);
     44             this.propEnum = propEnum;
     45             setType(internalGetPropertyType(propEnum));
     46             if (propEnum == UProperty.DEFAULT_IGNORABLE_CODE_POINT || propEnum == UProperty.BIDI_CLASS || propEnum == UProperty.GENERAL_CATEGORY) {
     47                 setUniformUnassigned(false);
     48             } else {
     49                 setUniformUnassigned(true);
     50             }
     51         }
     52 
     53         boolean shownException = false;
     54 
     55         public String _getValue(int codePoint) {
     56             switch (propEnum) {
     57             case UProperty.AGE:
     58                 return getAge(codePoint);
     59             case UProperty.BIDI_MIRRORING_GLYPH:
     60                 return UTF16.valueOf(UCharacter.getMirror(codePoint));
     61             case UProperty.CASE_FOLDING:
     62                 return UCharacter.foldCase(UTF16.valueOf(codePoint), true);
     63             case UProperty.ISO_COMMENT:
     64                 return UCharacter.getISOComment(codePoint);
     65             case UProperty.LOWERCASE_MAPPING:
     66                 return UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
     67             case UProperty.NAME:
     68                 return UCharacter.getName(codePoint);
     69             case UProperty.SIMPLE_CASE_FOLDING:
     70                 return UTF16.valueOf(UCharacter.foldCase(codePoint, true));
     71             case UProperty.SIMPLE_LOWERCASE_MAPPING:
     72                 return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
     73             case UProperty.SIMPLE_TITLECASE_MAPPING:
     74                 return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
     75             case UProperty.SIMPLE_UPPERCASE_MAPPING:
     76                 return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
     77             case UProperty.TITLECASE_MAPPING:
     78                 return UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null);
     79             case UProperty.UNICODE_1_NAME:
     80                 return UCharacter.getName1_0(codePoint);
     81             case UProperty.UPPERCASE_MAPPING:
     82                 return UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
     83             // case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
     84             // case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
     85             // case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
     86             // case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
     87             case isNFC:
     88                 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFC).equals(UTF16.valueOf(codePoint)));
     89             case isNFD:
     90                 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFD).equals(UTF16.valueOf(codePoint)));
     91             case isNFKC:
     92                 return String
     93                         .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKC).equals(UTF16.valueOf(codePoint)));
     94             case isNFKD:
     95                 return String
     96                         .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKD).equals(UTF16.valueOf(codePoint)));
     97             case isLowercase:
     98                 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
     99                         UTF16.valueOf(codePoint)));
    100             case isUppercase:
    101                 return String.valueOf(UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
    102                         UTF16.valueOf(codePoint)));
    103             case isTitlecase:
    104                 return String.valueOf(UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null).equals(
    105                         UTF16.valueOf(codePoint)));
    106             case isCasefolded:
    107                 return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint), true).equals(
    108                         UTF16.valueOf(codePoint)));
    109             case isCased:
    110                 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
    111                         UTF16.valueOf(codePoint)));
    112             case UProperty.SCRIPT_EXTENSIONS:
    113                 return getStringScriptExtensions(codePoint);
    114             }
    115             if (propEnum < UProperty.INT_LIMIT) {
    116                 int enumValue = -1;
    117                 String value = null;
    118                 try {
    119                     enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
    120                     if (enumValue >= 0)
    121                         value = fixedGetPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG);
    122                 } catch (IllegalArgumentException e) {
    123                     if (!shownException) {
    124                         System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
    125                         shownException = true;
    126                     }
    127                 }
    128                 return value != null ? value : String.valueOf(enumValue);
    129             } else if (propEnum < UProperty.DOUBLE_LIMIT) {
    130                 double num = UCharacter.getUnicodeNumericValue(codePoint);
    131                 if (num == UCharacter.NO_NUMERIC_VALUE)
    132                     return null;
    133                 return Double.toString(num);
    134                 // TODO: Fix HACK -- API deficient
    135             }
    136             return null;
    137         }
    138 
    139         private String getAge(int codePoint) {
    140             String temp = UCharacter.getAge(codePoint).toString();
    141             if (temp.equals("0.0.0.0"))
    142                 return "unassigned";
    143             if (temp.endsWith(".0.0"))
    144                 return temp.substring(0, temp.length() - 4);
    145             return temp;
    146         }
    147 
    148         /**
    149          * @param valueAlias null if unused.
    150          * @param valueEnum -1 if unused
    151          * @param nameChoice
    152          * @return
    153          */
    154         private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
    155             if (propEnum >= UProperty.STRING_START) {
    156                 if (nameChoice > UProperty.NameChoice.LONG)
    157                     throw new IllegalArgumentException();
    158                 if (nameChoice != UProperty.NameChoice.LONG)
    159                     return null;
    160                 return "<string>";
    161             } else if (propEnum >= UProperty.DOUBLE_START) {
    162                 if (nameChoice > UProperty.NameChoice.LONG)
    163                     throw new IllegalArgumentException();
    164                 if (nameChoice != UProperty.NameChoice.LONG)
    165                     return null;
    166                 return "<number>";
    167             }
    168             if (valueAlias != null && !valueAlias.equals("<integer>")) {
    169                 valueEnum = fixedGetPropertyValueEnum(propEnum, valueAlias);
    170             }
    171             // because these are defined badly, there may be no normal (long) name.
    172             // if there is
    173             String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
    174             if (result != null)
    175                 return result;
    176             // HACK try other namechoice
    177             if (nameChoice == UProperty.NameChoice.LONG) {
    178                 result = fixedGetPropertyValueName(propEnum, valueEnum, UProperty.NameChoice.SHORT);
    179                 if (result != null)
    180                     return result;
    181                 if (isCombiningClassProperty())
    182                     return null;
    183                 return "<integer>";
    184             }
    185             return null;
    186         }
    187 
    188         public boolean isCombiningClassProperty() {
    189             return (propEnum == UProperty.CANONICAL_COMBINING_CLASS
    190                     || propEnum == UProperty.LEAD_CANONICAL_COMBINING_CLASS
    191                     || propEnum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
    192         }
    193 
    194         private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) {
    195             try {
    196                 if (propEnum < BINARY_LIMIT) {
    197                     propEnum = UProperty.ALPHABETIC;
    198                 }
    199                 return UCharacter.getPropertyValueEnum(propEnum, valueAlias);
    200             } catch (Exception e) {
    201                 return Integer.parseInt(valueAlias);
    202             }
    203         }
    204 
    205         static Map fixSkeleton = new HashMap();
    206 
    207         private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
    208             String value = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice);
    209             String newValue = (String) fixSkeleton.get(value);
    210             if (newValue == null) {
    211                 newValue = value;
    212                 if (propEnum == UProperty.JOINING_GROUP) {
    213                     newValue = newValue == null ? null : newValue.toLowerCase(Locale.ENGLISH);
    214                 }
    215                 newValue = regularize(newValue, true);
    216                 fixSkeleton.put(value, newValue);
    217             }
    218             return newValue;
    219         }
    220 
    221         public List _getNameAliases(List result) {
    222             if (result == null)
    223                 result = new ArrayList();
    224             // String alias = String_Extras.get(propEnum);
    225             // if (alias == null)
    226             String alias = Binary_Extras.get(propEnum);
    227             if (alias != null) {
    228                 addUnique(alias, result);
    229             } else {
    230                 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result);
    231                 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result);
    232             }
    233             return result;
    234         }
    235 
    236         public String getFixedPropertyName(int propName, int nameChoice) {
    237             try {
    238                 return UCharacter.getPropertyName(propEnum, nameChoice);
    239             } catch (IllegalArgumentException e) {
    240                 return null;
    241             }
    242         }
    243 
    244         private static Map cccHack = new HashMap();
    245         private static Set cccExtras = new HashSet();
    246         static {
    247             for (int i = 0; i <= 255; ++i) {
    248                 String alias = UCharacter.getPropertyValueName(UProperty.CANONICAL_COMBINING_CLASS, i,
    249                         UProperty.NameChoice.LONG);
    250                 String numStr = String.valueOf(i);
    251                 if (alias != null) {
    252                     cccHack.put(alias, numStr);
    253                 } else {
    254                     cccHack.put(numStr, numStr);
    255                     cccExtras.add(numStr);
    256                 }
    257             }
    258         }
    259 
    260         public List _getAvailableValues(List result) {
    261             if (result == null)
    262                 result = new ArrayList();
    263             if (propEnum == UProperty.AGE) {
    264                 addAllUnique(getAges(), result);
    265                 return result;
    266 
    267             }
    268             if (propEnum < UProperty.INT_LIMIT) {
    269                 if (Binary_Extras.isInRange(propEnum)) {
    270                     propEnum = UProperty.BINARY_START; // HACK
    271                 }
    272                 int start = UCharacter.getIntPropertyMinValue(propEnum);
    273                 int end = UCharacter.getIntPropertyMaxValue(propEnum);
    274                 for (int i = start; i <= end; ++i) {
    275                     String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
    276                     String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
    277                     if (alias == null) {
    278                         alias = alias2;
    279                         if (alias == null && isCombiningClassProperty()) {
    280                             alias = String.valueOf(i);
    281                         }
    282                     }
    283                     // System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
    284                     addUnique(alias, result);
    285                 }
    286             } else if (propEnum >= UProperty.DOUBLE_START && propEnum < UProperty.DOUBLE_LIMIT) {
    287                 UnicodeMap map = getUnicodeMap();
    288                 Collection values = map.values();
    289                 addAllUnique(values, result);
    290             } else {
    291                 String alias = getFixedValueAlias(null, -1, UProperty.NameChoice.LONG);
    292                 addUnique(alias, result);
    293             }
    294             return result;
    295         }
    296 
    297         static String[] AGES = null;
    298 
    299         private String[] getAges() {
    300             if (AGES == null) {
    301                 Set ages = new TreeSet();
    302                 for (int i = 0; i < 0x10FFFF; ++i) {
    303                     ages.add(getAge(i));
    304                 }
    305                 AGES = (String[]) ages.toArray(new String[ages.size()]);
    306             }
    307             return AGES;
    308         }
    309 
    310         public List _getValueAliases(String valueAlias, List result) {
    311             if (result == null)
    312                 result = new ArrayList();
    313             if (propEnum == UProperty.AGE) {
    314                 addUnique(valueAlias, result);
    315                 return result;
    316             }
    317             if (isCombiningClassProperty()) {
    318                 addUnique(cccHack.get(valueAlias), result); // add number
    319             }
    320             int type = getType();
    321             if (type == UnicodeProperty.NUMERIC || type == EXTENDED_NUMERIC) {
    322                 addUnique(valueAlias, result);
    323                 if (valueAlias.endsWith(".0")) {
    324                     addUnique(valueAlias.substring(0, valueAlias.length() - 2), result);
    325                 }
    326             } else {
    327                 for (int nameChoice = UProperty.NameChoice.SHORT;; ++nameChoice) {
    328                     try {
    329                         addUnique(getFixedValueAlias(valueAlias, -1, nameChoice), result);
    330                     } catch (Exception e) {
    331                         break;
    332                     }
    333                 }
    334             }
    335             return result;
    336         }
    337 
    338         /* (non-Javadoc)
    339          * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()
    340          */
    341         private int internalGetPropertyType(int prop) {
    342             switch (prop) {
    343             case UProperty.AGE:
    344             case UProperty.BLOCK:
    345             case UProperty.SCRIPT:
    346                 return UnicodeProperty.CATALOG;
    347             case UProperty.ISO_COMMENT:
    348             case UProperty.NAME:
    349             case UProperty.UNICODE_1_NAME:
    350             case UProperty.SCRIPT_EXTENSIONS:
    351                 return UnicodeProperty.MISC;
    352             case UProperty.BIDI_MIRRORING_GLYPH:
    353             case UProperty.CASE_FOLDING:
    354             case UProperty.LOWERCASE_MAPPING:
    355             case UProperty.SIMPLE_CASE_FOLDING:
    356             case UProperty.SIMPLE_LOWERCASE_MAPPING:
    357             case UProperty.SIMPLE_TITLECASE_MAPPING:
    358             case UProperty.SIMPLE_UPPERCASE_MAPPING:
    359             case UProperty.TITLECASE_MAPPING:
    360             case UProperty.UPPERCASE_MAPPING:
    361                 return UnicodeProperty.EXTENDED_STRING;
    362             }
    363             if (prop < UProperty.BINARY_START)
    364                 return UnicodeProperty.UNKNOWN;
    365             if (prop < UProperty.BINARY_LIMIT)
    366                 return UnicodeProperty.BINARY;
    367             if (prop < UProperty.INT_START)
    368                 return UnicodeProperty.EXTENDED_BINARY;
    369             if (prop < UProperty.INT_LIMIT)
    370                 return UnicodeProperty.ENUMERATED;
    371             if (prop < UProperty.DOUBLE_START)
    372                 return UnicodeProperty.EXTENDED_ENUMERATED;
    373             if (prop < UProperty.DOUBLE_LIMIT)
    374                 return UnicodeProperty.NUMERIC;
    375             if (prop < UProperty.STRING_START)
    376                 return UnicodeProperty.EXTENDED_NUMERIC;
    377             if (prop < UProperty.STRING_LIMIT)
    378                 return UnicodeProperty.STRING;
    379             return UnicodeProperty.EXTENDED_STRING;
    380         }
    381 
    382         /*
    383          * (non-Javadoc)
    384          *
    385          * @see com.ibm.icu.dev.test.util.UnicodeProperty#getVersion()
    386          */
    387         public String _getVersion() {
    388             return VersionInfo.ICU_VERSION.toString();
    389         }
    390     }
    391 
    392   /*{
    393             matchIterator = new UnicodeSetIterator(
    394                 new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
    395         }*/
    396 
    397 
    398 
    399     /*
    400      * Other Missing Functions:
    401             Expands_On_NFC
    402             Expands_On_NFD
    403             Expands_On_NFKC
    404             Expands_On_NFKD
    405             Composition_Exclusion
    406             Decomposition_Mapping
    407             FC_NFKC_Closure
    408             ISO_Comment
    409             NFC_Quick_Check
    410             NFD_Quick_Check
    411             NFKC_Quick_Check
    412             NFKD_Quick_Check
    413             Special_Case_Condition
    414             Unicode_Radical_Stroke
    415      */
    416 
    417     static final Names Binary_Extras = new Names(UProperty.BINARY_LIMIT,
    418           new String[] {
    419           "isNFC", "isNFD", "isNFKC", "isNFKD",
    420           "isLowercase", "isUppercase", "isTitlecase", "isCasefolded", "isCased",
    421     });
    422 
    423 //    static final Names String_Extras = new Names(UProperty.STRING_LIMIT,
    424 //          new String[] {
    425 //          "toNFC", "toNFD", "toNFKC", "toNKFD",
    426 //    });
    427 
    428     static final int
    429         isNFC = UProperty.BINARY_LIMIT,
    430         isNFD = UProperty.BINARY_LIMIT+1,
    431         isNFKC = UProperty.BINARY_LIMIT+2,
    432         isNFKD = UProperty.BINARY_LIMIT+3,
    433         isLowercase = UProperty.BINARY_LIMIT+4,
    434         isUppercase = UProperty.BINARY_LIMIT+5,
    435         isTitlecase = UProperty.BINARY_LIMIT+6,
    436         isCasefolded = UProperty.BINARY_LIMIT+7,
    437         isCased = UProperty.BINARY_LIMIT+8,
    438         BINARY_LIMIT = UProperty.BINARY_LIMIT+9
    439 
    440 //        NFC  = UProperty.STRING_LIMIT,
    441 //        NFD  = UProperty.STRING_LIMIT+1,
    442 //        NFKC = UProperty.STRING_LIMIT+2,
    443 //        NFKD = UProperty.STRING_LIMIT+3
    444         ;
    445 
    446     protected ICUPropertyFactory() {
    447         Collection c = getInternalAvailablePropertyAliases(new ArrayList());
    448         Iterator it = c.iterator();
    449         while (it.hasNext()) {
    450             add(getInternalProperty((String) it.next()));
    451         }
    452     }
    453 
    454     static BitSet BITSET = new BitSet();
    455     public static synchronized String getStringScriptExtensions(int codePoint) {
    456         int result = UScript.getScriptExtensions(codePoint, BITSET);
    457         if (result >= 0) {
    458             return UScript.getName(result);
    459         }
    460         TreeMap<String,String> sorted = new TreeMap<String,String>();
    461         for (int scriptCode = BITSET.nextSetBit(0); scriptCode >= 0; scriptCode = BITSET.nextSetBit(scriptCode+1)) {
    462             // sort by short form
    463             sorted.put(UScript.getShortName(scriptCode), UScript.getName(scriptCode));
    464         }
    465         return CollectionUtilities.join(sorted.values(), " ");
    466     }
    467 
    468     private static ICUPropertyFactory singleton = null;
    469 
    470     public static synchronized ICUPropertyFactory make() {
    471         if (singleton != null)
    472             return singleton;
    473         singleton = new ICUPropertyFactory();
    474         return singleton;
    475     }
    476 
    477     public List getInternalAvailablePropertyAliases(List result) {
    478         int[][] ranges = {
    479                 {UProperty.BINARY_START,    UProperty.BINARY_LIMIT},
    480                 {UProperty.INT_START,       UProperty.INT_LIMIT},
    481                 {UProperty.DOUBLE_START,    UProperty.DOUBLE_LIMIT},
    482                 {UProperty.STRING_START,    UProperty.STRING_LIMIT},
    483                 {UProperty.OTHER_PROPERTY_START, UProperty.OTHER_PROPERTY_LIMIT},
    484 
    485         };
    486         for (int i = 0; i < ranges.length; ++i) {
    487             for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
    488                 String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG);
    489                 UnicodeProperty.addUnique(alias, result);
    490                 if (!result.contains(alias))
    491                     result.add(alias);
    492             }
    493         }
    494         // result.addAll(String_Extras.getNames());
    495         result.addAll(Binary_Extras.getNames());
    496         return result;
    497     }
    498 
    499     public UnicodeProperty getInternalProperty(String propertyAlias) {
    500         int propEnum;
    501         main: {
    502             int possibleItem = Binary_Extras.get(propertyAlias);
    503             if (possibleItem >= 0) {
    504                 propEnum = possibleItem;
    505                 break main;
    506             }
    507             // possibleItem = String_Extras.get(propertyAlias);
    508             // if (possibleItem >= 0) {
    509             // propEnum = possibleItem;
    510             // break main;
    511             // }
    512             propEnum = UCharacter.getPropertyEnum(propertyAlias);
    513         }
    514         return new ICUProperty(propertyAlias, propEnum);
    515     }
    516 
    517     /*
    518      * (non-Javadoc)
    519      *
    520      * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String)
    521      */
    522     // TODO file bug on getPropertyValueName for Canonical_Combining_Class
    523     public static class Names {
    524         private String[] names;
    525         private int base;
    526 
    527         public Names(int base, String[] names) {
    528             this.base = base;
    529             this.names = names;
    530         }
    531 
    532         public int get(String name) {
    533             for (int i = 0; i < names.length; ++i) {
    534                 if (name.equalsIgnoreCase(names[i]))
    535                     return base + i;
    536             }
    537             return -1;
    538         }
    539 
    540         public String get(int number) {
    541             number -= base;
    542             if (number < 0 || names.length <= number)
    543                 return null;
    544             return names[number];
    545         }
    546 
    547         public boolean isInRange(int number) {
    548             number -= base;
    549             return (0 <= number && number < names.length);
    550         }
    551 
    552         public List getNames() {
    553             return Arrays.asList(names);
    554         }
    555     }
    556 }
    557