Home | History | Annotate | Download | only in test
      1 package org.unicode.cldr.test;
      2 
      3 import java.util.Arrays;
      4 import java.util.Collection;
      5 import java.util.Collections;
      6 import java.util.EnumMap;
      7 import java.util.HashMap;
      8 import java.util.HashSet;
      9 import java.util.LinkedHashSet;
     10 import java.util.List;
     11 import java.util.Map;
     12 import java.util.Map.Entry;
     13 import java.util.Set;
     14 import java.util.TreeMap;
     15 import java.util.TreeSet;
     16 import java.util.regex.Pattern;
     17 
     18 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
     19 import org.unicode.cldr.util.CLDRConfig;
     20 import org.unicode.cldr.util.CLDRFile;
     21 import org.unicode.cldr.util.CldrUtility;
     22 import org.unicode.cldr.util.DtdData;
     23 import org.unicode.cldr.util.DtdData.Attribute;
     24 import org.unicode.cldr.util.DtdData.Element;
     25 import org.unicode.cldr.util.DtdType;
     26 import org.unicode.cldr.util.Factory;
     27 import org.unicode.cldr.util.LocaleIDParser;
     28 import org.unicode.cldr.util.PatternCache;
     29 import org.unicode.cldr.util.SupplementalDataInfo;
     30 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo;
     31 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
     32 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
     33 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
     34 import org.unicode.cldr.util.XPathParts;
     35 
     36 import com.ibm.icu.dev.util.CollectionUtilities.ObjectMatcher;
     37 import com.ibm.icu.impl.Relation;
     38 import com.ibm.icu.impl.Row;
     39 import com.ibm.icu.impl.Row.R2;
     40 import com.ibm.icu.text.UnicodeSet;
     41 
     42 public class CheckAttributeValues extends FactoryCheckCLDR {
     43 
     44     private static final ObjectMatcher<String> NOT_DONE_YET = new RegexMatcher().set(".*", Pattern.COMMENTS);
     45     private static final boolean FIND_MISSING = CldrUtility.getProperty("FIND_MISSING_ATTRIBUTE_TESTS", false); // turn on to show <attributeValues> that are missing.
     46     private static final boolean SHOW_UNNECESSARY = false; // turn on to show <attributeValues> we should delete.
     47 
     48     static LinkedHashSet<String> elementOrder = new LinkedHashSet<String>();
     49     static LinkedHashSet<String> attributeOrder = new LinkedHashSet<String>();
     50     static LinkedHashSet<String> serialElements = new LinkedHashSet<String>();
     51     static Map<String, Map<String, MatcherPattern>> element_attribute_validity = new HashMap<String, Map<String, MatcherPattern>>();
     52     static Map<String, MatcherPattern> common_attribute_validity = new HashMap<String, MatcherPattern>();
     53     static Map<String, MatcherPattern> variables = new HashMap<String, MatcherPattern>();
     54     // static VariableReplacer variableReplacer = new VariableReplacer(); // note: this can be coalesced with the above
     55     // -- to do later.
     56     static boolean initialized = false;
     57     static LocaleMatcher localeMatcher;
     58     static Map<String, Map<String, String>> code_type_replacement = new TreeMap<String, Map<String, String>>();
     59     static final SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo();
     60     static DtdData ldmlDtdData = DtdData.getInstance(DtdType.ldml);
     61 
     62     boolean isEnglish;
     63     PluralInfo pluralInfo;
     64     Relation<String, String> missingTests = Relation.of(new TreeMap(), TreeSet.class);
     65 
     66     XPathParts parts = new XPathParts(null, null);
     67     static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
     68 
     69     public CheckAttributeValues(Factory factory) {
     70         super(factory);
     71     }
     72 
     73     public void handleFinish() {
     74         for (Entry<String, Set<String>> entry : missingTests.keyValuesSet()) {
     75             System.out.println("Missing element: " + entry.getKey() + ", attributes: " + entry.getValue());
     76         }
     77     }
     78 
     79     public CheckCLDR handleCheck(String path, String fullPath, String value, Options options,
     80         List<CheckStatus> result) {
     81         if (fullPath == null) return this; // skip paths that we don't have
     82         if (fullPath.indexOf('[') < 0) return this; // skip paths with no attributes
     83         String locale = getCldrFileToCheck().getSourceLocaleID(path, null);
     84 
     85         // skip paths that are not in the immediate locale
     86         if (!getCldrFileToCheck().getLocaleID().equals(locale)) {
     87             return this;
     88         }
     89         parts.set(fullPath);
     90         for (int i = 0; i < parts.size(); ++i) {
     91             if (parts.getAttributeCount(i) == 0) continue;
     92             Map<String, String> attributes = parts.getAttributes(i);
     93             String element = parts.getElement(i);
     94             Element elementInfo = ldmlDtdData.getElementFromName().get(element);
     95 
     96             Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element);
     97             for (String attribute : attributes.keySet()) {
     98                 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute);
     99                 if (!attributeInfo.values.isEmpty()) {
    100                     // we don't need to check, since the DTD will enforce values
    101                     continue;
    102                 }
    103                 String attributeValue = attributes.get(attribute);
    104 
    105                 // special hack for         // <type key="calendar" type="chinese">Chinese Calendar</type>
    106                 if (element.equals("type") && attribute.equals("type")) {
    107                     Set<String> typeValues = BCP47_KEY_VALUES.get(attributes.get("key"));
    108                     if (!typeValues.contains(attributeValue)) {
    109                         result.add(new CheckStatus()
    110                             .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue)
    111                             .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}",
    112                                 new Object[] { attribute, attributeValue, typeValues }));
    113                     }
    114                     continue;
    115                 }
    116                 // check the common attributes first
    117                 boolean haveTest = check(common_attribute_validity, attribute, attributeValue, result);
    118                 // then for the specific element
    119                 haveTest = haveTest || check(attribute_validity, attribute, attributeValue, result);
    120                 if (!haveTest && FIND_MISSING) {
    121                     missingTests.put(element, attribute);
    122                 }
    123 
    124                 // now for plurals
    125 
    126                 if (attribute.equals("count")) {
    127                     if (DIGITS.containsAll(attributeValue)) {
    128                         // ok, keep going
    129                     } else {
    130                         final Count countValue = PluralInfo.Count.valueOf(attributeValue);
    131                         if (!pluralInfo.getCounts().contains(countValue)
    132                             && !isPluralException(countValue, locale)) {
    133                             result.add(new CheckStatus()
    134                                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.illegalPlural)
    135                                 .setMessage("Illegal plural value {0}; must be one of: {1}",
    136                                     new Object[] { countValue, pluralInfo.getCounts() }));
    137                         }
    138                     }
    139                 }
    140 
    141                 // TODO check other variable elements, like dayPeriods
    142             }
    143         }
    144         return this;
    145     }
    146 
    /**
     * Plural category -> locale ids that are allowed to use that category even though their
     * plural rules do not list it. Consulted by {@code isPluralException}.
     */
    static final Relation<PluralInfo.Count, String> PLURAL_EXCEPTIONS = Relation.of(
        new EnumMap<PluralInfo.Count, Set<String>>(PluralInfo.Count.class), HashSet.class);

    static {
        for (String manyLocale : new String[] { "hr", "sr", "sh", "bs" }) {
            PLURAL_EXCEPTIONS.put(Count.many, manyLocale);
        }
        PLURAL_EXCEPTIONS.put(Count.few, "ru");
    }
    157 
    158     static boolean isPluralException(Count countValue, String locale) {
    159         Set<String> exceptions = PLURAL_EXCEPTIONS.get(countValue);
    160         if (exceptions == null) {
    161             return false;
    162         }
    163         if (exceptions.contains(locale)) {
    164             return true;
    165         }
    166         int bar = locale.indexOf('_'); // catch bs_Cyrl, etc.
    167         if (bar > 0) {
    168             String base = locale.substring(0, bar);
    169             if (exceptions.contains(base)) {
    170                 return true;
    171             }
    172         }
    173         return false;
    174     }
    175 
    176     /**
    177      * return true if we performed a test
    178      * @param attribute_validity
    179      * @param attribute
    180      * @param attributeValue
    181      * @param result
    182      * @return
    183      */
    184     private boolean check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue,
    185         List<CheckStatus> result) {
    186         if (attribute_validity == null) {
    187             return false; // no test
    188         }
    189         MatcherPattern matcherPattern = attribute_validity.get(attribute);
    190         if (matcherPattern == null) {
    191             return false; // no test
    192         }
    193         if (matcherPattern.matcher.matches(attributeValue)) {
    194             return true;
    195         }
    196         // special check for deprecated codes
    197         String replacement = getReplacement(matcherPattern.value, attributeValue);
    198         if (replacement != null) {
    199             if (isEnglish) {
    200                 return true; // don't flag English
    201             }
    202             if (replacement.length() == 0) {
    203                 result.add(new CheckStatus()
    204                     .setCause(this).setMainType(CheckStatus.warningType).setSubtype(Subtype.deprecatedAttribute)
    205                     .setMessage("Deprecated Attribute Value {0}={1}. Consider removing.",
    206                         new Object[] { attribute, attributeValue }));
    207             } else {
    208                 result
    209                     .add(new CheckStatus()
    210                         .setCause(this)
    211                         .setMainType(CheckStatus.warningType)
    212                         .setSubtype(Subtype.deprecatedAttributeWithReplacement)
    213                         .setMessage(
    214                             "Deprecated Attribute Value {0}={1}. Consider removing, and possibly modifying the related value for {2}.",
    215                             new Object[] { attribute, attributeValue, replacement }));
    216             }
    217         } else {
    218             result.add(new CheckStatus()
    219                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue)
    220                 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}",
    221                     new Object[] { attribute, attributeValue, matcherPattern.pattern }));
    222         }
    223         return true;
    224     }
    225 
    226     /**
    227      * Returns replacement, or null if there is none. "" if the code is deprecated, but without a replacement.
    228      * Input is of the form $language
    229      *
    230      * @return
    231      */
    232     String getReplacement(String value, String attributeValue) {
    233         Map<String, String> type_replacement = code_type_replacement.get(value);
    234         if (type_replacement == null) {
    235             return null;
    236         }
    237         return type_replacement.get(attributeValue);
    238     }
    239 
    240     LocaleIDParser localeIDParser = new LocaleIDParser();
    241 
    242     @Override
    243     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
    244         List<CheckStatus> possibleErrors) {
    245         if (cldrFileToCheck == null) return this;
    246         if (Phase.FINAL_TESTING == getPhase() || Phase.BUILD == getPhase()) {
    247             setSkipTest(false); // ok
    248         } else {
    249             setSkipTest(true);
    250             return this;
    251         }
    252 
    253         pluralInfo = supplementalData.getPlurals(PluralType.cardinal, cldrFileToCheck.getLocaleID());
    254         super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
    255         isEnglish = "en".equals(localeIDParser.set(cldrFileToCheck.getLocaleID()).getLanguage());
    256         synchronized (elementOrder) {
    257             if (!initialized) {
    258                 getMetadata();
    259                 initialized = true;
    260                 localeMatcher = LocaleMatcher.make();
    261             }
    262         }
    263         if (!localeMatcher.matches(cldrFileToCheck.getLocaleID())) {
    264             possibleErrors.add(new CheckStatus()
    265                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.invalidLocale)
    266                 .setMessage("Invalid Locale {0}",
    267                     new Object[] { cldrFileToCheck.getLocaleID() }));
    268 
    269         }
    270         return this;
    271     }
    272 
    273     private void getMetadata() {
    274 
    275         // sorting is expensive, but we need it here.
    276 
    277         Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo();
    278         for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) {
    279             String id = item.getKey();
    280             String type = item.getValue().get0();
    281             String value = item.getValue().get1();
    282             MatcherPattern mp = getMatcherPattern2(type, value);
    283             if (mp != null) {
    284                 variables.put(id, mp);
    285                 // variableReplacer.add(id, value);
    286             }
    287         }
    288         //System.out.println("Variables: " + variables.keySet());
    289 
    290         Map<AttributeValidityInfo, String> rawAttributeValueInfo = supplementalData.getAttributeValidity();
    291 
    292         for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) {
    293             AttributeValidityInfo item = entry.getKey();
    294             String value = entry.getValue();
    295             MatcherPattern mp = getMatcherPattern2(item.getType(), value);
    296             if (mp == null) {
    297                 System.out.println("Failed to make matcher for: " + item);
    298                 continue;
    299             }
    300             if (FIND_MISSING && mp.matcher == NOT_DONE_YET) {
    301                 missingTests.put(item.getElements().toString(), item.getAttributes().toString());
    302             }
    303 
    304             Set<DtdType> dtds = item.getDtds();
    305             // TODO handle other DTDs
    306             if (!dtds.contains(DtdType.ldml)) {
    307                 continue;
    308             }
    309             Set<String> attributeList = item.getAttributes();
    310             Set<String> elementList = item.getElements();
    311             if (elementList.size() == 0) {
    312                 addAttributes(attributeList, common_attribute_validity, mp);
    313             } else {
    314                 for (String element : elementList) {
    315                     // check if unnecessary
    316                     Element elementInfo = ldmlDtdData.getElementFromName().get(element);
    317                     if (elementInfo == null) {
    318                         System.out.println("Illegal <attributeValues>, element not valid: element: " + element);
    319                     } else {
    320                         for (String attribute : attributeList) {
    321                             Attribute attributeInfo = elementInfo.getAttributeNamed(attribute);
    322                             if (attributeInfo == null) {
    323                                 System.out.println("Illegal <attributeValues>, attribute not valid: element: " + element + ", attribute: " + attribute);
    324                             } else if (!attributeInfo.values.isEmpty()) {
    325                                 if (SHOW_UNNECESSARY) {
    326                                     System.out.println("Unnecessary <attributeValues >, the DTD has specific list: element: " + element + ", attribute: "
    327                                         + attribute + ", " + attributeInfo.values);
    328                                 }
    329                             }
    330                         }
    331                     }
    332                     // System.out.println("\t" + element);
    333                     Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element);
    334                     if (attribute_validity == null) {
    335                         element_attribute_validity.put(element, attribute_validity = new TreeMap<String, MatcherPattern>());
    336                     }
    337                     addAttributes(attributeList, attribute_validity, mp);
    338                 }
    339             }
    340         }
    341     }
    342 
    /**
     * BCP47 key (and every alias of that key) -> unmodifiable set of the values allowed for it,
     * including the aliases of each value. Currency ("cu") values are stored in upper case, the
     * calendar key ("ca") additionally allows "generic", and the private-use key "x" maps to an
     * empty set.
     */
    final static Map<String, Set<String>> BCP47_KEY_VALUES;
    static {
        Map<String, Set<String>> temp = new HashMap<>();
        Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases();
        for (Entry<String, Set<String>> keyValues : supplementalData.getBcp47Keys().keyValuesSet()) {
            Set<String> fullValues = new TreeSet<>();
            String key = keyValues.getKey();
            boolean isCurrency = key.equals("cu");
            for (String value : keyValues.getValue()) {
                // Currency codes are in upper case. Locale.ROOT keeps the mapping independent of
                // the default locale (a Turkish default would turn "i" into a dotted capital I).
                fullValues.add(isCurrency ? value.toUpperCase(java.util.Locale.ROOT) : value);
                R2<String, String> keyValue = Row.of(key, value);
                Set<String> valueAliases = bcp47Aliases.getAll(keyValue);
                if (valueAliases != null) {
                    fullValues.addAll(valueAliases);
                }
            }
            // Special case exception for generic calendar, since we don't want to expose it in bcp47
            if (key.equals("ca")) {
                fullValues.add("generic");
            }
            fullValues = Collections.unmodifiableSet(fullValues);
            temp.put(key, fullValues);
            // add aliased keys; aliases of the key itself are stored under (key, "")
            Set<String> keyAliases = bcp47Aliases.getAll(Row.of(key, ""));
            if (keyAliases != null) {
                for (String aliasKey : keyAliases) {
                    temp.put(aliasKey, fullValues);
                }
            }
        }
        // Hack for 'x', private use. Done last so that it wins over any key or alias named "x".
        temp.put("x", Collections.<String> emptySet());
        BCP47_KEY_VALUES = Collections.unmodifiableMap(temp);
    }
    380 
    381     private MatcherPattern getBcp47MatcherPattern(String key) {
    382         // <key type="calendar">Calendar</key>
    383         // <type key="calendar" type="chinese">Chinese Calendar</type>
    384 
    385         //<attributeValues elements="key" attributes="type" type="bcp47">key</attributeValues>
    386         //<attributeValues elements="type" attributes="key" type="bcp47">key</attributeValues>
    387         //<attributeValues elements="type" attributes="type" type="bcp47">use-key</attributeValues>
    388 
    389         MatcherPattern m = new MatcherPattern();
    390         Set<String> values;
    391         if (key.equals("key")) {
    392             values = BCP47_KEY_VALUES.keySet();
    393         } else {
    394             values = BCP47_KEY_VALUES.get(key);
    395         }
    396         m.value = key;
    397         m.pattern = values.toString();
    398         m.matcher = new CollectionMatcher().set(values);
    399         return m;
    400     }
    401 
    402     private MatcherPattern getMatcherPattern2(String type, String value) {
    403         String typeAttribute = type;
    404         MatcherPattern result = variables.get(value);
    405         if (result != null) {
    406             MatcherPattern temp = new MatcherPattern();
    407             temp.pattern = result.pattern;
    408             temp.matcher = result.matcher;
    409             temp.value = value;
    410             result = temp;
    411             if ("list".equals(typeAttribute)) {
    412                 temp.matcher = new ListMatcher().set(result.matcher);
    413             }
    414             return result;
    415         }
    416 
    417         result = new MatcherPattern();
    418         result.pattern = value;
    419         result.value = value;
    420         if ("choice".equals(typeAttribute)) {
    421             result.matcher = new CollectionMatcher()
    422                 .set(new HashSet<String>(Arrays.asList(value.trim().split("\\s+"))));
    423         } else if ("bcp47".equals(typeAttribute)) {
    424             result = getBcp47MatcherPattern(value);
    425         } else if ("regex".equals(typeAttribute)) {
    426             result.matcher = new RegexMatcher().set(value, Pattern.COMMENTS); // Pattern.COMMENTS to get whitespace
    427         } else if ("locale".equals(typeAttribute)) {
    428             result.matcher = LocaleMatcher.make();
    429         } else if ("notDoneYet".equals(typeAttribute) || "notDoneYet".equals(value)) {
    430             result.matcher = NOT_DONE_YET;
    431         } else {
    432             System.out.println("unknown type; value: <" + value + ">,\t" + typeAttribute);
    433             return null;
    434         }
    435         return result;
    436     }
    437 
    438     private void addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp) {
    439         for (String attribute : attributes) {
    440             MatcherPattern old = attribute_validity.get(attribute);
    441             if (old != null) {
    442                 mp.matcher = new OrMatcher().set(old.matcher, mp.matcher);
    443                 mp.pattern = old.pattern + " OR " + mp.pattern;
    444             }
    445             attribute_validity.put(attribute, mp);
    446         }
    447     }
    448 
    449     private static class MatcherPattern {
    450         public String value;
    451         ObjectMatcher<String> matcher;
    452         String pattern;
    453 
    454         public String toString() {
    455             return matcher.getClass().getName() + "\t" + pattern;
    456         }
    457     }
    458 
    459     public static class RegexMatcher implements ObjectMatcher<String> {
    460         private java.util.regex.Matcher matcher;
    461 
    462         public ObjectMatcher<String> set(String pattern) {
    463             matcher = PatternCache.get(pattern).matcher("");
    464             return this;
    465         }
    466 
    467         public ObjectMatcher<String> set(String pattern, int flags) {
    468             matcher = Pattern.compile(pattern, flags).matcher("");
    469             return this;
    470         }
    471 
    472         public boolean matches(String value) {
    473             matcher.reset(value.toString());
    474             return matcher.matches();
    475         }
    476     }
    477 
    478     public static class CollectionMatcher implements ObjectMatcher<String> {
    479         private Collection<String> collection;
    480 
    481         public ObjectMatcher<String> set(Collection<String> collection) {
    482             this.collection = collection;
    483             return this;
    484         }
    485 
    486         public boolean matches(String value) {
    487             return collection.contains(value);
    488         }
    489     }
    490 
    491     public static class OrMatcher implements ObjectMatcher<String> {
    492         private ObjectMatcher<String> a;
    493         private ObjectMatcher<String> b;
    494 
    495         public ObjectMatcher<String> set(ObjectMatcher<String> a, ObjectMatcher<String> b) {
    496             this.a = a;
    497             this.b = b;
    498             return this;
    499         }
    500 
    501         public boolean matches(String value) {
    502             return a.matches(value) || b.matches(value);
    503         }
    504     }
    505 
    506     public static class ListMatcher implements ObjectMatcher<String> {
    507         private ObjectMatcher<String> other;
    508 
    509         public ObjectMatcher<String> set(ObjectMatcher<String> other) {
    510             this.other = other;
    511             return this;
    512         }
    513 
    514         public boolean matches(String value) {
    515             String[] values = value.trim().split("\\s+");
    516             if (values.length == 1 && values[0].length() == 0) return true;
    517             for (int i = 0; i < values.length; ++i) {
    518                 if (!other.matches(values[i])) {
    519                     return false;
    520                 }
    521             }
    522             return true;
    523         }
    524     }
    525 
    526     public static class LocaleMatcher implements ObjectMatcher<String> {
    527         ObjectMatcher<String> grandfathered = variables.get("$grandfathered").matcher;
    528         ObjectMatcher<String> language = variables.get("$language").matcher;
    529         ObjectMatcher<String> script = variables.get("$script").matcher;
    530         ObjectMatcher<String> territory = variables.get("$territory").matcher;
    531         ObjectMatcher<String> variant = variables.get("$variant").matcher;
    532         LocaleIDParser lip = new LocaleIDParser();
    533         static LocaleMatcher singleton = null;
    534         static Object sync = new Object();
    535 
    536         private LocaleMatcher(boolean b) {
    537         }
    538 
    539         public static LocaleMatcher make() {
    540             synchronized (sync) {
    541                 if (singleton == null) {
    542                     singleton = new LocaleMatcher(true);
    543                 }
    544             }
    545             return singleton;
    546         }
    547 
    548         public boolean matches(String value) {
    549             if (grandfathered.matches(value)) return true;
    550             lip.set((String) value);
    551             String field = lip.getLanguage();
    552             if (!language.matches(field)) return false;
    553             field = lip.getScript();
    554             if (field.length() != 0 && !script.matches(field)) return false;
    555             field = lip.getRegion();
    556             if (field.length() != 0 && !territory.matches(field)) return false;
    557             String[] fields = lip.getVariants();
    558             for (int i = 0; i < fields.length; ++i) {
    559                 if (!variant.matches(fields[i])) return false;
    560             }
    561             return true;
    562         }
    563     }
    564 
    565 }