Home | History | Annotate | Download | only in util
      1 package org.unicode.cldr.util;
      2 
      3 import java.io.File;
      4 import java.io.StringReader;
      5 import java.util.ArrayList;
      6 import java.util.Arrays;
      7 import java.util.Collection;
      8 import java.util.Collections;
      9 import java.util.Comparator;
     10 import java.util.EnumMap;
     11 import java.util.HashMap;
     12 import java.util.HashSet;
     13 import java.util.Iterator;
     14 import java.util.LinkedHashMap;
     15 import java.util.LinkedHashSet;
     16 import java.util.List;
     17 import java.util.Locale;
     18 import java.util.Map;
     19 import java.util.Map.Entry;
     20 import java.util.Set;
     21 import java.util.TreeMap;
     22 import java.util.regex.Pattern;
     23 
     24 import com.google.common.base.CharMatcher;
     25 import com.google.common.base.Splitter;
     26 import com.google.common.collect.ImmutableSet;
     27 import com.google.common.collect.ImmutableSet.Builder;
     28 import com.google.common.collect.Multimap;
     29 import com.ibm.icu.dev.util.CollectionUtilities;
     30 import com.ibm.icu.impl.Relation;
     31 import com.ibm.icu.text.Transform;
     32 
     33 /**
     34  * An immutable object that contains the structure of a DTD.
     35  * @author markdavis
     36  */
     37 public class DtdData extends XMLFileReader.SimpleHandler {
     38     private static final String COMMENT_PREFIX = System.lineSeparator() + "    ";
     39     private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false);
     40     private static final boolean USE_SYNTHESIZED = false;
     41 
     42     private static final boolean DEBUG = false;
     43     private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]");
     44 
     45     private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class);
     46     private Map<String, Element> nameToElement = new HashMap<String, Element>();
     47     private MapComparator<String> elementComparator;
     48     private MapComparator<String> attributeComparator;
     49 
     50     public final Element ROOT;
     51     public final Element PCDATA = elementFrom("#PCDATA");
     52     public final Element ANY = elementFrom("ANY");
     53     public final DtdType dtdType;
     54     public final String version;
     55     private Element lastElement;
     56     private Attribute lastAttribute;
     57     private Set<String> preCommentCache;
     58     private DtdComparator dtdComparator;
     59 
     60     public enum AttributeStatus {
     61         distinguished, value, metadata
     62     }
     63 
     64     public enum Mode {
     65         REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null");
     66 
     67         public final String source;
     68 
     69         Mode(String s) {
     70             source = s;
     71         }
     72 
     73         public static Mode forString(String mode) {
     74             for (Mode value : Mode.values()) {
     75                 if (value.source.equals(mode)) {
     76                     return value;
     77                 }
     78             }
     79             if (mode == null) {
     80                 return NULL;
     81             }
     82             throw new IllegalArgumentException(mode);
     83         }
     84     }
     85 
     86     public enum AttributeType {
     87         CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE
     88     }
     89 
     90     public static class Attribute implements Named {
     91         public final String name;
     92         public final Element element;
     93         public final Mode mode;
     94         public final String defaultValue;
     95         public final AttributeType type;
     96         public final Map<String, Integer> values;
     97         private final Set<String> commentsPre;
     98         private Set<String> commentsPost;
     99         private boolean isDeprecatedAttribute;
    100         private AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations
    101         private Set<String> deprecatedValues = Collections.emptySet();
    102         private final Comparator<String> attributeValueComparator;
    103 
    104         private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) {
    105             commentsPre = firstComment;
    106             element = element2;
    107             name = aName.intern();
    108             if (name.equals("draft") // normally never permitted on elements with children, but special cases...
    109                 && !element.getName().equals("collation")
    110                 && !element.getName().equals("transform")) {
    111                 int elementChildrenCount = element.getChildren().size();
    112                 if (elementChildrenCount > 1
    113                     || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) {
    114                     isDeprecatedAttribute = true;
    115                     if (DEBUG) {
    116                         System.out.println(element.getName() + ":" + element.getChildren());
    117                     }
    118                 }
    119             }
    120             mode = mode2;
    121             defaultValue = value2 == null ? null
    122                 : value2.intern();
    123             AttributeType _type = AttributeType.ENUMERATED_TYPE;
    124             Map<String, Integer> _values = Collections.emptyMap();
    125             if (split.length == 1) {
    126                 try {
    127                     _type = AttributeType.valueOf(split[0]);
    128                 } catch (Exception e) {
    129                 }
    130             }
    131             type = _type;
    132 
    133             if (_type == AttributeType.ENUMERATED_TYPE) {
    134                 LinkedHashMap<String, Integer> temp = new LinkedHashMap<String, Integer>();
    135                 for (String part : split) {
    136                     if (part.length() != 0) {
    137                         temp.put(part.intern(), temp.size());
    138                     }
    139                 }
    140                 _values = Collections.unmodifiableMap(temp);
    141             }
    142             values = _values;
    143             attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name);
    144         }
    145 
    146         @Override
    147         public String toString() {
    148             return element.name + ":" + name;
    149         }
    150 
    151         public StringBuilder appendDtdString(StringBuilder b) {
    152             Attribute a = this;
    153             b.append("<!ATTLIST " + element.name + " " + a.name);
    154             boolean first;
    155             if (a.type == AttributeType.ENUMERATED_TYPE) {
    156                 b.append(" (");
    157                 first = true;
    158                 for (String s : a.values.keySet()) {
    159                     if (deprecatedValues.contains(s)) {
    160                         continue;
    161                     }
    162                     if (first) {
    163                         first = false;
    164                     } else {
    165                         b.append(" | ");
    166                     }
    167                     b.append(s);
    168                 }
    169                 b.append(")");
    170             } else {
    171                 b.append(' ').append(a.type);
    172             }
    173             if (a.mode != Mode.NULL) {
    174                 b.append(" ").append(a.mode.source);
    175             }
    176             if (a.defaultValue != null) {
    177                 b.append(" \"").append(a.defaultValue).append('"');
    178             }
    179             b.append(" >");
    180             return b;
    181         }
    182 
    183         public String features() {
    184             return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString())
    185                 + (mode == Mode.NULL ? "" : ", mode=" + mode)
    186                 + (defaultValue == null ? "" : ", default=" + defaultValue);
    187         }
    188 
    189         @Override
    190         public String getName() {
    191             return name;
    192         }
    193 
    194         private static Splitter COMMA = Splitter.on(',').trimResults();
    195 
    196         public void addComment(String commentIn) {
    197             if (commentIn.startsWith("@")) {
    198                 // there are exactly 2 cases: deprecated and ordered
    199                 switch (commentIn) {
    200                 case "@METADATA":
    201                     attributeStatus = AttributeStatus.metadata;
    202                     break;
    203                 case "@VALUE":
    204                     attributeStatus = AttributeStatus.value;
    205                     break;
    206                 case "@DEPRECATED":
    207                     isDeprecatedAttribute = true;
    208                     break;
    209                 default:
    210                     if (commentIn.startsWith("@DEPRECATED:")) {
    211                         deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(commentIn.substring("@DEPRECATED:".length()))));
    212                         break;
    213                     }
    214                     throw new IllegalArgumentException("Unrecognized annotation: " + commentIn);
    215                 }
    216                 return;
    217             }
    218             commentsPost = addUnmodifiable(commentsPost, commentIn.trim());
    219         }
    220 
    221         /**
    222          * Special version of identity; only considers name and name of element
    223          */
    224         @Override
    225         public boolean equals(Object obj) {
    226             if (!(obj instanceof Attribute)) {
    227                 return false;
    228             }
    229             Attribute that = (Attribute) obj;
    230             return name.equals(that.name)
    231                 && element.name.equals(that.element.name) // don't use plain element: circularity
    232             // not relevant to identity
    233             //                && Objects.equals(comment, that.comment)
    234             //                && mode.equals(that.mode)
    235             //                && Objects.equals(defaultValue, that.defaultValue)
    236             //                && type.equals(that.type)
    237             //                && values.equals(that.values)
    238             ;
    239         }
    240 
    241         /**
    242          * Special version of identity; only considers name and name of element
    243          */
    244         @Override
    245         public int hashCode() {
    246             return name.hashCode() * 37
    247                 + element.name.hashCode() // don't use plain element: circularity
    248             // not relevant to identity
    249             //                ) * 37 + Objects.hashCode(comment)) * 37
    250             //                + mode.hashCode()) * 37
    251             //                + Objects.hashCode(defaultValue)) * 37
    252             //                + type.hashCode()) * 37
    253             //                + values.hashCode()
    254             ;
    255         }
    256 
    257         public boolean isDeprecated() {
    258             return isDeprecatedAttribute;
    259         }
    260 
    261         public boolean isDeprecatedValue(String value) {
    262             return deprecatedValues.contains(value);
    263         }
    264 
    265         public AttributeStatus getStatus() {
    266             return attributeStatus;
    267         }
    268 
    269     }
    270 
    271     private DtdData(DtdType type, String version) {
    272         this.dtdType = type;
    273         this.ROOT = elementFrom(type.rootType.toString());
    274         this.version = version;
    275     }
    276 
    277     private void addAttribute(String eName, String aName, String type, String mode, String value) {
    278         Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache);
    279         preCommentCache = null;
    280         getAttributesFromName().put(aName, a);
    281         CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size());
    282         lastElement = null;
    283         lastAttribute = a;
    284     }
    285 
    /**
     * The kind of content model an element has, paired with a source spelling.
     */
    public enum ElementType {
        EMPTY,
        ANY,
        PCDATA("(#PCDATA)"),
        CHILDREN;

        /** The explicit spelling if one was given, otherwise the constant's own name. */
        public final String source;

        /** Uses the constant's name as its source. */
        ElementType() {
            source = name();
        }

        /** Uses the given text as the source. */
        ElementType(String s) {
            source = s;
        }
    }
    298 
    /**
     * Something that can report its own name.
     */
    interface Named {
        /**
         * @return the name of this item
         */
        String getName();
    }
    302 
    /**
     * Status of an element: {@code regular} unless marked otherwise.
     */
    public enum ElementStatus {
        regular,
        metadata
    }
    306 
    307     public static class Element implements Named {
    308         public final String name;
    309         private String rawModel;
    310         private ElementType type;
    311         private final Map<Element, Integer> children = new LinkedHashMap<Element, Integer>();
    312         private final Map<Attribute, Integer> attributes = new LinkedHashMap<Attribute, Integer>();
    313         private Set<String> commentsPre;
    314         private Set<String> commentsPost;
    315         private String model;
    316         private boolean isOrderedElement;
    317         private boolean isDeprecatedElement;
    318         private ElementStatus elementStatus = ElementStatus.regular;
    319 
    320         private Element(String name2) {
    321             name = name2.intern();
    322         }
    323 
    324         private void setChildren(DtdData dtdData, String model, Set<String> precomments) {
    325             this.commentsPre = precomments;
    326             rawModel = model;
    327             this.model = clean(model);
    328             if (model.equals("EMPTY")) {
    329                 type = ElementType.EMPTY;
    330                 return;
    331             }
    332             type = ElementType.CHILDREN;
    333             for (String part : FILLER.split(model)) {
    334                 if (part.length() != 0) {
    335                     if (part.equals("#PCDATA")) {
    336                         type = ElementType.PCDATA;
    337                     } else if (part.equals("ANY")) {
    338                         type = ElementType.ANY;
    339                     } else {
    340                         CldrUtility.putNew(children, dtdData.elementFrom(part), children.size());
    341                     }
    342                 }
    343             }
    344             if ((type == ElementType.CHILDREN) == (children.size() == 0)
    345                 && !model.startsWith("(#PCDATA|cp")) {
    346                 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model);
    347             }
    348         }
    349 
    350         static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)");
    351         static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])");
    352 
    353         private String clean(String model2) {
    354             // (x) -> ( x );
    355             // x,y -> x, y
    356             // x|y -> x | y
    357             String result = CLEANER1.matcher(model2).replaceAll("$1 ");
    358             result = CLEANER2.matcher(result).replaceAll(" $1");
    359             return result.equals(model2)
    360                 ? model2
    361                 : result; // for debugging
    362         }
    363 
    364         public boolean containsAttribute(String string) {
    365             for (Attribute a : attributes.keySet()) {
    366                 if (a.name.equals(string)) {
    367                     return true;
    368                 }
    369             }
    370             return false;
    371         }
    372 
    373         @Override
    374         public String toString() {
    375             return name;
    376         }
    377 
    378         public String toDtdString() {
    379             return "<!ELEMENT " + name + " " + getRawModel() + " >";
    380         }
    381 
    382         public ElementType getType() {
    383             return type;
    384         }
    385 
    386         public Map<Element, Integer> getChildren() {
    387             return Collections.unmodifiableMap(children);
    388         }
    389 
    390         public Map<Attribute, Integer> getAttributes() {
    391             return Collections.unmodifiableMap(attributes);
    392         }
    393 
    394         @Override
    395         public String getName() {
    396             return name;
    397         }
    398 
    399         public Element getChildNamed(String string) {
    400             for (Element e : children.keySet()) {
    401                 if (e.name.equals(string)) {
    402                     return e;
    403                 }
    404             }
    405             return null;
    406         }
    407 
    408         public Attribute getAttributeNamed(String string) {
    409             for (Attribute a : attributes.keySet()) {
    410                 if (a.name.equals(string)) {
    411                     return a;
    412                 }
    413             }
    414             return null;
    415         }
    416 
    417         public void addComment(String addition) {
    418             if (addition.startsWith("@")) {
    419                 // there are exactly 3 cases: deprecated, ordered, and metadata
    420                 switch (addition) {
    421                 case "@ORDERED":
    422                     isOrderedElement = true;
    423                     break;
    424                 case "@DEPRECATED":
    425                     isDeprecatedElement = true;
    426                     break;
    427                 case "@METADATA":
    428                     elementStatus = ElementStatus.metadata;
    429                     break;
    430                 default:
    431                     throw new IllegalArgumentException("Unrecognized annotation: " + addition);
    432                 }
    433                 return;
    434             }
    435             commentsPost = addUnmodifiable(commentsPost, addition.trim());
    436         }
    437 
    438         /**
    439          * Special version of equals. Only the name is considered in the identity.
    440          */
    441         @Override
    442         public boolean equals(Object obj) {
    443             if (!(obj instanceof Element)) {
    444                 return false;
    445             }
    446             Element that = (Element) obj;
    447             return name.equals(that.name)
    448             // not relevant to the identity of the object
    449             //                && Objects.equals(comment, that.comment)
    450             //                && type == that.type
    451             //                && attributes.equals(that.attributes)
    452             //                && children.equals(that.children)
    453             ;
    454         }
    455 
    456         /**
    457          * Special version of hashcode. Only the name is considered in the identity.
    458          */
    459         @Override
    460         public int hashCode() {
    461             return name.hashCode()
    462             // not relevant to the identity of the object
    463             // * 37 + Objects.hashCode(comment)
    464             //) * 37 + Objects.hashCode(type)
    465             //                ) * 37 + attributes.hashCode()
    466             //                ) * 37 + children.hashCode()
    467             ;
    468         }
    469 
    470         public boolean isDeprecated() {
    471             return isDeprecatedElement;
    472         }
    473 
    474         public ElementStatus getElementStatus() {
    475             return elementStatus;
    476         }
    477 
    478         /**
    479          * @return the rawModel
    480          */
    481         public String getRawModel() {
    482             return rawModel;
    483         }
    484     }
    485 
    486     private Element elementFrom(String name) {
    487         Element result = nameToElement.get(name);
    488         if (result == null) {
    489             nameToElement.put(name, result = new Element(name));
    490         }
    491         return result;
    492     }
    493 
    494     private void addElement(String name2, String model) {
    495         Element element = elementFrom(name2);
    496         element.setChildren(this, model, preCommentCache);
    497         preCommentCache = null;
    498         lastElement = element;
    499         lastAttribute = null;
    500     }
    501 
    502     private void addComment(String comment) {
    503         comment = comment.trim();
    504         if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky"
    505             if (comment.startsWith("@")) {
    506                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
    507             }
    508             preCommentCache = addUnmodifiable(preCommentCache, comment);
    509         } else if (lastElement != null) {
    510             lastElement.addComment(comment);
    511         } else if (lastAttribute != null) {
    512             lastAttribute.addComment(comment);
    513         } else {
    514             if (comment.startsWith("@")) {
    515                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
    516             }
    517             preCommentCache = addUnmodifiable(preCommentCache, comment);
    518         }
    519     }
    520 
    521     // TODO hide this
    522     /**
    523      * @deprecated
    524      */
    525     @Override
    526     public void handleElementDecl(String name, String model) {
    527         if (SHOW_ALL) {
    528             // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) >
    529             System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >");
    530         }
    531         addElement(name, model);
    532     }
    533 
    534     // TODO hide this
    535     /**
    536      * @deprecated
    537      */
    538     @Override
    539     public void handleStartDtd(String name, String publicId, String systemId) {
    540         DtdType explicitDtdType = DtdType.valueOf(name);
    541         if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) {
    542             throw new IllegalArgumentException("Mismatch in dtdTypes");
    543         }
    544     };
    545 
    546     /**
    547      * @deprecated
    548      */
    549     @Override
    550     public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
    551         if (SHOW_ALL) {
    552             // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED >
    553             // <!ATTLIST version number CDATA #REQUIRED >
    554             // <!ATTLIST version cldrVersion CDATA #FIXED "27" >
    555 
    556             System.out.println("<!ATTLIST " + eName
    557                 + " " + aName
    558                 + " " + type
    559                 + " " + mode
    560                 + (value == null ? "" : " \"" + value + "\"")
    561                 + " >");
    562         }
    563         // HACK for 1.1.1
    564         if (eName.equals("draft")) {
    565             eName = "week";
    566         }
    567         addAttribute(eName, aName, type, mode, value);
    568     }
    569 
    570     /**
    571      * @deprecated
    572      */
    573     @Override
    574     public void handleComment(String path, String comment) {
    575         if (SHOW_ALL) {
    576             // <!-- true and false are deprecated. -->
    577             System.out.println("<!-- " + comment.trim() + " -->");
    578         }
    579         addComment(comment);
    580     }
    581 
    582     // TODO hide this
    583     /**
    584      * @deprecated
    585      */
    586     @Override
    587     public void handleEndDtd() {
    588         throw new XMLFileReader.AbortException();
    589     }
    590 
    591     //    static final Map<CLDRFile.DtdType, String> DTD_TYPE_TO_FILE;
    592     //    static {
    593     //        EnumMap<CLDRFile.DtdType, String> temp = new EnumMap<CLDRFile.DtdType, String>(CLDRFile.DtdType.class);
    594     //        temp.put(CLDRFile.DtdType.ldml, CldrUtility.BASE_DIRECTORY + "common/dtd/ldml.dtd");
    595     //        temp.put(CLDRFile.DtdType.supplementalData, CldrUtility.BASE_DIRECTORY + "common/dtd/ldmlSupplemental.dtd");
    596     //        temp.put(CLDRFile.DtdType.ldmlBCP47, CldrUtility.BASE_DIRECTORY + "common/dtd/ldmlBCP47.dtd");
    597     //        temp.put(CLDRFile.DtdType.keyboard, CldrUtility.BASE_DIRECTORY + "keyboards/dtd/ldmlKeyboard.dtd");
    598     //        temp.put(CLDRFile.DtdType.platform, CldrUtility.BASE_DIRECTORY + "keyboards/dtd/ldmlPlatform.dtd");
    599     //        DTD_TYPE_TO_FILE = Collections.unmodifiableMap(temp);
    600     //    }
    601 
    602     /**
    603      * Normal version of DtdData
    604      * Note that it always gets the trunk version
    605      */
    606     public static DtdData getInstance(DtdType type) {
    607         return CACHE.get(type);
    608     }
    609 
    610     /**
    611      * Special form using version, used only by tests, etc.
    612      */
    613     public static DtdData getInstance(DtdType type, String version) {
    614         DtdData simpleHandler = new DtdData(type, version);
    615         XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler);
    616         File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory()
    617             : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version);
    618 
    619         if (type != type.rootType) {
    620             // read the real first, then add onto it.
    621             readFile(type.rootType, xfr, directory);
    622         }
    623         readFile(type, xfr, directory);
    624         // HACK
    625         if (type == DtdType.ldmlICU) {
    626             Element special = simpleHandler.nameToElement.get("special");
    627             for (String extraElementName : Arrays.asList(
    628                 "icu:breakIteratorData",
    629                 "icu:UCARules",
    630                 "icu:scripts",
    631                 "icu:transforms",
    632                 "icu:ruleBasedNumberFormats",
    633                 "icu:isLeapMonth",
    634                 "icu:version",
    635                 "icu:breakDictionaryData",
    636                 "icu:depends")) {
    637                 Element extraElement = simpleHandler.nameToElement.get(extraElementName);
    638                 special.children.put(extraElement, special.children.size());
    639             }
    640         }
    641         if (simpleHandler.ROOT.children.size() == 0) {
    642             throw new IllegalArgumentException(); // should never happen
    643         }
    644         simpleHandler.finish();
    645         simpleHandler.freeze();
    646         return simpleHandler;
    647     }
    648 
    649     private void finish() {
    650         dtdComparator = new DtdComparator();
    651     }
    652 
    653     public static void readFile(DtdType type, XMLFileReader xfr, File directory) {
    654         File file = new File(directory, type.dtdPath);
    655         StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>"
    656             + "<!DOCTYPE " + type
    657             + " SYSTEM '" + file.getAbsolutePath() + "'>");
    658         xfr.read(type.toString(), s, -1, true); //  DTD_TYPE_TO_FILE.get(type)
    659     }
    660 
    661     private void freeze() {
    662         if (version == null) { // only generate for new versions
    663             MergeLists<String> elementMergeList = new MergeLists<String>();
    664             elementMergeList.add(dtdType.toString());
    665             MergeLists<String> attributeMergeList = new MergeLists<String>();
    666             attributeMergeList.add("_q");
    667 
    668             for (Element element : nameToElement.values()) {
    669                 if (element.children.size() > 0) {
    670                     Collection<String> names = getNames(element.children.keySet());
    671                     elementMergeList.add(names);
    672                     if (DEBUG) {
    673                         System.out.println(element.getName() + "\t\t" + names);
    674                     }
    675                 }
    676                 if (element.attributes.size() > 0) {
    677                     Collection<String> names = getNames(element.attributes.keySet());
    678                     attributeMergeList.add(names);
    679                     if (DEBUG) {
    680                         System.out.println(element.getName() + "\t\t@" + names);
    681                     }
    682                 }
    683             }
    684             List<String> elementList = elementMergeList.merge();
    685             List<String> attributeList = attributeMergeList.merge();
    686             if (DEBUG) {
    687                 System.out.println("Element Ordering:\t" + elementList);
    688                 System.out.println("Attribute Ordering:\t" + attributeList);
    689             }
    690             // double-check
    691             //        for (Element element : elements) {
    692             //            if (!MergeLists.hasConsistentOrder(elementList, element.children.keySet())) {
    693             //                throw new IllegalArgumentException("Failed to find good element order: " + element.children.keySet());
    694             //            }
    695             //            if (!MergeLists.hasConsistentOrder(attributeList, element.attributes.keySet())) {
    696             //                throw new IllegalArgumentException("Failed to find good attribute order: " + element.attributes.keySet());
    697             //            }
    698             //        }
    699             elementComparator = new MapComparator<String>(elementList).setErrorOnMissing(true).freeze();
    700             attributeComparator = new MapComparator<String>(attributeList).setErrorOnMissing(true).freeze();
    701         }
    702         nameToAttributes.freeze();
    703         nameToElement = Collections.unmodifiableMap(nameToElement);
    704     }
    705 
    706     private Collection<String> getNames(Collection<? extends Named> keySet) {
    707         List<String> result = new ArrayList<String>();
    708         for (Named e : keySet) {
    709             result.add(e.getName());
    710         }
    711         return result;
    712     }
    713 
    714     public enum DtdItem {
    715         ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE
    716     }
    717 
    718     public interface AttributeValueComparator {
    719         public int compare(String element, String attribute, String value1, String value2);
    720     }
    721 
    722     public Comparator<String> getDtdComparator(AttributeValueComparator avc) {
    723         return dtdComparator;
    724     }
    725 
    726     private class DtdComparator implements Comparator<String> {
    727         @Override
    728         public int compare(String path1, String path2) {
    729             XPathParts a = XPathParts.getFrozenInstance(path1);
    730             XPathParts b = XPathParts.getFrozenInstance(path2);
    731             // there must always be at least one element
    732             String baseA = a.getElement(0);
    733             String baseB = b.getElement(0);
    734             if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) {
    735                 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB);
    736             }
    737             int min = Math.min(a.size(), b.size());
    738             Element parent = ROOT;
    739             Element elementA;
    740             for (int i = 1; i < min; ++i, parent = elementA) {
    741                 // add extra test for "fake" elements, used in diffing. they always start with _
    742                 String elementRawA = a.getElement(i);
    743                 String elementRawB = b.getElement(i);
    744                 if (elementRawA.startsWith("_")) {
    745                     return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1;
    746                 } else if (elementRawB.startsWith("_")) {
    747                     return 1;
    748                 }
    749                 //
    750                 elementA = nameToElement.get(elementRawA);
    751                 Element elementB = nameToElement.get(elementRawB);
    752                 if (elementA != elementB) {
    753                     int aa = parent.children.get(elementA);
    754                     int bb = parent.children.get(elementB);
    755                     return aa - bb;
    756                 }
    757                 int countA = a.getAttributeCount(i);
    758                 int countB = b.getAttributeCount(i);
    759                 if (countA == 0 && countB == 0) {
    760                     continue;
    761                 }
    762                 // we have two ways to compare the attributes. One based on the dtd,
    763                 // and one based on explicit comparators
    764 
    765                 // at this point the elements are the same and correspond to elementA
    766                 // in the dtd
    767 
    768                 // Handle the special added elements
    769                 String aqValue = a.getAttributeValue(i, "_q");
    770                 if (aqValue != null) {
    771                     String bqValue = b.getAttributeValue(i, "_q");
    772                     if (!aqValue.equals(bqValue)) {
    773                         int aValue = Integer.parseInt(aqValue);
    774                         int bValue = Integer.parseInt(bqValue);
    775                         return aValue - bValue;
    776                     }
    777                     --countA;
    778                     --countB;
    779                 }
    780 
    781                 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) {
    782                     Attribute main = attr.getKey();
    783                     String valueA = a.getAttributeValue(i, main.name);
    784                     String valueB = b.getAttributeValue(i, main.name);
    785                     if (valueA == null) {
    786                         if (valueB != null) {
    787                             return -1;
    788                         }
    789                     } else if (valueB == null) {
    790                         return 1;
    791                     } else if (valueA.equals(valueB)) {
    792                         --countA;
    793                         --countB;
    794                         if (countA == 0 && countB == 0) {
    795                             break attributes;
    796                         }
    797                         continue; // TODO
    798                     } else if (main.attributeValueComparator != null) {
    799                         return main.attributeValueComparator.compare(valueA, valueB);
    800                     } else if (main.values.size() != 0) {
    801                         int aa = main.values.get(valueA);
    802                         int bb = main.values.get(valueB);
    803                         return aa - bb;
    804                     } else {
    805                         return valueA.compareTo(valueB);
    806                     }
    807                 }
    808                 if (countA != 0 || countB != 0) {
    809                     throw new IllegalArgumentException();
    810                 }
    811             }
    812             return a.size() - b.size();
    813         }
    814     }
    815 
    816     public MapComparator<String> getAttributeComparator() {
    817         return attributeComparator;
    818     }
    819 
    820     public MapComparator<String> getElementComparator() {
    821         return elementComparator;
    822     }
    823 
    824     public Relation<String, Attribute> getAttributesFromName() {
    825         return nameToAttributes;
    826     }
    827 
    828     public Map<String, Element> getElementFromName() {
    829         return nameToElement;
    830     }
    831 
    832     //    private static class XPathIterator implements SimpleIterator<Node> {
    833     //        private String path;
    834     //        private int position; // at the start of the next element, or at the end of the string
    835     //        private Node node = new Node();
    836     //
    837     //        public void set(String path) {
    838     //            if (!path.startsWith("//")) {
    839     //                throw new IllegalArgumentException();
    840     //            }
    841     //            this.path = path;
    842     //            this.position = 2;
    843     //        }
    844     //
    845     //        @Override
    846     //        public Node next() {
    847     //            // starts with /...[@...="...."]...
    848     //            if (position >= path.length()) {
    849     //                return null;
    850     //            }
    851     //            node.elementName = "";
    852     //            node.attributes.clear();
    853     //            int start = position;
    854     //            // collect the element
    855     //            while (true) {
    856     //                if (position >= path.length()) {
    857     //                    return node;
    858     //                }
    859     //                char ch = path.charAt(position++);
    860     //                switch (ch) {
    861     //                case '/':
    862     //                    return node;
    863     //                case '[':
    864     //                    node.elementName = path.substring(start, position);
    865     //                    break;
    866     //                }
    867     //            }
    868     //            // done with element, we hit a [, collect the attributes
    869     //
    870     //            if (path.charAt(position++) != '@') {
    871     //                throw new IllegalArgumentException();
    872     //            }
    873     //            while (true) {
    874     //                if (position >= path.length()) {
    875     //                    return node;
    876     //                }
    877     //                char ch = path.charAt(position++);
    878     //                switch (ch) {
    879     //                case '/':
    880     //                    return node;
    881     //                case '[':
    882     //                    node.elementName = path.substring(start, position);
    883     //                    break;
    884     //                }
    885     //            }
    886     //        }
    887     //    }
    888 
    889     public String toString() {
    890         StringBuilder b = new StringBuilder();
    891         // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) >
    892         // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. -->
    893 //        if (firstComment != null) {
    894 //            b.append("\n<!--").append(firstComment).append("-->");
    895 //        }
    896         Seen seen = new Seen(dtdType);
    897         seen.seenElements.add(ANY);
    898         seen.seenElements.add(PCDATA);
    899         toString(ROOT, b, seen);
    900 
    901         // Hack for ldmlIcu: catch the items that are not mentioned in the original
    902         int currentEnd = b.length();
    903         for (Element e : nameToElement.values()) {
    904             toString(e, b, seen);
    905         }
    906         if (currentEnd != b.length()) {
    907             b.insert(currentEnd,
    908                 System.lineSeparator() + System.lineSeparator()
    909                     + "<!-- Elements not reachable from root! -->"
    910                     + System.lineSeparator());
    911         }
    912         return b.toString();
    913     }
    914 
    915     static final class Seen {
    916         Set<Element> seenElements = new HashSet<Element>();
    917         Set<Attribute> seenAttributes = new HashSet<Attribute>();
    918 
    919         public Seen(DtdType dtdType) {
    920             if (dtdType.rootType == dtdType) {
    921                 return;
    922             }
    923             DtdData otherData = DtdData.getInstance(dtdType.rootType);
    924             walk(otherData, otherData.ROOT);
    925             seenElements.remove(otherData.nameToElement.get("special"));
    926         }
    927 
    928         private void walk(DtdData otherData, Element current) {
    929             seenElements.add(current);
    930             seenAttributes.addAll(current.attributes.keySet());
    931             for (Element e : current.children.keySet()) {
    932                 walk(otherData, e);
    933             }
    934         }
    935     }
    936 
    937     public Set<Element> getDescendents(Element start, Set<Element> toAddTo) {
    938         if (!toAddTo.contains(start)) {
    939             toAddTo.add(start);
    940             for (Element e : start.children.keySet()) {
    941                 getDescendents(e, toAddTo);
    942             }
    943         }
    944         return toAddTo;
    945     }
    946 
    947     //static final SupplementalDataInfo supplementalDataInfo = CLDRConfig.getInstance().getSupplementalDataInfo();
    948 
    949     private void toString(Element current, StringBuilder b, Seen seen) {
    950 //        if ("calendar".equals(current.name) || current.commentsPost != null && current.commentsPost.contains("use of fields")) {
    951 //            int debug = 0;
    952 //        }
    953         boolean first = true;
    954         if (seen.seenElements.contains(current)) {
    955             return;
    956         }
    957         seen.seenElements.add(current);
    958         boolean elementDeprecated = isDeprecated(current.name, "*", "*");
    959 
    960         showComments(b, current.commentsPre, true);
    961         b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >");
    962         if (USE_SYNTHESIZED) {
    963             Element aliasElement = getElementFromName().get("alias");
    964             //b.append(current.rawChildren);
    965             if (!current.children.isEmpty()) {
    966                 LinkedHashSet<Element> elements = new LinkedHashSet<Element>(current.children.keySet());
    967                 boolean hasAlias = aliasElement != null && elements.remove(aliasElement);
    968                 //boolean hasSpecial = specialElement != null && elements.remove(specialElement);
    969                 if (hasAlias) {
    970                     b.append("(alias |");
    971                 }
    972                 b.append("(");
    973                 // <!ELEMENT transformNames ( alias | (transformName | special)* ) >
    974                 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) >
    975 
    976                 for (Element e : elements) {
    977                     if (first) {
    978                         first = false;
    979                     } else {
    980                         b.append(", ");
    981                     }
    982                     b.append(e.name);
    983                     if (e.type != ElementType.PCDATA) {
    984                         b.append("*");
    985                     }
    986                 }
    987                 if (hasAlias) {
    988                     b.append(")");
    989                 }
    990                 b.append(")");
    991             } else {
    992                 b.append(current.type == null ? "???" : current.type.source);
    993             }
    994             b.append(">");
    995         }
    996         showComments(b, current.commentsPost, false);
    997         if (isOrdered(current.name)) {
    998             b.append(COMMENT_PREFIX + "<!--@ORDERED-->");
    999         }
   1000         if (current.getElementStatus() != ElementStatus.regular) {
   1001             b.append(COMMENT_PREFIX + "<!--@"
   1002                 + current.getElementStatus().toString().toUpperCase(Locale.ROOT)
   1003                 + "-->");
   1004         }
   1005         if (elementDeprecated) {
   1006             b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
   1007         }
   1008 
   1009         LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>();
   1010 
   1011         for (Attribute a : current.attributes.keySet()) {
   1012             if (seen.seenAttributes.contains(a)) {
   1013                 continue;
   1014             }
   1015             seen.seenAttributes.add(a);
   1016             boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*");
   1017 
   1018             deprecatedValues.clear();
   1019 
   1020             showComments(b, a.commentsPre, true);
   1021             b.append("\n<!ATTLIST " + current.name + " " + a.name);
   1022             if (a.type == AttributeType.ENUMERATED_TYPE) {
   1023                 b.append(" (");
   1024                 first = true;
   1025                 for (String s : a.values.keySet()) {
   1026                     if (first) {
   1027                         first = false;
   1028                     } else {
   1029                         b.append(" | ");
   1030                     }
   1031                     b.append(s);
   1032                     if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) {
   1033                         deprecatedValues.add(s);
   1034                     }
   1035                 }
   1036                 b.append(")");
   1037             } else {
   1038                 b.append(' ').append(a.type);
   1039             }
   1040             if (a.mode != Mode.NULL) {
   1041                 b.append(" ").append(a.mode.source);
   1042             }
   1043             if (a.defaultValue != null) {
   1044                 b.append(" \"").append(a.defaultValue).append('"');
   1045             }
   1046             b.append(" >");
   1047             showComments(b, a.commentsPost, false);
   1048 //            if (attributeDeprecated != deprecatedComment) {
   1049 //                System.out.println("*** BAD DEPRECATION ***" + a);
   1050 //            }
   1051             if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) {
   1052                 b.append(COMMENT_PREFIX + "<!--@METADATA-->");
   1053             } else if (!isDistinguishing(current.name, a.name)) {
   1054                 b.append(COMMENT_PREFIX + "<!--@VALUE-->");
   1055             }
   1056             if (attributeDeprecated) {
   1057                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
   1058             } else if (!deprecatedValues.isEmpty()) {
   1059                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + CollectionUtilities.join(deprecatedValues, ", ") + "-->");
   1060             }
   1061         }
   1062         if (current.children.size() > 0) {
   1063             for (Element e : current.children.keySet()) {
   1064                 toString(e, b, seen);
   1065             }
   1066         }
   1067     }
   1068 
   1069     private void showComments(StringBuilder b, Set<String> comments, boolean separate) {
   1070         if (comments == null) {
   1071             return;
   1072         }
   1073         if (separate && b.length() != 0) {
   1074             b.append(System.lineSeparator());
   1075         }
   1076         for (String c : comments) {
   1077             boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat");
   1078             if (!deprecatedComment) {
   1079                 if (separate) {
   1080                     // special handling for very first comment
   1081                     if (b.length() == 0) {
   1082                         b.append("<!--")
   1083                             .append(System.lineSeparator())
   1084                             .append(c)
   1085                             .append(System.lineSeparator())
   1086                             .append("-->");
   1087                         continue;
   1088                     }
   1089                     b.append(System.lineSeparator());
   1090                 } else {
   1091                     b.append(COMMENT_PREFIX);
   1092                 }
   1093                 b.append("<!-- ").append(c).append(" -->");
   1094             }
   1095         }
   1096     }
   1097 
   1098     public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) {
   1099         for (Iterator<T> it = elements.iterator(); it.hasNext();) {
   1100             T item = it.next();
   1101             if (matcher.transform(item) == Boolean.TRUE) {
   1102                 it.remove();
   1103                 return item;
   1104             }
   1105         }
   1106         return null;
   1107     }
   1108 
   1109     public Set<Element> getElements() {
   1110         return new LinkedHashSet<Element>(nameToElement.values());
   1111     }
   1112 
   1113     public Set<Attribute> getAttributes() {
   1114         return new LinkedHashSet<Attribute>(nameToAttributes.values());
   1115     }
   1116 
   1117     public boolean isDistinguishing(String elementName, String attribute) {
   1118         return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished;
   1119     }
   1120 
    // Attribute names that are always tagged <!--@METADATA--> when the DTD is rendered.
    // NOTE(review): the set itself is mutable; nothing visible here modifies it.
    static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft"));
   1122 
   1123     static final Set<String> addUnmodifiable(Set<String> comment, String addition) {
   1124         if (comment == null) {
   1125             return Collections.singleton(addition);
   1126         } else {
   1127             comment = new LinkedHashSet<>(comment);
   1128             comment.add(addition);
   1129             return Collections.unmodifiableSet(comment);
   1130         }
   1131     }
   1132 
   1133     public class IllegalByDtdException extends RuntimeException {
   1134         private static final long serialVersionUID = 1L;
   1135         public final String elementName;
   1136         public final String attributeName;
   1137         public final String attributeValue;
   1138 
   1139         public IllegalByDtdException(String elementName, String attributeName, String attributeValue) {
   1140             this.elementName = elementName;
   1141             this.attributeName = attributeName;
   1142             this.attributeValue = attributeValue;
   1143         }
   1144 
   1145         @Override
   1146         public String getMessage() {
   1147             return "Dtd " + dtdType
   1148                 + " doesnt allow "
   1149                 + "element=" + elementName
   1150                 + (attributeName == null ? "" : ", attribute: " + attributeName)
   1151                 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue);
   1152         }
   1153     }
   1154 
   1155     //@SuppressWarnings("unused")
   1156     public boolean isDeprecated(String elementName, String attributeName, String attributeValue) {
   1157         Element element = nameToElement.get(elementName);
   1158         if (element == null) {
   1159             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
   1160         } else if (element.isDeprecatedElement) {
   1161             return true;
   1162         }
   1163         if ("*".equals(attributeName) || "_q".equals(attributeName)) {
   1164             return false;
   1165         }
   1166         Attribute attribute = element.getAttributeNamed(attributeName);
   1167         if (attribute == null) {
   1168             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
   1169         } else if (attribute.isDeprecatedAttribute) {
   1170             return true;
   1171         }
   1172         return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*"
   1173     }
   1174 
   1175     public boolean isOrdered(String elementName) {
   1176         Element element = nameToElement.get(elementName);
   1177         if (element == null) {
   1178             if (elementName.startsWith("icu:")) {
   1179                 return false;
   1180             }
   1181             throw new IllegalByDtdException(elementName, null, null);
   1182         }
   1183         return element.isOrderedElement;
   1184     }
   1185 
   1186     public AttributeStatus getAttributeStatus(String elementName, String attributeName) {
   1187         if ("_q".equals(attributeName)) {
   1188             return AttributeStatus.distinguished; // special case
   1189         }
   1190         if ("#PCDATA".equals(elementName)) {
   1191             int debug = 1;
   1192         }
   1193         Element element = nameToElement.get(elementName);
   1194         if (element == null) {
   1195             if (elementName.startsWith("icu:")) {
   1196                 return AttributeStatus.distinguished;
   1197             }
   1198             throw new IllegalByDtdException(elementName, attributeName, null);
   1199         }
   1200         Attribute attribute = element.getAttributeNamed(attributeName);
   1201         if (attribute == null) {
   1202             if (elementName.startsWith("icu:")) {
   1203                 return AttributeStatus.distinguished;
   1204             }
   1205             throw new IllegalByDtdException(elementName, attributeName, null);
   1206         }
   1207         return attribute.attributeStatus;
   1208     }
   1209 
    // The default is a map comparator, which compares numbers as numbers, and strings with UCA
    private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze();

    // The fixed orderings below are selected by getAttributeValueComparator according to
    // the element and attribute names.

    // Weekday codes; used for the "day" attribute and for type on <day>.
    static MapComparator<String> dayValueOrder = new MapComparator<String>().add(
        "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze();
    // Day-period codes; used for type on <dayPeriod>.
    static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add(
        "midnight", "am", "noon", "pm",
        "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2",
        // The ones on the following line are no longer used actively. Can be removed later?
        "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze();
    // Used for type on <listPatternPart>.
    static MapComparator<String> listPatternOrder = new MapComparator<String>().add(
        "start", "middle", "end", "2", "3").freeze();
    // Used for type on elements whose name ends in "Width".
    static MapComparator<String> widthOrder = new MapComparator<String>().add(
        "abbreviated", "narrow", "short", "wide", "all").freeze();
    // Used for type on elements whose name ends in "FormatLength".
    static MapComparator<String> lengthOrder = new MapComparator<String>().add(
        "full", "long", "medium", "short").freeze();
    // Used for type on <field>.
    // NOTE(review): the "dayperiod" row lists -short before the base form, unlike every
    // other row -- confirm whether that is intentional.
    static MapComparator<String> dateFieldOrder = new MapComparator<String>().add(
        "era", "era-short", "era-narrow",
        "year", "year-short", "year-narrow",
        "quarter", "quarter-short", "quarter-narrow",
        "month", "month-short", "month-narrow",
        "week", "week-short", "week-narrow",
        "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow",
        "day", "day-short", "day-narrow",
        "dayOfYear", "dayOfYear-short", "dayOfYear-narrow",
        "weekday", "weekday-short", "weekday-narrow",
        "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow",
        "sun", "sun-short", "sun-narrow",
        "mon", "mon-short", "mon-narrow",
        "tue", "tue-short", "tue-narrow",
        "wed", "wed-short", "wed-narrow",
        "thu", "thu-short", "thu-narrow",
        "fri", "fri-short", "fri-narrow",
        "sat", "sat-short", "sat-narrow",
        "dayperiod-short", "dayperiod", "dayperiod-narrow",
        "hour", "hour-short", "hour-narrow",
        "minute", "minute-short", "minute-narrow",
        "second", "second-short", "second-narrow",
        "zone", "zone-short", "zone-narrow").freeze();
    // Fixed ordering of unit identifiers; used by getAttributeValueComparator for type on <unit>.
    // Each identifier has the form "<category>-<unit>", and the entries are listed category by category.
    static MapComparator<String> unitOrder = new MapComparator<String>().add(
        "acceleration-g-force", "acceleration-meter-per-second-squared",
        "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second",
        "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter",
        "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch",
        "concentr-karat",
        "concentr-milligram-per-deciliter", "concentr-millimole-per-liter",
        "concentr-part-per-million", "concentr-percent", "concentr-permille",
        "consumption-liter-per-kilometer", "consumption-liter-per-100kilometers",
        "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial",
        "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit",
        "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit",
        "digital-byte", "digital-bit",
        "duration-century",
        "duration-year", "duration-year-person",
        "duration-month", "duration-month-person",
        "duration-week", "duration-week-person",
        "duration-day", "duration-day-person",
        "duration-hour", "duration-minute", "duration-second",
        "duration-millisecond", "duration-microsecond", "duration-nanosecond",
        "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt",
        "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour",
        "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz",
        "length-kilometer", "length-meter", "length-decimeter", "length-centimeter",
        "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer",
        "length-mile", "length-yard", "length-foot", "length-inch",
        "length-parsec", "length-light-year", "length-astronomical-unit",
        "length-furlong", "length-fathom",
        "length-nautical-mile", "length-mile-scandinavian",
        "length-point",
        "light-lux",
        "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram",
        "mass-ton", "mass-stone", "mass-pound", "mass-ounce",
        "mass-ounce-troy", "mass-carat",
        "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt",
        "power-horsepower",
        "pressure-hectopascal", "pressure-millimeter-of-mercury",
        "pressure-pound-per-square-inch", "pressure-inch-hg", "pressure-millibar", "pressure-atmosphere",
        "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot",
        "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin",
        "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter",
        "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch",
        "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter",
        "volume-pint-metric", "volume-cup-metric",
        "volume-acre-foot",
        "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup",
        "volume-fluid-ounce", "volume-tablespoon", "volume-teaspoon").freeze();
   1296 
    // Used by getAttributeValueComparator for the "count" attribute (except on <minDays>).
    static MapComparator<String> countValueOrder = new MapComparator<String>().add(
        "0", "1", "zero", "one", "two", "few", "many", "other").freeze();
    // Used for type on <unitLength>.
    static MapComparator<String> unitLengthOrder = new MapComparator<String>().add(
        "long", "short", "narrow").freeze();
    // Used for type on <currencyFormat>.
    static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add(
        "standard", "accounting").freeze();
    // Used for type on <zone>; obtained from StandardCodes.
    static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator();

    // The collator from CLDRConfig, viewed as a String comparator.
    // NOTE(review): the cast goes through the raw Comparator type, so it is unchecked.
    static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator();
   1306 
   1307     // Hack for US
   1308     static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<String>() {
   1309         @Override
   1310         public int compare(String o1, String o2) {
   1311             if (o1.contains("{")) {
   1312                 o1 = o1.replace("{", "");
   1313             }
   1314             if (o2.contains("{")) {
   1315                 o2 = o2.replace("{", "");
   1316             }
   1317             return COMP.compare(o1, o2);
   1318         }
   1319 
   1320     };
   1321 
   1322     public static Comparator<String> getAttributeValueComparator(String element, String attribute) {
   1323         return getAttributeValueComparator(DtdType.ldml, element, attribute);
   1324     }
   1325 
   1326     static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) {
   1327         // The default is a map comparator, which compares numbers as numbers, and strings with UCA
   1328         Comparator<String> comp = valueOrdering;
   1329         if (type != DtdType.ldml && type != DtdType.ldmlICU) {
   1330             return comp;
   1331         }
   1332         if (attribute.equals("day")) { // && (element.startsWith("weekend")
   1333             comp = dayValueOrder;
   1334         } else if (attribute.equals("type")) {
   1335             if (element.endsWith("FormatLength")) {
   1336                 comp = lengthOrder;
   1337             } else if (element.endsWith("Width")) {
   1338                 comp = widthOrder;
   1339             } else if (element.equals("day")) {
   1340                 comp = dayValueOrder;
   1341             } else if (element.equals("field")) {
   1342                 comp = dateFieldOrder;
   1343             } else if (element.equals("zone")) {
   1344                 comp = zoneOrder;
   1345             } else if (element.equals("listPatternPart")) {
   1346                 comp = listPatternOrder;
   1347             } else if (element.equals("currencyFormat")) {
   1348                 comp = currencyFormatOrder;
   1349             } else if (element.equals("unitLength")) {
   1350                 comp = unitLengthOrder;
   1351             } else if (element.equals("unit")) {
   1352                 comp = unitOrder;
   1353             } else if (element.equals("dayPeriod")) {
   1354                 comp = dayPeriodOrder;
   1355             }
   1356         } else if (attribute.equals("count") && !element.equals("minDays")) {
   1357             comp = countValueOrder;
   1358         } else if (attribute.equals("cp") && element.equals("annotation")) {
   1359             comp = UNICODE_SET_COMPARATOR;
   1360         }
   1361         return comp;
   1362     }
   1363 
   1364     /**
   1365      * Comparator for attributes in CLDR files
   1366      */
   1367     private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() {
   1368         @Override
   1369         public int compare(String element, String attribute, String value1, String value2) {
   1370             Comparator<String> comp = getAttributeValueComparator(element, attribute);
   1371             return comp.compare(value1, value2);
   1372         }
   1373     };
   1374 
   1375     public boolean hasValue(String elementName) {
   1376         return nameToElement.get(elementName).type == ElementType.PCDATA;
   1377     }
   1378 
   1379     public boolean isMetadata(XPathParts pathPlain) {
   1380         for (String s : pathPlain.getElements()) {
   1381             Element e = getElementFromName().get(s);
   1382             if (e.elementStatus == ElementStatus.metadata) {
   1383                 return true;
   1384             }
   1385         }
   1386         return false;
   1387     }
   1388 
   1389     public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) {
   1390         // TODO Don't use hard-coded list; instead add to DTD annotations
   1391         final String element1 = pathPlain.getElement(1);
   1392         final String element2 = pathPlain.getElement(2);
   1393         final String elementN = pathPlain.getElement(-1);
   1394         switch (dtdType2) {
   1395         case ldml:
   1396             switch (element1) {
   1397             case "generation":
   1398             case "metadata":
   1399                 return true;
   1400             }
   1401             break;
   1402         case ldmlBCP47:
   1403             switch (element1) {
   1404             case "generation":
   1405             case "version":
   1406                 return true;
   1407             }
   1408             break;
   1409         ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment
   1410         case supplementalData:
   1411             // these are NOT under /metadata/ but are actually metadata
   1412             switch (element1) {
   1413             case "generation":
   1414             case "version":
   1415             case "validity":
   1416             case "references":
   1417             case "coverageLevels":
   1418                 return true;
   1419             case "transforms":
   1420                 return elementN.equals("comment");
   1421             case "metadata":
   1422                 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata.
   1423                 switch (element2) {
   1424                 case "validity":
   1425                 case "serialElements":
   1426                 case "suppress":
   1427                 case "distinguishing":
   1428                 case "blocking":
   1429                 case "casingData":
   1430                     return true;
   1431                 }
   1432                 break;
   1433             }
   1434             break;
   1435         default:
   1436         }
   1437         return false;
   1438     }
   1439 
   1440     public boolean isDeprecated(XPathParts pathPlain) {
   1441         for (int i = 0; i < pathPlain.size(); ++i) {
   1442             String elementName = pathPlain.getElement(i);
   1443             if (isDeprecated(elementName, "*", null)) {
   1444                 return true;
   1445             }
   1446             for (String attribute : pathPlain.getAttributeKeys(i)) {
   1447                 String attributeValue = pathPlain.getAttributeValue(i, attribute);
   1448                 if (isDeprecated(elementName, attribute, attributeValue)) {
   1449                     return true;
   1450                 }
   1451             }
   1452         }
   1453         return false;
   1454     }
   1455 
   1456     public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings();
   1457     public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings();
   1458     public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings();
   1459 
   1460     private static class XPathPartsSet {
   1461         private final Set<XPathParts> list = new LinkedHashSet<>();
   1462 
   1463         private void addElement(String element) {
   1464             if (list.isEmpty()) {
   1465                 list.add(new XPathParts().addElement(element));
   1466             } else {
   1467                 for (XPathParts item : list) {
   1468                     item.addElement(element);
   1469                 }
   1470             }
   1471         }
   1472 
   1473         private void addAttribute(String attribute, String attributeValue) {
   1474             for (XPathParts item : list) {
   1475                 item.addAttribute(attribute, attributeValue);
   1476             }
   1477         }
   1478 
   1479         private void setElement(int i, String string) {
   1480             for (XPathParts item : list) {
   1481                 item.setElement(i, string);
   1482             }
   1483         }
   1484 
   1485 //        private int size() {
   1486 //            return list.iterator().next().size();
   1487 //        }
   1488 //
   1489 //        private void removeElement(int i) {
   1490 //            for (XPathParts item : list) {
   1491 //                item.removeElement(i);
   1492 //            }
   1493 //        }
   1494 
   1495         private void addAttributes(String attribute, List<String> attributeValues) {
   1496             if (attributeValues.size() == 1) {
   1497                 addAttribute(attribute, attributeValues.iterator().next());
   1498             } else {
   1499                 // duplicate all the items in the list with the given values
   1500                 Set<XPathParts> newList = new LinkedHashSet<>();
   1501                 for (XPathParts item : list) {
   1502                     for (String attributeValue : attributeValues) {
   1503                         XPathParts newItem = item.cloneAsThawed();
   1504                         newItem.addAttribute(attribute, attributeValue);
   1505                         newList.add(newItem);
   1506                     }
   1507                 }
   1508                 list.clear();
   1509                 list.addAll(newList);
   1510             }
   1511         }
   1512 
   1513         private ImmutableSet<String> toStrings() {
   1514             Builder<String> result = new ImmutableSet.Builder<>();
   1515 
   1516             for (XPathParts item : list) {
   1517                 result.add(item.toString());
   1518             }
   1519             return result.build();
   1520         }
   1521 
   1522         @Override
   1523         public String toString() {
   1524             return list.toString();
   1525         }
   1526     }
   1527 
   1528     public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) {
   1529         extras.clear();
   1530         Map<String, String> valueAttributes = new HashMap<>();
   1531         XPathPartsSet pathResult = new XPathPartsSet();
   1532         String element = null;
   1533         for (int i = 0; i < pathPlain.size(); ++i) {
   1534             element = pathPlain.getElement(i);
   1535             pathResult.addElement(element);
   1536             valueAttributes.clear();
   1537             for (String attribute : pathPlain.getAttributeKeys(i)) {
   1538                 AttributeStatus status = getAttributeStatus(element, attribute);
   1539                 final String attributeValue = pathPlain.getAttributeValue(i, attribute);
   1540                 switch (status) {
   1541                 case distinguished:
   1542                     AttributeType attrType = getAttributeType(element, attribute);
   1543                     if (attrType == AttributeType.NMTOKENS) {
   1544                         pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue));
   1545                     } else {
   1546                         pathResult.addAttribute(attribute, attributeValue);
   1547                     }
   1548                     break;
   1549                 case value:
   1550                     valueAttributes.put(attribute, attributeValue);
   1551                     break;
   1552                 case metadata:
   1553                     break;
   1554                 }
   1555             }
   1556             if (!valueAttributes.isEmpty()) {
   1557                 boolean hasValue = hasValue(element);
   1558                 // if it doesn't have a value, we construct new child elements, with _ prefix
   1559                 // if it does have a value, we have to play a further trick, since
   1560                 // we can't have a value and child elements at the same level.
   1561                 // So we use a _ suffix on the element.
   1562                 if (hasValue) {
   1563                     pathResult.setElement(i, element + "_");
   1564                 } else {
   1565                     int debug = 0;
   1566                 }
   1567                 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) {
   1568                     final String attribute = attributeAndValue.getKey();
   1569                     final String attributeValue = attributeAndValue.getValue();
   1570 
   1571                     Set<String> pathsShort = pathResult.toStrings();
   1572                     AttributeType attrType = getAttributeType(element, attribute);
   1573                     for (String pathShort : pathsShort) {
   1574                         pathShort += "/_" + attribute;
   1575                         if (attrType == AttributeType.NMTOKENS) {
   1576                             for (String valuePart : SPACE_SPLITTER.split(attributeValue)) {
   1577                                 extras.put(pathShort, valuePart);
   1578                             }
   1579                         } else {
   1580                             extras.put(pathShort, attributeValue);
   1581                         }
   1582                     }
   1583                 }
   1584                 if (hasValue) {
   1585                     pathResult.setElement(i, element); // restore
   1586                 }
   1587             }
   1588         }
   1589         // Only add the path if it could have a value, looking at the last element
   1590         if (!hasValue(element)) {
   1591             return null;
   1592         }
   1593         return pathResult.toStrings();
   1594     }
   1595 
   1596     public AttributeType getAttributeType(String elementName, String attributeName) {
   1597         Element element = nameToElement.get(elementName);
   1598         if (element == null) {
   1599             return null;
   1600         }
   1601         Attribute attr = element.getAttributeNamed(attributeName);
   1602         if (attr == null) {
   1603             return null;
   1604         }
   1605         return attr.type;
   1606     }
   1607 
   1608     // TODO: add support for following to DTD annotations, and rework API
   1609 
   1610     static final Set<String> SPACED_VALUES = ImmutableSet.of(
   1611         "idValidity",
   1612         "languageGroup");
   1613 
   1614     public static Splitter getValueSplitter(XPathParts pathPlain) {
   1615         if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) {
   1616             return SPACE_SPLITTER;
   1617         } else if (pathPlain.getElement(-1).equals("annotation")
   1618             && !pathPlain.getAttributeKeys(-1).contains("tts")) {
   1619             return BAR_SPLITTER;
   1620         }
   1621         return CR_SPLITTER;
   1622     }
   1623 
   1624     public static boolean isComment(XPathParts pathPlain, String line) {
   1625         if (pathPlain.contains("transform")) {
   1626             if (line.startsWith("#")) {
   1627                 return true;
   1628             }
   1629         }
   1630         return false;
   1631     }
   1632 
   1633     public static boolean isExtraSplit(String extraPath) {
   1634         if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
   1635             return true;
   1636         }
   1637         return false;
   1638     }
   1639 
   1640     // ALWAYS KEEP AT END, FOR STATIC INIT ORDER
   1641     private static final Map<DtdType, DtdData> CACHE;
   1642     static {
   1643         EnumMap<DtdType, DtdData> temp = new EnumMap<DtdType, DtdData>(DtdType.class);
   1644         for (DtdType type : DtdType.values()) {
   1645             temp.put(type, getInstance(type, null));
   1646         }
   1647         CACHE = Collections.unmodifiableMap(temp);
   1648     }
   1649     // ALWAYS KEEP AT END, FOR STATIC INIT ORDER
   1650 }
   1651