package org.unicode.cldr.util;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.ToolConfig;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.AttributeType;
import org.unicode.cldr.util.DtdData.Element;
import org.unicode.cldr.util.DtdData.ElementType;

import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.impl.Row.R4;

/**
 * Command-line tool that regenerates a DTD file from {@link DtdData} for each requested
 * {@link DtdType} and contains (currently short-circuited) diagnostics that walk the element
 * tree, report deprecated and distinguishing attributes, and time the DTD comparator.
 */
public class DtdDataCheck {

    static SupplementalDataInfo SUPPLEMENTAL = SupplementalDataInfo.getInstance();

    /** Every (type, element, attribute, value) tuple reported as deprecated while walking. */
    static final Set<Row.R4<DtdType, String, String, String>> DEPRECATED = new LinkedHashSet<Row.R4<DtdType, String, String, String>>();

    /**
     * For each (type, attribute) pair: the element names on which the attribute is distinguishing
     * (key {@code true}) and those on which it is not (key {@code false}).
     */
    static final Map<Row.R2<DtdType, String>, Relation<Boolean, String>> TYPE_ATTRIBUTE_TO_DIST_ELEMENTS = new TreeMap<Row.R2<DtdType, String>, Relation<Boolean, String>>();

    private static final boolean CHECK_CORRECTNESS = false;

    /** Walks the element tree of one {@link DtdData} and prints what it finds to standard output. */
    private static class Walker {
        HashSet<Element> seen = new HashSet<Element>();
        Set<Element> elementsMissingDraft = new LinkedHashSet<Element>();
        Set<Element> elementsMissingAlt = new LinkedHashSet<Element>();
        /** Attributes that are collected for statistics but not printed per element. */
        static final Set<String> SKIP_ATTRIBUTES = new HashSet<String>(Arrays.asList(
            "draft", "alt", "standard", "references"));
        /** Child elements that the walk does not descend into. */
        static final Set<String> SKIP_ELEMENTS = new HashSet<String>(Arrays.asList(
            "alias", "special"));
        Set<Attribute> attributesWithDefaultValues = new LinkedHashSet<Attribute>();

        private DtdData dtdData;

        public Walker(DtdData dtdData) {
            this.dtdData = dtdData;
        }

        /**
         * Prints one tab-separated line (root, element name, attribute name, default value) for
         * every attribute that has a non-null default value.
         */
        private void showSuppressed() {
            for (Entry<String, Element> ee : dtdData.getElementFromName().entrySet()) {
                Element element = ee.getValue();
                for (Entry<Attribute, Integer> ae : element.getAttributes().entrySet()) {
                    Attribute a = ae.getKey();
                    if (a.defaultValue != null) {
                        System.out.println(dtdData.ROOT + "\t" + element.name + "\t" + a.name + "\t" + a.defaultValue);
                    }
                }
            }
        }

        /**
         * Prints the tree rooted at {@code element}, followed by summary sections: elements
         * lacking {@code draft} / {@code alt} (ldml only), attributes with default values, and
         * attributes whose features differ between elements.
         *
         * @param element root of the subtree to print
         */
        private void show(Element element) {
            show(element, "");
            System.out.println();
            if (dtdData.dtdType == DtdType.ldml && elementsMissingDraft.size() != 0) {
                System.out.println("*Elements missing draft:\t" + elementsMissingDraft);
                System.out.println();
            }
            if (dtdData.dtdType == DtdType.ldml && elementsMissingAlt.size() != 0) {
                System.out.println("*Elements missing alt:\t" + elementsMissingAlt);
                System.out.println();
            }
            if (attributesWithDefaultValues.size() != 0) {
                System.out.println("*Attributes with default values:");
                for (Attribute a : attributesWithDefaultValues) {
                    System.out.println("\t" + a + "\t" + a.features());
                }
                System.out.println();
            }
            StringBuilder diff = new StringBuilder();
            for (Entry<String, Set<Attribute>> entry : dtdData.getAttributesFromName().keyValuesSet()) {
                // Group the elements carrying this attribute name by the attribute's feature string.
                Relation<String, String> featuresToElements = Relation.of(new TreeMap<String, Set<String>>(), LinkedHashSet.class);
                for (Attribute a : entry.getValue()) {
                    featuresToElements.put(a.features(), a.element.name);
                }
                if (featuresToElements.size() != 1) {
                    diff.append("\t" + entry.getKey() + "\n");
                    for (Entry<String, Set<String>> entry2 : featuresToElements.keyValuesSet()) {
                        diff.append("\t\t" + entry2.getKey() + "\n");
                        diff.append("\t\t\t on " + entry2.getValue() + "\n");
                    }
                }
            }
            if (diff.length() != 0) {
                System.out.println("*Attributes with different features by element:");
                System.out.println(diff);
                System.out.println();
            }
        }

        /**
         * Recursively prints {@code element}, its attributes and its children, one level of tab
         * indentation per depth. An element already visited is printed once more with a trailing
         * {@code *} and not expanded again. Side effects: records deprecated tuples in
         * {@link DtdDataCheck#DEPRECATED} and distinguishing information in
         * {@link DtdDataCheck#TYPE_ATTRIBUTE_TO_DIST_ELEMENTS}.
         *
         * @param element element to print
         * @param indent prefix printed before each line at this depth
         */
        private void show(Element element, String indent) {
            if (seen.contains(element)) {
                System.out.println(indent + element.name + "*");
            } else {
                seen.add(element);
                if (!element.containsAttribute("draft")) {
                    elementsMissingDraft.add(element);
                }
                if (!element.containsAttribute("alt")) {
                    elementsMissingAlt.add(element);
                }
                ElementType type = element.getType();
                System.out.println(indent + element.name + (type == ElementType.CHILDREN ? "" : "\t" + type));
                indent += "\t";
                for (Attribute a : element.getAttributes().keySet()) {
                    if (a.defaultValue != null) {
                        attributesWithDefaultValues.add(a);
                    }
                    if (SKIP_ATTRIBUTES.contains(a.name)) {
                        continue;
                    }
                    String special = "";
                    boolean allDeprecated = false;
                    if (SUPPLEMENTAL.isDeprecated(dtdData.dtdType, element.name, a.name, "*")) {
                        special += "\t#DEPRECATED#";
                        allDeprecated = true;
                        DEPRECATED.add(Row.of(dtdData.dtdType, element.name, a.name, "*"));
                    } else if (a.type == AttributeType.ENUMERATED_TYPE) {
                        for (String value : a.values.keySet()) {
                            if (SUPPLEMENTAL.isDeprecated(dtdData.dtdType, element.name, a.name, value)) {
                                special += "\t#DEPRECATED:" + value + "#";
                                DEPRECATED.add(Row.of(dtdData.dtdType, element.name, a.name, value));
                            }
                        }
                    }
                    if (!allDeprecated) {
                        R2<DtdType, String> key = Row.of(dtdData.dtdType, a.name);
                        boolean isDisting = CLDRFile.isDistinguishing(dtdData.dtdType, element.name, a.name);
                        // Only tag attributes that really are distinguishing; the relation below
                        // still records both outcomes.
                        if (isDisting) {
                            special += "\t#DISTINGUISHING#";
                        }
                        Relation<Boolean, String> info = TYPE_ATTRIBUTE_TO_DIST_ELEMENTS.get(key);
                        if (info == null) {
                            TYPE_ATTRIBUTE_TO_DIST_ELEMENTS.put(key, info = Relation.of(new TreeMap<Boolean, Set<String>>(), TreeSet.class));
                        }
                        info.put(isDisting, element.name);
                    }
                    System.out.println(indent + "@" + a.name + "\t" + a.features() + special);
                }
                for (Element e : element.getChildren().keySet()) {
                    if (SKIP_ELEMENTS.contains(e.name)) {
                        continue;
                    }
                    show(e, indent);
                }
            }
        }
    }

    /**
     * Entry point. Each argument must be the name of a {@link DtdType}; with no arguments all
     * types are used. For the first type, the DTD text is written to
     * {@code <GEN_DIRECTORY>dataproj/src/temp/<type>-gen.dtd} and the elapsed time is printed.
     *
     * @param args names of {@link DtdType} constants, or empty for all of them
     * @throws IOException if the generated DTD file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            DtdType[] args2 = DtdType.values();
            args = new String[args2.length];
            int i = 0;
            for (DtdType arg : args2) {
                args[i++] = arg.name();
            }
        }
        Timer timer = new Timer();
        for (String arg : args) {

            timer.start();
            DtdType type = DtdType.valueOf(arg);
            DtdData dtdData = DtdData.getInstance(type);
            PrintWriter br = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "dataproj/src/temp/", type
                + "-gen.dtd");
            try {
                br.append(dtdData.toString());
            } finally {
                // Close even if writing fails so the file handle is not leaked.
                br.close();
            }
            timer.stop();
            System.out.println("Time: " + timer);
            // NOTE(review): this unconditional return ends main after the FIRST argument, so only
            // one DTD is written and nothing below this point ever runs. It looks like a
            // debugging shortcut; confirm whether it is still wanted before removing it.
            if (true) {
                return;
            }
            new Walker(dtdData).show(dtdData.ROOT);
            if (CHECK_CORRECTNESS && type == DtdType.ldml) {
                Set<String> errors = new LinkedHashSet<String>();
                //                checkOrder(dtdData.ROOT, errors);
                //                for (String error : errors) {
                //                    System.out.println("ERROR:\t" + error);
                //                }
                //                errors.clear();
                dtdData = DtdData.getInstance(DtdType.ldml);
                //                AttributeValueComparator avc = new AttributeValueComparator() {
                //                    @Override
                //                    public int compare(String element, String attribute, String value1, String value2) {
                //                        Comparator<String> comp = CLDRFile.getAttributeValueComparator(element, attribute);
                //                        return comp.compare(value1, value2);
                //                    }
                //                };
                Comparator<String> comp = dtdData.getDtdComparator(null);
                CLDRFile test = ToolConfig.getToolInstance().getEnglish();
                Set<String> sorted = new TreeSet<String>(test.getComparator());
                CollectionUtilities.addAll(test.iterator(), sorted);
                String[] sortedArray = sorted.toArray(new String[sorted.size()]);

                // compare for identity
                String lastPath = null;
                for (String currentPath : sortedArray) {
                    if (lastPath != null) {
                        int compValue = comp.compare(lastPath, currentPath);
                        if (compValue >= 0) {
                            // Result deliberately ignored; presumably a breakpoint target for
                            // stepping into the failing comparison.
                            comp.compare(lastPath, currentPath);
                            errors.add(lastPath + " " + currentPath);
                        }
                    }
                    lastPath = currentPath;
                }
                for (String error : errors) {
                    System.err.println("ERROR:\t" + error);
                }
                if (errors.size() != 0) {
                    throw new IllegalArgumentException();
                }
                // check cost
                checkCost("DtdComparator", sortedArray, comp);
                checkCost("DtdComparator(null)", sortedArray, dtdData.getDtdComparator(null));
                //                checkCost("CLDRFile.ldmlComparator", sortedArray, CLDRFile.getLdmlComparator());
                //checkCost("XPathParts", sortedArray);

            }
        }

        // Time a second lookup of each DtdData instance.
        for (String arg : args) {
            timer.start();
            DtdData.getInstance(DtdType.valueOf(arg));
            timer.stop();
            System.out.println("Time: " + timer);
        }
        int i = 0;
        System.out.println(" <distinguishing>");
        Set<String> allElements = new TreeSet<String>();
        allElements.add("_q");
        DtdType lastType = null;

        for (Entry<R2<DtdType, String>, Relation<Boolean, String>> typeAttributeToDistElement : TYPE_ATTRIBUTE_TO_DIST_ELEMENTS.entrySet()) {
            R2<DtdType, String> typeAttribute = typeAttributeToDistElement.getKey();
            Relation<Boolean, String> distElement = typeAttributeToDistElement.getValue();
            Set<String> areDisting = distElement.get(true);
            if (areDisting == null) {
                continue;
            }
            DtdType type = typeAttribute.get0();
            if (lastType != type) {
                // Type changed: flush the attributes that were distinguishing on all elements.
                if (lastType != null) {
                    showAll(lastType, allElements);
                }
                lastType = type;
            }
            String attribute = typeAttribute.get1();
            Set<String> areNotDisting = distElement.get(false);
            if (areNotDisting == null) {
                allElements.add(attribute);
                continue;
            }
            System.out.println(" <distinguishingItems"
                + " type=\"" + type
                + "\" elements=\"" + CollectionUtilities.join(areDisting, " ")
                + "\" attributes=\"" + attribute
                + "\"/>"
                + "\n <!-- NONDISTINGUISH."
                + " TYPE=\"" + type
                + "\" ELEMENTS=\"" + CollectionUtilities.join(areNotDisting, " ")
                + "\" ATTRIBUTES=\"" + attribute
                + "\" -->");
        }
        showAll(lastType, allElements);
        System.out.println(" </distinguishing>");

        i = 0;
        for (R4<DtdType, String, String, String> x : DEPRECATED) {
            System.out.println(++i + "\tDEPRECATED\t" + x);
        }
        for (String arg : args) {
            DtdType type = DtdType.valueOf(arg);
            DtdData dtdData = DtdData.getInstance(type);
            System.out.println("\n" + arg);
            new Walker(dtdData).showSuppressed();
        }
    }

    /**
     * Prints a {@code distinguishingItems} line with {@code elements="*"} listing the collected
     * attribute names, then resets the set so that it contains only {@code "_q"}.
     *
     * @param type DTD type written into the {@code type} attribute
     * @param allElements attribute names to print; cleared and re-seeded by this call
     */
    public static void showAll(DtdType type, Set<String> allElements) {
        System.out.println(" <distinguishingItems"
            + " type=\"" + type
            + "\" elements=\"*"
            + "\" attributes=\"" + CollectionUtilities.join(allElements, " ")
            + "\"/>");
        allElements.clear();
        allElements.add("_q");
    }

    /** Number of passes each {@code checkCost} overload makes over its input. */
    static final int LOOP = 100;

    /**
     * Times {@link #LOOP} passes of comparing each adjacent pair of paths with {@code comp} and
     * prints the result.
     *
     * @param title label printed in front of the timing
     * @param sortedArray paths to compare pairwise, in order
     * @param comp comparator being measured
     */
    private static void checkCost(String title, String[] sortedArray, Comparator<String> comp) {
        Timer timer = new Timer();
        for (int i = 0; i < LOOP; ++i) {
            String lastPath = null;
            for (String currentPath : sortedArray) {
                if (lastPath != null) {
                    // Only the cost of the call matters; the result is not needed.
                    comp.compare(lastPath, currentPath);
                }
                lastPath = currentPath;
            }
        }
        timer.stop();
        System.out.println(title + "\tTime:\t" + timer.toString(LOOP));
    }

    /**
     * Times {@link #LOOP} passes of parsing every path with a single reused {@link XPathParts}
     * and prints the result.
     *
     * @param title label printed in front of the timing
     * @param sortedArray paths to parse
     */
    private static void checkCost(String title, String[] sortedArray) {
        XPathParts parts = new XPathParts();
        Timer timer = new Timer();
        for (int i = 0; i < LOOP; ++i) {
            for (String currentPath : sortedArray) {
                parts.set(currentPath);
            }
        }
        // Stop the timer explicitly, matching the comparator overload above.
        timer.stop();
        System.out.println(title + "\tTime:\t" + timer.toString(LOOP));
    }

    //    private static void checkOrder(Element element, Set<String> errors) {
    //        // compare attributes
    //        Attribute lastAttribute = null;
    //        for (Attribute attribute : element.attributes.keySet()) {
    //            Comparator<String> comp = CLDRFile.getAttributeValueComparator(element.name, attribute.name);
    //            if (attribute.values.size() != 0) {
    //                String lastAttributeValue = null;
    //                for (String value : attribute.values.keySet()) {
    //                    if (lastAttributeValue != null) {
    //                        int stockCompare = comp.compare(lastAttributeValue, value);
    //                        if (stockCompare >= 0) {
    //                            errors.add("Failure with "
    //                                + element.name
    //                                + ":" + attribute.name
    //                                + " values:\t" + lastAttributeValue + " " + value);
    //                        }
    //                    }
    //                    lastAttributeValue = value;
    //                }
    //            }
    //            if (lastAttribute != null) {
    //                int stockCompare = CLDRFile.getAttributeComparator().compare(lastAttribute.name, attribute.name);
    //                if (stockCompare >= 0) {
    //                    errors.add("Failure with attributes:\t" + lastAttribute.name + " " + attribute.name);
    //                }
    //            }
    //            lastAttribute = attribute;
    //        }
    //        // compare child elements
    //        Element lastElement = null;
    //        for (Element child : element.children.keySet()) {
    //            if (lastElement != null) {
    //                int stockCompare = CLDRFile.getElementOrderComparator().compare(lastElement.name, child.name);
    //                if (stockCompare >= 0) {
    //                    errors.add("Failure with elements:\t" + lastElement.name + " " + child.name);
    //                }
    //            }
    //            checkOrder(child, errors);
    //            lastElement = child;
    //        }
    //    }

}