Home | History | Annotate | Download | only in tool
      1 package org.unicode.cldr.tool;
      2 
      3 import java.io.File;
      4 import java.io.IOException;
      5 import java.io.PrintWriter;
      6 import java.util.Comparator;
      7 import java.util.HashSet;
      8 import java.util.Set;
      9 import java.util.TreeSet;
     10 
     11 import org.unicode.cldr.draft.FileUtilities;
     12 import org.unicode.cldr.util.CLDRFile;
     13 import org.unicode.cldr.util.CLDRFile.Status;
     14 import org.unicode.cldr.util.CLDRPaths;
     15 import org.unicode.cldr.util.CldrUtility;
     16 import org.unicode.cldr.util.Counter;
     17 import org.unicode.cldr.util.EscapingUtilities;
     18 import org.unicode.cldr.util.Factory;
     19 import org.unicode.cldr.util.PrettyPath;
     20 import org.unicode.cldr.util.SimpleFactory;
     21 import org.unicode.cldr.util.Timer;
     22 
     23 import com.ibm.icu.dev.util.CollectionUtilities;
     24 import com.ibm.icu.impl.Row;
     25 import com.ibm.icu.impl.Row.R2;
     26 import com.ibm.icu.text.Collator;
     27 import com.ibm.icu.text.NumberFormat;
     28 import com.ibm.icu.text.UTF16;
     29 
     30 public class GenerateComparison {
     31 
    // Turns CLDR XPaths into "pretty" sortable/display paths. Assigned in main() before the
    // per-locale loop starts, and used there for every row that is added to a change table.
    private static PrettyPath prettyPathMaker;

    // Collator obtained without an explicit locale; EnglishRowComparator uses it as the primary
    // ordering for both row elements.
    private static Collator collator = Collator.getInstance();
     35 
     36     static class EnglishRowComparator implements Comparator<R2<String, String>> {
     37         private static Comparator<String> unicode = new UTF16.StringComparator(true, false, 0);
     38 
     39         public int compare(R2<String, String> arg0, R2<String, String> arg1) {
     40             int result = collator.compare(arg0.get0(), arg1.get0());
     41             if (result != 0) return result;
     42             result = unicode.compare(arg0.get0(), arg1.get0());
     43             if (result != 0) return result;
     44             result = collator.compare(arg0.get1(), arg1.get1());
     45             if (result != 0) return result;
     46             result = unicode.compare(arg0.get1(), arg1.get1());
     47             return result;
     48         }
     49     }
     50 
    // Shared comparator instance; main() passes it to the TreeSet that holds the index-page entries.
    static EnglishRowComparator ENG = new EnglishRowComparator();

    // HTML banner that main() appends to the header of every generated page (per-locale pages
    // and the index page).
    static final String warningMessage = "<p><b>Warning: this chart is still under development. For how to use it, see <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\">Help: How to Vet</a>.</b></p>";
     54 
     55     public static void main(String[] args) throws IOException {
     56 
     57         // Setup
     58         Timer timer = new Timer();
     59         Timer totalTimer = new Timer();
     60         long totalPaths = 0;
     61         format = NumberFormat.getNumberInstance();
     62         format.setGroupingUsed(true);
     63 
     64         Counter<String> totalCounter = new Counter<String>();
     65 
     66         // Get the args
     67 
     68         String oldDirectory = CldrUtility.getProperty("oldDirectory", new File(CLDRPaths.BASE_DIRECTORY,
     69             "common/main").getCanonicalPath() + "/");
     70         String newDirectory = CldrUtility.getProperty("newDirectory", new File(CLDRPaths.BASE_DIRECTORY,
     71             "../cldr-release-1-7/common/main").getCanonicalPath() + "/");
     72         String changesDirectory = CldrUtility.getProperty("changesDirectory", new File(CLDRPaths.CHART_DIRECTORY
     73             + "/changes/").getCanonicalPath()
     74             + "/");
     75 
     76         String filter = CldrUtility.getProperty("localeFilter", ".*");
     77         boolean SHOW_ALIASED = CldrUtility.getProperty("showAliased", "false").toLowerCase().startsWith("t");
     78 
     79         // Create the factories
     80 
     81         Factory oldFactory = Factory.make(oldDirectory, filter);
     82         Factory newFactory = Factory.make(newDirectory, filter);
     83         CLDRFile english = newFactory.make("en", true);
     84         CLDRFile newRoot = newFactory.make("root", true);
     85 
     86         // Get the union of all the language locales, sorted by English name
     87 
     88         Set<String> oldList = oldFactory.getAvailableLanguages();
     89         Set<String> newList = newFactory.getAvailableLanguages();
     90         Set<String> unifiedList = new HashSet<String>(oldList);
     91         unifiedList.addAll(newList);
     92         Set<R2<String, String>> pairs = new TreeSet<R2<String, String>>();
     93         for (String code : unifiedList) {
     94             pairs.add(Row.of(english.getName(code), code));
     95         }
     96 
     97         prettyPathMaker = new PrettyPath();
     98         int totalDifferences = 0;
     99         int differences = 0;
    100 
    101         Set<R2<String, String>> indexInfo = new TreeSet<R2<String, String>>(ENG);
    102 
    103         // iterate through those
    104         for (R2<String, String> pair : pairs) {
    105             timer.start();
    106             final String locale = pair.get1();
    107             final String localeName = pair.get0();
    108             System.out.println(locale);
    109             differences = 0;
    110             System.out.println();
    111 
    112             // Create CLDR files for both; null if can't open
    113 
    114             CLDRFile oldFile = null;
    115             if (oldList.contains(locale)) {
    116                 try {
    117                     oldFile = oldFactory.make(locale, true, true);
    118                 } catch (Exception e) {
    119                     addToIndex(indexInfo, "ERROR1.6 ", locale, localeName);
    120                     continue;
    121                 }
    122             } else {
    123                 oldFile = SimpleFactory.makeFile(locale); // make empty file
    124             }
    125             CLDRFile newFile = null;
    126             if (newList.contains(locale)) {
    127                 try {
    128                     newFile = newFactory.make(locale, true, true);
    129                 } catch (Exception e) {
    130                     addToIndex(indexInfo, "ERROR1.7 ", locale, localeName);
    131                     continue;
    132                 }
    133             } else {
    134                 newFile = SimpleFactory.makeFile(locale); // make empty file
    135             }
    136 
    137             // for(String str : newFile) {
    138             // String xo = newFile.getFullXPath(str);
    139             // String v = newFile.getStringValue(str);
    140             //
    141             // System.out.println(xo+"\t"+v+"\n");
    142             //
    143             // }
    144             // Check for null cases
    145 
    146             if (oldFile == null) {
    147                 addToIndex(indexInfo, "NEW ", locale, localeName);
    148                 continue;
    149             } else if (newFile == null) {
    150                 addToIndex(indexInfo, "DELETED ", locale, localeName);
    151                 continue;
    152             }
    153             System.out.println("*** " + localeName + "\t" + locale);
    154             System.out.println();
    155 
    156             // exclude aliased locales
    157             if (newFile.isAliasedAtTopLevel()) {
    158                 continue;
    159             }
    160 
    161             // Get the union of all the paths
    162 
    163             Set<String> paths;
    164             try {
    165                 paths = new HashSet<String>();
    166                 CollectionUtilities.addAll(oldFile.iterator(), paths);
    167                 if (oldList.contains(locale)) {
    168                     paths.addAll(oldFile.getExtraPaths());
    169                 }
    170                 CollectionUtilities.addAll(newFile.iterator(), paths);
    171                 if (newList.contains(locale)) {
    172                     paths.addAll(newFile.getExtraPaths());
    173                 }
    174             } catch (Exception e) {
    175                 System.err.println("Locale: " + locale + ", " + localeName);
    176                 e.printStackTrace();
    177                 addToIndex(indexInfo, "ERROR ", locale, localeName);
    178                 continue;
    179             }
    180 
    181             // We now have the full set of all the paths for old and new files
    182             // TODO Sort by the pretty form
    183             // Set<R2<String,String>> pathPairs = new TreeSet();
    184             // for (String code : unifiedList) {
    185             // pairs.add(Row.make(code, english.getName(code)));
    186             // }
    187 
    188             // Initialize sets
    189             // .addColumn("Code", "class='source'", "<a name=\"{0}\" href='likely_subtags.html#und_{0}'>{0}</a>",
    190             // "class='source'", true)
    191 
    192             final String localeDisplayName = english.getName(locale);
    193             TablePrinter table = new TablePrinter()
    194                 .setCaption("Changes in " + localeDisplayName + " (" + locale + ")")
    195                 .addColumn("PRETTY_SORT1").setSortPriority(1).setHidden(true).setRepeatHeader(true)
    196                 .addColumn("PRETTY_SORT2").setSortPriority(2).setHidden(true)
    197                 .addColumn("PRETTY_SORT3").setSortPriority(3).setHidden(true)
    198                 .addColumn("ESCAPED_PATH").setHidden(true)
    199                 .addColumn("Inh.").setCellAttributes("class=\"{0}\"").setSortPriority(0).setSpanRows(true)
    200                 .setRepeatHeader(true)
    201                 .addColumn("Section").setSpanRows(true).setCellAttributes("class='section'")
    202                 .addColumn("Subsection").setSpanRows(true).setCellAttributes("class='subsection'")
    203                 .addColumn("Item").setSpanRows(true).setCellPattern("<a href=\"{4}\">{0}</a>")
    204                 .setCellAttributes("class='item'")
    205                 .addColumn("English").setCellAttributes("class='english'")
    206                 .addColumn("Status").setSortPriority(4).setCellAttributes("class=\"{0}\"")
    207                 .addColumn("Old" + localeDisplayName).setCellAttributes("class='old'")
    208                 .addColumn("New" + localeDisplayName).setCellAttributes("class='new'");
    209             Counter<String> fileCounter = new Counter<String>();
    210 
    211             for (String path : paths) {
    212                 if (path.contains("/alias") || path.contains("/identity")) {
    213                     continue;
    214                 }
    215                 String cleanedPath = CLDRFile.getNondraftNonaltXPath(path);
    216 
    217                 String oldValue = oldFile.getStringValue(cleanedPath);
    218                 String newValue = newFile.getStringValue(path);
    219                 String englishValue = english.getStringValue(cleanedPath);
    220 
    221                 // for debugging
    222                 if (oldValue != null && oldValue.contains("{1} {0}")) {
    223                     System.out.print("");
    224                 }
    225 
    226                 if (equals(newValue, oldValue)) {
    227                     continue;
    228                 }
    229 
    230                 // get the actual place the data is stored
    231                 // AND adjust if the same as root!
    232 
    233                 Status newStatus = new Status();
    234                 String newFoundLocale = getStatus(newFile, newRoot, path, newValue, newStatus);
    235 
    236                 // At this point, we have two unequal values
    237                 // TODO check for non-distinguishing attribute value differences
    238 
    239                 boolean isAliased = false;
    240 
    241                 // Skip deletions of alt-proposed
    242 
    243                 // if (newValue == null) {
    244                 // if (path.contains("@alt=\"proposed")) {
    245                 // continue;
    246                 // }
    247                 // }
    248 
    249                 // Skip if both inherited from the same locale, since we should catch it
    250                 // in that locale.
    251 
    252                 // Mark as aliased if new locale or path is different
    253                 if (!newStatus.pathWhereFound.equals(path)) {
    254                     isAliased = true;
    255                     // continue;
    256                 }
    257 
    258                 if (!newFoundLocale.equals(locale)) {
    259                     isAliased = true;
    260                     // continue;
    261                 }
    262 
    263                 // // skip if old locale or path is aliased
    264                 // if (!oldFoundLocale.equals(locale)) {
    265                 // //isAliased=true;
    266                 // continue;
    267                 // }
    268                 //
    269                 // // Skip if either found path is are different
    270                 // if (!oldStatus.pathWhereFound.equals(cleanedPath)) {
    271                 // //isAliased=true;
    272                 // continue;
    273                 // }
    274 
    275                 // Now check other aliases
    276 
    277                 // final boolean newIsAlias = !newStatus.pathWhereFound.equals(path);
    278                 // if (newIsAlias) { // new is alias
    279                 // // filter out cases of a new string that is found via alias
    280                 // if (oldValue == null) {
    281                 // continue;
    282                 // }
    283                 //
    284                 // }
    285 
    286                 if (isAliased && !SHOW_ALIASED) {
    287                     continue;
    288                 }
    289 
    290                 // We definitely have a difference worth recording, so do so
    291 
    292                 String newFullPath = newFile.getFullXPath(path);
    293                 final boolean reject = newFullPath != null && newFullPath.contains("@draft")
    294                     && !newFullPath.contains("@draft=\"contributed\"");
    295                 String status;
    296                 if (reject) {
    297                     status = "NOT-ACC";
    298                 } else if (newValue == null) {
    299                     status = "deleted";
    300                 } else if (oldValue == null) {
    301                     status = "added";
    302                 } else {
    303                     status = "changed";
    304                 }
    305                 String coreStatus = status;
    306                 if (isAliased) {
    307                     status = "I+" + status;
    308                 }
    309                 fileCounter.increment(status);
    310                 totalCounter.increment(status);
    311 
    312                 String pretty_sort = prettyPathMaker.getPrettyPath(cleanedPath);
    313                 String[] prettyPartsSort = pretty_sort.split("[|]");
    314                 if (prettyPartsSort.length != 3) {
    315                     System.out.println("Bad pretty path: " + pretty_sort + ", original: " + cleanedPath);
    316                 }
    317                 String prettySort1 = prettyPartsSort[0];
    318                 String prettySort2 = prettyPartsSort[1];
    319                 String prettySort3 = prettyPartsSort[2];
    320 
    321                 String pretty = prettyPathMaker.getOutputForm(pretty_sort);
    322                 String escapedPath = "http://unicode.org/cldr/apps/survey?_=" + locale + "&xpath="
    323                     + EscapingUtilities.urlEscape(cleanedPath);
    324                 String[] prettyParts = pretty.split("[|]");
    325                 if (prettyParts.length != 3) {
    326                     System.out.println("Bad pretty path: " + pretty + ", original: " + cleanedPath);
    327                 }
    328                 String pretty1 = prettyParts[0];
    329                 String pretty2 = prettyParts[1];
    330                 String pretty3 = prettyParts[2];
    331 
    332                 // http://kwanyin.unicode.org/cldr-apps/survey?_=kw_GB&xpath=%2F%2Fldml%2FlocaleDisplayNames%2Flanguages%2Flanguage%5B%40type%3D%22mt%22%5D
    333 
    334                 table.addRow()
    335                     .addCell(prettySort1)
    336                     .addCell(prettySort2)
    337                     .addCell(prettySort3)
    338                     .addCell(escapedPath)
    339                     .addCell(isAliased ? "I" : "")
    340                     .addCell(pretty1)
    341                     .addCell(pretty2)
    342                     .addCell(pretty3)
    343                     .addCell(englishValue == null ? "-" : englishValue)
    344                     .addCell(coreStatus)
    345                     .addCell(oldValue == null ? "-" : oldValue)
    346                     .addCell(newValue == null ? "-" : newValue)
    347                     .finishRow();
    348 
    349                 totalDifferences++;
    350                 differences++;
    351             }
    352 
    353             addToIndex(indexInfo, "", locale, localeName, fileCounter);
    354             PrintWriter out = FileUtilities.openUTF8Writer(changesDirectory, locale + ".html");
    355             String title = "Changes in " + localeDisplayName;
    356             out.println("<html>"
    357                 +
    358                 "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
    359                 + CldrUtility.LINE_SEPARATOR
    360                 +
    361                 "<title>"
    362                 + title
    363                 + "</title>"
    364                 + CldrUtility.LINE_SEPARATOR
    365                 +
    366                 "<link rel='stylesheet' href='index.css' type='text/css'>"
    367                 + CldrUtility.LINE_SEPARATOR
    368                 +
    369                 "<base target='_blank'>"
    370                 + CldrUtility.LINE_SEPARATOR
    371                 +
    372                 "</head><body>"
    373                 + CldrUtility.LINE_SEPARATOR
    374                 +
    375                 "<h1>"
    376                 + title
    377                 + "</h1>"
    378                 + CldrUtility.LINE_SEPARATOR
    379                 + "<a href='index.html'>Index</a> | <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>"
    380                 + warningMessage);
    381 
    382             TablePrinter table2 = new TablePrinter()
    383                 .setCaption("Totals")
    384                 .addColumn("Inh.").setSortPriority(0)
    385                 .addColumn("Status").setSortPriority(1)
    386                 .addColumn("Total");
    387 
    388             for (String key : fileCounter.getKeysetSortedByKey()) {
    389                 boolean inherited = key.startsWith("I+");
    390                 table2.addRow()
    391                     .addCell(inherited ? "I" : "")
    392                     .addCell(inherited ? key.substring(2) : key)
    393                     .addCell(format.format(fileCounter.getCount(key)))
    394                     .finishRow();
    395             }
    396             out.println(table2);
    397             out.println("<br>");
    398             out.println(table);
    399 
    400             // show status on console
    401 
    402             System.out.println(locale + "\tDifferences:\t" + format.format(differences)
    403                 + "\tPaths:\t" + format.format(paths.size())
    404                 + "\tTime:\t" + timer);
    405 
    406             totalPaths += paths.size();
    407             out.println(ShowData.dateFooter());
    408             out.println(CldrUtility.ANALYTICS);
    409             out.println("</body></html>");
    410             out.close();
    411         }
    412         PrintWriter indexFile = FileUtilities.openUTF8Writer(changesDirectory, "index.html");
    413         indexFile
    414             .println("<html>"
    415                 +
    416                 "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
    417                 + CldrUtility.LINE_SEPARATOR
    418                 +
    419                 "<title>"
    420                 + "Change Summary"
    421                 + "</title>"
    422                 + CldrUtility.LINE_SEPARATOR
    423                 +
    424                 "<link rel='stylesheet' href='index.css' type='text/css'>"
    425                 + CldrUtility.LINE_SEPARATOR
    426                 +
    427                 "<base target='_blank'>"
    428                 + CldrUtility.LINE_SEPARATOR
    429                 +
    430                 "</head><body>"
    431                 + CldrUtility.LINE_SEPARATOR
    432                 +
    433                 "<h1>"
    434                 + "Change Summary"
    435                 + "</h1>"
    436                 + CldrUtility.LINE_SEPARATOR
    437                 + "<a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>"
    438                 + warningMessage
    439                 + "<table><tr>");
    440 
    441         String separator = "";
    442         int last = 0;
    443         for (R2<String, String> indexPair : indexInfo) {
    444             int firstChar = indexPair.get0().codePointAt(0);
    445             indexFile.append(firstChar == last ? separator
    446                 : (last == 0 ? "" : "</td></tr>\n<tr>") + "<th>" + String.valueOf((char) firstChar) + "</th><td>")
    447                 .append(indexPair.get1());
    448             separator = " | ";
    449             last = indexPair.get0().codePointAt(0);
    450         }
    451         indexFile.println("</tr></table>");
    452         indexFile.println(ShowData.dateFooter());
    453         indexFile.println(CldrUtility.ANALYTICS);
    454         indexFile.println("</body></html>");
    455         indexFile.close();
    456 
    457         System.out.println();
    458 
    459         for (String key : totalCounter.getKeysetSortedByKey()) {
    460             System.out.println(key + "\t" + totalCounter.getCount(key));
    461         }
    462 
    463         System.out.println("Total Differences:\t" + format.format(totalDifferences)
    464             + "\tPaths:\t" + format.format(totalPaths)
    465             + "\tTotal Time:\t" + format.format(totalTimer.getDuration()) + "ms");
    466     }
    467 
    468     // static Transliterator urlHex = Transliterator.createFromRules("foo",
    469     // "([^!(-*,-\\:A-Z_a-z~]) > &hex($1) ;" +
    470     // ":: null;" +
    471     // "'\\u00' > '%' ;"
    472 // , Transliterator.FORWARD);
    473 
    // Number formatter for the counts shown in the generated pages and on the console.
    // Assigned at the start of main(); addToIndex() reads it, so it must be set before that is called.
    private static NumberFormat format;
    475 
    476     private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale,
    477         final String localeName) {
    478         addToIndex(indexInfo, title, locale, localeName, null);
    479     }
    480 
    481     private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale,
    482         final String localeName, Counter<String> fileCounter) {
    483         if (title.startsWith("ERROR")) {
    484             indexInfo.add(R2.of(localeName,
    485                 title + " " + localeName + " (" + locale + ")"));
    486             return;
    487         }
    488         String counterString = "";
    489         if (fileCounter != null) {
    490             for (String s : fileCounter) {
    491                 if (counterString.length() != 0) {
    492                     counterString += "; ";
    493                 }
    494                 counterString += s.charAt(0) + ":" + format.format(fileCounter.getCount(s));
    495             }
    496         }
    497         indexInfo.add(R2.of(localeName,
    498             "<a href='" + locale + ".html'>" + title + localeName + " (" + locale + ")</a>"
    499                 + (counterString.length() == 0 ? "" : " [" + counterString + "]")));
    500     }
    501 
    502     // private static int accumulate(Set<R2<String,String>> rejected, int totalRejected,
    503     // final String locale, String indicator, String oldValue, String newValue, String path) {
    504     // String pretty = prettyPathMaker.getPrettyPath(path, false);
    505     // String line = locale + "\t" + indicator +"\t\u200E[" + oldValue + "]\u200E\t\u200E[" + newValue + "]\u200E\t" +
    506     // pretty;
    507     // String pretty2 = prettyPathMaker.getOutputForm(pretty);
    508     // rejected.add(Row.make(pretty2, line));
    509     // totalRejected++;
    510     // return totalRejected;
    511     // }
    512 
    513     private static String getStatus(CLDRFile oldFile, CLDRFile oldRoot, String path,
    514         String oldString, Status oldStatus) {
    515         String oldLocale = oldFile.getSourceLocaleID(path, oldStatus);
    516         if (!oldLocale.equals("root")) {
    517             String oldRootValue = oldRoot.getStringValue(oldStatus.pathWhereFound);
    518             if (equals(oldString, oldRootValue)) {
    519                 oldLocale = "root";
    520             }
    521         }
    522         return oldLocale;
    523     }
    524 
    525     private static void showSet(PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title) {
    526         if (rejected.size() != 0) {
    527             out.println();
    528             out.println(locale + "\t" + title + "\t" + rejected.size());
    529             for (R2<String, String> prettyAndline : rejected) {
    530                 out.println(prettyAndline.get1());
    531             }
    532         }
    533     }
    534 
    535     private static boolean equals(String newString, String oldString) {
    536         if (newString == null) {
    537             return oldString == null;
    538         }
    539         return newString.equals(oldString);
    540     }
    541 
    542 }
    543