Home | History | Annotate | Download | only in unittest
      1 package org.unicode.cldr.unittest;
      2 
      3 import java.io.IOException;
      4 import java.io.PrintWriter;
      5 import java.util.EnumMap;
      6 import java.util.HashMap;
      7 import java.util.HashSet;
      8 import java.util.LinkedHashMap;
      9 import java.util.Map;
     10 import java.util.Map.Entry;
     11 import java.util.Set;
     12 import java.util.TreeMap;
     13 import java.util.TreeSet;
     14 
     15 import org.unicode.cldr.draft.FileUtilities;
     16 import org.unicode.cldr.util.CLDRConfig;
     17 import org.unicode.cldr.util.CLDRFile;
     18 import org.unicode.cldr.util.CLDRPaths;
     19 import org.unicode.cldr.util.Factory;
     20 import org.unicode.cldr.util.LanguageTagParser;
     21 import org.unicode.cldr.util.Level;
     22 import org.unicode.cldr.util.Organization;
     23 import org.unicode.cldr.util.StandardCodes;
     24 import org.unicode.cldr.util.SupplementalDataInfo;
     25 import org.unicode.cldr.util.With;
     26 import org.unicode.cldr.util.XPathParts;
     27 
     28 import com.ibm.icu.dev.util.CollectionUtilities;
     29 import com.ibm.icu.impl.Relation;
     30 import com.ibm.icu.impl.Row;
     31 import com.ibm.icu.impl.Row.R2;
     32 import com.ibm.icu.text.DateTimePatternGenerator;
     33 import com.ibm.icu.text.DateTimePatternGenerator.VariableField;
     34 
     35 public class CheckYear {
    // Shared CLDR configuration; main() obtains the English CLDRFile from it.
    static CLDRConfig testInfo = CLDRConfig.getInstance();
    // Used by getPriority() to look up per-organization locale coverage levels.
    private static final StandardCodes STANDARD_CODES = StandardCodes.make();
    // Passed as the second argument of Factory.make() in main(); presumably a
    // regex selecting which locales are loaded -- TODO confirm.
    private static final String LOCALES = ".*";
    // The stock format-length names. They are substituted into the
    // dateFormatLength/timeFormatLength xpaths in gatherInfo() and used to
    // build the "date-<stock>" / "time-<stock>" keys below.
    private static final String[] STOCK = { "short", "medium", "long", "full" };

    // Buckets assigned by LocaleInfo.recordYearStuff(): whether the year is
    // treated as 2- or 4-digit, crossed with whether the month is numeric.
    enum Category {
        Year2_MonthNumeric, Year2_Other, Year4_MonthNumeric, Year4_Other
    }

    // Empty pattern generator, used only for its skeleton utilities
    // (getSkeleton, getBaseSkeleton, replaceFieldTypes).
    static DateTimePatternGenerator dtp = DateTimePatternGenerator
        .getEmptyInstance();
    // Single parser instance shared by every method that walks the items of a
    // pattern; each use calls set(...) first.
    static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();

    // mismatches between stocks:
    // "date-<stock>" / "time-<stock>" -> (skeleton -> locale IDs using it).
    // LinkedHashMap keeps the insertion order established below: all four
    // date entries first, then all four time entries.
    static Map<String, Relation<String, String>> stock2skeleton2locales = new LinkedHashMap<String, Relation<String, String>>();
    static {
        for (String stock : STOCK) {
            stock2skeleton2locales.put("date-" + stock, Relation.of(
                new TreeMap<String, Set<String>>(), TreeSet.class));
        }
        for (String stock : STOCK) {
            stock2skeleton2locales.put("time-" + stock, Relation.of(
                new TreeMap<String, Set<String>>(), TreeSet.class));
        }
    }
     61 
    /**
     * Per-locale accumulator for the date/time patterns recorded by
     * gatherInfo(): year/month categories and groups of patterns that share a
     * core base skeleton.
     */
    static class LocaleInfo {
        // When true, getCorePattern() prints how it split each interval pattern.
        private static final boolean DEBUG = false;
        // information on the type of years:
        // Category -> keys passed to recordYearStuff() (a skeleton, or
        // skeleton + "*" + first letter of the stock name for stock formats).
        Relation<Category, String> category2base = Relation.of(
            new EnumMap<Category, Set<String>>(Category.class),
            TreeSet.class);
        // collisions between baseSkeletons:
        // core base skeleton -> (minimized pattern -> (skeleton, original pattern) rows).
        Map<String, Relation<String, Row.R2<String, String>>> base2BasePatterns2Info = new TreeMap<String, Relation<String, Row.R2<String, String>>>();

        // NOTE(review): not read or written by any active code visible in this
        // file; only referenced from commented-out code in recordBase().
        Map<String, String> skeleton2pattern = new HashMap<String, String>();
     72 
     73         public void recordStockTime(String localeId, String stock,
     74             String dateTimePattern) {
     75             String skeleton = dtp.getSkeleton(dateTimePattern);
     76             String base = getBaseSkeleton(skeleton);
     77             stock2skeleton2locales.get("time-" + stock).put(skeleton, localeId);
     78             recordBase(base, skeleton, dateTimePattern);
     79         }
     80 
     81         public void recordStock(String localeId, String stock,
     82             String dateTimePattern) {
     83             String skeleton = dtp.getSkeleton(dateTimePattern);
     84             String base = getBaseSkeleton(skeleton);
     85             stock2skeleton2locales.get("date-" + stock).put(
     86                 skeleton.replace("yyyy", "y"), localeId);
     87             String key = skeleton + "*" + stock.charAt(0);
     88             recordBase(base, skeleton, dateTimePattern);
     89             recordYearStuff(base, key, dateTimePattern);
     90         }
     91 
     92         public void record(String skeleton, String dateTimePattern) {
     93             String base = getBaseSkeleton(skeleton);
     94             recordBase(base, skeleton, dateTimePattern);
     95             recordYearStuff(base, skeleton, dateTimePattern);
     96         }
     97 
     98         public void recordBase(String base, String skeleton,
     99             String dateTimePattern) {
    100             String coreBase = getCoreSkeleton(base);
    101             Relation<String, Row.R2<String, String>> basePatterns2Info = base2BasePatterns2Info
    102                 .get(coreBase);
    103             if (basePatterns2Info == null) {
    104                 base2BasePatterns2Info
    105                     .put(coreBase,
    106                         basePatterns2Info = Relation
    107                             .of(new TreeMap<String, Set<Row.R2<String, String>>>(),
    108                                 TreeSet.class));
    109             }
    110             // adjust the pattern to correspond to the base fields
    111             // String coreSkeleton = getCoreSkeleton(skeleton);
    112             String minimizedPattern = replaceFieldTypes(dateTimePattern,
    113                 coreBase, !coreBase.equals(base));
    114             basePatterns2Info.put(minimizedPattern,
    115                 Row.of(skeleton, dateTimePattern));
    116             // if (skeleton2pattern.put(skeleton, basePattern) != null) {
    117             // throw new IllegalArgumentException();
    118             // }
    119         }
    120 
    121         public String getCoreSkeleton(String skeleton) {
    122             int slashPos = skeleton.indexOf('/');
    123             String s = slashPos < 0 ? skeleton : skeleton
    124                 .substring(0, slashPos);
    125             return s;
    126         }
    127 
    128         private void recordYearStuff(String base, String skeleton,
    129             String dateTimePattern) {
    130             // do the year stuff
    131             if (!dateTimePattern.contains("y")) {
    132                 return;
    133             }
    134             boolean isDigit4 = true;
    135             if (dateTimePattern.contains("yyyy")) {
    136                 // nothing
    137             } else if (dateTimePattern.contains("yy")) {
    138                 isDigit4 = false;
    139             }
    140             boolean monthNumeric = false;
    141             if (dateTimePattern.contains("MMM")
    142                 || dateTimePattern.contains("LLL")) {
    143                 // nothing
    144             } else if (dateTimePattern.contains("M")
    145                 || dateTimePattern.contains("L")) {
    146                 monthNumeric = true;
    147             }
    148             if (isDigit4) {
    149                 if (monthNumeric) {
    150                     category2base.put(Category.Year4_MonthNumeric, skeleton);
    151                 } else {
    152                     category2base.put(Category.Year4_Other, skeleton);
    153                 }
    154             } else {
    155                 if (monthNumeric) {
    156                     category2base.put(Category.Year2_MonthNumeric, skeleton);
    157                 } else {
    158                     category2base.put(Category.Year2_Other, skeleton);
    159                 }
    160             }
    161         }
    162 
    163         public String replaceFieldTypes(String dateTimePattern,
    164             String skeleton, boolean isInterval) {
    165             if (!isInterval) {
    166                 return replaceFieldPartsCompletely(dateTimePattern, skeleton);
    167             } else {
    168                 String part = getCorePattern(dateTimePattern);
    169                 return replaceFieldPartsCompletely(part, skeleton);
    170             }
    171         }
    172 
    173         public String replaceFieldPartsCompletely(String dateTimePattern,
    174             String skeleton) {
    175             String minimizedPattern = dtp.replaceFieldTypes(dateTimePattern,
    176                 skeleton);
    177 
    178             // fix numerics
    179             StringBuilder result = new StringBuilder();
    180             for (Object item : formatParser.set(minimizedPattern).getItems()) {
    181                 if (item instanceof String) {
    182                     Object quoteLiteral = formatParser.quoteLiteral(item
    183                         .toString());
    184                     result.append(quoteLiteral);
    185                 } else {
    186                     VariableField item2 = (DateTimePatternGenerator.VariableField) item;
    187                     if (item2.isNumeric()) {
    188                         result.append(item.toString().charAt(0));
    189                     } else {
    190                         result.append(item);
    191                     }
    192                 }
    193             }
    194             String resultString = result.toString();
    195             return resultString;
    196         }
    197 
        /**
         * Reduces an interval pattern to a single "core" pattern: the fields
         * and literals of the first half, plus any fields of the second half
         * whose type did not already occur in the first half.
         * <p>
         * The second half starts at the first field whose type has already
         * been seen. Each time a second-half field repeats a first-half type,
         * the result is cut back to where the most recent literal began, so
         * that literal and everything after it is dropped.
         * <p>
         * Prints a "Not normalized" line to stdout when re-serializing the
         * parsed items does not reproduce the input exactly.
         *
         * @param intervalPattern the interval pattern to reduce
         * @return the core pattern
         * @throws IllegalArgumentException if the pattern has no field, or no
         *         field type occurs twice
         */
        private String getCorePattern(String intervalPattern) {
            // get up to the first duplicate field. Then compare the result on
            // both sides
            // b: the full pattern re-serialized from the parsed items.
            StringBuilder b = new StringBuilder();
            // result: the core pattern being built.
            StringBuilder result = new StringBuilder();
            boolean firstPart = true;
            // Offsets into b: end of the last first-half field, and start of
            // the first second-half field. -1 means "not seen yet".
            int endFirstPart = -1;
            int startSecondPart = -1;
            // Length of result just before the most recent literal was appended.
            int goodSoFar = -1;
            Set<Integer> firstComponents = new HashSet<Integer>();
            // NOTE(review): secondComponents is only written, never read.
            Set<Integer> secondComponents = new HashSet<Integer>();
            for (Object item : formatParser.set(intervalPattern).getItems()) {
                if (item instanceof String) {
                    Object quoteLiteral = formatParser.quoteLiteral(item
                        .toString());
                    b.append(quoteLiteral);
                    // Remember the rollback point before adding the literal.
                    goodSoFar = result.length();
                    result.append(quoteLiteral);
                } else {
                    VariableField item2 = (DateTimePatternGenerator.VariableField) item;
                    int type = item2.getType();
                    // The first repeated field type marks the start of the second half.
                    if (firstPart && firstComponents.contains(type)) {
                        firstPart = false;
                        startSecondPart = b.length();
                    }
                    b.append(item);
                    if (firstPart) {
                        endFirstPart = b.length();
                        firstComponents.add(type);
                        result.append(item);
                    } else {
                        secondComponents.add(type);
                        if (firstComponents.contains(type)) {
                            // Repeated field: drop it together with the
                            // literal that preceded it.
                            // NOTE(review): goodSoFar is still -1 if no literal
                            // has been seen yet, in which case setLength(-1)
                            // throws -- confirm such patterns cannot occur.
                            result.setLength(goodSoFar);
                        } else {
                            result.append(item);
                        }
                    }
                }
            }
            String normalized = b.toString();
            if (!normalized.equals(intervalPattern)) {
                System.out.println("Not normalized: " + intervalPattern + "\t"
                    + normalized);
            }
            if (endFirstPart < 0 || startSecondPart < 0) {
                throw new IllegalArgumentException("Illegal interval pattern: "
                    + intervalPattern);
            } else {
                // Debug trace: first half $$ separator $$ second half => result.
                if (DEBUG)
                    System.out.println(normalized.substring(0, endFirstPart)
                        + "$$"
                        + normalized.substring(endFirstPart,
                            startSecondPart)
                        + "$$"
                        + normalized.substring(startSecondPart) + "\t=>\t"
                        + result);
            }
            return result.toString();
        }
    258 
    259         private String getBaseSkeleton(String skeleton) {
    260             int slashPos = skeleton.indexOf('/');
    261             String core = skeleton;
    262             String diff = "";
    263             if (slashPos >= 0) {
    264                 core = skeleton.substring(0, slashPos);
    265                 diff = skeleton.substring(slashPos);
    266             }
    267             core = dtp.getBaseSkeleton(core);
    268             return core + diff;
    269         }
    270 
    271     }
    272 
    // Locale ID -> collected pattern information. Filled with empty LocaleInfo
    // objects by main(), populated by gatherInfo(), read by the write* methods.
    static Map<String, LocaleInfo> data = new TreeMap<String, LocaleInfo>();
    274 
    275     // private static final Relation<String,String> digit4 = Relation.of(new
    276     // TreeMap<String,Set<String>>(),
    277     // TreeSet.class);
    278     // private static final Relation<String,String> digit2 = Relation.of(new
    279     // TreeMap<String,Set<String>>(),
    280     // TreeSet.class);
    281 
    282     public static void main(String[] args) throws IOException {
    283         CLDRFile englishFile = testInfo.getEnglish();
    284 
    285         Factory factory = Factory.make(CLDRPaths.TMP2_DIRECTORY
    286             + "vxml/common/main/", LOCALES);
    287         String calendarID = "gregorian";
    288         System.out.println("Total locales: "
    289             + factory.getAvailableLanguages().size());
    290         Map<String, String> sorted = new TreeMap<String, String>();
    291         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
    292         Set<String> defaultContent = sdi.getDefaultContentLocales();
    293         LanguageTagParser ltp = new LanguageTagParser();
    294 
    295         for (String localeID : factory.getAvailableLanguages()) {
    296             if (!ltp.set(localeID).getRegion().isEmpty()) {
    297                 continue;
    298             }
    299             if (defaultContent.contains(localeID)) {
    300                 System.out.println("Skipping default content: " + localeID);
    301                 continue;
    302             }
    303             sorted.put(englishFile.getName(localeID, true), localeID);
    304             data.put(localeID, new LocaleInfo());
    305         }
    306 
    307         gatherInfo(factory, calendarID, sorted);
    308 
    309         writeYearWidths(sorted, true, "year-width-diff.txt");
    310         writeYearWidths(sorted, false, "year-width-diff-other.txt");
    311 
    312         writeConflictingStockItems(true, "conflicting-stock.txt");
    313         writeConflictingStockItems(false, "conflicting-stock-other.txt");
    314 
    315         writeConflictingPatterns(sorted, true, "conflicting-patterns.txt");
    316         writeConflictingPatterns(sorted, false,
    317             "conflicting-patterns-other.txt");
    318     }
    319 
    320     public static void gatherInfo(Factory factory, String calendarID,
    321         Map<String, String> sorted) throws IOException {
    322         XPathParts parts = new XPathParts();
    323 
    324         for (Entry<String, String> entry : sorted.entrySet()) {
    325             String localeId = entry.getValue();
    326             CLDRFile file = factory.make(localeId, true);
    327             LocaleInfo localeInfo = data.get(localeId);
    328             for (String stock : STOCK) {
    329                 String path = "//ldml/dates/calendars/calendar[@type=\""
    330                     + calendarID
    331                     + "\"]/dateFormats/dateFormatLength[@type=\""
    332                     + stock
    333                     + "\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
    334                 String dateTimePattern = file.getStringValue(path);
    335                 localeInfo.recordStock(localeId, stock, dateTimePattern);
    336                 path = "//ldml/dates/calendars/calendar[@type=\""
    337                     + calendarID
    338                     + "\"]/timeFormats/timeFormatLength[@type=\""
    339                     + stock
    340                     + "\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
    341                 dateTimePattern = file.getStringValue(path);
    342                 localeInfo.recordStockTime(localeId, stock, dateTimePattern);
    343             }
    344             for (String path : With
    345                 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\""
    346                     + calendarID
    347                     + "\"]/dateTimeFormats/availableFormats/dateFormatItem"))) {
    348                 String key = parts.set(path).getAttributeValue(-1, "id");
    349                 String value = file.getStringValue(path);
    350                 localeInfo.record(key, value);
    351             }
    352             for (String path : With
    353                 .in(file.iterator("//ldml/dates/calendars/calendar[@type=\""
    354                     + calendarID
    355                     + "\"]/dateTimeFormats/intervalFormats/intervalFormatItem"))) {
    356                 String skeleton = parts.set(path).getAttributeValue(-2, "id");
    357                 String diff = parts.set(path).getAttributeValue(-1, "id");
    358                 String value = file.getStringValue(path);
    359                 localeInfo.record(skeleton + "/" + diff, value);
    360             }
    361         }
    362     }
    363 
    364     public static void writeYearWidths(Map<String, String> sorted,
    365         boolean modern, String filename) throws IOException {
    366         PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY
    367             + "datecheck/", filename);
    368         out.println("Name\tid\t"
    369             + CollectionUtilities.join(Category.values(), "\t"));
    370         for (Entry<String, String> entry : sorted.entrySet()) {
    371             String localeId = entry.getValue();
    372             boolean priority = getPriority(localeId);
    373             if (modern != priority) {
    374                 continue;
    375             }
    376             String name = entry.getKey();
    377             LocaleInfo localeInfo = data.get(localeId);
    378             out.print(name + "\t" + localeId);
    379             for (Category item : Category.values()) {
    380                 Set<String> items = localeInfo.category2base.get(item);
    381                 if (items != null) {
    382                     out.print("\t" + CollectionUtilities.join(items, " "));
    383                 } else {
    384                     out.print("\t");
    385                 }
    386             }
    387             out.println();
    388         }
    389         out.close();
    390     }
    391 
    392     public static void writeConflictingStockItems(boolean modern,
    393         String filename) throws IOException {
    394         PrintWriter out;
    395         System.out.println("\nMismatched Stock items\n");
    396         out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY
    397             + "datecheck/", filename);
    398         out.println("Stock\tSkeleton\tLocales");
    399         for (Entry<String, Relation<String, String>> stockAndSkeleton2locales : stock2skeleton2locales
    400             .entrySet()) {
    401             String stock = stockAndSkeleton2locales.getKey();
    402             for (Entry<String, Set<String>> entry2 : stockAndSkeleton2locales
    403                 .getValue().keyValuesSet()) {
    404                 String filtered = filter(entry2.getValue(), modern);
    405                 if (filtered.isEmpty()) {
    406                     continue;
    407                 }
    408                 out.println(stock + "\t" + entry2.getKey() + "\t" + filtered);
    409             }
    410         }
    411         out.close();
    412     }
    413 
    414     private static String filter(Set<String> value, boolean modern) {
    415         StringBuilder b = new StringBuilder();
    416         for (String localeId : value) {
    417             if (modern != getPriority(localeId)) {
    418                 continue;
    419             }
    420             if (b.length() != 0) {
    421                 b.append(" ");
    422             }
    423             b.append(localeId);
    424         }
    425         return b.toString();
    426     }
    427 
    428     public static void writeConflictingPatterns(Map<String, String> sorted,
    429         boolean modern, String filename) throws IOException {
    430         PrintWriter out;
    431         out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY
    432             + "datecheck/", filename);
    433         out.println("Language\tId\tMin. Skeleton\tMin Pat1\tskeleton  pattern\tMin Pat2\tskeleton  pattern\tMin Pat3\tskeleton  pattern");
    434         for (Entry<String, String> entry : sorted.entrySet()) {
    435             String localeId = entry.getValue();
    436             if (modern != getPriority(localeId)) {
    437                 continue;
    438             }
    439             String name = entry.getKey();
    440             LocaleInfo localeInfo = data.get(localeId);
    441 
    442             for (Entry<String, Relation<String, R2<String, String>>> baseAndBasePatterns2Info : localeInfo.base2BasePatterns2Info
    443                 .entrySet()) {
    444                 String base = baseAndBasePatterns2Info.getKey();
    445                 Relation<String, R2<String, String>> basePatterns2Info = baseAndBasePatterns2Info
    446                     .getValue();
    447                 if (basePatterns2Info.size() == 1) {
    448                     continue;
    449                 }
    450                 // Ewe ee MMM LLL  [MMM, LLL]
    451                 // Ewe ee MMM MMM  [MMM/M, MMMMMM]
    452                 // => Ewe ee MMM LLL: tab MMM  LLL tab MMM: tab MMM/M 
    453                 // MMMMMM
    454                 StringBuilder s = new StringBuilder(name + "\t" + localeId
    455                     + "\t" + base);
    456 
    457                 for (Entry<String, Set<R2<String, String>>> basePatternsAndInfo : basePatterns2Info
    458                     .keyValuesSet()) {
    459                     String basePattern = basePatternsAndInfo.getKey();
    460                     s.append("\t" + basePattern + ":\t\"");
    461                     boolean first = true;
    462                     for (R2<String, String> info : basePatternsAndInfo
    463                         .getValue()) {
    464                         if (first) {
    465                             first = false;
    466                         } else {
    467                             s.append(";\n");
    468                         }
    469                         s.append(info.get0() + "  " + info.get1() + "");
    470                     }
    471                     s.append("\"");
    472                 }
    473                 out.println(s);
    474             }
    475         }
    476         out.close();
    477     }
    478 
    479     public static boolean getPriority(String localeId) {
    480         return STANDARD_CODES.getLocaleCoverageLevel(
    481             Organization.google.toString(), localeId) == Level.MODERN
    482             || STANDARD_CODES.getLocaleCoverageLevel(
    483                 Organization.apple.toString(), localeId) == Level.MODERN
    484             || STANDARD_CODES.getLocaleCoverageLevel(
    485                 Organization.ibm.toString(), localeId) == Level.MODERN;
    486     }
    487 }
    488