Home | History | Annotate | Download | only in test
      1 package org.unicode.cldr.test;
      2 
      3 import java.io.File;
      4 import java.io.IOException;
      5 import java.io.PrintWriter;
      6 import java.util.ArrayList;
      7 import java.util.EnumMap;
      8 import java.util.HashMap;
      9 import java.util.LinkedHashSet;
     10 import java.util.List;
     11 import java.util.Map;
     12 import java.util.Set;
     13 import java.util.regex.Matcher;
     14 import java.util.regex.Pattern;
     15 
     16 import org.unicode.cldr.test.CheckConsistentCasing.CasingType;
     17 import org.unicode.cldr.test.CheckConsistentCasing.CasingTypeAndErrFlag;
     18 import org.unicode.cldr.test.CheckConsistentCasing.Category;
     19 import org.unicode.cldr.tool.Option.Options;
     20 import org.unicode.cldr.util.CLDRFile;
     21 import org.unicode.cldr.util.CLDRFile.WinningChoice;
     22 import org.unicode.cldr.util.CLDRPaths;
     23 import org.unicode.cldr.util.CldrUtility;
     24 import org.unicode.cldr.util.Factory;
     25 import org.unicode.cldr.util.LocaleIDParser;
     26 import org.unicode.cldr.util.PatternCache;
     27 import org.unicode.cldr.util.SimpleXMLSource;
     28 import org.unicode.cldr.util.SupplementalDataInfo;
     29 import org.unicode.cldr.util.XMLFileReader;
     30 import org.unicode.cldr.util.XMLSource;
     31 import org.unicode.cldr.util.XPathParts;
     32 
     33 import com.ibm.icu.text.MessageFormat;
     34 import com.ibm.icu.text.UnicodeSet;
     35 
     36 /**
     37  * Calculates, reads, writes and returns casing information about locales for
     38  * CheckConsistentCasing.
     39  * Run main() to generate the casing information files which will be stored in common/casing.
     40  *
     41  * @author jchye
     42  */
     43 public class CasingInfo {
     44     private static final Options options = new Options(
     45         "This program is used to generate casing files for locales.")
     46             .add("locales", ".*", ".*", "A regex of the locales to generate casing information for")
     47             .add("summary", null,
     48                 "generates a summary of the casing for all locales that had casing generated for this run");
     49     private Map<String, Map<Category, CasingTypeAndErrFlag>> casing;
     50     private List<File> casingDirs;
     51 
     52     public CasingInfo(Factory factory) {
     53         casingDirs = new ArrayList<File>();
     54         for (File f : factory.getSourceDirectories()) {
     55             this.casingDirs.add(new File(f.getAbsolutePath() + "/../casing"));
     56         }
     57         casing = CldrUtility.newConcurrentHashMap();
     58     }
     59 
     60     /**
     61      * ONLY usable in command line tests.
     62      */
     63     public CasingInfo() {
     64         casingDirs = new ArrayList<File>();
     65         this.casingDirs.add(new File(CLDRPaths.CASING_DIRECTORY));
     66         casing = CldrUtility.newConcurrentHashMap();
     67     }
     68 
     69     /**
     70      * Returns casing information to be used for a specified locale.
     71      *
     72      * @param localeID
     73      * @return
     74      */
     75     public Map<Category, CasingTypeAndErrFlag> getLocaleCasing(String localeID) {
     76         // Check if the localeID contains casing first.
     77         // If there isn't a casing file available for the locale,
     78         // recurse over the locale's parents until something is found.
     79         if (!casing.containsKey(localeID)) {
     80             // Synchronize writes to casing map in an attempt to avoid NPEs (cldrbug 5051).
     81             synchronized (casing) {
     82                 CasingHandler handler = loadFromXml(localeID);
     83                 if (handler != null) {
     84                     handler.addParsedResult(casing);
     85                 }
     86                 if (!casing.containsKey(localeID)) {
     87                     String parentID = LocaleIDParser.getSimpleParent(localeID);
     88                     if (!parentID.equals("root")) {
     89                         casing.put(localeID, getLocaleCasing(parentID));
     90                     }
     91                 }
     92             }
     93         }
     94 
     95         return casing.get(localeID);
     96     }
     97 
     98     /**
     99      * Loads casing information about a specified locale from the casing XML,
    100      * if it exists.
    101      *
    102      * @param localeID
    103      */
    104     private CasingHandler loadFromXml(String localeID) {
    105         for (File casingDir : casingDirs) {
    106             File casingFile = new File(casingDir, localeID + ".xml");
    107             if (casingFile.isFile()) {
    108                 CasingHandler handler = new CasingHandler();
    109                 XMLFileReader xfr = new XMLFileReader().setHandler(handler);
    110                 xfr.read(casingFile.toString(), -1, true);
    111                 return handler;
    112             }
    113         } // Fail silently if file not found.
    114         return null;
    115     }
    116 
    117     /**
    118      * Calculates casing information about all languages from the locale data.
    119      */
    120     private Map<String, Boolean> generateCasingInformation(String localePattern) {
    121         SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
    122         Set<String> defaultContentLocales = supplementalDataInfo.getDefaultContentLocales();
    123         String sourceDirectory = CldrUtility.checkValidDirectory(CLDRPaths.MAIN_DIRECTORY);
    124         Factory cldrFactory = Factory.make(sourceDirectory, localePattern);
    125         Set<String> locales = new LinkedHashSet<String>(cldrFactory.getAvailable());
    126         locales.removeAll(defaultContentLocales); // Skip all default content locales
    127         UnicodeSet allCaps = new UnicodeSet("[:Lu:]");
    128         Map<String, Boolean> localeUsesCasing = new HashMap<String, Boolean>();
    129         LocaleIDParser parser = new LocaleIDParser();
    130 
    131         for (String localeID : locales) {
    132             if (CLDRFile.isSupplementalName(localeID)) continue;
    133 
    134             // We want country/script differences but not region differences
    135             // (unless it's pt_PT, which we do want).
    136             // Keep regional locales only if there isn't already a locale for its script,
    137             // e.g. keep zh_Hans_HK because zh_Hans is a default locale.
    138             parser.set(localeID);
    139             if (parser.getRegion().length() > 0 && !localeID.equals("pt_PT")) {
    140                 System.out.println("Skipping regional locale " + localeID);
    141                 continue;
    142             }
    143 
    144             // Save casing information about the locale.
    145             CLDRFile file = cldrFactory.make(localeID, true);
    146             UnicodeSet examplars = file.getExemplarSet("", WinningChoice.NORMAL);
    147             localeUsesCasing.put(localeID, examplars.containsSome(allCaps));
    148             createCasingXml(localeID, CheckConsistentCasing.getSamples(file));
    149         }
    150         return localeUsesCasing;
    151     }
    152 
    153     /**
    154      * Creates a CSV summary of casing information over all locales for verification.
    155      *
    156      * @param outputFile
    157      */
    158     private void createCasingSummary(String outputFile, Map<String, Boolean> localeUsesCasing) {
    159         PrintWriter out;
    160         try {
    161             out = new PrintWriter(outputFile);
    162         } catch (IOException e) {
    163             e.printStackTrace();
    164             return;
    165         }
    166 
    167         // Header
    168         out.print(",");
    169         for (Category category : Category.values()) {
    170             out.print("," + category.toString().replace('_', '-'));
    171         }
    172         out.println();
    173         out.print("Locale ID,Case");
    174         for (int i = 0; i < Category.values().length; i++) {
    175             out.print("," + i);
    176         }
    177         out.println();
    178 
    179         Set<String> locales = casing.keySet();
    180         for (String localeID : locales) {
    181             // Write casing information about the locale to file.
    182             out.print(localeID);
    183             out.print(",");
    184             out.print(localeUsesCasing.get(localeID) ? "Y" : "N");
    185             Map<Category, CasingTypeAndErrFlag> types = casing.get(localeID);
    186             for (Category category : Category.values()) {
    187                 CasingTypeAndErrFlag value = types.get(category);
    188                 out.print("," + value == null ? null : value.type().toString().charAt(0));
    189             }
    190             out.println();
    191             out.flush();
    192         }
    193         out.close();
    194     }
    195 
    196     /**
    197      * Writes casing information for the specified locale to XML format.
    198      */
    199     private void createCasingXml(String localeID, Map<Category, CasingType> localeCasing) {
    200         // Load any existing overrides over casing info.
    201         CasingHandler handler = loadFromXml(localeID);
    202         Map<Category, CasingType> overrides = handler == null ? new EnumMap<Category, CasingType>(Category.class) : handler.getOverrides();
    203         localeCasing.putAll(overrides);
    204 
    205         XMLSource source = new SimpleXMLSource(localeID);
    206         for (Category category : Category.values()) {
    207             if (category == Category.NOT_USED) continue;
    208             CasingType type = localeCasing.get(category);
    209             if (overrides.containsKey(category)) {
    210                 String path = MessageFormat.format("//ldml/metadata/casingData/casingItem[@type=\"{0}\"][@override=\"true\"]", category);
    211                 source.putValueAtPath(path, type.toString());
    212             } else if (type != CasingType.other) {
    213                 String path = "//ldml/metadata/casingData/casingItem[@type=\"" + category + "\"]";
    214                 source.putValueAtPath(path, type.toString());
    215             }
    216         }
    217         CLDRFile cldrFile = new CLDRFile(source);
    218         File casingFile = new File(CLDRPaths.GEN_DIRECTORY + "/casing", localeID + ".xml");
    219 
    220         try {
    221             PrintWriter out = new PrintWriter(casingFile);
    222             cldrFile.write(out);
    223             out.close();
    224         } catch (IOException e) {
    225             e.printStackTrace();
    226         }
    227     }
    228 
    229     /**
    230      * Generates all the casing information and writes it to XML.
    231      * A CSV summary of casing information is written to file if a filename argument is provided.
    232      *
    233      * @param args
    234      */
    235     public static void main(String[] args) {
    236         CasingInfo casingInfo = new CasingInfo();
    237         options.parse(args, true);
    238         Map<String, Boolean> localeUsesCasing = casingInfo.generateCasingInformation(options.get("locales").getValue());
    239         if (options.get("summary").doesOccur()) {
    240             casingInfo.createCasingSummary(args[0], localeUsesCasing);
    241         }
    242     }
    243 
    244     /**
    245      * XML handler for parsing casing files.
    246      */
    247     private class CasingHandler extends XMLFileReader.SimpleHandler {
    248         private Pattern localePattern = PatternCache.get("//ldml/identity/language\\[@type=\"(\\w+)\"\\]");
    249         private String localeID;
    250         private Map<Category, CasingTypeAndErrFlag> caseMap = new EnumMap<Category, CasingTypeAndErrFlag>(Category.class);
    251         private Map<Category, CasingType> overrideMap = new EnumMap<Category, CasingType>(Category.class);
    252 
    253         @Override
    254         public void handlePathValue(String path, String value) {
    255             // Parse casing info.
    256             if (path.contains("casingItem")) {
    257                 XPathParts parts = new XPathParts().set(path);
    258                 Category category = Category.valueOf(parts.getAttributeValue(-1, "type").replace('-', '_'));
    259                 CasingType casingType = CasingType.valueOf(value);
    260                 boolean errFlag = Boolean.parseBoolean(parts.getAttributeValue(-1, "forceError"));
    261                 for (CasingTypeAndErrFlag typeAndFlag : CasingTypeAndErrFlag.values()) {
    262                     if (casingType == typeAndFlag.type() && errFlag == typeAndFlag.flag()) {
    263                         caseMap.put(category, typeAndFlag);
    264                         break;
    265                     }
    266                 }
    267                 if (Boolean.valueOf(parts.getAttributeValue(-1, "override"))) {
    268                     overrideMap.put(category, casingType);
    269                 }
    270             } else {
    271                 // Parse the locale that the casing is for.
    272                 Matcher matcher = localePattern.matcher(path);
    273                 if (matcher.matches()) {
    274                     localeID = matcher.group(1);
    275                 }
    276             }
    277         }
    278 
    279         public void addParsedResult(Map<String, Map<Category, CasingTypeAndErrFlag>> map) {
    280             map.put(localeID, caseMap);
    281         }
    282 
    283         public Map<Category, CasingType> getOverrides() {
    284             return overrideMap;
    285         }
    286     }
    287 }
    288