Home | History | Annotate | Download | only in icu
      1 package org.unicode.cldr.icu;
      2 
      3 import java.io.File;
      4 import java.util.HashMap;
      5 import java.util.Map;
      6 import java.util.Map.Entry;
      7 import java.util.Set;
      8 
      9 import org.xml.sax.Attributes;
     10 import org.xml.sax.SAXException;
     11 
     12 import com.google.common.collect.ImmutableSet;
     13 
     14 /**
     15  * A mapper that converts BCP 47 data from CLDR to the ICU data structure.
     16  *
     17  * @author jchye
     18  */
     19 public class Bcp47Mapper {
     20 //    private static final String[] KEYTYPE_FILES = {
     21 //        "calendar", "collation", "currency", "number", "variant"
     22 //    };
     23     private String sourceDir;
     24 
     25     public Bcp47Mapper(String bcp47Dir) {
     26         sourceDir = bcp47Dir;
     27     }
     28 
     29     /**
     30      * Fills an IcuData object with data of the given type.
     31      */
     32     public IcuData[] fillFromCldr() {
     33         IcuData timezoneData = new IcuData("common/bcp47/timezone.xml", "timezoneTypes", false);
     34         Map<String, String> keyMap = new HashMap<String, String>();
     35         // Timezone data is put in a different file.
     36         fillFromFile("timezone", timezoneData, keyMap);
     37 
     38         // Process the rest of the data.
     39         IcuData keyTypeData = new IcuData("common/bcp47/*.xml", "keyTypeData", false);
     40         for (File file : new File(sourceDir).listFiles()) {
     41             final String filenameXml = file.getName();
     42             if (filenameXml.endsWith(".xml") && !filenameXml.equals("timezone.xml")) {
     43                 fillFromFile(filenameXml.substring(0, filenameXml.length() - 4),
     44                     keyTypeData, keyMap);
     45             }
     46         }
     47         // Add all the keyMap values into the IcuData file.
     48         for (Entry<String, String> kmData : keyMap.entrySet()) {
     49             String bcpKey = kmData.getKey();
     50             String key = kmData.getValue();
     51             if (bcpKey.startsWith("@")) {
     52                 keyTypeData.add("/" + bcpKey.substring(1), key);
     53                 continue;
     54             }
     55             if (bcpKey.equals(key)) {
     56                 // empty value to indicate the BCP47 key is same with the legacy key
     57                 bcpKey = "";
     58             }
     59             keyTypeData.add("/keyMap/" + key, bcpKey);
     60         }
     61         // Add aliases for timezone data.
     62         keyTypeData.add("/typeAlias/timezone:alias", "/ICUDATA/timezoneTypes/typeAlias/timezone");
     63         keyTypeData.add("/typeMap/timezone:alias", "/ICUDATA/timezoneTypes/typeMap/timezone");
     64         keyTypeData.add("/bcpTypeAlias/tz:alias", "/ICUDATA/timezoneTypes/bcpTypeAlias/tz");
     65         return new IcuData[] { timezoneData, keyTypeData };
     66     }
     67 
     68     private void fillFromFile(String filename, IcuData icuData, Map<String, String> keyMap) {
     69         KeywordHandler handler = new KeywordHandler(icuData, keyMap);
     70         MapperUtils.parseFile(new File(sourceDir, filename + ".xml"), handler);
     71     }
     72 
     73     static final Set<String> SKIP_KEY_ATTRIBUTES = ImmutableSet.of(
     74         "name", "alias", "description", "since", "extension");
     75     static final Set<String> SKIP_TYPE_ATTRIBUTES = ImmutableSet.of(
     76         "name", "alias", "description", "since", "preferred");
     77 
     78     /**
     79      * XML parser for BCP47 data.
     80      */
     81     private class KeywordHandler extends MapperUtils.EmptyHandler {
     82         private String typeAliasPrefix;
     83         private String typeMapPrefix;
     84         private String bcpTypeAliasPrefix;
     85         private String lastKeyName;
     86         private IcuData icuData;
     87         private Map<String, String> keyMap;
     88 
     89         /**
     90          * KeywordHandler constructor.
     91          *
     92          * @param icuData
     93          *            the IcuData object to store the parsed data
     94          * @param keyMap
     95          *            a mapping of keys to their aliases. These values will
     96          *            not be added to icuData by the handler
     97          */
     98         public KeywordHandler(IcuData icuData, Map<String, String> keyMap) {
     99             this.icuData = icuData;
    100             this.keyMap = keyMap;
    101         }
    102 
    103         @Override
    104         public void startElement(String uri, String localName, String qName, Attributes attr) throws SAXException {
    105             // Format of BCP47 file:
    106             // <key name="tz" alias="timezone" description="Time zone key">
    107             // <type name="adalv" alias="Europe/Andorra" description="Andorra"/>
    108             // ...
    109             if (attr == null) {
    110                 return;
    111             }
    112 
    113             if (qName.equals("key")) {
    114                 String keyName = attr.getValue("name");
    115                 if (keyName == null) {
    116                     return;
    117                 }
    118 
    119                 String keyAlias = attr.getValue("alias");
    120                 if (keyAlias == null) {
    121                     keyAlias = keyName;
    122 //                    System.err.println(Bcp47Mapper.class.getSimpleName() + " Info: BCP47 key " + keyName
    123 //                        + " didn't have the optional alias= value, mapping " + keyName + "->" + keyName);
    124                 }
    125 
    126                 keyName = keyName.toLowerCase();
    127                 keyAlias = keyAlias.toLowerCase();
    128 
    129                 typeAliasPrefix = "/typeAlias/" + keyAlias + '/';
    130                 typeMapPrefix = "/typeMap/" + keyAlias + '/';
    131                 keyMap.put(keyName, keyAlias);
    132                 bcpTypeAliasPrefix = "/bcpTypeAlias/" + keyName + '/';
    133                 lastKeyName = keyName;
    134                 addOtherInfo(qName, attr, keyName, SKIP_KEY_ATTRIBUTES);
    135             } else if (qName.equals("type")) {
    136                 String typeName = attr.getValue("name");
    137                 if (typeName == null) {
    138                     return;
    139                 }
    140 
    141                 // BCP47 type alias (maps deprecated type to preferred type)
    142                 String preferredTypeName = attr.getValue("preferred");
    143                 if (preferredTypeName != null) {
    144                     icuData.add(bcpTypeAliasPrefix + typeName, preferredTypeName);
    145                     return;
    146                 }
    147 
    148                 String alias = attr.getValue("alias");
    149                 if (alias == null) {
    150                     // Generate type map entry using empty value
    151                     // (an empty value indicates same type name
    152                     // is used for both BCP47 and legacy type.
    153                     icuData.add(typeMapPrefix + typeName, "");
    154                 } else {
    155                     String[] aliases = alias.split("\\s+");
    156                     String mainAlias = aliases[0];
    157                     icuData.add(typeMapPrefix + formatName(mainAlias), typeName);
    158                     for (int i = 1; i < aliases.length; i++) {
    159                         icuData.add(typeAliasPrefix + formatName(aliases[i]), mainAlias);
    160                     }
    161                 }
    162                 addOtherInfo(qName, attr, lastKeyName + "/" + typeName, SKIP_KEY_ATTRIBUTES);
    163             }
    164         }
    165 
    166         private void addOtherInfo(String qName, Attributes attr, String typeName, Set<String> excludedAttributes) {
    167             for (int i = 0; i < attr.getLength(); ++i) {
    168                 String name = attr.getQName(i);
    169                 String value = attr.getValue(i);
    170                 if (excludedAttributes.contains(name)
    171                     || name.equals("deprecated") && value.equals("false")) {
    172                     continue;
    173                 }
    174                 keyMap.put("@" + qName + "Info/" + name + "/" + typeName, value);
    175             }
    176         }
    177 
    178         private String formatName(String str) {
    179             if (str.indexOf('/') > -1) {
    180                 str = '"' + str.replace('/', ':') + '"';
    181             }
    182             return str;
    183         }
    184     }
    185 }
    186