package org.unicode.cldr.icu;

import java.io.File;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

import com.google.common.collect.ImmutableSet;

/**
 * A mapper that converts BCP 47 data from CLDR to the ICU data structure.
 *
 * @author jchye
 */
public class Bcp47Mapper {
    /** Directory containing the BCP 47 XML files to be parsed. */
    private final String sourceDir;

    /**
     * Creates a mapper that reads its input from the given directory.
     *
     * @param bcp47Dir
     *            path of the directory holding the BCP 47 XML files
     */
    public Bcp47Mapper(String bcp47Dir) {
        sourceDir = bcp47Dir;
    }

    /**
     * Parses the BCP 47 XML files in the source directory and converts them
     * into two IcuData objects.
     *
     * @return a two-element array: index 0 holds the data parsed from
     *         {@code timezone.xml}; index 1 holds the data parsed from every
     *         other {@code .xml} file in the directory, together with the
     *         key map, the extra key/type attribute info and alias entries
     *         that point at the timezone data
     * @throws IllegalArgumentException
     *             if the source directory cannot be listed
     */
    public IcuData[] fillFromCldr() {
        IcuData timezoneData = new IcuData("common/bcp47/timezone.xml", "timezoneTypes", false);
        Map<String, String> keyMap = new HashMap<String, String>();
        // Timezone data is put in a different file.
        fillFromFile("timezone", timezoneData, keyMap);

        // Process the rest of the data.
        IcuData keyTypeData = new IcuData("common/bcp47/*.xml", "keyTypeData", false);
        File[] files = new File(sourceDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an
            // I/O error occurs; fail with a message instead of a bare NPE.
            throw new IllegalArgumentException(
                "Cannot list BCP 47 source directory: " + sourceDir);
        }
        for (File file : files) {
            final String filenameXml = file.getName();
            if (filenameXml.endsWith(".xml") && !filenameXml.equals("timezone.xml")) {
                fillFromFile(filenameXml.substring(0, filenameXml.length() - 4),
                    keyTypeData, keyMap);
            }
        }

        // Add all the keyMap values into the IcuData file.
        for (Entry<String, String> kmData : keyMap.entrySet()) {
            String bcpKey = kmData.getKey();
            String key = kmData.getValue();
            if (bcpKey.startsWith("@")) {
                // Entries prefixed with '@' were stored by
                // KeywordHandler.addOtherInfo: the remainder of the map key
                // is the path and the map value is the attribute value.
                keyTypeData.add("/" + bcpKey.substring(1), key);
                continue;
            }
            if (bcpKey.equals(key)) {
                // empty value to indicate the BCP47 key is same with the legacy key
                bcpKey = "";
            }
            keyTypeData.add("/keyMap/" + key, bcpKey);
        }

        // Add aliases for timezone data.
        keyTypeData.add("/typeAlias/timezone:alias", "/ICUDATA/timezoneTypes/typeAlias/timezone");
        keyTypeData.add("/typeMap/timezone:alias", "/ICUDATA/timezoneTypes/typeMap/timezone");
        keyTypeData.add("/bcpTypeAlias/tz:alias", "/ICUDATA/timezoneTypes/bcpTypeAlias/tz");
        return new IcuData[] { timezoneData, keyTypeData };
    }

    /**
     * Parses {@code <sourceDir>/<filename>.xml} with a fresh KeywordHandler.
     *
     * @param filename
     *            the file name without the {@code .xml} extension
     * @param icuData
     *            the IcuData object that receives the parsed type data
     * @param keyMap
     *            the map that receives key-to-alias mappings and the extra
     *            attribute info
     */
    private void fillFromFile(String filename, IcuData icuData, Map<String, String> keyMap) {
        KeywordHandler handler = new KeywordHandler(icuData, keyMap);
        MapperUtils.parseFile(new File(sourceDir, filename + ".xml"), handler);
    }

    /** Attributes of a {@code <key>} element that are not recorded as extra info. */
    static final Set<String> SKIP_KEY_ATTRIBUTES = ImmutableSet.of(
        "name", "alias", "description", "since", "extension");

    /** Attributes of a {@code <type>} element that are not recorded as extra info. */
    static final Set<String> SKIP_TYPE_ATTRIBUTES = ImmutableSet.of(
        "name", "alias", "description", "since", "preferred");

    /**
     * XML parser for BCP47 data.
     *
     * Declared static because it does not use any state of the enclosing
     * Bcp47Mapper instance.
     */
    private static class KeywordHandler extends MapperUtils.EmptyHandler {
        // The prefixes and lastKeyName are set by the most recent <key>
        // element and used by the <type> elements that follow it.
        private String typeAliasPrefix;
        private String typeMapPrefix;
        private String bcpTypeAliasPrefix;
        private String lastKeyName;
        private final IcuData icuData;
        private final Map<String, String> keyMap;

        /**
         * KeywordHandler constructor.
         *
         * @param icuData
         *            the IcuData object to store the parsed data
         * @param keyMap
         *            a mapping of keys to their aliases. These values will
         *            not be added to icuData by the handler
         */
        public KeywordHandler(IcuData icuData, Map<String, String> keyMap) {
            this.icuData = icuData;
            this.keyMap = keyMap;
        }

        /**
         * Handles {@code <key>} and {@code <type>} elements; every other
         * element, and any element without a {@code name} attribute, is
         * ignored.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attr) throws SAXException {
            // Format of BCP47 file:
            // <key name="tz" alias="timezone" description="Time zone key">
            // <type name="adalv" alias="Europe/Andorra" description="Andorra"/>
            // ...
            if (attr == null) {
                return;
            }

            if (qName.equals("key")) {
                String keyName = attr.getValue("name");
                if (keyName == null) {
                    return;
                }

                String keyAlias = attr.getValue("alias");
                if (keyAlias == null) {
                    // The alias attribute is optional; without it the key maps to itself.
                    keyAlias = keyName;
                }

                // Locale.ROOT keeps the result independent of the default
                // locale (e.g. Turkish would lower-case 'I' to a dotless i).
                keyName = keyName.toLowerCase(Locale.ROOT);
                keyAlias = keyAlias.toLowerCase(Locale.ROOT);

                typeAliasPrefix = "/typeAlias/" + keyAlias + '/';
                typeMapPrefix = "/typeMap/" + keyAlias + '/';
                keyMap.put(keyName, keyAlias);
                bcpTypeAliasPrefix = "/bcpTypeAlias/" + keyName + '/';
                lastKeyName = keyName;
                addOtherInfo(qName, attr, keyName, SKIP_KEY_ATTRIBUTES);
            } else if (qName.equals("type")) {
                String typeName = attr.getValue("name");
                if (typeName == null) {
                    return;
                }

                // BCP47 type alias (maps deprecated type to preferred type)
                String preferredTypeName = attr.getValue("preferred");
                if (preferredTypeName != null) {
                    icuData.add(bcpTypeAliasPrefix + typeName, preferredTypeName);
                    return;
                }

                String alias = attr.getValue("alias");
                if (alias == null) {
                    // Generate type map entry using empty value
                    // (an empty value indicates same type name
                    // is used for both BCP47 and legacy type).
                    icuData.add(typeMapPrefix + typeName, "");
                } else {
                    // The first whitespace-separated alias is the main one;
                    // each remaining alias is mapped to the main alias.
                    String[] aliases = alias.split("\\s+");
                    String mainAlias = aliases[0];
                    icuData.add(typeMapPrefix + formatName(mainAlias), typeName);
                    for (int i = 1; i < aliases.length; i++) {
                        icuData.add(typeAliasPrefix + formatName(aliases[i]), mainAlias);
                    }
                }
                // Use the exclusion set meant for <type> elements, not the <key> one.
                addOtherInfo(qName, attr, lastKeyName + "/" + typeName, SKIP_TYPE_ATTRIBUTES);
            }
        }

        /**
         * Stores every attribute that is not excluded into keyMap under the
         * key {@code "@" + qName + "Info/" + attributeName + "/" + typeName}.
         * A {@code deprecated="false"} attribute is also skipped.
         *
         * @param qName
         *            the element name ("key" or "type")
         * @param attr
         *            the attributes of the element
         * @param typeName
         *            the trailing path segment(s) identifying the key or type
         * @param excludedAttributes
         *            attribute names that must not be recorded
         */
        private void addOtherInfo(String qName, Attributes attr, String typeName, Set<String> excludedAttributes) {
            for (int i = 0; i < attr.getLength(); ++i) {
                String name = attr.getQName(i);
                String value = attr.getValue(i);
                if (excludedAttributes.contains(name)
                    || name.equals("deprecated") && value.equals("false")) {
                    continue;
                }
                keyMap.put("@" + qName + "Info/" + name + "/" + typeName, value);
            }
        }

        /**
         * Returns the name unchanged unless it contains a slash, in which
         * case every '/' is replaced by ':' and the result is wrapped in
         * double quotes.
         */
        private static String formatName(String str) {
            if (str.indexOf('/') > -1) {
                str = '"' + str.replace('/', ':') + '"';
            }
            return str;
        }
    }
}