package org.unicode.cldr.tool;

import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.SupplementalDataInfo;

import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.UnicodeSet;

/**
 * Command-line tool that computes a map from alias locale IDs to target locale IDs and prints
 * each alias it records (plus any conflicts and missing entries) to standard output.
 */
public class GenerateAliases {
    public static void main(String[] args) {
        // All work, including the console output, happens in the Builder constructor.
        new Builder().getAliases();
    }

    /**
     * Builds the alias map in its constructor from the available locales, the likely-subtags
     * data, and the supplemental alias data.
     */
    static class Builder {
        /** Alias locale ID to target locale ID, in the order the aliases were recorded. */
        Map<String, String> aliasMap = new LinkedHashMap<String, String>();
        Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");

        SupplementalDataInfo dataInfo = SupplementalDataInfo.getInstance();
        Set<String> defaultContents = dataInfo.getDefaultContentLocales();
        LikelySubtags likelySubtags = new LikelySubtags();
        Map<String, Map<String, R2<List<String>, String>>> aliasInfo = dataInfo.getLocaleAliasInfo();

        Relation<String, String> goodToBadLanguages = getGoodToBad(aliasInfo, "language");
        Relation<String, String> goodToBadTerritories = getGoodToBad(aliasInfo, "territory");
        Relation<String, String> goodToBadScripts = getGoodToBad(aliasInfo, "script");

        // sh //ldml/alias[@source="sr_Latn"][@path="//ldml"]
        /** Shared scratch parser; the constructor, addToLikely and addAlias all mutate it. */
        LanguageTagParser ltp = new LanguageTagParser();
        final Set<String> available = factory.getAvailable();

        /** Memoized results of {@link #isWholeAlias(Factory, String)}, keyed by locale ID. */
        Map<String, Boolean> wholeAliasCache = new HashMap<String, Boolean>();

        static final Set<String> HAS_MULTIPLE_SCRIPTS = org.unicode.cldr.util.Builder.with(new HashSet<String>())
            .addAll("ha", "ku", "zh", "sr", "uz", "sh").freeze();

        static final UnicodeSet NUMBERS = new UnicodeSet("[0-9]");

        Builder() {
            // Pass 1: "deprecated" aliases for every available locale.
            for (String localeID : available) {
                String targetID = getDefaultContents(localeID);
                if (targetID == null) {
                    targetID = localeID;
                }
                addAlias("deprecated", localeID, targetID);
                // special hack for sh
                if (localeID.startsWith("sr_Latn")) {
                    addAlias("deprecated", "sh" + localeID.substring(7), localeID);
                }
            }

            Map<String, String> likely = new TreeMap<String, String>();

            // get all the combinations: for each maximized tag, also try it without the script,
            // without the region, and without both.
            for (String max : likelySubtags.getToMaximized().values()) {
                likely.put(max, getDefaultContents(max));
                ltp.set(max);
                ltp.setScript("");
                addToLikely(likely);
                ltp.set(max);
                ltp.setRegion("");
                addToLikely(likely);
                ltp.setScript("");
                addToLikely(likely);
            }

            // Pass 2: "default" aliases derived from the likely-subtag combinations.
            for (Entry<String, String> small2large : likely.entrySet()) {
                String localeID = small2large.getKey();
                String targetID = small2large.getValue();
                if (localeID.equals(targetID)) {
                    continue;
                }
                String base = ltp.set(localeID).getLanguage();
                if (!available.contains(base)) { // skip seed locales
                    continue;
                }
                // if (!localeID.contains("_")) {
                // continue; // skip languages not represented
                // }
                if (available.contains(localeID) && !isWholeAlias(factory, localeID)) {
                    continue;
                }
                targetID = getDefaultContents(targetID);
                addAlias("default", localeID, targetID);
            }

            // Pass 3: report available whole-alias locales that no alias was recorded for.
            for (String localeID : available) {
                if (aliasMap.get(localeID) != null) {
                    continue;
                }
                if (isWholeAlias(factory, localeID)) {
                    System.out.println("missing" + "\t" + localeID);
                }
            }

            // System.out.println(CollectionUtilities.join(aliasMap.entrySet(), "\n"));
        }

        /**
         * Adds the tag currently held by {@link #ltp} to {@code likely}, mapped to its
         * default-content target, unless that target is simply the tag's parent.
         *
         * @param likely map from partial locale ID to target locale ID; modified in place
         */
        private void addToLikely(Map<String, String> likely) {
            String partial = ltp.toString();
            final String target = getDefaultContents(partial);
            String parent = LocaleIDParser.getSimpleParent(partial);
            if (target.equals(parent)) {
                return;
            }
            likely.put(partial, target);
        }

        /**
         * Returns whether the language of {@code localeID} is listed in
         * {@link #HAS_MULTIPLE_SCRIPTS}.
         */
        private boolean hasMultipleScripts(String localeID) {
            // Uses its own parser: callers may be in the middle of editing the shared ltp.
            LanguageTagParser parser = new LanguageTagParser().set(localeID);
            return HAS_MULTIPLE_SCRIPTS.contains(parser.getLanguage());
        }

        /**
         * Computes the target locale for {@code localeID}: the maximized form when the language
         * is in {@link #HAS_MULTIPLE_SCRIPTS}, otherwise the minimized form, then moved up the
         * parent chain for as long as the result is a default-content locale.
         *
         * @param localeID locale ID to resolve
         * @return the resolved target, or {@code localeID} itself when the likely-subtags lookup
         *         returns null (a "missingLikely" line is printed in that case)
         */
        private String getDefaultContents(String localeID) {
            String targetID = hasMultipleScripts(localeID)
                ? likelySubtags.maximize(localeID)
                : likelySubtags.minimize(localeID);

            if (targetID == null) {
                System.out.println("missingLikely" + "\t" + localeID);
                return localeID;
            }
            while (defaultContents.contains(targetID)) {
                String parent = LocaleIDParser.getSimpleParent(targetID);
                // Stop before climbing to root. The literal used to be "root)" (stray
                // parenthesis), which never matched, so the walk could end on root.
                if (parent == null || "root".equals(parent)) {
                    break;
                }
                targetID = parent;
            }
            return targetID;
        }

        /** Returns the alias map built by the constructor (alias locale ID to target ID). */
        public Map<String, String> getAliases() {
            return aliasMap;
        }

        /**
         * Inverts the alias data for one tag type into a relation from the first replacement
         * code to the codes it replaces.
         *
         * <p>Entries are skipped when the reason is "overlong", when there is no replacement
         * list (or it is empty), or when the key consists only of ASCII digits.
         *
         * @param aliasInfo alias data keyed by tag type, then by the replaced code
         * @param tag tag type to read, e.g. "language", "territory" or "script"
         * @return relation from replacement code to replaced codes; empty when {@code aliasInfo}
         *         has no entry for {@code tag}
         */
        private Relation<String, String> getGoodToBad(Map<String, Map<String, R2<List<String>, String>>> aliasInfo,
            String tag) {
            Relation<String, String> result = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
            Map<String, R2<List<String>, String>> map = aliasInfo.get(tag);
            if (map == null) {
                return result; // nothing recorded for this tag type
            }
            for (Entry<String, R2<List<String>, String>> entity : map.entrySet()) {
                final String key = entity.getKey();
                final R2<List<String>, String> listAndReason = entity.getValue();
                final List<String> list = listAndReason.get0();
                final String reason = listAndReason.get1();
                if ("overlong".equals(reason)) {
                    continue;
                }
                if (list == null || list.isEmpty()) {
                    continue;
                }
                if (NUMBERS.containsAll(key)) { // special check for items like 172
                    continue;
                }
                result.put(list.get(0), key);
            }
            return result;
        }

        /**
         * Records aliases to {@code targetID} for {@code localeID} and for every variation of it
         * obtained by substituting replaced language, script and territory codes.
         *
         * @param title label printed in front of each alias that is recorded
         * @param localeID locale ID whose subtags seed the combinations
         * @param targetID locale ID the aliases should point to
         */
        private void addAlias(String title, String localeID, String targetID) {
            ltp.set(localeID);
            Set<String> languages = addExtras(ltp.getLanguage(), goodToBadLanguages);
            Set<String> scripts = addExtras(ltp.getScript(), goodToBadScripts);
            Set<String> territories = addExtras(ltp.getRegion(), goodToBadTerritories);
            for (String language : languages) {
                try {
                    ltp.set(language); // whole language tag
                } catch (Exception e) {
                    // Best effort: a code the parser rejects is skipped.
                    continue;
                }
                if (!ltp.getVariants().isEmpty()) { // skip variants
                    continue;
                }
                for (String script : scripts) {
                    ltp.setScript(script);
                    for (String territory : territories) {
                        ltp.setRegion(territory);
                        recordAlias(title, ltp.toString().replace('-', '_'), targetID);
                    }
                }
            }
        }

        /**
         * Records a single alias {@code newTag -> targetID}, unless the tag is the target
         * itself, is already mapped (a differing mapping is reported as a conflict), or names
         * an available locale that is not a whole-locale alias.
         */
        private void recordAlias(String title, String newTag, String targetID) {
            if (newTag.equals(targetID)) {
                return;
            }
            String old = aliasMap.get(newTag);
            if (old != null) {
                if (!old.equals(targetID)) {
                    System.out.println(newTag + "\t\t" + targetID + "\tconflict with\t" + old);
                }
                return;
            }
            final boolean wholeAlias = isWholeAlias(factory, newTag);
            if (!available.contains(newTag) || wholeAlias) {
                System.out.println(title + "\t" + newTag + "\t\t" + targetID
                    + (wholeAlias ? "\talias-already" : ""));
                aliasMap.put(newTag, targetID);
            }
        }

        /*
         * Problems
         * missingLikely tl
         * missingLikely tl_PH
         * sr_YU -> conflict with sr
         * sr_CS -> conflict with sr_Cyrl_CS
         * sr_CS -> conflict with sr_Cyrl_CS
         * sh_CS -> conflict with sr_Latn_CS
         * sh_YU -> conflict with sr_Latn_RS
         */

        /**
         * Returns a sorted set holding {@code code} plus every code that {@code goodToBad}
         * relates to it.
         *
         * @param code the subtag value to start from
         * @param goodToBad relation from a replacement code to the codes it replaces
         */
        private Set<String> addExtras(String code, Relation<String, String> goodToBad) {
            Set<String> codes = new TreeSet<String>();
            codes.add(code);
            Set<String> replaced = goodToBad.get(code);
            if (replaced != null) {
                codes.addAll(replaced);
            }
            return codes;
        }

        /**
         * Returns whether the first path of the locale's file, ignoring paths under
         * {@code //ldml/identity}, starts with {@code //ldml/alias}. Results are cached.
         *
         * @param factory factory used to load the locale's unresolved file
         * @param localeID locale ID to inspect
         * @return {@code true} for a whole-locale alias; {@code false} otherwise, including
         *         when the file cannot be loaded or has no non-identity paths
         */
        private boolean isWholeAlias(Factory factory, String localeID) {
            Boolean cached = wholeAliasCache.get(localeID);
            if (cached != null) {
                return cached;
            }
            CLDRFile cldrFile;
            try {
                cldrFile = factory.make(localeID, false);
            } catch (Exception e) {
                // A locale that cannot be loaded is treated as "not an alias".
                wholeAliasCache.put(localeID, false);
                return false;
            }
            for (String path : cldrFile) {
                if (path.startsWith("//ldml/identity")) {
                    continue;
                }
                // The first non-identity path decides the answer.
                final boolean isAlias = path.startsWith("//ldml/alias");
                wholeAliasCache.put(localeID, isAlias);
                return isAlias;
            }
            wholeAliasCache.put(localeID, false);
            return false;
        }
    }
}