1 package org.unicode.cldr.tool; 2 3 import java.util.ArrayList; 4 import java.util.Collection; 5 import java.util.Comparator; 6 import java.util.EnumSet; 7 import java.util.LinkedHashSet; 8 import java.util.List; 9 import java.util.Map; 10 import java.util.Map.Entry; 11 import java.util.Set; 12 import java.util.TreeMap; 13 import java.util.TreeSet; 14 15 import org.unicode.cldr.util.CLDRConfig; 16 import org.unicode.cldr.util.CLDRFile; 17 import org.unicode.cldr.util.Factory; 18 import org.unicode.cldr.util.ICUServiceBuilder; 19 import org.unicode.cldr.util.LanguageTagParser; 20 import org.unicode.cldr.util.Level; 21 import org.unicode.cldr.util.Organization; 22 import org.unicode.cldr.util.PluralRanges; 23 import org.unicode.cldr.util.StandardCodes; 24 import org.unicode.cldr.util.SupplementalDataInfo; 25 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 26 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 27 28 import com.ibm.icu.dev.util.CollectionUtilities; 29 import com.ibm.icu.impl.Relation; 30 import com.ibm.icu.text.DecimalFormat; 31 import com.ibm.icu.text.MessageFormat; 32 import com.ibm.icu.text.PluralRules; 33 import com.ibm.icu.text.PluralRules.FixedDecimal; 34 import com.ibm.icu.util.Output; 35 import com.ibm.icu.util.ULocale; 36 37 public class GeneratePluralRanges { 38 public GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo) { 39 SUPPLEMENTAL = supplementalDataInfo; 40 prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 41 } 42 43 private static final boolean MINIMAL = true; 44 45 public static void main(String[] args) { 46 CLDRConfig testInfo = ToolConfig.getToolInstance(); 47 GeneratePluralRanges me = new GeneratePluralRanges(testInfo.getSupplementalDataInfo()); 48 me.reformatPluralRanges(); 49 //me.generateSamples(testInfo.getEnglish(), testInfo.getCldrFactory()); 50 } 51 52 private void generateSamples(CLDRFile english, Factory factory) { 53 //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns(); 54 // add all the items with plural ranges 55 Set<String> sorted = new TreeSet<String>(SUPPLEMENTAL.getPluralRangesLocales()); 56 // add the core locales 57 // sorted.addAll(StandardCodes.make().getLocaleCoverageLocales("google", EnumSet.of(Level.MODERN))); 58 sorted.addAll(StandardCodes.make().getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN))); 59 // add any variant plural forms 60 LanguageTagParser ltp = new LanguageTagParser(); 61 for (String locale : SUPPLEMENTAL.getPluralLocales()) { 62 if (locale.contains("_")) { 63 if (sorted.contains(ltp.set(locale).getLanguage())) { 64 sorted.add(locale); 65 } 66 } 67 } 68 //sorted.add("fil"); 69 System.out.println("Co.\tLocale Name\tStart\tEnd\tResult\tStart Sample\tEnd Sample\tStart Example\tEnd Example\tCombined Example"); 70 for (String locale : sorted) { 71 PluralInfo pluralInfo3 = SUPPLEMENTAL.getPlurals(locale); 72 if (locale.contains("_")) { 73 PluralInfo pluralInfo2 = SUPPLEMENTAL.getPlurals(ltp.set(locale).getLanguage()); 74 if (pluralInfo2.equals(pluralInfo3)) { 75 continue; 76 } 77 } 78 79 Set<Count> counts3 = pluralInfo3.getCounts(); 80 if (counts3.size() == 1) { 81 continue; // skip japanese, etc. 82 } 83 84 List<RangeSample> list = getRangeInfo(factory.make(locale, true)); 85 if (list == null) { 86 System.out.println("Failure with " + locale); 87 continue; 88 } 89 for (RangeSample rangeSample : list) { 90 System.out.println(locale + "\t" + english.getName(locale) 91 + "\t" + rangeSample.start 92 + "\t" + rangeSample.end 93 + "\t" + (rangeSample.result == null ? "missing" : rangeSample.result) 94 + "\t" + rangeSample.min 95 + "\t" + rangeSample.max 96 + "\t" + rangeSample.startExample 97 + "\t" + rangeSample.endExample 98 + "\t" + rangeSample.resultExample); 99 } 100 } 101 } 102 103 public List<RangeSample> getRangeInfo(CLDRFile cldrFile) { 104 String locale = cldrFile.getLocaleID(); 105 if (locale.equals("iw")) { 106 locale = "he"; 107 } 108 //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns(); 109 List<RangeSample> list = new ArrayList<RangeSample>(); 110 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 111 Set<Count> counts = pluralInfo.getCounts(); 112 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 113 if (pluralRanges == null && locale.contains("_")) { 114 String locale2 = new ULocale(locale).getLanguage(); 115 pluralRanges = SUPPLEMENTAL.getPluralRanges(locale2); 116 } 117 if (pluralRanges == null) { 118 return null; 119 } 120 ULocale ulocale = new ULocale(locale); 121 PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(ulocale.toString()); // CldrUtility.get(samples, ulocale); 122 // if (samplePatterns == null && locale.contains("_")) { 123 // ulocale = new ULocale(ulocale.getLanguage()); 124 // samplePatterns = CldrUtility.get(samples, ulocale); 125 // if (samplePatterns == null) { 126 // return null; 127 // } 128 // } 129 130 Output<FixedDecimal> maxSample = new Output<FixedDecimal>(); 131 Output<FixedDecimal> minSample = new Output<FixedDecimal>(); 132 133 ICUServiceBuilder icusb = new ICUServiceBuilder(); 134 icusb.setCldrFile(cldrFile); 135 DecimalFormat nf = icusb.getNumberFormat(1); 136 //String decimal = cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal"); 137 String defaultNumberingSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem"); 138 String range = cldrFile.getWinningValue("//ldml/numbers/miscPatterns[@numberSystem=\"" 139 + defaultNumberingSystem 140 + "\"]/pattern[@type=\"range\"]"); 141 142 // if (decimal == null) { 143 // throw new IllegalArgumentException(); 144 // } 145 for (Count s : counts) { 146 for (Count e : counts) { 147 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) { 148 continue; 149 } 150 Count r = pluralRanges.getExplicit(s, e); 151 String minFormatted = format(nf, minSample.value); 152 String maxFormatted = format(nf, maxSample.value); 153 String rangeFormatted = MessageFormat.format(range, minFormatted, maxFormatted); 154 155 list.add(new RangeSample( 156 s, e, r, 157 minSample.value, 158 maxSample.value, 159 getExample(locale, samplePatterns, s, minFormatted), getExample(locale, samplePatterns, e, maxFormatted), 160 getExample(locale, samplePatterns, r, rangeFormatted))); 161 } 162 } 163 return list; 164 } 165 166 public static class RangeSample { 167 // Category Examples Minimal Pairs Rules 168 public RangeSample(Count start, Count end, Count result, 169 FixedDecimal min, FixedDecimal max, 170 String startExample, String endExample, String resultExample) { 171 this.start = start; 172 this.end = end; 173 this.result = result; 174 this.min = min; 175 this.max = max; 176 this.startExample = startExample; 177 this.endExample = endExample; 178 this.resultExample = resultExample; 179 } 180 181 final Count start; 182 final Count end; 183 final Count result; 184 final FixedDecimal min; 185 final FixedDecimal max; 186 final String startExample; 187 final String endExample; 188 final String resultExample; 189 } 190 191 public static String format(DecimalFormat nf, FixedDecimal minSample) { 192 nf.setMinimumFractionDigits(minSample.getVisibleDecimalDigitCount()); 193 nf.setMaximumFractionDigits(minSample.getVisibleDecimalDigitCount()); 194 return nf.format(minSample); 195 } 196 197 // private String format(String decimal, Output<FixedDecimal> minSample) { 198 // return minSample.toString().replace(".", decimal); 199 // } 200 201 public static String getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString) { 202 if (r == null) { 203 return "missing"; 204 } 205 String samplePattern; 206 try { 207 samplePattern = samplePatterns.get(PluralRules.PluralType.CARDINAL, r); // CldrUtility.get(samplePatterns.keywordToPattern, r); 208 } catch (Exception e) { 209 throw new IllegalArgumentException("Locale: " + locale + "; Count: " + r, e); 210 } 211 return samplePattern 212 .replace('\u00A0', '\u0020') 213 .replace("{0}", numString); 214 } 215 216 private final SupplementalDataInfo SUPPLEMENTAL; 217 private final PluralRulesFactory prf; 218 219 public static final Comparator<Set<String>> STRING_SET_COMPARATOR = new SetComparator<String, Set<String>>(); 220 public static final Comparator<Set<Count>> COUNT_SET_COMPARATOR = new SetComparator<Count, Set<Count>>(); 221 222 static final class SetComparator<T extends Comparable<T>, U extends Set<T>> implements Comparator<U> { 223 public int compare(U o1, U o2) { 224 return CollectionUtilities.compare((Collection<T>) o1, (Collection<T>) o2); 225 } 226 }; 227 228 public void reformatPluralRanges() { 229 Map<Set<Count>, Relation<Set<String>, String>> seen = new TreeMap<Set<Count>, Relation<Set<String>, String>>(COUNT_SET_COMPARATOR); 230 231 for (String locale : SUPPLEMENTAL.getPluralRangesLocales()) { 232 233 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 234 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 235 Set<Count> counts = pluralInfo.getCounts(); 236 237 Set<String> s; 238 if (false) { 239 System.out.println("Minimized, but not ready for prime-time"); 240 s = minimize(pluralRanges, pluralInfo); 241 } else { 242 s = reformat(pluralRanges, counts); 243 } 244 Relation<Set<String>, String> item = seen.get(counts); 245 if (item == null) { 246 seen.put(counts, 247 item = Relation.of(new TreeMap<Set<String>, Set<String>>(STRING_SET_COMPARATOR), TreeSet.class)); 248 } 249 item.put(s, locale); 250 } 251 252 for (Entry<Set<Count>, Relation<Set<String>, String>> entry0 : seen.entrySet()) { 253 System.out.println("\n<!-- " + CollectionUtilities.join(entry0.getKey(), ", ") + " -->"); 254 for (Entry<Set<String>, Set<String>> entry : entry0.getValue().keyValuesSet()) { 255 System.out.println("\t\t<pluralRanges locales=\"" + CollectionUtilities.join(entry.getValue(), " ") + "\">"); 256 for (String line : entry.getKey()) { 257 System.out.println("\t\t\t" + line); 258 } 259 System.out.println("\t\t</pluralRanges>"); 260 } 261 } 262 } 263 264 enum RangeStrategy { 265 other, end, start, mixed 266 } 267 268 public Set<String> reformat(PluralRanges pluralRanges, Set<Count> counts) { 269 Set<String> s; 270 s = new LinkedHashSet<String>(); 271 // first determine the general principle 272 273 // EnumSet<RangeStrategy> strategy = EnumSet.allOf(RangeStrategy.class); 274 // Count firstResult = null; 275 // for (Count start : counts) { 276 // for (Count end : counts) { 277 // Count result = pluralRanges.getExplicit(start, end); 278 // if (result == null) { 279 // continue; 280 // } else if (firstResult == null) { 281 // firstResult = result; 282 // } 283 // if (result != start) { 284 // strategy.remove(RangeStrategy.start); 285 // } 286 // if (result != end) { 287 // strategy.remove(RangeStrategy.end); 288 // } 289 // if (result != Count.other) { 290 // strategy.remove(RangeStrategy.other); 291 // } 292 // } 293 // } 294 // s.add("<!-- Range Principle: " + strategy.iterator().next() + " -->"); 295 for (Count start : counts) { 296 for (Count end : counts) { 297 Count result = pluralRanges.getExplicit(start, end); 298 if (result == null) { 299 continue; 300 } 301 String line = PluralRanges.showRange(start, end, result); 302 s.add(line); 303 } 304 } 305 return s; 306 } 307 308 Set<String> minimize(PluralRanges pluralRanges, PluralInfo pluralInfo) { 309 Set<String> result = new LinkedHashSet<String>(); 310 // make it easier to manage 311 PluralRanges.Matrix matrix = new PluralRanges.Matrix(); 312 Output<FixedDecimal> maxSample = new Output<FixedDecimal>(); 313 Output<FixedDecimal> minSample = new Output<FixedDecimal>(); 314 for (Count s : Count.VALUES) { 315 for (Count e : Count.VALUES) { 316 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) { 317 continue; 318 } 319 Count r = pluralRanges.getExplicit(s, e); 320 matrix.set(s, e, r); 321 } 322 } 323 // if everything is 'other', we are done 324 // if (allOther == true) { 325 // return result; 326 // } 327 EnumSet<Count> endDone = EnumSet.noneOf(Count.class); 328 EnumSet<Count> startDone = EnumSet.noneOf(Count.class); 329 if (MINIMAL) { 330 for (Count end : pluralInfo.getCounts()) { 331 Count r = matrix.endSame(end); 332 if (r != null 333 //&& r != Count.other 334 ) { 335 result.add("<pluralRange" + 336 " \t\tend=\"" + end 337 + "\"\tresult=\"" + r + "\"/>"); 338 endDone.add(end); 339 } 340 } 341 Output<Boolean> emit = new Output<Boolean>(); 342 for (Count start : pluralInfo.getCounts()) { 343 Count r = matrix.startSame(start, endDone, emit); 344 if (r != null 345 // && r != Count.other 346 ) { 347 if (emit.value) { 348 result.add("<pluralRange" + 349 "\tstart=\"" + start 350 + "\" \t\tresult=\"" + r + "\"/>"); 351 } 352 startDone.add(start); 353 } 354 } 355 } 356 //Set<String> skip = new LinkedHashSet<String>(); 357 for (Count end : pluralInfo.getCounts()) { 358 if (endDone.contains(end)) { 359 continue; 360 } 361 for (Count start : pluralInfo.getCounts()) { 362 if (startDone.contains(start)) { 363 continue; 364 } 365 Count r = matrix.get(start, end); 366 if (r != null 367 //&& !(MINIMAL && r == Count.other) 368 ) { 369 result.add(PluralRanges.showRange(start, end, r)); 370 } else { 371 result.add("<!-- <pluralRange" + 372 "\tstart=\"" + start 373 + "\" \tend=\"" + end 374 + "\" \tresult=\"" + r + "\"/> -->"); 375 376 } 377 378 } 379 } 380 return result; 381 } 382 383 } 384