Home | History | Annotate | Download | only in util
      1 package org.unicode.cldr.util;
      2 
      3 import java.util.Collections;
      4 import java.util.EnumSet;
      5 import java.util.Set;
      6 
      7 import com.ibm.icu.impl.PatternTokenizer;
      8 import com.ibm.icu.text.DateTimePatternGenerator.FormatParser;
      9 import com.ibm.icu.text.UnicodeSet;
     10 
     11 public class DateTimeCanonicalizer {
     12 
     13     public enum DateTimePatternType {
     14         NA, STOCK, AVAILABLE, INTERVAL, GMT;
     15 
     16         public static final Set<DateTimePatternType> STOCK_AVAILABLE_INTERVAL_PATTERNS = Collections
     17             .unmodifiableSet(EnumSet.of(DateTimePatternType.STOCK, DateTimePatternType.AVAILABLE,
     18                 DateTimePatternType.INTERVAL));
     19 
     20         public static DateTimePatternType fromPath(String path) {
     21             return !path.contains("/dates") ? DateTimePatternType.NA
     22                 : path.contains("/pattern") && (path.contains("/dateFormats") || path.contains("/timeFormats") || path.contains("/dateTimeFormatLength"))
     23                     ? DateTimePatternType.STOCK
     24                     : path.contains("/dateFormatItem") ? DateTimePatternType.AVAILABLE
     25                         : path.contains("/intervalFormatItem") ? DateTimePatternType.INTERVAL
     26                             : path.contains("/timeZoneNames/hourFormat") ? DateTimePatternType.GMT
     27                                 : DateTimePatternType.NA;
     28         }
     29     }
     30 
     31     private boolean fixYears = false; // true to fix the years to y
     32 
     33     private FormatParser formatDateParser = new FormatParser();
     34 
     35     // TODO make ICU's FormatParser.PatternTokenizer public (and clean up API)
     36 
     37     private transient PatternTokenizer tokenizer = new PatternTokenizer()
     38         .setSyntaxCharacters(new UnicodeSet("[a-zA-Z]"))
     39         .setExtraQuotingCharacters(new UnicodeSet("[[[:script=Latn:][:script=Cyrl:]]&[[:L:][:M:]]]"))
     40         // .setEscapeCharacters(new UnicodeSet("[^\\u0020-\\u007E]")) // WARNING: DateFormat doesn't accept \\uXXXX
     41         .setUsingQuote(true);
     42 
     43     public DateTimeCanonicalizer(boolean fixYears) {
     44         this.fixYears = fixYears;
     45     }
     46 
     47     public String getCanonicalDatePattern(String path, String value, DateTimePatternType datetimePatternType) {
     48         formatDateParser.set(value);
     49 
     50         // ensure that all y fields are single y, except for the stock short, which can be y or yy.
     51         String newValue;
     52         if (fixYears) {
     53             StringBuilder result = new StringBuilder();
     54             for (Object item : formatDateParser.getItems()) {
     55                 String itemString = item.toString();
     56                 if (item instanceof String) {
     57                     result.append(tokenizer.quoteLiteral(itemString));
     58                 } else if (!itemString.startsWith("y")
     59                     || (datetimePatternType == DateTimePatternType.STOCK
     60                         && path.contains("short")
     61                         && itemString.equals("yy"))) {
     62                     result.append(itemString);
     63                 } else {
     64                     result.append('y');
     65                 }
     66             }
     67             newValue = result.toString();
     68         } else {
     69             newValue = formatDateParser.toString();
     70         }
     71 
     72         if (!value.equals(newValue)) {
     73             value = newValue;
     74         }
     75         return value;
     76     }
     77 }