Home | History | Annotate | Download | only in util
      1 package org.unicode.cldr.util;
      2 
      3 import java.util.ArrayList;
      4 import java.util.Arrays;
      5 import java.util.EnumSet;
      6 import java.util.HashMap;
      7 import java.util.List;
      8 import java.util.Map;
      9 import java.util.Map.Entry;
     10 import java.util.Set;
     11 import java.util.TreeSet;
     12 import java.util.regex.Matcher;
     13 import java.util.regex.Pattern;
     14 
     15 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;
     16 
     17 import com.ibm.icu.text.MessageFormat;
     18 import com.ibm.icu.util.Output;
     19 
     20 public class PathDescription {
     21 
     22     public enum ErrorHandling {
     23         SKIP, CONTINUE
     24     }
     25 
     26     // BE sure to sync with the list in xmbSkip!
     27     public static final Set<String> EXTRA_LANGUAGES = new TreeSet<String>(
     28         Arrays
     29             .asList(
     30                 "ach|af|ak|ak|am|ar|az|be|bem|bg|bh|bn|br|bs|ca|chr|ckb|co|crs|cs|cy|da|de|de_AT|de_CH|ee|el|en|en_AU|en_CA|en_GB|en_US|eo|es|es_419|es_ES|et|eu|fa|fi|fil|fo|fr|fr_CA|fr_CH|fy|ga|gaa|gd|gl|gn|gsw|gu|ha|haw|he|hi|hr|ht|hu|hy|ia|id|ig|io|is|it|ja|jv|ka|kg|kk|km|kn|ko|kri|ku|ky|la|lg|ln|lo|loz|lt|lua|lv|mfe|mg|mi|mk|ml|mn|mr|ms|mt|my|nb|ne|nl|nl_BE|nn|nso|ny|nyn|oc|om|or|pa|pcm|pl|ps|pt|pt_BR|pt_PT|qu|rm|rn|ro|ro|ro_MD|ru|rw|sd|si|sk|sl|sn|so|sq|sr|sr_Latn|sr_ME|st|su|sv|sw|ta|te|tg|th|ti|tk|tlh|tn|to|tr|tt|tum|ug|uk|und|ur|uz|vi|wo|xh|yi|yo|zh|zh_Hans|zh_Hant|zh_HK|zu|zxx"
     31                     .split("|")));
     32 
     33     private static final Pattern METAZONE_PATTERN = Pattern
     34         .compile("//ldml/dates/timeZoneNames/metazone\\[@type=\"([^\"]*)\"]/(.*)/(.*)");
     35     private static final Pattern STAR_ATTRIBUTE_PATTERN = PatternCache.get("=\"([^\"]*)\"");
     36 
     37     private static final StandardCodes STANDARD_CODES = StandardCodes.make();
     38     private static Map<String, String> ZONE2COUNTRY = STANDARD_CODES.getZoneToCounty();
     39     private static RegexLookup<String> pathHandling = new RegexLookup<String>().loadFromFile(PathDescription.class,
     40         "data/PathDescription.txt");
     41 
     42     // set in construction
     43 
     44     private final CLDRFile english;
     45     private final Map<String, String> extras;
     46     private final ErrorHandling errorHandling;
     47     private final Map<String, List<Set<String>>> starredPaths;
     48     private final Set<String> allMetazones;
     49 
     50     // used on instance
     51 
     52     private Matcher metazoneMatcher = METAZONE_PATTERN.matcher("");
     53     private XPathParts parts = new XPathParts();
     54     private String starredPathOutput;
     55     private Output<String[]> pathArguments = new Output<String[]>();
     56     private EnumSet<Status> status = EnumSet.noneOf(Status.class);
     57 
     58     public static final String MISSING_DESCRIPTION = "Before translating, please see http://cldr.org/translation.";
     59 
     60     public PathDescription(SupplementalDataInfo supplementalDataInfo,
     61         CLDRFile english,
     62         Map<String, String> extras,
     63         Map<String, List<Set<String>>> starredPaths,
     64         ErrorHandling errorHandling) {
     65         this.english = english;
     66         this.extras = extras == null ? new HashMap<String, String>() : extras;
     67         this.starredPaths = starredPaths == null ? new HashMap<String, List<Set<String>>>() : starredPaths;
     68         allMetazones = supplementalDataInfo.getAllMetazones();
     69         this.errorHandling = errorHandling;
     70     }
     71 
     72     public String getStarredPathOutput() {
     73         return starredPathOutput;
     74     }
     75 
     76     public EnumSet<Status> getStatus() {
     77         return status;
     78     }
     79 
     80     public enum Status {
     81         SKIP, NULL_VALUE, EMPTY_CONTENT, NOT_REQUIRED
     82     }
     83 
     84     public String getRawDescription(String path, String value, Object context) {
     85         status.clear();
     86         return pathHandling.get(path, context, pathArguments);
     87     }
     88 
     89     public String getDescription(String path, String value, Level level, Object context) {
     90         status.clear();
     91 
     92         String description = pathHandling.get(path, context, pathArguments);
     93         if (description == null) {
     94             description = MISSING_DESCRIPTION;
     95         } else if ("SKIP".equals(description)) {
     96             status.add(Status.SKIP);
     97             if (errorHandling == ErrorHandling.SKIP) {
     98                 return null;
     99             }
    100         }
    101 
    102         // String localeWhereFound = english.getSourceLocaleID(path, status);
    103         // if (!status.pathWhereFound.equals(path)) {
    104         // reasonsToPaths.put("alias", path + "  " + value);
    105         // continue;
    106         // }
    107         if (value == null) { // a count item?
    108             String xpath = extras.get(path);
    109             if (xpath != null) {
    110                 value = english.getStringValue(xpath);
    111             } else if (path.contains("/metazone")) {
    112                 if (metazoneMatcher.reset(path).matches()) {
    113                     String name = metazoneMatcher.group(1);
    114                     String type = metazoneMatcher.group(3);
    115                     value = name.replace('_', ' ')
    116                         + (type.equals("generic") ? "" : type.equals("daylight") ? " Summer" : " Winter") + " Time";
    117                     // System.out.println("Missing:    " + path + " :    " + value);
    118                 }
    119             }
    120             if (value == null) {
    121                 status.add(Status.NULL_VALUE);
    122                 if (errorHandling == ErrorHandling.SKIP) {
    123                     return null;
    124                 }
    125             }
    126         }
    127         if (value != null && value.length() == 0) {
    128             status.add(Status.EMPTY_CONTENT);
    129             if (errorHandling == ErrorHandling.SKIP) {
    130                 return null;
    131             }
    132         }
    133         // if (GenerateXMB.contentMatcher != null && !GenerateXMB.contentMatcher.reset(value).find()) {
    134         // PathDescription.addSkipReasons(reasonsToPaths, "content-parameter", level, path, value);
    135         // return null;
    136         // }
    137 
    138         List<String> attributes = addStarredInfo(starredPaths, path);
    139 
    140         // In special cases, only use if there is a root value (languageNames, ...
    141         if (description.startsWith("ROOT")) {
    142             int typeEnd = description.indexOf(';');
    143             String type = description.substring(4, typeEnd).trim();
    144             description = description.substring(typeEnd + 1).trim();
    145 
    146             boolean isMetazone = type.equals("metazone");
    147             String code = attributes.get(0);
    148             boolean isRootCode = isRootCode(code, allMetazones, type, isMetazone);
    149             if (!isRootCode) {
    150                 status.add(Status.NOT_REQUIRED);
    151                 if (errorHandling == ErrorHandling.SKIP) {
    152                     return null;
    153                 }
    154             }
    155             if (isMetazone) {
    156                 parts.set(path);
    157                 String daylightType = parts.getElement(-1);
    158                 daylightType = daylightType.equals("daylight") ? "summer" : daylightType.equals("standard") ? "winter"
    159                     : daylightType;
    160                 String length = parts.getElement(-2);
    161                 length = length.equals("long") ? "" : "abbreviated ";
    162                 code = code + ", " + length + daylightType + " form";
    163             } else if (type.equals("timezone")) {
    164                 String country = (String) ZONE2COUNTRY.get(code);
    165                 int lastSlash = code.lastIndexOf('/');
    166                 String codeName = lastSlash < 0 ? code : code.substring(lastSlash + 1).replace('_', ' ');
    167 
    168                 boolean found = false;
    169                 if ("001".equals(country)) {
    170                     code = "the timezone " + codeName + "";
    171                     found = true;
    172                 } else if (country != null) {
    173                     String countryName = english.getName("territory", country);
    174                     if (countryName != null) {
    175                         if (!codeName.equals(countryName)) {
    176                             code = "the city " + codeName + " (in " + countryName + ")";
    177                         } else {
    178                             code = "the country " + codeName + "";
    179                         }
    180                         found = true;
    181                     }
    182                 }
    183                 if (!found) {
    184                     System.out.println("Missing country for timezone " + code);
    185                 }
    186             }
    187             description = MessageFormat.format(MessageFormat.autoQuoteApostrophe(description), new Object[] { code });
    188         } else if (path.contains("exemplarCity")) {
    189             String regionCode = ZONE2COUNTRY.get(attributes.get(0));
    190             String englishRegionName = english.getName(CLDRFile.TERRITORY_NAME, regionCode);
    191             description = MessageFormat.format(MessageFormat.autoQuoteApostrophe(description),
    192                 new Object[] { englishRegionName });
    193         } else if (description != MISSING_DESCRIPTION) {
    194             description = MessageFormat.format(MessageFormat.autoQuoteApostrophe(description),
    195                 (Object[]) pathArguments.value);
    196         }
    197 
    198         return description;
    199     }
    200 
    201     /**
    202      * Creates an escaped HTML string of placeholder information.
    203      *
    204      * @param path
    205      *            the xpath to specify placeholder information for
    206      * @return a HTML string, or an empty string if there was no placeholder information
    207      */
    208     public String getPlaceholderDescription(String path) {
    209         Map<String, PlaceholderInfo> placeholders = PatternPlaceholders.getInstance().get(path);
    210         if (placeholders != null && placeholders.size() > 0) {
    211             StringBuffer buffer = new StringBuffer();
    212             buffer.append("<table>");
    213             buffer.append("<tr><th>Placeholder</th><th>Meaning</th><th>Example</th></tr>");
    214             for (Entry<String, PlaceholderInfo> entry : placeholders.entrySet()) {
    215                 PlaceholderInfo info = entry.getValue();
    216                 buffer.append("<tr>");
    217                 buffer.append("<td>").append(entry.getKey()).append("</td>");
    218                 buffer.append("<td>").append(info.name).append("</td>");
    219                 buffer.append("<td>").append(info.example).append("</td>");
    220                 buffer.append("</tr>");
    221             }
    222             buffer.append("</table>");
    223             return buffer.toString();
    224         }
    225         return "";
    226     }
    227 
    228     private static boolean isRootCode(String code, Set<String> allMetazones, String type, boolean isMetazone) {
    229         Set<String> codes = isMetazone ? allMetazones
    230             : type.equals("timezone") ? STANDARD_CODES.getCanonicalTimeZones()
    231                 : STANDARD_CODES.getSurveyToolDisplayCodes(type);
    232         // end
    233         boolean isRootCode = codes.contains(code) || code.contains("_");
    234         if (!isRootCode && type.equals("language")
    235             && EXTRA_LANGUAGES.contains(code)) {
    236             isRootCode = true;
    237         }
    238         return isRootCode;
    239     }
    240 
    241     private List<String> addStarredInfo(Map<String, List<Set<String>>> starredPaths, String path) {
    242         Matcher starAttributeMatcher = STAR_ATTRIBUTE_PATTERN.matcher(path);
    243         StringBuilder starredPath = new StringBuilder();
    244         List<String> attributes = new ArrayList<String>();
    245         int lastEnd = 0;
    246         while (starAttributeMatcher.find()) {
    247             int start = starAttributeMatcher.start(1);
    248             int end = starAttributeMatcher.end(1);
    249             starredPath.append(path.substring(lastEnd, start));
    250             starredPath.append(".*");
    251 
    252             attributes.add(path.substring(start, end));
    253             lastEnd = end;
    254         }
    255         starredPath.append(path.substring(lastEnd));
    256         String starredPathString = starredPath.toString().intern();
    257         starredPathOutput = starredPathString;
    258 
    259         List<Set<String>> attributeList = starredPaths.get(starredPathString);
    260         if (attributeList == null) {
    261             starredPaths.put(starredPathString, attributeList = new ArrayList<Set<String>>());
    262         }
    263         int i = 0;
    264         for (String attribute : attributes) {
    265             if (attributeList.size() <= i) {
    266                 TreeSet<String> subset = new TreeSet<String>();
    267                 subset.add(attribute);
    268                 attributeList.add(subset);
    269             } else {
    270                 Set<String> subset = attributeList.get(i);
    271                 subset.add(attribute);
    272             }
    273             ++i;
    274         }
    275         return attributes;
    276     }
    277 }
    278